eca1dc36af208526c3fbb8dd44be6bf4fef9d3ba
[zfs.git] / lib / libzfs / libzfs_pool.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25  * Copyright (c) 2012 by Delphix. All rights reserved.
26  */
27
28 #include <ctype.h>
29 #include <errno.h>
30 #include <devid.h>
31 #include <fcntl.h>
32 #include <libintl.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <strings.h>
36 #include <unistd.h>
37 #include <zone.h>
38 #include <sys/stat.h>
39 #include <sys/efi_partition.h>
40 #include <sys/vtoc.h>
41 #include <sys/zfs_ioctl.h>
42 #include <dlfcn.h>
43
44 #include "zfs_namecheck.h"
45 #include "zfs_prop.h"
46 #include "libzfs_impl.h"
47 #include "zfs_comutil.h"
48 #include "zfeature_common.h"
49
/*
 * Forward declaration: pool_uses_efi() below calls read_efi_label() before
 * any definition of it is visible in this part of the file.
 */
static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
51
/*
 * Flags describing the operation for which zpool_valid_proplist() is
 * validating properties.
 *
 * The fields are declared unsigned on purpose: a signed one-bit bit-field
 * can only hold 0 and -1, so storing 1 in it is implementation-defined and
 * a comparison such as 'flags.create == 1' would never be true.
 */
typedef struct prop_flags {
        unsigned int create:1;  /* Validate property on creation */
        unsigned int import:1;  /* Validate property on import */
} prop_flags_t;
56
57 /*
58  * ====================================================================
59  *   zpool property functions
60  * ====================================================================
61  */
62
63 static int
64 zpool_get_all_props(zpool_handle_t *zhp)
65 {
66         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
67         libzfs_handle_t *hdl = zhp->zpool_hdl;
68
69         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
70
71         if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
72                 return (-1);
73
74         while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
75                 if (errno == ENOMEM) {
76                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
77                                 zcmd_free_nvlists(&zc);
78                                 return (-1);
79                         }
80                 } else {
81                         zcmd_free_nvlists(&zc);
82                         return (-1);
83                 }
84         }
85
86         if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
87                 zcmd_free_nvlists(&zc);
88                 return (-1);
89         }
90
91         zcmd_free_nvlists(&zc);
92
93         return (0);
94 }
95
96 static int
97 zpool_props_refresh(zpool_handle_t *zhp)
98 {
99         nvlist_t *old_props;
100
101         old_props = zhp->zpool_props;
102
103         if (zpool_get_all_props(zhp) != 0)
104                 return (-1);
105
106         nvlist_free(old_props);
107         return (0);
108 }
109
110 static char *
111 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
112     zprop_source_t *src)
113 {
114         nvlist_t *nv, *nvl;
115         uint64_t ival;
116         char *value;
117         zprop_source_t source;
118
119         nvl = zhp->zpool_props;
120         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
121                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
122                 source = ival;
123                 verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
124         } else {
125                 source = ZPROP_SRC_DEFAULT;
126                 if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
127                         value = "-";
128         }
129
130         if (src)
131                 *src = source;
132
133         return (value);
134 }
135
136 uint64_t
137 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
138 {
139         nvlist_t *nv, *nvl;
140         uint64_t value;
141         zprop_source_t source;
142
143         if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
144                 /*
145                  * zpool_get_all_props() has most likely failed because
146                  * the pool is faulted, but if all we need is the top level
147                  * vdev's guid then get it from the zhp config nvlist.
148                  */
149                 if ((prop == ZPOOL_PROP_GUID) &&
150                     (nvlist_lookup_nvlist(zhp->zpool_config,
151                     ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
152                     (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
153                     == 0)) {
154                         return (value);
155                 }
156                 return (zpool_prop_default_numeric(prop));
157         }
158
159         nvl = zhp->zpool_props;
160         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
161                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
162                 source = value;
163                 verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
164         } else {
165                 source = ZPROP_SRC_DEFAULT;
166                 value = zpool_prop_default_numeric(prop);
167         }
168
169         if (src)
170                 *src = source;
171
172         return (value);
173 }
174
175 /*
176  * Map VDEV STATE to printed strings.
177  */
178 char *
179 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
180 {
181         switch (state) {
182         default:
183                 break;
184         case VDEV_STATE_CLOSED:
185         case VDEV_STATE_OFFLINE:
186                 return (gettext("OFFLINE"));
187         case VDEV_STATE_REMOVED:
188                 return (gettext("REMOVED"));
189         case VDEV_STATE_CANT_OPEN:
190                 if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
191                         return (gettext("FAULTED"));
192                 else if (aux == VDEV_AUX_SPLIT_POOL)
193                         return (gettext("SPLIT"));
194                 else
195                         return (gettext("UNAVAIL"));
196         case VDEV_STATE_FAULTED:
197                 return (gettext("FAULTED"));
198         case VDEV_STATE_DEGRADED:
199                 return (gettext("DEGRADED"));
200         case VDEV_STATE_HEALTHY:
201                 return (gettext("ONLINE"));
202         }
203
204         return (gettext("UNKNOWN"));
205 }
206
207 /*
208  * Get a zpool property value for 'prop' and return the value in
209  * a pre-allocated buffer.
210  */
211 int
212 zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
213     zprop_source_t *srctype)
214 {
215         uint64_t intval;
216         const char *strval;
217         zprop_source_t src = ZPROP_SRC_NONE;
218         nvlist_t *nvroot;
219         vdev_stat_t *vs;
220         uint_t vsc;
221
222         if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
223                 switch (prop) {
224                 case ZPOOL_PROP_NAME:
225                         (void) strlcpy(buf, zpool_get_name(zhp), len);
226                         break;
227
228                 case ZPOOL_PROP_HEALTH:
229                         (void) strlcpy(buf, "FAULTED", len);
230                         break;
231
232                 case ZPOOL_PROP_GUID:
233                         intval = zpool_get_prop_int(zhp, prop, &src);
234                         (void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
235                         break;
236
237                 case ZPOOL_PROP_ALTROOT:
238                 case ZPOOL_PROP_CACHEFILE:
239                 case ZPOOL_PROP_COMMENT:
240                         if (zhp->zpool_props != NULL ||
241                             zpool_get_all_props(zhp) == 0) {
242                                 (void) strlcpy(buf,
243                                     zpool_get_prop_string(zhp, prop, &src),
244                                     len);
245                                 if (srctype != NULL)
246                                         *srctype = src;
247                                 return (0);
248                         }
249                         /* FALLTHROUGH */
250                 default:
251                         (void) strlcpy(buf, "-", len);
252                         break;
253                 }
254
255                 if (srctype != NULL)
256                         *srctype = src;
257                 return (0);
258         }
259
260         if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
261             prop != ZPOOL_PROP_NAME)
262                 return (-1);
263
264         switch (zpool_prop_get_type(prop)) {
265         case PROP_TYPE_STRING:
266                 (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
267                     len);
268                 break;
269
270         case PROP_TYPE_NUMBER:
271                 intval = zpool_get_prop_int(zhp, prop, &src);
272
273                 switch (prop) {
274                 case ZPOOL_PROP_SIZE:
275                 case ZPOOL_PROP_ALLOCATED:
276                 case ZPOOL_PROP_FREE:
277                 case ZPOOL_PROP_FREEING:
278                 case ZPOOL_PROP_EXPANDSZ:
279                 case ZPOOL_PROP_ASHIFT:
280                         (void) zfs_nicenum(intval, buf, len);
281                         break;
282
283                 case ZPOOL_PROP_CAPACITY:
284                         (void) snprintf(buf, len, "%llu%%",
285                             (u_longlong_t)intval);
286                         break;
287
288                 case ZPOOL_PROP_DEDUPRATIO:
289                         (void) snprintf(buf, len, "%llu.%02llux",
290                             (u_longlong_t)(intval / 100),
291                             (u_longlong_t)(intval % 100));
292                         break;
293
294                 case ZPOOL_PROP_HEALTH:
295                         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
296                             ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
297                         verify(nvlist_lookup_uint64_array(nvroot,
298                             ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
299                             == 0);
300
301                         (void) strlcpy(buf, zpool_state_to_name(intval,
302                             vs->vs_aux), len);
303                         break;
304                 case ZPOOL_PROP_VERSION:
305                         if (intval >= SPA_VERSION_FEATURES) {
306                                 (void) snprintf(buf, len, "-");
307                                 break;
308                         }
309                         /* FALLTHROUGH */
310                 default:
311                         (void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
312                 }
313                 break;
314
315         case PROP_TYPE_INDEX:
316                 intval = zpool_get_prop_int(zhp, prop, &src);
317                 if (zpool_prop_index_to_string(prop, intval, &strval)
318                     != 0)
319                         return (-1);
320                 (void) strlcpy(buf, strval, len);
321                 break;
322
323         default:
324                 abort();
325         }
326
327         if (srctype)
328                 *srctype = src;
329
330         return (0);
331 }
332
333 /*
334  * Check if the bootfs name has the same pool name as it is set to.
335  * Assuming bootfs is a valid dataset name.
336  */
337 static boolean_t
338 bootfs_name_valid(const char *pool, char *bootfs)
339 {
340         int len = strlen(pool);
341
342         if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
343                 return (B_FALSE);
344
345         if (strncmp(pool, bootfs, len) == 0 &&
346             (bootfs[len] == '/' || bootfs[len] == '\0'))
347                 return (B_TRUE);
348
349         return (B_FALSE);
350 }
351
352 #if defined(__sun__) || defined(__sun)
353 /*
354  * Inspect the configuration to determine if any of the devices contain
355  * an EFI label.
356  */
357 static boolean_t
358 pool_uses_efi(nvlist_t *config)
359 {
360         nvlist_t **child;
361         uint_t c, children;
362
363         if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
364             &child, &children) != 0)
365                 return (read_efi_label(config, NULL) >= 0);
366
367         for (c = 0; c < children; c++) {
368                 if (pool_uses_efi(child[c]))
369                         return (B_TRUE);
370         }
371         return (B_FALSE);
372 }
373 #endif
374
375 boolean_t
376 zpool_is_bootable(zpool_handle_t *zhp)
377 {
378         char bootfs[ZPOOL_MAXNAMELEN];
379
380         return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
381             sizeof (bootfs), NULL) == 0 && strncmp(bootfs, "-",
382             sizeof (bootfs)) != 0);
383 }
384
385
386 /*
387  * Given an nvlist of zpool properties to be set, validate that they are
388  * correct, and parse any numeric properties (index, boolean, etc) if they are
389  * specified as strings.
390  */
391 static nvlist_t *
392 zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
393     nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
394 {
395         nvpair_t *elem;
396         nvlist_t *retprops;
397         zpool_prop_t prop;
398         char *strval;
399         uint64_t intval;
400         char *slash, *check;
401         struct stat64 statbuf;
402         zpool_handle_t *zhp;
403         nvlist_t *nvroot;
404
405         if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
406                 (void) no_memory(hdl);
407                 return (NULL);
408         }
409
410         elem = NULL;
411         while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
412                 const char *propname = nvpair_name(elem);
413
414                 prop = zpool_name_to_prop(propname);
415                 if (prop == ZPROP_INVAL && zpool_prop_feature(propname)) {
416                         int err;
417                         zfeature_info_t *feature;
418                         char *fname = strchr(propname, '@') + 1;
419
420                         err = zfeature_lookup_name(fname, &feature);
421                         if (err != 0) {
422                                 ASSERT3U(err, ==, ENOENT);
423                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
424                                     "invalid feature '%s'"), fname);
425                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
426                                 goto error;
427                         }
428
429                         if (nvpair_type(elem) != DATA_TYPE_STRING) {
430                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
431                                     "'%s' must be a string"), propname);
432                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
433                                 goto error;
434                         }
435
436                         (void) nvpair_value_string(elem, &strval);
437                         if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0) {
438                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
439                                     "property '%s' can only be set to "
440                                     "'enabled'"), propname);
441                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
442                                 goto error;
443                         }
444
445                         if (nvlist_add_uint64(retprops, propname, 0) != 0) {
446                                 (void) no_memory(hdl);
447                                 goto error;
448                         }
449                         continue;
450                 }
451
452                 /*
453                  * Make sure this property is valid and applies to this type.
454                  */
455                 if (prop == ZPROP_INVAL) {
456                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
457                             "invalid property '%s'"), propname);
458                         (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
459                         goto error;
460                 }
461
462                 if (zpool_prop_readonly(prop)) {
463                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
464                             "is readonly"), propname);
465                         (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
466                         goto error;
467                 }
468
469                 if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
470                     &strval, &intval, errbuf) != 0)
471                         goto error;
472
473                 /*
474                  * Perform additional checking for specific properties.
475                  */
476                 switch (prop) {
477                 default:
478                         break;
479                 case ZPOOL_PROP_VERSION:
480                         if (intval < version ||
481                             !SPA_VERSION_IS_SUPPORTED(intval)) {
482                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
483                                     "property '%s' number %d is invalid."),
484                                     propname, intval);
485                                 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
486                                 goto error;
487                         }
488                         break;
489
490                 case ZPOOL_PROP_ASHIFT:
491                         if (!flags.create) {
492                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
493                                     "property '%s' can only be set at "
494                                     "creation time"), propname);
495                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
496                                 goto error;
497                         }
498
499                         if (intval != 0 && (intval < 9 || intval > 13)) {
500                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
501                                     "property '%s' number %d is invalid."),
502                                     propname, intval);
503                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
504                                 goto error;
505                         }
506                         break;
507
508                 case ZPOOL_PROP_BOOTFS:
509                         if (flags.create || flags.import) {
510                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
511                                     "property '%s' cannot be set at creation "
512                                     "or import time"), propname);
513                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
514                                 goto error;
515                         }
516
517                         if (version < SPA_VERSION_BOOTFS) {
518                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
519                                     "pool must be upgraded to support "
520                                     "'%s' property"), propname);
521                                 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
522                                 goto error;
523                         }
524
525                         /*
526                          * bootfs property value has to be a dataset name and
527                          * the dataset has to be in the same pool as it sets to.
528                          */
529                         if (strval[0] != '\0' && !bootfs_name_valid(poolname,
530                             strval)) {
531                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
532                                     "is an invalid name"), strval);
533                                 (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
534                                 goto error;
535                         }
536
537                         if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
538                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
539                                     "could not open pool '%s'"), poolname);
540                                 (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
541                                 goto error;
542                         }
543                         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
544                             ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
545
546 #if defined(__sun__) || defined(__sun)
547                         /*
548                          * bootfs property cannot be set on a disk which has
549                          * been EFI labeled.
550                          */
551                         if (pool_uses_efi(nvroot)) {
552                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
553                                     "property '%s' not supported on "
554                                     "EFI labeled devices"), propname);
555                                 (void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
556                                 zpool_close(zhp);
557                                 goto error;
558                         }
559 #endif
560                         zpool_close(zhp);
561                         break;
562
563                 case ZPOOL_PROP_ALTROOT:
564                         if (!flags.create && !flags.import) {
565                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
566                                     "property '%s' can only be set during pool "
567                                     "creation or import"), propname);
568                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
569                                 goto error;
570                         }
571
572                         if (strval[0] != '/') {
573                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
574                                     "bad alternate root '%s'"), strval);
575                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
576                                 goto error;
577                         }
578                         break;
579
580                 case ZPOOL_PROP_CACHEFILE:
581                         if (strval[0] == '\0')
582                                 break;
583
584                         if (strcmp(strval, "none") == 0)
585                                 break;
586
587                         if (strval[0] != '/') {
588                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
589                                     "property '%s' must be empty, an "
590                                     "absolute path, or 'none'"), propname);
591                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
592                                 goto error;
593                         }
594
595                         slash = strrchr(strval, '/');
596
597                         if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
598                             strcmp(slash, "/..") == 0) {
599                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
600                                     "'%s' is not a valid file"), strval);
601                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
602                                 goto error;
603                         }
604
605                         *slash = '\0';
606
607                         if (strval[0] != '\0' &&
608                             (stat64(strval, &statbuf) != 0 ||
609                             !S_ISDIR(statbuf.st_mode))) {
610                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
611                                     "'%s' is not a valid directory"),
612                                     strval);
613                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
614                                 goto error;
615                         }
616
617                         *slash = '/';
618                         break;
619
620                 case ZPOOL_PROP_COMMENT:
621                         for (check = strval; *check != '\0'; check++) {
622                                 if (!isprint(*check)) {
623                                         zfs_error_aux(hdl,
624                                             dgettext(TEXT_DOMAIN,
625                                             "comment may only have printable "
626                                             "characters"));
627                                         (void) zfs_error(hdl, EZFS_BADPROP,
628                                             errbuf);
629                                         goto error;
630                                 }
631                         }
632                         if (strlen(strval) > ZPROP_MAX_COMMENT) {
633                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
634                                     "comment must not exceed %d characters"),
635                                     ZPROP_MAX_COMMENT);
636                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
637                                 goto error;
638                         }
639                         break;
640                 case ZPOOL_PROP_READONLY:
641                         if (!flags.import) {
642                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
643                                     "property '%s' can only be set at "
644                                     "import time"), propname);
645                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
646                                 goto error;
647                         }
648                         break;
649                 }
650         }
651
652         return (retprops);
653 error:
654         nvlist_free(retprops);
655         return (NULL);
656 }
657
658 /*
659  * Set zpool property : propname=propval.
660  */
661 int
662 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
663 {
664         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
665         int ret = -1;
666         char errbuf[1024];
667         nvlist_t *nvl = NULL;
668         nvlist_t *realprops;
669         uint64_t version;
670         prop_flags_t flags = { 0 };
671
672         (void) snprintf(errbuf, sizeof (errbuf),
673             dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
674             zhp->zpool_name);
675
676         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
677                 return (no_memory(zhp->zpool_hdl));
678
679         if (nvlist_add_string(nvl, propname, propval) != 0) {
680                 nvlist_free(nvl);
681                 return (no_memory(zhp->zpool_hdl));
682         }
683
684         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
685         if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
686             zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
687                 nvlist_free(nvl);
688                 return (-1);
689         }
690
691         nvlist_free(nvl);
692         nvl = realprops;
693
694         /*
695          * Execute the corresponding ioctl() to set this property.
696          */
697         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
698
699         if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
700                 nvlist_free(nvl);
701                 return (-1);
702         }
703
704         ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
705
706         zcmd_free_nvlists(&zc);
707         nvlist_free(nvl);
708
709         if (ret)
710                 (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
711         else
712                 (void) zpool_props_refresh(zhp);
713
714         return (ret);
715 }
716
717 int
718 zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
719 {
720         libzfs_handle_t *hdl = zhp->zpool_hdl;
721         zprop_list_t *entry;
722         char buf[ZFS_MAXPROPLEN];
723         nvlist_t *features = NULL;
724         nvpair_t *nvp;
725         zprop_list_t **last;
726         boolean_t firstexpand = (NULL == *plp);
727         int i;
728
729         if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
730                 return (-1);
731
732         last = plp;
733         while (*last != NULL)
734                 last = &(*last)->pl_next;
735
736         if ((*plp)->pl_all)
737                 features = zpool_get_features(zhp);
738
739         if ((*plp)->pl_all && firstexpand) {
740                 for (i = 0; i < SPA_FEATURES; i++) {
741                         zprop_list_t *entry = zfs_alloc(hdl,
742                             sizeof (zprop_list_t));
743                         entry->pl_prop = ZPROP_INVAL;
744                         entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
745                             spa_feature_table[i].fi_uname);
746                         entry->pl_width = strlen(entry->pl_user_prop);
747                         entry->pl_all = B_TRUE;
748
749                         *last = entry;
750                         last = &entry->pl_next;
751                 }
752         }
753
754         /* add any unsupported features */
755         for (nvp = nvlist_next_nvpair(features, NULL);
756             nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
757                 char *propname;
758                 boolean_t found;
759                 zprop_list_t *entry;
760
761                 if (zfeature_is_supported(nvpair_name(nvp)))
762                         continue;
763
764                 propname = zfs_asprintf(hdl, "unsupported@%s",
765                     nvpair_name(nvp));
766
767                 /*
768                  * Before adding the property to the list make sure that no
769                  * other pool already added the same property.
770                  */
771                 found = B_FALSE;
772                 entry = *plp;
773                 while (entry != NULL) {
774                         if (entry->pl_user_prop != NULL &&
775                             strcmp(propname, entry->pl_user_prop) == 0) {
776                                 found = B_TRUE;
777                                 break;
778                         }
779                         entry = entry->pl_next;
780                 }
781                 if (found) {
782                         free(propname);
783                         continue;
784                 }
785
786                 entry = zfs_alloc(hdl, sizeof (zprop_list_t));
787                 entry->pl_prop = ZPROP_INVAL;
788                 entry->pl_user_prop = propname;
789                 entry->pl_width = strlen(entry->pl_user_prop);
790                 entry->pl_all = B_TRUE;
791
792                 *last = entry;
793                 last = &entry->pl_next;
794         }
795
796         for (entry = *plp; entry != NULL; entry = entry->pl_next) {
797
798                 if (entry->pl_fixed)
799                         continue;
800
801                 if (entry->pl_prop != ZPROP_INVAL &&
802                     zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
803                     NULL) == 0) {
804                         if (strlen(buf) > entry->pl_width)
805                                 entry->pl_width = strlen(buf);
806                 }
807         }
808
809         return (0);
810 }
811
812 /*
813  * Get the state for the given feature on the given ZFS pool.
814  */
815 int
816 zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
817     size_t len)
818 {
819         uint64_t refcount;
820         boolean_t found = B_FALSE;
821         nvlist_t *features = zpool_get_features(zhp);
822         boolean_t supported;
823         const char *feature = strchr(propname, '@') + 1;
824
825         supported = zpool_prop_feature(propname);
826         ASSERT(supported || zpool_prop_unsupported(propname));
827
828         /*
829          * Convert from feature name to feature guid. This conversion is
830          * unecessary for unsupported@... properties because they already
831          * use guids.
832          */
833         if (supported) {
834                 int ret;
835                 zfeature_info_t *fi;
836
837                 ret = zfeature_lookup_name(feature, &fi);
838                 if (ret != 0) {
839                         (void) strlcpy(buf, "-", len);
840                         return (ENOTSUP);
841                 }
842                 feature = fi->fi_guid;
843         }
844
845         if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
846                 found = B_TRUE;
847
848         if (supported) {
849                 if (!found) {
850                         (void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
851                 } else  {
852                         if (refcount == 0)
853                                 (void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
854                         else
855                                 (void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
856                 }
857         } else {
858                 if (found) {
859                         if (refcount == 0) {
860                                 (void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE);
861                         } else {
862                                 (void) strcpy(buf, ZFS_UNSUPPORTED_READONLY);
863                         }
864                 } else {
865                         (void) strlcpy(buf, "-", len);
866                         return (ENOTSUP);
867                 }
868         }
869
870         return (0);
871 }
872
/*
 * Don't start the slice at the default block of 34; many storage
 * devices will use a stripe width of 128k, other vendors prefer a 1m
 * alignment.  It is best to play it safe and ensure a 1m alignment
 * given 512B blocks.  When the block size is larger by a power of 2
 * we will still be 1m aligned.  Some devices are sensitive to the
 * partition ending alignment as well.
 *
 * 2048 blocks of 512 bytes is exactly 1m (2048 * 512 = 1048576 bytes).
 * NOTE(review): PARTITION_END_ALIGNMENT is presumably expressed in the
 * same block units as NEW_START_BLOCK -- confirm against its users.
 */
#define NEW_START_BLOCK         2048
#define PARTITION_END_ALIGNMENT 2048
883
884 /*
885  * Validate the given pool name, optionally putting an extended error message in
886  * 'buf'.
887  */
888 boolean_t
889 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
890 {
891         namecheck_err_t why;
892         char what;
893         int ret;
894
895         ret = pool_namecheck(pool, &why, &what);
896
897         /*
898          * The rules for reserved pool names were extended at a later point.
899          * But we need to support users with existing pools that may now be
900          * invalid.  So we only check for this expanded set of names during a
901          * create (or import), and only in userland.
902          */
903         if (ret == 0 && !isopen &&
904             (strncmp(pool, "mirror", 6) == 0 ||
905             strncmp(pool, "raidz", 5) == 0 ||
906             strncmp(pool, "spare", 5) == 0 ||
907             strcmp(pool, "log") == 0)) {
908                 if (hdl != NULL)
909                         zfs_error_aux(hdl,
910                             dgettext(TEXT_DOMAIN, "name is reserved"));
911                 return (B_FALSE);
912         }
913
914
915         if (ret != 0) {
916                 if (hdl != NULL) {
917                         switch (why) {
918                         case NAME_ERR_TOOLONG:
919                                 zfs_error_aux(hdl,
920                                     dgettext(TEXT_DOMAIN, "name is too long"));
921                                 break;
922
923                         case NAME_ERR_INVALCHAR:
924                                 zfs_error_aux(hdl,
925                                     dgettext(TEXT_DOMAIN, "invalid character "
926                                     "'%c' in pool name"), what);
927                                 break;
928
929                         case NAME_ERR_NOLETTER:
930                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
931                                     "name must begin with a letter"));
932                                 break;
933
934                         case NAME_ERR_RESERVED:
935                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
936                                     "name is reserved"));
937                                 break;
938
939                         case NAME_ERR_DISKLIKE:
940                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
941                                     "pool name is reserved"));
942                                 break;
943
944                         case NAME_ERR_LEADING_SLASH:
945                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
946                                     "leading slash in name"));
947                                 break;
948
949                         case NAME_ERR_EMPTY_COMPONENT:
950                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
951                                     "empty component in name"));
952                                 break;
953
954                         case NAME_ERR_TRAILING_SLASH:
955                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
956                                     "trailing slash in name"));
957                                 break;
958
959                         case NAME_ERR_MULTIPLE_AT:
960                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
961                                     "multiple '@' delimiters in name"));
962                                 break;
963                         case NAME_ERR_NO_AT:
964                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
965                                     "permission set is missing '@'"));
966                                 break;
967                         }
968                 }
969                 return (B_FALSE);
970         }
971
972         return (B_TRUE);
973 }
974
975 /*
976  * Open a handle to the given pool, even if the pool is currently in the FAULTED
977  * state.
978  */
979 zpool_handle_t *
980 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
981 {
982         zpool_handle_t *zhp;
983         boolean_t missing;
984
985         /*
986          * Make sure the pool name is valid.
987          */
988         if (!zpool_name_valid(hdl, B_TRUE, pool)) {
989                 (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
990                     dgettext(TEXT_DOMAIN, "cannot open '%s'"),
991                     pool);
992                 return (NULL);
993         }
994
995         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
996                 return (NULL);
997
998         zhp->zpool_hdl = hdl;
999         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1000
1001         if (zpool_refresh_stats(zhp, &missing) != 0) {
1002                 zpool_close(zhp);
1003                 return (NULL);
1004         }
1005
1006         if (missing) {
1007                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
1008                 (void) zfs_error_fmt(hdl, EZFS_NOENT,
1009                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
1010                 zpool_close(zhp);
1011                 return (NULL);
1012         }
1013
1014         return (zhp);
1015 }
1016
1017 /*
1018  * Like the above, but silent on error.  Used when iterating over pools (because
1019  * the configuration cache may be out of date).
1020  */
1021 int
1022 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
1023 {
1024         zpool_handle_t *zhp;
1025         boolean_t missing;
1026
1027         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1028                 return (-1);
1029
1030         zhp->zpool_hdl = hdl;
1031         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1032
1033         if (zpool_refresh_stats(zhp, &missing) != 0) {
1034                 zpool_close(zhp);
1035                 return (-1);
1036         }
1037
1038         if (missing) {
1039                 zpool_close(zhp);
1040                 *ret = NULL;
1041                 return (0);
1042         }
1043
1044         *ret = zhp;
1045         return (0);
1046 }
1047
1048 /*
1049  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
1050  * state.
1051  */
1052 zpool_handle_t *
1053 zpool_open(libzfs_handle_t *hdl, const char *pool)
1054 {
1055         zpool_handle_t *zhp;
1056
1057         if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
1058                 return (NULL);
1059
1060         if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
1061                 (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
1062                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
1063                 zpool_close(zhp);
1064                 return (NULL);
1065         }
1066
1067         return (zhp);
1068 }
1069
1070 /*
1071  * Close the handle.  Simply frees the memory associated with the handle.
1072  */
1073 void
1074 zpool_close(zpool_handle_t *zhp)
1075 {
1076         if (zhp->zpool_config)
1077                 nvlist_free(zhp->zpool_config);
1078         if (zhp->zpool_old_config)
1079                 nvlist_free(zhp->zpool_old_config);
1080         if (zhp->zpool_props)
1081                 nvlist_free(zhp->zpool_props);
1082         free(zhp);
1083 }
1084
1085 /*
1086  * Return the name of the pool.
1087  */
1088 const char *
1089 zpool_get_name(zpool_handle_t *zhp)
1090 {
1091         return (zhp->zpool_name);
1092 }
1093
1094
1095 /*
1096  * Return the state of the pool (ACTIVE or UNAVAILABLE)
1097  */
1098 int
1099 zpool_get_state(zpool_handle_t *zhp)
1100 {
1101         return (zhp->zpool_state);
1102 }
1103
1104 /*
1105  * Create the named pool, using the provided vdev list.  It is assumed
1106  * that the consumer has already validated the contents of the nvlist, so we
1107  * don't have to worry about error semantics.
1108  */
1109 int
1110 zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
1111     nvlist_t *props, nvlist_t *fsprops)
1112 {
1113         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1114         nvlist_t *zc_fsprops = NULL;
1115         nvlist_t *zc_props = NULL;
1116         char msg[1024];
1117         char *altroot;
1118         int ret = -1;
1119
1120         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1121             "cannot create '%s'"), pool);
1122
1123         if (!zpool_name_valid(hdl, B_FALSE, pool))
1124                 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
1125
1126         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1127                 return (-1);
1128
1129         if (props) {
1130                 prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };
1131
1132                 if ((zc_props = zpool_valid_proplist(hdl, pool, props,
1133                     SPA_VERSION_1, flags, msg)) == NULL) {
1134                         goto create_failed;
1135                 }
1136         }
1137
1138         if (fsprops) {
1139                 uint64_t zoned;
1140                 char *zonestr;
1141
1142                 zoned = ((nvlist_lookup_string(fsprops,
1143                     zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
1144                     strcmp(zonestr, "on") == 0);
1145
1146                 if ((zc_fsprops = zfs_valid_proplist(hdl,
1147                     ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) {
1148                         goto create_failed;
1149                 }
1150                 if (!zc_props &&
1151                     (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
1152                         goto create_failed;
1153                 }
1154                 if (nvlist_add_nvlist(zc_props,
1155                     ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
1156                         goto create_failed;
1157                 }
1158         }
1159
1160         if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
1161                 goto create_failed;
1162
1163         (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
1164
1165         if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
1166
1167                 zcmd_free_nvlists(&zc);
1168                 nvlist_free(zc_props);
1169                 nvlist_free(zc_fsprops);
1170
1171                 switch (errno) {
1172                 case EBUSY:
1173                         /*
1174                          * This can happen if the user has specified the same
1175                          * device multiple times.  We can't reliably detect this
1176                          * until we try to add it and see we already have a
1177                          * label.  This can also happen under if the device is
1178                          * part of an active md or lvm device.
1179                          */
1180                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1181                             "one or more vdevs refer to the same device, or one of\n"
1182                             "the devices is part of an active md or lvm device"));
1183                         return (zfs_error(hdl, EZFS_BADDEV, msg));
1184
1185                 case EOVERFLOW:
1186                         /*
1187                          * This occurs when one of the devices is below
1188                          * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1189                          * device was the problem device since there's no
1190                          * reliable way to determine device size from userland.
1191                          */
1192                         {
1193                                 char buf[64];
1194
1195                                 zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1196
1197                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1198                                     "one or more devices is less than the "
1199                                     "minimum size (%s)"), buf);
1200                         }
1201                         return (zfs_error(hdl, EZFS_BADDEV, msg));
1202
1203                 case ENOSPC:
1204                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1205                             "one or more devices is out of space"));
1206                         return (zfs_error(hdl, EZFS_BADDEV, msg));
1207
1208                 case ENOTBLK:
1209                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1210                             "cache device must be a disk or disk slice"));
1211                         return (zfs_error(hdl, EZFS_BADDEV, msg));
1212
1213                 default:
1214                         return (zpool_standard_error(hdl, errno, msg));
1215                 }
1216         }
1217
1218         /*
1219          * If this is an alternate root pool, then we automatically set the
1220          * mountpoint of the root dataset to be '/'.
1221          */
1222         if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
1223             &altroot) == 0) {
1224                 zfs_handle_t *zhp;
1225
1226                 verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
1227                 verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1228                     "/") == 0);
1229
1230                 zfs_close(zhp);
1231         }
1232
1233 create_failed:
1234         zcmd_free_nvlists(&zc);
1235         nvlist_free(zc_props);
1236         nvlist_free(zc_fsprops);
1237         return (ret);
1238 }
1239
1240 /*
1241  * Destroy the given pool.  It is up to the caller to ensure that there are no
1242  * datasets left in the pool.
1243  */
1244 int
1245 zpool_destroy(zpool_handle_t *zhp)
1246 {
1247         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1248         zfs_handle_t *zfp = NULL;
1249         libzfs_handle_t *hdl = zhp->zpool_hdl;
1250         char msg[1024];
1251
1252         if (zhp->zpool_state == POOL_STATE_ACTIVE &&
1253             (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
1254                 return (-1);
1255
1256         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1257
1258         if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
1259                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1260                     "cannot destroy '%s'"), zhp->zpool_name);
1261
1262                 if (errno == EROFS) {
1263                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1264                             "one or more devices is read only"));
1265                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1266                 } else {
1267                         (void) zpool_standard_error(hdl, errno, msg);
1268                 }
1269
1270                 if (zfp)
1271                         zfs_close(zfp);
1272                 return (-1);
1273         }
1274
1275         if (zfp) {
1276                 remove_mountpoint(zfp);
1277                 zfs_close(zfp);
1278         }
1279
1280         return (0);
1281 }
1282
1283 /*
1284  * Add the given vdevs to the pool.  The caller must have already performed the
1285  * necessary verification to ensure that the vdev specification is well-formed.
1286  */
1287 int
1288 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
1289 {
1290         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1291         int ret;
1292         libzfs_handle_t *hdl = zhp->zpool_hdl;
1293         char msg[1024];
1294         nvlist_t **spares, **l2cache;
1295         uint_t nspares, nl2cache;
1296
1297         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1298             "cannot add to '%s'"), zhp->zpool_name);
1299
1300         if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1301             SPA_VERSION_SPARES &&
1302             nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1303             &spares, &nspares) == 0) {
1304                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1305                     "upgraded to add hot spares"));
1306                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
1307         }
1308
1309 #if defined(__sun__) || defined(__sun)
1310         if (zpool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot,
1311             ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
1312                 uint64_t s;
1313
1314                 for (s = 0; s < nspares; s++) {
1315                         char *path;
1316
1317                         if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
1318                             &path) == 0 && pool_uses_efi(spares[s])) {
1319                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1320                                     "device '%s' contains an EFI label and "
1321                                     "cannot be used on root pools."),
1322                                     zpool_vdev_name(hdl, NULL, spares[s],
1323                                     B_FALSE));
1324                                 return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
1325                         }
1326                 }
1327         }
1328 #endif
1329
1330         if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1331             SPA_VERSION_L2CACHE &&
1332             nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1333             &l2cache, &nl2cache) == 0) {
1334                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1335                     "upgraded to add cache devices"));
1336                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
1337         }
1338
1339         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1340                 return (-1);
1341         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1342
1343         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
1344                 switch (errno) {
1345                 case EBUSY:
1346                         /*
1347                          * This can happen if the user has specified the same
1348                          * device multiple times.  We can't reliably detect this
1349                          * until we try to add it and see we already have a
1350                          * label.
1351                          */
1352                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1353                             "one or more vdevs refer to the same device"));
1354                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1355                         break;
1356
1357                 case EOVERFLOW:
1358                         /*
1359                          * This occurrs when one of the devices is below
1360                          * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1361                          * device was the problem device since there's no
1362                          * reliable way to determine device size from userland.
1363                          */
1364                         {
1365                                 char buf[64];
1366
1367                                 zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1368
1369                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1370                                     "device is less than the minimum "
1371                                     "size (%s)"), buf);
1372                         }
1373                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1374                         break;
1375
1376                 case ENOTSUP:
1377                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1378                             "pool must be upgraded to add these vdevs"));
1379                         (void) zfs_error(hdl, EZFS_BADVERSION, msg);
1380                         break;
1381
1382                 case EDOM:
1383                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1384                             "root pool can not have multiple vdevs"
1385                             " or separate logs"));
1386                         (void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
1387                         break;
1388
1389                 case ENOTBLK:
1390                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1391                             "cache device must be a disk or disk slice"));
1392                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1393                         break;
1394
1395                 default:
1396                         (void) zpool_standard_error(hdl, errno, msg);
1397                 }
1398
1399                 ret = -1;
1400         } else {
1401                 ret = 0;
1402         }
1403
1404         zcmd_free_nvlists(&zc);
1405
1406         return (ret);
1407 }
1408
1409 /*
1410  * Exports the pool from the system.  The caller must ensure that there are no
1411  * mounted datasets in the pool.
1412  */
1413 int
1414 zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)
1415 {
1416         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1417         char msg[1024];
1418
1419         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1420             "cannot export '%s'"), zhp->zpool_name);
1421
1422         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1423         zc.zc_cookie = force;
1424         zc.zc_guid = hardforce;
1425
1426         if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1427                 switch (errno) {
1428                 case EXDEV:
1429                         zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1430                             "use '-f' to override the following errors:\n"
1431                             "'%s' has an active shared spare which could be"
1432                             " used by other pools once '%s' is exported."),
1433                             zhp->zpool_name, zhp->zpool_name);
1434                         return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1435                             msg));
1436                 default:
1437                         return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1438                             msg));
1439                 }
1440         }
1441
1442         return (0);
1443 }
1444
1445 int
1446 zpool_export(zpool_handle_t *zhp, boolean_t force)
1447 {
1448         return (zpool_export_common(zhp, force, B_FALSE));
1449 }
1450
1451 int
1452 zpool_export_force(zpool_handle_t *zhp)
1453 {
1454         return (zpool_export_common(zhp, B_TRUE, B_TRUE));
1455 }
1456
1457 static void
1458 zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
1459     nvlist_t *config)
1460 {
1461         nvlist_t *nv = NULL;
1462         uint64_t rewindto;
1463         int64_t loss = -1;
1464         struct tm t;
1465         char timestr[128];
1466
1467         if (!hdl->libzfs_printerr || config == NULL)
1468                 return;
1469
1470         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1471             nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
1472                 return;
1473         }
1474
1475         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1476                 return;
1477         (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1478
1479         if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1480             strftime(timestr, 128, "%c", &t) != 0) {
1481                 if (dryrun) {
1482                         (void) printf(dgettext(TEXT_DOMAIN,
1483                             "Would be able to return %s "
1484                             "to its state as of %s.\n"),
1485                             name, timestr);
1486                 } else {
1487                         (void) printf(dgettext(TEXT_DOMAIN,
1488                             "Pool %s returned to its state as of %s.\n"),
1489                             name, timestr);
1490                 }
1491                 if (loss > 120) {
1492                         (void) printf(dgettext(TEXT_DOMAIN,
1493                             "%s approximately %lld "),
1494                             dryrun ? "Would discard" : "Discarded",
1495                             ((longlong_t)loss + 30) / 60);
1496                         (void) printf(dgettext(TEXT_DOMAIN,
1497                             "minutes of transactions.\n"));
1498                 } else if (loss > 0) {
1499                         (void) printf(dgettext(TEXT_DOMAIN,
1500                             "%s approximately %lld "),
1501                             dryrun ? "Would discard" : "Discarded",
1502                             (longlong_t)loss);
1503                         (void) printf(dgettext(TEXT_DOMAIN,
1504                             "seconds of transactions.\n"));
1505                 }
1506         }
1507 }
1508
1509 void
1510 zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
1511     nvlist_t *config)
1512 {
1513         nvlist_t *nv = NULL;
1514         int64_t loss = -1;
1515         uint64_t edata = UINT64_MAX;
1516         uint64_t rewindto;
1517         struct tm t;
1518         char timestr[128];
1519
1520         if (!hdl->libzfs_printerr)
1521                 return;
1522
1523         if (reason >= 0)
1524                 (void) printf(dgettext(TEXT_DOMAIN, "action: "));
1525         else
1526                 (void) printf(dgettext(TEXT_DOMAIN, "\t"));
1527
1528         /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
1529         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1530             nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
1531             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1532                 goto no_info;
1533
1534         (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1535         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
1536             &edata);
1537
1538         (void) printf(dgettext(TEXT_DOMAIN,
1539             "Recovery is possible, but will result in some data loss.\n"));
1540
1541         if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1542             strftime(timestr, 128, "%c", &t) != 0) {
1543                 (void) printf(dgettext(TEXT_DOMAIN,
1544                     "\tReturning the pool to its state as of %s\n"
1545                     "\tshould correct the problem.  "),
1546                     timestr);
1547         } else {
1548                 (void) printf(dgettext(TEXT_DOMAIN,
1549                     "\tReverting the pool to an earlier state "
1550                     "should correct the problem.\n\t"));
1551         }
1552
1553         if (loss > 120) {
1554                 (void) printf(dgettext(TEXT_DOMAIN,
1555                     "Approximately %lld minutes of data\n"
1556                     "\tmust be discarded, irreversibly.  "),
1557                     ((longlong_t)loss + 30) / 60);
1558         } else if (loss > 0) {
1559                 (void) printf(dgettext(TEXT_DOMAIN,
1560                     "Approximately %lld seconds of data\n"
1561                     "\tmust be discarded, irreversibly.  "),
1562                     (longlong_t)loss);
1563         }
1564         if (edata != 0 && edata != UINT64_MAX) {
1565                 if (edata == 1) {
1566                         (void) printf(dgettext(TEXT_DOMAIN,
1567                             "After rewind, at least\n"
1568                             "\tone persistent user-data error will remain.  "));
1569                 } else {
1570                         (void) printf(dgettext(TEXT_DOMAIN,
1571                             "After rewind, several\n"
1572                             "\tpersistent user-data errors will remain.  "));
1573                 }
1574         }
1575         (void) printf(dgettext(TEXT_DOMAIN,
1576             "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
1577             reason >= 0 ? "clear" : "import", name);
1578
1579         (void) printf(dgettext(TEXT_DOMAIN,
1580             "A scrub of the pool\n"
1581             "\tis strongly recommended after recovery.\n"));
1582         return;
1583
1584 no_info:
1585         (void) printf(dgettext(TEXT_DOMAIN,
1586             "Destroy and re-create the pool from\n\ta backup source.\n"));
1587 }
1588
1589 /*
1590  * zpool_import() is a contracted interface. Should be kept the same
1591  * if possible.
1592  *
1593  * Applications should use zpool_import_props() to import a pool with
1594  * new properties value to be set.
1595  */
1596 int
1597 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1598     char *altroot)
1599 {
1600         nvlist_t *props = NULL;
1601         int ret;
1602
1603         if (altroot != NULL) {
1604                 if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1605                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1606                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1607                             newname));
1608                 }
1609
1610                 if (nvlist_add_string(props,
1611                     zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
1612                     nvlist_add_string(props,
1613                     zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
1614                         nvlist_free(props);
1615                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1616                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1617                             newname));
1618                 }
1619         }
1620
1621         ret = zpool_import_props(hdl, config, newname, props,
1622             ZFS_IMPORT_NORMAL);
1623         if (props)
1624                 nvlist_free(props);
1625         return (ret);
1626 }
1627
1628 static void
1629 print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
1630     int indent)
1631 {
1632         nvlist_t **child;
1633         uint_t c, children;
1634         char *vname;
1635         uint64_t is_log = 0;
1636
1637         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
1638             &is_log);
1639
1640         if (name != NULL)
1641                 (void) printf("\t%*s%s%s\n", indent, "", name,
1642                     is_log ? " [log]" : "");
1643
1644         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1645             &child, &children) != 0)
1646                 return;
1647
1648         for (c = 0; c < children; c++) {
1649                 vname = zpool_vdev_name(hdl, NULL, child[c], B_TRUE);
1650                 print_vdev_tree(hdl, vname, child[c], indent + 2);
1651                 free(vname);
1652         }
1653 }
1654
1655 void
1656 zpool_print_unsup_feat(nvlist_t *config)
1657 {
1658         nvlist_t *nvinfo, *unsup_feat;
1659         nvpair_t *nvp;
1660
1661         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) ==
1662             0);
1663         verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT,
1664             &unsup_feat) == 0);
1665
1666         for (nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL;
1667             nvp = nvlist_next_nvpair(unsup_feat, nvp)) {
1668                 char *desc;
1669
1670                 verify(nvpair_type(nvp) == DATA_TYPE_STRING);
1671                 verify(nvpair_value_string(nvp, &desc) == 0);
1672
1673                 if (strlen(desc) > 0)
1674                         (void) printf("\t%s (%s)\n", nvpair_name(nvp), desc);
1675                 else
1676                         (void) printf("\t%s\n", nvpair_name(nvp));
1677         }
1678 }
1679
1680 /*
1681  * Import the given pool using the known configuration and a list of
1682  * properties to be set. The configuration should have come from
1683  * zpool_find_import(). The 'newname' parameters control whether the pool
1684  * is imported with a different name.
1685  */
1686 int
1687 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1688     nvlist_t *props, int flags)
1689 {
1690         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1691         zpool_rewind_policy_t policy;
1692         nvlist_t *nv = NULL;
1693         nvlist_t *nvinfo = NULL;
1694         nvlist_t *missing = NULL;
1695         char *thename;
1696         char *origname;
1697         int ret;
1698         int error = 0;
1699         char errbuf[1024];
1700
1701         verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1702             &origname) == 0);
1703
1704         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1705             "cannot import pool '%s'"), origname);
1706
1707         if (newname != NULL) {
1708                 if (!zpool_name_valid(hdl, B_FALSE, newname))
1709                         return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1710                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1711                             newname));
1712                 thename = (char *)newname;
1713         } else {
1714                 thename = origname;
1715         }
1716
1717         if (props) {
1718                 uint64_t version;
1719                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
1720
1721                 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1722                     &version) == 0);
1723
1724                 if ((props = zpool_valid_proplist(hdl, origname,
1725                     props, version, flags, errbuf)) == NULL) {
1726                         return (-1);
1727                 } else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1728                         nvlist_free(props);
1729                         return (-1);
1730                 }
1731         }
1732
1733         (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1734
1735         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1736             &zc.zc_guid) == 0);
1737
1738         if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1739                 nvlist_free(props);
1740                 return (-1);
1741         }
1742         if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
1743                 nvlist_free(props);
1744                 return (-1);
1745         }
1746
1747         zc.zc_cookie = flags;
1748         while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
1749             errno == ENOMEM) {
1750                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
1751                         zcmd_free_nvlists(&zc);
1752                         return (-1);
1753                 }
1754         }
1755         if (ret != 0)
1756                 error = errno;
1757
1758         (void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
1759         zpool_get_rewind_policy(config, &policy);
1760
1761         if (error) {
1762                 char desc[1024];
1763
1764                 /*
1765                  * Dry-run failed, but we print out what success
1766                  * looks like if we found a best txg
1767                  */
1768                 if (policy.zrp_request & ZPOOL_TRY_REWIND) {
1769                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1770                             B_TRUE, nv);
1771                         nvlist_free(nv);
1772                         return (-1);
1773                 }
1774
1775                 if (newname == NULL)
1776                         (void) snprintf(desc, sizeof (desc),
1777                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1778                             thename);
1779                 else
1780                         (void) snprintf(desc, sizeof (desc),
1781                             dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1782                             origname, thename);
1783
1784                 switch (error) {
1785                 case ENOTSUP:
1786                         if (nv != NULL && nvlist_lookup_nvlist(nv,
1787                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1788                             nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
1789                                 (void) printf(dgettext(TEXT_DOMAIN, "This "
1790                                     "pool uses the following feature(s) not "
1791                                     "supported by this system:\n"));
1792                                 zpool_print_unsup_feat(nv);
1793                                 if (nvlist_exists(nvinfo,
1794                                     ZPOOL_CONFIG_CAN_RDONLY)) {
1795                                         (void) printf(dgettext(TEXT_DOMAIN,
1796                                             "All unsupported features are only "
1797                                             "required for writing to the pool."
1798                                             "\nThe pool can be imported using "
1799                                             "'-o readonly=on'.\n"));
1800                                 }
1801                         }
1802                         /*
1803                          * Unsupported version.
1804                          */
1805                         (void) zfs_error(hdl, EZFS_BADVERSION, desc);
1806                         break;
1807
1808                 case EINVAL:
1809                         (void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1810                         break;
1811
1812                 case EROFS:
1813                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1814                             "one or more devices is read only"));
1815                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
1816                         break;
1817
1818                 case ENXIO:
1819                         if (nv && nvlist_lookup_nvlist(nv,
1820                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1821                             nvlist_lookup_nvlist(nvinfo,
1822                             ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
1823                                 (void) printf(dgettext(TEXT_DOMAIN,
1824                                     "The devices below are missing, use "
1825                                     "'-m' to import the pool anyway:\n"));
1826                                 print_vdev_tree(hdl, NULL, missing, 2);
1827                                 (void) printf("\n");
1828                         }
1829                         (void) zpool_standard_error(hdl, error, desc);
1830                         break;
1831
1832                 case EEXIST:
1833                         (void) zpool_standard_error(hdl, error, desc);
1834                         break;
1835
1836                 case EBUSY:
1837                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1838                             "one or more devices are already in use\n"));
1839                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
1840                         break;
1841
1842                 default:
1843                         (void) zpool_standard_error(hdl, error, desc);
1844                         zpool_explain_recover(hdl,
1845                             newname ? origname : thename, -error, nv);
1846                         break;
1847                 }
1848
1849                 nvlist_free(nv);
1850                 ret = -1;
1851         } else {
1852                 zpool_handle_t *zhp;
1853
1854                 /*
1855                  * This should never fail, but play it safe anyway.
1856                  */
1857                 if (zpool_open_silent(hdl, thename, &zhp) != 0)
1858                         ret = -1;
1859                 else if (zhp != NULL)
1860                         zpool_close(zhp);
1861                 if (policy.zrp_request &
1862                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
1863                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1864                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv);
1865                 }
1866                 nvlist_free(nv);
1867                 return (0);
1868         }
1869
1870         zcmd_free_nvlists(&zc);
1871         nvlist_free(props);
1872
1873         return (ret);
1874 }
1875
1876 /*
1877  * Scan the pool.
1878  */
1879 int
1880 zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
1881 {
1882         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1883         char msg[1024];
1884         libzfs_handle_t *hdl = zhp->zpool_hdl;
1885
1886         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1887         zc.zc_cookie = func;
1888
1889         if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
1890             (errno == ENOENT && func != POOL_SCAN_NONE))
1891                 return (0);
1892
1893         if (func == POOL_SCAN_SCRUB) {
1894                 (void) snprintf(msg, sizeof (msg),
1895                     dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
1896         } else if (func == POOL_SCAN_NONE) {
1897                 (void) snprintf(msg, sizeof (msg),
1898                     dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
1899                     zc.zc_name);
1900         } else {
1901                 assert(!"unexpected result");
1902         }
1903
1904         if (errno == EBUSY) {
1905                 nvlist_t *nvroot;
1906                 pool_scan_stat_t *ps = NULL;
1907                 uint_t psc;
1908
1909                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
1910                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
1911                 (void) nvlist_lookup_uint64_array(nvroot,
1912                     ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
1913                 if (ps && ps->pss_func == POOL_SCAN_SCRUB)
1914                         return (zfs_error(hdl, EZFS_SCRUBBING, msg));
1915                 else
1916                         return (zfs_error(hdl, EZFS_RESILVERING, msg));
1917         } else if (errno == ENOENT) {
1918                 return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
1919         } else {
1920                 return (zpool_standard_error(hdl, errno, msg));
1921         }
1922 }
1923
1924 /*
1925  * Find a vdev that matches the search criteria specified. We use the
1926  * the nvpair name to determine how we should look for the device.
1927  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
1928  * spare; but FALSE if its an INUSE spare.
1929  */
1930 static nvlist_t *
1931 vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
1932     boolean_t *l2cache, boolean_t *log)
1933 {
1934         uint_t c, children;
1935         nvlist_t **child;
1936         nvlist_t *ret;
1937         uint64_t is_log;
1938         char *srchkey;
1939         nvpair_t *pair = nvlist_next_nvpair(search, NULL);
1940
1941         /* Nothing to look for */
1942         if (search == NULL || pair == NULL)
1943                 return (NULL);
1944
1945         /* Obtain the key we will use to search */
1946         srchkey = nvpair_name(pair);
1947
1948         switch (nvpair_type(pair)) {
1949         case DATA_TYPE_UINT64:
1950                 if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
1951                         uint64_t srchval, theguid;
1952
1953                         verify(nvpair_value_uint64(pair, &srchval) == 0);
1954                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
1955                             &theguid) == 0);
1956                         if (theguid == srchval)
1957                                 return (nv);
1958                 }
1959                 break;
1960
1961         case DATA_TYPE_STRING: {
1962                 char *srchval, *val;
1963
1964                 verify(nvpair_value_string(pair, &srchval) == 0);
1965                 if (nvlist_lookup_string(nv, srchkey, &val) != 0)
1966                         break;
1967
1968                 /*
1969                  * Search for the requested value. Special cases:
1970                  *
1971                  * - ZPOOL_CONFIG_PATH for whole disk entries.  These end in
1972                  *   "-part1", or "p1".  The suffix is hidden from the user,
1973                  *   but included in the string, so this matches around it.
1974                  * - ZPOOL_CONFIG_PATH for short names zfs_strcmp_shortname()
1975                  *   is used to check all possible expanded paths.
1976                  * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
1977                  *
1978                  * Otherwise, all other searches are simple string compares.
1979                  */
1980                 if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) {
1981                         uint64_t wholedisk = 0;
1982
1983                         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1984                             &wholedisk);
1985                         if (zfs_strcmp_pathname(srchval, val, wholedisk) == 0)
1986                                 return (nv);
1987
1988                 } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
1989                         char *type, *idx, *end, *p;
1990                         uint64_t id, vdev_id;
1991
1992                         /*
1993                          * Determine our vdev type, keeping in mind
1994                          * that the srchval is composed of a type and
1995                          * vdev id pair (i.e. mirror-4).
1996                          */
1997                         if ((type = strdup(srchval)) == NULL)
1998                                 return (NULL);
1999
2000                         if ((p = strrchr(type, '-')) == NULL) {
2001                                 free(type);
2002                                 break;
2003                         }
2004                         idx = p + 1;
2005                         *p = '\0';
2006
2007                         /*
2008                          * If the types don't match then keep looking.
2009                          */
2010                         if (strncmp(val, type, strlen(val)) != 0) {
2011                                 free(type);
2012                                 break;
2013                         }
2014
2015                         verify(strncmp(type, VDEV_TYPE_RAIDZ,
2016                             strlen(VDEV_TYPE_RAIDZ)) == 0 ||
2017                             strncmp(type, VDEV_TYPE_MIRROR,
2018                             strlen(VDEV_TYPE_MIRROR)) == 0);
2019                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
2020                             &id) == 0);
2021
2022                         errno = 0;
2023                         vdev_id = strtoull(idx, &end, 10);
2024
2025                         free(type);
2026                         if (errno != 0)
2027                                 return (NULL);
2028
2029                         /*
2030                          * Now verify that we have the correct vdev id.
2031                          */
2032                         if (vdev_id == id)
2033                                 return (nv);
2034                 }
2035
2036                 /*
2037                  * Common case
2038                  */
2039                 if (strcmp(srchval, val) == 0)
2040                         return (nv);
2041                 break;
2042         }
2043
2044         default:
2045                 break;
2046         }
2047
2048         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
2049             &child, &children) != 0)
2050                 return (NULL);
2051
2052         for (c = 0; c < children; c++) {
2053                 if ((ret = vdev_to_nvlist_iter(child[c], search,
2054                     avail_spare, l2cache, NULL)) != NULL) {
2055                         /*
2056                          * The 'is_log' value is only set for the toplevel
2057                          * vdev, not the leaf vdevs.  So we always lookup the
2058                          * log device from the root of the vdev tree (where
2059                          * 'log' is non-NULL).
2060                          */
2061                         if (log != NULL &&
2062                             nvlist_lookup_uint64(child[c],
2063                             ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
2064                             is_log) {
2065                                 *log = B_TRUE;
2066                         }
2067                         return (ret);
2068                 }
2069         }
2070
2071         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
2072             &child, &children) == 0) {
2073                 for (c = 0; c < children; c++) {
2074                         if ((ret = vdev_to_nvlist_iter(child[c], search,
2075                             avail_spare, l2cache, NULL)) != NULL) {
2076                                 *avail_spare = B_TRUE;
2077                                 return (ret);
2078                         }
2079                 }
2080         }
2081
2082         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
2083             &child, &children) == 0) {
2084                 for (c = 0; c < children; c++) {
2085                         if ((ret = vdev_to_nvlist_iter(child[c], search,
2086                             avail_spare, l2cache, NULL)) != NULL) {
2087                                 *l2cache = B_TRUE;
2088                                 return (ret);
2089                         }
2090                 }
2091         }
2092
2093         return (NULL);
2094 }
2095
2096 /*
2097  * Given a physical path (minus the "/devices" prefix), find the
2098  * associated vdev.
2099  */
2100 nvlist_t *
2101 zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
2102     boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
2103 {
2104         nvlist_t *search, *nvroot, *ret;
2105
2106         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2107         verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
2108
2109         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2110             &nvroot) == 0);
2111
2112         *avail_spare = B_FALSE;
2113         *l2cache = B_FALSE;
2114         if (log != NULL)
2115                 *log = B_FALSE;
2116         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2117         nvlist_free(search);
2118
2119         return (ret);
2120 }
2121
2122 /*
2123  * Determine if we have an "interior" top-level vdev (i.e mirror/raidz).
2124  */
2125 boolean_t
2126 zpool_vdev_is_interior(const char *name)
2127 {
2128         if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
2129             strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
2130                 return (B_TRUE);
2131         return (B_FALSE);
2132 }
2133
2134 nvlist_t *
2135 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
2136     boolean_t *l2cache, boolean_t *log)
2137 {
2138         char *end;
2139         nvlist_t *nvroot, *search, *ret;
2140         uint64_t guid;
2141
2142         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2143
2144         guid = strtoull(path, &end, 10);
2145         if (guid != 0 && *end == '\0') {
2146                 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
2147         } else if (zpool_vdev_is_interior(path)) {
2148                 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
2149         } else {
2150                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
2151         }
2152
2153         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2154             &nvroot) == 0);
2155
2156         *avail_spare = B_FALSE;
2157         *l2cache = B_FALSE;
2158         if (log != NULL)
2159                 *log = B_FALSE;
2160         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2161         nvlist_free(search);
2162
2163         return (ret);
2164 }
2165
2166 static int
2167 vdev_online(nvlist_t *nv)
2168 {
2169         uint64_t ival;
2170
2171         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
2172             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
2173             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
2174                 return (0);
2175
2176         return (1);
2177 }
2178
2179 /*
2180  * Helper function for zpool_get_physpaths().
2181  */
2182 static int
2183 vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
2184     size_t *bytes_written)
2185 {
2186         size_t bytes_left, pos, rsz;
2187         char *tmppath;
2188         const char *format;
2189
2190         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
2191             &tmppath) != 0)
2192                 return (EZFS_NODEVICE);
2193
2194         pos = *bytes_written;
2195         bytes_left = physpath_size - pos;
2196         format = (pos == 0) ? "%s" : " %s";
2197
2198         rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
2199         *bytes_written += rsz;
2200
2201         if (rsz >= bytes_left) {
2202                 /* if physpath was not copied properly, clear it */
2203                 if (bytes_left != 0) {
2204                         physpath[pos] = 0;
2205                 }
2206                 return (EZFS_NOSPC);
2207         }
2208         return (0);
2209 }
2210
2211 static int
2212 vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
2213     size_t *rsz, boolean_t is_spare)
2214 {
2215         char *type;
2216         int ret;
2217
2218         if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
2219                 return (EZFS_INVALCONFIG);
2220
2221         if (strcmp(type, VDEV_TYPE_DISK) == 0) {
2222                 /*
2223                  * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
2224                  * For a spare vdev, we only want to boot from the active
2225                  * spare device.
2226                  */
2227                 if (is_spare) {
2228                         uint64_t spare = 0;
2229                         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
2230                             &spare);
2231                         if (!spare)
2232                                 return (EZFS_INVALCONFIG);
2233                 }
2234
2235                 if (vdev_online(nv)) {
2236                         if ((ret = vdev_get_one_physpath(nv, physpath,
2237                             phypath_size, rsz)) != 0)
2238                                 return (ret);
2239                 }
2240         } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
2241             strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
2242             (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
2243                 nvlist_t **child;
2244                 uint_t count;
2245                 int i, ret;
2246
2247                 if (nvlist_lookup_nvlist_array(nv,
2248                     ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
2249                         return (EZFS_INVALCONFIG);
2250
2251                 for (i = 0; i < count; i++) {
2252                         ret = vdev_get_physpaths(child[i], physpath,
2253                             phypath_size, rsz, is_spare);
2254                         if (ret == EZFS_NOSPC)
2255                                 return (ret);
2256                 }
2257         }
2258
2259         return (EZFS_POOL_INVALARG);
2260 }
2261
2262 /*
2263  * Get phys_path for a root pool config.
2264  * Return 0 on success; non-zero on failure.
2265  */
2266 static int
2267 zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
2268 {
2269         size_t rsz;
2270         nvlist_t *vdev_root;
2271         nvlist_t **child;
2272         uint_t count;
2273         char *type;
2274
2275         rsz = 0;
2276
2277         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2278             &vdev_root) != 0)
2279                 return (EZFS_INVALCONFIG);
2280
2281         if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2282             nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
2283             &child, &count) != 0)
2284                 return (EZFS_INVALCONFIG);
2285
2286 #if defined(__sun__) || defined(__sun)
2287         /*
2288          * root pool can not have EFI labeled disks and can only have
2289          * a single top-level vdev.
2290          */
2291         if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 ||
2292             pool_uses_efi(vdev_root))
2293                 return (EZFS_POOL_INVALARG);
2294 #endif
2295
2296         (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2297             B_FALSE);
2298
2299         /* No online devices */
2300         if (rsz == 0)
2301                 return (EZFS_NODEVICE);
2302
2303         return (0);
2304 }
2305
2306 /*
2307  * Get phys_path for a root pool
2308  * Return 0 on success; non-zero on failure.
2309  */
2310 int
2311 zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2312 {
2313         return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2314             phypath_size));
2315 }
2316
2317 /*
2318  * If the device has being dynamically expanded then we need to relabel
2319  * the disk to use the new unallocated space.
2320  */
2321 static int
2322 zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
2323 {
2324         int fd, error;
2325
2326         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
2327                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2328                     "relabel '%s': unable to open device: %d"), path, errno);
2329                 return (zfs_error(hdl, EZFS_OPENFAILED, msg));
2330         }
2331
2332         /*
2333          * It's possible that we might encounter an error if the device
2334          * does not have any unallocated space left. If so, we simply
2335          * ignore that error and continue on.
2336          *
2337          * Also, we don't call efi_rescan() - that would just return EBUSY.
2338          * The module will do it for us in vdev_disk_open().
2339          */
2340         error = efi_use_whole_disk(fd);
2341         (void) close(fd);
2342         if (error && error != VT_ENOSPC) {
2343                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2344                     "relabel '%s': unable to read disk capacity"), path);
2345                 return (zfs_error(hdl, EZFS_NOCAP, msg));
2346         }
2347         return (0);
2348 }
2349
2350 /*
2351  * Bring the specified vdev online.   The 'flags' parameter is a set of the
2352  * ZFS_ONLINE_* flags.
2353  */
2354 int
2355 zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
2356     vdev_state_t *newstate)
2357 {
2358         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2359         char msg[1024];
2360         nvlist_t *tgt;
2361         boolean_t avail_spare, l2cache, islog;
2362         libzfs_handle_t *hdl = zhp->zpool_hdl;
2363         int error;
2364
2365         if (flags & ZFS_ONLINE_EXPAND) {
2366                 (void) snprintf(msg, sizeof (msg),
2367                     dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
2368         } else {
2369                 (void) snprintf(msg, sizeof (msg),
2370                     dgettext(TEXT_DOMAIN, "cannot online %s"), path);
2371         }
2372
2373         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2374         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2375             &islog)) == NULL)
2376                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2377
2378         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2379
2380         if (avail_spare)
2381                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2382
2383         if (flags & ZFS_ONLINE_EXPAND ||
2384             zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
2385                 uint64_t wholedisk = 0;
2386
2387                 (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
2388                     &wholedisk);
2389
2390                 /*
2391                  * XXX - L2ARC 1.0 devices can't support expansion.
2392                  */
2393                 if (l2cache) {
2394                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2395                             "cannot expand cache devices"));
2396                         return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
2397                 }
2398
2399                 if (wholedisk) {
2400                         const char *fullpath = path;
2401                         char buf[MAXPATHLEN];
2402
2403                         if (path[0] != '/') {
2404                                 error = zfs_resolve_shortname(path, buf,
2405                                     sizeof(buf));
2406                                 if (error != 0)
2407                                         return (zfs_error(hdl, EZFS_NODEVICE,
2408                                             msg));
2409
2410                                 fullpath = buf;
2411                         }
2412
2413                         error = zpool_relabel_disk(hdl, fullpath, msg);
2414                         if (error != 0)
2415                                 return (error);
2416                 }
2417         }
2418
2419         zc.zc_cookie = VDEV_STATE_ONLINE;
2420         zc.zc_obj = flags;
2421
2422         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
2423                 if (errno == EINVAL) {
2424                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
2425                             "from this pool into a new one.  Use '%s' "
2426                             "instead"), "zpool detach");
2427                         return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
2428                 }
2429                 return (zpool_standard_error(hdl, errno, msg));
2430         }
2431
2432         *newstate = zc.zc_cookie;
2433         return (0);
2434 }
2435
2436 /*
2437  * Take the specified vdev offline
2438  */
2439 int
2440 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2441 {
2442         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2443         char msg[1024];
2444         nvlist_t *tgt;
2445         boolean_t avail_spare, l2cache;
2446         libzfs_handle_t *hdl = zhp->zpool_hdl;
2447
2448         (void) snprintf(msg, sizeof (msg),
2449             dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2450
2451         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2452         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2453             NULL)) == NULL)
2454                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2455
2456         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2457
2458         if (avail_spare)
2459                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2460
2461         zc.zc_cookie = VDEV_STATE_OFFLINE;
2462         zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2463
2464         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2465                 return (0);
2466
2467         switch (errno) {
2468         case EBUSY:
2469
2470                 /*
2471                  * There are no other replicas of this device.
2472                  */
2473                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2474
2475         case EEXIST:
2476                 /*
2477                  * The log device has unplayed logs
2478                  */
2479                 return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2480
2481         default:
2482                 return (zpool_standard_error(hdl, errno, msg));
2483         }
2484 }
2485
2486 /*
2487  * Mark the given vdev faulted.
2488  */
2489 int
2490 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2491 {
2492         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2493         char msg[1024];
2494         libzfs_handle_t *hdl = zhp->zpool_hdl;
2495
2496         (void) snprintf(msg, sizeof (msg),
2497            dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
2498
2499         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2500         zc.zc_guid = guid;
2501         zc.zc_cookie = VDEV_STATE_FAULTED;
2502         zc.zc_obj = aux;
2503
2504         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2505                 return (0);
2506
2507         switch (errno) {
2508         case EBUSY:
2509
2510                 /*
2511                  * There are no other replicas of this device.
2512                  */
2513                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2514
2515         default:
2516                 return (zpool_standard_error(hdl, errno, msg));
2517         }
2518
2519 }
2520
2521 /*
2522  * Mark the given vdev degraded.
2523  */
2524 int
2525 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2526 {
2527         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2528         char msg[1024];
2529         libzfs_handle_t *hdl = zhp->zpool_hdl;
2530
2531         (void) snprintf(msg, sizeof (msg),
2532            dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
2533
2534         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2535         zc.zc_guid = guid;
2536         zc.zc_cookie = VDEV_STATE_DEGRADED;
2537         zc.zc_obj = aux;
2538
2539         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2540                 return (0);
2541
2542         return (zpool_standard_error(hdl, errno, msg));
2543 }
2544
2545 /*
2546  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
2547  * a hot spare.
2548  */
2549 static boolean_t
2550 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
2551 {
2552         nvlist_t **child;
2553         uint_t c, children;
2554         char *type;
2555
2556         if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
2557             &children) == 0) {
2558                 verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
2559                     &type) == 0);
2560
2561                 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
2562                     children == 2 && child[which] == tgt)
2563                         return (B_TRUE);
2564
2565                 for (c = 0; c < children; c++)
2566                         if (is_replacing_spare(child[c], tgt, which))
2567                                 return (B_TRUE);
2568         }
2569
2570         return (B_FALSE);
2571 }
2572
2573 /*
2574  * Attach new_disk (fully described by nvroot) to old_disk.
2575  * If 'replacing' is specified, the new disk will replace the old one.
2576  */
2577 int
2578 zpool_vdev_attach(zpool_handle_t *zhp,
2579     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
2580 {
2581         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2582         char msg[1024];
2583         int ret;
2584         nvlist_t *tgt;
2585         boolean_t avail_spare, l2cache, islog;
2586         uint64_t val;
2587         char *newname;
2588         nvlist_t **child;
2589         uint_t children;
2590         nvlist_t *config_root;
2591         libzfs_handle_t *hdl = zhp->zpool_hdl;
2592         boolean_t rootpool = zpool_is_bootable(zhp);
2593
2594         if (replacing)
2595                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2596                     "cannot replace %s with %s"), old_disk, new_disk);
2597         else
2598                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2599                     "cannot attach %s to %s"), new_disk, old_disk);
2600
2601 #if defined(__sun__) || defined(__sun)
2602         /*
2603          * If this is a root pool, make sure that we're not attaching an
2604          * EFI labeled device.
2605          */
2606         if (rootpool && pool_uses_efi(nvroot)) {
2607                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2608                     "EFI labeled devices are not supported on root pools."));
2609                 return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
2610         }
2611 #endif
2612
2613         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2614         if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
2615             &islog)) == 0)
2616                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2617
2618         if (avail_spare)
2619                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2620
2621         if (l2cache)
2622                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2623
2624         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2625         zc.zc_cookie = replacing;
2626
2627         if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
2628             &child, &children) != 0 || children != 1) {
2629                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2630                     "new device must be a single disk"));
2631                 return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
2632         }
2633
2634         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
2635             ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
2636
2637         if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL)
2638                 return (-1);
2639
2640         /*
2641          * If the target is a hot spare that has been swapped in, we can only
2642          * replace it with another hot spare.
2643          */
2644         if (replacing &&
2645             nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
2646             (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
2647             NULL) == NULL || !avail_spare) &&
2648             is_replacing_spare(config_root, tgt, 1)) {
2649                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2650                     "can only be replaced by another hot spare"));
2651                 free(newname);
2652                 return (zfs_error(hdl, EZFS_BADTARGET, msg));
2653         }
2654
2655         free(newname);
2656
2657         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
2658                 return (-1);
2659
2660         ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);
2661
2662         zcmd_free_nvlists(&zc);
2663
2664         if (ret == 0) {
2665                 if (rootpool) {
2666                         /*
2667                          * XXX need a better way to prevent user from
2668                          * booting up a half-baked vdev.
2669                          */
2670                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
2671                             "sure to wait until resilver is done "
2672                             "before rebooting.\n"));
2673                 }
2674                 return (0);
2675         }
2676
2677         switch (errno) {
2678         case ENOTSUP:
2679                 /*
2680                  * Can't attach to or replace this type of vdev.
2681                  */
2682                 if (replacing) {
2683                         uint64_t version = zpool_get_prop_int(zhp,
2684                             ZPOOL_PROP_VERSION, NULL);
2685
2686                         if (islog)
2687                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2688                                     "cannot replace a log with a spare"));
2689                         else if (version >= SPA_VERSION_MULTI_REPLACE)
2690                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2691                                     "already in replacing/spare config; wait "
2692                                     "for completion or use 'zpool detach'"));
2693                         else
2694                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2695                                     "cannot replace a replacing device"));
2696                 } else {
2697                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2698                             "can only attach to mirrors and top-level "
2699                             "disks"));
2700                 }
2701                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2702                 break;
2703
2704         case EINVAL:
2705                 /*
2706                  * The new device must be a single disk.
2707                  */
2708                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2709                     "new device must be a single disk"));
2710                 (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
2711                 break;
2712
2713         case EBUSY:
2714                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
2715                     new_disk);
2716                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2717                 break;
2718
2719         case EOVERFLOW:
2720                 /*
2721                  * The new device is too small.
2722                  */
2723                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2724                     "device is too small"));
2725                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2726                 break;
2727
2728         case EDOM:
2729                 /*
2730                  * The new device has a different alignment requirement.
2731                  */
2732                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2733                     "devices have different sector alignment"));
2734                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2735                 break;
2736
2737         case ENAMETOOLONG:
2738                 /*
2739                  * The resulting top-level vdev spec won't fit in the label.
2740                  */
2741                 (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
2742                 break;
2743
2744         default:
2745                 (void) zpool_standard_error(hdl, errno, msg);
2746         }
2747
2748         return (-1);
2749 }
2750
2751 /*
2752  * Detach the specified device.
2753  */
2754 int
2755 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
2756 {
2757         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2758         char msg[1024];
2759         nvlist_t *tgt;
2760         boolean_t avail_spare, l2cache;
2761         libzfs_handle_t *hdl = zhp->zpool_hdl;
2762
2763         (void) snprintf(msg, sizeof (msg),
2764             dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
2765
2766         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2767         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2768             NULL)) == 0)
2769                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2770
2771         if (avail_spare)
2772                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2773
2774         if (l2cache)
2775                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2776
2777         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2778
2779         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
2780                 return (0);
2781
2782         switch (errno) {
2783
2784         case ENOTSUP:
2785                 /*
2786                  * Can't detach from this type of vdev.
2787                  */
2788                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
2789                     "applicable to mirror and replacing vdevs"));
2790                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2791                 break;
2792
2793         case EBUSY:
2794                 /*
2795                  * There are no other replicas of this device.
2796                  */
2797                 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
2798                 break;
2799
2800         default:
2801                 (void) zpool_standard_error(hdl, errno, msg);
2802         }
2803
2804         return (-1);
2805 }
2806
2807 /*
2808  * Find a mirror vdev in the source nvlist.
2809  *
2810  * The mchild array contains a list of disks in one of the top-level mirrors
2811  * of the source pool.  The schild array contains a list of disks that the
2812  * user specified on the command line.  We loop over the mchild array to
2813  * see if any entry in the schild array matches.
2814  *
2815  * If a disk in the mchild array is found in the schild array, we return
2816  * the index of that entry.  Otherwise we return -1.
2817  */
2818 static int
2819 find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
2820     nvlist_t **schild, uint_t schildren)
2821 {
2822         uint_t mc;
2823
2824         for (mc = 0; mc < mchildren; mc++) {
2825                 uint_t sc;
2826                 char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2827                     mchild[mc], B_FALSE);
2828
2829                 for (sc = 0; sc < schildren; sc++) {
2830                         char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2831                             schild[sc], B_FALSE);
2832                         boolean_t result = (strcmp(mpath, spath) == 0);
2833
2834                         free(spath);
2835                         if (result) {
2836                                 free(mpath);
2837                                 return (mc);
2838                         }
2839                 }
2840
2841                 free(mpath);
2842         }
2843
2844         return (-1);
2845 }
2846
2847 /*
2848  * Split a mirror pool.  If newroot points to null, then a new nvlist
2849  * is generated and it is the responsibility of the caller to free it.
2850  */
2851 int
2852 zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
2853     nvlist_t *props, splitflags_t flags)
2854 {
2855         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2856         char msg[1024];
2857         nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
2858         nvlist_t **varray = NULL, *zc_props = NULL;
2859         uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
2860         libzfs_handle_t *hdl = zhp->zpool_hdl;
2861         uint64_t vers;
2862         boolean_t freelist = B_FALSE, memory_err = B_TRUE;
2863         int retval = 0;
2864
2865         (void) snprintf(msg, sizeof (msg),
2866             dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
2867
2868         if (!zpool_name_valid(hdl, B_FALSE, newname))
2869                 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
2870
2871         if ((config = zpool_get_config(zhp, NULL)) == NULL) {
2872                 (void) fprintf(stderr, gettext("Internal error: unable to "
2873                     "retrieve pool configuration\n"));
2874                 return (-1);
2875         }
2876
2877         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
2878             == 0);
2879         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
2880
2881         if (props) {
2882                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
2883                 if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
2884                     props, vers, flags, msg)) == NULL)
2885                         return (-1);
2886         }
2887
2888         if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
2889             &children) != 0) {
2890                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2891                     "Source pool is missing vdev tree"));
2892                 if (zc_props)
2893                         nvlist_free(zc_props);
2894                 return (-1);
2895         }
2896
2897         varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
2898         vcount = 0;
2899
2900         if (*newroot == NULL ||
2901             nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
2902             &newchild, &newchildren) != 0)
2903                 newchildren = 0;
2904
2905         for (c = 0; c < children; c++) {
2906                 uint64_t is_log = B_FALSE, is_hole = B_FALSE;
2907                 char *type;
2908                 nvlist_t **mchild, *vdev;
2909                 uint_t mchildren;
2910                 int entry;
2911
2912                 /*
2913                  * Unlike cache & spares, slogs are stored in the
2914                  * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
2915                  */
2916                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
2917                     &is_log);
2918                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
2919                     &is_hole);
2920                 if (is_log || is_hole) {
2921                         /*
2922                          * Create a hole vdev and put it in the config.
2923                          */
2924                         if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
2925                                 goto out;
2926                         if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
2927                             VDEV_TYPE_HOLE) != 0)
2928                                 goto out;
2929                         if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
2930                             1) != 0)
2931                                 goto out;
2932                         if (lastlog == 0)
2933                                 lastlog = vcount;
2934                         varray[vcount++] = vdev;
2935                         continue;
2936                 }
2937                 lastlog = 0;
2938                 verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
2939                     == 0);
2940                 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
2941                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2942                             "Source pool must be composed only of mirrors\n"));
2943                         retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
2944                         goto out;
2945                 }
2946
2947                 verify(nvlist_lookup_nvlist_array(child[c],
2948                     ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
2949
2950                 /* find or add an entry for this top-level vdev */
2951                 if (newchildren > 0 &&
2952                     (entry = find_vdev_entry(zhp, mchild, mchildren,
2953                     newchild, newchildren)) >= 0) {
2954                         /* We found a disk that the user specified. */
2955                         vdev = mchild[entry];
2956                         ++found;
2957                 } else {
2958                         /* User didn't specify a disk for this vdev. */
2959                         vdev = mchild[mchildren - 1];
2960                 }
2961
2962                 if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
2963                         goto out;
2964         }
2965
2966         /* did we find every disk the user specified? */
2967         if (found != newchildren) {
2968                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
2969                     "include at most one disk from each mirror"));
2970                 retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
2971                 goto out;
2972         }
2973
2974         /* Prepare the nvlist for populating. */
2975         if (*newroot == NULL) {
2976                 if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
2977                         goto out;
2978                 freelist = B_TRUE;
2979                 if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
2980                     VDEV_TYPE_ROOT) != 0)
2981                         goto out;
2982         } else {
2983                 verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
2984         }
2985
2986         /* Add all the children we found */
2987         if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
2988             lastlog == 0 ? vcount : lastlog) != 0)
2989                 goto out;
2990
2991         /*
2992          * If we're just doing a dry run, exit now with success.
2993          */
2994         if (flags.dryrun) {
2995                 memory_err = B_FALSE;
2996                 freelist = B_FALSE;
2997                 goto out;
2998         }
2999
3000         /* now build up the config list & call the ioctl */
3001         if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
3002                 goto out;
3003
3004         if (nvlist_add_nvlist(newconfig,
3005             ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
3006             nvlist_add_string(newconfig,
3007             ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
3008             nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
3009                 goto out;
3010
3011         /*
3012          * The new pool is automatically part of the namespace unless we
3013          * explicitly export it.
3014          */
3015         if (!flags.import)
3016                 zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
3017         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3018         (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
3019         if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
3020                 goto out;
3021         if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
3022                 goto out;
3023
3024         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
3025                 retval = zpool_standard_error(hdl, errno, msg);
3026                 goto out;
3027         }
3028
3029         freelist = B_FALSE;
3030         memory_err = B_FALSE;
3031
3032 out:
3033         if (varray != NULL) {
3034                 int v;
3035
3036                 for (v = 0; v < vcount; v++)
3037                         nvlist_free(varray[v]);
3038                 free(varray);
3039         }
3040         zcmd_free_nvlists(&zc);
3041         if (zc_props)
3042                 nvlist_free(zc_props);
3043         if (newconfig)
3044                 nvlist_free(newconfig);
3045         if (freelist) {
3046                 nvlist_free(*newroot);
3047                 *newroot = NULL;
3048         }
3049
3050         if (retval != 0)
3051                 return (retval);
3052
3053         if (memory_err)
3054                 return (no_memory(hdl));
3055
3056         return (0);
3057 }
3058
3059 /*
3060  * Remove the given device.  Currently, this is supported only for hot spares
3061  * and level 2 cache devices.
3062  */
3063 int
3064 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
3065 {
3066         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3067         char msg[1024];
3068         nvlist_t *tgt;
3069         boolean_t avail_spare, l2cache, islog;
3070         libzfs_handle_t *hdl = zhp->zpool_hdl;
3071         uint64_t version;
3072
3073         (void) snprintf(msg, sizeof (msg),
3074             dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
3075
3076         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3077         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3078             &islog)) == 0)
3079                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3080         /*
3081          * XXX - this should just go away.
3082          */
3083         if (!avail_spare && !l2cache && !islog) {
3084                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3085                     "only inactive hot spares, cache, top-level, "
3086                     "or log devices can be removed"));
3087                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3088         }
3089
3090         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
3091         if (islog && version < SPA_VERSION_HOLES) {
3092                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3093                     "pool must be upgrade to support log removal"));
3094                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
3095         }
3096
3097         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
3098
3099         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3100                 return (0);
3101
3102         return (zpool_standard_error(hdl, errno, msg));
3103 }
3104
3105 /*
3106  * Clear the errors for the pool, or the particular device if specified.
3107  */
3108 int
3109 zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
3110 {
3111         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3112         char msg[1024];
3113         nvlist_t *tgt;
3114         zpool_rewind_policy_t policy;
3115         boolean_t avail_spare, l2cache;
3116         libzfs_handle_t *hdl = zhp->zpool_hdl;
3117         nvlist_t *nvi = NULL;
3118         int error;
3119
3120         if (path)
3121                 (void) snprintf(msg, sizeof (msg),
3122                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3123                     path);
3124         else
3125                 (void) snprintf(msg, sizeof (msg),
3126                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3127                     zhp->zpool_name);
3128
3129         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3130         if (path) {
3131                 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
3132                     &l2cache, NULL)) == 0)
3133                         return (zfs_error(hdl, EZFS_NODEVICE, msg));
3134
3135                 /*
3136                  * Don't allow error clearing for hot spares.  Do allow
3137                  * error clearing for l2cache devices.
3138                  */
3139                 if (avail_spare)
3140                         return (zfs_error(hdl, EZFS_ISSPARE, msg));
3141
3142                 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
3143                     &zc.zc_guid) == 0);
3144         }
3145
3146         zpool_get_rewind_policy(rewindnvl, &policy);
3147         zc.zc_cookie = policy.zrp_request;
3148
3149         if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
3150                 return (-1);
3151
3152         if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
3153                 return (-1);
3154
3155         while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
3156             errno == ENOMEM) {
3157                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3158                         zcmd_free_nvlists(&zc);
3159                         return (-1);
3160                 }
3161         }
3162
3163         if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
3164             errno != EPERM && errno != EACCES)) {
3165                 if (policy.zrp_request &
3166                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
3167                         (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
3168                         zpool_rewind_exclaim(hdl, zc.zc_name,
3169                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
3170                             nvi);
3171                         nvlist_free(nvi);
3172                 }
3173                 zcmd_free_nvlists(&zc);
3174                 return (0);
3175         }
3176
3177         zcmd_free_nvlists(&zc);
3178         return (zpool_standard_error(hdl, errno, msg));
3179 }
3180
3181 /*
3182  * Similar to zpool_clear(), but takes a GUID (used by fmd).
3183  */
3184 int
3185 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
3186 {
3187         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3188         char msg[1024];
3189         libzfs_handle_t *hdl = zhp->zpool_hdl;
3190
3191         (void) snprintf(msg, sizeof (msg),
3192             dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
3193            (u_longlong_t)guid);
3194
3195         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3196         zc.zc_guid = guid;
3197         zc.zc_cookie = ZPOOL_NO_REWIND;
3198
3199         if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
3200                 return (0);
3201
3202         return (zpool_standard_error(hdl, errno, msg));
3203 }
3204
3205 /*
3206  * Change the GUID for a pool.
3207  */
3208 int
3209 zpool_reguid(zpool_handle_t *zhp)
3210 {
3211         char msg[1024];
3212         libzfs_handle_t *hdl = zhp->zpool_hdl;
3213         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3214
3215         (void) snprintf(msg, sizeof (msg),
3216             dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
3217
3218         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3219         if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
3220                 return (0);
3221
3222         return (zpool_standard_error(hdl, errno, msg));
3223 }
3224
3225 /*
3226  * Reopen the pool.
3227  */
3228 int
3229 zpool_reopen(zpool_handle_t *zhp)
3230 {
3231         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3232         char msg[1024];
3233         libzfs_handle_t *hdl = zhp->zpool_hdl;
3234
3235         (void) snprintf(msg, sizeof (msg),
3236             dgettext(TEXT_DOMAIN, "cannot reopen '%s'"),
3237             zhp->zpool_name);
3238
3239         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3240         if (zfs_ioctl(hdl, ZFS_IOC_POOL_REOPEN, &zc) == 0)
3241                 return (0);
3242         return (zpool_standard_error(hdl, errno, msg));
3243 }
3244
3245 /*
3246  * Convert from a devid string to a path.
3247  */
3248 static char *
3249 devid_to_path(char *devid_str)
3250 {
3251         ddi_devid_t devid;
3252         char *minor;
3253         char *path;
3254         devid_nmlist_t *list = NULL;
3255         int ret;
3256
3257         if (devid_str_decode(devid_str, &devid, &minor) != 0)
3258                 return (NULL);
3259
3260         ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
3261
3262         devid_str_free(minor);
3263         devid_free(devid);
3264
3265         if (ret != 0)
3266                 return (NULL);
3267
3268         if ((path = strdup(list[0].devname)) == NULL)
3269                 return (NULL);
3270
3271         devid_free_nmlist(list);
3272
3273         return (path);
3274 }
3275
3276 /*
3277  * Convert from a path to a devid string.
3278  */
3279 static char *
3280 path_to_devid(const char *path)
3281 {
3282         int fd;
3283         ddi_devid_t devid;
3284         char *minor, *ret;
3285
3286         if ((fd = open(path, O_RDONLY)) < 0)
3287                 return (NULL);
3288
3289         minor = NULL;
3290         ret = NULL;
3291         if (devid_get(fd, &devid) == 0) {
3292                 if (devid_get_minor_name(fd, &minor) == 0)
3293                         ret = devid_str_encode(devid, minor);
3294                 if (minor != NULL)
3295                         devid_str_free(minor);
3296                 devid_free(devid);
3297         }
3298         (void) close(fd);
3299
3300         return (ret);
3301 }
3302
3303 /*
3304  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
3305  * ignore any failure here, since a common case is for an unprivileged user to
3306  * type 'zpool status', and we'll display the correct information anyway.
3307  */
3308 static void
3309 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
3310 {
3311         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3312
3313         (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3314         (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
3315         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3316             &zc.zc_guid) == 0);
3317
3318         (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
3319 }
3320
3321 /*
3322  * Remove partition suffix from a vdev path.  Partition suffixes may take three
3323  * forms: "-partX", "pX", or "X", where X is a string of digits.  The second
3324  * case only occurs when the suffix is preceded by a digit, i.e. "md0p0" The
3325  * third case only occurs when preceded by a string matching the regular
3326  * expression "^[hs]d[a-z]+", i.e. a scsi or ide disk.
3327  */
3328 static char *
3329 strip_partition(libzfs_handle_t *hdl, char *path)
3330 {
3331         char *tmp = zfs_strdup(hdl, path);
3332         char *part = NULL, *d = NULL;
3333
3334         if ((part = strstr(tmp, "-part")) && part != tmp) {
3335                 d = part + 5;
3336         } else if ((part = strrchr(tmp, 'p')) &&
3337             part > tmp + 1 && isdigit(*(part-1))) {
3338                 d = part + 1;
3339         } else if ((tmp[0] == 'h' || tmp[0] == 's') && tmp[1] == 'd') {
3340                 for (d = &tmp[2]; isalpha(*d); part = ++d);
3341         }
3342         if (part && d && *d != '\0') {
3343                 for (; isdigit(*d); d++);
3344                 if (*d == '\0')
3345                         *part = '\0';
3346         }
3347         return (tmp);
3348 }
3349
3350 #define PATH_BUF_LEN    64
3351
3352 /*
3353  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
3354  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3355  * We also check if this is a whole disk, in which case we strip off the
3356  * trailing 's0' slice name.
3357  *
3358  * This routine is also responsible for identifying when disks have been
3359  * reconfigured in a new location.  The kernel will have opened the device by
3360  * devid, but the path will still refer to the old location.  To catch this, we
3361  * first do a path -> devid translation (which is fast for the common case).  If
3362  * the devid matches, we're done.  If not, we do a reverse devid -> path
3363  * translation and issue the appropriate ioctl() to update the path of the vdev.
3364  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
3365  * of these checks.
3366  */
3367 char *
3368 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
3369     boolean_t verbose)
3370 {
3371         char *path, *devid, *type;
3372         uint64_t value;
3373         char buf[PATH_BUF_LEN];
3374         char tmpbuf[PATH_BUF_LEN];
3375         vdev_stat_t *vs;
3376         uint_t vsc;
3377
3378         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
3379             &value) == 0) {
3380                 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3381                     &value) == 0);
3382                 (void) snprintf(buf, sizeof (buf), "%llu",
3383                     (u_longlong_t)value);
3384                 path = buf;
3385         } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
3386                 /*
3387                  * If the device is dead (faulted, offline, etc) then don't
3388                  * bother opening it.  Otherwise we may be forcing the user to
3389                  * open a misbehaving device, which can have undesirable
3390                  * effects.
3391                  */
3392                 if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
3393                     (uint64_t **)&vs, &vsc) != 0 ||
3394                     vs->vs_state >= VDEV_STATE_DEGRADED) &&
3395                     zhp != NULL &&
3396                     nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
3397                         /*
3398                          * Determine if the current path is correct.
3399                          */
3400                         char *newdevid = path_to_devid(path);
3401
3402                         if (newdevid == NULL ||
3403                             strcmp(devid, newdevid) != 0) {
3404                                 char *newpath;
3405
3406                                 if ((newpath = devid_to_path(devid)) != NULL) {
3407                                         /*
3408                                          * Update the path appropriately.
3409                                          */
3410                                         set_path(zhp, nv, newpath);
3411                                         if (nvlist_add_string(nv,
3412                                             ZPOOL_CONFIG_PATH, newpath) == 0)
3413                                                 verify(nvlist_lookup_string(nv,
3414                                                     ZPOOL_CONFIG_PATH,
3415                                                     &path) == 0);
3416                                         free(newpath);
3417                                 }
3418                         }
3419
3420                         if (newdevid)
3421                                 devid_str_free(newdevid);
3422                 }
3423
3424                 /*
3425                  * For a block device only use the name.
3426                  */
3427                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
3428                 if (strcmp(type, VDEV_TYPE_DISK) == 0) {
3429                         path = strrchr(path, '/');
3430                         path++;
3431                 }
3432
3433                 /*
3434                  * Remove the partition from the path it this is a whole disk.
3435                  */
3436                 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
3437                     &value) == 0 && value) {
3438                         return strip_partition(hdl, path);
3439                 }
3440         } else {
3441                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
3442
3443                 /*
3444                  * If it's a raidz device, we need to stick in the parity level.
3445                  */
3446                 if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
3447
3448                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
3449                             &value) == 0);
3450                         (void) snprintf(buf, sizeof (buf), "%s%llu", path,
3451                             (u_longlong_t)value);
3452                         path = buf;
3453                 }
3454
3455                 /*
3456                  * We identify each top-level vdev by using a <type-id>
3457                  * naming convention.
3458                  */
3459                 if (verbose) {
3460                         uint64_t id;
3461
3462                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
3463                             &id) == 0);
3464                         (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s-%llu",
3465                             path, (u_longlong_t)id);
3466                         path = tmpbuf;
3467                 }
3468         }
3469
3470         return (zfs_strdup(hdl, path));
3471 }
3472
3473 static int
3474 zbookmark_compare(const void *a, const void *b)
3475 {
3476         return (memcmp(a, b, sizeof (zbookmark_t)));
3477 }
3478
3479 /*
3480  * Retrieve the persistent error log, uniquify the members, and return to the
3481  * caller.
3482  */
3483 int
3484 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
3485 {
3486         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3487         uint64_t count;
3488         zbookmark_t *zb = NULL;
3489         int i;
3490
3491         /*
3492          * Retrieve the raw error list from the kernel.  If the number of errors
3493          * has increased, allocate more space and continue until we get the
3494          * entire list.
3495          */
3496         verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
3497             &count) == 0);
3498         if (count == 0)
3499                 return (0);
3500         if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
3501             count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
3502                 return (-1);
3503         zc.zc_nvlist_dst_size = count;
3504         (void) strcpy(zc.zc_name, zhp->zpool_name);
3505         for (;;) {
3506                 if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
3507                     &zc) != 0) {
3508                         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3509                         if (errno == ENOMEM) {
3510                                 count = zc.zc_nvlist_dst_size;
3511                                 if ((zc.zc_nvlist_dst = (uintptr_t)
3512                                     zfs_alloc(zhp->zpool_hdl, count *
3513                                     sizeof (zbookmark_t))) == (uintptr_t)NULL)
3514                                         return (-1);
3515                         } else {
3516                                 return (-1);
3517                         }
3518                 } else {
3519                         break;
3520                 }
3521         }
3522
3523         /*
3524          * Sort the resulting bookmarks.  This is a little confusing due to the
3525          * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
3526          * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
3527          * _not_ copied as part of the process.  So we point the start of our
3528          * array appropriate and decrement the total number of elements.
3529          */
3530         zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
3531             zc.zc_nvlist_dst_size;
3532         count -= zc.zc_nvlist_dst_size;
3533
3534         qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
3535
3536         verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
3537
3538         /*
3539          * Fill in the nverrlistp with nvlist's of dataset and object numbers.
3540          */
3541         for (i = 0; i < count; i++) {
3542                 nvlist_t *nv;
3543
3544                 /* ignoring zb_blkid and zb_level for now */
3545                 if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
3546                     zb[i-1].zb_object == zb[i].zb_object)
3547                         continue;
3548
3549                 if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
3550                         goto nomem;
3551                 if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
3552                     zb[i].zb_objset) != 0) {
3553                         nvlist_free(nv);
3554                         goto nomem;
3555                 }
3556                 if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
3557                     zb[i].zb_object) != 0) {
3558                         nvlist_free(nv);
3559                         goto nomem;
3560                 }
3561                 if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
3562                         nvlist_free(nv);
3563                         goto nomem;
3564                 }
3565                 nvlist_free(nv);
3566         }
3567
3568         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3569         return (0);
3570
3571 nomem:
3572         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3573         return (no_memory(zhp->zpool_hdl));
3574 }
3575
3576 /*
3577  * Upgrade a ZFS pool to the latest on-disk version.
3578  */
3579 int
3580 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
3581 {
3582         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3583         libzfs_handle_t *hdl = zhp->zpool_hdl;
3584
3585         (void) strcpy(zc.zc_name, zhp->zpool_name);
3586         zc.zc_cookie = new_version;
3587
3588         if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
3589                 return (zpool_standard_error_fmt(hdl, errno,
3590                     dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
3591                     zhp->zpool_name));
3592         return (0);
3593 }
3594
3595 void
3596 zpool_set_history_str(const char *subcommand, int argc, char **argv,
3597     char *history_str)
3598 {
3599         int i;
3600
3601         (void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
3602         for (i = 1; i < argc; i++) {
3603                 if (strlen(history_str) + 1 + strlen(argv[i]) >
3604                     HIS_MAX_RECORD_LEN)
3605                         break;
3606                 (void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
3607                 (void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
3608         }
3609 }
3610
3611 /*
3612  * Stage command history for logging.
3613  */
3614 int
3615 zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
3616 {
3617         if (history_str == NULL)
3618                 return (EINVAL);
3619
3620         if (strlen(history_str) > HIS_MAX_RECORD_LEN)
3621                 return (EINVAL);
3622
3623         if (hdl->libzfs_log_str != NULL)
3624                 free(hdl->libzfs_log_str);
3625
3626         if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
3627                 return (no_memory(hdl));
3628
3629         return (0);
3630 }
3631
3632 /*
3633  * Perform ioctl to get some command history of a pool.
3634  *
3635  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
3636  * logical offset of the history buffer to start reading from.
3637  *
3638  * Upon return, 'off' is the next logical offset to read from and
3639  * 'len' is the actual amount of bytes read into 'buf'.
3640  */
3641 static int
3642 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
3643 {
3644         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3645         libzfs_handle_t *hdl = zhp->zpool_hdl;
3646
3647         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3648
3649         zc.zc_history = (uint64_t)(uintptr_t)buf;
3650         zc.zc_history_len = *len;
3651         zc.zc_history_offset = *off;
3652
3653         if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
3654                 switch (errno) {
3655                 case EPERM:
3656                         return (zfs_error_fmt(hdl, EZFS_PERM,
3657                             dgettext(TEXT_DOMAIN,
3658                             "cannot show history for pool '%s'"),
3659                             zhp->zpool_name));
3660                 case ENOENT:
3661                         return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
3662                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3663                             "'%s'"), zhp->zpool_name));
3664                 case ENOTSUP:
3665                         return (zfs_error_fmt(hdl, EZFS_BADVERSION,
3666                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3667                             "'%s', pool must be upgraded"), zhp->zpool_name));
3668                 default:
3669                         return (zpool_standard_error_fmt(hdl, errno,
3670                             dgettext(TEXT_DOMAIN,
3671                             "cannot get history for '%s'"), zhp->zpool_name));
3672                 }
3673         }
3674
3675         *len = zc.zc_history_len;
3676         *off = zc.zc_history_offset;
3677
3678         return (0);
3679 }
3680
3681 /*
3682  * Process the buffer of nvlists, unpacking and storing each nvlist record
3683  * into 'records'.  'leftover' is set to the number of bytes that weren't
3684  * processed as there wasn't a complete record.
3685  */
3686 int
3687 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
3688     nvlist_t ***records, uint_t *numrecords)
3689 {
3690         uint64_t reclen;
3691         nvlist_t *nv;
3692         int i;
3693
3694         while (bytes_read > sizeof (reclen)) {
3695
3696                 /* get length of packed record (stored as little endian) */
3697                 for (i = 0, reclen = 0; i < sizeof (reclen); i++)
3698                         reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
3699
3700                 if (bytes_read < sizeof (reclen) + reclen)
3701                         break;
3702
3703                 /* unpack record */
3704                 if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
3705                         return (ENOMEM);
3706                 bytes_read -= sizeof (reclen) + reclen;
3707                 buf += sizeof (reclen) + reclen;
3708
3709                 /* add record to nvlist array */
3710                 (*numrecords)++;
3711                 if (ISP2(*numrecords + 1)) {
3712                         *records = realloc(*records,
3713                             *numrecords * 2 * sizeof (nvlist_t *));
3714                 }
3715                 (*records)[*numrecords - 1] = nv;
3716         }
3717
3718         *leftover = bytes_read;
3719         return (0);
3720 }
3721
3722 #define HIS_BUF_LEN     (128*1024)
3723
3724 /*
3725  * Retrieve the command history of a pool.
3726  */
3727 int
3728 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
3729 {
3730         char buf[HIS_BUF_LEN];
3731         uint64_t off = 0;
3732         nvlist_t **records = NULL;
3733         uint_t numrecords = 0;
3734         int err, i;
3735
3736         do {
3737                 uint64_t bytes_read = sizeof (buf);
3738                 uint64_t leftover;
3739
3740                 if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
3741                         break;
3742
3743                 /* if nothing else was read in, we're at EOF, just return */
3744                 if (!bytes_read)
3745                         break;
3746
3747                 if ((err = zpool_history_unpack(buf, bytes_read,
3748                     &leftover, &records, &numrecords)) != 0)
3749                         break;
3750                 off -= leftover;
3751
3752                 /* CONSTCOND */
3753         } while (1);
3754
3755         if (!err) {
3756                 verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
3757                 verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
3758                     records, numrecords) == 0);
3759         }
3760         for (i = 0; i < numrecords; i++)
3761                 nvlist_free(records[i]);
3762         free(records);
3763
3764         return (err);
3765 }
3766
3767 /*
3768  * Retrieve the next event.  If there is a new event available 'nvp' will
3769  * contain a newly allocated nvlist and 'dropped' will be set to the number
3770  * of missed events since the last call to this function.  When 'nvp' is
3771  * set to NULL it indicates no new events are available.  In either case
3772  * the function returns 0 and it is up to the caller to free 'nvp'.  In
3773  * the case of a fatal error the function will return a non-zero value.
3774  * When the function is called in blocking mode it will not return until
3775  * a new event is available.
3776  */
3777 int
3778 zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
3779     int *dropped, int block, int cleanup_fd)
3780 {
3781         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3782         int error = 0;
3783
3784         *nvp = NULL;
3785         *dropped = 0;
3786         zc.zc_cleanup_fd = cleanup_fd;
3787
3788         if (!block)
3789                 zc.zc_guid = ZEVENT_NONBLOCK;
3790
3791         if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
3792                 return (-1);
3793
3794 retry:
3795         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
3796                 switch (errno) {
3797                 case ESHUTDOWN:
3798                         error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
3799                             dgettext(TEXT_DOMAIN, "zfs shutdown"));
3800                         goto out;
3801                 case ENOENT:
3802                         /* Blocking error case should not occur */
3803                         if (block)
3804                                 error = zpool_standard_error_fmt(hdl, errno,
3805                                     dgettext(TEXT_DOMAIN, "cannot get event"));
3806
3807                         goto out;
3808                 case ENOMEM:
3809                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3810                                 error = zfs_error_fmt(hdl, EZFS_NOMEM,
3811                                     dgettext(TEXT_DOMAIN, "cannot get event"));
3812                                 goto out;
3813                         } else {
3814                                 goto retry;
3815                         }
3816                 default:
3817                         error = zpool_standard_error_fmt(hdl, errno,
3818                             dgettext(TEXT_DOMAIN, "cannot get event"));
3819                         goto out;
3820                 }
3821         }
3822
3823         error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
3824         if (error != 0)
3825                 goto out;
3826
3827         *dropped = (int)zc.zc_cookie;
3828 out:
3829         zcmd_free_nvlists(&zc);
3830
3831         return (error);
3832 }
3833
3834 /*
3835  * Clear all events.
3836  */
3837 int
3838 zpool_events_clear(libzfs_handle_t *hdl, int *count)
3839 {
3840         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3841         char msg[1024];
3842
3843         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
3844             "cannot clear events"));
3845
3846         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
3847                 return (zpool_standard_error_fmt(hdl, errno, msg));
3848
3849         if (count != NULL)
3850                 *count = (int)zc.zc_cookie; /* # of events cleared */
3851
3852         return (0);
3853 }
3854
3855 void
3856 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
3857     char *pathname, size_t len)
3858 {
3859         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3860         boolean_t mounted = B_FALSE;
3861         char *mntpnt = NULL;
3862         char dsname[MAXNAMELEN];
3863
3864         if (dsobj == 0) {
3865                 /* special case for the MOS */
3866                 (void) snprintf(pathname, len, "<metadata>:<0x%llx>", (longlong_t)obj);
3867                 return;
3868         }
3869
3870         /* get the dataset's name */
3871         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3872         zc.zc_obj = dsobj;
3873         if (ioctl(zhp->zpool_hdl->libzfs_fd,
3874             ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
3875                 /* just write out a path of two object numbers */
3876                 (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
3877                     (longlong_t)dsobj, (longlong_t)obj);
3878                 return;
3879         }
3880         (void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
3881
3882         /* find out if the dataset is mounted */
3883         mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
3884
3885         /* get the corrupted object's path */
3886         (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
3887         zc.zc_obj = obj;
3888         if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
3889             &zc) == 0) {
3890                 if (mounted) {
3891                         (void) snprintf(pathname, len, "%s%s", mntpnt,
3892                             zc.zc_value);
3893                 } else {
3894                         (void) snprintf(pathname, len, "%s:%s",
3895                             dsname, zc.zc_value);
3896                 }
3897         } else {
3898                 (void) snprintf(pathname, len, "%s:<0x%llx>", dsname, (longlong_t)obj);
3899         }
3900         free(mntpnt);
3901 }
3902
3903 /*
3904  * Read the EFI label from the config, if a label does not exist then
3905  * pass back the error to the caller. If the caller has passed a non-NULL
3906  * diskaddr argument then we set it to the starting address of the EFI
3907  * partition.
3908  */
3909 static int
3910 read_efi_label(nvlist_t *config, diskaddr_t *sb)
3911 {
3912         char *path;
3913         int fd;
3914         char diskname[MAXPATHLEN];
3915         int err = -1;
3916
3917         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
3918                 return (err);
3919
3920         (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT,
3921             strrchr(path, '/'));
3922         if ((fd = open(diskname, O_RDWR|O_DIRECT)) >= 0) {
3923                 struct dk_gpt *vtoc;
3924
3925                 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
3926                         if (sb != NULL)
3927                                 *sb = vtoc->efi_parts[0].p_start;
3928                         efi_free(vtoc);
3929                 }
3930                 (void) close(fd);
3931         }
3932         return (err);
3933 }
3934
3935 /*
3936  * determine where a partition starts on a disk in the current
3937  * configuration
3938  */
3939 static diskaddr_t
3940 find_start_block(nvlist_t *config)
3941 {
3942         nvlist_t **child;
3943         uint_t c, children;
3944         diskaddr_t sb = MAXOFFSET_T;
3945         uint64_t wholedisk;
3946
3947         if (nvlist_lookup_nvlist_array(config,
3948             ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
3949                 if (nvlist_lookup_uint64(config,
3950                     ZPOOL_CONFIG_WHOLE_DISK,
3951                     &wholedisk) != 0 || !wholedisk) {
3952                         return (MAXOFFSET_T);
3953                 }
3954                 if (read_efi_label(config, &sb) < 0)
3955                         sb = MAXOFFSET_T;
3956                 return (sb);
3957         }
3958
3959         for (c = 0; c < children; c++) {
3960                 sb = find_start_block(child[c]);
3961                 if (sb != MAXOFFSET_T) {
3962                         return (sb);
3963                 }
3964         }
3965         return (MAXOFFSET_T);
3966 }
3967
3968 int
3969 zpool_label_disk_wait(char *path, int timeout)
3970 {
3971         struct stat64 statbuf;
3972         int i;
3973
3974         /*
3975          * Wait timeout miliseconds for a newly created device to be available
3976          * from the given path.  There is a small window when a /dev/ device
3977          * will exist and the udev link will not, so we must wait for the
3978          * symlink.  Depending on the udev rules this may take a few seconds.
3979          */
3980         for (i = 0; i < timeout; i++) {
3981                 usleep(1000);
3982
3983                 errno = 0;
3984                 if ((stat64(path, &statbuf) == 0) && (errno == 0))
3985                         return (0);
3986         }
3987
3988         return (ENOENT);
3989 }
3990
3991 int
3992 zpool_label_disk_check(char *path)
3993 {
3994         struct dk_gpt *vtoc;
3995         int fd, err;
3996
3997         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0)
3998                 return errno;
3999
4000         if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
4001                 (void) close(fd);
4002                 return err;
4003         }
4004
4005         if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
4006                 efi_free(vtoc);
4007                 (void) close(fd);
4008                 return EIDRM;
4009         }
4010
4011         efi_free(vtoc);
4012         (void) close(fd);
4013         return 0;
4014 }
4015
4016 /*
4017  * Label an individual disk.  The name provided is the short name,
4018  * stripped of any leading /dev path.
4019  */
4020 int
4021 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
4022 {
4023         char path[MAXPATHLEN];
4024         struct dk_gpt *vtoc;
4025         int rval, fd;
4026         size_t resv = EFI_MIN_RESV_SIZE;
4027         uint64_t slice_size;
4028         diskaddr_t start_block;
4029         char errbuf[1024];
4030
4031         /* prepare an error message just in case */
4032         (void) snprintf(errbuf, sizeof (errbuf),
4033             dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
4034
4035         if (zhp) {
4036                 nvlist_t *nvroot;
4037
4038 #if defined(__sun__) || defined(__sun)
4039                 if (zpool_is_bootable(zhp)) {
4040                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4041                             "EFI labeled devices are not supported on root "
4042                             "pools."));
4043                         return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
4044                 }
4045 #endif
4046
4047                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
4048                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
4049
4050                 if (zhp->zpool_start_block == 0)
4051                         start_block = find_start_block(nvroot);
4052                 else
4053                         start_block = zhp->zpool_start_block;
4054                 zhp->zpool_start_block = start_block;
4055         } else {
4056                 /* new pool */
4057                 start_block = NEW_START_BLOCK;
4058         }
4059
4060         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4061
4062         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
4063                 /*
4064                  * This shouldn't happen.  We've long since verified that this
4065                  * is a valid device.
4066                  */
4067                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4068                     "label '%s': unable to open device: %d"), path, errno);
4069                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
4070         }
4071
4072         if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
4073                 /*
4074                  * The only way this can fail is if we run out of memory, or we
4075                  * were unable to read the disk's capacity
4076                  */
4077                 if (errno == ENOMEM)
4078                         (void) no_memory(hdl);
4079
4080                 (void) close(fd);
4081                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4082                     "label '%s': unable to read disk capacity"), path);
4083
4084                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
4085         }
4086
4087         slice_size = vtoc->efi_last_u_lba + 1;
4088         slice_size -= EFI_MIN_RESV_SIZE;
4089         if (start_block == MAXOFFSET_T)
4090                 start_block = NEW_START_BLOCK;
4091         slice_size -= start_block;
4092         slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT);
4093
4094         vtoc->efi_parts[0].p_start = start_block;
4095         vtoc->efi_parts[0].p_size = slice_size;
4096
4097         /*
4098          * Why we use V_USR: V_BACKUP confuses users, and is considered
4099          * disposable by some EFI utilities (since EFI doesn't have a backup
4100          * slice).  V_UNASSIGNED is supposed to be used only for zero size
4101          * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
4102          * etc. were all pretty specific.  V_USR is as close to reality as we
4103          * can get, in the absence of V_OTHER.
4104          */
4105         vtoc->efi_parts[0].p_tag = V_USR;
4106         (void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
4107
4108         vtoc->efi_parts[8].p_start = slice_size + start_block;
4109         vtoc->efi_parts[8].p_size = resv;
4110         vtoc->efi_parts[8].p_tag = V_RESERVED;
4111
4112         if ((rval = efi_write(fd, vtoc)) != 0 || (rval = efi_rescan(fd)) != 0) {
4113                 /*
4114                  * Some block drivers (like pcata) may not support EFI
4115                  * GPT labels.  Print out a helpful error message dir-
4116                  * ecting the user to manually label the disk and give
4117                  * a specific slice.
4118                  */
4119                 (void) close(fd);
4120                 efi_free(vtoc);
4121
4122                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
4123                     "parted(8) and then provide a specific slice: %d"), rval);
4124                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4125         }
4126
4127         (void) close(fd);
4128         efi_free(vtoc);
4129
4130         /* Wait for the first expected partition to appear. */
4131
4132         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4133         (void) zfs_append_partition(path, MAXPATHLEN);
4134
4135         rval = zpool_label_disk_wait(path, 3000);
4136         if (rval) {
4137                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
4138                     "detect device partitions on '%s': %d"), path, rval);
4139                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4140         }
4141
4142         /* We can't be to paranoid.  Read the label back and verify it. */
4143         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4144         rval = zpool_label_disk_check(path);
4145         if (rval) {
4146                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
4147                     "EFI label on '%s' is damaged.  Ensure\nthis device "
4148                     "is not in in use, and is functioning properly: %d"),
4149                     path, rval);
4150                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4151         }
4152
4153         return 0;
4154 }