Add linux user disk support
[zfs.git] / lib / libzfs / libzfs_pool.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <ctype.h>
#include <errno.h>
#include <devid.h>
#include <fcntl.h>
#include <libintl.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <zone.h>
#include <sys/stat.h>
#include <sys/efi_partition.h>
#include <sys/vtoc.h>
#include <sys/zfs_ioctl.h>
#include <dlfcn.h>

#include "zfs_namecheck.h"
#include "zfs_prop.h"
#include "libzfs_impl.h"
#include "zfs_comutil.h"

static int read_efi_label(nvlist_t *config, diskaddr_t *sb);

typedef struct prop_flags {
        int create:1;   /* Validate property on creation */
        int import:1;   /* Validate property on import */
} prop_flags_t;

/*
 * ====================================================================
 *   zpool property functions
 * ====================================================================
 */

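/*
 * Fetch every property for the pool in a single ZFS_IOC_POOL_GET_PROPS
 * ioctl, retrying with a larger destination buffer while the kernel
 * reports ENOMEM, and cache the unpacked nvlist in zhp->zpool_props.
 */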
static int
zpool_get_all_props(zpool_handle_t *zhp)
{
        zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
        libzfs_handle_t *hdl = zhp->zpool_hdl;

        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

        if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
                return (-1);

        while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
                if (errno == ENOMEM) {
                        if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
                                zcmd_free_nvlists(&zc);
                                return (-1);
                        }
                } else {
                        zcmd_free_nvlists(&zc);
                        return (-1);
                }
        }

        if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
                zcmd_free_nvlists(&zc);
                return (-1);
        }

        zcmd_free_nvlists(&zc);

        return (0);
}

static int
zpool_props_refresh(zpool_handle_t *zhp)
{
        nvlist_t *old_props;

        old_props = zhp->zpool_props;

        if (zpool_get_all_props(zhp) != 0)
                return (-1);

        nvlist_free(old_props);
        return (0);
}

static char *
zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
    zprop_source_t *src)
{
        nvlist_t *nv, *nvl;
        uint64_t ival;
        char *value;
        zprop_source_t source;

        nvl = zhp->zpool_props;
        if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
                verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
                source = ival;
                verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
        } else {
                source = ZPROP_SRC_DEFAULT;
                if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
                        value = "-";
        }

        if (src)
                *src = source;

        return (value);
}

uint64_t
zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
{
        nvlist_t *nv, *nvl;
        uint64_t value;
        zprop_source_t source;

        if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
                /*
                 * zpool_get_all_props() has most likely failed because
                 * the pool is faulted, but if all we need is the top level
                 * vdev's guid then get it from the zhp config nvlist.
                 */
                if ((prop == ZPOOL_PROP_GUID) &&
                    (nvlist_lookup_nvlist(zhp->zpool_config,
                    ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
                    (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
                    == 0)) {
                        return (value);
                }
                return (zpool_prop_default_numeric(prop));
        }

        nvl = zhp->zpool_props;
        if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
                verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
                source = value;
                verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
        } else {
                source = ZPROP_SRC_DEFAULT;
                value = zpool_prop_default_numeric(prop);
        }

        if (src)
                *src = source;

        return (value);
}

/*
 * Map VDEV STATE to printed strings.
 */
char *
zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
{
        switch (state) {
        default:
                break;
        case VDEV_STATE_CLOSED:
        case VDEV_STATE_OFFLINE:
                return (gettext("OFFLINE"));
        case VDEV_STATE_REMOVED:
                return (gettext("REMOVED"));
        case VDEV_STATE_CANT_OPEN:
                if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
                        return (gettext("FAULTED"));
                else if (aux == VDEV_AUX_SPLIT_POOL)
                        return (gettext("SPLIT"));
                else
                        return (gettext("UNAVAIL"));
        case VDEV_STATE_FAULTED:
                return (gettext("FAULTED"));
        case VDEV_STATE_DEGRADED:
                return (gettext("DEGRADED"));
        case VDEV_STATE_HEALTHY:
                return (gettext("ONLINE"));
        }

        return (gettext("UNKNOWN"));
}

/*
 * Get a zpool property value for 'prop' and return the value in
 * a pre-allocated buffer.
 */
int
zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
    zprop_source_t *srctype)
{
        uint64_t intval;
        const char *strval;
        zprop_source_t src = ZPROP_SRC_NONE;
        nvlist_t *nvroot;
        vdev_stat_t *vs;
        uint_t vsc;

        if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
                switch (prop) {
                case ZPOOL_PROP_NAME:
                        (void) strlcpy(buf, zpool_get_name(zhp), len);
                        break;

                case ZPOOL_PROP_HEALTH:
                        (void) strlcpy(buf, "FAULTED", len);
                        break;

                case ZPOOL_PROP_GUID:
                        intval = zpool_get_prop_int(zhp, prop, &src);
                        (void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
                        break;

                case ZPOOL_PROP_ALTROOT:
                case ZPOOL_PROP_CACHEFILE:
                        if (zhp->zpool_props != NULL ||
                            zpool_get_all_props(zhp) == 0) {
                                (void) strlcpy(buf,
                                    zpool_get_prop_string(zhp, prop, &src),
                                    len);
                                if (srctype != NULL)
                                        *srctype = src;
                                return (0);
                        }
                        /* FALLTHROUGH */
                default:
                        (void) strlcpy(buf, "-", len);
                        break;
                }

                if (srctype != NULL)
                        *srctype = src;
                return (0);
        }

        if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
            prop != ZPOOL_PROP_NAME)
                return (-1);

        switch (zpool_prop_get_type(prop)) {
        case PROP_TYPE_STRING:
                (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
                    len);
                break;

        case PROP_TYPE_NUMBER:
                intval = zpool_get_prop_int(zhp, prop, &src);

                switch (prop) {
                case ZPOOL_PROP_SIZE:
                case ZPOOL_PROP_ALLOCATED:
                case ZPOOL_PROP_FREE:
                        (void) zfs_nicenum(intval, buf, len);
                        break;

                case ZPOOL_PROP_CAPACITY:
                        (void) snprintf(buf, len, "%llu%%",
                            (u_longlong_t)intval);
                        break;

                case ZPOOL_PROP_DEDUPRATIO:
                        (void) snprintf(buf, len, "%llu.%02llux",
                            (u_longlong_t)(intval / 100),
                            (u_longlong_t)(intval % 100));
                        break;

                case ZPOOL_PROP_HEALTH:
                        verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
                            ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
                        verify(nvlist_lookup_uint64_array(nvroot,
                            ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
                            == 0);

                        (void) strlcpy(buf, zpool_state_to_name(intval,
                            vs->vs_aux), len);
                        break;
                default:
                        (void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
                }
                break;

        case PROP_TYPE_INDEX:
                intval = zpool_get_prop_int(zhp, prop, &src);
                if (zpool_prop_index_to_string(prop, intval, &strval)
                    != 0)
                        return (-1);
                (void) strlcpy(buf, strval, len);
                break;

        default:
                abort();
        }

        if (srctype)
                *srctype = src;

        return (0);
}
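
/*
 * Illustrative usage sketch (not part of this file): fetch the pool's
 * health string into a caller-supplied buffer.
 *
 *      char health[ZFS_MAXPROPLEN];
 *
 *      if (zpool_get_prop(zhp, ZPOOL_PROP_HEALTH, health,
 *          sizeof (health), NULL) == 0)
 *              (void) printf("health: %s\n", health);
 */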

/*
 * Check that the bootfs name carries the same pool name as the pool it
 * is being set on.  Assumes 'bootfs' is a valid dataset name.
 */
static boolean_t
bootfs_name_valid(const char *pool, char *bootfs)
{
        int len = strlen(pool);

        if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
                return (B_FALSE);

        if (strncmp(pool, bootfs, len) == 0 &&
            (bootfs[len] == '/' || bootfs[len] == '\0'))
                return (B_TRUE);

        return (B_FALSE);
}
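
/*
 * For example, with pool 'tank' the checks above accept 'tank' and
 * 'tank/boot' but reject 'tankers/boot', since the character after the
 * matched pool name must be '/' or the end of the string.
 */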

/*
 * Inspect the configuration to determine if any of the devices contain
 * an EFI label.
 */
static boolean_t
pool_uses_efi(nvlist_t *config)
{
        nvlist_t **child;
        uint_t c, children;

        if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
            &child, &children) != 0)
                return (read_efi_label(config, NULL) >= 0);

        for (c = 0; c < children; c++) {
                if (pool_uses_efi(child[c]))
                        return (B_TRUE);
        }
        return (B_FALSE);
}

static boolean_t
pool_is_bootable(zpool_handle_t *zhp)
{
        char bootfs[ZPOOL_MAXNAMELEN];

        return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
            sizeof (bootfs), NULL) == 0 && strncmp(bootfs, "-",
            sizeof (bootfs)) != 0);
}


/*
 * Given an nvlist of zpool properties to be set, validate that they are
 * correct, and parse any numeric properties (index, boolean, etc) if they are
 * specified as strings.
 */
static nvlist_t *
zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
    nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
{
        nvpair_t *elem;
        nvlist_t *retprops;
        zpool_prop_t prop;
        char *strval;
        uint64_t intval;
        char *slash;
        struct stat64 statbuf;
        zpool_handle_t *zhp;
        nvlist_t *nvroot;

        if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
                (void) no_memory(hdl);
                return (NULL);
        }

        elem = NULL;
        while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
                const char *propname = nvpair_name(elem);

                /*
                 * Make sure this property is valid and applies to this type.
                 */
                if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "invalid property '%s'"), propname);
                        (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
                        goto error;
                }

                if (zpool_prop_readonly(prop)) {
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
                            "is readonly"), propname);
                        (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
                        goto error;
                }

                if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
                    &strval, &intval, errbuf) != 0)
                        goto error;

                /*
                 * Perform additional checking for specific properties.
                 */
                switch (prop) {
                default:
                        break;
                case ZPOOL_PROP_VERSION:
                        if (intval < version || intval > SPA_VERSION) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' number %d is invalid."),
                                    propname, intval);
                                (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
                                goto error;
                        }
                        break;

                case ZPOOL_PROP_BOOTFS:
                        if (flags.create || flags.import) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' cannot be set at creation "
                                    "or import time"), propname);
                                (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
                                goto error;
                        }

                        if (version < SPA_VERSION_BOOTFS) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "pool must be upgraded to support "
                                    "'%s' property"), propname);
                                (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
                                goto error;
                        }

                        /*
                         * The bootfs property value has to be a dataset name
                         * and the dataset has to be in the pool it is being
                         * set on.
                         */
                        if (strval[0] != '\0' && !bootfs_name_valid(poolname,
                            strval)) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
                                    "is an invalid name"), strval);
                                (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
                                goto error;
                        }

                        if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "could not open pool '%s'"), poolname);
                                (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
                                goto error;
                        }
                        verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
                            ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);

                        /*
                         * bootfs property cannot be set on a disk which has
                         * been EFI labeled.
                         */
                        if (pool_uses_efi(nvroot)) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' not supported on "
                                    "EFI labeled devices"), propname);
                                (void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
                                zpool_close(zhp);
                                goto error;
                        }
                        zpool_close(zhp);
                        break;

                case ZPOOL_PROP_ALTROOT:
                        if (!flags.create && !flags.import) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' can only be set during pool "
                                    "creation or import"), propname);
                                (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
                                goto error;
                        }

                        if (strval[0] != '/') {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "bad alternate root '%s'"), strval);
                                (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
                                goto error;
                        }
                        break;

                case ZPOOL_PROP_CACHEFILE:
                        if (strval[0] == '\0')
                                break;

                        if (strcmp(strval, "none") == 0)
                                break;

                        if (strval[0] != '/') {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' must be empty, an "
                                    "absolute path, or 'none'"), propname);
                                (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
                                goto error;
                        }

                        slash = strrchr(strval, '/');

                        if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
                            strcmp(slash, "/..") == 0) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "'%s' is not a valid file"), strval);
                                (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
                                goto error;
                        }

                        *slash = '\0';

                        if (strval[0] != '\0' &&
                            (stat64(strval, &statbuf) != 0 ||
                            !S_ISDIR(statbuf.st_mode))) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "'%s' is not a valid directory"),
                                    strval);
                                (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
                                goto error;
                        }

                        *slash = '/';
                        break;

                case ZPOOL_PROP_READONLY:
                        if (!flags.import) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' can only be set at "
                                    "import time"), propname);
                                (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
                                goto error;
                        }
                        break;
                }
        }

        return (retprops);
error:
        nvlist_free(retprops);
        return (NULL);
}

/*
 * Set zpool property : propname=propval.
 */
int
zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
{
        zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
        int ret = -1;
        char errbuf[1024];
        nvlist_t *nvl = NULL;
        nvlist_t *realprops;
        uint64_t version;
        prop_flags_t flags = { 0 };

        (void) snprintf(errbuf, sizeof (errbuf),
            dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
            zhp->zpool_name);

        if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
                return (no_memory(zhp->zpool_hdl));

        if (nvlist_add_string(nvl, propname, propval) != 0) {
                nvlist_free(nvl);
                return (no_memory(zhp->zpool_hdl));
        }

        version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
        if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
            zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
                nvlist_free(nvl);
                return (-1);
        }

        nvlist_free(nvl);
        nvl = realprops;

        /*
         * Execute the corresponding ioctl() to set this property.
         */
        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

        if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
                nvlist_free(nvl);
                return (-1);
        }

        ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);

        zcmd_free_nvlists(&zc);
        nvlist_free(nvl);

        if (ret)
                (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
        else
                (void) zpool_props_refresh(zhp);

        return (ret);
}

int
zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
{
        libzfs_handle_t *hdl = zhp->zpool_hdl;
        zprop_list_t *entry;
        char buf[ZFS_MAXPROPLEN];

        if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
                return (-1);

        for (entry = *plp; entry != NULL; entry = entry->pl_next) {

                if (entry->pl_fixed)
                        continue;

                if (entry->pl_prop != ZPROP_INVAL &&
                    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
                    NULL) == 0) {
                        if (strlen(buf) > entry->pl_width)
                                entry->pl_width = strlen(buf);
                }
        }

        return (0);
}

/*
 * Don't start the slice at the default block of 34; many storage
 * devices will use a stripe width of 128k, other vendors prefer a 1m
 * alignment.  It is best to play it safe and ensure a 1m alignment
 * given 512b blocks.  When the block size is larger by a power of 2
 * we will still be 1m aligned.
 */
#define NEW_START_BLOCK 2048
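/* 2048 sectors * 512 bytes per sector = 1m, so slices start 1m aligned. */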

/*
 * Validate the given pool name, optionally reporting an extended error
 * message via 'hdl'.
 */
boolean_t
zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
{
        namecheck_err_t why;
        char what;
        int ret;

        ret = pool_namecheck(pool, &why, &what);

        /*
         * The rules for reserved pool names were extended at a later point.
         * But we need to support users with existing pools that may now be
         * invalid.  So we only check for this expanded set of names during a
         * create (or import), and only in userland.
         */
        if (ret == 0 && !isopen &&
            (strncmp(pool, "mirror", 6) == 0 ||
            strncmp(pool, "raidz", 5) == 0 ||
            strncmp(pool, "spare", 5) == 0 ||
            strcmp(pool, "log") == 0)) {
                if (hdl != NULL)
                        zfs_error_aux(hdl,
                            dgettext(TEXT_DOMAIN, "name is reserved"));
                return (B_FALSE);
        }


        if (ret != 0) {
                if (hdl != NULL) {
                        switch (why) {
                        case NAME_ERR_TOOLONG:
                                zfs_error_aux(hdl,
                                    dgettext(TEXT_DOMAIN, "name is too long"));
                                break;

                        case NAME_ERR_INVALCHAR:
                                zfs_error_aux(hdl,
                                    dgettext(TEXT_DOMAIN, "invalid character "
                                    "'%c' in pool name"), what);
                                break;

                        case NAME_ERR_NOLETTER:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "name must begin with a letter"));
                                break;

                        case NAME_ERR_RESERVED:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "name is reserved"));
                                break;

                        case NAME_ERR_DISKLIKE:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "pool name is reserved"));
                                break;

                        case NAME_ERR_LEADING_SLASH:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "leading slash in name"));
                                break;

                        case NAME_ERR_EMPTY_COMPONENT:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "empty component in name"));
                                break;

                        case NAME_ERR_TRAILING_SLASH:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "trailing slash in name"));
                                break;

                        case NAME_ERR_MULTIPLE_AT:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "multiple '@' delimiters in name"));
                                break;
                        case NAME_ERR_NO_AT:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "permission set is missing '@'"));
                                break;
                        }
                }
                return (B_FALSE);
        }

        return (B_TRUE);
}
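
/*
 * Illustrative usage sketch (not part of this file): reject a bad name
 * before attempting a create, mirroring the check in zpool_create().
 *
 *      if (!zpool_name_valid(hdl, B_FALSE, "tank"))
 *              return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
 */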

/*
 * Open a handle to the given pool, even if the pool is currently in the FAULTED
 * state.
 */
zpool_handle_t *
zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
{
        zpool_handle_t *zhp;
        boolean_t missing;

        /*
         * Make sure the pool name is valid.
         */
        if (!zpool_name_valid(hdl, B_TRUE, pool)) {
                (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
                    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
                    pool);
                return (NULL);
        }

        if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
                return (NULL);

        zhp->zpool_hdl = hdl;
        (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));

        if (zpool_refresh_stats(zhp, &missing) != 0) {
                zpool_close(zhp);
                return (NULL);
        }

        if (missing) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
                (void) zfs_error_fmt(hdl, EZFS_NOENT,
                    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
                zpool_close(zhp);
                return (NULL);
        }

        return (zhp);
}

/*
 * Like the above, but silent on error.  Used when iterating over pools (because
 * the configuration cache may be out of date).
 */
int
zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
{
        zpool_handle_t *zhp;
        boolean_t missing;

        if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
                return (-1);

        zhp->zpool_hdl = hdl;
        (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));

        if (zpool_refresh_stats(zhp, &missing) != 0) {
                zpool_close(zhp);
                return (-1);
        }

        if (missing) {
                zpool_close(zhp);
                *ret = NULL;
                return (0);
        }

        *ret = zhp;
        return (0);
}

/*
 * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
 * state.
 */
zpool_handle_t *
zpool_open(libzfs_handle_t *hdl, const char *pool)
{
        zpool_handle_t *zhp;

        if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
                return (NULL);

        if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
                (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
                    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
                zpool_close(zhp);
                return (NULL);
        }

        return (zhp);
}
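
/*
 * Illustrative usage sketch (not part of this file): the typical
 * open/operate/close pattern built from the functions above.
 *
 *      zpool_handle_t *zhp;
 *
 *      if ((zhp = zpool_open(hdl, "tank")) == NULL)
 *              return (-1);
 *      ... operate on the pool via zhp ...
 *      zpool_close(zhp);
 */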

/*
 * Close the handle.  Simply frees the memory associated with the handle.
 */
void
zpool_close(zpool_handle_t *zhp)
{
        if (zhp->zpool_config)
                nvlist_free(zhp->zpool_config);
        if (zhp->zpool_old_config)
                nvlist_free(zhp->zpool_old_config);
        if (zhp->zpool_props)
                nvlist_free(zhp->zpool_props);
        free(zhp);
}

/*
 * Return the name of the pool.
 */
const char *
zpool_get_name(zpool_handle_t *zhp)
{
        return (zhp->zpool_name);
}


/*
 * Return the state of the pool (ACTIVE or UNAVAILABLE)
 */
int
zpool_get_state(zpool_handle_t *zhp)
{
        return (zhp->zpool_state);
}

/*
 * Create the named pool, using the provided vdev list.  It is assumed
 * that the consumer has already validated the contents of the nvlist, so we
 * don't have to worry about error semantics.
 */
int
zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
    nvlist_t *props, nvlist_t *fsprops)
{
        zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
        nvlist_t *zc_fsprops = NULL;
        nvlist_t *zc_props = NULL;
        char msg[1024];
        char *altroot;
        int ret = -1;

        (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
            "cannot create '%s'"), pool);

        if (!zpool_name_valid(hdl, B_FALSE, pool))
                return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

        if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
                return (-1);

        if (props) {
                prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };

                if ((zc_props = zpool_valid_proplist(hdl, pool, props,
                    SPA_VERSION_1, flags, msg)) == NULL) {
                        goto create_failed;
                }
        }

        if (fsprops) {
                uint64_t zoned;
                char *zonestr;

                zoned = ((nvlist_lookup_string(fsprops,
                    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
                    strcmp(zonestr, "on") == 0);

                if ((zc_fsprops = zfs_valid_proplist(hdl,
                    ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) {
                        goto create_failed;
                }
                if (!zc_props &&
                    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
                        goto create_failed;
                }
                if (nvlist_add_nvlist(zc_props,
                    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
                        goto create_failed;
                }
        }

        if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
                goto create_failed;

        (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));

        if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {

                zcmd_free_nvlists(&zc);
                nvlist_free(zc_props);
                nvlist_free(zc_fsprops);

                switch (errno) {
                case EBUSY:
                        /*
                         * This can happen if the user has specified the same
                         * device multiple times.  We can't reliably detect this
                         * until we try to add it and see we already have a
                         * label.  This can also happen if the device is
                         * part of an active md or lvm device.
                         */
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "one or more vdevs refer to the same device, or one of\n"
                            "the devices is part of an active md or lvm device"));
                        return (zfs_error(hdl, EZFS_BADDEV, msg));

                case EOVERFLOW:
                        /*
                         * This occurs when one of the devices is below
                         * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
                         * device was the problem device since there's no
                         * reliable way to determine device size from userland.
                         */
                        {
                                char buf[64];

                                zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));

                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "one or more devices is less than the "
                                    "minimum size (%s)"), buf);
                        }
                        return (zfs_error(hdl, EZFS_BADDEV, msg));

                case ENOSPC:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "one or more devices is out of space"));
                        return (zfs_error(hdl, EZFS_BADDEV, msg));

                case ENOTBLK:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "cache device must be a disk or disk slice"));
                        return (zfs_error(hdl, EZFS_BADDEV, msg));

                default:
                        return (zpool_standard_error(hdl, errno, msg));
                }
        }

        /*
         * If this is an alternate root pool, then we automatically set the
         * mountpoint of the root dataset to be '/'.
         */
        if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
            &altroot) == 0) {
                zfs_handle_t *zhp;

                verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
                verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
                    "/") == 0);

                zfs_close(zhp);
        }

create_failed:
        zcmd_free_nvlists(&zc);
        nvlist_free(zc_props);
        nvlist_free(zc_fsprops);
        return (ret);
}
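
/*
 * Illustrative usage sketch (assumptions flagged): callers such as the
 * zpool(8) command build 'nvroot' from a vdev specification first; the
 * make_root_vdev() helper named here lives outside this file and stands
 * in only as a placeholder for that step.
 *
 *      nvlist_t *nvroot = make_root_vdev(...);         placeholder step
 *
 *      if (zpool_create(hdl, "tank", nvroot, NULL, NULL) != 0)
 *              ... error already reported through 'hdl' ...
 */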

/*
 * Destroy the given pool.  It is up to the caller to ensure that there are no
 * datasets left in the pool.
 */
int
zpool_destroy(zpool_handle_t *zhp)
{
        zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
        zfs_handle_t *zfp = NULL;
        libzfs_handle_t *hdl = zhp->zpool_hdl;
        char msg[1024];

        if (zhp->zpool_state == POOL_STATE_ACTIVE &&
            (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
                return (-1);

        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

        if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
                (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
                    "cannot destroy '%s'"), zhp->zpool_name);

                if (errno == EROFS) {
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "one or more devices is read only"));
                        (void) zfs_error(hdl, EZFS_BADDEV, msg);
                } else {
                        (void) zpool_standard_error(hdl, errno, msg);
                }

                if (zfp)
                        zfs_close(zfp);
                return (-1);
        }

        if (zfp) {
                remove_mountpoint(zfp);
                zfs_close(zfp);
        }

        return (0);
}

/*
 * Add the given vdevs to the pool.  The caller must have already performed the
 * necessary verification to ensure that the vdev specification is well-formed.
 */
int
zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
{
        zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
        int ret;
        libzfs_handle_t *hdl = zhp->zpool_hdl;
        char msg[1024];
        nvlist_t **spares, **l2cache;
        uint_t nspares, nl2cache;

        (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
            "cannot add to '%s'"), zhp->zpool_name);

        if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
            SPA_VERSION_SPARES &&
            nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
            &spares, &nspares) == 0) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
                    "upgraded to add hot spares"));
                return (zfs_error(hdl, EZFS_BADVERSION, msg));
        }

        if (pool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot,
            ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
                uint64_t s;

                for (s = 0; s < nspares; s++) {
                        char *path;

                        if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
                            &path) == 0 && pool_uses_efi(spares[s])) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "device '%s' contains an EFI label and "
                                    "cannot be used on root pools."),
                                    zpool_vdev_name(hdl, NULL, spares[s],
                                    B_FALSE));
                                return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
                        }
                }
        }

        if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
            SPA_VERSION_L2CACHE &&
            nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
            &l2cache, &nl2cache) == 0) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
                    "upgraded to add cache devices"));
                return (zfs_error(hdl, EZFS_BADVERSION, msg));
        }

        if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
                return (-1);
        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

        if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
                switch (errno) {
                case EBUSY:
                        /*
                         * This can happen if the user has specified the same
                         * device multiple times.  We can't reliably detect this
                         * until we try to add it and see we already have a
                         * label.
                         */
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "one or more vdevs refer to the same device"));
                        (void) zfs_error(hdl, EZFS_BADDEV, msg);
                        break;

                case EOVERFLOW:
                        /*
                         * This occurs when one of the devices is below
                         * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
                         * device was the problem device since there's no
                         * reliable way to determine device size from userland.
                         */
                        {
                                char buf[64];

                                zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));

                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "device is less than the minimum "
                                    "size (%s)"), buf);
                        }
                        (void) zfs_error(hdl, EZFS_BADDEV, msg);
                        break;

                case ENOTSUP:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "pool must be upgraded to add these vdevs"));
                        (void) zfs_error(hdl, EZFS_BADVERSION, msg);
                        break;

                case EDOM:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "root pool can not have multiple vdevs"
                            " or separate logs"));
                        (void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
                        break;

                case ENOTBLK:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "cache device must be a disk or disk slice"));
                        (void) zfs_error(hdl, EZFS_BADDEV, msg);
                        break;

                default:
                        (void) zpool_standard_error(hdl, errno, msg);
                }

                ret = -1;
        } else {
                ret = 0;
        }

        zcmd_free_nvlists(&zc);

        return (ret);
}

/*
 * Exports the pool from the system.  The caller must ensure that there are no
 * mounted datasets in the pool.
 */
int
zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)
{
        zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
        char msg[1024];

        (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
            "cannot export '%s'"), zhp->zpool_name);

        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
        zc.zc_cookie = force;
        zc.zc_guid = hardforce;

        if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
                switch (errno) {
                case EXDEV:
                        zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
                            "use '-f' to override the following errors:\n"
                            "'%s' has an active shared spare which could be"
                            " used by other pools once '%s' is exported."),
                            zhp->zpool_name, zhp->zpool_name);
                        return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
                            msg));
                default:
                        return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
                            msg));
                }
        }

        return (0);
}

int
zpool_export(zpool_handle_t *zhp, boolean_t force)
{
        return (zpool_export_common(zhp, force, B_FALSE));
}

int
zpool_export_force(zpool_handle_t *zhp)
{
        return (zpool_export_common(zhp, B_TRUE, B_TRUE));
}
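
/*
 * Illustrative usage sketch (not part of this file): try a plain export
 * first and reserve the hard-force variant for when the caller is sure
 * no datasets remain in use.
 *
 *      if (zpool_export(zhp, B_FALSE) != 0)
 *              ... report the error, or fall back to
 *              zpool_export_force(zhp) when appropriate ...
 */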

static void
zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
    nvlist_t *config)
{
        nvlist_t *nv = NULL;
        uint64_t rewindto;
        int64_t loss = -1;
        struct tm t;
        char timestr[128];

        if (!hdl->libzfs_printerr || config == NULL)
                return;

        if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0)
                return;

        if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
                return;
        (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);

        if (localtime_r((time_t *)&rewindto, &t) != NULL &&
            strftime(timestr, 128, "%c", &t) != 0) {
                if (dryrun) {
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "Would be able to return %s "
                            "to its state as of %s.\n"),
                            name, timestr);
                } else {
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "Pool %s returned to its state as of %s.\n"),
                            name, timestr);
                }
                if (loss > 120) {
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "%s approximately %lld "),
                            dryrun ? "Would discard" : "Discarded",
                            ((longlong_t)loss + 30) / 60);
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "minutes of transactions.\n"));
                } else if (loss > 0) {
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "%s approximately %lld "),
                            dryrun ? "Would discard" : "Discarded",
                            (longlong_t)loss);
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "seconds of transactions.\n"));
                }
        }
}

void
zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
    nvlist_t *config)
{
        nvlist_t *nv = NULL;
        int64_t loss = -1;
        uint64_t edata = UINT64_MAX;
        uint64_t rewindto;
        struct tm t;
        char timestr[128];

        if (!hdl->libzfs_printerr)
                return;

        if (reason >= 0)
                (void) printf(dgettext(TEXT_DOMAIN, "action: "));
        else
                (void) printf(dgettext(TEXT_DOMAIN, "\t"));

        /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
        if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
            nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
                goto no_info;

        (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
        (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
            &edata);

        (void) printf(dgettext(TEXT_DOMAIN,
            "Recovery is possible, but will result in some data loss.\n"));

        if (localtime_r((time_t *)&rewindto, &t) != NULL &&
            strftime(timestr, 128, "%c", &t) != 0) {
                (void) printf(dgettext(TEXT_DOMAIN,
                    "\tReturning the pool to its state as of %s\n"
                    "\tshould correct the problem.  "),
                    timestr);
        } else {
                (void) printf(dgettext(TEXT_DOMAIN,
                    "\tReverting the pool to an earlier state "
                    "should correct the problem.\n\t"));
        }

        if (loss > 120) {
                (void) printf(dgettext(TEXT_DOMAIN,
                    "Approximately %lld minutes of data\n"
                    "\tmust be discarded, irreversibly.  "),
                    ((longlong_t)loss + 30) / 60);
        } else if (loss > 0) {
                (void) printf(dgettext(TEXT_DOMAIN,
                    "Approximately %lld seconds of data\n"
                    "\tmust be discarded, irreversibly.  "),
                    (longlong_t)loss);
        }
        if (edata != 0 && edata != UINT64_MAX) {
                if (edata == 1) {
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "After rewind, at least\n"
                            "\tone persistent user-data error will remain.  "));
                } else {
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "After rewind, several\n"
                            "\tpersistent user-data errors will remain.  "));
                }
        }
        (void) printf(dgettext(TEXT_DOMAIN,
            "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
            reason >= 0 ? "clear" : "import", name);

        (void) printf(dgettext(TEXT_DOMAIN,
            "A scrub of the pool\n"
            "\tis strongly recommended after recovery.\n"));
        return;

no_info:
        (void) printf(dgettext(TEXT_DOMAIN,
            "Destroy and re-create the pool from\n\ta backup source.\n"));
}

/*
 * zpool_import() is a contracted interface.  It should be kept the same
 * if possible.
 *
 * Applications should use zpool_import_props() to import a pool with
 * new property values to be set.
 */
int
zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
    char *altroot)
{
        nvlist_t *props = NULL;
        int ret;

        if (altroot != NULL) {
                if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
                        return (zfs_error_fmt(hdl, EZFS_NOMEM,
                            dgettext(TEXT_DOMAIN, "cannot import '%s'"),
                            newname));
                }

                if (nvlist_add_string(props,
                    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
                    nvlist_add_string(props,
                    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
                        nvlist_free(props);
                        return (zfs_error_fmt(hdl, EZFS_NOMEM,
                            dgettext(TEXT_DOMAIN, "cannot import '%s'"),
                            newname));
                }
        }

        ret = zpool_import_props(hdl, config, newname, props,
            ZFS_IMPORT_NORMAL);
        if (props)
                nvlist_free(props);
        return (ret);
}
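
/*
 * Illustrative usage sketch (not part of this file): import a config
 * found by zpool_find_import() under its original name with an
 * alternate root.
 *
 *      if (zpool_import(hdl, config, NULL, "/mnt") != 0)
 *              ... error already reported through 'hdl' ...
 */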

static void
print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
    int indent)
{
        nvlist_t **child;
        uint_t c, children;
        char *vname;
        uint64_t is_log = 0;

        (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
            &is_log);

        if (name != NULL)
                (void) printf("\t%*s%s%s\n", indent, "", name,
                    is_log ? " [log]" : "");

        if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
            &child, &children) != 0)
                return;

        for (c = 0; c < children; c++) {
                vname = zpool_vdev_name(hdl, NULL, child[c], B_TRUE);
                print_vdev_tree(hdl, vname, child[c], indent + 2);
                free(vname);
        }
}

/*
 * Import the given pool using the known configuration and a list of
 * properties to be set.  The configuration should have come from
 * zpool_find_import().  The 'newname' parameter controls whether the
 * pool is imported with a different name.
 */
1431 int
1432 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1433     nvlist_t *props, int flags)
1434 {
1435         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1436         zpool_rewind_policy_t policy;
1437         nvlist_t *nv = NULL;
1438         nvlist_t *nvinfo = NULL;
1439         nvlist_t *missing = NULL;
1440         char *thename;
1441         char *origname;
1442         int ret;
1443         int error = 0;
1444         char errbuf[1024];
1445
1446         verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1447             &origname) == 0);
1448
1449         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1450             "cannot import pool '%s'"), origname);
1451
1452         if (newname != NULL) {
1453                 if (!zpool_name_valid(hdl, B_FALSE, newname))
1454                         return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1455                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1456                             newname));
1457                 thename = (char *)newname;
1458         } else {
1459                 thename = origname;
1460         }
1461
1462         if (props) {
1463                 uint64_t version;
1464                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
1465
1466                 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1467                     &version) == 0);
1468
1469                 if ((props = zpool_valid_proplist(hdl, origname,
1470                     props, version, flags, errbuf)) == NULL) {
1471                         return (-1);
1472                 } else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1473                         nvlist_free(props);
1474                         return (-1);
1475                 }
1476         }
1477
1478         (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1479
1480         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1481             &zc.zc_guid) == 0);
1482
1483         if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1484                 nvlist_free(props);
1485                 return (-1);
1486         }
1487         if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
1488                 zcmd_free_nvlists(&zc);
                nvlist_free(props);
1489                 return (-1);
1490         }
1491
1492         zc.zc_cookie = flags;
1493         while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
1494             errno == ENOMEM) {
1495                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
1496                         zcmd_free_nvlists(&zc);
                        nvlist_free(props);
1497                         return (-1);
1498                 }
1499         }
1500         if (ret != 0)
1501                 error = errno;
1502
1503         (void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
1504         zpool_get_rewind_policy(config, &policy);
1505
1506         if (error) {
1507                 char desc[1024];
1508
1509                 /*
1510                  * Dry-run failed, but we print out what success
1511                  * looks like if we found a best txg
1512                  */
1513                 if (policy.zrp_request & ZPOOL_TRY_REWIND) {
1514                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1515                             B_TRUE, nv);
1516                         nvlist_free(nv);
                        zcmd_free_nvlists(&zc);
                        nvlist_free(props);
1517                         return (-1);
1518                 }
1519
1520                 if (newname == NULL)
1521                         (void) snprintf(desc, sizeof (desc),
1522                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1523                             thename);
1524                 else
1525                         (void) snprintf(desc, sizeof (desc),
1526                             dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1527                             origname, thename);
1528
1529                 switch (error) {
1530                 case ENOTSUP:
1531                         /*
1532                          * Unsupported version.
1533                          */
1534                         (void) zfs_error(hdl, EZFS_BADVERSION, desc);
1535                         break;
1536
1537                 case EINVAL:
1538                         (void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1539                         break;
1540
1541                 case EROFS:
1542                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1543                             "one or more devices is read only"));
1544                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
1545                         break;
1546
1547                 case ENXIO:
1548                         if (nv && nvlist_lookup_nvlist(nv,
1549                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1550                             nvlist_lookup_nvlist(nvinfo,
1551                             ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
1552                                 (void) printf(dgettext(TEXT_DOMAIN,
1553                                     "The devices below are missing, use "
1554                                     "'-m' to import the pool anyway:\n"));
1555                                 print_vdev_tree(hdl, NULL, missing, 2);
1556                                 (void) printf("\n");
1557                         }
1558                         (void) zpool_standard_error(hdl, error, desc);
1559                         break;
1560
1561                 case EEXIST:
1562                         (void) zpool_standard_error(hdl, error, desc);
1563                         break;
1564
1565                 default:
1566                         (void) zpool_standard_error(hdl, error, desc);
1567                         zpool_explain_recover(hdl,
1568                             newname ? origname : thename, -error, nv);
1569                         break;
1570                 }
1571
1572                 nvlist_free(nv);
1573                 ret = -1;
1574         } else {
1575                 zpool_handle_t *zhp;
1576
1577                 /*
1578                  * This should never fail, but play it safe anyway.
1579                  */
1580                 if (zpool_open_silent(hdl, thename, &zhp) != 0)
1581                         ret = -1;
1582                 else if (zhp != NULL)
1583                         zpool_close(zhp);
1584                 if (policy.zrp_request &
1585                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
1586                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1587                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv);
1588                 }
1589                 nvlist_free(nv);
1590                 /* fall through so the nvlists below are also freed */
1591         }
1592
1593         zcmd_free_nvlists(&zc);
1594         nvlist_free(props);
1595
1596         return (ret);
1597 }
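
/*
 * Illustrative sketch (not part of the library): a caller that wants
 * specific properties at import time builds the props nvlist itself,
 * much as zpool_import() does above for altroot.  Property names come
 * from zpool_prop_to_name(); error handling is abbreviated.
 */
static int
example_import_with_props(libzfs_handle_t *hdl, nvlist_t *config)
{
        nvlist_t *props;
        int ret;

        if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
                return (-1);
        if (nvlist_add_string(props,
            zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
                nvlist_free(props);
                return (-1);
        }
        ret = zpool_import_props(hdl, config, NULL, props,
            ZFS_IMPORT_NORMAL);
        nvlist_free(props);
        return (ret);
}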
1598
1599 /*
1600  * Scan the pool.
1601  */
1602 int
1603 zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
1604 {
1605         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1606         char msg[1024];
1607         libzfs_handle_t *hdl = zhp->zpool_hdl;
1608
1609         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1610         zc.zc_cookie = func;
1611
1612         if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
1613             (errno == ENOENT && func != POOL_SCAN_NONE))
1614                 return (0);
1615
1616         if (func == POOL_SCAN_SCRUB) {
1617                 (void) snprintf(msg, sizeof (msg),
1618                     dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
1619         } else if (func == POOL_SCAN_NONE) {
1620                 (void) snprintf(msg, sizeof (msg),
1621                     dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
1622                     zc.zc_name);
1623         } else {
1624                 assert(!"unexpected result");
1625         }
1626
1627         if (errno == EBUSY) {
1628                 nvlist_t *nvroot;
1629                 pool_scan_stat_t *ps = NULL;
1630                 uint_t psc;
1631
1632                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
1633                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
1634                 (void) nvlist_lookup_uint64_array(nvroot,
1635                     ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
1636                 if (ps && ps->pss_func == POOL_SCAN_SCRUB)
1637                         return (zfs_error(hdl, EZFS_SCRUBBING, msg));
1638                 else
1639                         return (zfs_error(hdl, EZFS_RESILVERING, msg));
1640         } else if (errno == ENOENT) {
1641                 return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
1642         } else {
1643                 return (zpool_standard_error(hdl, errno, msg));
1644         }
1645 }
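
/*
 * Illustrative sketch (not part of the library): starting a scrub and
 * cancelling one are both spelled zpool_scan(); only the function code
 * differs, as the error paths above show.
 */
static int
example_scrub(zpool_handle_t *zhp, boolean_t cancel)
{
        return (zpool_scan(zhp, cancel ? POOL_SCAN_NONE : POOL_SCAN_SCRUB));
}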
1646
1647 /*
1648  * This provides a very minimal check whether a given string is likely a
1649  * c#t#d# style string.  Users of this are expected to do their own
1650  * verification of the s# part.
1651  */
1652 #define CTD_CHECK(str)  ((str) && (str)[0] == 'c' && isdigit((str)[1]))
1653
1654 /*
1655  * More elaborate version for ones which may start with "/dev/dsk/"
1656  * and the like.
1657  */
1658 static int
1659 ctd_check_path(char *str)
{
1660         /*
1661          * If it starts with a slash, check the last component.
1662          */
1663         if (str && str[0] == '/') {
1664                 char *tmp = strrchr(str, '/');
1665
1666                 /*
1667                  * If it ends in "/old", check the second-to-last
1668                  * component of the string instead.
1669                  */
1670                 if (tmp != str && strcmp(tmp, "/old") == 0) {
1671                         for (tmp--; *tmp != '/'; tmp--)
1672                                 ;
1673                 }
1674                 str = tmp + 1;
1675         }
1676         return (CTD_CHECK(str));
1677 }
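
/*
 * For reference, ctd_check_path() accepts forms such as:
 *
 *      c0t0d0s0                  bare c#t#d# name
 *      /dev/dsk/c0t0d0s0         last path component is checked
 *      /dev/dsk/c0t0d0s0/old     second-to-last component is checked
 *
 * while names such as "sda" or "/dev/sda" do not match.
 */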
1678
1679 /*
1680  * Find a vdev that matches the search criteria specified. We use the
1681  * nvpair name to determine how we should look for the device.
1682  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
1683  * spare; but FALSE if it's an INUSE spare.
1684  */
1685 static nvlist_t *
1686 vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
1687     boolean_t *l2cache, boolean_t *log)
1688 {
1689         uint_t c, children;
1690         nvlist_t **child;
1691         nvlist_t *ret;
1692         uint64_t is_log;
1693         char *srchkey;
1694         nvpair_t *pair = nvlist_next_nvpair(search, NULL);
1695
1696         /* Nothing to look for */
1697         if (search == NULL || pair == NULL)
1698                 return (NULL);
1699
1700         /* Obtain the key we will use to search */
1701         srchkey = nvpair_name(pair);
1702
1703         switch (nvpair_type(pair)) {
1704         case DATA_TYPE_UINT64:
1705                 if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
1706                         uint64_t srchval, theguid;
1707
1708                         verify(nvpair_value_uint64(pair, &srchval) == 0);
1709                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
1710                             &theguid) == 0);
1711                         if (theguid == srchval)
1712                                 return (nv);
1713                 }
1714                 break;
1715
1716         case DATA_TYPE_STRING: {
1717                 char *srchval, *val;
1718
1719                 verify(nvpair_value_string(pair, &srchval) == 0);
1720                 if (nvlist_lookup_string(nv, srchkey, &val) != 0)
1721                         break;
1722
1723                 /*
1724                  * Search for the requested value. Special cases:
1725                  *
1726                  * - ZPOOL_CONFIG_PATH for whole disk entries.  These end in
1727                  *   "s0" or "s0/old".  The "s0" part is hidden from the user,
1728                  *   but included in the string, so this matches around it.
1729                  * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
1730                  *
1731                  * Otherwise, all other searches are simple string compares.
1732                  */
1733                 if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 &&
1734                     ctd_check_path(val)) {
1735                         uint64_t wholedisk = 0;
1736
1737                         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1738                             &wholedisk);
1739                         if (wholedisk) {
1740                                 int slen = strlen(srchval);
1741                                 int vlen = strlen(val);
1742
1743                                 if (slen != vlen - 2)
1744                                         break;
1745
1746                                 /*
1747                                  * make_leaf_vdev() should only set
1748                                  * wholedisk for ZPOOL_CONFIG_PATHs which
1749                                  * will include "/dev/dsk/", giving plenty of
1750                                  * room for the indices used next.
1751                                  */
1752                                 ASSERT(vlen >= 6);
1753
1754                                 /*
1755                                  * strings identical except trailing "s0"
1756                                  */
1757                                 if (strcmp(&val[vlen - 2], "s0") == 0 &&
1758                                     strncmp(srchval, val, slen) == 0)
1759                                         return (nv);
1760
1761                                 /*
1762                                  * strings identical except trailing "s0/old"
1763                                  */
1764                                 if (strcmp(&val[vlen - 6], "s0/old") == 0 &&
1765                                     strcmp(&srchval[slen - 4], "/old") == 0 &&
1766                                     strncmp(srchval, val, slen - 4) == 0)
1767                                         return (nv);
1768
1769                                 break;
1770                         }
1771                 } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
1772                         char *type, *idx, *end, *p;
1773                         uint64_t id, vdev_id;
1774
1775                         /*
1776                          * Determine our vdev type, keeping in mind
1777                          * that the srchval is composed of a type and
1778                          * vdev id pair (i.e. mirror-4).
1779                          */
1780                         if ((type = strdup(srchval)) == NULL)
1781                                 return (NULL);
1782
1783                         if ((p = strrchr(type, '-')) == NULL) {
1784                                 free(type);
1785                                 break;
1786                         }
1787                         idx = p + 1;
1788                         *p = '\0';
1789
1790                         /*
1791                          * If the types don't match then keep looking.
1792                          */
1793                         if (strncmp(val, type, strlen(val)) != 0) {
1794                                 free(type);
1795                                 break;
1796                         }
1797
1798                         verify(strncmp(type, VDEV_TYPE_RAIDZ,
1799                             strlen(VDEV_TYPE_RAIDZ)) == 0 ||
1800                             strncmp(type, VDEV_TYPE_MIRROR,
1801                             strlen(VDEV_TYPE_MIRROR)) == 0);
1802                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
1803                             &id) == 0);
1804
1805                         errno = 0;
1806                         vdev_id = strtoull(idx, &end, 10);
1807
1808                         free(type);
1809                         if (errno != 0)
1810                                 return (NULL);
1811
1812                         /*
1813                          * Now verify that we have the correct vdev id.
1814                          */
1815                         if (vdev_id == id)
1816                                 return (nv);
1817                 }
1818
1819                 /*
1820                  * Common case
1821                  */
1822                 if (strcmp(srchval, val) == 0)
1823                         return (nv);
1824                 break;
1825         }
1826
1827         default:
1828                 break;
1829         }
1830
1831         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1832             &child, &children) != 0)
1833                 return (NULL);
1834
1835         for (c = 0; c < children; c++) {
1836                 if ((ret = vdev_to_nvlist_iter(child[c], search,
1837                     avail_spare, l2cache, NULL)) != NULL) {
1838                         /*
1839                          * The 'is_log' value is only set for the top-level
1840                          * vdev, not the leaf vdevs.  So we always look up the
1841                          * log device from the root of the vdev tree (where
1842                          * 'log' is non-NULL).
1843                          */
1844                         if (log != NULL &&
1845                             nvlist_lookup_uint64(child[c],
1846                             ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
1847                             is_log) {
1848                                 *log = B_TRUE;
1849                         }
1850                         return (ret);
1851                 }
1852         }
1853
1854         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1855             &child, &children) == 0) {
1856                 for (c = 0; c < children; c++) {
1857                         if ((ret = vdev_to_nvlist_iter(child[c], search,
1858                             avail_spare, l2cache, NULL)) != NULL) {
1859                                 *avail_spare = B_TRUE;
1860                                 return (ret);
1861                         }
1862                 }
1863         }
1864
1865         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1866             &child, &children) == 0) {
1867                 for (c = 0; c < children; c++) {
1868                         if ((ret = vdev_to_nvlist_iter(child[c], search,
1869                             avail_spare, l2cache, NULL)) != NULL) {
1870                                 *l2cache = B_TRUE;
1871                                 return (ret);
1872                         }
1873                 }
1874         }
1875
1876         return (NULL);
1877 }
1878
1879 /*
1880  * Given a physical path (minus the "/devices" prefix), find the
1881  * associated vdev.
1882  */
1883 nvlist_t *
1884 zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
1885     boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
1886 {
1887         nvlist_t *search, *nvroot, *ret;
1888
1889         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1890         verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
1891
1892         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1893             &nvroot) == 0);
1894
1895         *avail_spare = B_FALSE;
1896         *l2cache = B_FALSE;
1897         if (log != NULL)
1898                 *log = B_FALSE;
1899         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
1900         nvlist_free(search);
1901
1902         return (ret);
1903 }
1904
1905 /*
1906  * Determine if we have an "interior" top-level vdev (i.e. mirror/raidz).
1907  */
1908 boolean_t
1909 zpool_vdev_is_interior(const char *name)
1910 {
1911         if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
1912             strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
1913                 return (B_TRUE);
1914         return (B_FALSE);
1915 }
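
/*
 * For example, "mirror-0" and "raidz1-3" are interior names, while a
 * leaf device path or a GUID string is not.  Note this is a prefix
 * check only; the "-<id>" suffix is parsed later, in
 * vdev_to_nvlist_iter().
 */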
1916
1917 nvlist_t *
1918 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
1919     boolean_t *l2cache, boolean_t *log)
1920 {
1921         char buf[MAXPATHLEN];
1922         char *end;
1923         nvlist_t *nvroot, *search, *ret;
1924         uint64_t guid;
1925
1926         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1927
1928         guid = strtoull(path, &end, 10);
1929         if (guid != 0 && *end == '\0') {
1930                 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
1931         } else if (zpool_vdev_is_interior(path)) {
1932                 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
1933         } else if (path[0] != '/') {
1934                 (void) snprintf(buf, sizeof (buf), "%s/%s", DISK_ROOT, path);
1935                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
1936         } else {
1937                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
1938         }
1939
1940         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1941             &nvroot) == 0);
1942
1943         *avail_spare = B_FALSE;
1944         *l2cache = B_FALSE;
1945         if (log != NULL)
1946                 *log = B_FALSE;
1947         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
1948         nvlist_free(search);
1949
1950         return (ret);
1951 }
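
/*
 * Illustrative sketch (not part of the library): zpool_find_vdev()
 * accepts a decimal GUID, an interior name such as "mirror-0", a short
 * device name (resolved under DISK_ROOT), or an absolute path.  A
 * caller asking whether a device is a slog might write:
 */
static boolean_t
example_vdev_is_log(zpool_handle_t *zhp, const char *dev)
{
        boolean_t spare, l2cache, log;

        if (zpool_find_vdev(zhp, dev, &spare, &l2cache, &log) == NULL)
                return (B_FALSE);
        return (log);
}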
1952
1953 static int
1954 vdev_online(nvlist_t *nv)
1955 {
1956         uint64_t ival;
1957
1958         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
1959             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
1960             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
1961                 return (0);
1962
1963         return (1);
1964 }
1965
1966 /*
1967  * Helper function for zpool_get_physpath().
1968  */
1969 static int
1970 vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
1971     size_t *bytes_written)
1972 {
1973         size_t bytes_left, pos, rsz;
1974         char *tmppath;
1975         const char *format;
1976
1977         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
1978             &tmppath) != 0)
1979                 return (EZFS_NODEVICE);
1980
1981         pos = *bytes_written;
1982         bytes_left = physpath_size - pos;
1983         format = (pos == 0) ? "%s" : " %s";
1984
1985         rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
1986         *bytes_written += rsz;
1987
1988         if (rsz >= bytes_left) {
1989                 /* if physpath was not copied properly, clear it */
1990                 if (bytes_left != 0) {
1991                         physpath[pos] = 0;
1992                 }
1993                 return (EZFS_NOSPC);
1994         }
1995         return (0);
1996 }
1997
1998 static int
1999 vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
2000     size_t *rsz, boolean_t is_spare)
2001 {
2002         char *type;
2003         int ret;
2004
2005         if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
2006                 return (EZFS_INVALCONFIG);
2007
2008         if (strcmp(type, VDEV_TYPE_DISK) == 0) {
2009                 /*
2010                  * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
2011                  * For a spare vdev, we only want to boot from the active
2012                  * spare device.
2013                  */
2014                 if (is_spare) {
2015                         uint64_t spare = 0;
2016                         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
2017                             &spare);
2018                         if (!spare)
2019                                 return (EZFS_INVALCONFIG);
2020                 }
2021
2022                 if (vdev_online(nv)) {
2023                         if ((ret = vdev_get_one_physpath(nv, physpath,
2024                             phypath_size, rsz)) != 0)
2025                                 return (ret);
2026                 }
2027         } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
2028             strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
2029             (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
2030                 nvlist_t **child;
2031                 uint_t count;
2032                 int i, ret;
2033
2034                 if (nvlist_lookup_nvlist_array(nv,
2035                     ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
2036                         return (EZFS_INVALCONFIG);
2037
2038                 for (i = 0; i < count; i++) {
2039                         ret = vdev_get_physpaths(child[i], physpath,
2040                             phypath_size, rsz, is_spare);
2041                         if (ret == EZFS_NOSPC)
2042                                 return (ret);
2043                 }
2044         }
2045
2046         return (EZFS_POOL_INVALARG);
2047 }
2048
2049 /*
2050  * Get phys_path for a root pool config.
2051  * Return 0 on success; non-zero on failure.
2052  */
2053 static int
2054 zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
2055 {
2056         size_t rsz;
2057         nvlist_t *vdev_root;
2058         nvlist_t **child;
2059         uint_t count;
2060         char *type;
2061
2062         rsz = 0;
2063
2064         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2065             &vdev_root) != 0)
2066                 return (EZFS_INVALCONFIG);
2067
2068         if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2069             nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
2070             &child, &count) != 0)
2071                 return (EZFS_INVALCONFIG);
2072
2073         /*
2074          * A root pool cannot have EFI-labeled disks and can only have
2075          * a single top-level vdev.
2076          */
2077         if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 ||
2078             pool_uses_efi(vdev_root))
2079                 return (EZFS_POOL_INVALARG);
2080
2081         (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2082             B_FALSE);
2083
2084         /* No online devices */
2085         if (rsz == 0)
2086                 return (EZFS_NODEVICE);
2087
2088         return (0);
2089 }
2090
2091 /*
2092  * Get phys_path for a root pool
2093  * Return 0 on success; non-zero on failure.
2094  */
2095 int
2096 zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2097 {
2098         return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2099             phypath_size));
2100 }
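
/*
 * Illustrative sketch (not part of the library): callers typically pass
 * a MAXPATHLEN buffer and treat any non-zero return as "no usable
 * physical path"; on success the buffer holds a space-separated list.
 */
static int
example_print_physpath(zpool_handle_t *zhp)
{
        char physpath[MAXPATHLEN];

        if (zpool_get_physpath(zhp, physpath, sizeof (physpath)) != 0)
                return (-1);
        (void) printf("%s\n", physpath);
        return (0);
}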
2101
2102 /*
2103  * If the device has been dynamically expanded then we need to relabel
2104  * the disk to use the new unallocated space.
2105  */
2106 static int
2107 zpool_relabel_disk(libzfs_handle_t *hdl, const char *path)
2108 {
2109         char errbuf[1024];
2110         int fd, error;
2111
        (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
            "cannot relabel '%s'"), path);

2112         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
2113                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2114                     "relabel '%s': unable to open device"), path);
2115                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
2116         }
2117
2118         /*
2119          * It's possible that we might encounter an error if the device
2120          * does not have any unallocated space left. If so, we simply
2121          * ignore that error and continue on.
2122          */
2123         error = efi_use_whole_disk(fd);
2124         (void) close(fd);
2125         if (error && error != VT_ENOSPC) {
2126                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2127                     "relabel '%s': unable to read disk capacity"), path);
2128                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
2129         }
2130         return (0);
2131 }
2132
2133 /*
2134  * Bring the specified vdev online.  The 'flags' parameter is a set of the
2135  * ZFS_ONLINE_* flags.
2136  */
2137 int
2138 zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
2139     vdev_state_t *newstate)
2140 {
2141         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2142         char msg[1024];
2143         nvlist_t *tgt;
2144         boolean_t avail_spare, l2cache, islog;
2145         libzfs_handle_t *hdl = zhp->zpool_hdl;
2146
2147         if (flags & ZFS_ONLINE_EXPAND) {
2148                 (void) snprintf(msg, sizeof (msg),
2149                     dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
2150         } else {
2151                 (void) snprintf(msg, sizeof (msg),
2152                     dgettext(TEXT_DOMAIN, "cannot online %s"), path);
2153         }
2154
2155         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2156         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2157             &islog)) == NULL)
2158                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2159
2160         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2161
2162         if (avail_spare)
2163                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2164
2165         if (flags & ZFS_ONLINE_EXPAND ||
2166             zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
2167                 char *pathname = NULL;
2168                 uint64_t wholedisk = 0;
2169
2170                 (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
2171                     &wholedisk);
2172                 verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
2173                     &pathname) == 0);
2174
2175                 /*
2176                  * XXX - L2ARC 1.0 devices can't support expansion.
2177                  */
2178                 if (l2cache) {
2179                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2180                             "cannot expand cache devices"));
2181                         return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
2182                 }
2183
2184                 if (wholedisk) {
2185                         pathname += strlen(DISK_ROOT) + 1;
2186                         (void) zpool_relabel_disk(hdl, pathname);
2187                 }
2188         }
2189
2190         zc.zc_cookie = VDEV_STATE_ONLINE;
2191         zc.zc_obj = flags;
2192
2193         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
2194                 if (errno == EINVAL) {
2195                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
2196                             "from this pool into a new one.  Use '%s' "
2197                             "instead"), "zpool detach");
2198                         return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
2199                 }
2200                 return (zpool_standard_error(hdl, errno, msg));
2201         }
2202
2203         *newstate = zc.zc_cookie;
2204         return (0);
2205 }
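
/*
 * Illustrative sketch (not part of the library): growing onto a
 * resized LUN is an online with ZFS_ONLINE_EXPAND, which drives the
 * relabel path above for whole-disk vdevs.
 */
static int
example_expand_vdev(zpool_handle_t *zhp, const char *dev)
{
        vdev_state_t newstate;

        return (zpool_vdev_online(zhp, dev, ZFS_ONLINE_EXPAND, &newstate));
}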
2206
2207 /*
2208  * Take the specified vdev offline.
2209  */
2210 int
2211 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2212 {
2213         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2214         char msg[1024];
2215         nvlist_t *tgt;
2216         boolean_t avail_spare, l2cache;
2217         libzfs_handle_t *hdl = zhp->zpool_hdl;
2218
2219         (void) snprintf(msg, sizeof (msg),
2220             dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2221
2222         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2223         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2224             NULL)) == NULL)
2225                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2226
2227         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2228
2229         if (avail_spare)
2230                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2231
2232         zc.zc_cookie = VDEV_STATE_OFFLINE;
2233         zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2234
2235         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2236                 return (0);
2237
2238         switch (errno) {
2239         case EBUSY:
2240
2241                 /*
2242                  * There are no other replicas of this device.
2243                  */
2244                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2245
2246         case EEXIST:
2247                 /*
2248                  * The log device has unplayed logs
2249                  */
2250                 return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2251
2252         default:
2253                 return (zpool_standard_error(hdl, errno, msg));
2254         }
2255 }
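
/*
 * Illustrative sketch (not part of the library): a temporary offline
 * (istmp == B_TRUE) is not persisted, so the device comes back on the
 * next import or reboot.
 */
static int
example_offline_for_service(zpool_handle_t *zhp, const char *dev)
{
        return (zpool_vdev_offline(zhp, dev, B_TRUE));
}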
2256
2257 /*
2258  * Mark the given vdev faulted.
2259  */
2260 int
2261 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2262 {
2263         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2264         char msg[1024];
2265         libzfs_handle_t *hdl = zhp->zpool_hdl;
2266
2267         (void) snprintf(msg, sizeof (msg),
2268             dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
2269
2270         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2271         zc.zc_guid = guid;
2272         zc.zc_cookie = VDEV_STATE_FAULTED;
2273         zc.zc_obj = aux;
2274
2275         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2276                 return (0);
2277
2278         switch (errno) {
2279         case EBUSY:
2280
2281                 /*
2282                  * There are no other replicas of this device.
2283                  */
2284                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2285
2286         default:
2287                 return (zpool_standard_error(hdl, errno, msg));
2288         }
2289
2290 }
2291
2292 /*
2293  * Mark the given vdev degraded.
2294  */
2295 int
2296 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2297 {
2298         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2299         char msg[1024];
2300         libzfs_handle_t *hdl = zhp->zpool_hdl;
2301
2302         (void) snprintf(msg, sizeof (msg),
2303             dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
2304
2305         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2306         zc.zc_guid = guid;
2307         zc.zc_cookie = VDEV_STATE_DEGRADED;
2308         zc.zc_obj = aux;
2309
2310         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2311                 return (0);
2312
2313         return (zpool_standard_error(hdl, errno, msg));
2314 }
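
/*
 * Illustrative sketch (not part of the library): diagnosis engines such
 * as fmd address vdevs by GUID.  Assuming VDEV_AUX_EXTERNAL as the
 * reason code, an externally requested degrade reduces to:
 */
static int
example_degrade_by_guid(zpool_handle_t *zhp, uint64_t guid)
{
        return (zpool_vdev_degrade(zhp, guid, VDEV_AUX_EXTERNAL));
}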
2315
2316 /*
2317  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
2318  * a hot spare.
2319  */
2320 static boolean_t
2321 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
2322 {
2323         nvlist_t **child;
2324         uint_t c, children;
2325         char *type;
2326
2327         if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
2328             &children) == 0) {
2329                 verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
2330                     &type) == 0);
2331
2332                 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
2333                     children == 2 && child[which] == tgt)
2334                         return (B_TRUE);
2335
2336                 for (c = 0; c < children; c++)
2337                         if (is_replacing_spare(child[c], tgt, which))
2338                                 return (B_TRUE);
2339         }
2340
2341         return (B_FALSE);
2342 }
2343
2344 /*
2345  * Attach new_disk (fully described by nvroot) to old_disk.
2346  * If 'replacing' is specified, the new disk will replace the old one.
2347  */
2348 int
2349 zpool_vdev_attach(zpool_handle_t *zhp,
2350     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
2351 {
2352         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2353         char msg[1024];
2354         int ret;
2355         nvlist_t *tgt;
2356         boolean_t avail_spare, l2cache, islog;
2357         uint64_t val;
2358         char *newname;
2359         nvlist_t **child;
2360         uint_t children;
2361         nvlist_t *config_root;
2362         libzfs_handle_t *hdl = zhp->zpool_hdl;
2363         boolean_t rootpool = pool_is_bootable(zhp);
2364
2365         if (replacing)
2366                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2367                     "cannot replace %s with %s"), old_disk, new_disk);
2368         else
2369                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2370                     "cannot attach %s to %s"), new_disk, old_disk);
2371
2372         /*
2373          * If this is a root pool, make sure that we're not attaching an
2374          * EFI labeled device.
2375          */
2376         if (rootpool && pool_uses_efi(nvroot)) {
2377                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2378                     "EFI labeled devices are not supported on root pools."));
2379                 return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
2380         }
2381
2382         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2383         if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
2384             &islog)) == 0)
2385                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2386
2387         if (avail_spare)
2388                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2389
2390         if (l2cache)
2391                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2392
2393         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2394         zc.zc_cookie = replacing;
2395
2396         if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
2397             &child, &children) != 0 || children != 1) {
2398                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2399                     "new device must be a single disk"));
2400                 return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
2401         }
2402
2403         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
2404             ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
2405
2406         if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL)
2407                 return (-1);
2408
2409         /*
2410          * If the target is a hot spare that has been swapped in, we can only
2411          * replace it with another hot spare.
2412          */
2413         if (replacing &&
2414             nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
2415             (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
2416             NULL) == NULL || !avail_spare) &&
2417             is_replacing_spare(config_root, tgt, 1)) {
2418                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2419                     "can only be replaced by another hot spare"));
2420                 free(newname);
2421                 return (zfs_error(hdl, EZFS_BADTARGET, msg));
2422         }
2423
2424         free(newname);
2425
2426         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
2427                 return (-1);
2428
2429         ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);
2430
2431         zcmd_free_nvlists(&zc);
2432
2433         if (ret == 0) {
2434                 if (rootpool) {
2435                         /*
2436                          * XXX need a better way to prevent user from
2437                          * booting up a half-baked vdev.
2438                          */
2439                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
2440                             "sure to wait until resilver is done "
2441                             "before rebooting.\n"));
2442                 }
2443                 return (0);
2444         }
2445
2446         switch (errno) {
2447         case ENOTSUP:
2448                 /*
2449                  * Can't attach to or replace this type of vdev.
2450                  */
2451                 if (replacing) {
2452                         uint64_t version = zpool_get_prop_int(zhp,
2453                             ZPOOL_PROP_VERSION, NULL);
2454
2455                         if (islog)
2456                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2457                                     "cannot replace a log with a spare"));
2458                         else if (version >= SPA_VERSION_MULTI_REPLACE)
2459                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2460                                     "already in replacing/spare config; wait "
2461                                     "for completion or use 'zpool detach'"));
2462                         else
2463                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2464                                     "cannot replace a replacing device"));
2465                 } else {
2466                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2467                             "can only attach to mirrors and top-level "
2468                             "disks"));
2469                 }
2470                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2471                 break;
2472
2473         case EINVAL:
2474                 /*
2475                  * The new device must be a single disk.
2476                  */
2477                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2478                     "new device must be a single disk"));
2479                 (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
2480                 break;
2481
2482         case EBUSY:
2483                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
2484                     new_disk);
2485                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2486                 break;
2487
2488         case EOVERFLOW:
2489                 /*
2490                  * The new device is too small.
2491                  */
2492                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2493                     "device is too small"));
2494                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2495                 break;
2496
2497         case EDOM:
2498                 /*
2499                  * The new device has a different alignment requirement.
2500                  */
2501                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2502                     "devices have different sector alignment"));
2503                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2504                 break;
2505
2506         case ENAMETOOLONG:
2507                 /*
2508                  * The resulting top-level vdev spec won't fit in the label.
2509                  */
2510                 (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
2511                 break;
2512
2513         default:
2514                 (void) zpool_standard_error(hdl, errno, msg);
2515         }
2516
2517         return (-1);
2518 }
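
/*
 * Illustrative sketch (not part of the library): the 'nvroot' argument
 * to zpool_vdev_attach() is a one-child "root" vdev tree.  A minimal
 * construction, omitting the labeling normally done by zpool(1M)
 * before calling in, might be:
 */
static nvlist_t *
example_single_disk_nvroot(const char *path)
{
        nvlist_t *root = NULL, *disk = NULL;

        if (nvlist_alloc(&disk, NV_UNIQUE_NAME, 0) != 0 ||
            nvlist_add_string(disk, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
            nvlist_add_string(disk, ZPOOL_CONFIG_PATH, path) != 0 ||
            nvlist_alloc(&root, NV_UNIQUE_NAME, 0) != 0 ||
            nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
            nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
            &disk, 1) != 0) {
                nvlist_free(disk);
                nvlist_free(root);
                return (NULL);
        }
        nvlist_free(disk);      /* the array add made a copy */
        return (root);
}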
2519
2520 /*
2521  * Detach the specified device.
2522  */
2523 int
2524 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
2525 {
2526         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2527         char msg[1024];
2528         nvlist_t *tgt;
2529         boolean_t avail_spare, l2cache;
2530         libzfs_handle_t *hdl = zhp->zpool_hdl;
2531
2532         (void) snprintf(msg, sizeof (msg),
2533             dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
2534
2535         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2536         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2537             NULL)) == 0)
2538                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2539
2540         if (avail_spare)
2541                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2542
2543         if (l2cache)
2544                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2545
2546         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2547
2548         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
2549                 return (0);
2550
2551         switch (errno) {
2552
2553         case ENOTSUP:
2554                 /*
2555                  * Can't detach from this type of vdev.
2556                  */
2557                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
2558                     "applicable to mirror and replacing vdevs"));
2559                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2560                 break;
2561
2562         case EBUSY:
2563                 /*
2564                  * There are no other replicas of this device.
2565                  */
2566                 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
2567                 break;
2568
2569         default:
2570                 (void) zpool_standard_error(hdl, errno, msg);
2571         }
2572
2573         return (-1);
2574 }
2575
2576 /*
2577  * Find a mirror vdev in the source nvlist.
2578  *
2579  * The mchild array contains a list of disks in one of the top-level mirrors
2580  * of the source pool.  The schild array contains a list of disks that the
2581  * user specified on the command line.  We loop over the mchild array to
2582  * see if any entry in the schild array matches.
2583  *
2584  * If a disk in the mchild array is found in the schild array, we return
2585  * the index of that entry.  Otherwise we return -1.
2586  */
2587 static int
2588 find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
2589     nvlist_t **schild, uint_t schildren)
2590 {
2591         uint_t mc;
2592
2593         for (mc = 0; mc < mchildren; mc++) {
2594                 uint_t sc;
2595                 char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2596                     mchild[mc], B_FALSE);
2597
2598                 for (sc = 0; sc < schildren; sc++) {
2599                         char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2600                             schild[sc], B_FALSE);
2601                         boolean_t result = (strcmp(mpath, spath) == 0);
2602
2603                         free(spath);
2604                         if (result) {
2605                                 free(mpath);
2606                                 return (mc);
2607                         }
2608                 }
2609
2610                 free(mpath);
2611         }
2612
2613         return (-1);
2614 }
2615
2616 /*
2617  * Split a mirror pool.  If '*newroot' is NULL, then a new nvlist
2618  * is generated and it is the responsibility of the caller to free it.
2619  */
2620 int
2621 zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
2622     nvlist_t *props, splitflags_t flags)
2623 {
2624         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2625         char msg[1024];
2626         nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
2627         nvlist_t **varray = NULL, *zc_props = NULL;
2628         uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
2629         libzfs_handle_t *hdl = zhp->zpool_hdl;
2630         uint64_t vers;
2631         boolean_t freelist = B_FALSE, memory_err = B_TRUE;
2632         int retval = 0;
2633
2634         (void) snprintf(msg, sizeof (msg),
2635             dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
2636
2637         if (!zpool_name_valid(hdl, B_FALSE, newname))
2638                 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
2639
2640         if ((config = zpool_get_config(zhp, NULL)) == NULL) {
2641                 (void) fprintf(stderr, gettext("Internal error: unable to "
2642                     "retrieve pool configuration\n"));
2643                 return (-1);
2644         }
2645
2646         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
2647             == 0);
2648         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
2649
2650         if (props) {
2651                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
2652                 if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
2653                     props, vers, flags, msg)) == NULL)
2654                         return (-1);
2655         }
2656
2657         if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
2658             &children) != 0) {
2659                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2660                     "Source pool is missing vdev tree"));
2661                 if (zc_props)
2662                         nvlist_free(zc_props);
2663                 return (-1);
2664         }
2665
2666         varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
2667         vcount = 0;
2668
2669         if (*newroot == NULL ||
2670             nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
2671             &newchild, &newchildren) != 0)
2672                 newchildren = 0;
2673
2674         for (c = 0; c < children; c++) {
2675                 uint64_t is_log = B_FALSE, is_hole = B_FALSE;
2676                 char *type;
2677                 nvlist_t **mchild, *vdev;
2678                 uint_t mchildren;
2679                 int entry;
2680
2681                 /*
2682                  * Unlike cache & spares, slogs are stored in the
2683                  * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
2684                  */
2685                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
2686                     &is_log);
2687                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
2688                     &is_hole);
2689                 if (is_log || is_hole) {
2690                         /*
2691                          * Create a hole vdev and put it in the config.
2692                          */
2693                         if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
2694                                 goto out;
2695                         if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
2696                             VDEV_TYPE_HOLE) != 0)
2697                                 goto out;
2698                         if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
2699                             1) != 0)
2700                                 goto out;
2701                         if (lastlog == 0)
2702                                 lastlog = vcount;
2703                         varray[vcount++] = vdev;
2704                         continue;
2705                 }
2706                 lastlog = 0;
2707                 verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
2708                     == 0);
2709                 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
2710                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2711                             "Source pool must be composed only of mirrors\n"));
2712                         retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
2713                         goto out;
2714                 }
2715
2716                 verify(nvlist_lookup_nvlist_array(child[c],
2717                     ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
2718
2719                 /* find or add an entry for this top-level vdev */
2720                 if (newchildren > 0 &&
2721                     (entry = find_vdev_entry(zhp, mchild, mchildren,
2722                     newchild, newchildren)) >= 0) {
2723                         /* We found a disk that the user specified. */
2724                         vdev = mchild[entry];
2725                         ++found;
2726                 } else {
2727                         /* User didn't specify a disk for this vdev. */
2728                         vdev = mchild[mchildren - 1];
2729                 }
2730
2731                 if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
2732                         goto out;
2733         }
2734
2735         /* did we find every disk the user specified? */
2736         if (found != newchildren) {
2737                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
2738                     "include at most one disk from each mirror"));
2739                 retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
2740                 goto out;
2741         }
2742
2743         /* Prepare the nvlist for populating. */
2744         if (*newroot == NULL) {
2745                 if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
2746                         goto out;
2747                 freelist = B_TRUE;
2748                 if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
2749                     VDEV_TYPE_ROOT) != 0)
2750                         goto out;
2751         } else {
2752                 verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
2753         }
2754
2755         /* Add all the children we found */
2756         if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
2757             lastlog == 0 ? vcount : lastlog) != 0)
2758                 goto out;
2759
2760         /*
2761          * If we're just doing a dry run, exit now with success.
2762          */
2763         if (flags.dryrun) {
2764                 memory_err = B_FALSE;
2765                 freelist = B_FALSE;
2766                 goto out;
2767         }
2768
2769         /* now build up the config list & call the ioctl */
2770         if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
2771                 goto out;
2772
2773         if (nvlist_add_nvlist(newconfig,
2774             ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
2775             nvlist_add_string(newconfig,
2776             ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
2777             nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
2778                 goto out;
2779
2780         /*
2781          * The new pool is automatically part of the namespace unless we
2782          * explicitly export it.
2783          */
2784         if (!flags.import)
2785                 zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
2786         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2787         (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
2788         if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
2789                 goto out;
2790         if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
2791                 goto out;
2792
2793         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
2794                 retval = zpool_standard_error(hdl, errno, msg);
2795                 goto out;
2796         }
2797
2798         freelist = B_FALSE;
2799         memory_err = B_FALSE;
2800
2801 out:
2802         if (varray != NULL) {
2803                 int v;
2804
2805                 for (v = 0; v < vcount; v++)
2806                         nvlist_free(varray[v]);
2807                 free(varray);
2808         }
2809         zcmd_free_nvlists(&zc);
2810         if (zc_props)
2811                 nvlist_free(zc_props);
2812         if (newconfig)
2813                 nvlist_free(newconfig);
2814         if (freelist) {
2815                 nvlist_free(*newroot);
2816                 *newroot = NULL;
2817         }
2818
2819         if (retval != 0)
2820                 return (retval);
2821
2822         if (memory_err)
2823                 return (no_memory(hdl));
2824
2825         return (0);
2826 }
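
/*
 * Illustrative sketch (not part of the library): a dry-run split lets
 * the library pick the last disk of each mirror and hands back the
 * would-be vdev tree, which the caller must free.
 */
static int
example_split_dryrun(zpool_handle_t *zhp, char *newname)
{
        nvlist_t *newroot = NULL;
        splitflags_t flags = { 0 };
        int ret;

        flags.dryrun = 1;
        ret = zpool_vdev_split(zhp, newname, &newroot, NULL, flags);
        nvlist_free(newroot);
        return (ret);
}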
2827
2828 /*
2829  * Remove the given device.  Currently, this is supported only for hot
2830  * spares, level 2 cache devices, and log devices.
2831  */
2832 int
2833 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
2834 {
2835         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2836         char msg[1024];
2837         nvlist_t *tgt;
2838         boolean_t avail_spare, l2cache, islog;
2839         libzfs_handle_t *hdl = zhp->zpool_hdl;
2840         uint64_t version;
2841
2842         (void) snprintf(msg, sizeof (msg),
2843             dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
2844
2845         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2846         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2847             &islog)) == 0)
2848                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2849         /*
2850          * XXX - this should just go away.
2851          */
2852         if (!avail_spare && !l2cache && !islog) {
2853                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2854                     "only inactive hot spares, cache, top-level, "
2855                     "or log devices can be removed"));
2856                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2857         }
2858
2859         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
2860         if (islog && version < SPA_VERSION_HOLES) {
2861                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2862                     "pool must be upgraded to support log removal"));
2863                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
2864         }
2865
2866         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2867
2868         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
2869                 return (0);
2870
2871         return (zpool_standard_error(hdl, errno, msg));
2872 }
2873
2874 /*
2875  * Clear the errors for the pool, or the particular device if specified.
2876  */
2877 int
2878 zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
2879 {
2880         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2881         char msg[1024];
2882         nvlist_t *tgt;
2883         zpool_rewind_policy_t policy;
2884         boolean_t avail_spare, l2cache;
2885         libzfs_handle_t *hdl = zhp->zpool_hdl;
2886         nvlist_t *nvi = NULL;
2887         int error;
2888
2889         if (path)
2890                 (void) snprintf(msg, sizeof (msg),
2891                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
2892                     path);
2893         else
2894                 (void) snprintf(msg, sizeof (msg),
2895                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
2896                     zhp->zpool_name);
2897
2898         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2899         if (path) {
2900                 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
2901                     &l2cache, NULL)) == 0)
2902                         return (zfs_error(hdl, EZFS_NODEVICE, msg));
2903
2904                 /*
2905                  * Don't allow error clearing for hot spares.  Do allow
2906                  * error clearing for l2cache devices.
2907                  */
2908                 if (avail_spare)
2909                         return (zfs_error(hdl, EZFS_ISSPARE, msg));
2910
2911                 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
2912                     &zc.zc_guid) == 0);
2913         }
2914
2915         zpool_get_rewind_policy(rewindnvl, &policy);
2916         zc.zc_cookie = policy.zrp_request;
2917
2918         if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
2919                 return (-1);
2920
2921         if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
2922                 return (-1);
2923
2924         while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
2925             errno == ENOMEM) {
2926                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
2927                         zcmd_free_nvlists(&zc);
2928                         return (-1);
2929                 }
2930         }
2931
2932         if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
2933             errno != EPERM && errno != EACCES)) {
2934                 if (policy.zrp_request &
2935                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
2936                         (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
2937                         zpool_rewind_exclaim(hdl, zc.zc_name,
2938                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
2939                             nvi);
2940                         nvlist_free(nvi);
2941                 }
2942                 zcmd_free_nvlists(&zc);
2943                 return (0);
2944         }
2945
2946         zcmd_free_nvlists(&zc);
2947         return (zpool_standard_error(hdl, errno, msg));
2948 }
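
/*
 * Illustrative sketch (not part of the library): clearing all errors in a
 * pool without requesting a rewind, mirroring how zpool(8) builds the
 * rewind-policy nvlist.  The helper name is hypothetical.
 */
static void
example_pool_clear(zpool_handle_t *zhp)
{
        nvlist_t *policy = NULL;

        if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 ||
            nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST,
            ZPOOL_NO_REWIND) != 0)
                return;

        /* a NULL path clears errors for every device in the pool */
        (void) zpool_clear(zhp, NULL, policy);
        nvlist_free(policy);
}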
2949
2950 /*
2951  * Similar to zpool_clear(), but takes a GUID (used by fmd).
2952  */
2953 int
2954 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
2955 {
2956         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2957         char msg[1024];
2958         libzfs_handle_t *hdl = zhp->zpool_hdl;
2959
2960         (void) snprintf(msg, sizeof (msg),
2961             dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
2962            (u_longlong_t)guid);
2963
2964         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2965         zc.zc_guid = guid;
2966         zc.zc_cookie = ZPOOL_NO_REWIND;
2967
2968         if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
2969                 return (0);
2970
2971         return (zpool_standard_error(hdl, errno, msg));
2972 }
2973
2974 /*
2975  * Convert from a devid string to a path.
2976  */
2977 static char *
2978 devid_to_path(char *devid_str)
2979 {
2980         ddi_devid_t devid;
2981         char *minor;
2982         char *path;
2983         devid_nmlist_t *list = NULL;
2984         int ret;
2985
2986         if (devid_str_decode(devid_str, &devid, &minor) != 0)
2987                 return (NULL);
2988
2989         ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
2990
2991         devid_str_free(minor);
2992         devid_free(devid);
2993
2994         if (ret != 0)
2995                 return (NULL);
2996
2997         if ((path = strdup(list[0].devname)) == NULL)
2998                 return (NULL);
2999
3000         devid_free_nmlist(list);
3001
3002         return (path);
3003 }
3004
3005 /*
3006  * Convert from a path to a devid string.
3007  */
3008 static char *
3009 path_to_devid(const char *path)
3010 {
3011         int fd;
3012         ddi_devid_t devid;
3013         char *minor, *ret;
3014
3015         if ((fd = open(path, O_RDONLY)) < 0)
3016                 return (NULL);
3017
3018         minor = NULL;
3019         ret = NULL;
3020         if (devid_get(fd, &devid) == 0) {
3021                 if (devid_get_minor_name(fd, &minor) == 0)
3022                         ret = devid_str_encode(devid, minor);
3023                 if (minor != NULL)
3024                         devid_str_free(minor);
3025                 devid_free(devid);
3026         }
3027         (void) close(fd);
3028
3029         return (ret);
3030 }
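
/*
 * Illustrative sketch (not part of the library): a path -> devid -> path
 * round trip using the two helpers above, which is essentially the check
 * zpool_vdev_name() performs below.  The helper name is hypothetical.
 */
static void
example_devid_roundtrip(const char *path)
{
        char *devid, *newpath;

        if ((devid = path_to_devid(path)) == NULL)
                return;

        if ((newpath = devid_to_path(devid)) != NULL) {
                /* newpath is where the devid resolves right now */
                free(newpath);
        }
        devid_str_free(devid);
}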
3031
3032 /*
3033  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
3034  * ignore any failure here, since a common case is for an unprivileged user to
3035  * type 'zpool status', and we'll display the correct information anyway.
3036  */
3037 static void
3038 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
3039 {
3040         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3041
3042         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3043         (void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value));
3044         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3045             &zc.zc_guid) == 0);
3046
3047         (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
3048 }
3049
3050 /*
3051  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
3052  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3053  * We also check if this is a whole disk, in which case we strip off the
3054  * trailing 's0' slice name.
3055  *
3056  * This routine is also responsible for identifying when disks have been
3057  * reconfigured in a new location.  The kernel will have opened the device by
3058  * devid, but the path will still refer to the old location.  To catch this, we
3059  * first do a path -> devid translation (which is fast for the common case).  If
3060  * the devid matches, we're done.  If not, we do a reverse devid -> path
3061  * translation and issue the appropriate ioctl() to update the path of the vdev.
3062  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
3063  * of these checks.
3064  */
3065 char *
3066 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
3067     boolean_t verbose)
3068 {
3069         char *path, *devid, *type;
3070         uint64_t value;
3071         char buf[64];
3072         vdev_stat_t *vs;
3073         uint_t vsc;
3074
3075         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
3076             &value) == 0) {
3077                 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3078                     &value) == 0);
3079                 (void) snprintf(buf, sizeof (buf), "%llu",
3080                     (u_longlong_t)value);
3081                 path = buf;
3082         } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
3083                 /*
3084                  * If the device is dead (faulted, offline, etc) then don't
3085                  * bother opening it.  Otherwise we may be forcing the user to
3086                  * open a misbehaving device, which can have undesirable
3087                  * effects.
3088                  */
3089                 if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
3090                     (uint64_t **)&vs, &vsc) != 0 ||
3091                     vs->vs_state >= VDEV_STATE_DEGRADED) &&
3092                     zhp != NULL &&
3093                     nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
3094                         /*
3095                          * Determine if the current path is correct.
3096                          */
3097                         char *newdevid = path_to_devid(path);
3098
3099                         if (newdevid == NULL ||
3100                             strcmp(devid, newdevid) != 0) {
3101                                 char *newpath;
3102
3103                                 if ((newpath = devid_to_path(devid)) != NULL) {
3104                                         /*
3105                                          * Update the path appropriately.
3106                                          */
3107                                         set_path(zhp, nv, newpath);
3108                                         if (nvlist_add_string(nv,
3109                                             ZPOOL_CONFIG_PATH, newpath) == 0)
3110                                                 verify(nvlist_lookup_string(nv,
3111                                                     ZPOOL_CONFIG_PATH,
3112                                                     &path) == 0);
3113                                         free(newpath);
3114                                 }
3115                         }
3116
3117                         if (newdevid)
3118                                 devid_str_free(newdevid);
3119                 }
3120
3121                 /*
3122                  * For a block device only use the name.
3123                  */
3124                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
3125                 if (strcmp(type, VDEV_TYPE_DISK) == 0) {
3126                         path = strrchr(path, '/');
3127                         path++;
3128                 }
3129
3130 #if defined(__sun__) || defined(__sun)
3131                 /*
3132                  * The following code strips the slice from the device path.
3133                  */
3134                 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
3135                     &value) == 0 && value) {
3136                         int pathlen = strlen(path);
3137                         char *tmp = zfs_strdup(hdl, path);
3138
3139                         /*
3140                          * If it starts with c#, and ends with "s0", chop
3141                          * the "s0" off, or if it ends with "s0/old", remove
3142                          * the "s0" from the middle.
3143                          */
3144                         if (CTD_CHECK(tmp)) {
3145                                 if (strcmp(&tmp[pathlen - 2], "s0") == 0) {
3146                                         tmp[pathlen - 2] = '\0';
3147                                 } else if (pathlen > 6 &&
3148                                     strcmp(&tmp[pathlen - 6], "s0/old") == 0) {
3149                                         (void) strcpy(&tmp[pathlen - 6],
3150                                             "/old");
3151                                 }
3152                         }
3153                         return (tmp);
3154                 }
3155 #endif
3156         } else {
3157                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
3158
3159                 /*
3160                  * If it's a raidz device, we need to stick in the parity level.
3161                  */
3162                 if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
3163                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
3164                             &value) == 0);
3165                         (void) snprintf(buf, sizeof (buf), "%s%llu", path,
3166                             (u_longlong_t)value);
3167                         path = buf;
3168                 }
3169
3170                 /*
3171                  * We identify each top-level vdev by using a <type-id>
3172                  * naming convention.
3173                  */
3174                 if (verbose) {
3175                         uint64_t id;
3176
3177                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
3178                             &id) == 0);
3179                         (void) snprintf(buf, sizeof (buf), "%s-%llu", path,
3180                             (u_longlong_t)id);
3181                         path = buf;
3182                 }
3183         }
3184
3185         return (zfs_strdup(hdl, path));
3186 }
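
/*
 * Illustrative sketch (not part of the library): printing the display name
 * of each top-level vdev, the same way zpool(8) walks the configuration.
 * The helper name is hypothetical; zpool_vdev_name() returns an allocated
 * string the caller must free.
 */
static void
example_print_vdev_names(libzfs_handle_t *hdl, zpool_handle_t *zhp)
{
        nvlist_t *config, *nvroot, **child;
        uint_t c, children;

        config = zpool_get_config(zhp, NULL);
        verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
            &nvroot) == 0);
        if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
            &child, &children) != 0)
                return;

        for (c = 0; c < children; c++) {
                char *name = zpool_vdev_name(hdl, zhp, child[c], B_TRUE);

                (void) printf("%s\n", name);
                free(name);
        }
}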
3187
3188 static int
3189 zbookmark_compare(const void *a, const void *b)
3190 {
3191         return (memcmp(a, b, sizeof (zbookmark_t)));
3192 }
3193
3194 /*
3195  * Retrieve the persistent error log, uniquify the members, and return to the
3196  * caller.
3197  */
3198 int
3199 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
3200 {
3201         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3202         uint64_t count;
3203         zbookmark_t *zb = NULL;
3204         int i;
3205
3206         /*
3207          * Retrieve the raw error list from the kernel.  If the number of errors
3208          * has increased, allocate more space and continue until we get the
3209          * entire list.
3210          */
3211         verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
3212             &count) == 0);
3213         if (count == 0)
3214                 return (0);
3215         if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
3216             count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
3217                 return (-1);
3218         zc.zc_nvlist_dst_size = count;
3219         (void) strcpy(zc.zc_name, zhp->zpool_name);
3220         for (;;) {
3221                 if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
3222                     &zc) != 0) {
3223                         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3224                         if (errno == ENOMEM) {
3225                                 count = zc.zc_nvlist_dst_size;
3226                                 if ((zc.zc_nvlist_dst = (uintptr_t)
3227                                     zfs_alloc(zhp->zpool_hdl, count *
3228                                     sizeof (zbookmark_t))) == (uintptr_t)NULL)
3229                                         return (-1);
3230                         } else {
3231                                 return (-1);
3232                         }
3233                 } else {
3234                         break;
3235                 }
3236         }
3237
3238         /*
3239          * Sort the resulting bookmarks.  This is a little confusing due to the
3240          * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
3241  * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
3242  * _not_ copied as part of the process.  So we point the start of our
3243  * array appropriately and decrement the total number of elements.
3244          */
3245         zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
3246             zc.zc_nvlist_dst_size;
3247         count -= zc.zc_nvlist_dst_size;
3248
3249         qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
3250
3251         verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
3252
3253         /*
3254          * Fill in the nverrlistp with nvlist's of dataset and object numbers.
3255          */
3256         for (i = 0; i < count; i++) {
3257                 nvlist_t *nv;
3258
3259                 /* ignoring zb_blkid and zb_level for now */
3260                 if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
3261                     zb[i-1].zb_object == zb[i].zb_object)
3262                         continue;
3263
3264                 if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
3265                         goto nomem;
3266                 if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
3267                     zb[i].zb_objset) != 0) {
3268                         nvlist_free(nv);
3269                         goto nomem;
3270                 }
3271                 if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
3272                     zb[i].zb_object) != 0) {
3273                         nvlist_free(nv);
3274                         goto nomem;
3275                 }
3276                 if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
3277                         nvlist_free(nv);
3278                         goto nomem;
3279                 }
3280                 nvlist_free(nv);
3281         }
3282
3283         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3284         return (0);
3285
3286 nomem:
3287         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3288         return (no_memory(zhp->zpool_hdl));
3289 }
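
/*
 * Illustrative sketch (not part of the library): walking the uniquified
 * error list and resolving each entry to a pathname, much like
 * 'zpool status -v'.  The helper name is hypothetical.
 */
static void
example_print_errlog(zpool_handle_t *zhp)
{
        nvlist_t *nverrlist = NULL;
        nvpair_t *elem = NULL;
        char pathname[MAXPATHLEN * 2];

        if (zpool_get_errlog(zhp, &nverrlist) != 0)
                return;

        while ((elem = nvlist_next_nvpair(nverrlist, elem)) != NULL) {
                nvlist_t *nv;
                uint64_t dsobj, obj;

                verify(nvpair_value_nvlist(elem, &nv) == 0);
                verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_DATASET,
                    &dsobj) == 0);
                verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_OBJECT,
                    &obj) == 0);
                zpool_obj_to_path(zhp, dsobj, obj, pathname,
                    sizeof (pathname));
                (void) printf("%s\n", pathname);
        }
        nvlist_free(nverrlist);
}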
3290
3291 /*
3292  * Upgrade a ZFS pool to the latest on-disk version.
3293  */
3294 int
3295 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
3296 {
3297         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3298         libzfs_handle_t *hdl = zhp->zpool_hdl;
3299
3300         (void) strcpy(zc.zc_name, zhp->zpool_name);
3301         zc.zc_cookie = new_version;
3302
3303         if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
3304                 return (zpool_standard_error_fmt(hdl, errno,
3305                     dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
3306                     zhp->zpool_name));
3307         return (0);
3308 }
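
/*
 * Illustrative sketch (not part of the library): upgrading a pool to the
 * newest on-disk version this build understands (SPA_VERSION).  The helper
 * name is hypothetical.
 */
static void
example_pool_upgrade(zpool_handle_t *zhp)
{
        if (zpool_upgrade(zhp, SPA_VERSION) != 0)
                (void) fprintf(stderr, "%s\n",
                    libzfs_error_description(zhp->zpool_hdl));
}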
3309
3310 void
3311 zpool_set_history_str(const char *subcommand, int argc, char **argv,
3312     char *history_str)
3313 {
3314         int i;
3315
3316         (void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
3317         for (i = 1; i < argc; i++) {
3318                 if (strlen(history_str) + 1 + strlen(argv[i]) >
3319                     HIS_MAX_RECORD_LEN)
3320                         break;
3321                 (void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
3322                 (void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
3323         }
3324 }
3325
3326 /*
3327  * Stage command history for logging.
3328  */
3329 int
3330 zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
3331 {
3332         if (history_str == NULL)
3333                 return (EINVAL);
3334
3335         if (strlen(history_str) > HIS_MAX_RECORD_LEN)
3336                 return (EINVAL);
3337
3338         if (hdl->libzfs_log_str != NULL)
3339                 free(hdl->libzfs_log_str);
3340
3341         if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
3342                 return (no_memory(hdl));
3343
3344         return (0);
3345 }
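
/*
 * Illustrative sketch (not part of the library): staging the invoking
 * command line for the history log, as zpool(8) does at startup.  The
 * helper name is hypothetical.
 */
static int
example_stage_history(libzfs_handle_t *hdl, int argc, char **argv)
{
        char history_str[HIS_MAX_RECORD_LEN];

        zpool_set_history_str("zpool", argc, argv, history_str);
        return (zpool_stage_history(hdl, history_str));
}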
3346
3347 /*
3348  * Perform ioctl to get some command history of a pool.
3349  *
3350  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
3351  * logical offset of the history buffer to start reading from.
3352  *
3353  * Upon return, 'off' is the next logical offset to read from and
3354  * 'len' is the actual number of bytes read into 'buf'.
3355  */
3356 static int
3357 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
3358 {
3359         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3360         libzfs_handle_t *hdl = zhp->zpool_hdl;
3361
3362         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3363
3364         zc.zc_history = (uint64_t)(uintptr_t)buf;
3365         zc.zc_history_len = *len;
3366         zc.zc_history_offset = *off;
3367
3368         if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
3369                 switch (errno) {
3370                 case EPERM:
3371                         return (zfs_error_fmt(hdl, EZFS_PERM,
3372                             dgettext(TEXT_DOMAIN,
3373                             "cannot show history for pool '%s'"),
3374                             zhp->zpool_name));
3375                 case ENOENT:
3376                         return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
3377                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3378                             "'%s'"), zhp->zpool_name));
3379                 case ENOTSUP:
3380                         return (zfs_error_fmt(hdl, EZFS_BADVERSION,
3381                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3382                             "'%s', pool must be upgraded"), zhp->zpool_name));
3383                 default:
3384                         return (zpool_standard_error_fmt(hdl, errno,
3385                             dgettext(TEXT_DOMAIN,
3386                             "cannot get history for '%s'"), zhp->zpool_name));
3387                 }
3388         }
3389
3390         *len = zc.zc_history_len;
3391         *off = zc.zc_history_offset;
3392
3393         return (0);
3394 }
3395
3396 /*
3397  * Process the buffer of nvlists, unpacking and storing each nvlist record
3398  * into 'records'.  'leftover' is set to the number of bytes that weren't
3399  * processed as there wasn't a complete record.
3400  */
3401 int
3402 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
3403     nvlist_t ***records, uint_t *numrecords)
3404 {
3405         uint64_t reclen;
3406         nvlist_t *nv;
3407         int i;
3408
3409         while (bytes_read > sizeof (reclen)) {
3410
3411                 /* get length of packed record (stored as little endian) */
3412                 for (i = 0, reclen = 0; i < sizeof (reclen); i++)
3413                         reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
3414
3415                 if (bytes_read < sizeof (reclen) + reclen)
3416                         break;
3417
3418                 /* unpack record */
3419                 if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
3420                         return (ENOMEM);
3421                 bytes_read -= sizeof (reclen) + reclen;
3422                 buf += sizeof (reclen) + reclen;
3423
3424                 /* add record to nvlist array */
3425                 (*numrecords)++;
3426                 if (ISP2(*numrecords + 1)) {
3427                         nvlist_t **tmp = realloc(*records,
3428                             *numrecords * 2 * sizeof (nvlist_t *));
                             if (tmp == NULL) {
                                     /* keep the array consistent for the caller */
                                     nvlist_free(nv);
                                     (*numrecords)--;
                                     return (ENOMEM);
                             }
                             *records = tmp;
3429                 }
3430                 (*records)[*numrecords - 1] = nv;
3431         }
3432
3433         *leftover = bytes_read;
3434         return (0);
3435 }
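
/*
 * Illustrative layout of the buffer consumed above (a sketch, not
 * normative): the kernel hands back a stream of length-prefixed packed
 * nvlists, where each length is a 64-bit little-endian integer.
 *
 *   +----------+------------------+----------+------------------+--
 *   |  reclen  |  packed nvlist   |  reclen  |  packed nvlist   | ...
 *   | (8 B LE) |  (reclen bytes)  | (8 B LE) |  (reclen bytes)  |
 *   +----------+------------------+----------+------------------+--
 *
 * A trailing partial record is left unconsumed and reported through
 * 'leftover' so the caller can back up and re-read it at the right
 * offset.
 */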
3436
3437 #define HIS_BUF_LEN     (128*1024)
3438
3439 /*
3440  * Retrieve the command history of a pool.
3441  */
3442 int
3443 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
3444 {
3445         char buf[HIS_BUF_LEN];
3446         uint64_t off = 0;
3447         nvlist_t **records = NULL;
3448         uint_t numrecords = 0;
3449         int err, i;
3450
3451         do {
3452                 uint64_t bytes_read = sizeof (buf);
3453                 uint64_t leftover;
3454
3455                 if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
3456                         break;
3457
3458                 /* if nothing else was read in, we're at EOF, just return */
3459                 if (!bytes_read)
3460                         break;
3461
3462                 if ((err = zpool_history_unpack(buf, bytes_read,
3463                     &leftover, &records, &numrecords)) != 0)
3464                         break;
3465                 off -= leftover;
3466
3467                 /* CONSTCOND */
3468         } while (1);
3469
3470         if (!err) {
3471                 verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
3472                 verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
3473                     records, numrecords) == 0);
3474         }
3475         for (i = 0; i < numrecords; i++)
3476                 nvlist_free(records[i]);
3477         free(records);
3478
3479         return (err);
3480 }
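
/*
 * Illustrative sketch (not part of the library): printing the command
 * string from each history record, similar to 'zpool history'.  The
 * helper name is hypothetical.
 */
static void
example_print_history(zpool_handle_t *zhp)
{
        nvlist_t *nvhis, **records;
        uint_t numrecords, i;

        if (zpool_get_history(zhp, &nvhis) != 0)
                return;

        verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD,
            &records, &numrecords) == 0);
        for (i = 0; i < numrecords; i++) {
                char *cmd;

                if (nvlist_lookup_string(records[i], ZPOOL_HIST_CMD,
                    &cmd) == 0)
                        (void) printf("%s\n", cmd);
        }
        nvlist_free(nvhis);
}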
3481
3482 /*
3483  * Retrieve the next event.  If there is a new event available 'nvp' will
3484  * contain a newly allocated nvlist and 'dropped' will be set to the number
3485  * of missed events since the last call to this function.  When 'nvp' is
3486  * set to NULL it indicates no new events are available.  In either case
3487  * the function returns 0 and it is up to the caller to free 'nvp'.  In
3488  * the case of a fatal error the function will return a non-zero value.
3489  * When the function is called in blocking mode it will not return until
3490  * a new event is available.
3491  */
3492 int
3493 zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
3494     int *dropped, int block, int cleanup_fd)
3495 {
3496         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3497         int error = 0;
3498
3499         *nvp = NULL;
3500         *dropped = 0;
3501         zc.zc_cleanup_fd = cleanup_fd;
3502
3503         if (!block)
3504                 zc.zc_guid = ZEVENT_NONBLOCK;
3505
3506         if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
3507                 return (-1);
3508
3509 retry:
3510         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
3511                 switch (errno) {
3512                 case ESHUTDOWN:
3513                         error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
3514                             dgettext(TEXT_DOMAIN, "zfs shutdown"));
3515                         goto out;
3516                 case ENOENT:
3517                         /* Blocking error case should not occur */
3518                         if (block)
3519                                 error = zpool_standard_error_fmt(hdl, errno,
3520                                     dgettext(TEXT_DOMAIN, "cannot get event"));
3521
3522                         goto out;
3523                 case ENOMEM:
3524                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3525                                 error = zfs_error_fmt(hdl, EZFS_NOMEM,
3526                                     dgettext(TEXT_DOMAIN, "cannot get event"));
3527                                 goto out;
3528                         } else {
3529                                 goto retry;
3530                         }
3531                 default:
3532                         error = zpool_standard_error_fmt(hdl, errno,
3533                             dgettext(TEXT_DOMAIN, "cannot get event"));
3534                         goto out;
3535                 }
3536         }
3537
3538         error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
3539         if (error != 0)
3540                 goto out;
3541
3542         *dropped = (int)zc.zc_cookie;
3543 out:
3544         zcmd_free_nvlists(&zc);
3545
3546         return (error);
3547 }
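
/*
 * Illustrative sketch (not part of the library): draining the event queue
 * in blocking mode.  Opening ZFS_DEV for the cleanup descriptor mirrors
 * what the zpool(8) events consumer does; the helper name is hypothetical.
 */
static void
example_consume_events(libzfs_handle_t *hdl)
{
        nvlist_t *nvl;
        int zevent_fd, dropped;

        if ((zevent_fd = open(ZFS_DEV, O_RDWR)) < 0)
                return;

        while (zpool_events_next(hdl, &nvl, &dropped, B_TRUE,
            zevent_fd) == 0) {
                if (nvl == NULL)
                        break;
                if (dropped > 0)
                        (void) printf("dropped %d events\n", dropped);
                nvlist_print(stdout, nvl);
                nvlist_free(nvl);
        }

        (void) close(zevent_fd);
}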
3548
3549 /*
3550  * Clear all events.
3551  */
3552 int
3553 zpool_events_clear(libzfs_handle_t *hdl, int *count)
3554 {
3555         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3556         char msg[1024];
3557
3558         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
3559             "cannot clear events"));
3560
3561         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
3562                 return (zpool_standard_error_fmt(hdl, errno, msg));
3563
3564         if (count != NULL)
3565                 *count = (int)zc.zc_cookie; /* # of events cleared */
3566
3567         return (0);
3568 }
3569
3570 void
3571 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
3572     char *pathname, size_t len)
3573 {
3574         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3575         boolean_t mounted = B_FALSE;
3576         char *mntpnt = NULL;
3577         char dsname[MAXNAMELEN];
3578
3579         if (dsobj == 0) {
3580                 /* special case for the MOS */
3581                 (void) snprintf(pathname, len, "<metadata>:<0x%llx>",
                         (longlong_t)obj);
3582                 return;
3583         }
3584
3585         /* get the dataset's name */
3586         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3587         zc.zc_obj = dsobj;
3588         if (ioctl(zhp->zpool_hdl->libzfs_fd,
3589             ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
3590                 /* just write out a path of two object numbers */
3591                 (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
3592                     (longlong_t)dsobj, (longlong_t)obj);
3593                 return;
3594         }
3595         (void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
3596
3597         /* find out if the dataset is mounted */
3598         mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
3599
3600         /* get the corrupted object's path */
3601         (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
3602         zc.zc_obj = obj;
3603         if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
3604             &zc) == 0) {
3605                 if (mounted) {
3606                         (void) snprintf(pathname, len, "%s%s", mntpnt,
3607                             zc.zc_value);
3608                 } else {
3609                         (void) snprintf(pathname, len, "%s:%s",
3610                             dsname, zc.zc_value);
3611                 }
3612         } else {
3613                 (void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
                         (longlong_t)obj);
3614         }
3615         free(mntpnt);
3616 }
3617
3618 /*
3619  * Read the EFI label from the config, if a label does not exist then
3620  * pass back the error to the caller. If the caller has passed a non-NULL
3621  * diskaddr argument then we set it to the starting address of the EFI
3622  * partition.
3623  */
3624 static int
3625 read_efi_label(nvlist_t *config, diskaddr_t *sb)
3626 {
3627         char *path;
3628         int fd;
3629         char diskname[MAXPATHLEN];
3630         int err = -1;
3631
3632         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
3633                 return (err);
3634
3635         (void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
3636             strrchr(path, '/'));
3637         if ((fd = open(diskname, O_RDWR|O_DIRECT)) >= 0) {
3638                 struct dk_gpt *vtoc;
3639
3640                 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
3641                         if (sb != NULL)
3642                                 *sb = vtoc->efi_parts[0].p_start;
3643                         efi_free(vtoc);
3644                 }
3645                 (void) close(fd);
3646         }
3647         return (err);
3648 }
3649
3650 /*
3651  * determine where a partition starts on a disk in the current
3652  * configuration
3653  */
3654 static diskaddr_t
3655 find_start_block(nvlist_t *config)
3656 {
3657         nvlist_t **child;
3658         uint_t c, children;
3659         diskaddr_t sb = MAXOFFSET_T;
3660         uint64_t wholedisk;
3661
3662         if (nvlist_lookup_nvlist_array(config,
3663             ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
3664                 if (nvlist_lookup_uint64(config,
3665                     ZPOOL_CONFIG_WHOLE_DISK,
3666                     &wholedisk) != 0 || !wholedisk) {
3667                         return (MAXOFFSET_T);
3668                 }
3669                 if (read_efi_label(config, &sb) < 0)
3670                         sb = MAXOFFSET_T;
3671                 return (sb);
3672         }
3673
3674         for (c = 0; c < children; c++) {
3675                 sb = find_start_block(child[c]);
3676                 if (sb != MAXOFFSET_T) {
3677                         return (sb);
3678                 }
3679         }
3680         return (MAXOFFSET_T);
3681 }
3682
3683 int
3684 zpool_label_disk_wait(char *path, int timeout)
3685 {
3686         struct stat64 statbuf;
3687         int i;
3688
3689         /*
3690          * Wait timeout milliseconds for a newly created device to be available
3691          * from the given path.  There is a small window when a /dev/ device
3692          * will exist and the udev link will not, so we must wait for the
3693          * symlink.  Depending on the udev rules this may take a few seconds.
3694          */
3695         for (i = 0; i < timeout; i++) {
3696                 usleep(1000);
3697
3698                 errno = 0;
3699                 if ((stat64(path, &statbuf) == 0) && (errno == 0))
3700                         return (0);
3701         }
3702
3703         return (ENOENT);
3704 }
3705
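/*
 * Read back a freshly written EFI label and verify that the primary GPT
 * was not corrupted in the process.  Returns 0 on success and a nonzero
 * error code otherwise.
 */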
3706 int
3707 zpool_label_disk_check(char *path)
3708 {
3709         struct dk_gpt *vtoc;
3710         int fd, err;
3711
3712         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0)
3713                 return (errno);
3714
3715         if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
3716                 (void) close(fd);
3717                 return (err);
3718         }
3719
3720         if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
3721                 efi_free(vtoc);
3722                 (void) close(fd);
3723                 return (EIDRM);
3724         }
3725
3726         efi_free(vtoc);
3727         (void) close(fd);
3728         return (0);
3729 }
3730
3731 /*
3732  * Label an individual disk.  The name provided is the short name,
3733  * stripped of any leading /dev path.
3734  */
3735 int
3736 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
3737 {
3738         char path[MAXPATHLEN];
3739         struct dk_gpt *vtoc;
3740         int rval, fd;
3741         size_t resv = EFI_MIN_RESV_SIZE;
3742         uint64_t slice_size;
3743         diskaddr_t start_block;
3744         char errbuf[1024];
3745
3746         /* prepare an error message just in case */
3747         (void) snprintf(errbuf, sizeof (errbuf),
3748             dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
3749
3750         if (zhp) {
3751                 nvlist_t *nvroot;
3752
3753                 if (pool_is_bootable(zhp)) {
3754                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3755                             "EFI labeled devices are not supported on root "
3756                             "pools."));
3757                         return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
3758                 }
3759
3760                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
3761                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
3762
3763                 if (zhp->zpool_start_block == 0)
3764                         start_block = find_start_block(nvroot);
3765                 else
3766                         start_block = zhp->zpool_start_block;
3767                 zhp->zpool_start_block = start_block;
3768         } else {
3769                 /* new pool */
3770                 start_block = NEW_START_BLOCK;
3771         }
3772
3773         (void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
3774             BACKUP_SLICE);
3775
3776         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
3777                 /*
3778                  * This shouldn't happen.  We've long since verified that this
3779                  * is a valid device.
3780                  */
3781                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3782                     "unable to open device '%s': %d"), path, errno);
3783                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
3784         }
3785
3786         if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
3787                 /*
3788                  * The only way this can fail is if we run out of memory, or we
3789                  * were unable to read the disk's capacity
3790                  */
3791                 if (errno == ENOMEM)
3792                         (void) no_memory(hdl);
3793
3794                 (void) close(fd);
3795                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3796                     "unable to read disk capacity of '%s'"), name);
3797
3798                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
3799         }
3800
3801         slice_size = vtoc->efi_last_u_lba + 1;
3802         slice_size -= EFI_MIN_RESV_SIZE;
3803         if (start_block == MAXOFFSET_T)
3804                 start_block = NEW_START_BLOCK;
3805         slice_size -= start_block;
3806
3807         vtoc->efi_parts[0].p_start = start_block;
3808         vtoc->efi_parts[0].p_size = slice_size;
3809
3810         /*
3811          * Why we use V_USR: V_BACKUP confuses users, and is considered
3812          * disposable by some EFI utilities (since EFI doesn't have a backup
3813          * slice).  V_UNASSIGNED is supposed to be used only for zero size
3814          * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
3815          * etc. were all pretty specific.  V_USR is as close to reality as we
3816          * can get, in the absence of V_OTHER.
3817          */
3818         vtoc->efi_parts[0].p_tag = V_USR;
3819         (void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
3820
3821         vtoc->efi_parts[8].p_start = slice_size + start_block;
3822         vtoc->efi_parts[8].p_size = resv;
3823         vtoc->efi_parts[8].p_tag = V_RESERVED;
3824
3825         if ((rval = efi_write(fd, vtoc)) != 0) {
3826                 /*
3827                  * Some block drivers (like pcata) may not support EFI
3828                  * GPT labels.  Print out a helpful error message
3829                  * directing the user to manually label the disk and
3830                  * give a specific slice.
3831                  */
3832                 (void) close(fd);
3833                 efi_free(vtoc);
3834
3835                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
3836                     "parted(8) and then provide a specific slice: %d"), rval);
3837                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
3838         }
3839
3840         (void) close(fd);
3841         efi_free(vtoc);
3842
3843         /* Wait for the first expected slice to appear. */
3844         (void) snprintf(path, sizeof (path), "%s/%s%s%s", DISK_ROOT, name,
3845             isdigit(name[strlen(name)-1]) ? "p" : "", FIRST_SLICE);
3846         rval = zpool_label_disk_wait(path, 3000);
3847         if (rval) {
3848                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
3849                     "detect device partitions on '%s': %d"), path, rval);
3850                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
3851         }
3852
3853         /* We can't be too paranoid.  Read the label back and verify it. */
3854         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
3855         rval = zpool_label_disk_check(path);
3856         if (rval) {
3857                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
3858                     "EFI label on '%s' is damaged.  Ensure\nthis device "
3859                     "is not in use, and is functioning properly: %d"),
3860                     path, rval);
3861                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
3862         }
3863
3864         return (0);
3865 }
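
/*
 * Illustrative sketch (not part of the library): labeling a whole disk
 * before adding it to a pool.  The short name "sdb" and the helper name
 * are hypothetical; zpool_label_disk() itself waits for the partition to
 * appear and verifies the written label.
 */
static void
example_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp)
{
        char disk[] = "sdb";

        if (zpool_label_disk(hdl, zhp, disk) != 0)
                (void) fprintf(stderr, "%s\n",
                    libzfs_error_description(hdl));
}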