1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25
26 #include <ctype.h>
27 #include <errno.h>
28 #include <devid.h>
29 #include <fcntl.h>
30 #include <libintl.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <strings.h>
34 #include <unistd.h>
35 #include <zone.h>
36 #include <sys/stat.h>
37 #include <sys/efi_partition.h>
38 #include <sys/vtoc.h>
39 #include <sys/zfs_ioctl.h>
40 #include <dlfcn.h>
41
42 #include "zfs_namecheck.h"
43 #include "zfs_prop.h"
44 #include "libzfs_impl.h"
45 #include "zfs_comutil.h"
46
47 static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
48
49 typedef struct prop_flags {
50         int create:1;   /* Validate property on creation */
51         int import:1;   /* Validate property on import */
52 } prop_flags_t;
53
54 /*
55  * ====================================================================
56  *   zpool property functions
57  * ====================================================================
58  */
59
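/*
 * Fetch all pool properties from the kernel, growing the destination
 * nvlist buffer and retrying while the ioctl() fails with ENOMEM.  The
 * unpacked result is cached in zhp->zpool_props.
 */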
60 static int
61 zpool_get_all_props(zpool_handle_t *zhp)
62 {
63         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
64         libzfs_handle_t *hdl = zhp->zpool_hdl;
65
66         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
67
68         if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
69                 return (-1);
70
71         while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
72                 if (errno == ENOMEM) {
73                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
74                                 zcmd_free_nvlists(&zc);
75                                 return (-1);
76                         }
77                 } else {
78                         zcmd_free_nvlists(&zc);
79                         return (-1);
80                 }
81         }
82
83         if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
84                 zcmd_free_nvlists(&zc);
85                 return (-1);
86         }
87
88         zcmd_free_nvlists(&zc);
89
90         return (0);
91 }
92
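/*
 * Re-fetch the cached property nvlist, freeing the old copy only after
 * the new one has been successfully retrieved.
 */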
93 static int
94 zpool_props_refresh(zpool_handle_t *zhp)
95 {
96         nvlist_t *old_props;
97
98         old_props = zhp->zpool_props;
99
100         if (zpool_get_all_props(zhp) != 0)
101                 return (-1);
102
103         nvlist_free(old_props);
104         return (0);
105 }
106
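/*
 * Look up a string property in the cached nvlist, falling back to the
 * property's default value (or "-") when it is not present.  The property
 * source, if requested, is returned through 'src'.
 */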
107 static char *
108 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
109     zprop_source_t *src)
110 {
111         nvlist_t *nv, *nvl;
112         uint64_t ival;
113         char *value;
114         zprop_source_t source;
115
116         nvl = zhp->zpool_props;
117         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
118                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
119                 source = ival;
120                 verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
121         } else {
122                 source = ZPROP_SRC_DEFAULT;
123                 if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
124                         value = "-";
125         }
126
127         if (src)
128                 *src = source;
129
130         return (value);
131 }
132
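/*
 * Numeric counterpart of zpool_get_prop_string().  If the property nvlist
 * has not been loaded and cannot be fetched (e.g. the pool is faulted),
 * the property's default numeric value is returned, except that the pool
 * GUID can still be recovered from the cached config.
 */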
133 uint64_t
134 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
135 {
136         nvlist_t *nv, *nvl;
137         uint64_t value;
138         zprop_source_t source;
139
140         if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
141                 /*
142                  * zpool_get_all_props() has most likely failed because
143                  * the pool is faulted, but if all we need is the top level
144                  * vdev's guid then get it from the zhp config nvlist.
145                  */
146                 if ((prop == ZPOOL_PROP_GUID) &&
147                     (nvlist_lookup_nvlist(zhp->zpool_config,
148                     ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
149                     (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
150                     == 0)) {
151                         return (value);
152                 }
153                 return (zpool_prop_default_numeric(prop));
154         }
155
156         nvl = zhp->zpool_props;
157         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
158                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
159                 source = value;
160                 verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
161         } else {
162                 source = ZPROP_SRC_DEFAULT;
163                 value = zpool_prop_default_numeric(prop);
164         }
165
166         if (src)
167                 *src = source;
168
169         return (value);
170 }
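/*
 * Illustrative use (not part of the original source), assuming an open
 * zpool_handle_t 'zhp':
 *
 *      uint64_t version;
 *
 *      version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
 */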
171
172 /*
173  * Map VDEV STATE to printed strings.
174  */
175 char *
176 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
177 {
178         switch (state) {
179         default:
180                 break;
181         case VDEV_STATE_CLOSED:
182         case VDEV_STATE_OFFLINE:
183                 return (gettext("OFFLINE"));
184         case VDEV_STATE_REMOVED:
185                 return (gettext("REMOVED"));
186         case VDEV_STATE_CANT_OPEN:
187                 if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
188                         return (gettext("FAULTED"));
189                 else if (aux == VDEV_AUX_SPLIT_POOL)
190                         return (gettext("SPLIT"));
191                 else
192                         return (gettext("UNAVAIL"));
193         case VDEV_STATE_FAULTED:
194                 return (gettext("FAULTED"));
195         case VDEV_STATE_DEGRADED:
196                 return (gettext("DEGRADED"));
197         case VDEV_STATE_HEALTHY:
198                 return (gettext("ONLINE"));
199         }
200
201         return (gettext("UNKNOWN"));
202 }
203
204 /*
205  * Get a zpool property value for 'prop' and return the value in
206  * a pre-allocated buffer.
207  */
208 int
209 zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
210     zprop_source_t *srctype)
211 {
212         uint64_t intval;
213         const char *strval;
214         zprop_source_t src = ZPROP_SRC_NONE;
215         nvlist_t *nvroot;
216         vdev_stat_t *vs;
217         uint_t vsc;
218
219         if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
220                 switch (prop) {
221                 case ZPOOL_PROP_NAME:
222                         (void) strlcpy(buf, zpool_get_name(zhp), len);
223                         break;
224
225                 case ZPOOL_PROP_HEALTH:
226                         (void) strlcpy(buf, "FAULTED", len);
227                         break;
228
229                 case ZPOOL_PROP_GUID:
230                         intval = zpool_get_prop_int(zhp, prop, &src);
231                         (void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
232                         break;
233
234                 case ZPOOL_PROP_ALTROOT:
235                 case ZPOOL_PROP_CACHEFILE:
236                         if (zhp->zpool_props != NULL ||
237                             zpool_get_all_props(zhp) == 0) {
238                                 (void) strlcpy(buf,
239                                     zpool_get_prop_string(zhp, prop, &src),
240                                     len);
241                                 if (srctype != NULL)
242                                         *srctype = src;
243                                 return (0);
244                         }
245                         /* FALLTHROUGH */
246                 default:
247                         (void) strlcpy(buf, "-", len);
248                         break;
249                 }
250
251                 if (srctype != NULL)
252                         *srctype = src;
253                 return (0);
254         }
255
256         if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
257             prop != ZPOOL_PROP_NAME)
258                 return (-1);
259
260         switch (zpool_prop_get_type(prop)) {
261         case PROP_TYPE_STRING:
262                 (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
263                     len);
264                 break;
265
266         case PROP_TYPE_NUMBER:
267                 intval = zpool_get_prop_int(zhp, prop, &src);
268
269                 switch (prop) {
270                 case ZPOOL_PROP_SIZE:
271                 case ZPOOL_PROP_ALLOCATED:
272                 case ZPOOL_PROP_FREE:
273                         (void) zfs_nicenum(intval, buf, len);
274                         break;
275
276                 case ZPOOL_PROP_CAPACITY:
277                         (void) snprintf(buf, len, "%llu%%",
278                             (u_longlong_t)intval);
279                         break;
280
281                 case ZPOOL_PROP_DEDUPRATIO:
282                         (void) snprintf(buf, len, "%llu.%02llux",
283                             (u_longlong_t)(intval / 100),
284                             (u_longlong_t)(intval % 100));
285                         break;
286
287                 case ZPOOL_PROP_HEALTH:
288                         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
289                             ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
290                         verify(nvlist_lookup_uint64_array(nvroot,
291                             ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
292                             == 0);
293
294                         (void) strlcpy(buf, zpool_state_to_name(intval,
295                             vs->vs_aux), len);
296                         break;
297                 default:
298                         (void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
299                 }
300                 break;
301
302         case PROP_TYPE_INDEX:
303                 intval = zpool_get_prop_int(zhp, prop, &src);
304                 if (zpool_prop_index_to_string(prop, intval, &strval)
305                     != 0)
306                         return (-1);
307                 (void) strlcpy(buf, strval, len);
308                 break;
309
310         default:
311                 abort();
312         }
313
314         if (srctype)
315                 *srctype = src;
316
317         return (0);
318 }
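/*
 * Illustrative use (not part of the original source): format the pool
 * health into a caller-supplied buffer.
 *
 *      char buf[ZFS_MAXPROPLEN];
 *      zprop_source_t src;
 *
 *      if (zpool_get_prop(zhp, ZPOOL_PROP_HEALTH, buf, sizeof (buf),
 *          &src) == 0)
 *              (void) printf("health: %s\n", buf);
 */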
319
320 /*
321  * Check that the bootfs name begins with the name of the pool it is
322  * being set on.  'bootfs' is assumed to be a valid dataset name.
323  */
324 static boolean_t
325 bootfs_name_valid(const char *pool, char *bootfs)
326 {
327         int len = strlen(pool);
328
329         if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
330                 return (B_FALSE);
331
332         if (strncmp(pool, bootfs, len) == 0 &&
333             (bootfs[len] == '/' || bootfs[len] == '\0'))
334                 return (B_TRUE);
335
336         return (B_FALSE);
337 }
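/*
 * For example, with pool "tank", both "tank" and "tank/ROOT/be" are
 * accepted, while "tank2/ROOT" and "other/fs" are rejected.
 */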
338
339 /*
340  * Inspect the configuration to determine if any of the devices contain
341  * an EFI label.
342  */
343 static boolean_t
344 pool_uses_efi(nvlist_t *config)
345 {
346         nvlist_t **child;
347         uint_t c, children;
348
349         if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
350             &child, &children) != 0)
351                 return (read_efi_label(config, NULL) >= 0);
352
353         for (c = 0; c < children; c++) {
354                 if (pool_uses_efi(child[c]))
355                         return (B_TRUE);
356         }
357         return (B_FALSE);
358 }
359
360 static boolean_t
361 pool_is_bootable(zpool_handle_t *zhp)
362 {
363         char bootfs[ZPOOL_MAXNAMELEN];
364
365         return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
366             sizeof (bootfs), NULL) == 0 && strncmp(bootfs, "-",
367             sizeof (bootfs)) != 0);
368 }
369
370
371 /*
372  * Given an nvlist of zpool properties to be set, validate that they are
373  * correct, and parse any numeric properties (index, boolean, etc) if they are
374  * specified as strings.
375  */
376 static nvlist_t *
377 zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
378     nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
379 {
380         nvpair_t *elem;
381         nvlist_t *retprops;
382         zpool_prop_t prop;
383         char *strval;
384         uint64_t intval;
385         char *slash;
386         struct stat64 statbuf;
387         zpool_handle_t *zhp;
388         nvlist_t *nvroot;
389
390         if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
391                 (void) no_memory(hdl);
392                 return (NULL);
393         }
394
395         elem = NULL;
396         while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
397                 const char *propname = nvpair_name(elem);
398
399                 /*
400                  * Make sure this property is valid and applies to this type.
401                  */
402                 if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
403                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
404                             "invalid property '%s'"), propname);
405                         (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
406                         goto error;
407                 }
408
409                 if (zpool_prop_readonly(prop)) {
410                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
411                             "is readonly"), propname);
412                         (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
413                         goto error;
414                 }
415
416                 if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
417                     &strval, &intval, errbuf) != 0)
418                         goto error;
419
420                 /*
421                  * Perform additional checking for specific properties.
422                  */
423                 switch (prop) {
424                 default:
425                         break;
426                 case ZPOOL_PROP_VERSION:
427                         if (intval < version || intval > SPA_VERSION) {
428                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
429                                     "property '%s' number %llu is invalid."),
430                                     propname, (u_longlong_t)intval);
431                                 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
432                                 goto error;
433                         }
434                         break;
435
436                 case ZPOOL_PROP_BOOTFS:
437                         if (flags.create || flags.import) {
438                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
439                                     "property '%s' cannot be set at creation "
440                                     "or import time"), propname);
441                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
442                                 goto error;
443                         }
444
445                         if (version < SPA_VERSION_BOOTFS) {
446                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
447                                     "pool must be upgraded to support "
448                                     "'%s' property"), propname);
449                                 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
450                                 goto error;
451                         }
452
453                         /*
454                          * The bootfs property value must be a dataset name, and
455                          * the dataset must be in the pool on which it is set.
456                          */
457                         if (strval[0] != '\0' && !bootfs_name_valid(poolname,
458                             strval)) {
459                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
460                                     "is an invalid name"), strval);
461                                 (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
462                                 goto error;
463                         }
464
465                         if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
466                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
467                                     "could not open pool '%s'"), poolname);
468                                 (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
469                                 goto error;
470                         }
471                         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
472                             ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
473
474                         /*
475                          * bootfs property cannot be set on a disk which has
476                          * been EFI labeled.
477                          */
478                         if (pool_uses_efi(nvroot)) {
479                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
480                                     "property '%s' not supported on "
481                                     "EFI labeled devices"), propname);
482                                 (void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
483                                 zpool_close(zhp);
484                                 goto error;
485                         }
486                         zpool_close(zhp);
487                         break;
488
489                 case ZPOOL_PROP_ALTROOT:
490                         if (!flags.create && !flags.import) {
491                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
492                                     "property '%s' can only be set during pool "
493                                     "creation or import"), propname);
494                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
495                                 goto error;
496                         }
497
498                         if (strval[0] != '/') {
499                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
500                                     "bad alternate root '%s'"), strval);
501                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
502                                 goto error;
503                         }
504                         break;
505
506                 case ZPOOL_PROP_CACHEFILE:
507                         if (strval[0] == '\0')
508                                 break;
509
510                         if (strcmp(strval, "none") == 0)
511                                 break;
512
513                         if (strval[0] != '/') {
514                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
515                                     "property '%s' must be empty, an "
516                                     "absolute path, or 'none'"), propname);
517                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
518                                 goto error;
519                         }
520
521                         slash = strrchr(strval, '/');
522
523                         if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
524                             strcmp(slash, "/..") == 0) {
525                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
526                                     "'%s' is not a valid file"), strval);
527                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
528                                 goto error;
529                         }
530
531                         *slash = '\0';
532
533                         if (strval[0] != '\0' &&
534                             (stat64(strval, &statbuf) != 0 ||
535                             !S_ISDIR(statbuf.st_mode))) {
536                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
537                                     "'%s' is not a valid directory"),
538                                     strval);
539                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
540                                 goto error;
541                         }
542
543                         *slash = '/';
544                         break;
545
546                 case ZPOOL_PROP_READONLY:
547                         if (!flags.import) {
548                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
549                                     "property '%s' can only be set at "
550                                     "import time"), propname);
551                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
552                                 goto error;
553                         }
554                         break;
555                 }
556         }
557
558         return (retprops);
559 error:
560         nvlist_free(retprops);
561         return (NULL);
562 }
563
564 /*
565  * Set zpool property : propname=propval.
566  */
567 int
568 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
569 {
570         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
571         int ret = -1;
572         char errbuf[1024];
573         nvlist_t *nvl = NULL;
574         nvlist_t *realprops;
575         uint64_t version;
576         prop_flags_t flags = { 0 };
577
578         (void) snprintf(errbuf, sizeof (errbuf),
579             dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
580             zhp->zpool_name);
581
582         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
583                 return (no_memory(zhp->zpool_hdl));
584
585         if (nvlist_add_string(nvl, propname, propval) != 0) {
586                 nvlist_free(nvl);
587                 return (no_memory(zhp->zpool_hdl));
588         }
589
590         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
591         if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
592             zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
593                 nvlist_free(nvl);
594                 return (-1);
595         }
596
597         nvlist_free(nvl);
598         nvl = realprops;
599
600         /*
601          * Execute the corresponding ioctl() to set this property.
602          */
603         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
604
605         if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
606                 nvlist_free(nvl);
607                 return (-1);
608         }
609
610         ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
611
612         zcmd_free_nvlists(&zc);
613         nvlist_free(nvl);
614
615         if (ret)
616                 (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
617         else
618                 (void) zpool_props_refresh(zhp);
619
620         return (ret);
621 }
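/*
 * Illustrative use (not part of the original source): disable the cache
 * file for a pool.
 *
 *      if (zpool_set_prop(zhp, "cachefile", "none") != 0)
 *              return (-1);
 */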
622
623 int
624 zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
625 {
626         libzfs_handle_t *hdl = zhp->zpool_hdl;
627         zprop_list_t *entry;
628         char buf[ZFS_MAXPROPLEN];
629
630         if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
631                 return (-1);
632
633         for (entry = *plp; entry != NULL; entry = entry->pl_next) {
634
635                 if (entry->pl_fixed)
636                         continue;
637
638                 if (entry->pl_prop != ZPROP_INVAL &&
639                     zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
640                     NULL) == 0) {
641                         if (strlen(buf) > entry->pl_width)
642                                 entry->pl_width = strlen(buf);
643                 }
644         }
645
646         return (0);
647 }
648
649
650 /*
651  * Don't start the slice at the default block of 34; many storage
652  * devices will use a stripe width of 128k, while other vendors prefer a 1m
653  * alignment.  It is best to play it safe and ensure a 1m alignment
654  * given 512b blocks.  When the block size is larger by a power of 2
655  * we will still be 1m aligned.
656  */
657 #define NEW_START_BLOCK 2048
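/* 2048 sectors of 512b each = 1m, so the first slice starts 1m aligned. */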
658
659 /*
660  * Validate the given pool name, optionally reporting an extended error
661  * message through 'hdl' (which may be NULL).
662  */
663 boolean_t
664 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
665 {
666         namecheck_err_t why;
667         char what;
668         int ret;
669
670         ret = pool_namecheck(pool, &why, &what);
671
672         /*
673          * The rules for reserved pool names were extended at a later point.
674          * But we need to support users with existing pools that may now be
675          * invalid.  So we only check for this expanded set of names during a
676          * create (or import), and only in userland.
677          */
678         if (ret == 0 && !isopen &&
679             (strncmp(pool, "mirror", 6) == 0 ||
680             strncmp(pool, "raidz", 5) == 0 ||
681             strncmp(pool, "spare", 5) == 0 ||
682             strcmp(pool, "log") == 0)) {
683                 if (hdl != NULL)
684                         zfs_error_aux(hdl,
685                             dgettext(TEXT_DOMAIN, "name is reserved"));
686                 return (B_FALSE);
687         }
688
689
690         if (ret != 0) {
691                 if (hdl != NULL) {
692                         switch (why) {
693                         case NAME_ERR_TOOLONG:
694                                 zfs_error_aux(hdl,
695                                     dgettext(TEXT_DOMAIN, "name is too long"));
696                                 break;
697
698                         case NAME_ERR_INVALCHAR:
699                                 zfs_error_aux(hdl,
700                                     dgettext(TEXT_DOMAIN, "invalid character "
701                                     "'%c' in pool name"), what);
702                                 break;
703
704                         case NAME_ERR_NOLETTER:
705                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
706                                     "name must begin with a letter"));
707                                 break;
708
709                         case NAME_ERR_RESERVED:
710                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
711                                     "name is reserved"));
712                                 break;
713
714                         case NAME_ERR_DISKLIKE:
715                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
716                                     "pool name is reserved"));
717                                 break;
718
719                         case NAME_ERR_LEADING_SLASH:
720                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
721                                     "leading slash in name"));
722                                 break;
723
724                         case NAME_ERR_EMPTY_COMPONENT:
725                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
726                                     "empty component in name"));
727                                 break;
728
729                         case NAME_ERR_TRAILING_SLASH:
730                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
731                                     "trailing slash in name"));
732                                 break;
733
734                         case NAME_ERR_MULTIPLE_AT:
735                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
736                                     "multiple '@' delimiters in name"));
737                                 break;
738                         case NAME_ERR_NO_AT:
739                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
740                                     "permission set is missing '@'"));
741                                 break;
742                         }
743                 }
744                 return (B_FALSE);
745         }
746
747         return (B_TRUE);
748 }
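/*
 * For example, "tank" is a valid pool name, while "mirror", "raidz1", and
 * "spare0" are reserved at create/import time, "1pool" does not begin
 * with a letter, and "tank@snap" contains an invalid character.
 */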
749
750 /*
751  * Open a handle to the given pool, even if the pool is currently in the FAULTED
752  * state.
753  */
754 zpool_handle_t *
755 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
756 {
757         zpool_handle_t *zhp;
758         boolean_t missing;
759
760         /*
761          * Make sure the pool name is valid.
762          */
763         if (!zpool_name_valid(hdl, B_TRUE, pool)) {
764                 (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
765                     dgettext(TEXT_DOMAIN, "cannot open '%s'"),
766                     pool);
767                 return (NULL);
768         }
769
770         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
771                 return (NULL);
772
773         zhp->zpool_hdl = hdl;
774         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
775
776         if (zpool_refresh_stats(zhp, &missing) != 0) {
777                 zpool_close(zhp);
778                 return (NULL);
779         }
780
781         if (missing) {
782                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
783                 (void) zfs_error_fmt(hdl, EZFS_NOENT,
784                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
785                 zpool_close(zhp);
786                 return (NULL);
787         }
788
789         return (zhp);
790 }
791
792 /*
793  * Like the above, but silent on error.  Used when iterating over pools (because
794  * the configuration cache may be out of date).
795  */
796 int
797 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
798 {
799         zpool_handle_t *zhp;
800         boolean_t missing;
801
802         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
803                 return (-1);
804
805         zhp->zpool_hdl = hdl;
806         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
807
808         if (zpool_refresh_stats(zhp, &missing) != 0) {
809                 zpool_close(zhp);
810                 return (-1);
811         }
812
813         if (missing) {
814                 zpool_close(zhp);
815                 *ret = NULL;
816                 return (0);
817         }
818
819         *ret = zhp;
820         return (0);
821 }
822
823 /*
824  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
825  * state.
826  */
827 zpool_handle_t *
828 zpool_open(libzfs_handle_t *hdl, const char *pool)
829 {
830         zpool_handle_t *zhp;
831
832         if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
833                 return (NULL);
834
835         if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
836                 (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
837                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
838                 zpool_close(zhp);
839                 return (NULL);
840         }
841
842         return (zhp);
843 }
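/*
 * Typical usage (illustrative, not part of the original source):
 *
 *      zpool_handle_t *zhp;
 *
 *      if ((zhp = zpool_open(hdl, "tank")) == NULL)
 *              return (-1);
 *      ... operate on the pool ...
 *      zpool_close(zhp);
 */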
844
845 /*
846  * Close the handle.  Simply frees the memory associated with the handle.
847  */
848 void
849 zpool_close(zpool_handle_t *zhp)
850 {
851         if (zhp->zpool_config)
852                 nvlist_free(zhp->zpool_config);
853         if (zhp->zpool_old_config)
854                 nvlist_free(zhp->zpool_old_config);
855         if (zhp->zpool_props)
856                 nvlist_free(zhp->zpool_props);
857         free(zhp);
858 }
859
860 /*
861  * Return the name of the pool.
862  */
863 const char *
864 zpool_get_name(zpool_handle_t *zhp)
865 {
866         return (zhp->zpool_name);
867 }
868
869
870 /*
871  * Return the state of the pool (ACTIVE or UNAVAILABLE).
872  */
873 int
874 zpool_get_state(zpool_handle_t *zhp)
875 {
876         return (zhp->zpool_state);
877 }
878
879 /*
880  * Create the named pool, using the provided vdev list.  It is assumed
881  * that the consumer has already validated the contents of the nvlist, so we
882  * don't have to worry about error semantics.
883  */
884 int
885 zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
886     nvlist_t *props, nvlist_t *fsprops)
887 {
888         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
889         nvlist_t *zc_fsprops = NULL;
890         nvlist_t *zc_props = NULL;
891         char msg[1024];
892         char *altroot;
893         int ret = -1;
894
895         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
896             "cannot create '%s'"), pool);
897
898         if (!zpool_name_valid(hdl, B_FALSE, pool))
899                 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
900
901         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
902                 return (-1);
903
904         if (props) {
905                 prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };
906
907                 if ((zc_props = zpool_valid_proplist(hdl, pool, props,
908                     SPA_VERSION_1, flags, msg)) == NULL) {
909                         goto create_failed;
910                 }
911         }
912
913         if (fsprops) {
914                 uint64_t zoned;
915                 char *zonestr;
916
917                 zoned = ((nvlist_lookup_string(fsprops,
918                     zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
919                     strcmp(zonestr, "on") == 0);
920
921                 if ((zc_fsprops = zfs_valid_proplist(hdl,
922                     ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) {
923                         goto create_failed;
924                 }
925                 if (!zc_props &&
926                     (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
927                         goto create_failed;
928                 }
929                 if (nvlist_add_nvlist(zc_props,
930                     ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
931                         goto create_failed;
932                 }
933         }
934
935         if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
936                 goto create_failed;
937
938         (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
939
940         if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
941
942                 zcmd_free_nvlists(&zc);
943                 nvlist_free(zc_props);
944                 nvlist_free(zc_fsprops);
945
946                 switch (errno) {
947                 case EBUSY:
948                         /*
949                          * This can happen if the user has specified the same
950                          * device multiple times.  We can't reliably detect this
951                          * until we try to add it and see we already have a
952  * label.  This can also happen if the device is
953                          * part of an active md or lvm device.
954                          */
955                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
956                             "one or more vdevs refer to the same device, or one of\n"
957                             "the devices is part of an active md or lvm device"));
958                         return (zfs_error(hdl, EZFS_BADDEV, msg));
959
960                 case EOVERFLOW:
961                         /*
962                          * This occurs when one of the devices is below
963                          * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
964                          * device was the problem device since there's no
965                          * reliable way to determine device size from userland.
966                          */
967                         {
968                                 char buf[64];
969
970                                 zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
971
972                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
973                                     "one or more devices is less than the "
974                                     "minimum size (%s)"), buf);
975                         }
976                         return (zfs_error(hdl, EZFS_BADDEV, msg));
977
978                 case ENOSPC:
979                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
980                             "one or more devices is out of space"));
981                         return (zfs_error(hdl, EZFS_BADDEV, msg));
982
983                 case ENOTBLK:
984                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
985                             "cache device must be a disk or disk slice"));
986                         return (zfs_error(hdl, EZFS_BADDEV, msg));
987
988                 default:
989                         return (zpool_standard_error(hdl, errno, msg));
990                 }
991         }
992
993         /*
994          * If this is an alternate root pool, then we automatically set the
995          * mountpoint of the root dataset to be '/'.
996          */
997         if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
998             &altroot) == 0) {
999                 zfs_handle_t *zhp;
1000
1001                 verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
1002                 verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1003                     "/") == 0);
1004
1005                 zfs_close(zhp);
1006         }
1007
1008 create_failed:
1009         zcmd_free_nvlists(&zc);
1010         nvlist_free(zc_props);
1011         nvlist_free(zc_fsprops);
1012         return (ret);
1013 }
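/*
 * Illustrative call (not part of the original source; assumes 'nvroot'
 * describes the vdev layout, as built by the zpool(8) command):
 *
 *      nvlist_t *props = NULL;
 *
 *      verify(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
 *      verify(nvlist_add_string(props,
 *          zpool_prop_to_name(ZPOOL_PROP_ALTROOT), "/mnt") == 0);
 *      ret = zpool_create(hdl, "tank", nvroot, props, NULL);
 *      nvlist_free(props);
 */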
1014
1015 /*
1016  * Destroy the given pool.  It is up to the caller to ensure that there are no
1017  * datasets left in the pool.
1018  */
1019 int
1020 zpool_destroy(zpool_handle_t *zhp)
1021 {
1022         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1023         zfs_handle_t *zfp = NULL;
1024         libzfs_handle_t *hdl = zhp->zpool_hdl;
1025         char msg[1024];
1026
1027         if (zhp->zpool_state == POOL_STATE_ACTIVE &&
1028             (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
1029                 return (-1);
1030
1031         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1032
1033         if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
1034                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1035                     "cannot destroy '%s'"), zhp->zpool_name);
1036
1037                 if (errno == EROFS) {
1038                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1039                             "one or more devices is read only"));
1040                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1041                 } else {
1042                         (void) zpool_standard_error(hdl, errno, msg);
1043                 }
1044
1045                 if (zfp)
1046                         zfs_close(zfp);
1047                 return (-1);
1048         }
1049
1050         if (zfp) {
1051                 remove_mountpoint(zfp);
1052                 zfs_close(zfp);
1053         }
1054
1055         return (0);
1056 }
1057
1058 /*
1059  * Add the given vdevs to the pool.  The caller must have already performed the
1060  * necessary verification to ensure that the vdev specification is well-formed.
1061  */
1062 int
1063 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
1064 {
1065         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1066         int ret;
1067         libzfs_handle_t *hdl = zhp->zpool_hdl;
1068         char msg[1024];
1069         nvlist_t **spares, **l2cache;
1070         uint_t nspares, nl2cache;
1071
1072         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1073             "cannot add to '%s'"), zhp->zpool_name);
1074
1075         if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1076             SPA_VERSION_SPARES &&
1077             nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1078             &spares, &nspares) == 0) {
1079                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1080                     "upgraded to add hot spares"));
1081                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
1082         }
1083
1084         if (pool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot,
1085             ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
1086                 uint64_t s;
1087
1088                 for (s = 0; s < nspares; s++) {
1089                         char *path;
1090
1091                         if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
1092                             &path) == 0 && pool_uses_efi(spares[s])) {
1093                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1094                                     "device '%s' contains an EFI label and "
1095                                     "cannot be used on root pools."),
1096                                     zpool_vdev_name(hdl, NULL, spares[s],
1097                                     B_FALSE));
1098                                 return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
1099                         }
1100                 }
1101         }
1102
1103         if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1104             SPA_VERSION_L2CACHE &&
1105             nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1106             &l2cache, &nl2cache) == 0) {
1107                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1108                     "upgraded to add cache devices"));
1109                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
1110         }
1111
1112         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1113                 return (-1);
1114         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1115
1116         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
1117                 switch (errno) {
1118                 case EBUSY:
1119                         /*
1120                          * This can happen if the user has specified the same
1121                          * device multiple times.  We can't reliably detect this
1122                          * until we try to add it and see we already have a
1123                          * label.
1124                          */
1125                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1126                             "one or more vdevs refer to the same device"));
1127                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1128                         break;
1129
1130                 case EOVERFLOW:
1131                         /*
1132                          * This occurs when one of the devices is below
1133                          * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1134                          * device was the problem device since there's no
1135                          * reliable way to determine device size from userland.
1136                          */
1137                         {
1138                                 char buf[64];
1139
1140                                 zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1141
1142                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1143                                     "device is less than the minimum "
1144                                     "size (%s)"), buf);
1145                         }
1146                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1147                         break;
1148
1149                 case ENOTSUP:
1150                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1151                             "pool must be upgraded to add these vdevs"));
1152                         (void) zfs_error(hdl, EZFS_BADVERSION, msg);
1153                         break;
1154
1155                 case EDOM:
1156                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1157                             "root pool cannot have multiple vdevs"
1158                             " or separate logs"));
1159                         (void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
1160                         break;
1161
1162                 case ENOTBLK:
1163                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1164                             "cache device must be a disk or disk slice"));
1165                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1166                         break;
1167
1168                 default:
1169                         (void) zpool_standard_error(hdl, errno, msg);
1170                 }
1171
1172                 ret = -1;
1173         } else {
1174                 ret = 0;
1175         }
1176
1177         zcmd_free_nvlists(&zc);
1178
1179         return (ret);
1180 }
1181
1182 /*
1183  * Exports the pool from the system.  The caller must ensure that there are no
1184  * mounted datasets in the pool.
1185  */
1186 int
1187 zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)
1188 {
1189         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1190         char msg[1024];
1191
1192         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1193             "cannot export '%s'"), zhp->zpool_name);
1194
1195         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1196         zc.zc_cookie = force;
1197         zc.zc_guid = hardforce;
1198
1199         if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1200                 switch (errno) {
1201                 case EXDEV:
1202                         zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1203                             "use '-f' to override the following errors:\n"
1204                             "'%s' has an active shared spare which could be"
1205                             " used by other pools once '%s' is exported."),
1206                             zhp->zpool_name, zhp->zpool_name);
1207                         return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1208                             msg));
1209                 default:
1210                         return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1211                             msg));
1212                 }
1213         }
1214
1215         return (0);
1216 }
1217
1218 int
1219 zpool_export(zpool_handle_t *zhp, boolean_t force)
1220 {
1221         return (zpool_export_common(zhp, force, B_FALSE));
1222 }
1223
1224 int
1225 zpool_export_force(zpool_handle_t *zhp)
1226 {
1227         return (zpool_export_common(zhp, B_TRUE, B_TRUE));
1228 }
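/*
 * Illustrative use (not part of the original source): a plain, non-forced
 * export.
 *
 *      if (zpool_export(zhp, B_FALSE) != 0)
 *              return (-1);
 */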
1229
1230 static void
1231 zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
1232     nvlist_t *config)
1233 {
1234         nvlist_t *nv = NULL;
1235         uint64_t rewindto;
1236         int64_t loss = -1;
1237         struct tm t;
1238         char timestr[128];
1239
1240         if (!hdl->libzfs_printerr || config == NULL)
1241                 return;
1242
1243         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0)
1244                 return;
1245
1246         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1247                 return;
1248         (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1249
1250         if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1251             strftime(timestr, 128, "%c", &t) != 0) {
1252                 if (dryrun) {
1253                         (void) printf(dgettext(TEXT_DOMAIN,
1254                             "Would be able to return %s "
1255                             "to its state as of %s.\n"),
1256                             name, timestr);
1257                 } else {
1258                         (void) printf(dgettext(TEXT_DOMAIN,
1259                             "Pool %s returned to its state as of %s.\n"),
1260                             name, timestr);
1261                 }
1262                 if (loss > 120) {
1263                         (void) printf(dgettext(TEXT_DOMAIN,
1264                             "%s approximately %lld "),
1265                             dryrun ? "Would discard" : "Discarded",
1266                             ((longlong_t)loss + 30) / 60);
1267                         (void) printf(dgettext(TEXT_DOMAIN,
1268                             "minutes of transactions.\n"));
1269                 } else if (loss > 0) {
1270                         (void) printf(dgettext(TEXT_DOMAIN,
1271                             "%s approximately %lld "),
1272                             dryrun ? "Would discard" : "Discarded",
1273                             (longlong_t)loss);
1274                         (void) printf(dgettext(TEXT_DOMAIN,
1275                             "seconds of transactions.\n"));
1276                 }
1277         }
1278 }
1279
1280 void
1281 zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
1282     nvlist_t *config)
1283 {
1284         nvlist_t *nv = NULL;
1285         int64_t loss = -1;
1286         uint64_t edata = UINT64_MAX;
1287         uint64_t rewindto;
1288         struct tm t;
1289         char timestr[128];
1290
1291         if (!hdl->libzfs_printerr)
1292                 return;
1293
1294         if (reason >= 0)
1295                 (void) printf(dgettext(TEXT_DOMAIN, "action: "));
1296         else
1297                 (void) printf(dgettext(TEXT_DOMAIN, "\t"));
1298
1299         /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME is missing */
1300         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1301             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1302                 goto no_info;
1303
1304         (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1305         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
1306             &edata);
1307
1308         (void) printf(dgettext(TEXT_DOMAIN,
1309             "Recovery is possible, but will result in some data loss.\n"));
1310
1311         if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1312             strftime(timestr, 128, "%c", &t) != 0) {
1313                 (void) printf(dgettext(TEXT_DOMAIN,
1314                     "\tReturning the pool to its state as of %s\n"
1315                     "\tshould correct the problem.  "),
1316                     timestr);
1317         } else {
1318                 (void) printf(dgettext(TEXT_DOMAIN,
1319                     "\tReverting the pool to an earlier state "
1320                     "should correct the problem.\n\t"));
1321         }
1322
1323         if (loss > 120) {
1324                 (void) printf(dgettext(TEXT_DOMAIN,
1325                     "Approximately %lld minutes of data\n"
1326                     "\tmust be discarded, irreversibly.  "),
1327                     ((longlong_t)loss + 30) / 60);
1328         } else if (loss > 0) {
1329                 (void) printf(dgettext(TEXT_DOMAIN,
1330                     "Approximately %lld seconds of data\n"
1331                     "\tmust be discarded, irreversibly.  "),
1332                     (longlong_t)loss);
1333         }
1334         if (edata != 0 && edata != UINT64_MAX) {
1335                 if (edata == 1) {
1336                         (void) printf(dgettext(TEXT_DOMAIN,
1337                             "After rewind, at least\n"
1338                             "\tone persistent user-data error will remain.  "));
1339                 } else {
1340                         (void) printf(dgettext(TEXT_DOMAIN,
1341                             "After rewind, several\n"
1342                             "\tpersistent user-data errors will remain.  "));
1343                 }
1344         }
1345         (void) printf(dgettext(TEXT_DOMAIN,
1346             "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
1347             reason >= 0 ? "clear" : "import", name);
1348
1349         (void) printf(dgettext(TEXT_DOMAIN,
1350             "A scrub of the pool\n"
1351             "\tis strongly recommended after recovery.\n"));
1352         return;
1353
1354 no_info:
1355         (void) printf(dgettext(TEXT_DOMAIN,
1356             "Destroy and re-create the pool from\n\ta backup source.\n"));
1357 }
1358
1359 /*
1360  * zpool_import() is a contracted interface; it should be kept the same
1361  * if possible.
1362  *
1363  * Applications should use zpool_import_props() to import a pool with
1364  * new property values to be set.
1365  */
1366 int
1367 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1368     char *altroot)
1369 {
1370         nvlist_t *props = NULL;
1371         int ret;
1372
1373         if (altroot != NULL) {
1374                 if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1375                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1376                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1377                             newname));
1378                 }
1379
1380                 if (nvlist_add_string(props,
1381                     zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
1382                     nvlist_add_string(props,
1383                     zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
1384                         nvlist_free(props);
1385                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1386                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1387                             newname));
1388                 }
1389         }
1390
1391         ret = zpool_import_props(hdl, config, newname, props,
1392             ZFS_IMPORT_NORMAL);
1393         if (props)
1394                 nvlist_free(props);
1395         return (ret);
1396 }
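/*
 * Illustrative use (not part of the original source): import a pool found
 * by zpool_find_import() under an alternate root.
 *
 *      ret = zpool_import(hdl, config, NULL, "/mnt");
 */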
1397
1398 static void
1399 print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
1400     int indent)
1401 {
1402         nvlist_t **child;
1403         uint_t c, children;
1404         char *vname;
1405         uint64_t is_log = 0;
1406
1407         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
1408             &is_log);
1409
1410         if (name != NULL)
1411                 (void) printf("\t%*s%s%s\n", indent, "", name,
1412                     is_log ? " [log]" : "");
1413
1414         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1415             &child, &children) != 0)
1416                 return;
1417
1418         for (c = 0; c < children; c++) {
1419                 vname = zpool_vdev_name(hdl, NULL, child[c], B_TRUE);
1420                 print_vdev_tree(hdl, vname, child[c], indent + 2);
1421                 free(vname);
1422         }
1423 }
1424
1425 /*
1426  * Import the given pool using the known configuration and a list of
1427  * properties to be set. The configuration should have come from
1428  * zpool_find_import(). The 'newname' parameter controls whether the pool
1429  * is imported with a different name.
1430  */
1431 int
1432 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1433     nvlist_t *props, int flags)
1434 {
1435         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1436         zpool_rewind_policy_t policy;
1437         nvlist_t *nv = NULL;
1438         nvlist_t *nvinfo = NULL;
1439         nvlist_t *missing = NULL;
1440         char *thename;
1441         char *origname;
1442         int ret;
1443         int error = 0;
1444         char errbuf[1024];
1445
1446         verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1447             &origname) == 0);
1448
1449         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1450             "cannot import pool '%s'"), origname);
1451
1452         if (newname != NULL) {
1453                 if (!zpool_name_valid(hdl, B_FALSE, newname))
1454                         return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1455                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1456                             newname));
1457                 thename = (char *)newname;
1458         } else {
1459                 thename = origname;
1460         }
1461
1462         if (props) {
1463                 uint64_t version;
1464                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
1465
1466                 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1467                     &version) == 0);
1468
1469                 if ((props = zpool_valid_proplist(hdl, origname,
1470                     props, version, flags, errbuf)) == NULL) {
1471                         return (-1);
1472                 } else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1473                         nvlist_free(props);
1474                         return (-1);
1475                 }
1476         }
1477
1478         (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1479
1480         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1481             &zc.zc_guid) == 0);
1482
1483         if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1484                 nvlist_free(props);
1485                 return (-1);
1486         }
1487         if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
1488                 nvlist_free(props);
1489                 return (-1);
1490         }
1491
1492         zc.zc_cookie = flags;
1493         while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
1494             errno == ENOMEM) {
1495                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
1496                         zcmd_free_nvlists(&zc);
1497                         return (-1);
1498                 }
1499         }
1500         if (ret != 0)
1501                 error = errno;
1502
1503         (void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
1504         zpool_get_rewind_policy(config, &policy);
1505
1506         if (error) {
1507                 char desc[1024];
1508
1509                 /*
1510                  * Dry-run failed, but we print out what success
1511                  * looks like if we found a best txg.
1512                  */
1513                 if (policy.zrp_request & ZPOOL_TRY_REWIND) {
1514                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1515                             B_TRUE, nv);
1516                         nvlist_free(nv);
1517                         return (-1);
1518                 }
1519
1520                 if (newname == NULL)
1521                         (void) snprintf(desc, sizeof (desc),
1522                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1523                             thename);
1524                 else
1525                         (void) snprintf(desc, sizeof (desc),
1526                             dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1527                             origname, thename);
1528
1529                 switch (error) {
1530                 case ENOTSUP:
1531                         /*
1532                          * Unsupported version.
1533                          */
1534                         (void) zfs_error(hdl, EZFS_BADVERSION, desc);
1535                         break;
1536
1537                 case EINVAL:
1538                         (void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1539                         break;
1540
1541                 case EROFS:
1542                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1543                             "one or more devices is read only"));
1544                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
1545                         break;
1546
1547                 case ENXIO:
1548                         if (nv && nvlist_lookup_nvlist(nv,
1549                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1550                             nvlist_lookup_nvlist(nvinfo,
1551                             ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
1552                                 (void) printf(dgettext(TEXT_DOMAIN,
1553                                     "The devices below are missing, use "
1554                                     "'-m' to import the pool anyway:\n"));
1555                                 print_vdev_tree(hdl, NULL, missing, 2);
1556                                 (void) printf("\n");
1557                         }
1558                         (void) zpool_standard_error(hdl, error, desc);
1559                         break;
1560
1561                 case EEXIST:
1562                         (void) zpool_standard_error(hdl, error, desc);
1563                         break;
1564
1565                 default:
1566                         (void) zpool_standard_error(hdl, error, desc);
1567                         zpool_explain_recover(hdl,
1568                             newname ? origname : thename, -error, nv);
1569                         break;
1570                 }
1571
1572                 nvlist_free(nv);
1573                 ret = -1;
1574         } else {
1575                 zpool_handle_t *zhp;
1576
1577                 /*
1578                  * This should never fail, but play it safe anyway.
1579                  */
1580                 if (zpool_open_silent(hdl, thename, &zhp) != 0)
1581                         ret = -1;
1582                 else if (zhp != NULL)
1583                         zpool_close(zhp);
1584                 if (policy.zrp_request &
1585                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
1586                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1587                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv);
1588                 }
1589                 nvlist_free(nv);
1590                 return (0);
1591         }
1592
1593         zcmd_free_nvlists(&zc);
1594         nvlist_free(props);
1595
1596         return (ret);
1597 }
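
/*
 * Usage sketch (illustrative only): import under a new name with a
 * property override.  'hdl' and 'config' are assumed valid; "newtank"
 * is a hypothetical pool name.
 *
 *    nvlist_t *props = NULL;
 *    int err;
 *
 *    verify(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
 *    verify(nvlist_add_string(props,
 *        zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") == 0);
 *    err = zpool_import_props(hdl, config, "newtank", props,
 *        ZFS_IMPORT_NORMAL);
 *    nvlist_free(props);
 */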
1598
1599 /*
1600  * Scan the pool.
1601  */
1602 int
1603 zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
1604 {
1605         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1606         char msg[1024];
1607         libzfs_handle_t *hdl = zhp->zpool_hdl;
1608
1609         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1610         zc.zc_cookie = func;
1611
1612         if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
1613             (errno == ENOENT && func != POOL_SCAN_NONE))
1614                 return (0);
1615
1616         if (func == POOL_SCAN_SCRUB) {
1617                 (void) snprintf(msg, sizeof (msg),
1618                     dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
1619         } else if (func == POOL_SCAN_NONE) {
1620                 (void) snprintf(msg, sizeof (msg),
1621                     dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
1622                     zc.zc_name);
1623         } else {
1624                 assert(!"unexpected result");
1625         }
1626
1627         if (errno == EBUSY) {
1628                 nvlist_t *nvroot;
1629                 pool_scan_stat_t *ps = NULL;
1630                 uint_t psc;
1631
1632                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
1633                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
1634                 (void) nvlist_lookup_uint64_array(nvroot,
1635                     ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
1636                 if (ps && ps->pss_func == POOL_SCAN_SCRUB)
1637                         return (zfs_error(hdl, EZFS_SCRUBBING, msg));
1638                 else
1639                         return (zfs_error(hdl, EZFS_RESILVERING, msg));
1640         } else if (errno == ENOENT) {
1641                 return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
1642         } else {
1643                 return (zpool_standard_error(hdl, errno, msg));
1644         }
1645 }
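
/*
 * Usage sketch (illustrative only): start a scrub on an open pool
 * handle 'zhp', reporting failure through the libzfs handle 'hdl'.
 *
 *    if (zpool_scan(zhp, POOL_SCAN_SCRUB) != 0)
 *            (void) fprintf(stderr, "%s\n", libzfs_error_description(hdl));
 *
 * A scrub already in progress can be cancelled with
 * zpool_scan(zhp, POOL_SCAN_NONE).
 */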
1646
1647 /*
1648  * Find a vdev that matches the specified search criteria. We use the
1649  * nvpair name to determine how we should look for the device.
1650  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
1651  * spare; to FALSE if it's an INUSE spare.
1652  */
1653 static nvlist_t *
1654 vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
1655     boolean_t *l2cache, boolean_t *log)
1656 {
1657         uint_t c, children;
1658         nvlist_t **child;
1659         nvlist_t *ret;
1660         uint64_t is_log;
1661         char *srchkey;
1662         nvpair_t *pair = nvlist_next_nvpair(search, NULL);
1663
1664         /* Nothing to look for */
1665         if (search == NULL || pair == NULL)
1666                 return (NULL);
1667
1668         /* Obtain the key we will use to search */
1669         srchkey = nvpair_name(pair);
1670
1671         switch (nvpair_type(pair)) {
1672         case DATA_TYPE_UINT64:
1673                 if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
1674                         uint64_t srchval, theguid;
1675
1676                         verify(nvpair_value_uint64(pair, &srchval) == 0);
1677                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
1678                             &theguid) == 0);
1679                         if (theguid == srchval)
1680                                 return (nv);
1681                 }
1682                 break;
1683
1684         case DATA_TYPE_STRING: {
1685                 char *srchval, *val;
1686
1687                 verify(nvpair_value_string(pair, &srchval) == 0);
1688                 if (nvlist_lookup_string(nv, srchkey, &val) != 0)
1689                         break;
1690
1691                 /*
1692                  * Search for the requested value. Special cases:
1693                  *
1694                  * - ZPOOL_CONFIG_PATH for whole disk entries.  These end with a
1695                  *   partition suffix "1", "-part1", or "p1".  The suffix is hidden
1696                  *   from the user, but included in the string, so this matches around
1697                  *   it.
1698                  * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
1699                  *
1700                  * Otherwise, all other searches are simple string compares.
1701                  */
1702                 if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) {
1703                         uint64_t wholedisk = 0;
1704
1705                         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1706                             &wholedisk);
1707                         if (wholedisk) {
1708                                 char buf[MAXPATHLEN];
1709
1710                                 zfs_append_partition(srchval, buf, sizeof (buf));
1711                                 if (strcmp(val, buf) == 0)
1712                                         return (nv);
1713
1714                                 break;
1715                         }
1716                 } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
1717                         char *type, *idx, *end, *p;
1718                         uint64_t id, vdev_id;
1719
1720                         /*
1721                          * Determine our vdev type, keeping in mind
1722                          * that the srchval is composed of a type and
1723                          * vdev id pair (i.e. mirror-4).
1724                  * vdev id pair (e.g. mirror-4).
1725                         if ((type = strdup(srchval)) == NULL)
1726                                 return (NULL);
1727
1728                         if ((p = strrchr(type, '-')) == NULL) {
1729                                 free(type);
1730                                 break;
1731                         }
1732                         idx = p + 1;
1733                         *p = '\0';
1734
1735                         /*
1736                          * If the types don't match then keep looking.
1737                          */
1738                         if (strncmp(val, type, strlen(val)) != 0) {
1739                                 free(type);
1740                                 break;
1741                         }
1742
1743                         verify(strncmp(type, VDEV_TYPE_RAIDZ,
1744                             strlen(VDEV_TYPE_RAIDZ)) == 0 ||
1745                             strncmp(type, VDEV_TYPE_MIRROR,
1746                             strlen(VDEV_TYPE_MIRROR)) == 0);
1747                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
1748                             &id) == 0);
1749
1750                         errno = 0;
1751                         vdev_id = strtoull(idx, &end, 10);
1752
1753                         free(type);
1754                         if (errno != 0)
1755                                 return (NULL);
1756
1757                         /*
1758                          * Now verify that we have the correct vdev id.
1759                          */
1760                         if (vdev_id == id)
1761                                 return (nv);
1762                 }
1763
1764                 /*
1765                  * Common case
1766                  */
1767                 if (strcmp(srchval, val) == 0)
1768                         return (nv);
1769                 break;
1770         }
1771
1772         default:
1773                 break;
1774         }
1775
1776         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1777             &child, &children) != 0)
1778                 return (NULL);
1779
1780         for (c = 0; c < children; c++) {
1781                 if ((ret = vdev_to_nvlist_iter(child[c], search,
1782                     avail_spare, l2cache, NULL)) != NULL) {
1783                         /*
1784                          * The 'is_log' value is only set for the toplevel
1785                          * vdev, not the leaf vdevs.  So we always lookup the
1786                          * log device from the root of the vdev tree (where
1787                          * 'log' is non-NULL).
1788                          */
1789                         if (log != NULL &&
1790                             nvlist_lookup_uint64(child[c],
1791                             ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
1792                             is_log) {
1793                                 *log = B_TRUE;
1794                         }
1795                         return (ret);
1796                 }
1797         }
1798
1799         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1800             &child, &children) == 0) {
1801                 for (c = 0; c < children; c++) {
1802                         if ((ret = vdev_to_nvlist_iter(child[c], search,
1803                             avail_spare, l2cache, NULL)) != NULL) {
1804                                 *avail_spare = B_TRUE;
1805                                 return (ret);
1806                         }
1807                 }
1808         }
1809
1810         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1811             &child, &children) == 0) {
1812                 for (c = 0; c < children; c++) {
1813                         if ((ret = vdev_to_nvlist_iter(child[c], search,
1814                             avail_spare, l2cache, NULL)) != NULL) {
1815                                 *l2cache = B_TRUE;
1816                                 return (ret);
1817                         }
1818                 }
1819         }
1820
1821         return (NULL);
1822 }
1823
1824 /*
1825  * Given a physical path (minus the "/devices" prefix), find the
1826  * associated vdev.
1827  */
1828 nvlist_t *
1829 zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
1830     boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
1831 {
1832         nvlist_t *search, *nvroot, *ret;
1833
1834         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1835         verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
1836
1837         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1838             &nvroot) == 0);
1839
1840         *avail_spare = B_FALSE;
1841         *l2cache = B_FALSE;
1842         if (log != NULL)
1843                 *log = B_FALSE;
1844         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
1845         nvlist_free(search);
1846
1847         return (ret);
1848 }
1849
1850 /*
1851  * Determine if we have an "interior" top-level vdev (i.e. mirror/raidz).
1852  */
1853 boolean_t
1854 zpool_vdev_is_interior(const char *name)
1855 {
1856         if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
1857             strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
1858                 return (B_TRUE);
1859         return (B_FALSE);
1860 }
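
/*
 * For example (illustrative): zpool_vdev_is_interior("mirror-0") and
 * zpool_vdev_is_interior("raidz-2") return B_TRUE, while a leaf device
 * name such as "sda" returns B_FALSE.
 */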
1861
1862 nvlist_t *
1863 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
1864     boolean_t *l2cache, boolean_t *log)
1865 {
1866         char buf[MAXPATHLEN];
1867         char *end;
1868         nvlist_t *nvroot, *search, *ret;
1869         uint64_t guid;
1870
1871         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1872
1873         guid = strtoull(path, &end, 10);
1874         if (guid != 0 && *end == '\0') {
1875                 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
1876         } else if (zpool_vdev_is_interior(path)) {
1877                 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
1878         } else if (path[0] != '/') {
1879                 if (zfs_resolve_shortname(path, buf, sizeof (buf)) < 0) {
1880                         nvlist_free(search);
1881                         return (NULL);
1882                 }
1883                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
1884         } else {
1885                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
1886         }
1887
1888         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1889             &nvroot) == 0);
1890
1891         *avail_spare = B_FALSE;
1892         *l2cache = B_FALSE;
1893         if (log != NULL)
1894                 *log = B_FALSE;
1895         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
1896         nvlist_free(search);
1897
1898         return (ret);
1899 }
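
/*
 * Usage sketch (illustrative only): 'path' may be a vdev guid in
 * decimal, an interior vdev name such as "mirror-1", a short device
 * name such as "sda", or an absolute path.  'zhp' is assumed to be an
 * open pool handle.
 *
 *    boolean_t spare, l2cache, islog;
 *    nvlist_t *tgt;
 *
 *    tgt = zpool_find_vdev(zhp, "mirror-1", &spare, &l2cache, &islog);
 *    if (tgt == NULL)
 *            (void) fprintf(stderr, "no such vdev\n");
 */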
1900
1901 static int
1902 vdev_online(nvlist_t *nv)
1903 {
1904         uint64_t ival;
1905
1906         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
1907             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
1908             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
1909                 return (0);
1910
1911         return (1);
1912 }
1913
1914 /*
1915  * Helper function for zpool_get_physpaths().
1916  */
1917 static int
1918 vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
1919     size_t *bytes_written)
1920 {
1921         size_t bytes_left, pos, rsz;
1922         char *tmppath;
1923         const char *format;
1924
1925         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
1926             &tmppath) != 0)
1927                 return (EZFS_NODEVICE);
1928
1929         pos = *bytes_written;
1930         bytes_left = physpath_size - pos;
1931         format = (pos == 0) ? "%s" : " %s";
1932
1933         rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
1934         *bytes_written += rsz;
1935
1936         if (rsz >= bytes_left) {
1937                 /* if physpath was not copied properly, clear it */
1938                 if (bytes_left != 0) {
1939                         physpath[pos] = 0;
1940                 }
1941                 return (EZFS_NOSPC);
1942         }
1943         return (0);
1944 }
1945
1946 static int
1947 vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
1948     size_t *rsz, boolean_t is_spare)
1949 {
1950         char *type;
1951         int ret;
1952
1953         if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
1954                 return (EZFS_INVALCONFIG);
1955
1956         if (strcmp(type, VDEV_TYPE_DISK) == 0) {
1957                 /*
1958                  * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
1959                  * For a spare vdev, we only want to boot from the active
1960                  * spare device.
1961                  */
1962                 if (is_spare) {
1963                         uint64_t spare = 0;
1964                         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
1965                             &spare);
1966                         if (!spare)
1967                                 return (EZFS_INVALCONFIG);
1968                 }
1969
1970                 if (vdev_online(nv)) {
1971                         if ((ret = vdev_get_one_physpath(nv, physpath,
1972                             phypath_size, rsz)) != 0)
1973                                 return (ret);
1974                 }
1975         } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
1976             strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
1977             (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
1978                 nvlist_t **child;
1979                 uint_t count;
1980                 int i, ret;
1981
1982                 if (nvlist_lookup_nvlist_array(nv,
1983                     ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
1984                         return (EZFS_INVALCONFIG);
1985
1986                 for (i = 0; i < count; i++) {
1987                         ret = vdev_get_physpaths(child[i], physpath,
1988                             phypath_size, rsz, is_spare);
1989                         if (ret == EZFS_NOSPC)
1990                                 return (ret);
1991                 }
1992         }
1993
1994         return (EZFS_POOL_INVALARG);
1995 }
1996
1997 /*
1998  * Get phys_path for a root pool config.
1999  * Return 0 on success; non-zero on failure.
2000  */
2001 static int
2002 zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
2003 {
2004         size_t rsz;
2005         nvlist_t *vdev_root;
2006         nvlist_t **child;
2007         uint_t count;
2008         char *type;
2009
2010         rsz = 0;
2011
2012         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2013             &vdev_root) != 0)
2014                 return (EZFS_INVALCONFIG);
2015
2016         if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2017             nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
2018             &child, &count) != 0)
2019                 return (EZFS_INVALCONFIG);
2020
2021         /*
2022          * A root pool cannot have EFI labeled disks and can only have
2023          * a single top-level vdev.
2024          */
2025         if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 ||
2026             pool_uses_efi(vdev_root))
2027                 return (EZFS_POOL_INVALARG);
2028
2029         (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2030             B_FALSE);
2031
2032         /* No online devices */
2033         if (rsz == 0)
2034                 return (EZFS_NODEVICE);
2035
2036         return (0);
2037 }
2038
2039 /*
2040  * Get phys_path for a root pool
2041  * Return 0 on success; non-zero on failure.
2042  */
2043 int
2044 zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2045 {
2046         return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2047             phypath_size));
2048 }
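
/*
 * Usage sketch (illustrative only): fetch the space-separated physical
 * paths of a bootable root pool's devices.
 *
 *    char physpath[MAXPATHLEN];
 *
 *    if (zpool_get_physpath(zhp, physpath, sizeof (physpath)) == 0)
 *            (void) printf("%s\n", physpath);
 */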
2049
2050 /*
2051  * If the device has been dynamically expanded then we need to relabel
2052  * the disk to use the new unallocated space.
2053  */
2054 static int
2055 zpool_relabel_disk(libzfs_handle_t *hdl, const char *path)
2056 {
2057         char errbuf[1024];
2058         int fd, error;
2059
        /* Set up the error message prefix used by the failure paths below. */
        (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
            "cannot relabel '%s'"), path);

2060         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
2061                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2062                     "relabel '%s': unable to open device"), path);
2063                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
2064         }
2065
2066         /*
2067          * It's possible that we might encounter an error if the device
2068          * does not have any unallocated space left. If so, we simply
2069          * ignore that error and continue on.
2070          */
2071         error = efi_use_whole_disk(fd);
2072         (void) close(fd);
2073         if (error && error != VT_ENOSPC) {
2074                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2075                     "relabel '%s': unable to read disk capacity"), path);
2076                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
2077         }
2078         return (0);
2079 }
2080
2081 /*
2082  * Bring the specified vdev online.  The 'flags' parameter is a set of the
2083  * ZFS_ONLINE_* flags.
2084  */
2085 int
2086 zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
2087     vdev_state_t *newstate)
2088 {
2089         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2090         char msg[1024];
2091         nvlist_t *tgt;
2092         boolean_t avail_spare, l2cache, islog;
2093         libzfs_handle_t *hdl = zhp->zpool_hdl;
2094
2095         if (flags & ZFS_ONLINE_EXPAND) {
2096                 (void) snprintf(msg, sizeof (msg),
2097                     dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
2098         } else {
2099                 (void) snprintf(msg, sizeof (msg),
2100                     dgettext(TEXT_DOMAIN, "cannot online %s"), path);
2101         }
2102
2103         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2104         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2105             &islog)) == NULL)
2106                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2107
2108         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2109
2110         if (avail_spare)
2111                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2112
2113         if (flags & ZFS_ONLINE_EXPAND ||
2114             zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
2115                 char *pathname = NULL;
2116                 uint64_t wholedisk = 0;
2117
2118                 (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
2119                     &wholedisk);
2120                 verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
2121                     &pathname) == 0);
2122
2123                 /*
2124                  * XXX - L2ARC 1.0 devices can't support expansion.
2125                  */
2126                 if (l2cache) {
2127                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2128                             "cannot expand cache devices"));
2129                         return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
2130                 }
2131
2132                 if (wholedisk) {
2133                         pathname += strlen(DISK_ROOT) + 1;
2134                         (void) zpool_relabel_disk(hdl, pathname);
2135                 }
2136         }
2137
2138         zc.zc_cookie = VDEV_STATE_ONLINE;
2139         zc.zc_obj = flags;
2140
2141         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
2142                 if (errno == EINVAL) {
2143                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
2144                             "from this pool into a new one.  Use '%s' "
2145                             "instead"), "zpool detach");
2146                         return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
2147                 }
2148                 return (zpool_standard_error(hdl, errno, msg));
2149         }
2150
2151         *newstate = zc.zc_cookie;
2152         return (0);
2153 }
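
/*
 * Usage sketch (illustrative only): online a device and ask for
 * expansion into any newly available space; "sda" is hypothetical.
 *
 *    vdev_state_t newstate;
 *
 *    if (zpool_vdev_online(zhp, "sda", ZFS_ONLINE_EXPAND,
 *        &newstate) == 0 && newstate != VDEV_STATE_HEALTHY)
 *            (void) fprintf(stderr, "device is not healthy\n");
 */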
2154
2155 /*
2156  * Take the specified vdev offline
2157  */
2158 int
2159 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2160 {
2161         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2162         char msg[1024];
2163         nvlist_t *tgt;
2164         boolean_t avail_spare, l2cache;
2165         libzfs_handle_t *hdl = zhp->zpool_hdl;
2166
2167         (void) snprintf(msg, sizeof (msg),
2168             dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2169
2170         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2171         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2172             NULL)) == NULL)
2173                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2174
2175         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2176
2177         if (avail_spare)
2178                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2179
2180         zc.zc_cookie = VDEV_STATE_OFFLINE;
2181         zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2182
2183         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2184                 return (0);
2185
2186         switch (errno) {
2187         case EBUSY:
2188
2189                 /*
2190                  * There are no other replicas of this device.
2191                  */
2192                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2193
2194         case EEXIST:
2195                 /*
2196                  * The log device has unplayed logs
2197                  */
2198                 return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2199
2200         default:
2201                 return (zpool_standard_error(hdl, errno, msg));
2202         }
2203 }
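
/*
 * Usage sketch (illustrative only): temporarily offline a device; a
 * temporary offline does not persist across an export/import cycle.
 * "sda" is a hypothetical device name.
 *
 *    if (zpool_vdev_offline(zhp, "sda", B_TRUE) != 0)
 *            (void) fprintf(stderr, "%s\n", libzfs_error_description(hdl));
 */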
2204
2205 /*
2206  * Mark the given vdev faulted.
2207  */
2208 int
2209 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2210 {
2211         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2212         char msg[1024];
2213         libzfs_handle_t *hdl = zhp->zpool_hdl;
2214
2215         (void) snprintf(msg, sizeof (msg),
2216            dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
2217
2218         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2219         zc.zc_guid = guid;
2220         zc.zc_cookie = VDEV_STATE_FAULTED;
2221         zc.zc_obj = aux;
2222
2223         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2224                 return (0);
2225
2226         switch (errno) {
2227         case EBUSY:
2228
2229                 /*
2230                  * There are no other replicas of this device.
2231                  */
2232                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2233
2234         default:
2235                 return (zpool_standard_error(hdl, errno, msg));
2236         }
2237
2238 }
2239
2240 /*
2241  * Mark the given vdev degraded.
2242  */
2243 int
2244 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2245 {
2246         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2247         char msg[1024];
2248         libzfs_handle_t *hdl = zhp->zpool_hdl;
2249
2250         (void) snprintf(msg, sizeof (msg),
2251            dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
2252
2253         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2254         zc.zc_guid = guid;
2255         zc.zc_cookie = VDEV_STATE_DEGRADED;
2256         zc.zc_obj = aux;
2257
2258         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2259                 return (0);
2260
2261         return (zpool_standard_error(hdl, errno, msg));
2262 }
2263
2264 /*
2265  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
2266  * a hot spare.
2267  */
2268 static boolean_t
2269 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
2270 {
2271         nvlist_t **child;
2272         uint_t c, children;
2273         char *type;
2274
2275         if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
2276             &children) == 0) {
2277                 verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
2278                     &type) == 0);
2279
2280                 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
2281                     children == 2 && child[which] == tgt)
2282                         return (B_TRUE);
2283
2284                 for (c = 0; c < children; c++)
2285                         if (is_replacing_spare(child[c], tgt, which))
2286                                 return (B_TRUE);
2287         }
2288
2289         return (B_FALSE);
2290 }
2291
2292 /*
2293  * Attach new_disk (fully described by nvroot) to old_disk.
2294  * If 'replacing' is specified, the new disk will replace the old one.
2295  */
2296 int
2297 zpool_vdev_attach(zpool_handle_t *zhp,
2298     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
2299 {
2300         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2301         char msg[1024];
2302         int ret;
2303         nvlist_t *tgt;
2304         boolean_t avail_spare, l2cache, islog;
2305         uint64_t val;
2306         char *newname;
2307         nvlist_t **child;
2308         uint_t children;
2309         nvlist_t *config_root;
2310         libzfs_handle_t *hdl = zhp->zpool_hdl;
2311         boolean_t rootpool = pool_is_bootable(zhp);
2312
2313         if (replacing)
2314                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2315                     "cannot replace %s with %s"), old_disk, new_disk);
2316         else
2317                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2318                     "cannot attach %s to %s"), new_disk, old_disk);
2319
2320         /*
2321          * If this is a root pool, make sure that we're not attaching an
2322          * EFI labeled device.
2323          */
2324         if (rootpool && pool_uses_efi(nvroot)) {
2325                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2326                     "EFI labeled devices are not supported on root pools."));
2327                 return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
2328         }
2329
2330         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2331         if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
2332             &islog)) == 0)
2333                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2334
2335         if (avail_spare)
2336                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2337
2338         if (l2cache)
2339                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2340
2341         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2342         zc.zc_cookie = replacing;
2343
2344         if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
2345             &child, &children) != 0 || children != 1) {
2346                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2347                     "new device must be a single disk"));
2348                 return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
2349         }
2350
2351         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
2352             ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
2353
2354         if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL)
2355                 return (-1);
2356
2357         /*
2358          * If the target is a hot spare that has been swapped in, we can only
2359          * replace it with another hot spare.
2360          */
2361         if (replacing &&
2362             nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
2363             (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
2364             NULL) == NULL || !avail_spare) &&
2365             is_replacing_spare(config_root, tgt, 1)) {
2366                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2367                     "can only be replaced by another hot spare"));
2368                 free(newname);
2369                 return (zfs_error(hdl, EZFS_BADTARGET, msg));
2370         }
2371
2372         free(newname);
2373
2374         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
2375                 return (-1);
2376
2377         ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);
2378
2379         zcmd_free_nvlists(&zc);
2380
2381         if (ret == 0) {
2382                 if (rootpool) {
2383                         /*
2384                          * XXX need a better way to prevent user from
2385                          * booting up a half-baked vdev.
2386                          */
2387                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
2388                             "sure to wait until resilver is done "
2389                             "before rebooting.\n"));
2390                 }
2391                 return (0);
2392         }
2393
2394         switch (errno) {
2395         case ENOTSUP:
2396                 /*
2397                  * Can't attach to or replace this type of vdev.
2398                  */
2399                 if (replacing) {
2400                         uint64_t version = zpool_get_prop_int(zhp,
2401                             ZPOOL_PROP_VERSION, NULL);
2402
2403                         if (islog)
2404                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2405                                     "cannot replace a log with a spare"));
2406                         else if (version >= SPA_VERSION_MULTI_REPLACE)
2407                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2408                                     "already in replacing/spare config; wait "
2409                                     "for completion or use 'zpool detach'"));
2410                         else
2411                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2412                                     "cannot replace a replacing device"));
2413                 } else {
2414                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2415                             "can only attach to mirrors and top-level "
2416                             "disks"));
2417                 }
2418                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2419                 break;
2420
2421         case EINVAL:
2422                 /*
2423                  * The new device must be a single disk.
2424                  */
2425                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2426                     "new device must be a single disk"));
2427                 (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
2428                 break;
2429
2430         case EBUSY:
2431                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
2432                     new_disk);
2433                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2434                 break;
2435
2436         case EOVERFLOW:
2437                 /*
2438                  * The new device is too small.
2439                  */
2440                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2441                     "device is too small"));
2442                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2443                 break;
2444
2445         case EDOM:
2446                 /*
2447                  * The new device has a different alignment requirement.
2448                  */
2449                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2450                     "devices have different sector alignment"));
2451                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2452                 break;
2453
2454         case ENAMETOOLONG:
2455                 /*
2456                  * The resulting top-level vdev spec won't fit in the label.
2457                  */
2458                 (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
2459                 break;
2460
2461         default:
2462                 (void) zpool_standard_error(hdl, errno, msg);
2463         }
2464
2465         return (-1);
2466 }
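
/*
 * Usage sketch (illustrative only): the zpool command builds 'nvroot'
 * via make_root_vdev(); a minimal hand-built equivalent that attaches
 * /dev/sdb1 as a mirror of /dev/sda1 (device paths hypothetical,
 * labeling elided) might look like:
 *
 *    nvlist_t *nvroot, *disk;
 *
 *    verify(nvlist_alloc(&disk, NV_UNIQUE_NAME, 0) == 0);
 *    verify(nvlist_add_string(disk, ZPOOL_CONFIG_TYPE,
 *        VDEV_TYPE_DISK) == 0);
 *    verify(nvlist_add_string(disk, ZPOOL_CONFIG_PATH,
 *        "/dev/sdb1") == 0);
 *    verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
 *    verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
 *        VDEV_TYPE_ROOT) == 0);
 *    verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
 *        &disk, 1) == 0);
 *    (void) zpool_vdev_attach(zhp, "/dev/sda1", "/dev/sdb1", nvroot, 0);
 *    nvlist_free(disk);
 *    nvlist_free(nvroot);
 */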
2467
2468 /*
2469  * Detach the specified device.
2470  */
2471 int
2472 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
2473 {
2474         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2475         char msg[1024];
2476         nvlist_t *tgt;
2477         boolean_t avail_spare, l2cache;
2478         libzfs_handle_t *hdl = zhp->zpool_hdl;
2479
2480         (void) snprintf(msg, sizeof (msg),
2481             dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
2482
2483         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2484         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2485             NULL)) == 0)
2486                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2487
2488         if (avail_spare)
2489                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2490
2491         if (l2cache)
2492                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2493
2494         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2495
2496         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
2497                 return (0);
2498
2499         switch (errno) {
2500
2501         case ENOTSUP:
2502                 /*
2503                  * Can't detach from this type of vdev.
2504                  */
2505                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
2506                     "applicable to mirror and replacing vdevs"));
2507                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2508                 break;
2509
2510         case EBUSY:
2511                 /*
2512                  * There are no other replicas of this device.
2513                  */
2514                 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
2515                 break;
2516
2517         default:
2518                 (void) zpool_standard_error(hdl, errno, msg);
2519         }
2520
2521         return (-1);
2522 }
2523
2524 /*
2525  * Find a mirror vdev in the source nvlist.
2526  *
2527  * The mchild array contains a list of disks in one of the top-level mirrors
2528  * of the source pool.  The schild array contains a list of disks that the
2529  * user specified on the command line.  We loop over the mchild array to
2530  * see if any entry in the schild array matches.
2531  *
2532  * If a disk in the mchild array is found in the schild array, we return
2533  * the index of that entry.  Otherwise we return -1.
2534  */
2535 static int
2536 find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
2537     nvlist_t **schild, uint_t schildren)
2538 {
2539         uint_t mc;
2540
2541         for (mc = 0; mc < mchildren; mc++) {
2542                 uint_t sc;
2543                 char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2544                     mchild[mc], B_FALSE);
2545
2546                 for (sc = 0; sc < schildren; sc++) {
2547                         char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2548                             schild[sc], B_FALSE);
2549                         boolean_t result = (strcmp(mpath, spath) == 0);
2550
2551                         free(spath);
2552                         if (result) {
2553                                 free(mpath);
2554                                 return (mc);
2555                         }
2556                 }
2557
2558                 free(mpath);
2559         }
2560
2561         return (-1);
2562 }
2563
2564 /*
2565  * Split a mirror pool.  If 'newroot' points to NULL, then a new nvlist
2566  * is generated and it is the responsibility of the caller to free it.
2567  */
2568 int
2569 zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
2570     nvlist_t *props, splitflags_t flags)
2571 {
2572         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2573         char msg[1024];
2574         nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
2575         nvlist_t **varray = NULL, *zc_props = NULL;
2576         uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
2577         libzfs_handle_t *hdl = zhp->zpool_hdl;
2578         uint64_t vers;
2579         boolean_t freelist = B_FALSE, memory_err = B_TRUE;
2580         int retval = 0;
2581
2582         (void) snprintf(msg, sizeof (msg),
2583             dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
2584
2585         if (!zpool_name_valid(hdl, B_FALSE, newname))
2586                 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
2587
2588         if ((config = zpool_get_config(zhp, NULL)) == NULL) {
2589                 (void) fprintf(stderr, gettext("Internal error: unable to "
2590                     "retrieve pool configuration\n"));
2591                 return (-1);
2592         }
2593
2594         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
2595             == 0);
2596         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
2597
2598         if (props) {
2599                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
2600                 if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
2601                     props, vers, flags, msg)) == NULL)
2602                         return (-1);
2603         }
2604
2605         if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
2606             &children) != 0) {
2607                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2608                     "Source pool is missing vdev tree"));
2609                 if (zc_props)
2610                         nvlist_free(zc_props);
2611                 return (-1);
2612         }
2613
2614         varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
2615         vcount = 0;
2616
2617         if (*newroot == NULL ||
2618             nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
2619             &newchild, &newchildren) != 0)
2620                 newchildren = 0;
2621
2622         for (c = 0; c < children; c++) {
2623                 uint64_t is_log = B_FALSE, is_hole = B_FALSE;
2624                 char *type;
2625                 nvlist_t **mchild, *vdev;
2626                 uint_t mchildren;
2627                 int entry;
2628
2629                 /*
2630                  * Unlike cache & spares, slogs are stored in the
2631                  * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
2632                  */
2633                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
2634                     &is_log);
2635                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
2636                     &is_hole);
2637                 if (is_log || is_hole) {
2638                         /*
2639                          * Create a hole vdev and put it in the config.
2640                          */
2641                         if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
2642                                 goto out;
2643                         if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
2644                             VDEV_TYPE_HOLE) != 0)
2645                                 goto out;
2646                         if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
2647                             1) != 0)
2648                                 goto out;
2649                         if (lastlog == 0)
2650                                 lastlog = vcount;
2651                         varray[vcount++] = vdev;
2652                         continue;
2653                 }
2654                 lastlog = 0;
2655                 verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
2656                     == 0);
2657                 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
2658                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2659                             "Source pool must be composed only of mirrors\n"));
2660                         retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
2661                         goto out;
2662                 }
2663
2664                 verify(nvlist_lookup_nvlist_array(child[c],
2665                     ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
2666
2667                 /* find or add an entry for this top-level vdev */
2668                 if (newchildren > 0 &&
2669                     (entry = find_vdev_entry(zhp, mchild, mchildren,
2670                     newchild, newchildren)) >= 0) {
2671                         /* We found a disk that the user specified. */
2672                         vdev = mchild[entry];
2673                         ++found;
2674                 } else {
2675                         /* User didn't specify a disk for this vdev. */
2676                         vdev = mchild[mchildren - 1];
2677                 }
2678
2679                 if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
2680                         goto out;
2681         }
2682
2683         /* did we find every disk the user specified? */
2684         if (found != newchildren) {
2685                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
2686                     "include at most one disk from each mirror"));
2687                 retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
2688                 goto out;
2689         }
2690
2691         /* Prepare the nvlist for populating. */
2692         if (*newroot == NULL) {
2693                 if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
2694                         goto out;
2695                 freelist = B_TRUE;
2696                 if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
2697                     VDEV_TYPE_ROOT) != 0)
2698                         goto out;
2699         } else {
2700                 verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
2701         }
2702
2703         /* Add all the children we found */
2704         if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
2705             lastlog == 0 ? vcount : lastlog) != 0)
2706                 goto out;
2707
2708         /*
2709          * If we're just doing a dry run, exit now with success.
2710          */
2711         if (flags.dryrun) {
2712                 memory_err = B_FALSE;
2713                 freelist = B_FALSE;
2714                 goto out;
2715         }
2716
2717         /* now build up the config list & call the ioctl */
2718         if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
2719                 goto out;
2720
2721         if (nvlist_add_nvlist(newconfig,
2722             ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
2723             nvlist_add_string(newconfig,
2724             ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
2725             nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
2726                 goto out;
2727
2728         /*
2729          * The new pool is automatically part of the namespace unless we
2730          * explicitly export it.
2731          */
2732         if (!flags.import)
2733                 zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
2734         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2735         (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
2736         if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
2737                 goto out;
2738         if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
2739                 goto out;
2740
2741         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
2742                 retval = zpool_standard_error(hdl, errno, msg);
2743                 goto out;
2744         }
2745
2746         freelist = B_FALSE;
2747         memory_err = B_FALSE;
2748
2749 out:
2750         if (varray != NULL) {
2751                 int v;
2752
2753                 for (v = 0; v < vcount; v++)
2754                         nvlist_free(varray[v]);
2755                 free(varray);
2756         }
2757         zcmd_free_nvlists(&zc);
2758         if (zc_props)
2759                 nvlist_free(zc_props);
2760         if (newconfig)
2761                 nvlist_free(newconfig);
2762         if (freelist) {
2763                 nvlist_free(*newroot);
2764                 *newroot = NULL;
2765         }
2766
2767         if (retval != 0)
2768                 return (retval);
2769
2770         if (memory_err)
2771                 return (no_memory(hdl));
2772
2773         return (0);
2774 }
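
/*
 * Usage sketch (illustrative only): dry-run a split to preview the
 * configuration the new pool would receive; "tank2" is hypothetical.
 *
 *    nvlist_t *newroot = NULL;
 *    splitflags_t flags = { 0 };
 *
 *    flags.dryrun = 1;
 *    if (zpool_vdev_split(zhp, "tank2", &newroot, NULL, flags) == 0)
 *            nvlist_free(newroot);
 *
 * with 'newroot' examined before being freed.
 */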
2775
2776 /*
2777  * Remove the given device.  Currently, this is supported only for hot spares
2778  * and level 2 cache devices.
2779  */
2780 int
2781 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
2782 {
2783         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2784         char msg[1024];
2785         nvlist_t *tgt;
2786         boolean_t avail_spare, l2cache, islog;
2787         libzfs_handle_t *hdl = zhp->zpool_hdl;
2788         uint64_t version;
2789
2790         (void) snprintf(msg, sizeof (msg),
2791             dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
2792
2793         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2794         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2795             &islog)) == 0)
2796                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2797         /*
2798          * XXX - this should just go away.
2799          */
2800         if (!avail_spare && !l2cache && !islog) {
2801                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2802                     "only inactive hot spares, cache, top-level, "
2803                     "or log devices can be removed"));
2804                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2805         }
2806
2807         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
2808         if (islog && version < SPA_VERSION_HOLES) {
2809                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2810                     "pool must be upgraded to support log removal"));
2811                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
2812         }
2813
2814         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2815
2816         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
2817                 return (0);
2818
2819         return (zpool_standard_error(hdl, errno, msg));
2820 }
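
/*
 * Usage sketch (illustrative only): remove an inactive hot spare or
 * cache device; "sdc" is a hypothetical device name.
 *
 *    if (zpool_vdev_remove(zhp, "sdc") != 0)
 *            (void) fprintf(stderr, "%s\n", libzfs_error_description(hdl));
 */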
2821
2822 /*
2823  * Clear the errors for the pool, or the particular device if specified.
2824  */
2825 int
2826 zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
2827 {
2828         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2829         char msg[1024];
2830         nvlist_t *tgt;
2831         zpool_rewind_policy_t policy;
2832         boolean_t avail_spare, l2cache;
2833         libzfs_handle_t *hdl = zhp->zpool_hdl;
2834         nvlist_t *nvi = NULL;
2835         int error;
2836
2837         if (path)
2838                 (void) snprintf(msg, sizeof (msg),
2839                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
2840                     path);
2841         else
2842                 (void) snprintf(msg, sizeof (msg),
2843                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
2844                     zhp->zpool_name);
2845
2846         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2847         if (path) {
2848                 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
2849                     &l2cache, NULL)) == 0)
2850                         return (zfs_error(hdl, EZFS_NODEVICE, msg));
2851
2852                 /*
2853                  * Don't allow error clearing for hot spares.  Do allow
2854                  * error clearing for l2cache devices.
2855                  */
2856                 if (avail_spare)
2857                         return (zfs_error(hdl, EZFS_ISSPARE, msg));
2858
2859                 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
2860                     &zc.zc_guid) == 0);
2861         }
2862
2863         zpool_get_rewind_policy(rewindnvl, &policy);
2864         zc.zc_cookie = policy.zrp_request;
2865
2866         if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
2867                 return (-1);
2868
2869         if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
2870                 return (-1);
2871
2872         while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
2873             errno == ENOMEM) {
2874                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
2875                         zcmd_free_nvlists(&zc);
2876                         return (-1);
2877                 }
2878         }
2879
2880         if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
2881             errno != EPERM && errno != EACCES)) {
2882                 if (policy.zrp_request &
2883                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
2884                         (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
2885                         zpool_rewind_exclaim(hdl, zc.zc_name,
2886                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
2887                             nvi);
2888                         nvlist_free(nvi);
2889                 }
2890                 zcmd_free_nvlists(&zc);
2891                 return (0);
2892         }
2893
2894         zcmd_free_nvlists(&zc);
2895         return (zpool_standard_error(hdl, errno, msg));
2896 }
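
/*
 * Illustrative sketch (not part of the original file): how a caller such
 * as zpool(8) might invoke zpool_clear() without requesting a rewind.
 * The helper name clear_pool_errors() is hypothetical.
 */
#if 0
static int
clear_pool_errors(zpool_handle_t *zhp)
{
        nvlist_t *policy;
        int err;

        if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0)
                return (-1);

        if (nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST,
            ZPOOL_NO_REWIND) != 0) {
                nvlist_free(policy);
                return (-1);
        }

        /* a NULL path clears errors for every device in the pool */
        err = zpool_clear(zhp, NULL, policy);
        nvlist_free(policy);
        return (err);
}
#endif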
2897
2898 /*
2899  * Similar to zpool_clear(), but takes a GUID (used by fmd).
2900  */
2901 int
2902 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
2903 {
2904         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2905         char msg[1024];
2906         libzfs_handle_t *hdl = zhp->zpool_hdl;
2907
2908         (void) snprintf(msg, sizeof (msg),
2909             dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
2910             (u_longlong_t)guid);
2911
2912         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2913         zc.zc_guid = guid;
2914         zc.zc_cookie = ZPOOL_NO_REWIND;
2915
2916         if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
2917                 return (0);
2918
2919         return (zpool_standard_error(hdl, errno, msg));
2920 }
2921
2922 /*
2923  * Convert from a devid string to a path.
2924  */
2925 static char *
2926 devid_to_path(char *devid_str)
2927 {
2928         ddi_devid_t devid;
2929         char *minor;
2930         char *path;
2931         devid_nmlist_t *list = NULL;
2932         int ret;
2933
2934         if (devid_str_decode(devid_str, &devid, &minor) != 0)
2935                 return (NULL);
2936
2937         ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
2938
2939         devid_str_free(minor);
2940         devid_free(devid);
2941
2942         if (ret != 0)
2943                 return (NULL);
2944
2945         path = strdup(list[0].devname);
2946
2947         /* free the list only after duplicating the name it owns */
2948         devid_free_nmlist(list);
2949
2950         return (path);
2951 }
2952
2953 /*
2954  * Convert from a path to a devid string.
2955  */
2956 static char *
2957 path_to_devid(const char *path)
2958 {
2959         int fd;
2960         ddi_devid_t devid;
2961         char *minor, *ret;
2962
2963         if ((fd = open(path, O_RDONLY)) < 0)
2964                 return (NULL);
2965
2966         minor = NULL;
2967         ret = NULL;
2968         if (devid_get(fd, &devid) == 0) {
2969                 if (devid_get_minor_name(fd, &minor) == 0)
2970                         ret = devid_str_encode(devid, minor);
2971                 if (minor != NULL)
2972                         devid_str_free(minor);
2973                 devid_free(devid);
2974         }
2975         (void) close(fd);
2976
2977         return (ret);
2978 }
2979
2980 /*
2981  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
2982  * ignore any failure here, since a common case is for an unprivileged user to
2983  * type 'zpool status', and we'll display the correct information anyway.
2984  */
2985 static void
2986 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
2987 {
2988         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2989
2990         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2991         (void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value));
2992         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2993             &zc.zc_guid) == 0);
2994
2995         (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
2996 }
2997
2998 /*
2999  * Remove partition suffix from a vdev path.  Partition suffixes may take three
3000  * forms: "-partX", "pX", or "X", where X is a string of digits.  The second
3001  * case only occurs when the suffix is preceded by a digit, i.e. "md0p0".  The
3002  * third case only occurs when preceded by a string matching the regular
3003  * expression "^[hs]d[a-z]+", i.e. a scsi or ide disk.
3004  */
3005 static char *
3006 strip_partition(libzfs_handle_t *hdl, char *path)
3007 {
3008         char *tmp = zfs_strdup(hdl, path);
3009         char *part = NULL, *d = NULL;
3010
3011         if ((part = strstr(tmp, "-part")) && part != tmp) {
3012                 d = part + 5;
3013         } else if ((part = strrchr(tmp, 'p')) &&
3014             part > tmp + 1 && isdigit(*(part-1))) {
3015                 d = part + 1;
3016         } else if ((tmp[0] == 'h' || tmp[0] == 's') && tmp[1] == 'd') {
3017                 for (d = &tmp[2]; isalpha(*d); part = ++d);
3018         }
3019         if (part && d && *d != '\0') {
3020                 for (; isdigit(*d); d++);
3021                 if (*d == '\0')
3022                         *part = '\0';
3023         }
3024         return (tmp);
3025 }
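
/*
 * Worked examples for strip_partition() (illustrative only; no such test
 * exists in this file):
 *
 *      "scsi-SATA_disk-part1"  -> "scsi-SATA_disk"     ("-partX" form)
 *      "md0p3"                 -> "md0"                ("pX" after a digit)
 *      "sda9"                  -> "sda"                ("X" after ^[hs]d[a-z]+)
 *      "sda"                   -> "sda"                (no suffix, unchanged)
 */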
3026
3027 #define PATH_BUF_LEN    64
3028
3029 /*
3030  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
3031  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3032  * We also check if this is a whole disk, in which case we strip off the
3033  * trailing 's0' slice name.
3034  *
3035  * This routine is also responsible for identifying when disks have been
3036  * reconfigured in a new location.  The kernel will have opened the device by
3037  * devid, but the path will still refer to the old location.  To catch this, we
3038  * first do a path -> devid translation (which is fast for the common case).  If
3039  * the devid matches, we're done.  If not, we do a reverse devid -> path
3040  * translation and issue the appropriate ioctl() to update the path of the vdev.
3041  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
3042  * of these checks.
3043  */
3044 char *
3045 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
3046     boolean_t verbose)
3047 {
3048         char *path, *devid, *type;
3049         uint64_t value;
3050         char buf[PATH_BUF_LEN];
3051         vdev_stat_t *vs;
3052         uint_t vsc;
3053
3054         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
3055             &value) == 0) {
3056                 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3057                     &value) == 0);
3058                 (void) snprintf(buf, sizeof (buf), "%llu",
3059                     (u_longlong_t)value);
3060                 path = buf;
3061         } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
3062                 /*
3063                  * If the device is dead (faulted, offline, etc) then don't
3064                  * bother opening it.  Otherwise we may be forcing the user to
3065                  * open a misbehaving device, which can have undesirable
3066                  * effects.
3067                  */
3068                 if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
3069                     (uint64_t **)&vs, &vsc) != 0 ||
3070                     vs->vs_state >= VDEV_STATE_DEGRADED) &&
3071                     zhp != NULL &&
3072                     nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
3073                         /*
3074                          * Determine if the current path is correct.
3075                          */
3076                         char *newdevid = path_to_devid(path);
3077
3078                         if (newdevid == NULL ||
3079                             strcmp(devid, newdevid) != 0) {
3080                                 char *newpath;
3081
3082                                 if ((newpath = devid_to_path(devid)) != NULL) {
3083                                         /*
3084                                          * Update the path appropriately.
3085                                          */
3086                                         set_path(zhp, nv, newpath);
3087                                         if (nvlist_add_string(nv,
3088                                             ZPOOL_CONFIG_PATH, newpath) == 0)
3089                                                 verify(nvlist_lookup_string(nv,
3090                                                     ZPOOL_CONFIG_PATH,
3091                                                     &path) == 0);
3092                                         free(newpath);
3093                                 }
3094                         }
3095
3096                         if (newdevid)
3097                                 devid_str_free(newdevid);
3098                 }
3099
3100                 /*
3101                  * For a block device only use the name.
3102                  */
3103                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
3104                 if (strcmp(type, VDEV_TYPE_DISK) == 0) {
3105                         path = strrchr(path, '/');
3106                         path++;
3107                 }
3108
3109                 /*
3110                  * Remove the partition from the path if this is a whole disk.
3111                  */
3112                 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
3113                     &value) == 0 && value) {
3114                         return (strip_partition(hdl, path));
3115                 }
3116         } else {
3117                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
3118
3119                 /*
3120                  * If it's a raidz device, we need to stick in the parity level.
3121                  */
3122                 if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
3123                         char tmpbuf[PATH_BUF_LEN];
3124
3125                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
3126                             &value) == 0);
3127                         (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%llu", path,
3128                             (u_longlong_t)value);
3129                         path = tmpbuf;
3130                 }
3131
3132                 /*
3133                  * We identify each top-level vdev by using a <type-id>
3134                  * naming convention.
3135                  */
3136                 if (verbose) {
3137                         uint64_t id;
3138
3139                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
3140                             &id) == 0);
3141                         (void) snprintf(buf, sizeof (buf), "%s-%llu", path,
3142                             (u_longlong_t)id);
3143                         path = buf;
3144                 }
3145         }
3146
3147         return (zfs_strdup(hdl, path));
3148 }
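
/*
 * Illustrative sketch (not part of the original file): printing the
 * display name of each child of a vdev tree, roughly the way an iostat
 * style caller would.  The helper name print_child_vdev_names() is
 * hypothetical.
 */
#if 0
static void
print_child_vdev_names(zpool_handle_t *zhp, nvlist_t *nvroot)
{
        nvlist_t **child;
        uint_t c, children;
        char *name;

        if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
            &child, &children) != 0)
                return;

        for (c = 0; c < children; c++) {
                name = zpool_vdev_name(zpool_get_handle(zhp), zhp,
                    child[c], B_TRUE);
                (void) printf("  %s\n", name);
                free(name);
        }
}
#endif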
3149
3150 static int
3151 zbookmark_compare(const void *a, const void *b)
3152 {
3153         return (memcmp(a, b, sizeof (zbookmark_t)));
3154 }
3155
3156 /*
3157  * Retrieve the persistent error log, uniquify the members, and return to the
3158  * caller.
3159  */
3160 int
3161 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
3162 {
3163         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3164         uint64_t count;
3165         zbookmark_t *zb = NULL;
3166         int i;
3167
3168         /*
3169          * Retrieve the raw error list from the kernel.  If the number of errors
3170          * has increased, allocate more space and continue until we get the
3171          * entire list.
3172          */
3173         verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
3174             &count) == 0);
3175         if (count == 0)
3176                 return (0);
3177         if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
3178             count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
3179                 return (-1);
3180         zc.zc_nvlist_dst_size = count;
3181         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3182         for (;;) {
3183                 if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
3184                     &zc) != 0) {
3185                         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3186                         if (errno == ENOMEM) {
3187                                 count = zc.zc_nvlist_dst_size;
3188                                 if ((zc.zc_nvlist_dst = (uintptr_t)
3189                                     zfs_alloc(zhp->zpool_hdl, count *
3190                                     sizeof (zbookmark_t))) == (uintptr_t)NULL)
3191                                         return (-1);
3192                         } else {
3193                                 return (-1);
3194                         }
3195                 } else {
3196                         break;
3197                 }
3198         }
3199
3200         /*
3201          * Sort the resulting bookmarks.  This is a little confusing due to the
3202          * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
3203  * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
3204  * _not_ copied as part of the process.  So we point the start of our
3205  * array appropriately and decrement the total number of elements.
3206          */
3207         zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
3208             zc.zc_nvlist_dst_size;
3209         count -= zc.zc_nvlist_dst_size;
3210
3211         qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
3212
3213         verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
3214
3215         /*
3216          * Fill in the nverrlistp with nvlist's of dataset and object numbers.
3217          */
3218         for (i = 0; i < count; i++) {
3219                 nvlist_t *nv;
3220
3221                 /* ignoring zb_blkid and zb_level for now */
3222                 if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
3223                     zb[i-1].zb_object == zb[i].zb_object)
3224                         continue;
3225
3226                 if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
3227                         goto nomem;
3228                 if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
3229                     zb[i].zb_objset) != 0) {
3230                         nvlist_free(nv);
3231                         goto nomem;
3232                 }
3233                 if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
3234                     zb[i].zb_object) != 0) {
3235                         nvlist_free(nv);
3236                         goto nomem;
3237                 }
3238                 if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
3239                         nvlist_free(nv);
3240                         goto nomem;
3241                 }
3242                 nvlist_free(nv);
3243         }
3244
3245         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3246         return (0);
3247
3248 nomem:
3249         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3250         return (no_memory(zhp->zpool_hdl));
3251 }
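
/*
 * Illustrative sketch (not part of the original file): walking the error
 * list returned by zpool_get_errlog().  Each element is an nvlist holding
 * ZPOOL_ERR_DATASET and ZPOOL_ERR_OBJECT.  The helper name print_errlog()
 * is hypothetical.
 */
#if 0
static void
print_errlog(zpool_handle_t *zhp)
{
        nvlist_t *nverrlist = NULL;
        nvpair_t *elem = NULL;
        nvlist_t *nv;
        uint64_t dsobj, obj;

        if (zpool_get_errlog(zhp, &nverrlist) != 0 || nverrlist == NULL)
                return;

        while ((elem = nvlist_next_nvpair(nverrlist, elem)) != NULL) {
                verify(nvpair_value_nvlist(elem, &nv) == 0);
                verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_DATASET,
                    &dsobj) == 0);
                verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_OBJECT,
                    &obj) == 0);
                (void) printf("<0x%llx>:<0x%llx>\n",
                    (u_longlong_t)dsobj, (u_longlong_t)obj);
        }
        nvlist_free(nverrlist);
}
#endif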
3252
3253 /*
3254  * Upgrade a ZFS pool to the latest on-disk version.
3255  */
3256 int
3257 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
3258 {
3259         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3260         libzfs_handle_t *hdl = zhp->zpool_hdl;
3261
3262         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3263         zc.zc_cookie = new_version;
3264
3265         if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
3266                 return (zpool_standard_error_fmt(hdl, errno,
3267                     dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
3268                     zhp->zpool_name));
3269         return (0);
3270 }
3271
3272 void
3273 zpool_set_history_str(const char *subcommand, int argc, char **argv,
3274     char *history_str)
3275 {
3276         int i;
3277
3278         (void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
3279         for (i = 1; i < argc; i++) {
3280                 if (strlen(history_str) + 1 + strlen(argv[i]) >
3281                     HIS_MAX_RECORD_LEN)
3282                         break;
3283                 (void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
3284                 (void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
3285         }
3286 }
3287
3288 /*
3289  * Stage command history for logging.
3290  */
3291 int
3292 zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
3293 {
3294         if (history_str == NULL)
3295                 return (EINVAL);
3296
3297         if (strlen(history_str) > HIS_MAX_RECORD_LEN)
3298                 return (EINVAL);
3299
3300         if (hdl->libzfs_log_str != NULL)
3301                 free(hdl->libzfs_log_str);
3302
3303         if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
3304                 return (no_memory(hdl));
3305
3306         return (0);
3307 }
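
/*
 * Illustrative sketch (not part of the original file): how a command line
 * tool might record its invocation before issuing pool ioctls, mirroring
 * the pattern used by zpool(8).  The helper name record_command() is
 * hypothetical.
 */
#if 0
static int
record_command(libzfs_handle_t *hdl, int argc, char **argv)
{
        char history_str[HIS_MAX_RECORD_LEN];

        zpool_set_history_str("zpool", argc, argv, history_str);
        return (zpool_stage_history(hdl, history_str));
}
#endif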
3308
3309 /*
3310  * Perform ioctl to get some command history of a pool.
3311  *
3312  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
3313  * logical offset of the history buffer to start reading from.
3314  *
3315  * Upon return, 'off' is the next logical offset to read from and
3316  * 'len' is the actual amount of bytes read into 'buf'.
3317  */
3318 static int
3319 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
3320 {
3321         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3322         libzfs_handle_t *hdl = zhp->zpool_hdl;
3323
3324         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3325
3326         zc.zc_history = (uint64_t)(uintptr_t)buf;
3327         zc.zc_history_len = *len;
3328         zc.zc_history_offset = *off;
3329
3330         if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
3331                 switch (errno) {
3332                 case EPERM:
3333                         return (zfs_error_fmt(hdl, EZFS_PERM,
3334                             dgettext(TEXT_DOMAIN,
3335                             "cannot show history for pool '%s'"),
3336                             zhp->zpool_name));
3337                 case ENOENT:
3338                         return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
3339                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3340                             "'%s'"), zhp->zpool_name));
3341                 case ENOTSUP:
3342                         return (zfs_error_fmt(hdl, EZFS_BADVERSION,
3343                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3344                             "'%s', pool must be upgraded"), zhp->zpool_name));
3345                 default:
3346                         return (zpool_standard_error_fmt(hdl, errno,
3347                             dgettext(TEXT_DOMAIN,
3348                             "cannot get history for '%s'"), zhp->zpool_name));
3349                 }
3350         }
3351
3352         *len = zc.zc_history_len;
3353         *off = zc.zc_history_offset;
3354
3355         return (0);
3356 }
3357
3358 /*
3359  * Process the buffer of nvlists, unpacking and storing each nvlist record
3360  * into 'records'.  'leftover' is set to the number of bytes that weren't
3361  * processed as there wasn't a complete record.
3362  */
3363 int
3364 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
3365     nvlist_t ***records, uint_t *numrecords)
3366 {
3367         uint64_t reclen;
3368         nvlist_t *nv;
3369         int i;
3370
3371         while (bytes_read > sizeof (reclen)) {
3372
3373                 /* get length of packed record (stored as little endian) */
3374                 for (i = 0, reclen = 0; i < sizeof (reclen); i++)
3375                         reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
3376
3377                 if (bytes_read < sizeof (reclen) + reclen)
3378                         break;
3379
3380                 /* unpack record */
3381                 if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
3382                         return (ENOMEM);
3383                 bytes_read -= sizeof (reclen) + reclen;
3384                 buf += sizeof (reclen) + reclen;
3385
3386                 /* add record to nvlist array, grown in powers of two */
3387                 (*numrecords)++;
3388                 if (ISP2(*numrecords + 1)) {
3389                         nvlist_t **tmp = realloc(*records,
3390                             *numrecords * 2 * sizeof (nvlist_t *));
                             if (tmp == NULL) {
                                     nvlist_free(nv);
                                     (*numrecords)--;
                                     return (ENOMEM);
                             }
                             *records = tmp;
3391                 }
3392                 (*records)[*numrecords - 1] = nv;
3393         }
3394
3395         *leftover = bytes_read;
3396         return (0);
3397 }
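
/*
 * Example of the framing consumed above (illustrative): a buffer holding
 * one packed record of length 0x1c begins with the eight bytes
 * 1c 00 00 00 00 00 00 00 (the record length, little endian), followed
 * by the 0x1c bytes of the packed nvlist itself; the next record, if
 * any, starts immediately after.
 */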
3398
3399 #define HIS_BUF_LEN     (128*1024)
3400
3401 /*
3402  * Retrieve the command history of a pool.
3403  */
3404 int
3405 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
3406 {
3407         char buf[HIS_BUF_LEN];
3408         uint64_t off = 0;
3409         nvlist_t **records = NULL;
3410         uint_t numrecords = 0;
3411         int err, i;
3412
3413         do {
3414                 uint64_t bytes_read = sizeof (buf);
3415                 uint64_t leftover;
3416
3417                 if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
3418                         break;
3419
3420                 /* if nothing else was read in, we're at EOF, just return */
3421                 if (!bytes_read)
3422                         break;
3423
3424                 if ((err = zpool_history_unpack(buf, bytes_read,
3425                     &leftover, &records, &numrecords)) != 0)
3426                         break;
3427                 off -= leftover;
3428
3429                 /* CONSTCOND */
3430         } while (1);
3431
3432         if (!err) {
3433                 verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
3434                 verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
3435                     records, numrecords) == 0);
3436         }
3437         for (i = 0; i < numrecords; i++)
3438                 nvlist_free(records[i]);
3439         free(records);
3440
3441         return (err);
3442 }
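
/*
 * Illustrative sketch (not part of the original file): dumping the
 * command strings from the history nvlist, similar in spirit to
 * 'zpool history'.  The helper name print_pool_history() is hypothetical.
 */
#if 0
static void
print_pool_history(zpool_handle_t *zhp)
{
        nvlist_t *nvhis;
        nvlist_t **records;
        uint_t numrecords, i;
        char *cmd;

        if (zpool_get_history(zhp, &nvhis) != 0)
                return;

        if (nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD,
            &records, &numrecords) == 0) {
                for (i = 0; i < numrecords; i++) {
                        if (nvlist_lookup_string(records[i],
                            ZPOOL_HIST_CMD, &cmd) == 0)
                                (void) printf("%s\n", cmd);
                }
        }
        nvlist_free(nvhis);
}
#endif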
3443
3444 /*
3445  * Retrieve the next event.  If there is a new event available 'nvp' will
3446  * contain a newly allocated nvlist and 'dropped' will be set to the number
3447  * of missed events since the last call to this function.  When 'nvp' is
3448  * set to NULL it indicates no new events are available.  In either case
3449  * the function returns 0 and it is up to the caller to free 'nvp'.  In
3450  * the case of a fatal error the function will return a non-zero value.
3451  * When the function is called in blocking mode it will not return until
3452  * a new event is available.
3453  */
3454 int
3455 zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
3456     int *dropped, int block, int cleanup_fd)
3457 {
3458         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3459         int error = 0;
3460
3461         *nvp = NULL;
3462         *dropped = 0;
3463         zc.zc_cleanup_fd = cleanup_fd;
3464
3465         if (!block)
3466                 zc.zc_guid = ZEVENT_NONBLOCK;
3467
3468         if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
3469                 return (-1);
3470
3471 retry:
3472         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
3473                 switch (errno) {
3474                 case ESHUTDOWN:
3475                         error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
3476                             dgettext(TEXT_DOMAIN, "zfs shutdown"));
3477                         goto out;
3478                 case ENOENT:
3479                         /* Blocking error case should not occur */
3480                         if (block)
3481                                 error = zpool_standard_error_fmt(hdl, errno,
3482                                     dgettext(TEXT_DOMAIN, "cannot get event"));
3483
3484                         goto out;
3485                 case ENOMEM:
3486                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3487                                 error = zfs_error_fmt(hdl, EZFS_NOMEM,
3488                                     dgettext(TEXT_DOMAIN, "cannot get event"));
3489                                 goto out;
3490                         } else {
3491                                 goto retry;
3492                         }
3493                 default:
3494                         error = zpool_standard_error_fmt(hdl, errno,
3495                             dgettext(TEXT_DOMAIN, "cannot get event"));
3496                         goto out;
3497                 }
3498         }
3499
3500         error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
3501         if (error != 0)
3502                 goto out;
3503
3504         *dropped = (int)zc.zc_cookie;
3505 out:
3506         zcmd_free_nvlists(&zc);
3507
3508         return (error);
3509 }
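
/*
 * Illustrative sketch (not part of the original file): draining all
 * pending events without blocking.  The descriptor passed for
 * 'cleanup_fd' is assumed to be an open handle on ZFS_DEV, as used by
 * 'zpool events'.  The helper name drain_events() is hypothetical.
 */
#if 0
static int
drain_events(libzfs_handle_t *hdl, int cleanup_fd)
{
        nvlist_t *nv;
        int dropped;

        for (;;) {
                if (zpool_events_next(hdl, &nv, &dropped, 0,
                    cleanup_fd) != 0)
                        return (-1);
                if (nv == NULL)
                        return (0);     /* no more events pending */
                if (dropped > 0)
                        (void) printf("dropped %d events\n", dropped);
                nvlist_print(stdout, nv);
                nvlist_free(nv);
        }
}
#endif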
3510
3511 /*
3512  * Clear all events.
3513  */
3514 int
3515 zpool_events_clear(libzfs_handle_t *hdl, int *count)
3516 {
3517         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3518         char msg[1024];
3519
3520         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
3521             "cannot clear events"));
3522
3523         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
3524                 return (zpool_standard_error_fmt(hdl, errno, msg));
3525
3526         if (count != NULL)
3527                 *count = (int)zc.zc_cookie; /* # of events cleared */
3528
3529         return (0);
3530 }
3531
3532 void
3533 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
3534     char *pathname, size_t len)
3535 {
3536         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3537         boolean_t mounted = B_FALSE;
3538         char *mntpnt = NULL;
3539         char dsname[MAXNAMELEN];
3540
3541         if (dsobj == 0) {
3542                 /* special case for the MOS */
3543                 (void) snprintf(pathname, len, "<metadata>:<0x%llx>",
                         (longlong_t)obj);
3544                 return;
3545         }
3546
3547         /* get the dataset's name */
3548         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3549         zc.zc_obj = dsobj;
3550         if (ioctl(zhp->zpool_hdl->libzfs_fd,
3551             ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
3552                 /* just write out a path of two object numbers */
3553                 (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
3554                     (longlong_t)dsobj, (longlong_t)obj);
3555                 return;
3556         }
3557         (void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
3558
3559         /* find out if the dataset is mounted */
3560         mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
3561
3562         /* get the corrupted object's path */
3563         (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
3564         zc.zc_obj = obj;
3565         if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
3566             &zc) == 0) {
3567                 if (mounted) {
3568                         (void) snprintf(pathname, len, "%s%s", mntpnt,
3569                             zc.zc_value);
3570                 } else {
3571                         (void) snprintf(pathname, len, "%s:%s",
3572                             dsname, zc.zc_value);
3573                 }
3574         } else {
3575                 (void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
                         (longlong_t)obj);
3576         }
3577         free(mntpnt);
3578 }
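
/*
 * Illustrative usage (sketch): resolving an error-log entry to a path,
 * given 'dsobj' and 'obj' values such as those produced by
 * zpool_get_errlog():
 *
 *      char pathname[MAXPATHLEN * 2];
 *      zpool_obj_to_path(zhp, dsobj, obj, pathname, sizeof (pathname));
 *      (void) printf("%s\n", pathname);
 */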
3579
3580 /*
3581  * Read the EFI label from the config, if a label does not exist then
3582  * pass back the error to the caller. If the caller has passed a non-NULL
3583  * diskaddr argument then we set it to the starting address of the EFI
3584  * partition.
3585  */
3586 static int
3587 read_efi_label(nvlist_t *config, diskaddr_t *sb)
3588 {
3589         char *path;
3590         int fd;
3591         char diskname[MAXPATHLEN];
3592         int err = -1;
3593
3594         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
3595                 return (err);
3596
3597         (void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
3598             strrchr(path, '/'));
3599         if ((fd = open(diskname, O_RDWR|O_DIRECT)) >= 0) {
3600                 struct dk_gpt *vtoc;
3601
3602                 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
3603                         if (sb != NULL)
3604                                 *sb = vtoc->efi_parts[0].p_start;
3605                         efi_free(vtoc);
3606                 }
3607                 (void) close(fd);
3608         }
3609         return (err);
3610 }
3611
3612 /*
3613  * determine where a partition starts on a disk in the current
3614  * configuration
3615  */
3616 static diskaddr_t
3617 find_start_block(nvlist_t *config)
3618 {
3619         nvlist_t **child;
3620         uint_t c, children;
3621         diskaddr_t sb = MAXOFFSET_T;
3622         uint64_t wholedisk;
3623
3624         if (nvlist_lookup_nvlist_array(config,
3625             ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
3626                 if (nvlist_lookup_uint64(config,
3627                     ZPOOL_CONFIG_WHOLE_DISK,
3628                     &wholedisk) != 0 || !wholedisk) {
3629                         return (MAXOFFSET_T);
3630                 }
3631                 if (read_efi_label(config, &sb) < 0)
3632                         sb = MAXOFFSET_T;
3633                 return (sb);
3634         }
3635
3636         for (c = 0; c < children; c++) {
3637                 sb = find_start_block(child[c]);
3638                 if (sb != MAXOFFSET_T) {
3639                         return (sb);
3640                 }
3641         }
3642         return (MAXOFFSET_T);
3643 }
3644
3645 int
3646 zpool_label_disk_wait(char *path, int timeout)
3647 {
3648         struct stat64 statbuf;
3649         int i;
3650
3651         /*
3652          * Wait timeout milliseconds for a newly created device to be available
3653          * from the given path.  There is a small window when a /dev/ device
3654          * will exist and the udev link will not, so we must wait for the
3655          * symlink.  Depending on the udev rules this may take a few seconds.
3656          */
3657         for (i = 0; i < timeout; i++) {
3658                 usleep(1000);
3659
3660                 errno = 0;
3661                 if ((stat64(path, &statbuf) == 0) && (errno == 0))
3662                         return (0);
3663         }
3664
3665         return (ENOENT);
3666 }
3667
3668 int
3669 zpool_label_disk_check(char *path)
3670 {
3671         struct dk_gpt *vtoc;
3672         int fd, err;
3673
3674         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0)
3675                 return (errno);
3676
3677         if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
3678                 (void) close(fd);
3679                 return (err);
3680         }
3681
3682         if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
3683                 efi_free(vtoc);
3684                 (void) close(fd);
3685                 return (EIDRM);
3686         }
3687
3688         efi_free(vtoc);
3689         (void) close(fd);
3690         return (0);
3691 }
3692
3693 /*
3694  * Label an individual disk.  The name provided is the short name,
3695  * stripped of any leading /dev path.
3696  */
3697 int
3698 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
3699 {
3700         char path[MAXPATHLEN];
3701         struct dk_gpt *vtoc;
3702         int rval, fd;
3703         size_t resv = EFI_MIN_RESV_SIZE;
3704         uint64_t slice_size;
3705         diskaddr_t start_block;
3706         char errbuf[1024];
3707
3708         /* prepare an error message just in case */
3709         (void) snprintf(errbuf, sizeof (errbuf),
3710             dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
3711
3712         if (zhp) {
3713                 nvlist_t *nvroot;
3714
3715                 if (pool_is_bootable(zhp)) {
3716                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3717                             "EFI labeled devices are not supported on root "
3718                             "pools."));
3719                         return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
3720                 }
3721
3722                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
3723                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
3724
3725                 if (zhp->zpool_start_block == 0)
3726                         start_block = find_start_block(nvroot);
3727                 else
3728                         start_block = zhp->zpool_start_block;
3729                 zhp->zpool_start_block = start_block;
3730         } else {
3731                 /* new pool */
3732                 start_block = NEW_START_BLOCK;
3733         }
3734
3735         (void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
3736             BACKUP_SLICE);
3737
3738         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
3739                 /*
3740                  * This shouldn't happen.  We've long since verified that this
3741                  * is a valid device.
3742                  */
3743                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3744                     "unable to open device '%s': %d"), path, errno);
3745                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
3746         }
3747
3748         if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
3749                 /*
3750                  * The only way this can fail is if we run out of memory, or we
3751                  * were unable to read the disk's capacity
3752                  */
3753                 if (errno == ENOMEM)
3754                         (void) no_memory(hdl);
3755
3756                 (void) close(fd);
3757                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3758                     "unable to read disk capacity"));
3759
3760                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
3761         }
3762
3763         slice_size = vtoc->efi_last_u_lba + 1;
3764         slice_size -= EFI_MIN_RESV_SIZE;
3765         if (start_block == MAXOFFSET_T)
3766                 start_block = NEW_START_BLOCK;
3767         slice_size -= start_block;
3768
3769         vtoc->efi_parts[0].p_start = start_block;
3770         vtoc->efi_parts[0].p_size = slice_size;
3771
3772         /*
3773          * Why we use V_USR: V_BACKUP confuses users, and is considered
3774          * disposable by some EFI utilities (since EFI doesn't have a backup
3775          * slice).  V_UNASSIGNED is supposed to be used only for zero size
3776          * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
3777          * etc. were all pretty specific.  V_USR is as close to reality as we
3778          * can get, in the absence of V_OTHER.
3779          */
3780         vtoc->efi_parts[0].p_tag = V_USR;
3781         (void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
3782
3783         vtoc->efi_parts[8].p_start = slice_size + start_block;
3784         vtoc->efi_parts[8].p_size = resv;
3785         vtoc->efi_parts[8].p_tag = V_RESERVED;
3786
3787         if ((rval = efi_write(fd, vtoc)) != 0) {
3788                 /*
3789                  * Some block drivers (like pcata) may not support EFI
3790                  * GPT labels.  Print out a helpful error message
3791                  * directing the user to manually label the disk and
3792                  * give a specific slice.
3793                  */
3794                 (void) close(fd);
3795                 efi_free(vtoc);
3796
3797                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
3798                     "parted(8) and then provide a specific slice: %d"), rval);
3799                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
3800         }
3801
3802         (void) close(fd);
3803         efi_free(vtoc);
3804
3805         /* Wait for the first expected slice to appear. */
3806         (void) snprintf(path, sizeof (path), "%s/%s%s%s", DISK_ROOT, name,
3807             isdigit(name[strlen(name)-1]) ? "p" : "", FIRST_SLICE);
3808         rval = zpool_label_disk_wait(path, 3000);
3809         if (rval) {
3810                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
3811                     "detect device partitions on '%s': %d"), path, rval);
3812                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
3813         }
3814
3815         /* We can't be too paranoid.  Read the label back and verify it. */
3816         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
3817         rval = zpool_label_disk_check(path);
3818         if (rval) {
3819                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
3820                     "EFI label on '%s' is damaged.  Ensure\nthis device "
3821                     "is not in use, and is functioning properly: %d"),
3822                     path, rval);
3823                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
3824         }
3825
3826         return (0);
3827 }
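
/*
 * Illustrative sketch (not part of the original file): labeling a new
 * disk before adding it to a pool.  The short name "sdb" is hypothetical
 * and must already have been validated as a whole disk under DISK_ROOT.
 */
#if 0
static int
label_new_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp)
{
        /* writes an EFI label, waits for the partition, then verifies it */
        return (zpool_label_disk(hdl, zhp, "sdb"));
}
#endif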