Support ashift=13 for 8KB SSD block sizes
[zfs.git] / lib / libzfs / libzfs_pool.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25
26 #include <ctype.h>
27 #include <errno.h>
28 #include <devid.h>
29 #include <fcntl.h>
30 #include <libintl.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <strings.h>
34 #include <unistd.h>
35 #include <zone.h>
36 #include <sys/stat.h>
37 #include <sys/efi_partition.h>
38 #include <sys/vtoc.h>
39 #include <sys/zfs_ioctl.h>
40 #include <dlfcn.h>
41
42 #include "zfs_namecheck.h"
43 #include "zfs_prop.h"
44 #include "libzfs_impl.h"
45 #include "zfs_comutil.h"
46
47 static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
48
49 typedef struct prop_flags {
50         int create:1;   /* Validate property on creation */
51         int import:1;   /* Validate property on import */
52 } prop_flags_t;
53
54 /*
55  * ====================================================================
56  *   zpool property functions
57  * ====================================================================
58  */
59
60 static int
61 zpool_get_all_props(zpool_handle_t *zhp)
62 {
63         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
64         libzfs_handle_t *hdl = zhp->zpool_hdl;
65
66         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
67
68         if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
69                 return (-1);
70
71         while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
72                 if (errno == ENOMEM) {
73                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
74                                 zcmd_free_nvlists(&zc);
75                                 return (-1);
76                         }
77                 } else {
78                         zcmd_free_nvlists(&zc);
79                         return (-1);
80                 }
81         }
82
83         if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
84                 zcmd_free_nvlists(&zc);
85                 return (-1);
86         }
87
88         zcmd_free_nvlists(&zc);
89
90         return (0);
91 }
92
93 static int
94 zpool_props_refresh(zpool_handle_t *zhp)
95 {
96         nvlist_t *old_props;
97
98         old_props = zhp->zpool_props;
99
100         if (zpool_get_all_props(zhp) != 0)
101                 return (-1);
102
103         nvlist_free(old_props);
104         return (0);
105 }
106
107 static char *
108 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
109     zprop_source_t *src)
110 {
111         nvlist_t *nv, *nvl;
112         uint64_t ival;
113         char *value;
114         zprop_source_t source;
115
116         nvl = zhp->zpool_props;
117         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
118                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
119                 source = ival;
120                 verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
121         } else {
122                 source = ZPROP_SRC_DEFAULT;
123                 if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
124                         value = "-";
125         }
126
127         if (src)
128                 *src = source;
129
130         return (value);
131 }
132
133 uint64_t
134 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
135 {
136         nvlist_t *nv, *nvl;
137         uint64_t value;
138         zprop_source_t source;
139
140         if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
141                 /*
142                  * zpool_get_all_props() has most likely failed because
143                  * the pool is faulted, but if all we need is the top level
144                  * vdev's guid then get it from the zhp config nvlist.
145                  */
146                 if ((prop == ZPOOL_PROP_GUID) &&
147                     (nvlist_lookup_nvlist(zhp->zpool_config,
148                     ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
149                     (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
150                     == 0)) {
151                         return (value);
152                 }
153                 return (zpool_prop_default_numeric(prop));
154         }
155
156         nvl = zhp->zpool_props;
157         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
158                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
159                 source = value;
160                 verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
161         } else {
162                 source = ZPROP_SRC_DEFAULT;
163                 value = zpool_prop_default_numeric(prop);
164         }
165
166         if (src)
167                 *src = source;
168
169         return (value);
170 }
171
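/*
 * Illustrative sketch (not part of the original file): reading a numeric
 * pool property, such as the ashift this change extends to 13, with
 * zpool_get_prop_int().  The pool name "tank" and the helper itself are
 * assumptions for demonstration only.
 */
static uint64_t
example_read_ashift(libzfs_handle_t *hdl)
{
        zpool_handle_t *zhp;
        uint64_t ashift;

        if ((zhp = zpool_open_canfail(hdl, "tank")) == NULL)
                return (0);
        ashift = zpool_get_prop_int(zhp, ZPOOL_PROP_ASHIFT, NULL);
        zpool_close(zhp);
        return (ashift);        /* e.g. 13 for an 8KB sector SSD */
}
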
172 /*
173  * Map VDEV STATE to printed strings.
174  */
175 char *
176 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
177 {
178         switch (state) {
179         default:
180                 break;
181         case VDEV_STATE_CLOSED:
182         case VDEV_STATE_OFFLINE:
183                 return (gettext("OFFLINE"));
184         case VDEV_STATE_REMOVED:
185                 return (gettext("REMOVED"));
186         case VDEV_STATE_CANT_OPEN:
187                 if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
188                         return (gettext("FAULTED"));
189                 else if (aux == VDEV_AUX_SPLIT_POOL)
190                         return (gettext("SPLIT"));
191                 else
192                         return (gettext("UNAVAIL"));
193         case VDEV_STATE_FAULTED:
194                 return (gettext("FAULTED"));
195         case VDEV_STATE_DEGRADED:
196                 return (gettext("DEGRADED"));
197         case VDEV_STATE_HEALTHY:
198                 return (gettext("ONLINE"));
199         }
200
201         return (gettext("UNKNOWN"));
202 }
203
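/*
 * Illustrative examples (hypothetical) of the mapping implemented above:
 *
 *      zpool_state_to_name(VDEV_STATE_HEALTHY, VDEV_AUX_NONE)  -> "ONLINE"
 *      zpool_state_to_name(VDEV_STATE_CANT_OPEN,
 *          VDEV_AUX_SPLIT_POOL)                                 -> "SPLIT"
 */
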
204 /*
205  * Get a zpool property value for 'prop' and return the value in
206  * a pre-allocated buffer.
207  */
208 int
209 zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
210     zprop_source_t *srctype)
211 {
212         uint64_t intval;
213         const char *strval;
214         zprop_source_t src = ZPROP_SRC_NONE;
215         nvlist_t *nvroot;
216         vdev_stat_t *vs;
217         uint_t vsc;
218
219         if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
220                 switch (prop) {
221                 case ZPOOL_PROP_NAME:
222                         (void) strlcpy(buf, zpool_get_name(zhp), len);
223                         break;
224
225                 case ZPOOL_PROP_HEALTH:
226                         (void) strlcpy(buf, "FAULTED", len);
227                         break;
228
229                 case ZPOOL_PROP_GUID:
230                         intval = zpool_get_prop_int(zhp, prop, &src);
231                         (void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
232                         break;
233
234                 case ZPOOL_PROP_ALTROOT:
235                 case ZPOOL_PROP_CACHEFILE:
236                         if (zhp->zpool_props != NULL ||
237                             zpool_get_all_props(zhp) == 0) {
238                                 (void) strlcpy(buf,
239                                     zpool_get_prop_string(zhp, prop, &src),
240                                     len);
241                                 if (srctype != NULL)
242                                         *srctype = src;
243                                 return (0);
244                         }
245                         /* FALLTHROUGH */
246                 default:
247                         (void) strlcpy(buf, "-", len);
248                         break;
249                 }
250
251                 if (srctype != NULL)
252                         *srctype = src;
253                 return (0);
254         }
255
256         if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
257             prop != ZPOOL_PROP_NAME)
258                 return (-1);
259
260         switch (zpool_prop_get_type(prop)) {
261         case PROP_TYPE_STRING:
262                 (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
263                     len);
264                 break;
265
266         case PROP_TYPE_NUMBER:
267                 intval = zpool_get_prop_int(zhp, prop, &src);
268
269                 switch (prop) {
270                 case ZPOOL_PROP_SIZE:
271                 case ZPOOL_PROP_ALLOCATED:
272                 case ZPOOL_PROP_FREE:
273                 case ZPOOL_PROP_ASHIFT:
274                         (void) zfs_nicenum(intval, buf, len);
275                         break;
276
277                 case ZPOOL_PROP_CAPACITY:
278                         (void) snprintf(buf, len, "%llu%%",
279                             (u_longlong_t)intval);
280                         break;
281
282                 case ZPOOL_PROP_DEDUPRATIO:
283                         (void) snprintf(buf, len, "%llu.%02llux",
284                             (u_longlong_t)(intval / 100),
285                             (u_longlong_t)(intval % 100));
286                         break;
287
288                 case ZPOOL_PROP_HEALTH:
289                         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
290                             ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
291                         verify(nvlist_lookup_uint64_array(nvroot,
292                             ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
293                             == 0);
294
295                         (void) strlcpy(buf, zpool_state_to_name(intval,
296                             vs->vs_aux), len);
297                         break;
298                 default:
299                         (void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
300                 }
301                 break;
302
303         case PROP_TYPE_INDEX:
304                 intval = zpool_get_prop_int(zhp, prop, &src);
305                 if (zpool_prop_index_to_string(prop, intval, &strval)
306                     != 0)
307                         return (-1);
308                 (void) strlcpy(buf, strval, len);
309                 break;
310
311         default:
312                 abort();
313         }
314
315         if (srctype)
316                 *srctype = src;
317
318         return (0);
319 }
320
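/*
 * Illustrative sketch (not part of the original file): fetching the
 * formatted value of a pool property.  The pool name "tank" and the
 * helper itself are assumptions for demonstration only.
 */
static void
example_print_health(libzfs_handle_t *hdl)
{
        zpool_handle_t *zhp;
        char buf[ZFS_MAXPROPLEN];

        if ((zhp = zpool_open_canfail(hdl, "tank")) == NULL)
                return;
        if (zpool_get_prop(zhp, ZPOOL_PROP_HEALTH, buf,
            sizeof (buf), NULL) == 0)
                (void) printf("health: %s\n", buf);
        zpool_close(zhp);
}
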
321 /*
322  * Check that the bootfs name belongs to the given pool.  The bootfs
323  * string is assumed to be a valid dataset name.
324  */
325 static boolean_t
326 bootfs_name_valid(const char *pool, char *bootfs)
327 {
328         int len = strlen(pool);
329
330         if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
331                 return (B_FALSE);
332
333         if (strncmp(pool, bootfs, len) == 0 &&
334             (bootfs[len] == '/' || bootfs[len] == '\0'))
335                 return (B_TRUE);
336
337         return (B_FALSE);
338 }
339
340 /*
341  * Inspect the configuration to determine if any of the devices contain
342  * an EFI label.
343  */
344 static boolean_t
345 pool_uses_efi(nvlist_t *config)
346 {
347         nvlist_t **child;
348         uint_t c, children;
349
350         if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
351             &child, &children) != 0)
352                 return (read_efi_label(config, NULL) >= 0);
353
354         for (c = 0; c < children; c++) {
355                 if (pool_uses_efi(child[c]))
356                         return (B_TRUE);
357         }
358         return (B_FALSE);
359 }
360
361 static boolean_t
362 pool_is_bootable(zpool_handle_t *zhp)
363 {
364         char bootfs[ZPOOL_MAXNAMELEN];
365
366         return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
367             sizeof (bootfs), NULL) == 0 && strncmp(bootfs, "-",
368             sizeof (bootfs)) != 0);
369 }
370
371
372 /*
373  * Given an nvlist of zpool properties to be set, validate that they are
374  * correct, and parse any numeric properties (index, boolean, etc) if they are
375  * specified as strings.
376  */
377 static nvlist_t *
378 zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
379     nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
380 {
381         nvpair_t *elem;
382         nvlist_t *retprops;
383         zpool_prop_t prop;
384         char *strval;
385         uint64_t intval;
386         char *slash;
387         struct stat64 statbuf;
388         zpool_handle_t *zhp;
389         nvlist_t *nvroot;
390
391         if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
392                 (void) no_memory(hdl);
393                 return (NULL);
394         }
395
396         elem = NULL;
397         while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
398                 const char *propname = nvpair_name(elem);
399
400                 /*
401                  * Make sure this property is valid and applies to this type.
402                  */
403                 if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
404                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
405                             "invalid property '%s'"), propname);
406                         (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
407                         goto error;
408                 }
409
410                 if (zpool_prop_readonly(prop)) {
411                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
412                             "is readonly"), propname);
413                         (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
414                         goto error;
415                 }
416
417                 if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
418                     &strval, &intval, errbuf) != 0)
419                         goto error;
420
421                 /*
422                  * Perform additional checking for specific properties.
423                  */
424                 switch (prop) {
425                 default:
426                         break;
427                 case ZPOOL_PROP_VERSION:
428                         if (intval < version || intval > SPA_VERSION) {
429                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
430                                     "property '%s' number %llu is invalid."),
431                                     propname, (u_longlong_t)intval);
432                                 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
433                                 goto error;
434                         }
435                         break;
436
437                 case ZPOOL_PROP_ASHIFT:
438                         if (!flags.create) {
439                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
440                                     "property '%s' can only be set at "
441                                     "creation time"), propname);
442                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
443                                 goto error;
444                         }
445
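                        /*
                         * Valid ashift values are 2^9 (512 byte sectors)
                         * through 2^13 (8KB sectors, as used by newer
                         * SSDs); 0 requests auto-detection from the
                         * device.
                         */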
446                         if (intval != 0 && (intval < 9 || intval > 13)) {
447                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
448                                     "property '%s' number %llu is invalid."),
449                                     propname, (u_longlong_t)intval);
450                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
451                                 goto error;
452                         }
453                         break;
454
455                 case ZPOOL_PROP_BOOTFS:
456                         if (flags.create || flags.import) {
457                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
458                                     "property '%s' cannot be set at creation "
459                                     "or import time"), propname);
460                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
461                                 goto error;
462                         }
463
464                         if (version < SPA_VERSION_BOOTFS) {
465                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
466                                     "pool must be upgraded to support "
467                                     "'%s' property"), propname);
468                                 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
469                                 goto error;
470                         }
471
472                         /*
473                          * The bootfs property value must be a dataset name,
474                          * and the dataset must reside in the pool it is set on.
475                          */
476                         if (strval[0] != '\0' && !bootfs_name_valid(poolname,
477                             strval)) {
478                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
479                                     "is an invalid name"), strval);
480                                 (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
481                                 goto error;
482                         }
483
484                         if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
485                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
486                                     "could not open pool '%s'"), poolname);
487                                 (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
488                                 goto error;
489                         }
490                         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
491                             ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
492
493 #if defined(__sun__) || defined(__sun)
494                         /*
495                          * bootfs property cannot be set on a disk which has
496                          * been EFI labeled.
497                          */
498                         if (pool_uses_efi(nvroot)) {
499                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
500                                     "property '%s' not supported on "
501                                     "EFI labeled devices"), propname);
502                                 (void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
503                                 zpool_close(zhp);
504                                 goto error;
505                         }
506 #endif
507                         zpool_close(zhp);
508                         break;
509
510                 case ZPOOL_PROP_ALTROOT:
511                         if (!flags.create && !flags.import) {
512                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
513                                     "property '%s' can only be set during pool "
514                                     "creation or import"), propname);
515                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
516                                 goto error;
517                         }
518
519                         if (strval[0] != '/') {
520                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
521                                     "bad alternate root '%s'"), strval);
522                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
523                                 goto error;
524                         }
525                         break;
526
527                 case ZPOOL_PROP_CACHEFILE:
528                         if (strval[0] == '\0')
529                                 break;
530
531                         if (strcmp(strval, "none") == 0)
532                                 break;
533
534                         if (strval[0] != '/') {
535                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
536                                     "property '%s' must be empty, an "
537                                     "absolute path, or 'none'"), propname);
538                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
539                                 goto error;
540                         }
541
542                         slash = strrchr(strval, '/');
543
544                         if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
545                             strcmp(slash, "/..") == 0) {
546                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
547                                     "'%s' is not a valid file"), strval);
548                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
549                                 goto error;
550                         }
551
552                         *slash = '\0';
553
554                         if (strval[0] != '\0' &&
555                             (stat64(strval, &statbuf) != 0 ||
556                             !S_ISDIR(statbuf.st_mode))) {
557                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
558                                     "'%s' is not a valid directory"),
559                                     strval);
560                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
561                                 goto error;
562                         }
563
564                         *slash = '/';
565                         break;
566
567                 case ZPOOL_PROP_READONLY:
568                         if (!flags.import) {
569                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
570                                     "property '%s' can only be set at "
571                                     "import time"), propname);
572                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
573                                 goto error;
574                         }
575                         break;
576                 }
577         }
578
579         return (retprops);
580 error:
581         nvlist_free(retprops);
582         return (NULL);
583 }
584
585 /*
586  * Set zpool property : propname=propval.
587  */
588 int
589 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
590 {
591         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
592         int ret = -1;
593         char errbuf[1024];
594         nvlist_t *nvl = NULL;
595         nvlist_t *realprops;
596         uint64_t version;
597         prop_flags_t flags = { 0 };
598
599         (void) snprintf(errbuf, sizeof (errbuf),
600             dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
601             zhp->zpool_name);
602
603         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
604                 return (no_memory(zhp->zpool_hdl));
605
606         if (nvlist_add_string(nvl, propname, propval) != 0) {
607                 nvlist_free(nvl);
608                 return (no_memory(zhp->zpool_hdl));
609         }
610
611         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
612         if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
613             zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
614                 nvlist_free(nvl);
615                 return (-1);
616         }
617
618         nvlist_free(nvl);
619         nvl = realprops;
620
621         /*
622          * Execute the corresponding ioctl() to set this property.
623          */
624         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
625
626         if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
627                 nvlist_free(nvl);
628                 return (-1);
629         }
630
631         ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
632
633         zcmd_free_nvlists(&zc);
634         nvlist_free(nvl);
635
636         if (ret)
637                 (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
638         else
639                 (void) zpool_props_refresh(zhp);
640
641         return (ret);
642 }
643
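/*
 * Illustrative sketch (not part of the original file): setting a pool
 * property at runtime.  Per zpool_valid_proplist() above, ashift may only
 * be set at creation time, so the runtime-settable cachefile property is
 * used here instead.
 */
static int
example_disable_cachefile(zpool_handle_t *zhp)
{
        return (zpool_set_prop(zhp, "cachefile", "none"));
}
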
644 int
645 zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
646 {
647         libzfs_handle_t *hdl = zhp->zpool_hdl;
648         zprop_list_t *entry;
649         char buf[ZFS_MAXPROPLEN];
650
651         if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
652                 return (-1);
653
654         for (entry = *plp; entry != NULL; entry = entry->pl_next) {
655
656                 if (entry->pl_fixed)
657                         continue;
658
659                 if (entry->pl_prop != ZPROP_INVAL &&
660                     zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
661                     NULL) == 0) {
662                         if (strlen(buf) > entry->pl_width)
663                                 entry->pl_width = strlen(buf);
664                 }
665         }
666
667         return (0);
668 }
669
670
671 /*
672  * Don't start the slice at the default block of 34; many storage
673  * devices use a stripe width of 128k, and other vendors prefer a 1m
674  * alignment.  It is safest to ensure a 1m alignment given 512b
675  * blocks.  When the block size is larger by a power of 2 we will
676  * still be 1m aligned.
677  */
678 #define NEW_START_BLOCK 2048
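/* 2048 sectors * 512 bytes per sector = the 1m alignment described above */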
679
680 /*
681  * Validate the given pool name, optionally putting an extended error message in
682  * 'buf'.
683  */
684 boolean_t
685 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
686 {
687         namecheck_err_t why;
688         char what;
689         int ret;
690
691         ret = pool_namecheck(pool, &why, &what);
692
693         /*
694          * The rules for reserved pool names were extended at a later point.
695          * But we need to support users with existing pools that may now be
696          * invalid.  So we only check for this expanded set of names during a
697          * create (or import), and only in userland.
698          */
699         if (ret == 0 && !isopen &&
700             (strncmp(pool, "mirror", 6) == 0 ||
701             strncmp(pool, "raidz", 5) == 0 ||
702             strncmp(pool, "spare", 5) == 0 ||
703             strcmp(pool, "log") == 0)) {
704                 if (hdl != NULL)
705                         zfs_error_aux(hdl,
706                             dgettext(TEXT_DOMAIN, "name is reserved"));
707                 return (B_FALSE);
708         }
709
710
711         if (ret != 0) {
712                 if (hdl != NULL) {
713                         switch (why) {
714                         case NAME_ERR_TOOLONG:
715                                 zfs_error_aux(hdl,
716                                     dgettext(TEXT_DOMAIN, "name is too long"));
717                                 break;
718
719                         case NAME_ERR_INVALCHAR:
720                                 zfs_error_aux(hdl,
721                                     dgettext(TEXT_DOMAIN, "invalid character "
722                                     "'%c' in pool name"), what);
723                                 break;
724
725                         case NAME_ERR_NOLETTER:
726                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
727                                     "name must begin with a letter"));
728                                 break;
729
730                         case NAME_ERR_RESERVED:
731                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
732                                     "name is reserved"));
733                                 break;
734
735                         case NAME_ERR_DISKLIKE:
736                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
737                                     "pool name is reserved"));
738                                 break;
739
740                         case NAME_ERR_LEADING_SLASH:
741                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
742                                     "leading slash in name"));
743                                 break;
744
745                         case NAME_ERR_EMPTY_COMPONENT:
746                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
747                                     "empty component in name"));
748                                 break;
749
750                         case NAME_ERR_TRAILING_SLASH:
751                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
752                                     "trailing slash in name"));
753                                 break;
754
755                         case NAME_ERR_MULTIPLE_AT:
756                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
757                                     "multiple '@' delimiters in name"));
758                                 break;
759                         case NAME_ERR_NO_AT:
760                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
761                                     "permission set is missing '@'"));
762                                 break;
763                         }
764                 }
765                 return (B_FALSE);
766         }
767
768         return (B_TRUE);
769 }
770
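/*
 * Illustrative examples (hypothetical) of the checks above:
 *
 *      zpool_name_valid(hdl, B_FALSE, "tank")    -> B_TRUE
 *      zpool_name_valid(hdl, B_FALSE, "mirror")  -> B_FALSE (reserved)
 */
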
771 /*
772  * Open a handle to the given pool, even if the pool is currently in the FAULTED
773  * state.
774  */
775 zpool_handle_t *
776 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
777 {
778         zpool_handle_t *zhp;
779         boolean_t missing;
780
781         /*
782          * Make sure the pool name is valid.
783          */
784         if (!zpool_name_valid(hdl, B_TRUE, pool)) {
785                 (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
786                     dgettext(TEXT_DOMAIN, "cannot open '%s'"),
787                     pool);
788                 return (NULL);
789         }
790
791         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
792                 return (NULL);
793
794         zhp->zpool_hdl = hdl;
795         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
796
797         if (zpool_refresh_stats(zhp, &missing) != 0) {
798                 zpool_close(zhp);
799                 return (NULL);
800         }
801
802         if (missing) {
803                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
804                 (void) zfs_error_fmt(hdl, EZFS_NOENT,
805                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
806                 zpool_close(zhp);
807                 return (NULL);
808         }
809
810         return (zhp);
811 }
812
813 /*
814  * Like the above, but silent on error.  Used when iterating over pools (because
815  * the configuration cache may be out of date).
816  */
817 int
818 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
819 {
820         zpool_handle_t *zhp;
821         boolean_t missing;
822
823         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
824                 return (-1);
825
826         zhp->zpool_hdl = hdl;
827         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
828
829         if (zpool_refresh_stats(zhp, &missing) != 0) {
830                 zpool_close(zhp);
831                 return (-1);
832         }
833
834         if (missing) {
835                 zpool_close(zhp);
836                 *ret = NULL;
837                 return (0);
838         }
839
840         *ret = zhp;
841         return (0);
842 }
843
844 /*
845  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
846  * state.
847  */
848 zpool_handle_t *
849 zpool_open(libzfs_handle_t *hdl, const char *pool)
850 {
851         zpool_handle_t *zhp;
852
853         if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
854                 return (NULL);
855
856         if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
857                 (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
858                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
859                 zpool_close(zhp);
860                 return (NULL);
861         }
862
863         return (zhp);
864 }
865
866 /*
867  * Close the handle.  Simply frees the memory associated with the handle.
868  */
869 void
870 zpool_close(zpool_handle_t *zhp)
871 {
872         if (zhp->zpool_config)
873                 nvlist_free(zhp->zpool_config);
874         if (zhp->zpool_old_config)
875                 nvlist_free(zhp->zpool_old_config);
876         if (zhp->zpool_props)
877                 nvlist_free(zhp->zpool_props);
878         free(zhp);
879 }
880
881 /*
882  * Return the name of the pool.
883  */
884 const char *
885 zpool_get_name(zpool_handle_t *zhp)
886 {
887         return (zhp->zpool_name);
888 }
889
890
891 /*
892  * Return the state of the pool (ACTIVE or UNAVAILABLE)
893  */
894 int
895 zpool_get_state(zpool_handle_t *zhp)
896 {
897         return (zhp->zpool_state);
898 }
899
900 /*
901  * Create the named pool, using the provided vdev list.  It is assumed
902  * that the consumer has already validated the contents of the nvlist, so we
903  * don't have to worry about error semantics.
904  */
905 int
906 zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
907     nvlist_t *props, nvlist_t *fsprops)
908 {
909         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
910         nvlist_t *zc_fsprops = NULL;
911         nvlist_t *zc_props = NULL;
912         char msg[1024];
913         char *altroot;
914         int ret = -1;
915
916         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
917             "cannot create '%s'"), pool);
918
919         if (!zpool_name_valid(hdl, B_FALSE, pool))
920                 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
921
922         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
923                 return (-1);
924
925         if (props) {
926                 prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };
927
928                 if ((zc_props = zpool_valid_proplist(hdl, pool, props,
929                     SPA_VERSION_1, flags, msg)) == NULL) {
930                         goto create_failed;
931                 }
932         }
933
934         if (fsprops) {
935                 uint64_t zoned;
936                 char *zonestr;
937
938                 zoned = ((nvlist_lookup_string(fsprops,
939                     zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
940                     strcmp(zonestr, "on") == 0);
941
942                 if ((zc_fsprops = zfs_valid_proplist(hdl,
943                     ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) {
944                         goto create_failed;
945                 }
946                 if (!zc_props &&
947                     (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
948                         goto create_failed;
949                 }
950                 if (nvlist_add_nvlist(zc_props,
951                     ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
952                         goto create_failed;
953                 }
954         }
955
956         if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
957                 goto create_failed;
958
959         (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
960
961         if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
962
963                 zcmd_free_nvlists(&zc);
964                 nvlist_free(zc_props);
965                 nvlist_free(zc_fsprops);
966
967                 switch (errno) {
968                 case EBUSY:
969                         /*
970                          * This can happen if the user has specified the same
971                          * device multiple times.  We can't reliably detect this
972                          * until we try to add it and see we already have a
973                          * label.  This can also happen if the device is
974                          * part of an active md or lvm device.
975                          */
976                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
977                             "one or more vdevs refer to the same device, or one of\n"
978                             "the devices is part of an active md or lvm device"));
979                         return (zfs_error(hdl, EZFS_BADDEV, msg));
980
981                 case EOVERFLOW:
982                         /*
983                          * This occurs when one of the devices is below
984                          * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
985                          * device was the problem device since there's no
986                          * reliable way to determine device size from userland.
987                          */
988                         {
989                                 char buf[64];
990
991                                 zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
992
993                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
994                                     "one or more devices is less than the "
995                                     "minimum size (%s)"), buf);
996                         }
997                         return (zfs_error(hdl, EZFS_BADDEV, msg));
998
999                 case ENOSPC:
1000                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1001                             "one or more devices is out of space"));
1002                         return (zfs_error(hdl, EZFS_BADDEV, msg));
1003
1004                 case ENOTBLK:
1005                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1006                             "cache device must be a disk or disk slice"));
1007                         return (zfs_error(hdl, EZFS_BADDEV, msg));
1008
1009                 default:
1010                         return (zpool_standard_error(hdl, errno, msg));
1011                 }
1012         }
1013
1014         /*
1015          * If this is an alternate root pool, then we automatically set the
1016          * mountpoint of the root dataset to be '/'.
1017          */
1018         if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
1019             &altroot) == 0) {
1020                 zfs_handle_t *zhp;
1021
1022                 verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
1023                 verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1024                     "/") == 0);
1025
1026                 zfs_close(zhp);
1027         }
1028
1029 create_failed:
1030         zcmd_free_nvlists(&zc);
1031         nvlist_free(zc_props);
1032         nvlist_free(zc_fsprops);
1033         return (ret);
1034 }
1035
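/*
 * Illustrative sketch (not part of the original file): creating a pool
 * with ashift=13 for an 8KB sector SSD, the case this change enables.
 * Building 'nvroot' is omitted; it would normally come from the zpool
 * command's vdev parser.  The pool name "tank" is an assumption for
 * demonstration only.
 */
static int
example_create_8k_pool(libzfs_handle_t *hdl, nvlist_t *nvroot)
{
        nvlist_t *props = NULL;
        int ret;

        if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
                return (-1);
        if (nvlist_add_string(props,
            zpool_prop_to_name(ZPOOL_PROP_ASHIFT), "13") != 0) {
                nvlist_free(props);
                return (-1);
        }
        ret = zpool_create(hdl, "tank", nvroot, props, NULL);
        nvlist_free(props);
        return (ret);
}
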
1036 /*
1037  * Destroy the given pool.  It is up to the caller to ensure that there are no
1038  * datasets left in the pool.
1039  */
1040 int
1041 zpool_destroy(zpool_handle_t *zhp)
1042 {
1043         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1044         zfs_handle_t *zfp = NULL;
1045         libzfs_handle_t *hdl = zhp->zpool_hdl;
1046         char msg[1024];
1047
1048         if (zhp->zpool_state == POOL_STATE_ACTIVE &&
1049             (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
1050                 return (-1);
1051
1052         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1053
1054         if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
1055                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1056                     "cannot destroy '%s'"), zhp->zpool_name);
1057
1058                 if (errno == EROFS) {
1059                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1060                             "one or more devices is read only"));
1061                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1062                 } else {
1063                         (void) zpool_standard_error(hdl, errno, msg);
1064                 }
1065
1066                 if (zfp)
1067                         zfs_close(zfp);
1068                 return (-1);
1069         }
1070
1071         if (zfp) {
1072                 remove_mountpoint(zfp);
1073                 zfs_close(zfp);
1074         }
1075
1076         return (0);
1077 }
1078
1079 /*
1080  * Add the given vdevs to the pool.  The caller must have already performed the
1081  * necessary verification to ensure that the vdev specification is well-formed.
1082  */
1083 int
1084 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
1085 {
1086         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1087         int ret;
1088         libzfs_handle_t *hdl = zhp->zpool_hdl;
1089         char msg[1024];
1090         nvlist_t **spares, **l2cache;
1091         uint_t nspares, nl2cache;
1092
1093         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1094             "cannot add to '%s'"), zhp->zpool_name);
1095
1096         if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1097             SPA_VERSION_SPARES &&
1098             nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1099             &spares, &nspares) == 0) {
1100                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1101                     "upgraded to add hot spares"));
1102                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
1103         }
1104
1105         if (pool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot,
1106             ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
1107                 uint64_t s;
1108
1109                 for (s = 0; s < nspares; s++) {
1110                         char *path;
1111
1112                         if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
1113                             &path) == 0 && pool_uses_efi(spares[s])) {
1114                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1115                                     "device '%s' contains an EFI label and "
1116                                     "cannot be used on root pools."),
1117                                     zpool_vdev_name(hdl, NULL, spares[s],
1118                                     B_FALSE));
1119                                 return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
1120                         }
1121                 }
1122         }
1123
1124         if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1125             SPA_VERSION_L2CACHE &&
1126             nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1127             &l2cache, &nl2cache) == 0) {
1128                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1129                     "upgraded to add cache devices"));
1130                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
1131         }
1132
1133         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1134                 return (-1);
1135         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1136
1137         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
1138                 switch (errno) {
1139                 case EBUSY:
1140                         /*
1141                          * This can happen if the user has specified the same
1142                          * device multiple times.  We can't reliably detect this
1143                          * until we try to add it and see we already have a
1144                          * label.
1145                          */
1146                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1147                             "one or more vdevs refer to the same device"));
1148                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1149                         break;
1150
1151                 case EOVERFLOW:
1152                         /*
1153                          * This occurs when one of the devices is below
1154                          * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1155                          * device was the problem device since there's no
1156                          * reliable way to determine device size from userland.
1157                          */
1158                         {
1159                                 char buf[64];
1160
1161                                 zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1162
1163                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1164                                     "device is less than the minimum "
1165                                     "size (%s)"), buf);
1166                         }
1167                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1168                         break;
1169
1170                 case ENOTSUP:
1171                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1172                             "pool must be upgraded to add these vdevs"));
1173                         (void) zfs_error(hdl, EZFS_BADVERSION, msg);
1174                         break;
1175
1176                 case EDOM:
1177                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1178                             "root pool cannot have multiple vdevs"
1179                             " or separate logs"));
1180                         (void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
1181                         break;
1182
1183                 case ENOTBLK:
1184                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1185                             "cache device must be a disk or disk slice"));
1186                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1187                         break;
1188
1189                 default:
1190                         (void) zpool_standard_error(hdl, errno, msg);
1191                 }
1192
1193                 ret = -1;
1194         } else {
1195                 ret = 0;
1196         }
1197
1198         zcmd_free_nvlists(&zc);
1199
1200         return (ret);
1201 }
1202
1203 /*
1204  * Exports the pool from the system.  The caller must ensure that there are no
1205  * mounted datasets in the pool.
1206  */
1207 int
1208 zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)
1209 {
1210         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1211         char msg[1024];
1212
1213         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1214             "cannot export '%s'"), zhp->zpool_name);
1215
1216         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1217         zc.zc_cookie = force;
1218         zc.zc_guid = hardforce;
1219
1220         if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1221                 switch (errno) {
1222                 case EXDEV:
1223                         zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1224                             "use '-f' to override the following errors:\n"
1225                             "'%s' has an active shared spare which could be"
1226                             " used by other pools once '%s' is exported."),
1227                             zhp->zpool_name, zhp->zpool_name);
1228                         return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1229                             msg));
1230                 default:
1231                         return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1232                             msg));
1233                 }
1234         }
1235
1236         return (0);
1237 }
1238
1239 int
1240 zpool_export(zpool_handle_t *zhp, boolean_t force)
1241 {
1242         return (zpool_export_common(zhp, force, B_FALSE));
1243 }
1244
1245 int
1246 zpool_export_force(zpool_handle_t *zhp)
1247 {
1248         return (zpool_export_common(zhp, B_TRUE, B_TRUE));
1249 }
1250
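/*
 * Illustrative sketch (hypothetical): choosing between a normal and a
 * hard-forced export using the wrappers above.
 */
static int
example_export(zpool_handle_t *zhp, boolean_t hard)
{
        if (hard)
                return (zpool_export_force(zhp));
        return (zpool_export(zhp, B_FALSE));
}
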
1251 static void
1252 zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
1253     nvlist_t *config)
1254 {
1255         nvlist_t *nv = NULL;
1256         uint64_t rewindto;
1257         int64_t loss = -1;
1258         struct tm t;
1259         char timestr[128];
1260
1261         if (!hdl->libzfs_printerr || config == NULL)
1262                 return;
1263
1264         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0)
1265                 return;
1266
1267         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1268                 return;
1269         (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1270
1271         if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1272             strftime(timestr, 128, "%c", &t) != 0) {
1273                 if (dryrun) {
1274                         (void) printf(dgettext(TEXT_DOMAIN,
1275                             "Would be able to return %s "
1276                             "to its state as of %s.\n"),
1277                             name, timestr);
1278                 } else {
1279                         (void) printf(dgettext(TEXT_DOMAIN,
1280                             "Pool %s returned to its state as of %s.\n"),
1281                             name, timestr);
1282                 }
1283                 if (loss > 120) {
1284                         (void) printf(dgettext(TEXT_DOMAIN,
1285                             "%s approximately %lld "),
1286                             dryrun ? "Would discard" : "Discarded",
1287                             ((longlong_t)loss + 30) / 60);
1288                         (void) printf(dgettext(TEXT_DOMAIN,
1289                             "minutes of transactions.\n"));
1290                 } else if (loss > 0) {
1291                         (void) printf(dgettext(TEXT_DOMAIN,
1292                             "%s approximately %lld "),
1293                             dryrun ? "Would discard" : "Discarded",
1294                             (longlong_t)loss);
1295                         (void) printf(dgettext(TEXT_DOMAIN,
1296                             "seconds of transactions.\n"));
1297                 }
1298         }
1299 }
1300
1301 void
1302 zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
1303     nvlist_t *config)
1304 {
1305         nvlist_t *nv = NULL;
1306         int64_t loss = -1;
1307         uint64_t edata = UINT64_MAX;
1308         uint64_t rewindto;
1309         struct tm t;
1310         char timestr[128];
1311
1312         if (!hdl->libzfs_printerr)
1313                 return;
1314
1315         if (reason >= 0)
1316                 (void) printf(dgettext(TEXT_DOMAIN, "action: "));
1317         else
1318                 (void) printf(dgettext(TEXT_DOMAIN, "\t"));
1319
1320         /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME is missing */
1321         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1322             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1323                 goto no_info;
1324
1325         (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1326         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
1327             &edata);
1328
1329         (void) printf(dgettext(TEXT_DOMAIN,
1330             "Recovery is possible, but will result in some data loss.\n"));
1331
1332         if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1333             strftime(timestr, 128, "%c", &t) != 0) {
1334                 (void) printf(dgettext(TEXT_DOMAIN,
1335                     "\tReturning the pool to its state as of %s\n"
1336                     "\tshould correct the problem.  "),
1337                     timestr);
1338         } else {
1339                 (void) printf(dgettext(TEXT_DOMAIN,
1340                     "\tReverting the pool to an earlier state "
1341                     "should correct the problem.\n\t"));
1342         }
1343
1344         if (loss > 120) {
1345                 (void) printf(dgettext(TEXT_DOMAIN,
1346                     "Approximately %lld minutes of data\n"
1347                     "\tmust be discarded, irreversibly.  "),
1348                     ((longlong_t)loss + 30) / 60);
1349         } else if (loss > 0) {
1350                 (void) printf(dgettext(TEXT_DOMAIN,
1351                     "Approximately %lld seconds of data\n"
1352                     "\tmust be discarded, irreversibly.  "),
1353                     (longlong_t)loss);
1354         }
1355         if (edata != 0 && edata != UINT64_MAX) {
1356                 if (edata == 1) {
1357                         (void) printf(dgettext(TEXT_DOMAIN,
1358                             "After rewind, at least\n"
1359                             "\tone persistent user-data error will remain.  "));
1360                 } else {
1361                         (void) printf(dgettext(TEXT_DOMAIN,
1362                             "After rewind, several\n"
1363                             "\tpersistent user-data errors will remain.  "));
1364                 }
1365         }
1366         (void) printf(dgettext(TEXT_DOMAIN,
1367             "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
1368             reason >= 0 ? "clear" : "import", name);
1369
1370         (void) printf(dgettext(TEXT_DOMAIN,
1371             "A scrub of the pool\n"
1372             "\tis strongly recommended after recovery.\n"));
1373         return;
1374
1375 no_info:
1376         (void) printf(dgettext(TEXT_DOMAIN,
1377             "Destroy and re-create the pool from\n\ta backup source.\n"));
1378 }
1379
1380 /*
1381  * zpool_import() is a contracted interface; its signature should be
1382  * kept stable if possible.
1383  *
1384  * Applications should use zpool_import_props() to import a pool with
1385  * new property values to be set.
1386  */
1387 int
1388 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1389     char *altroot)
1390 {
1391         nvlist_t *props = NULL;
1392         int ret;
1393
1394         if (altroot != NULL) {
1395                 if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1396                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1397                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1398                             newname));
1399                 }
1400
1401                 if (nvlist_add_string(props,
1402                     zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
1403                     nvlist_add_string(props,
1404                     zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
1405                         nvlist_free(props);
1406                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1407                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1408                             newname));
1409                 }
1410         }
1411
1412         ret = zpool_import_props(hdl, config, newname, props,
1413             ZFS_IMPORT_NORMAL);
1414         if (props)
1415                 nvlist_free(props);
1416         return (ret);
1417 }
1418
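/*
 * Illustrative sketch (not part of the original file): importing a pool
 * under an alternate root.  'config' would normally come from
 * zpool_find_import(); the "/mnt" altroot is an assumption for
 * demonstration only.
 */
static int
example_import_altroot(libzfs_handle_t *hdl, nvlist_t *config)
{
        char altroot[] = "/mnt";

        return (zpool_import(hdl, config, NULL, altroot));
}
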
1419 static void
1420 print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
1421     int indent)
1422 {
1423         nvlist_t **child;
1424         uint_t c, children;
1425         char *vname;
1426         uint64_t is_log = 0;
1427
1428         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
1429             &is_log);
1430
1431         if (name != NULL)
1432                 (void) printf("\t%*s%s%s\n", indent, "", name,
1433                     is_log ? " [log]" : "");
1434
1435         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1436             &child, &children) != 0)
1437                 return;
1438
1439         for (c = 0; c < children; c++) {
1440                 vname = zpool_vdev_name(hdl, NULL, child[c], B_TRUE);
1441                 print_vdev_tree(hdl, vname, child[c], indent + 2);
1442                 free(vname);
1443         }
1444 }
1445
1446 /*
1447  * Import the given pool using the known configuration and a list of
1448  * properties to be set. The configuration should have come from
1449  * zpool_find_import(). The 'newname' parameter controls whether the pool
1450  * is imported under a different name.
1451  */
1452 int
1453 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1454     nvlist_t *props, int flags)
1455 {
1456         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1457         zpool_rewind_policy_t policy;
1458         nvlist_t *nv = NULL;
1459         nvlist_t *nvinfo = NULL;
1460         nvlist_t *missing = NULL;
1461         char *thename;
1462         char *origname;
1463         int ret;
1464         int error = 0;
1465         char errbuf[1024];
1466
1467         verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1468             &origname) == 0);
1469
1470         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1471             "cannot import pool '%s'"), origname);
1472
1473         if (newname != NULL) {
1474                 if (!zpool_name_valid(hdl, B_FALSE, newname))
1475                         return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1476                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1477                             newname));
1478                 thename = (char *)newname;
1479         } else {
1480                 thename = origname;
1481         }
1482
1483         if (props) {
1484                 uint64_t version;
1485                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
1486
1487                 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1488                     &version) == 0);
1489
1490                 if ((props = zpool_valid_proplist(hdl, origname,
1491                     props, version, flags, errbuf)) == NULL) {
1492                         return (-1);
1493                 } else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1494                         nvlist_free(props);
1495                         return (-1);
1496                 }
1497         }
1498
1499         (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1500
1501         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1502             &zc.zc_guid) == 0);
1503
1504         if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1505                 nvlist_free(props);
1506                 return (-1);
1507         }
1508         if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
1509                 nvlist_free(props);
1510                 return (-1);
1511         }
1512
1513         zc.zc_cookie = flags;
1514         while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
1515             errno == ENOMEM) {
1516                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
1517                         zcmd_free_nvlists(&zc);
1518                         return (-1);
1519                 }
1520         }
1521         if (ret != 0)
1522                 error = errno;
1523
1524         (void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
1525         zpool_get_rewind_policy(config, &policy);
1526
1527         if (error) {
1528                 char desc[1024];
1529
1530                 /*
1531                  * Dry-run failed, but we print out what success
1532                  * Dry-run failed, but we print out what success
1533                  * would look like if we found the best txg.
1534                 if (policy.zrp_request & ZPOOL_TRY_REWIND) {
1535                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1536                             B_TRUE, nv);
1537                         nvlist_free(nv);
                             /* free the ioctl buffers and validated props */
                             zcmd_free_nvlists(&zc);
                             nvlist_free(props);
1538                         return (-1);
1539                 }
1540
1541                 if (newname == NULL)
1542                         (void) snprintf(desc, sizeof (desc),
1543                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1544                             thename);
1545                 else
1546                         (void) snprintf(desc, sizeof (desc),
1547                             dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1548                             origname, thename);
1549
1550                 switch (error) {
1551                 case ENOTSUP:
1552                         /*
1553                          * Unsupported version.
1554                          */
1555                         (void) zfs_error(hdl, EZFS_BADVERSION, desc);
1556                         break;
1557
1558                 case EINVAL:
1559                         (void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1560                         break;
1561
1562                 case EROFS:
1563                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1564                             "one or more devices is read only"));
1565                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
1566                         break;
1567
1568                 case ENXIO:
1569                         if (nv && nvlist_lookup_nvlist(nv,
1570                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1571                             nvlist_lookup_nvlist(nvinfo,
1572                             ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
1573                                 (void) printf(dgettext(TEXT_DOMAIN,
1574                                     "The devices below are missing; use "
1575                                     "'-m' to import the pool anyway:\n"));
1576                                 print_vdev_tree(hdl, NULL, missing, 2);
1577                                 (void) printf("\n");
1578                         }
1579                         (void) zpool_standard_error(hdl, error, desc);
1580                         break;
1581
1582                 case EEXIST:
1583                         (void) zpool_standard_error(hdl, error, desc);
1584                         break;
1585
1586                 default:
1587                         (void) zpool_standard_error(hdl, error, desc);
1588                         zpool_explain_recover(hdl,
1589                             newname ? origname : thename, -error, nv);
1590                         break;
1591                 }
1592
1593                 nvlist_free(nv);
1594                 ret = -1;
1595         } else {
1596                 zpool_handle_t *zhp;
1597
1598                 /*
1599                  * This should never fail, but play it safe anyway.
1600                  */
1601                 if (zpool_open_silent(hdl, thename, &zhp) != 0)
1602                         ret = -1;
1603                 else if (zhp != NULL)
1604                         zpool_close(zhp);
1605                 if (policy.zrp_request &
1606                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
1607                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1608                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv);
1609                 }
1610                 nvlist_free(nv);
                     /* fall through to the common cleanup, with ret intact */
1612         }
1613
1614         zcmd_free_nvlists(&zc);
1615         nvlist_free(props);
1616
1617         return (ret);
1618 }
1619
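/*
 * Example (illustrative sketch): same import, but with an explicit
 * property list and flags.  Assumes 'hdl' and 'config' as above; the
 * property name comes from zpool_prop_to_name().
 *
 *	nvlist_t *props = NULL;
 *
 *	verify(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
 *	verify(nvlist_add_string(props,
 *	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") == 0);
 *	if (zpool_import_props(hdl, config, "tank2", props,
 *	    ZFS_IMPORT_NORMAL) != 0)
 *		(void) fprintf(stderr, "%s\n",
 *		    libzfs_error_description(hdl));
 *	nvlist_free(props);
 */
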
1620 /*
1621  * Scan the pool.
1622  */
1623 int
1624 zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
1625 {
1626         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1627         char msg[1024];
1628         libzfs_handle_t *hdl = zhp->zpool_hdl;
1629
1630         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1631         zc.zc_cookie = func;
1632
1633         if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
1634             (errno == ENOENT && func != POOL_SCAN_NONE))
1635                 return (0);
1636
1637         if (func == POOL_SCAN_SCRUB) {
1638                 (void) snprintf(msg, sizeof (msg),
1639                     dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
1640         } else if (func == POOL_SCAN_NONE) {
1641                 (void) snprintf(msg, sizeof (msg),
1642                     dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
1643                     zc.zc_name);
1644         } else {
1645                 assert(!"unexpected result");
1646         }
1647
1648         if (errno == EBUSY) {
1649                 nvlist_t *nvroot;
1650                 pool_scan_stat_t *ps = NULL;
1651                 uint_t psc;
1652
1653                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
1654                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
1655                 (void) nvlist_lookup_uint64_array(nvroot,
1656                     ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
1657                 if (ps && ps->pss_func == POOL_SCAN_SCRUB)
1658                         return (zfs_error(hdl, EZFS_SCRUBBING, msg));
1659                 else
1660                         return (zfs_error(hdl, EZFS_RESILVERING, msg));
1661         } else if (errno == ENOENT) {
1662                 return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
1663         } else {
1664                 return (zpool_standard_error(hdl, errno, msg));
1665         }
1666 }
1667
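/*
 * Example (illustrative sketch): start a scrub on an open pool, or
 * cancel a running one by passing POOL_SCAN_NONE instead.  Assumes
 * 'zhp' came from zpool_open().
 *
 *	if (zpool_scan(zhp, POOL_SCAN_SCRUB) != 0)
 *		(void) fprintf(stderr, "%s\n",
 *		    libzfs_error_description(zpool_get_handle(zhp)));
 */
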
1668 /*
1669  * Find a vdev that matches the search criteria specified.  We use the
1670  * nvpair name to determine how we should look for the device.
1671  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
1672  * spare; it is FALSE if the guid refers to an INUSE spare.
1673  */
1674 static nvlist_t *
1675 vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
1676     boolean_t *l2cache, boolean_t *log)
1677 {
1678         uint_t c, children;
1679         nvlist_t **child;
1680         nvlist_t *ret;
1681         uint64_t is_log;
1682         char *srchkey;
1683         nvpair_t *pair = nvlist_next_nvpair(search, NULL);
1684
1685         /* Nothing to look for */
1686         if (search == NULL || pair == NULL)
1687                 return (NULL);
1688
1689         /* Obtain the key we will use to search */
1690         srchkey = nvpair_name(pair);
1691
1692         switch (nvpair_type(pair)) {
1693         case DATA_TYPE_UINT64:
1694                 if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
1695                         uint64_t srchval, theguid;
1696
1697                         verify(nvpair_value_uint64(pair, &srchval) == 0);
1698                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
1699                             &theguid) == 0);
1700                         if (theguid == srchval)
1701                                 return (nv);
1702                 }
1703                 break;
1704
1705         case DATA_TYPE_STRING: {
1706                 char *srchval, *val;
1707
1708                 verify(nvpair_value_string(pair, &srchval) == 0);
1709                 if (nvlist_lookup_string(nv, srchkey, &val) != 0)
1710                         break;
1711
1712                 /*
1713                  * Search for the requested value. Special cases:
1714                  *
1715                  * - ZPOOL_CONFIG_PATH for whole disk entries.  These end with a
1716                  *   partition suffix "1", "-part1", or "p1".  The suffix is hidden
1717                  *   from the user, but included in the string, so this matches around
1718                  *   it.
1719                  * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
1720                  *
1721                  * Otherwise, all other searches are simple string compares.
1722                  */
1723                 if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) {
1724                         uint64_t wholedisk = 0;
1725
1726                         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1727                             &wholedisk);
1728                         if (wholedisk) {
1729                                 char buf[MAXPATHLEN];
1730
1731                                 zfs_append_partition(srchval, buf, sizeof (buf));
1732                                 if (strcmp(val, buf) == 0)
1733                                         return (nv);
1734
1735                                 break;
1736                         }
1737                 } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
1738                         char *type, *idx, *end, *p;
1739                         uint64_t id, vdev_id;
1740
1741                         /*
1742                          * Determine our vdev type, keeping in mind
1743                          * that the srchval is composed of a type and
1744                          * vdev id pair (i.e. mirror-4).
1745                          */
1746                         if ((type = strdup(srchval)) == NULL)
1747                                 return (NULL);
1748
1749                         if ((p = strrchr(type, '-')) == NULL) {
1750                                 free(type);
1751                                 break;
1752                         }
1753                         idx = p + 1;
1754                         *p = '\0';
1755
1756                         /*
1757                          * If the types don't match then keep looking.
1758                          */
1759                         if (strncmp(val, type, strlen(val)) != 0) {
1760                                 free(type);
1761                                 break;
1762                         }
1763
1764                         verify(strncmp(type, VDEV_TYPE_RAIDZ,
1765                             strlen(VDEV_TYPE_RAIDZ)) == 0 ||
1766                             strncmp(type, VDEV_TYPE_MIRROR,
1767                             strlen(VDEV_TYPE_MIRROR)) == 0);
1768                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
1769                             &id) == 0);
1770
1771                         errno = 0;
1772                         vdev_id = strtoull(idx, &end, 10);
1773
1774                         free(type);
1775                         if (errno != 0)
1776                                 return (NULL);
1777
1778                         /*
1779                          * Now verify that we have the correct vdev id.
1780                          */
1781                         if (vdev_id == id)
1782                                 return (nv);
1783                 }
1784
1785                 /*
1786                  * Common case
1787                  */
1788                 if (strcmp(srchval, val) == 0)
1789                         return (nv);
1790                 break;
1791         }
1792
1793         default:
1794                 break;
1795         }
1796
1797         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1798             &child, &children) != 0)
1799                 return (NULL);
1800
1801         for (c = 0; c < children; c++) {
1802                 if ((ret = vdev_to_nvlist_iter(child[c], search,
1803                     avail_spare, l2cache, NULL)) != NULL) {
1804                         /*
1805                          * The 'is_log' value is only set for the toplevel
1806                          * vdev, not the leaf vdevs.  So we always lookup the
1807                          * log device from the root of the vdev tree (where
1808                          * 'log' is non-NULL).
1809                          */
1810                         if (log != NULL &&
1811                             nvlist_lookup_uint64(child[c],
1812                             ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
1813                             is_log) {
1814                                 *log = B_TRUE;
1815                         }
1816                         return (ret);
1817                 }
1818         }
1819
1820         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1821             &child, &children) == 0) {
1822                 for (c = 0; c < children; c++) {
1823                         if ((ret = vdev_to_nvlist_iter(child[c], search,
1824                             avail_spare, l2cache, NULL)) != NULL) {
1825                                 *avail_spare = B_TRUE;
1826                                 return (ret);
1827                         }
1828                 }
1829         }
1830
1831         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1832             &child, &children) == 0) {
1833                 for (c = 0; c < children; c++) {
1834                         if ((ret = vdev_to_nvlist_iter(child[c], search,
1835                             avail_spare, l2cache, NULL)) != NULL) {
1836                                 *l2cache = B_TRUE;
1837                                 return (ret);
1838                         }
1839                 }
1840         }
1841
1842         return (NULL);
1843 }
1844
1845 /*
1846  * Given a physical path (minus the "/devices" prefix), find the
1847  * associated vdev.
1848  */
1849 nvlist_t *
1850 zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
1851     boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
1852 {
1853         nvlist_t *search, *nvroot, *ret;
1854
1855         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1856         verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
1857
1858         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1859             &nvroot) == 0);
1860
1861         *avail_spare = B_FALSE;
1862         *l2cache = B_FALSE;
1863         if (log != NULL)
1864                 *log = B_FALSE;
1865         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
1866         nvlist_free(search);
1867
1868         return (ret);
1869 }
1870
1871 /*
1872  * Determine if we have an "interior" top-level vdev (i.e. mirror/raidz).
1873  */
1874 boolean_t
1875 zpool_vdev_is_interior(const char *name)
1876 {
1877         if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
1878             strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
1879                 return (B_TRUE);
1880         return (B_FALSE);
1881 }
1882
1883 nvlist_t *
1884 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
1885     boolean_t *l2cache, boolean_t *log)
1886 {
1887         char buf[MAXPATHLEN];
1888         char *end;
1889         nvlist_t *nvroot, *search, *ret;
1890         uint64_t guid;
1891
1892         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1893
1894         guid = strtoull(path, &end, 10);
1895         if (guid != 0 && *end == '\0') {
1896                 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
1897         } else if (zpool_vdev_is_interior(path)) {
1898                 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
1899         } else if (path[0] != '/') {
1900                 if (zfs_resolve_shortname(path, buf, sizeof (buf)) < 0) {
1901                         nvlist_free(search);
1902                         return (NULL);
1903                 }
1904                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
1905         } else {
1906                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
1907         }
1908
1909         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1910             &nvroot) == 0);
1911
1912         *avail_spare = B_FALSE;
1913         *l2cache = B_FALSE;
1914         if (log != NULL)
1915                 *log = B_FALSE;
1916         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
1917         nvlist_free(search);
1918
1919         return (ret);
1920 }
1921
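/*
 * Example (illustrative sketch): 'path' may be a vdev guid in decimal,
 * an interior vdev name such as "mirror-0", a short device name such
 * as "sda", or an absolute path.  Assumes 'zhp' is an open pool handle.
 *
 *	boolean_t spare, l2cache, log;
 *	nvlist_t *tgt;
 *
 *	tgt = zpool_find_vdev(zhp, "mirror-0", &spare, &l2cache, &log);
 *	if (tgt == NULL)
 *		return (1);
 */
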
1922 static int
1923 vdev_online(nvlist_t *nv)
1924 {
1925         uint64_t ival;
1926
1927         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
1928             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
1929             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
1930                 return (0);
1931
1932         return (1);
1933 }
1934
1935 /*
1936  * Helper function for zpool_get_physpaths().
1937  */
1938 static int
1939 vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
1940     size_t *bytes_written)
1941 {
1942         size_t bytes_left, pos, rsz;
1943         char *tmppath;
1944         const char *format;
1945
1946         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
1947             &tmppath) != 0)
1948                 return (EZFS_NODEVICE);
1949
1950         pos = *bytes_written;
1951         bytes_left = physpath_size - pos;
1952         format = (pos == 0) ? "%s" : " %s";
1953
1954         rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
1955         *bytes_written += rsz;
1956
1957         if (rsz >= bytes_left) {
1958                 /* if physpath was not copied properly, clear it */
1959                 if (bytes_left != 0) {
1960                         physpath[pos] = 0;
1961                 }
1962                 return (EZFS_NOSPC);
1963         }
1964         return (0);
1965 }
1966
1967 static int
1968 vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
1969     size_t *rsz, boolean_t is_spare)
1970 {
1971         char *type;
1972         int ret;
1973
1974         if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
1975                 return (EZFS_INVALCONFIG);
1976
1977         if (strcmp(type, VDEV_TYPE_DISK) == 0) {
1978                 /*
1979                  * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
1980                  * For a spare vdev, we only want to boot from the active
1981                  * spare device.
1982                  */
1983                 if (is_spare) {
1984                         uint64_t spare = 0;
1985                         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
1986                             &spare);
1987                         if (!spare)
1988                                 return (EZFS_INVALCONFIG);
1989                 }
1990
1991                 if (vdev_online(nv)) {
1992                         if ((ret = vdev_get_one_physpath(nv, physpath,
1993                             phypath_size, rsz)) != 0)
1994                                 return (ret);
1995                 }
1996         } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
1997             strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
1998             (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
1999                 nvlist_t **child;
2000                 uint_t count;
2001                 int i, ret;
2002
2003                 if (nvlist_lookup_nvlist_array(nv,
2004                     ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
2005                         return (EZFS_INVALCONFIG);
2006
2007                 for (i = 0; i < count; i++) {
2008                         ret = vdev_get_physpaths(child[i], physpath,
2009                             phypath_size, rsz, is_spare);
2010                         if (ret == EZFS_NOSPC)
2011                                 return (ret);
2012                 }
2013         }
2014
2015         return (EZFS_POOL_INVALARG);
2016 }
2017
2018 /*
2019  * Get phys_path for a root pool config.
2020  * Return 0 on success; non-zero on failure.
2021  */
2022 static int
2023 zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
2024 {
2025         size_t rsz;
2026         nvlist_t *vdev_root;
2027         nvlist_t **child;
2028         uint_t count;
2029         char *type;
2030
2031         rsz = 0;
2032
2033         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2034             &vdev_root) != 0)
2035                 return (EZFS_INVALCONFIG);
2036
2037         if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2038             nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
2039             &child, &count) != 0)
2040                 return (EZFS_INVALCONFIG);
2041
2042         /*
2043          * A root pool cannot have EFI labeled disks and can only have
2044          * a single top-level vdev.
2045          */
2046         if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 ||
2047             pool_uses_efi(vdev_root))
2048                 return (EZFS_POOL_INVALARG);
2049
2050         (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2051             B_FALSE);
2052
2053         /* No online devices */
2054         if (rsz == 0)
2055                 return (EZFS_NODEVICE);
2056
2057         return (0);
2058 }
2059
2060 /*
2061  * Get phys_path for a root pool
2062  * Return 0 on success; non-zero on failure.
2063  */
2064 int
2065 zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2066 {
2067         return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2068             phypath_size));
2069 }
2070
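/*
 * Example (illustrative sketch): fetch the space-separated physical
 * paths of a root pool's online disks.  Assumes 'zhp' is an open
 * handle to a bootable pool.
 *
 *	char physpath[MAXPATHLEN];
 *
 *	if (zpool_get_physpath(zhp, physpath, sizeof (physpath)) != 0)
 *		return (1);
 */
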
2071 /*
2072  * If the device has been dynamically expanded then we need to relabel
2073  * the disk to use the new unallocated space.
2074  */
2075 static int
2076 zpool_relabel_disk(libzfs_handle_t *hdl, const char *path)
2077 {
2078         char errbuf[1024];
2079         int fd, error;
2080
             /* Prime the error message used by the zfs_error() calls below. */
             (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
                 "cannot relabel '%s'"), path);

2081         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
2082                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2083                     "relabel '%s': unable to open device"), path);
2084                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
2085         }
2086
2087         /*
2088          * It's possible that we might encounter an error if the device
2089          * does not have any unallocated space left. If so, we simply
2090          * ignore that error and continue on.
2091          */
2092         error = efi_use_whole_disk(fd);
2093         (void) close(fd);
2094         if (error && error != VT_ENOSPC) {
2095                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2096                     "relabel '%s': unable to read disk capacity"), path);
2097                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
2098         }
2099         return (0);
2100 }
2101
2102 /*
2103  * Bring the specified vdev online.  The 'flags' parameter is a set of the
2104  * ZFS_ONLINE_* flags.
2105  */
2106 int
2107 zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
2108     vdev_state_t *newstate)
2109 {
2110         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2111         char msg[1024];
2112         nvlist_t *tgt;
2113         boolean_t avail_spare, l2cache, islog;
2114         libzfs_handle_t *hdl = zhp->zpool_hdl;
2115
2116         if (flags & ZFS_ONLINE_EXPAND) {
2117                 (void) snprintf(msg, sizeof (msg),
2118                     dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
2119         } else {
2120                 (void) snprintf(msg, sizeof (msg),
2121                     dgettext(TEXT_DOMAIN, "cannot online %s"), path);
2122         }
2123
2124         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2125         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2126             &islog)) == NULL)
2127                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2128
2129         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2130
2131         if (avail_spare)
2132                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2133
2134         if (flags & ZFS_ONLINE_EXPAND ||
2135             zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
2136                 char *pathname = NULL;
2137                 uint64_t wholedisk = 0;
2138
2139                 (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
2140                     &wholedisk);
2141                 verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
2142                     &pathname) == 0);
2143
2144                 /*
2145                  * XXX - L2ARC 1.0 devices can't support expansion.
2146                  */
2147                 if (l2cache) {
2148                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2149                             "cannot expand cache devices"));
2150                         return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
2151                 }
2152
2153                 if (wholedisk) {
2154                         pathname += strlen(DISK_ROOT) + 1;
2155                         (void) zpool_relabel_disk(hdl, pathname);
2156                 }
2157         }
2158
2159         zc.zc_cookie = VDEV_STATE_ONLINE;
2160         zc.zc_obj = flags;
2161
2162         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
2163                 if (errno == EINVAL) {
2164                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
2165                             "from this pool into a new one.  Use '%s' "
2166                             "instead"), "zpool detach");
2167                         return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
2168                 }
2169                 return (zpool_standard_error(hdl, errno, msg));
2170         }
2171
2172         *newstate = zc.zc_cookie;
2173         return (0);
2174 }
2175
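/*
 * Example (illustrative sketch): online a device and ask it to expand
 * into any grown capacity.  Assumes 'zhp' is an open pool handle and
 * "sda" names a pool member.
 *
 *	vdev_state_t newstate;
 *
 *	if (zpool_vdev_online(zhp, "sda", ZFS_ONLINE_EXPAND,
 *	    &newstate) == 0 && newstate != VDEV_STATE_HEALTHY)
 *		(void) printf("device online but not healthy\n");
 */
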
2176 /*
2177  * Take the specified vdev offline
2178  */
2179 int
2180 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2181 {
2182         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2183         char msg[1024];
2184         nvlist_t *tgt;
2185         boolean_t avail_spare, l2cache;
2186         libzfs_handle_t *hdl = zhp->zpool_hdl;
2187
2188         (void) snprintf(msg, sizeof (msg),
2189             dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2190
2191         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2192         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2193             NULL)) == NULL)
2194                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2195
2196         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2197
2198         if (avail_spare)
2199                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2200
2201         zc.zc_cookie = VDEV_STATE_OFFLINE;
2202         zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2203
2204         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2205                 return (0);
2206
2207         switch (errno) {
2208         case EBUSY:
2210                 /*
2211                  * There are no other replicas of this device.
2212                  */
2213                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2214
2215         case EEXIST:
2216                 /*
2217                  * The log device has unplayed logs
2218                  */
2219                 return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2220
2221         default:
2222                 return (zpool_standard_error(hdl, errno, msg));
2223         }
2224 }
2225
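/*
 * Example (illustrative sketch): offline a device temporarily, i.e.
 * without persisting the offline state across an export/import cycle.
 *
 *	if (zpool_vdev_offline(zhp, "sda", B_TRUE) != 0)
 *		return (1);
 */
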
2226 /*
2227  * Mark the given vdev faulted.
2228  */
2229 int
2230 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2231 {
2232         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2233         char msg[1024];
2234         libzfs_handle_t *hdl = zhp->zpool_hdl;
2235
2236         (void) snprintf(msg, sizeof (msg),
2237            dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
2238
2239         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2240         zc.zc_guid = guid;
2241         zc.zc_cookie = VDEV_STATE_FAULTED;
2242         zc.zc_obj = aux;
2243
2244         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2245                 return (0);
2246
2247         switch (errno) {
2248         case EBUSY:
2250                 /*
2251                  * There are no other replicas of this device.
2252                  */
2253                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2254
2255         default:
2256                 return (zpool_standard_error(hdl, errno, msg));
2257         }
2259 }
2260
2261 /*
2262  * Mark the given vdev degraded.
2263  */
2264 int
2265 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2266 {
2267         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2268         char msg[1024];
2269         libzfs_handle_t *hdl = zhp->zpool_hdl;
2270
2271         (void) snprintf(msg, sizeof (msg),
2272            dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
2273
2274         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2275         zc.zc_guid = guid;
2276         zc.zc_cookie = VDEV_STATE_DEGRADED;
2277         zc.zc_obj = aux;
2278
2279         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2280                 return (0);
2281
2282         return (zpool_standard_error(hdl, errno, msg));
2283 }
2284
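/*
 * Example (illustrative sketch): unlike the path-based calls above,
 * fault and degrade take a vdev guid, since their callers (fmd,
 * zinject) already hold one.  Assumes 'zhp' and a vdev 'guid' are in
 * hand; VDEV_AUX_EXTERNAL records that the state change was requested
 * from outside ZFS.
 *
 *	(void) zpool_vdev_fault(zhp, guid, VDEV_AUX_EXTERNAL);
 *	(void) zpool_vdev_degrade(zhp, guid, VDEV_AUX_EXTERNAL);
 */
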
2285 /*
2286  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
2287  * a hot spare.
2288  */
2289 static boolean_t
2290 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
2291 {
2292         nvlist_t **child;
2293         uint_t c, children;
2294         char *type;
2295
2296         if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
2297             &children) == 0) {
2298                 verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
2299                     &type) == 0);
2300
2301                 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
2302                     children == 2 && child[which] == tgt)
2303                         return (B_TRUE);
2304
2305                 for (c = 0; c < children; c++)
2306                         if (is_replacing_spare(child[c], tgt, which))
2307                                 return (B_TRUE);
2308         }
2309
2310         return (B_FALSE);
2311 }
2312
2313 /*
2314  * Attach new_disk (fully described by nvroot) to old_disk.
2315  * If 'replacing' is specified, the new disk will replace the old one.
2316  */
2317 int
2318 zpool_vdev_attach(zpool_handle_t *zhp,
2319     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
2320 {
2321         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2322         char msg[1024];
2323         int ret;
2324         nvlist_t *tgt;
2325         boolean_t avail_spare, l2cache, islog;
2326         uint64_t val;
2327         char *newname;
2328         nvlist_t **child;
2329         uint_t children;
2330         nvlist_t *config_root;
2331         libzfs_handle_t *hdl = zhp->zpool_hdl;
2332         boolean_t rootpool = pool_is_bootable(zhp);
2333
2334         if (replacing)
2335                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2336                     "cannot replace %s with %s"), old_disk, new_disk);
2337         else
2338                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2339                     "cannot attach %s to %s"), new_disk, old_disk);
2340
2341         /*
2342          * If this is a root pool, make sure that we're not attaching an
2343          * EFI labeled device.
2344          */
2345         if (rootpool && pool_uses_efi(nvroot)) {
2346                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2347                     "EFI labeled devices are not supported on root pools."));
2348                 return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
2349         }
2350
2351         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2352         if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
2353             &islog)) == NULL)
2354                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2355
2356         if (avail_spare)
2357                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2358
2359         if (l2cache)
2360                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2361
2362         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2363         zc.zc_cookie = replacing;
2364
2365         if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
2366             &child, &children) != 0 || children != 1) {
2367                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2368                     "new device must be a single disk"));
2369                 return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
2370         }
2371
2372         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
2373             ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
2374
2375         if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL)
2376                 return (-1);
2377
2378         /*
2379          * If the target is a hot spare that has been swapped in, we can only
2380          * replace it with another hot spare.
2381          */
2382         if (replacing &&
2383             nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
2384             (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
2385             NULL) == NULL || !avail_spare) &&
2386             is_replacing_spare(config_root, tgt, 1)) {
2387                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2388                     "can only be replaced by another hot spare"));
2389                 free(newname);
2390                 return (zfs_error(hdl, EZFS_BADTARGET, msg));
2391         }
2392
2393         free(newname);
2394
2395         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
2396                 return (-1);
2397
2398         ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);
2399
2400         zcmd_free_nvlists(&zc);
2401
2402         if (ret == 0) {
2403                 if (rootpool) {
2404                         /*
2405                          * XXX need a better way to prevent user from
2406                          * booting up a half-baked vdev.
2407                          */
2408                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
2409                             "sure to wait until resilver is done "
2410                             "before rebooting.\n"));
2411                 }
2412                 return (0);
2413         }
2414
2415         switch (errno) {
2416         case ENOTSUP:
2417                 /*
2418                  * Can't attach to or replace this type of vdev.
2419                  */
2420                 if (replacing) {
2421                         uint64_t version = zpool_get_prop_int(zhp,
2422                             ZPOOL_PROP_VERSION, NULL);
2423
2424                         if (islog)
2425                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2426                                     "cannot replace a log with a spare"));
2427                         else if (version >= SPA_VERSION_MULTI_REPLACE)
2428                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2429                                     "already in replacing/spare config; wait "
2430                                     "for completion or use 'zpool detach'"));
2431                         else
2432                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2433                                     "cannot replace a replacing device"));
2434                 } else {
2435                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2436                             "can only attach to mirrors and top-level "
2437                             "disks"));
2438                 }
2439                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2440                 break;
2441
2442         case EINVAL:
2443                 /*
2444                  * The new device must be a single disk.
2445                  */
2446                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2447                     "new device must be a single disk"));
2448                 (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
2449                 break;
2450
2451         case EBUSY:
2452                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
2453                     new_disk);
2454                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2455                 break;
2456
2457         case EOVERFLOW:
2458                 /*
2459                  * The new device is too small.
2460                  */
2461                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2462                     "device is too small"));
2463                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2464                 break;
2465
2466         case EDOM:
2467                 /*
2468                  * The new device has a different alignment requirement.
2469                  */
2470                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2471                     "devices have different sector alignment"));
2472                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2473                 break;
2474
2475         case ENAMETOOLONG:
2476                 /*
2477                  * The resulting top-level vdev spec won't fit in the label.
2478                  */
2479                 (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
2480                 break;
2481
2482         default:
2483                 (void) zpool_standard_error(hdl, errno, msg);
2484         }
2485
2486         return (-1);
2487 }
2488
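/*
 * Example (illustrative sketch): 'nvroot' must describe the new
 * device as a one-child vdev tree; the zpool command builds such a
 * tree with make_root_vdev().  Replacing "sda" with "sdb":
 *
 *	if (zpool_vdev_attach(zhp, "sda", "sdb", nvroot, B_TRUE) != 0)
 *		return (1);
 */
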
2489 /*
2490  * Detach the specified device.
2491  */
2492 int
2493 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
2494 {
2495         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2496         char msg[1024];
2497         nvlist_t *tgt;
2498         boolean_t avail_spare, l2cache;
2499         libzfs_handle_t *hdl = zhp->zpool_hdl;
2500
2501         (void) snprintf(msg, sizeof (msg),
2502             dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
2503
2504         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2505         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2506             NULL)) == NULL)
2507                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2508
2509         if (avail_spare)
2510                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2511
2512         if (l2cache)
2513                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2514
2515         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2516
2517         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
2518                 return (0);
2519
2520         switch (errno) {
2521
2522         case ENOTSUP:
2523                 /*
2524                  * Can't detach from this type of vdev.
2525                  */
2526                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
2527                     "applicable to mirror and replacing vdevs"));
2528                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2529                 break;
2530
2531         case EBUSY:
2532                 /*
2533                  * There are no other replicas of this device.
2534                  */
2535                 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
2536                 break;
2537
2538         default:
2539                 (void) zpool_standard_error(hdl, errno, msg);
2540         }
2541
2542         return (-1);
2543 }
2544
2545 /*
2546  * Find a mirror vdev in the source nvlist.
2547  *
2548  * The mchild array contains a list of disks in one of the top-level mirrors
2549  * of the source pool.  The schild array contains a list of disks that the
2550  * user specified on the command line.  We loop over the mchild array to
2551  * see if any entry in the schild array matches.
2552  *
2553  * If a disk in the mchild array is found in the schild array, we return
2554  * the index of that entry.  Otherwise we return -1.
2555  */
2556 static int
2557 find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
2558     nvlist_t **schild, uint_t schildren)
2559 {
2560         uint_t mc;
2561
2562         for (mc = 0; mc < mchildren; mc++) {
2563                 uint_t sc;
2564                 char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2565                     mchild[mc], B_FALSE);
2566
2567                 for (sc = 0; sc < schildren; sc++) {
2568                         char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2569                             schild[sc], B_FALSE);
2570                         boolean_t result = (strcmp(mpath, spath) == 0);
2571
2572                         free(spath);
2573                         if (result) {
2574                                 free(mpath);
2575                                 return (mc);
2576                         }
2577                 }
2578
2579                 free(mpath);
2580         }
2581
2582         return (-1);
2583 }
2584
2585 /*
2586  * Split a mirror pool.  If newroot points to null, then a new nvlist
2587  * is generated and it is the responsibility of the caller to free it.
2588  */
2589 int
2590 zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
2591     nvlist_t *props, splitflags_t flags)
2592 {
2593         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2594         char msg[1024];
2595         nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
2596         nvlist_t **varray = NULL, *zc_props = NULL;
2597         uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
2598         libzfs_handle_t *hdl = zhp->zpool_hdl;
2599         uint64_t vers;
2600         boolean_t freelist = B_FALSE, memory_err = B_TRUE;
2601         int retval = 0;
2602
2603         (void) snprintf(msg, sizeof (msg),
2604             dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
2605
2606         if (!zpool_name_valid(hdl, B_FALSE, newname))
2607                 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
2608
2609         if ((config = zpool_get_config(zhp, NULL)) == NULL) {
2610                 (void) fprintf(stderr, gettext("Internal error: unable to "
2611                     "retrieve pool configuration\n"));
2612                 return (-1);
2613         }
2614
2615         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
2616             == 0);
2617         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
2618
2619         if (props) {
2620                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
2621                 if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
2622                     props, vers, flags, msg)) == NULL)
2623                         return (-1);
2624         }
2625
2626         if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
2627             &children) != 0) {
2628                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2629                     "Source pool is missing vdev tree"));
2630                 if (zc_props)
2631                         nvlist_free(zc_props);
2632                 return (-1);
2633         }
2634
2635         varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
2636         vcount = 0;
2637
2638         if (*newroot == NULL ||
2639             nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
2640             &newchild, &newchildren) != 0)
2641                 newchildren = 0;
2642
2643         for (c = 0; c < children; c++) {
2644                 uint64_t is_log = B_FALSE, is_hole = B_FALSE;
2645                 char *type;
2646                 nvlist_t **mchild, *vdev;
2647                 uint_t mchildren;
2648                 int entry;
2649
2650                 /*
2651                  * Unlike cache & spares, slogs are stored in the
2652                  * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
2653                  */
2654                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
2655                     &is_log);
2656                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
2657                     &is_hole);
2658                 if (is_log || is_hole) {
2659                         /*
2660                          * Create a hole vdev and put it in the config.
2661                          */
2662                         if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
2663                                 goto out;
2664                         if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
2665                             VDEV_TYPE_HOLE) != 0)
2666                                 goto out;
2667                         if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
2668                             1) != 0)
2669                                 goto out;
2670                         if (lastlog == 0)
2671                                 lastlog = vcount;
2672                         varray[vcount++] = vdev;
2673                         continue;
2674                 }
2675                 lastlog = 0;
2676                 verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
2677                     == 0);
2678                 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
2679                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2680                             "Source pool must be composed only of mirrors\n"));
2681                         retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
2682                         goto out;
2683                 }
2684
2685                 verify(nvlist_lookup_nvlist_array(child[c],
2686                     ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
2687
2688                 /* find or add an entry for this top-level vdev */
2689                 if (newchildren > 0 &&
2690                     (entry = find_vdev_entry(zhp, mchild, mchildren,
2691                     newchild, newchildren)) >= 0) {
2692                         /* We found a disk that the user specified. */
2693                         vdev = mchild[entry];
2694                         ++found;
2695                 } else {
2696                         /* User didn't specify a disk for this vdev. */
2697                         vdev = mchild[mchildren - 1];
2698                 }
2699
2700                 if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
2701                         goto out;
2702         }
2703
2704         /* did we find every disk the user specified? */
2705         if (found != newchildren) {
2706                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
2707                     "include at most one disk from each mirror"));
2708                 retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
2709                 goto out;
2710         }
2711
2712         /* Prepare the nvlist for populating. */
2713         if (*newroot == NULL) {
2714                 if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
2715                         goto out;
2716                 freelist = B_TRUE;
2717                 if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
2718                     VDEV_TYPE_ROOT) != 0)
2719                         goto out;
2720         } else {
2721                 verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
2722         }
2723
2724         /* Add all the children we found */
2725         if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
2726             lastlog == 0 ? vcount : lastlog) != 0)
2727                 goto out;
2728
2729         /*
2730          * If we're just doing a dry run, exit now with success.
2731          */
2732         if (flags.dryrun) {
2733                 memory_err = B_FALSE;
2734                 freelist = B_FALSE;
2735                 goto out;
2736         }
2737
2738         /* now build up the config list & call the ioctl */
2739         if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
2740                 goto out;
2741
2742         if (nvlist_add_nvlist(newconfig,
2743             ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
2744             nvlist_add_string(newconfig,
2745             ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
2746             nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
2747                 goto out;
2748
2749         /*
2750          * The new pool is automatically part of the namespace unless we
2751          * explicitly export it.
2752          */
2753         if (!flags.import)
2754                 zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
2755         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2756         (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
2757         if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
2758                 goto out;
2759         if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
2760                 goto out;
2761
2762         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
2763                 retval = zpool_standard_error(hdl, errno, msg);
2764                 goto out;
2765         }
2766
2767         freelist = B_FALSE;
2768         memory_err = B_FALSE;
2769
2770 out:
2771         if (varray != NULL) {
2772                 int v;
2773
2774                 for (v = 0; v < vcount; v++)
2775                         nvlist_free(varray[v]);
2776                 free(varray);
2777         }
2778         zcmd_free_nvlists(&zc);
2779         if (zc_props)
2780                 nvlist_free(zc_props);
2781         if (newconfig)
2782                 nvlist_free(newconfig);
2783         if (freelist) {
2784                 nvlist_free(*newroot);
2785                 *newroot = NULL;
2786         }
2787
2788         if (retval != 0)
2789                 return (retval);
2790
2791         if (memory_err)
2792                 return (no_memory(hdl));
2793
2794         return (0);
2795 }
2796
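/*
 * Example (illustrative sketch): dry-run split of a mirrored pool,
 * leaving '*newroot' describing the config the split would produce.
 * Assumes every top-level vdev in 'zhp' is a mirror.
 *
 *	splitflags_t flags = { 0 };
 *	nvlist_t *newroot = NULL;
 *
 *	flags.dryrun = 1;
 *	if (zpool_vdev_split(zhp, "tank2", &newroot, NULL, flags) == 0)
 *		nvlist_free(newroot);
 */
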
2797 /*
2798  * Remove the given device.  Currently, this is supported only for hot spares
2799  * and level 2 cache devices.
2800  */
2801 int
2802 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
2803 {
2804         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2805         char msg[1024];
2806         nvlist_t *tgt;
2807         boolean_t avail_spare, l2cache, islog;
2808         libzfs_handle_t *hdl = zhp->zpool_hdl;
2809         uint64_t version;
2810
2811         (void) snprintf(msg, sizeof (msg),
2812             dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
2813
2814         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2815         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2816             &islog)) == NULL)
2817                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2818         /*
2819          * XXX - this should just go away.
2820          */
2821         if (!avail_spare && !l2cache && !islog) {
2822                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2823                     "only inactive hot spares, cache, top-level, "
2824                     "or log devices can be removed"));
2825                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2826         }
2827
2828         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
2829         if (islog && version < SPA_VERSION_HOLES) {
2830                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2831                     "pool must be upgrade to support log removal"));
2832                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
2833         }
2834
2835         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2836
2837         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
2838                 return (0);
2839
2840         return (zpool_standard_error(hdl, errno, msg));
2841 }
2842
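/*
 * Example (illustrative sketch): remove an inactive hot spare or
 * cache device by name.
 *
 *	if (zpool_vdev_remove(zhp, "sdc") != 0)
 *		return (1);
 */
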
2843 /*
2844  * Clear the errors for the pool, or the particular device if specified.
2845  */
2846 int
2847 zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
2848 {
2849         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2850         char msg[1024];
2851         nvlist_t *tgt;
2852         zpool_rewind_policy_t policy;
2853         boolean_t avail_spare, l2cache;
2854         libzfs_handle_t *hdl = zhp->zpool_hdl;
2855         nvlist_t *nvi = NULL;
2856         int error;
2857
2858         if (path)
2859                 (void) snprintf(msg, sizeof (msg),
2860                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
2861                     path);
2862         else
2863                 (void) snprintf(msg, sizeof (msg),
2864                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
2865                     zhp->zpool_name);
2866
2867         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2868         if (path) {
2869                 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
2870                     &l2cache, NULL)) == NULL)
2871                         return (zfs_error(hdl, EZFS_NODEVICE, msg));
2872
2873                 /*
2874                  * Don't allow error clearing for hot spares.  Do allow
2875                  * error clearing for l2cache devices.
2876                  */
2877                 if (avail_spare)
2878                         return (zfs_error(hdl, EZFS_ISSPARE, msg));
2879
2880                 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
2881                     &zc.zc_guid) == 0);
2882         }
2883
2884         zpool_get_rewind_policy(rewindnvl, &policy);
2885         zc.zc_cookie = policy.zrp_request;
2886
2887         if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
2888                 return (-1);
2889
2890         if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
2891                 return (-1);
2892
2893         while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
2894             errno == ENOMEM) {
2895                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
2896                         zcmd_free_nvlists(&zc);
2897                         return (-1);
2898                 }
2899         }
2900
2901         if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
2902             errno != EPERM && errno != EACCES)) {
2903                 if (policy.zrp_request &
2904                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
2905                         (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
2906                         zpool_rewind_exclaim(hdl, zc.zc_name,
2907                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
2908                             nvi);
2909                         nvlist_free(nvi);
2910                 }
2911                 zcmd_free_nvlists(&zc);
2912                 return (0);
2913         }
2914
2915         zcmd_free_nvlists(&zc);
2916         return (zpool_standard_error(hdl, errno, msg));
2917 }
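
/*
 * Illustrative sketch, not part of libzfs: clearing pool-wide errors on
 * an open handle 'zhp' without requesting a rewind.  Building the policy
 * nvlist this way mirrors how a 'zpool clear' style caller might do it;
 * the exact construction is an assumption here.
 *
 *	nvlist_t *policy = NULL;
 *
 *	if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) == 0 &&
 *	    nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST,
 *	    ZPOOL_NO_REWIND) == 0)
 *		(void) zpool_clear(zhp, NULL, policy);
 *	nvlist_free(policy);
 */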
2918
2919 /*
2920  * Similar to zpool_clear(), but takes a GUID (used by fmd).
2921  */
2922 int
2923 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
2924 {
2925         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2926         char msg[1024];
2927         libzfs_handle_t *hdl = zhp->zpool_hdl;
2928
2929         (void) snprintf(msg, sizeof (msg),
2930             dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
2931            (u_longlong_t)guid);
2932
2933         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2934         zc.zc_guid = guid;
2935         zc.zc_cookie = ZPOOL_NO_REWIND;
2936
2937         if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
2938                 return (0);
2939
2940         return (zpool_standard_error(hdl, errno, msg));
2941 }
2942
2943 /*
2944  * Convert from a devid string to a path.
2945  */
2946 static char *
2947 devid_to_path(char *devid_str)
2948 {
2949         ddi_devid_t devid;
2950         char *minor;
2951         char *path;
2952         devid_nmlist_t *list = NULL;
2953         int ret;
2954
2955         if (devid_str_decode(devid_str, &devid, &minor) != 0)
2956                 return (NULL);
2957
2958         ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
2959
2960         devid_str_free(minor);
2961         devid_free(devid);
2962
2963         if (ret != 0)
2964                 return (NULL);
2965
2966         if ((path = strdup(list[0].devname)) == NULL)
2967                 return (NULL);
2968
2969         devid_free_nmlist(list);
2970
2971         return (path);
2972 }
2973
2974 /*
2975  * Convert from a path to a devid string.
2976  */
2977 static char *
2978 path_to_devid(const char *path)
2979 {
2980         int fd;
2981         ddi_devid_t devid;
2982         char *minor, *ret;
2983
2984         if ((fd = open(path, O_RDONLY)) < 0)
2985                 return (NULL);
2986
2987         minor = NULL;
2988         ret = NULL;
2989         if (devid_get(fd, &devid) == 0) {
2990                 if (devid_get_minor_name(fd, &minor) == 0)
2991                         ret = devid_str_encode(devid, minor);
2992                 if (minor != NULL)
2993                         devid_str_free(minor);
2994                 devid_free(devid);
2995         }
2996         (void) close(fd);
2997
2998         return (ret);
2999 }
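
/*
 * Illustrative sketch, not part of libzfs: the two helpers above are
 * intended to round-trip, modulo device reconfiguration.  The path below
 * is hypothetical.
 *
 *	char *devid, *path;
 *
 *	if ((devid = path_to_devid("/dev/dsk/c1t0d0s0")) != NULL) {
 *		path = devid_to_path(devid);
 *		if (path != NULL)
 *			(void) printf("device now at %s\n", path);
 *		free(path);
 *		devid_str_free(devid);
 *	}
 */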
3000
3001 /*
3002  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
3003  * ignore any failure here, since a common case is for an unprivileged user to
3004  * type 'zpool status', and we'll display the correct information anyway.
3005  */
3006 static void
3007 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
3008 {
3009         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3010
3011         (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3012         (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
3013         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3014             &zc.zc_guid) == 0);
3015
3016         (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
3017 }
3018
3019 /*
3020  * Remove partition suffix from a vdev path.  Partition suffixes may take three
3021  * forms: "-partX", "pX", or "X", where X is a string of digits.  The second
3022  * case only occurs when the suffix is preceded by a digit, i.e. "md0p0".  The
3023  * third case only occurs when preceded by a string matching the regular
3024  * expression "^[hs]d[a-z]+", i.e. a scsi or ide disk.
3025  */
3026 static char *
3027 strip_partition(libzfs_handle_t *hdl, char *path)
3028 {
3029         char *tmp = zfs_strdup(hdl, path);
3030         char *part = NULL, *d = NULL;
3031
3032         if ((part = strstr(tmp, "-part")) && part != tmp) {
3033                 d = part + 5;
3034         } else if ((part = strrchr(tmp, 'p')) &&
3035             part > tmp + 1 && isdigit(*(part-1))) {
3036                 d = part + 1;
3037         } else if ((tmp[0] == 'h' || tmp[0] == 's') && tmp[1] == 'd') {
3038                 for (d = &tmp[2]; isalpha(*d); part = ++d);
3039         }
3040         if (part && d && *d != '\0') {
3041                 for (; isdigit(*d); d++);
3042                 if (*d == '\0')
3043                         *part = '\0';
3044         }
3045         return (tmp);
3046 }
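
/*
 * Examples of the three suffix forms handled above (device names are
 * hypothetical).  The returned string is allocated with zfs_strdup() and
 * must be freed by the caller.
 *
 *	"ata-HYPOTHETICAL_DISK-part1"	->	"ata-HYPOTHETICAL_DISK"
 *	"md0p0"				->	"md0"
 *	"sda1"				->	"sda"
 */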
3047
3048 #define PATH_BUF_LEN    64
3049
3050 /*
3051  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
3052  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3053  * We also check if this is a whole disk, in which case we strip off the
3054  * trailing 's0' slice name.
3055  *
3056  * This routine is also responsible for identifying when disks have been
3057  * reconfigured in a new location.  The kernel will have opened the device by
3058  * devid, but the path will still refer to the old location.  To catch this, we
3059  * first do a path -> devid translation (which is fast for the common case).  If
3060  * the devid matches, we're done.  If not, we do a reverse devid -> path
3061  * translation and issue the appropriate ioctl() to update the path of the vdev.
3062  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
3063  * of these checks.
3064  */
3065 char *
3066 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
3067     boolean_t verbose)
3068 {
3069         char *path, *devid, *type;
3070         uint64_t value;
3071         char buf[PATH_BUF_LEN];
3071         char tmpbuf[PATH_BUF_LEN];      /* 'path' may point here below */
3072         vdev_stat_t *vs;
3073         uint_t vsc;
3074
3075         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
3076             &value) == 0) {
3077                 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3078                     &value) == 0);
3079                 (void) snprintf(buf, sizeof (buf), "%llu",
3080                     (u_longlong_t)value);
3081                 path = buf;
3082         } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
3083                 /*
3084                  * If the device is dead (faulted, offline, etc) then don't
3085                  * bother opening it.  Otherwise we may be forcing the user to
3086                  * open a misbehaving device, which can have undesirable
3087                  * effects.
3088                  */
3089                 if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
3090                     (uint64_t **)&vs, &vsc) != 0 ||
3091                     vs->vs_state >= VDEV_STATE_DEGRADED) &&
3092                     zhp != NULL &&
3093                     nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
3094                         /*
3095                          * Determine if the current path is correct.
3096                          */
3097                         char *newdevid = path_to_devid(path);
3098
3099                         if (newdevid == NULL ||
3100                             strcmp(devid, newdevid) != 0) {
3101                                 char *newpath;
3102
3103                                 if ((newpath = devid_to_path(devid)) != NULL) {
3104                                         /*
3105                                          * Update the path appropriately.
3106                                          */
3107                                         set_path(zhp, nv, newpath);
3108                                         if (nvlist_add_string(nv,
3109                                             ZPOOL_CONFIG_PATH, newpath) == 0)
3110                                                 verify(nvlist_lookup_string(nv,
3111                                                     ZPOOL_CONFIG_PATH,
3112                                                     &path) == 0);
3113                                         free(newpath);
3114                                 }
3115                         }
3116
3117                         if (newdevid)
3118                                 devid_str_free(newdevid);
3119                 }
3120
3121                 /*
3122                  * For a block device only use the name.
3123                  */
3124                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
3125                 if (strcmp(type, VDEV_TYPE_DISK) == 0) {
3126                         path = strrchr(path, '/');
3127                         path++;
3128                 }
3129
3130                 /*
3131                  * Remove the partition from the path if this is a whole disk.
3132                  */
3133                 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
3134                     &value) == 0 && value) {
3135                         return (strip_partition(hdl, path));
3136                 }
3137         } else {
3138                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
3139
3140                 /*
3141                  * If it's a raidz device, we need to stick in the parity level.
3142                  */
3143                 if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
3146                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
3147                             &value) == 0);
3148                         (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%llu", path,
3149                             (u_longlong_t)value);
3150                         path = tmpbuf;
3151                 }
3152
3153                 /*
3154                  * We identify each top-level vdev by using a <type-id>
3155                  * naming convention.
3156                  */
3157                 if (verbose) {
3158                         uint64_t id;
3159
3160                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
3161                             &id) == 0);
3162                         (void) snprintf(buf, sizeof (buf), "%s-%llu", path,
3163                             (u_longlong_t)id);
3164                         path = buf;
3165                 }
3166         }
3167
3168         return (zfs_strdup(hdl, path));
3169 }
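
/*
 * Illustrative sketch, not part of libzfs: printing the display name of
 * a pool's root vdev, given an open handle 'zhp'.  The result is
 * allocated and must be freed by the caller.
 *
 *	nvlist_t *config, *nvroot;
 *	char *name;
 *
 *	config = zpool_get_config(zhp, NULL);
 *	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 *	    &nvroot) == 0);
 *	if ((name = zpool_vdev_name(hdl, zhp, nvroot, B_FALSE)) != NULL) {
 *		(void) printf("%s\n", name);
 *		free(name);
 *	}
 */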
3170
3171 static int
3172 zbookmark_compare(const void *a, const void *b)
3173 {
3174         return (memcmp(a, b, sizeof (zbookmark_t)));
3175 }
3176
3177 /*
3178  * Retrieve the persistent error log, uniquify the members, and return to the
3179  * caller.
3180  */
3181 int
3182 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
3183 {
3184         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3185         uint64_t count;
3186         zbookmark_t *zb = NULL;
3187         int i;
3188
3189         /*
3190          * Retrieve the raw error list from the kernel.  If the number of errors
3191          * has increased, allocate more space and continue until we get the
3192          * entire list.
3193          */
3194         verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
3195             &count) == 0);
3196         if (count == 0)
3197                 return (0);
3198         if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
3199             count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
3200                 return (-1);
3201         zc.zc_nvlist_dst_size = count;
3202         (void) strcpy(zc.zc_name, zhp->zpool_name);
3203         for (;;) {
3204                 if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
3205                     &zc) != 0) {
3206                         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3207                         if (errno == ENOMEM) {
3208                                 count = zc.zc_nvlist_dst_size;
3209                                 if ((zc.zc_nvlist_dst = (uintptr_t)
3210                                     zfs_alloc(zhp->zpool_hdl, count *
3211                                     sizeof (zbookmark_t))) == (uintptr_t)NULL)
3212                                         return (-1);
3213                         } else {
3214                                 return (-1);
3215                         }
3216                 } else {
3217                         break;
3218                 }
3219         }
3220
3221         /*
3222          * Sort the resulting bookmarks.  This is a little confusing due to the
3223          * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
3224          * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
3225          * _not_ copied.  So we point the start of our array appropriately and
3226          * decrement the count (e.g. 3 of 10 not copied: 7 valid, from index 3).
3227          */
3228         zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
3229             zc.zc_nvlist_dst_size;
3230         count -= zc.zc_nvlist_dst_size;
3231
3232         qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
3233
3234         verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
3235
3236         /*
3237          * Fill in the nverrlistp with nvlist's of dataset and object numbers.
3238          */
3239         for (i = 0; i < count; i++) {
3240                 nvlist_t *nv;
3241
3242                 /* ignoring zb_blkid and zb_level for now */
3243                 if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
3244                     zb[i-1].zb_object == zb[i].zb_object)
3245                         continue;
3246
3247                 if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
3248                         goto nomem;
3249                 if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
3250                     zb[i].zb_objset) != 0) {
3251                         nvlist_free(nv);
3252                         goto nomem;
3253                 }
3254                 if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
3255                     zb[i].zb_object) != 0) {
3256                         nvlist_free(nv);
3257                         goto nomem;
3258                 }
3259                 if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
3260                         nvlist_free(nv);
3261                         goto nomem;
3262                 }
3263                 nvlist_free(nv);
3264         }
3265
3266         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3267         return (0);
3268
3269 nomem:
3270         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3271         return (no_memory(zhp->zpool_hdl));
3272 }
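
/*
 * Illustrative sketch, not part of libzfs: walking the error list on an
 * open handle 'zhp'.  Note that when the pool reports no errors,
 * *nverrlistp is left untouched, hence the NULL initialization and check.
 *
 *	nvlist_t *nverrlist = NULL;
 *	nvpair_t *elem = NULL;
 *
 *	if (zpool_get_errlog(zhp, &nverrlist) == 0 && nverrlist != NULL) {
 *		while ((elem = nvlist_next_nvpair(nverrlist, elem)) != NULL) {
 *			nvlist_t *nv;
 *			uint64_t dsobj, obj;
 *			char pathbuf[MAXPATHLEN];
 *
 *			verify(nvpair_value_nvlist(elem, &nv) == 0);
 *			verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_DATASET,
 *			    &dsobj) == 0);
 *			verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_OBJECT,
 *			    &obj) == 0);
 *			zpool_obj_to_path(zhp, dsobj, obj, pathbuf,
 *			    sizeof (pathbuf));
 *			(void) printf("%s\n", pathbuf);
 *		}
 *		nvlist_free(nverrlist);
 *	}
 */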
3273
3274 /*
3275  * Upgrade a ZFS pool to the latest on-disk version.
3276  */
3277 int
3278 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
3279 {
3280         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3281         libzfs_handle_t *hdl = zhp->zpool_hdl;
3282
3283         (void) strcpy(zc.zc_name, zhp->zpool_name);
3284         zc.zc_cookie = new_version;
3285
3286         if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
3287                 return (zpool_standard_error_fmt(hdl, errno,
3288                     dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
3289                     zhp->zpool_name));
3290         return (0);
3291 }
3292
3293 void
3294 zpool_set_history_str(const char *subcommand, int argc, char **argv,
3295     char *history_str)
3296 {
3297         int i;
3298
3299         (void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
3300         for (i = 1; i < argc; i++) {
3301                 if (strlen(history_str) + 1 + strlen(argv[i]) >
3302                     HIS_MAX_RECORD_LEN)
3303                         break;
3304                 (void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
3305                 (void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
3306         }
3307 }
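
/*
 * For example (hypothetical arguments), given subcommand "zpool" and
 * argv = { "zpool", "clear", "tank" } with argc == 3, history_str is
 * filled in as "zpool clear tank", which can then be handed to
 * zpool_stage_history() below.
 */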
3308
3309 /*
3310  * Stage command history for logging.
3311  */
3312 int
3313 zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
3314 {
3315         if (history_str == NULL)
3316                 return (EINVAL);
3317
3318         if (strlen(history_str) > HIS_MAX_RECORD_LEN)
3319                 return (EINVAL);
3320
3321         if (hdl->libzfs_log_str != NULL)
3322                 free(hdl->libzfs_log_str);
3323
3324         if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
3325                 return (no_memory(hdl));
3326
3327         return (0);
3328 }
3329
3330 /*
3331  * Perform ioctl to get some command history of a pool.
3332  *
3333  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
3334  * logical offset of the history buffer to start reading from.
3335  *
3336  * Upon return, 'off' is the next logical offset to read from and
3337  * 'len' is the actual amount of bytes read into 'buf'.
3338  */
3339 static int
3340 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
3341 {
3342         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3343         libzfs_handle_t *hdl = zhp->zpool_hdl;
3344
3345         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3346
3347         zc.zc_history = (uint64_t)(uintptr_t)buf;
3348         zc.zc_history_len = *len;
3349         zc.zc_history_offset = *off;
3350
3351         if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
3352                 switch (errno) {
3353                 case EPERM:
3354                         return (zfs_error_fmt(hdl, EZFS_PERM,
3355                             dgettext(TEXT_DOMAIN,
3356                             "cannot show history for pool '%s'"),
3357                             zhp->zpool_name));
3358                 case ENOENT:
3359                         return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
3360                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3361                             "'%s'"), zhp->zpool_name));
3362                 case ENOTSUP:
3363                         return (zfs_error_fmt(hdl, EZFS_BADVERSION,
3364                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3365                             "'%s', pool must be upgraded"), zhp->zpool_name));
3366                 default:
3367                         return (zpool_standard_error_fmt(hdl, errno,
3368                             dgettext(TEXT_DOMAIN,
3369                             "cannot get history for '%s'"), zhp->zpool_name));
3370                 }
3371         }
3372
3373         *len = zc.zc_history_len;
3374         *off = zc.zc_history_offset;
3375
3376         return (0);
3377 }
3378
3379 /*
3380  * Process the buffer of nvlists, unpacking and storing each nvlist record
3381  * into 'records'.  'leftover' is set to the number of bytes that weren't
3382  * processed as there wasn't a complete record.
3383  */
3384 int
3385 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
3386     nvlist_t ***records, uint_t *numrecords)
3387 {
3388         uint64_t reclen;
3389         nvlist_t *nv;
3390         int i;
3391
3392         while (bytes_read > sizeof (reclen)) {
3393
3394                 /* get length of packed record (stored as little endian) */
3395                 for (i = 0, reclen = 0; i < sizeof (reclen); i++)
3396                         reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
3397
3398                 if (bytes_read < sizeof (reclen) + reclen)
3399                         break;
3400
3401                 /* unpack record */
3402                 if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
3403                         return (ENOMEM);
3404                 bytes_read -= sizeof (reclen) + reclen;
3405                 buf += sizeof (reclen) + reclen;
3406
3407                 /* add record to nvlist array */
3408                 (*numrecords)++;
3409                 if (ISP2(*numrecords + 1)) {
3410                         *records = realloc(*records,
3411                             *numrecords * 2 * sizeof (nvlist_t *));
3412                 }
3413                 (*records)[*numrecords - 1] = nv;
3414         }
3415
3416         *leftover = bytes_read;
3417         return (0);
3418 }
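
/*
 * Record layout, by way of a worked example: each record is an 8-byte
 * little-endian length followed by that many bytes of packed nvlist.
 * If the buffer begins 0x2a 0x00 0x00 0x00 0x00 0x00 0x00 0x00, then
 * reclen == 42, the packed nvlist occupies bytes [8, 50), and the next
 * record (if any) starts at offset 50.
 */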
3419
3420 #define HIS_BUF_LEN     (128*1024)
3421
3422 /*
3423  * Retrieve the command history of a pool.
3424  */
3425 int
3426 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
3427 {
3428         char buf[HIS_BUF_LEN];
3429         uint64_t off = 0;
3430         nvlist_t **records = NULL;
3431         uint_t numrecords = 0;
3432         int err, i;
3433
3434         do {
3435                 uint64_t bytes_read = sizeof (buf);
3436                 uint64_t leftover;
3437
3438                 if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
3439                         break;
3440
3441                 /* if nothing else was read in, we're at EOF, just return */
3442                 if (!bytes_read)
3443                         break;
3444
3445                 if ((err = zpool_history_unpack(buf, bytes_read,
3446                     &leftover, &records, &numrecords)) != 0)
3447                         break;
3448                 off -= leftover;
3449
3450                 /* CONSTCOND */
3451         } while (1);
3452
3453         if (!err) {
3454                 verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
3455                 verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
3456                     records, numrecords) == 0);
3457         }
3458         for (i = 0; i < numrecords; i++)
3459                 nvlist_free(records[i]);
3460         free(records);
3461
3462         return (err);
3463 }
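
/*
 * Illustrative sketch, not part of libzfs: printing the command line of
 * each history record on an open handle 'zhp'.
 *
 *	nvlist_t *nvhis, **records;
 *	uint_t numrecords, i;
 *	char *cmd;
 *
 *	if (zpool_get_history(zhp, &nvhis) == 0) {
 *		verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD,
 *		    &records, &numrecords) == 0);
 *		for (i = 0; i < numrecords; i++) {
 *			if (nvlist_lookup_string(records[i], ZPOOL_HIST_CMD,
 *			    &cmd) == 0)
 *				(void) printf("%s\n", cmd);
 *		}
 *		nvlist_free(nvhis);
 *	}
 */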
3464
3465 /*
3466  * Retrieve the next event.  If there is a new event available 'nvp' will
3467  * contain a newly allocated nvlist and 'dropped' will be set to the number
3468  * of missed events since the last call to this function.  When 'nvp' is
3469  * set to NULL it indicates no new events are available.  In either case
3470  * the function returns 0 and it is up to the caller to free 'nvp'.  In
3471  * the case of a fatal error the function will return a non-zero value.
3472  * When the function is called in blocking mode it will not return until
3473  * a new event is available.
3474  */
3475 int
3476 zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
3477     int *dropped, int block, int cleanup_fd)
3478 {
3479         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3480         int error = 0;
3481
3482         *nvp = NULL;
3483         *dropped = 0;
3484         zc.zc_cleanup_fd = cleanup_fd;
3485
3486         if (!block)
3487                 zc.zc_guid = ZEVENT_NONBLOCK;
3488
3489         if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
3490                 return (-1);
3491
3492 retry:
3493         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
3494                 switch (errno) {
3495                 case ESHUTDOWN:
3496                         error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
3497                             dgettext(TEXT_DOMAIN, "zfs shutdown"));
3498                         goto out;
3499                 case ENOENT:
3500                         /* Blocking error case should not occur */
3501                         if (block)
3502                                 error = zpool_standard_error_fmt(hdl, errno,
3503                                     dgettext(TEXT_DOMAIN, "cannot get event"));
3504
3505                         goto out;
3506                 case ENOMEM:
3507                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3508                                 error = zfs_error_fmt(hdl, EZFS_NOMEM,
3509                                     dgettext(TEXT_DOMAIN, "cannot get event"));
3510                                 goto out;
3511                         } else {
3512                                 goto retry;
3513                         }
3514                 default:
3515                         error = zpool_standard_error_fmt(hdl, errno,
3516                             dgettext(TEXT_DOMAIN, "cannot get event"));
3517                         goto out;
3518                 }
3519         }
3520
3521         error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
3522         if (error != 0)
3523                 goto out;
3524
3525         *dropped = (int)zc.zc_cookie;
3526 out:
3527         zcmd_free_nvlists(&zc);
3528
3529         return (error);
3530 }
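
/*
 * Illustrative sketch, not part of libzfs: a blocking consumer loop.
 * Opening ZFS_DEV for the cleanup descriptor follows the pattern used by
 * the zpool(8) events code and is an assumption here.
 *
 *	nvlist_t *nvl;
 *	int dropped, cleanup_fd;
 *
 *	if ((cleanup_fd = open(ZFS_DEV, O_RDWR)) < 0)
 *		return (-1);
 *	while (zpool_events_next(hdl, &nvl, &dropped, 1, cleanup_fd) == 0) {
 *		if (nvl == NULL)
 *			continue;
 *		if (dropped > 0)
 *			(void) fprintf(stderr, "missed %d events\n", dropped);
 *		nvlist_print(stdout, nvl);
 *		nvlist_free(nvl);
 *	}
 *	(void) close(cleanup_fd);
 */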
3531
3532 /*
3533  * Clear all events.
3534  */
3535 int
3536 zpool_events_clear(libzfs_handle_t *hdl, int *count)
3537 {
3538         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3539         char msg[1024];
3540
3541         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
3542             "cannot clear events"));
3543
3544         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
3545                 return (zpool_standard_error_fmt(hdl, errno, msg));
3546
3547         if (count != NULL)
3548                 *count = (int)zc.zc_cookie; /* # of events cleared */
3549
3550         return (0);
3551 }
3552
3553 void
3554 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
3555     char *pathname, size_t len)
3556 {
3557         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3558         boolean_t mounted = B_FALSE;
3559         char *mntpnt = NULL;
3560         char dsname[MAXNAMELEN];
3561
3562         if (dsobj == 0) {
3563                 /* special case for the MOS */
3564                 (void) snprintf(pathname, len, "<metadata>:<0x%llx>", (longlong_t)obj);
3565                 return;
3566         }
3567
3568         /* get the dataset's name */
3569         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3570         zc.zc_obj = dsobj;
3571         if (ioctl(zhp->zpool_hdl->libzfs_fd,
3572             ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
3573                 /* just write out a path of two object numbers */
3574                 (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
3575                     (longlong_t)dsobj, (longlong_t)obj);
3576                 return;
3577         }
3578         (void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
3579
3580         /* find out if the dataset is mounted */
3581         mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
3582
3583         /* get the corrupted object's path */
3584         (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
3585         zc.zc_obj = obj;
3586         if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
3587             &zc) == 0) {
3588                 if (mounted) {
3589                         (void) snprintf(pathname, len, "%s%s", mntpnt,
3590                             zc.zc_value);
3591                 } else {
3592                         (void) snprintf(pathname, len, "%s:%s",
3593                             dsname, zc.zc_value);
3594                 }
3595         } else {
3596                 (void) snprintf(pathname, len, "%s:<0x%llx>", dsname, (longlong_t)obj);
3597         }
3598         free(mntpnt);
3599 }
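
/*
 * The pathnames produced above take one of the following forms (all
 * names hypothetical):
 *
 *	"<metadata>:<0x1b>"		MOS object, dsobj == 0
 *	"/tank/fs/some/file"		dataset mounted, path resolved
 *	"tank/fs:/some/file"		dataset not mounted, path resolved
 *	"tank/fs:<0x45>"		object path could not be resolved
 *	"<0x10>:<0x45>"			dataset name could not be resolved
 */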
3600
3601 /*
3602  * Read the EFI label from the config, if a label does not exist then
3603  * pass back the error to the caller. If the caller has passed a non-NULL
3604  * diskaddr argument then we set it to the starting address of the EFI
3605  * partition.
3606  */
3607 static int
3608 read_efi_label(nvlist_t *config, diskaddr_t *sb)
3609 {
3610         char *path;
3611         int fd;
3612         char diskname[MAXPATHLEN];
3613         int err = -1;
3614
3615         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
3616                 return (err);
3617
3618         (void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
3619             strrchr(path, '/'));
3620         if ((fd = open(diskname, O_RDWR|O_DIRECT)) >= 0) {
3621                 struct dk_gpt *vtoc;
3622
3623                 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
3624                         if (sb != NULL)
3625                                 *sb = vtoc->efi_parts[0].p_start;
3626                         efi_free(vtoc);
3627                 }
3628                 (void) close(fd);
3629         }
3630         return (err);
3631 }
3632
3633 /*
3634  * determine where a partition starts on a disk in the current
3635  * configuration
3636  */
3637 static diskaddr_t
3638 find_start_block(nvlist_t *config)
3639 {
3640         nvlist_t **child;
3641         uint_t c, children;
3642         diskaddr_t sb = MAXOFFSET_T;
3643         uint64_t wholedisk;
3644
3645         if (nvlist_lookup_nvlist_array(config,
3646             ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
3647                 if (nvlist_lookup_uint64(config,
3648                     ZPOOL_CONFIG_WHOLE_DISK,
3649                     &wholedisk) != 0 || !wholedisk) {
3650                         return (MAXOFFSET_T);
3651                 }
3652                 if (read_efi_label(config, &sb) < 0)
3653                         sb = MAXOFFSET_T;
3654                 return (sb);
3655         }
3656
3657         for (c = 0; c < children; c++) {
3658                 sb = find_start_block(child[c]);
3659                 if (sb != MAXOFFSET_T) {
3660                         return (sb);
3661                 }
3662         }
3663         return (MAXOFFSET_T);
3664 }
3665
3666 int
3667 zpool_label_disk_wait(char *path, int timeout)
3668 {
3669         struct stat64 statbuf;
3670         int i;
3671
3672         /*
3673          * Wait timeout milliseconds for a newly created device to be available
3674          * from the given path.  There is a small window when a /dev/ device
3675          * will exist and the udev link will not, so we must wait for the
3676          * symlink.  Depending on the udev rules this may take a few seconds.
3677          */
3678         for (i = 0; i < timeout; i++) {
3679                 usleep(1000);
3680
3681                 errno = 0;
3682                 if ((stat64(path, &statbuf) == 0) && (errno == 0))
3683                         return (0);
3684         }
3685
3686         return (ENOENT);
3687 }
3688
3689 int
3690 zpool_label_disk_check(char *path)
3691 {
3692         struct dk_gpt *vtoc;
3693         int fd, err;
3694
3695         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0)
3696                 return (errno);
3697
3698         if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
3699                 (void) close(fd);
3700                 return (err);
3701         }
3702
3703         if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
3704                 efi_free(vtoc);
3705                 (void) close(fd);
3706                 return (EIDRM);
3707         }
3708
3709         efi_free(vtoc);
3710         (void) close(fd);
3711         return (0);
3712 }
3713
3714 /*
3715  * Label an individual disk.  The name provided is the short name,
3716  * stripped of any leading /dev path.
3717  */
3718 int
3719 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
3720 {
3721         char path[MAXPATHLEN];
3722         struct dk_gpt *vtoc;
3723         int rval, fd;
3724         size_t resv = EFI_MIN_RESV_SIZE;
3725         uint64_t slice_size;
3726         diskaddr_t start_block;
3727         char errbuf[1024];
3728
3729         /* prepare an error message just in case */
3730         (void) snprintf(errbuf, sizeof (errbuf),
3731             dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
3732
3733         if (zhp) {
3734                 nvlist_t *nvroot;
3735
3736                 if (pool_is_bootable(zhp)) {
3737                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3738                             "EFI labeled devices are not supported on root "
3739                             "pools."));
3740                         return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
3741                 }
3742
3743                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
3744                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
3745
3746                 if (zhp->zpool_start_block == 0)
3747                         start_block = find_start_block(nvroot);
3748                 else
3749                         start_block = zhp->zpool_start_block;
3750                 zhp->zpool_start_block = start_block;
3751         } else {
3752                 /* new pool */
3753                 start_block = NEW_START_BLOCK;
3754         }
3755
3756         (void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
3757             BACKUP_SLICE);
3758
3759         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
3760                 /*
3761                  * This shouldn't happen.  We've long since verified that this
3762                  * is a valid device.
3763                  */
3764                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3765                     "unable to open device '%s': %d"), path, errno);
3766                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
3767         }
3768
3769         if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
3770                 /*
3771                  * The only way this can fail is if we run out of memory, or we
3772                  * were unable to read the disk's capacity
3773                  */
3774                 if (errno == ENOMEM)
3775                         (void) no_memory(hdl);
3776
3777                 (void) close(fd);
3778                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3779                     "unable to read disk capacity of '%s'"), name);
3780
3781                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
3782         }
3783
3784         slice_size = vtoc->efi_last_u_lba + 1;
3785         slice_size -= EFI_MIN_RESV_SIZE;
3786         if (start_block == MAXOFFSET_T)
3787                 start_block = NEW_START_BLOCK;
3788         slice_size -= start_block;
3789
3790         vtoc->efi_parts[0].p_start = start_block;
3791         vtoc->efi_parts[0].p_size = slice_size;
3792
3793         /*
3794          * Why we use V_USR: V_BACKUP confuses users, and is considered
3795          * disposable by some EFI utilities (since EFI doesn't have a backup
3796          * slice).  V_UNASSIGNED is supposed to be used only for zero size
3797          * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
3798          * etc. were all pretty specific.  V_USR is as close to reality as we
3799          * can get, in the absence of V_OTHER.
3800          */
3801         vtoc->efi_parts[0].p_tag = V_USR;
3802         (void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
3803
3804         vtoc->efi_parts[8].p_start = slice_size + start_block;
3805         vtoc->efi_parts[8].p_size = resv;
3806         vtoc->efi_parts[8].p_tag = V_RESERVED;
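
	/*
	 * Worked example with hypothetical numbers: for a disk whose
	 * efi_last_u_lba is 999999 and with start_block == 2048, the
	 * data slice is [2048, 983616) (1000000 - 16384 - 2048 = 981568
	 * sectors) and the reserved slice is the final EFI_MIN_RESV_SIZE
	 * (16384) sectors, [983616, 1000000).
	 */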
3807
3808         if ((rval = efi_write(fd, vtoc)) != 0) {
3809                 /*
3810                  * Some block drivers (like pcata) may not support EFI
3811                  * GPT labels.  Print out a helpful error message
3812                  * directing the user to manually label the disk and
3813                  * give a specific slice.
3814                  */
3815                 (void) close(fd);
3816                 efi_free(vtoc);
3817
3818                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
3819                     "parted(8) and then provide a specific slice: %d"), rval);
3820                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
3821         }
3822
3823         (void) close(fd);
3824         efi_free(vtoc);
3825
3826         /* Wait for the first expected slice to appear. */
3827         (void) snprintf(path, sizeof (path), "%s/%s%s%s", DISK_ROOT, name,
3828             isdigit(name[strlen(name)-1]) ? "p" : "", FIRST_SLICE);
3829         rval = zpool_label_disk_wait(path, 3000);
3830         if (rval) {
3831                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
3832                     "detect device partitions on '%s': %d"), path, rval);
3833                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
3834         }
3835
3836         /* We can't be too paranoid.  Read the label back and verify it. */
3837         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
3838         rval = zpool_label_disk_check(path);
3839         if (rval) {
3840                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
3841                     "EFI label on '%s' is damaged.  Ensure\nthis device "
3842                     "is not in use, and is functioning properly: %d"),
3843                     path, rval);
3844                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
3845         }
3846
3847         return (0);
3848 }