Limit maximum ashift value to 12
lib/libzfs/libzfs_pool.c (zfs.git)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <ctype.h>
#include <errno.h>
#include <devid.h>
#include <fcntl.h>
#include <libintl.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <zone.h>
#include <sys/stat.h>
#include <sys/efi_partition.h>
#include <sys/vtoc.h>
#include <sys/zfs_ioctl.h>
#include <dlfcn.h>

#include "zfs_namecheck.h"
#include "zfs_prop.h"
#include "libzfs_impl.h"
#include "zfs_comutil.h"

static int read_efi_label(nvlist_t *config, diskaddr_t *sb);

typedef struct prop_flags {
        int create:1;   /* Validate property on creation */
        int import:1;   /* Validate property on import */
} prop_flags_t;

/*
 * ====================================================================
 *   zpool property functions
 * ====================================================================
 */

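/*
 * Fetch the full set of pool properties from the kernel via
 * ZFS_IOC_POOL_GET_PROPS, growing the destination buffer on ENOMEM,
 * and cache the resulting nvlist in zhp->zpool_props.
 */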
static int
zpool_get_all_props(zpool_handle_t *zhp)
{
        zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
        libzfs_handle_t *hdl = zhp->zpool_hdl;

        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

        if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
                return (-1);

        while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
                if (errno == ENOMEM) {
                        if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
                                zcmd_free_nvlists(&zc);
                                return (-1);
                        }
                } else {
                        zcmd_free_nvlists(&zc);
                        return (-1);
                }
        }

        if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
                zcmd_free_nvlists(&zc);
                return (-1);
        }

        zcmd_free_nvlists(&zc);

        return (0);
}

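/* Re-fetch the pool properties, replacing the previously cached copy. */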
static int
zpool_props_refresh(zpool_handle_t *zhp)
{
        nvlist_t *old_props;

        old_props = zhp->zpool_props;

        if (zpool_get_all_props(zhp) != 0)
                return (-1);

        nvlist_free(old_props);
        return (0);
}

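/*
 * Look up a string property in the cached property nvlist, falling
 * back to the property's default (or "-" if there is none), and
 * optionally report the value's source via 'src'.
 */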
static char *
zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
    zprop_source_t *src)
{
        nvlist_t *nv, *nvl;
        uint64_t ival;
        char *value;
        zprop_source_t source;

        nvl = zhp->zpool_props;
        if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
                verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
                source = ival;
                verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
        } else {
                source = ZPROP_SRC_DEFAULT;
                if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
                        value = "-";
        }

        if (src)
                *src = source;

        return (value);
}

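/*
 * Look up a numeric property in the cached property nvlist, falling
 * back to the property's default value, and optionally report the
 * value's source via 'src'.
 */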
uint64_t
zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
{
        nvlist_t *nv, *nvl;
        uint64_t value;
        zprop_source_t source;

        if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
                /*
                 * zpool_get_all_props() has most likely failed because
                 * the pool is faulted, but if all we need is the top level
                 * vdev's guid then get it from the zhp config nvlist.
                 */
                if ((prop == ZPOOL_PROP_GUID) &&
                    (nvlist_lookup_nvlist(zhp->zpool_config,
                    ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
                    (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
                    == 0)) {
                        return (value);
                }
                return (zpool_prop_default_numeric(prop));
        }

        nvl = zhp->zpool_props;
        if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
                verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
                source = value;
                verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
        } else {
                source = ZPROP_SRC_DEFAULT;
                value = zpool_prop_default_numeric(prop);
        }

        if (src)
                *src = source;

        return (value);
}

/*
 * Map VDEV STATE to printed strings.
 */
char *
zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
{
        switch (state) {
        default:
                break;
        case VDEV_STATE_CLOSED:
        case VDEV_STATE_OFFLINE:
                return (gettext("OFFLINE"));
        case VDEV_STATE_REMOVED:
                return (gettext("REMOVED"));
        case VDEV_STATE_CANT_OPEN:
                if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
                        return (gettext("FAULTED"));
                else if (aux == VDEV_AUX_SPLIT_POOL)
                        return (gettext("SPLIT"));
                else
                        return (gettext("UNAVAIL"));
        case VDEV_STATE_FAULTED:
                return (gettext("FAULTED"));
        case VDEV_STATE_DEGRADED:
                return (gettext("DEGRADED"));
        case VDEV_STATE_HEALTHY:
                return (gettext("ONLINE"));
        }

        return (gettext("UNKNOWN"));
}

/*
 * Get a zpool property value for 'prop' and return the value in
 * a pre-allocated buffer.
 */
int
zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
    zprop_source_t *srctype)
{
        uint64_t intval;
        const char *strval;
        zprop_source_t src = ZPROP_SRC_NONE;
        nvlist_t *nvroot;
        vdev_stat_t *vs;
        uint_t vsc;

        if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
                switch (prop) {
                case ZPOOL_PROP_NAME:
                        (void) strlcpy(buf, zpool_get_name(zhp), len);
                        break;

                case ZPOOL_PROP_HEALTH:
                        (void) strlcpy(buf, "FAULTED", len);
                        break;

                case ZPOOL_PROP_GUID:
                        intval = zpool_get_prop_int(zhp, prop, &src);
                        (void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
                        break;

                case ZPOOL_PROP_ALTROOT:
                case ZPOOL_PROP_CACHEFILE:
                        if (zhp->zpool_props != NULL ||
                            zpool_get_all_props(zhp) == 0) {
                                (void) strlcpy(buf,
                                    zpool_get_prop_string(zhp, prop, &src),
                                    len);
                                if (srctype != NULL)
                                        *srctype = src;
                                return (0);
                        }
                        /* FALLTHROUGH */
                default:
                        (void) strlcpy(buf, "-", len);
                        break;
                }

                if (srctype != NULL)
                        *srctype = src;
                return (0);
        }

        if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
            prop != ZPOOL_PROP_NAME)
                return (-1);

        switch (zpool_prop_get_type(prop)) {
        case PROP_TYPE_STRING:
                (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
                    len);
                break;

        case PROP_TYPE_NUMBER:
                intval = zpool_get_prop_int(zhp, prop, &src);

                switch (prop) {
                case ZPOOL_PROP_SIZE:
                case ZPOOL_PROP_ALLOCATED:
                case ZPOOL_PROP_FREE:
                case ZPOOL_PROP_ASHIFT:
                        (void) zfs_nicenum(intval, buf, len);
                        break;

                case ZPOOL_PROP_CAPACITY:
                        (void) snprintf(buf, len, "%llu%%",
                            (u_longlong_t)intval);
                        break;

                case ZPOOL_PROP_DEDUPRATIO:
                        (void) snprintf(buf, len, "%llu.%02llux",
                            (u_longlong_t)(intval / 100),
                            (u_longlong_t)(intval % 100));
                        break;

                case ZPOOL_PROP_HEALTH:
                        verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
                            ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
                        verify(nvlist_lookup_uint64_array(nvroot,
                            ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
                            == 0);

                        (void) strlcpy(buf, zpool_state_to_name(intval,
                            vs->vs_aux), len);
                        break;
                default:
                        (void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
                }
                break;

        case PROP_TYPE_INDEX:
                intval = zpool_get_prop_int(zhp, prop, &src);
                if (zpool_prop_index_to_string(prop, intval, &strval)
                    != 0)
                        return (-1);
                (void) strlcpy(buf, strval, len);
                break;

        default:
                abort();
        }

        if (srctype)
                *srctype = src;

        return (0);
}

/*
 * Check that the bootfs name refers to a dataset within the pool it is
 * being set on.  Assumes bootfs is a valid dataset name.
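 * For example, with pool "rpool", both "rpool" and "rpool/ROOT/fs" are
 * acceptable bootfs values, while "rpools/ROOT" is not.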
 */
static boolean_t
bootfs_name_valid(const char *pool, char *bootfs)
{
        int len = strlen(pool);

        if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
                return (B_FALSE);

        if (strncmp(pool, bootfs, len) == 0 &&
            (bootfs[len] == '/' || bootfs[len] == '\0'))
                return (B_TRUE);

        return (B_FALSE);
}

/*
 * Inspect the configuration to determine if any of the devices contain
 * an EFI label.
 */
static boolean_t
pool_uses_efi(nvlist_t *config)
{
        nvlist_t **child;
        uint_t c, children;

        if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
            &child, &children) != 0)
                return (read_efi_label(config, NULL) >= 0);

        for (c = 0; c < children; c++) {
                if (pool_uses_efi(child[c]))
                        return (B_TRUE);
        }
        return (B_FALSE);
}

static boolean_t
pool_is_bootable(zpool_handle_t *zhp)
{
        char bootfs[ZPOOL_MAXNAMELEN];

        return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
            sizeof (bootfs), NULL) == 0 && strncmp(bootfs, "-",
            sizeof (bootfs)) != 0);
}

/*
 * Given an nvlist of zpool properties to be set, validate that they are
 * correct, and parse any numeric properties (index, boolean, etc) if they are
 * specified as strings.
 */
static nvlist_t *
zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
    nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
{
        nvpair_t *elem;
        nvlist_t *retprops;
        zpool_prop_t prop;
        char *strval;
        uint64_t intval;
        char *slash;
        struct stat64 statbuf;
        zpool_handle_t *zhp;
        nvlist_t *nvroot;

        if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
                (void) no_memory(hdl);
                return (NULL);
        }

        elem = NULL;
        while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
                const char *propname = nvpair_name(elem);

                /*
                 * Make sure this property is valid and applies to this type.
                 */
                if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "invalid property '%s'"), propname);
                        (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
                        goto error;
                }

                if (zpool_prop_readonly(prop)) {
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
                            "is readonly"), propname);
                        (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
                        goto error;
                }

                if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
                    &strval, &intval, errbuf) != 0)
                        goto error;

                /*
                 * Perform additional checking for specific properties.
                 */
                switch (prop) {
                default:
                        break;
                case ZPOOL_PROP_VERSION:
                        if (intval < version || intval > SPA_VERSION) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' number %llu is invalid."),
                                    propname, (u_longlong_t)intval);
                                (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
                                goto error;
                        }
                        break;

                case ZPOOL_PROP_ASHIFT:
                        if (!flags.create) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' can only be set at "
                                    "creation time"), propname);
                                (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
                                goto error;
                        }

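                        /*
                         * ashift is the base-2 logarithm of the vdev sector
                         * size: 9 means 512-byte sectors and 12 means
                         * 4096-byte ("Advanced Format") sectors, while 0
                         * leaves sector-size detection to the kernel.
                         */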
                        if (intval != 0 && (intval < 9 || intval > 12)) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' number %llu is invalid."),
                                    propname, (u_longlong_t)intval);
                                (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
                                goto error;
                        }
                        break;

                case ZPOOL_PROP_BOOTFS:
                        if (flags.create || flags.import) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' cannot be set at creation "
                                    "or import time"), propname);
                                (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
                                goto error;
                        }

                        if (version < SPA_VERSION_BOOTFS) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "pool must be upgraded to support "
                                    "'%s' property"), propname);
                                (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
                                goto error;
                        }

                        /*
                         * The bootfs property value has to be a dataset name,
                         * and the dataset has to be in the pool it is set on.
                         */
                        if (strval[0] != '\0' && !bootfs_name_valid(poolname,
                            strval)) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
                                    "is an invalid name"), strval);
                                (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
                                goto error;
                        }

                        if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "could not open pool '%s'"), poolname);
                                (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
                                goto error;
                        }
                        verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
                            ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);

                        /*
                         * bootfs property cannot be set on a disk which has
                         * been EFI labeled.
                         */
                        if (pool_uses_efi(nvroot)) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' not supported on "
                                    "EFI labeled devices"), propname);
                                (void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
                                zpool_close(zhp);
                                goto error;
                        }
                        zpool_close(zhp);
                        break;

                case ZPOOL_PROP_ALTROOT:
                        if (!flags.create && !flags.import) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' can only be set during pool "
                                    "creation or import"), propname);
                                (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
                                goto error;
                        }

                        if (strval[0] != '/') {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "bad alternate root '%s'"), strval);
                                (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
                                goto error;
                        }
                        break;

                case ZPOOL_PROP_CACHEFILE:
                        if (strval[0] == '\0')
                                break;

                        if (strcmp(strval, "none") == 0)
                                break;

                        if (strval[0] != '/') {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' must be empty, an "
                                    "absolute path, or 'none'"), propname);
                                (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
                                goto error;
                        }

                        slash = strrchr(strval, '/');

                        if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
                            strcmp(slash, "/..") == 0) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "'%s' is not a valid file"), strval);
                                (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
                                goto error;
                        }

                        *slash = '\0';

                        if (strval[0] != '\0' &&
                            (stat64(strval, &statbuf) != 0 ||
                            !S_ISDIR(statbuf.st_mode))) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "'%s' is not a valid directory"),
                                    strval);
                                (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
                                goto error;
                        }

                        *slash = '/';
                        break;

                case ZPOOL_PROP_READONLY:
                        if (!flags.import) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' can only be set at "
                                    "import time"), propname);
                                (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
                                goto error;
                        }
                        break;
                }
        }

        return (retprops);
error:
        nvlist_free(retprops);
        return (NULL);
}

/*
 * Set zpool property: propname=propval.
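 * For example, zpool_set_prop(zhp, "cachefile", "none") directs the
 * pool not to use a cache file.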
 */
int
zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
{
        zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
        int ret = -1;
        char errbuf[1024];
        nvlist_t *nvl = NULL;
        nvlist_t *realprops;
        uint64_t version;
        prop_flags_t flags = { 0 };

        (void) snprintf(errbuf, sizeof (errbuf),
            dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
            zhp->zpool_name);

        if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
                return (no_memory(zhp->zpool_hdl));

        if (nvlist_add_string(nvl, propname, propval) != 0) {
                nvlist_free(nvl);
                return (no_memory(zhp->zpool_hdl));
        }

        version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
        if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
            zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
                nvlist_free(nvl);
                return (-1);
        }

        nvlist_free(nvl);
        nvl = realprops;

        /*
         * Execute the corresponding ioctl() to set this property.
         */
        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

        if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
                nvlist_free(nvl);
                return (-1);
        }

        ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);

        zcmd_free_nvlists(&zc);
        nvlist_free(nvl);

        if (ret)
                (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
        else
                (void) zpool_props_refresh(zhp);

        return (ret);
}

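/*
 * Expand the caller's property list for ZFS_TYPE_POOL and widen each
 * non-fixed column to fit this pool's current property values.
 */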
int
zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
{
        libzfs_handle_t *hdl = zhp->zpool_hdl;
        zprop_list_t *entry;
        char buf[ZFS_MAXPROPLEN];

        if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
                return (-1);

        for (entry = *plp; entry != NULL; entry = entry->pl_next) {

                if (entry->pl_fixed)
                        continue;

                if (entry->pl_prop != ZPROP_INVAL &&
                    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
                    NULL) == 0) {
                        if (strlen(buf) > entry->pl_width)
                                entry->pl_width = strlen(buf);
                }
        }

        return (0);
}

/*
 * Don't start the slice at the default block of 34; many storage
 * devices will use a stripe width of 128k, and other vendors prefer a
 * 1m alignment.  It is best to play it safe and ensure a 1m alignment
 * given 512b blocks.  When the block size is larger by a power of 2
 * we will still be 1m aligned.
 */
#define NEW_START_BLOCK 2048
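/* 2048 sectors * 512 bytes per sector = 1 MiB starting offset. */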

/*
 * Validate the given pool name, optionally recording an extended error
 * message on 'hdl'.
 */
boolean_t
zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
{
        namecheck_err_t why;
        char what;
        int ret;

        ret = pool_namecheck(pool, &why, &what);

        /*
         * The rules for reserved pool names were extended at a later point.
         * But we need to support users with existing pools that may now be
         * invalid.  So we only check for this expanded set of names during a
         * create (or import), and only in userland.
         */
        if (ret == 0 && !isopen &&
            (strncmp(pool, "mirror", 6) == 0 ||
            strncmp(pool, "raidz", 5) == 0 ||
            strncmp(pool, "spare", 5) == 0 ||
            strcmp(pool, "log") == 0)) {
                if (hdl != NULL)
                        zfs_error_aux(hdl,
                            dgettext(TEXT_DOMAIN, "name is reserved"));
                return (B_FALSE);
        }

        if (ret != 0) {
                if (hdl != NULL) {
                        switch (why) {
                        case NAME_ERR_TOOLONG:
                                zfs_error_aux(hdl,
                                    dgettext(TEXT_DOMAIN, "name is too long"));
                                break;

                        case NAME_ERR_INVALCHAR:
                                zfs_error_aux(hdl,
                                    dgettext(TEXT_DOMAIN, "invalid character "
                                    "'%c' in pool name"), what);
                                break;

                        case NAME_ERR_NOLETTER:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "name must begin with a letter"));
                                break;

                        case NAME_ERR_RESERVED:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "name is reserved"));
                                break;

                        case NAME_ERR_DISKLIKE:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "pool name is reserved"));
                                break;

                        case NAME_ERR_LEADING_SLASH:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "leading slash in name"));
                                break;

                        case NAME_ERR_EMPTY_COMPONENT:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "empty component in name"));
                                break;

                        case NAME_ERR_TRAILING_SLASH:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "trailing slash in name"));
                                break;

                        case NAME_ERR_MULTIPLE_AT:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "multiple '@' delimiters in name"));
                                break;
                        case NAME_ERR_NO_AT:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "permission set is missing '@'"));
                                break;
                        }
                }
                return (B_FALSE);
        }

        return (B_TRUE);
}

/*
 * Open a handle to the given pool, even if the pool is currently in the FAULTED
 * state.
 */
zpool_handle_t *
zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
{
        zpool_handle_t *zhp;
        boolean_t missing;

        /*
         * Make sure the pool name is valid.
         */
        if (!zpool_name_valid(hdl, B_TRUE, pool)) {
                (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
                    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
                    pool);
                return (NULL);
        }

        if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
                return (NULL);

        zhp->zpool_hdl = hdl;
        (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));

        if (zpool_refresh_stats(zhp, &missing) != 0) {
                zpool_close(zhp);
                return (NULL);
        }

        if (missing) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
                (void) zfs_error_fmt(hdl, EZFS_NOENT,
                    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
                zpool_close(zhp);
                return (NULL);
        }

        return (zhp);
}

/*
 * Like the above, but silent on error.  Used when iterating over pools (because
 * the configuration cache may be out of date).
 */
int
zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
{
        zpool_handle_t *zhp;
        boolean_t missing;

        if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
                return (-1);

        zhp->zpool_hdl = hdl;
        (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));

        if (zpool_refresh_stats(zhp, &missing) != 0) {
                zpool_close(zhp);
                return (-1);
        }

        if (missing) {
                zpool_close(zhp);
                *ret = NULL;
                return (0);
        }

        *ret = zhp;
        return (0);
}

/*
 * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
 * state.
 */
zpool_handle_t *
zpool_open(libzfs_handle_t *hdl, const char *pool)
{
        zpool_handle_t *zhp;

        if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
                return (NULL);

        if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
                (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
                    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
                zpool_close(zhp);
                return (NULL);
        }

        return (zhp);
}

/*
 * Close the handle.  Simply frees the memory associated with the handle.
 */
void
zpool_close(zpool_handle_t *zhp)
{
        if (zhp->zpool_config)
                nvlist_free(zhp->zpool_config);
        if (zhp->zpool_old_config)
                nvlist_free(zhp->zpool_old_config);
        if (zhp->zpool_props)
                nvlist_free(zhp->zpool_props);
        free(zhp);
}

/*
 * Return the name of the pool.
 */
const char *
zpool_get_name(zpool_handle_t *zhp)
{
        return (zhp->zpool_name);
}

/*
 * Return the state of the pool (ACTIVE or UNAVAILABLE)
 */
int
zpool_get_state(zpool_handle_t *zhp)
{
        return (zhp->zpool_state);
}

/*
 * Create the named pool, using the provided vdev list.  It is assumed
 * that the consumer has already validated the contents of the nvlist, so we
 * don't have to worry about error semantics.
 */
int
zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
    nvlist_t *props, nvlist_t *fsprops)
{
        zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
        nvlist_t *zc_fsprops = NULL;
        nvlist_t *zc_props = NULL;
        char msg[1024];
        char *altroot;
        int ret = -1;

        (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
            "cannot create '%s'"), pool);

        if (!zpool_name_valid(hdl, B_FALSE, pool))
                return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

        if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
                return (-1);

        if (props) {
                prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };

                if ((zc_props = zpool_valid_proplist(hdl, pool, props,
                    SPA_VERSION_1, flags, msg)) == NULL) {
                        goto create_failed;
                }
        }

        if (fsprops) {
                uint64_t zoned;
                char *zonestr;

                zoned = ((nvlist_lookup_string(fsprops,
                    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
                    strcmp(zonestr, "on") == 0);

                if ((zc_fsprops = zfs_valid_proplist(hdl,
                    ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) {
                        goto create_failed;
                }
                if (!zc_props &&
                    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
                        goto create_failed;
                }
                if (nvlist_add_nvlist(zc_props,
                    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
                        goto create_failed;
                }
        }

        if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
                goto create_failed;

        (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));

        if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {

                zcmd_free_nvlists(&zc);
                nvlist_free(zc_props);
                nvlist_free(zc_fsprops);

                switch (errno) {
                case EBUSY:
                        /*
                         * This can happen if the user has specified the same
                         * device multiple times.  We can't reliably detect this
                         * until we try to add it and see we already have a
                         * label.  This can also happen if the device is
                         * part of an active md or lvm device.
                         */
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "one or more vdevs refer to the same device, or one of\n"
                            "the devices is part of an active md or lvm device"));
                        return (zfs_error(hdl, EZFS_BADDEV, msg));

                case EOVERFLOW:
                        /*
                         * This occurs when one of the devices is below
                         * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
                         * device was the problem device since there's no
                         * reliable way to determine device size from userland.
                         */
                        {
                                char buf[64];

                                zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));

                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "one or more devices is less than the "
                                    "minimum size (%s)"), buf);
                        }
                        return (zfs_error(hdl, EZFS_BADDEV, msg));

                case ENOSPC:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "one or more devices is out of space"));
                        return (zfs_error(hdl, EZFS_BADDEV, msg));

                case ENOTBLK:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "cache device must be a disk or disk slice"));
                        return (zfs_error(hdl, EZFS_BADDEV, msg));

                default:
                        return (zpool_standard_error(hdl, errno, msg));
                }
        }

        /*
         * If this is an alternate root pool, then we automatically set the
         * mountpoint of the root dataset to be '/'.
         */
        if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
            &altroot) == 0) {
                zfs_handle_t *zhp;

                verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
                verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
                    "/") == 0);

                zfs_close(zhp);
        }

create_failed:
        zcmd_free_nvlists(&zc);
        nvlist_free(zc_props);
        nvlist_free(zc_fsprops);
        return (ret);
}

/*
 * Destroy the given pool.  It is up to the caller to ensure that there are no
 * datasets left in the pool.
 */
int
zpool_destroy(zpool_handle_t *zhp)
{
        zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
        zfs_handle_t *zfp = NULL;
        libzfs_handle_t *hdl = zhp->zpool_hdl;
        char msg[1024];

        if (zhp->zpool_state == POOL_STATE_ACTIVE &&
            (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
                return (-1);

        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

        if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
                (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
                    "cannot destroy '%s'"), zhp->zpool_name);

                if (errno == EROFS) {
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "one or more devices is read only"));
                        (void) zfs_error(hdl, EZFS_BADDEV, msg);
                } else {
                        (void) zpool_standard_error(hdl, errno, msg);
                }

                if (zfp)
                        zfs_close(zfp);
                return (-1);
        }

        if (zfp) {
                remove_mountpoint(zfp);
                zfs_close(zfp);
        }

        return (0);
}

/*
 * Add the given vdevs to the pool.  The caller must have already performed the
 * necessary verification to ensure that the vdev specification is well-formed.
 */
int
zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
{
        zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
        int ret;
        libzfs_handle_t *hdl = zhp->zpool_hdl;
        char msg[1024];
        nvlist_t **spares, **l2cache;
        uint_t nspares, nl2cache;

        (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
            "cannot add to '%s'"), zhp->zpool_name);

        if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
            SPA_VERSION_SPARES &&
            nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
            &spares, &nspares) == 0) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
                    "upgraded to add hot spares"));
                return (zfs_error(hdl, EZFS_BADVERSION, msg));
        }

        if (pool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot,
            ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
                uint64_t s;

                for (s = 0; s < nspares; s++) {
                        char *path;

                        if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
                            &path) == 0 && pool_uses_efi(spares[s])) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "device '%s' contains an EFI label and "
                                    "cannot be used on root pools."),
                                    zpool_vdev_name(hdl, NULL, spares[s],
                                    B_FALSE));
                                return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
                        }
                }
        }

        if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
            SPA_VERSION_L2CACHE &&
            nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
            &l2cache, &nl2cache) == 0) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
                    "upgraded to add cache devices"));
                return (zfs_error(hdl, EZFS_BADVERSION, msg));
        }

        if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
                return (-1);
        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

        if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
                switch (errno) {
                case EBUSY:
                        /*
                         * This can happen if the user has specified the same
                         * device multiple times.  We can't reliably detect this
                         * until we try to add it and see we already have a
                         * label.
                         */
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "one or more vdevs refer to the same device"));
                        (void) zfs_error(hdl, EZFS_BADDEV, msg);
                        break;

                case EOVERFLOW:
                        /*
                         * This occurs when one of the devices is below
                         * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
                         * device was the problem device since there's no
                         * reliable way to determine device size from userland.
                         */
                        {
                                char buf[64];

                                zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));

                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "device is less than the minimum "
                                    "size (%s)"), buf);
                        }
                        (void) zfs_error(hdl, EZFS_BADDEV, msg);
                        break;

                case ENOTSUP:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "pool must be upgraded to add these vdevs"));
                        (void) zfs_error(hdl, EZFS_BADVERSION, msg);
                        break;

                case EDOM:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "root pool can not have multiple vdevs"
                            " or separate logs"));
                        (void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
                        break;

                case ENOTBLK:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "cache device must be a disk or disk slice"));
                        (void) zfs_error(hdl, EZFS_BADDEV, msg);
                        break;

                default:
                        (void) zpool_standard_error(hdl, errno, msg);
                }

                ret = -1;
        } else {
                ret = 0;
        }

        zcmd_free_nvlists(&zc);

        return (ret);
}

/*
 * Exports the pool from the system.  The caller must ensure that there are no
 * mounted datasets in the pool.
 */
int
zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)
{
        zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
        char msg[1024];

        (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
            "cannot export '%s'"), zhp->zpool_name);

        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
        zc.zc_cookie = force;
        zc.zc_guid = hardforce;

        if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
                switch (errno) {
                case EXDEV:
                        zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
                            "use '-f' to override the following errors:\n"
                            "'%s' has an active shared spare which could be"
                            " used by other pools once '%s' is exported."),
                            zhp->zpool_name, zhp->zpool_name);
                        return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
                            msg));
                default:
                        return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
                            msg));
                }
        }

        return (0);
}

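/*
 * Convenience wrappers around zpool_export_common(): zpool_export()
 * takes a caller-supplied force flag, while zpool_export_force() sets
 * both the force and hard-force flags.
 */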
int
zpool_export(zpool_handle_t *zhp, boolean_t force)
{
        return (zpool_export_common(zhp, force, B_FALSE));
}

int
zpool_export_force(zpool_handle_t *zhp)
{
        return (zpool_export_common(zhp, B_TRUE, B_TRUE));
}

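/*
 * Describe the outcome of a rewind request: the time the pool was (or
 * would be) returned to and approximately how much transaction history
 * would be discarded.
 */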
static void
zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
    nvlist_t *config)
{
        nvlist_t *nv = NULL;
        uint64_t rewindto;
        int64_t loss = -1;
        struct tm t;
        char timestr[128];

        if (!hdl->libzfs_printerr || config == NULL)
                return;

        if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0)
                return;

        if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
                return;
        (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);

        if (localtime_r((time_t *)&rewindto, &t) != NULL &&
            strftime(timestr, 128, "%c", &t) != 0) {
                if (dryrun) {
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "Would be able to return %s "
                            "to its state as of %s.\n"),
                            name, timestr);
                } else {
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "Pool %s returned to its state as of %s.\n"),
                            name, timestr);
                }
                if (loss > 120) {
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "%s approximately %lld "),
                            dryrun ? "Would discard" : "Discarded",
                            ((longlong_t)loss + 30) / 60);
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "minutes of transactions.\n"));
                } else if (loss > 0) {
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "%s approximately %lld "),
                            dryrun ? "Would discard" : "Discarded",
                            (longlong_t)loss);
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "seconds of transactions.\n"));
                }
        }
}

void
zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
    nvlist_t *config)
{
        nvlist_t *nv = NULL;
        int64_t loss = -1;
        uint64_t edata = UINT64_MAX;
        uint64_t rewindto;
        struct tm t;
        char timestr[128];

        if (!hdl->libzfs_printerr)
                return;

        if (reason >= 0)
                (void) printf(dgettext(TEXT_DOMAIN, "action: "));
        else
                (void) printf(dgettext(TEXT_DOMAIN, "\t"));

        /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
        if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
            nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
                goto no_info;

        (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
        (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
            &edata);

        (void) printf(dgettext(TEXT_DOMAIN,
            "Recovery is possible, but will result in some data loss.\n"));

        if (localtime_r((time_t *)&rewindto, &t) != NULL &&
            strftime(timestr, 128, "%c", &t) != 0) {
                (void) printf(dgettext(TEXT_DOMAIN,
                    "\tReturning the pool to its state as of %s\n"
                    "\tshould correct the problem.  "),
                    timestr);
        } else {
                (void) printf(dgettext(TEXT_DOMAIN,
                    "\tReverting the pool to an earlier state "
                    "should correct the problem.\n\t"));
        }

        if (loss > 120) {
                (void) printf(dgettext(TEXT_DOMAIN,
                    "Approximately %lld minutes of data\n"
                    "\tmust be discarded, irreversibly.  "),
                    ((longlong_t)loss + 30) / 60);
        } else if (loss > 0) {
                (void) printf(dgettext(TEXT_DOMAIN,
                    "Approximately %lld seconds of data\n"
                    "\tmust be discarded, irreversibly.  "),
                    (longlong_t)loss);
        }
        if (edata != 0 && edata != UINT64_MAX) {
                if (edata == 1) {
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "After rewind, at least\n"
                            "\tone persistent user-data error will remain.  "));
                } else {
                        (void) printf(dgettext(TEXT_DOMAIN,
                            "After rewind, several\n"
                            "\tpersistent user-data errors will remain.  "));
                }
        }
        (void) printf(dgettext(TEXT_DOMAIN,
            "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
            reason >= 0 ? "clear" : "import", name);

        (void) printf(dgettext(TEXT_DOMAIN,
            "A scrub of the pool\n"
            "\tis strongly recommended after recovery.\n"));
        return;

no_info:
        (void) printf(dgettext(TEXT_DOMAIN,
            "Destroy and re-create the pool from\n\ta backup source.\n"));
}

/*
 * zpool_import() is a contracted interface.  It should be kept the same
 * if possible.
 *
 * Applications should use zpool_import_props() to import a pool with
 * new property values to be set.
 */
int
zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
    char *altroot)
{
        nvlist_t *props = NULL;
        int ret;

        if (altroot != NULL) {
                if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
                        return (zfs_error_fmt(hdl, EZFS_NOMEM,
                            dgettext(TEXT_DOMAIN, "cannot import '%s'"),
                            newname));
                }

                if (nvlist_add_string(props,
                    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
                    nvlist_add_string(props,
                    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
                        nvlist_free(props);
                        return (zfs_error_fmt(hdl, EZFS_NOMEM,
                            dgettext(TEXT_DOMAIN, "cannot import '%s'"),
                            newname));
                }
        }

        ret = zpool_import_props(hdl, config, newname, props,
            ZFS_IMPORT_NORMAL);
        if (props)
                nvlist_free(props);
        return (ret);
}

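/*
 * Recursively print the vdev tree, indenting each level by two spaces
 * and tagging log devices with a " [log]" suffix.
 */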
static void
print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
    int indent)
{
        nvlist_t **child;
        uint_t c, children;
        char *vname;
        uint64_t is_log = 0;

        (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
            &is_log);

        if (name != NULL)
                (void) printf("\t%*s%s%s\n", indent, "", name,
                    is_log ? " [log]" : "");

        if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
            &child, &children) != 0)
                return;

        for (c = 0; c < children; c++) {
                vname = zpool_vdev_name(hdl, NULL, child[c], B_TRUE);
                print_vdev_tree(hdl, vname, child[c], indent + 2);
                free(vname);
        }
}

/*
 * Import the given pool using the known configuration and a list of
 * properties to be set.  The configuration should have come from
 * zpool_find_import().  The 'newname' parameter controls whether the
 * pool is imported with a different name.
 */
1450 int
1451 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1452     nvlist_t *props, int flags)
1453 {
1454         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1455         zpool_rewind_policy_t policy;
1456         nvlist_t *nv = NULL;
1457         nvlist_t *nvinfo = NULL;
1458         nvlist_t *missing = NULL;
1459         char *thename;
1460         char *origname;
1461         int ret;
1462         int error = 0;
1463         char errbuf[1024];
1464
1465         verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1466             &origname) == 0);
1467
1468         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1469             "cannot import pool '%s'"), origname);
1470
1471         if (newname != NULL) {
1472                 if (!zpool_name_valid(hdl, B_FALSE, newname))
1473                         return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1474                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1475                             newname));
1476                 thename = (char *)newname;
1477         } else {
1478                 thename = origname;
1479         }
1480
1481         if (props) {
1482                 uint64_t version;
1483                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
1484
1485                 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1486                     &version) == 0);
1487
1488                 if ((props = zpool_valid_proplist(hdl, origname,
1489                     props, version, flags, errbuf)) == NULL) {
1490                         return (-1);
1491                 } else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1492                         nvlist_free(props);
1493                         return (-1);
1494                 }
1495         }
1496
1497         (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1498
1499         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1500             &zc.zc_guid) == 0);
1501
1502         if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1503                 nvlist_free(props);
1504                 return (-1);
1505         }
1506         if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
1507                 nvlist_free(props);
1508                 return (-1);
1509         }
1510
1511         zc.zc_cookie = flags;
1512         while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
1513             errno == ENOMEM) {
1514                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
1515                         zcmd_free_nvlists(&zc);
                             nvlist_free(props);
1516                         return (-1);
1517                 }
1518         }
1519         if (ret != 0)
1520                 error = errno;
1521
1522         (void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
1523         zpool_get_rewind_policy(config, &policy);
1524
1525         if (error) {
1526                 char desc[1024];
1527
1528                 /*
1529                  * The dry-run failed, but we print out what success
1530                  * would have looked like if we had found a best txg.
1531                  */
1532                 if (policy.zrp_request & ZPOOL_TRY_REWIND) {
1533                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1534                             B_TRUE, nv);
1535                         nvlist_free(nv);
1536                         return (-1);
1537                 }
1538
1539                 if (newname == NULL)
1540                         (void) snprintf(desc, sizeof (desc),
1541                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1542                             thename);
1543                 else
1544                         (void) snprintf(desc, sizeof (desc),
1545                             dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1546                             origname, thename);
1547
1548                 switch (error) {
1549                 case ENOTSUP:
1550                         /*
1551                          * Unsupported version.
1552                          */
1553                         (void) zfs_error(hdl, EZFS_BADVERSION, desc);
1554                         break;
1555
1556                 case EINVAL:
1557                         (void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1558                         break;
1559
1560                 case EROFS:
1561                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1562                             "one or more devices is read only"));
1563                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
1564                         break;
1565
1566                 case ENXIO:
1567                         if (nv && nvlist_lookup_nvlist(nv,
1568                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1569                             nvlist_lookup_nvlist(nvinfo,
1570                             ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
1571                                 (void) printf(dgettext(TEXT_DOMAIN,
1572                                     "The devices below are missing, use "
1573                                     "'-m' to import the pool anyway:\n"));
1574                                 print_vdev_tree(hdl, NULL, missing, 2);
1575                                 (void) printf("\n");
1576                         }
1577                         (void) zpool_standard_error(hdl, error, desc);
1578                         break;
1579
1580                 case EEXIST:
1581                         (void) zpool_standard_error(hdl, error, desc);
1582                         break;
1583
1584                 default:
1585                         (void) zpool_standard_error(hdl, error, desc);
1586                         zpool_explain_recover(hdl,
1587                             newname ? origname : thename, -error, nv);
1588                         break;
1589                 }
1590
1591                 nvlist_free(nv);
1592                 ret = -1;
1593         } else {
1594                 zpool_handle_t *zhp;
1595
1596                 /*
1597                  * This should never fail, but play it safe anyway.
1598                  */
1599                 if (zpool_open_silent(hdl, thename, &zhp) != 0)
1600                         ret = -1;
1601                 else if (zhp != NULL)
1602                         zpool_close(zhp);
1603                 if (policy.zrp_request &
1604                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
1605                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1606                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv);
1607                 }
1608                 nvlist_free(nv);
1609                 ret = 0;	/* fall through so zc and props are freed */
1610         }
1611
1612         zcmd_free_nvlists(&zc);
1613         nvlist_free(props);
1614
1615         return (ret);
1616 }
1617
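/*
 * Example usage of zpool_import_props() (a sketch; 'hdl' and 'config'
 * are assumed as in the zpool_import() example above):
 *
 *	nvlist_t *props;
 *	int ret;
 *
 *	verify(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
 *	verify(nvlist_add_string(props,
 *	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") == 0);
 *	ret = zpool_import_props(hdl, config, "tank2", props,
 *	    ZFS_IMPORT_NORMAL);
 *	nvlist_free(props);
 */
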
1618 /*
1619  * Scan the pool.
1620  */
1621 int
1622 zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
1623 {
1624         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1625         char msg[1024];
1626         libzfs_handle_t *hdl = zhp->zpool_hdl;
1627
1628         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1629         zc.zc_cookie = func;
1630
1631         if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
1632             (errno == ENOENT && func != POOL_SCAN_NONE))
1633                 return (0);
1634
1635         if (func == POOL_SCAN_SCRUB) {
1636                 (void) snprintf(msg, sizeof (msg),
1637                     dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
1638         } else if (func == POOL_SCAN_NONE) {
1639                 (void) snprintf(msg, sizeof (msg),
1640                     dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
1641                     zc.zc_name);
1642         } else {
1643                 assert(!"unexpected result");
1644         }
1645
1646         if (errno == EBUSY) {
1647                 nvlist_t *nvroot;
1648                 pool_scan_stat_t *ps = NULL;
1649                 uint_t psc;
1650
1651                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
1652                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
1653                 (void) nvlist_lookup_uint64_array(nvroot,
1654                     ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
1655                 if (ps && ps->pss_func == POOL_SCAN_SCRUB)
1656                         return (zfs_error(hdl, EZFS_SCRUBBING, msg));
1657                 else
1658                         return (zfs_error(hdl, EZFS_RESILVERING, msg));
1659         } else if (errno == ENOENT) {
1660                 return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
1661         } else {
1662                 return (zpool_standard_error(hdl, errno, msg));
1663         }
1664 }
1665
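/*
 * Example usage (sketch): start a scrub on an open pool handle, then
 * cancel it.
 *
 *	if (zpool_scan(zhp, POOL_SCAN_SCRUB) != 0)
 *		return (-1);
 *	...
 *	(void) zpool_scan(zhp, POOL_SCAN_NONE);
 *
 * where the second call cancels the scrub started by the first.
 */
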
1666 /*
1667  * Find a vdev that matches the specified search criteria. We use the
1668  * nvpair name to determine how we should look for the device.
1669  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
1670  * spare, but FALSE if it's an INUSE spare.
1671  */
1672 static nvlist_t *
1673 vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
1674     boolean_t *l2cache, boolean_t *log)
1675 {
1676         uint_t c, children;
1677         nvlist_t **child;
1678         nvlist_t *ret;
1679         uint64_t is_log;
1680         char *srchkey;
1681         nvpair_t *pair = nvlist_next_nvpair(search, NULL);
1682
1683         /* Nothing to look for */
1684         if (search == NULL || pair == NULL)
1685                 return (NULL);
1686
1687         /* Obtain the key we will use to search */
1688         srchkey = nvpair_name(pair);
1689
1690         switch (nvpair_type(pair)) {
1691         case DATA_TYPE_UINT64:
1692                 if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
1693                         uint64_t srchval, theguid;
1694
1695                         verify(nvpair_value_uint64(pair, &srchval) == 0);
1696                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
1697                             &theguid) == 0);
1698                         if (theguid == srchval)
1699                                 return (nv);
1700                 }
1701                 break;
1702
1703         case DATA_TYPE_STRING: {
1704                 char *srchval, *val;
1705
1706                 verify(nvpair_value_string(pair, &srchval) == 0);
1707                 if (nvlist_lookup_string(nv, srchkey, &val) != 0)
1708                         break;
1709
1710                 /*
1711                  * Search for the requested value. Special cases:
1712                  *
1713                  * - ZPOOL_CONFIG_PATH for whole-disk entries.  These end with a
1714                  *   partition suffix ("1", "-part1", or "p1") that is hidden
1715                  *   from the user but included in the stored string, so the
1716                  *   search appends it before comparing ("/dev/sda" matches "/dev/sda1").
1717                  * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
1718                  *
1719                  * Otherwise, all other searches are simple string compares.
1720                  */
1721                 if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) {
1722                         uint64_t wholedisk = 0;
1723
1724                         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1725                             &wholedisk);
1726                         if (wholedisk) {
1727                                 char buf[MAXPATHLEN];
1728
1729                                 zfs_append_partition(srchval, buf, sizeof (buf));
1730                                 if (strcmp(val, buf) == 0)
1731                                         return (nv);
1732
1733                                 break;
1734                         }
1735                 } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
1736                         char *type, *idx, *end, *p;
1737                         uint64_t id, vdev_id;
1738
1739                         /*
1740                          * Determine our vdev type, keeping in mind
1741                          * that the srchval is composed of a type and
1742                          * vdev id pair (i.e. mirror-4).
1743                          */
1744                         if ((type = strdup(srchval)) == NULL)
1745                                 return (NULL);
1746
1747                         if ((p = strrchr(type, '-')) == NULL) {
1748                                 free(type);
1749                                 break;
1750                         }
1751                         idx = p + 1;
1752                         *p = '\0';
1753
1754                         /*
1755                          * If the types don't match then keep looking.
1756                          */
1757                         if (strncmp(val, type, strlen(val)) != 0) {
1758                                 free(type);
1759                                 break;
1760                         }
1761
1762                         verify(strncmp(type, VDEV_TYPE_RAIDZ,
1763                             strlen(VDEV_TYPE_RAIDZ)) == 0 ||
1764                             strncmp(type, VDEV_TYPE_MIRROR,
1765                             strlen(VDEV_TYPE_MIRROR)) == 0);
1766                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
1767                             &id) == 0);
1768
1769                         errno = 0;
1770                         vdev_id = strtoull(idx, &end, 10);
1771
1772                         free(type);
1773                         if (errno != 0)
1774                                 return (NULL);
1775
1776                         /*
1777                          * Now verify that we have the correct vdev id.
1778                          */
1779                         if (vdev_id == id)
1780                                 return (nv);
1781                 }
1782
1783                 /*
1784                  * Common case
1785                  */
1786                 if (strcmp(srchval, val) == 0)
1787                         return (nv);
1788                 break;
1789         }
1790
1791         default:
1792                 break;
1793         }
1794
1795         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1796             &child, &children) != 0)
1797                 return (NULL);
1798
1799         for (c = 0; c < children; c++) {
1800                 if ((ret = vdev_to_nvlist_iter(child[c], search,
1801                     avail_spare, l2cache, NULL)) != NULL) {
1802                         /*
1803                          * The 'is_log' value is only set for the top-level
1804                          * vdev, not the leaf vdevs.  So we always look up the
1805                          * log device from the root of the vdev tree (where
1806                          * 'log' is non-NULL).
1807                          */
1808                         if (log != NULL &&
1809                             nvlist_lookup_uint64(child[c],
1810                             ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
1811                             is_log) {
1812                                 *log = B_TRUE;
1813                         }
1814                         return (ret);
1815                 }
1816         }
1817
1818         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1819             &child, &children) == 0) {
1820                 for (c = 0; c < children; c++) {
1821                         if ((ret = vdev_to_nvlist_iter(child[c], search,
1822                             avail_spare, l2cache, NULL)) != NULL) {
1823                                 *avail_spare = B_TRUE;
1824                                 return (ret);
1825                         }
1826                 }
1827         }
1828
1829         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1830             &child, &children) == 0) {
1831                 for (c = 0; c < children; c++) {
1832                         if ((ret = vdev_to_nvlist_iter(child[c], search,
1833                             avail_spare, l2cache, NULL)) != NULL) {
1834                                 *l2cache = B_TRUE;
1835                                 return (ret);
1836                         }
1837                 }
1838         }
1839
1840         return (NULL);
1841 }
1842
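/*
 * Example search nvlist for vdev_to_nvlist_iter() (a sketch): locate a
 * vdev by guid within the tree rooted at 'nvroot'.  As in the public
 * wrappers below, the boolean flags should be preset to B_FALSE.
 *
 *	nvlist_t *search, *nv;
 *	boolean_t spare = B_FALSE, l2cache = B_FALSE, log = B_FALSE;
 *
 *	verify(nvlist_alloc(&search, NV_UNIQUE_NAME, 0) == 0);
 *	verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
 *	nv = vdev_to_nvlist_iter(nvroot, search, &spare, &l2cache, &log);
 *	nvlist_free(search);
 */
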
1843 /*
1844  * Given a physical path (minus the "/devices" prefix), find the
1845  * associated vdev.
1846  */
1847 nvlist_t *
1848 zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
1849     boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
1850 {
1851         nvlist_t *search, *nvroot, *ret;
1852
1853         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1854         verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
1855
1856         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1857             &nvroot) == 0);
1858
1859         *avail_spare = B_FALSE;
1860         *l2cache = B_FALSE;
1861         if (log != NULL)
1862                 *log = B_FALSE;
1863         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
1864         nvlist_free(search);
1865
1866         return (ret);
1867 }
1868
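/*
 * Example (sketch; the phys_path below is a hypothetical placeholder):
 *
 *	nv = zpool_find_vdev_by_physpath(zhp,
 *	    "/pci@0,0/pci1000,8000@16/disk@0,0:a",
 *	    &spare, &l2cache, NULL);
 */
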
1869 /*
1870  * Determine if we have an "interior" top-level vdev (i.e. mirror/raidz).
1871  */
1872 boolean_t
1873 zpool_vdev_is_interior(const char *name)
1874 {
1875         if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
1876             strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
1877                 return (B_TRUE);
1878         return (B_FALSE);
1879 }
1880
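/*
 * For example, zpool_vdev_is_interior("mirror-0") and
 * zpool_vdev_is_interior("raidz2-1") return B_TRUE, while
 * zpool_vdev_is_interior("sda") returns B_FALSE.
 */
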
1881 nvlist_t *
1882 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
1883     boolean_t *l2cache, boolean_t *log)
1884 {
1885         char buf[MAXPATHLEN];
1886         char *end;
1887         nvlist_t *nvroot, *search, *ret;
1888         uint64_t guid;
1889
1890         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1891
1892         guid = strtoull(path, &end, 10);
1893         if (guid != 0 && *end == '\0') {
1894                 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
1895         } else if (zpool_vdev_is_interior(path)) {
1896                 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
1897         } else if (path[0] != '/') {
1898                 if (zfs_resolve_shortname(path, buf, sizeof (buf)) < 0) {
1899                         nvlist_free(search);
1900                         return (NULL);
1901                 }
1902                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
1903         } else {
1904                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
1905         }
1906
1907         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1908             &nvroot) == 0);
1909
1910         *avail_spare = B_FALSE;
1911         *l2cache = B_FALSE;
1912         if (log != NULL)
1913                 *log = B_FALSE;
1914         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
1915         nvlist_free(search);
1916
1917         return (ret);
1918 }
1919
1920 static int
1921 vdev_online(nvlist_t *nv)
1922 {
1923         uint64_t ival;
1924
1925         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
1926             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
1927             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
1928                 return (0);
1929
1930         return (1);
1931 }
1932
1933 /*
1934  * Helper function for zpool_get_physpath().
1935  */
1936 static int
1937 vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
1938     size_t *bytes_written)
1939 {
1940         size_t bytes_left, pos, rsz;
1941         char *tmppath;
1942         const char *format;
1943
1944         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
1945             &tmppath) != 0)
1946                 return (EZFS_NODEVICE);
1947
1948         pos = *bytes_written;
1949         bytes_left = physpath_size - pos;
1950         format = (pos == 0) ? "%s" : " %s";
1951
1952         rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
1953         *bytes_written += rsz;
1954
1955         if (rsz >= bytes_left) {
1956                 /* if physpath was not copied properly, clear it */
1957                 if (bytes_left != 0) {
1958                         physpath[pos] = 0;
1959                 }
1960                 return (EZFS_NOSPC);
1961         }
1962         return (0);
1963 }
1964
1965 static int
1966 vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
1967     size_t *rsz, boolean_t is_spare)
1968 {
1969         char *type;
1970         int ret;
1971
1972         if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
1973                 return (EZFS_INVALCONFIG);
1974
1975         if (strcmp(type, VDEV_TYPE_DISK) == 0) {
1976                 /*
1977                  * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
1978                  * For a spare vdev, we only want to boot from the active
1979                  * spare device.
1980                  */
1981                 if (is_spare) {
1982                         uint64_t spare = 0;
1983                         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
1984                             &spare);
1985                         if (!spare)
1986                                 return (EZFS_INVALCONFIG);
1987                 }
1988
1989                 if (vdev_online(nv)) {
1990                         if ((ret = vdev_get_one_physpath(nv, physpath,
1991                             phypath_size, rsz)) != 0)
1992                                 return (ret);
1993                 }
1994         } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
1995             strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
1996             (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
1997                 nvlist_t **child;
1998                 uint_t count;
1999                 int i, ret;
2000
2001                 if (nvlist_lookup_nvlist_array(nv,
2002                     ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
2003                         return (EZFS_INVALCONFIG);
2004
2005                 for (i = 0; i < count; i++) {
2006                         ret = vdev_get_physpaths(child[i], physpath,
2007                             phypath_size, rsz, is_spare);
2008                         if (ret == EZFS_NOSPC)
2009                                 return (ret);
2010                 }
2011         }
2012
2013         return (EZFS_POOL_INVALARG);
2014 }
2015
2016 /*
2017  * Get phys_path for a root pool config.
2018  * Return 0 on success; non-zero on failure.
2019  */
2020 static int
2021 zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
2022 {
2023         size_t rsz;
2024         nvlist_t *vdev_root;
2025         nvlist_t **child;
2026         uint_t count;
2027         char *type;
2028
2029         rsz = 0;
2030
2031         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2032             &vdev_root) != 0)
2033                 return (EZFS_INVALCONFIG);
2034
2035         if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2036             nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
2037             &child, &count) != 0)
2038                 return (EZFS_INVALCONFIG);
2039
2040         /*
2041          * A root pool cannot have EFI-labeled disks and can only have
2042          * a single top-level vdev.
2043          */
2044         if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 ||
2045             pool_uses_efi(vdev_root))
2046                 return (EZFS_POOL_INVALARG);
2047
2048         (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2049             B_FALSE);
2050
2051         /* No online devices */
2052         if (rsz == 0)
2053                 return (EZFS_NODEVICE);
2054
2055         return (0);
2056 }
2057
2058 /*
2059  * Get phys_path for a root pool.
2060  * Return 0 on success; non-zero on failure.
2061  */
2062 int
2063 zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2064 {
2065         return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2066             phypath_size));
2067 }
2068
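/*
 * Example usage (sketch): retrieve the space-separated phys_path list
 * for a bootable root pool.
 *
 *	char physpath[MAXPATHLEN];
 *
 *	if (zpool_get_physpath(zhp, physpath, sizeof (physpath)) == 0)
 *		(void) printf("%s\n", physpath);
 */
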
2069 /*
2070  * If the device has been dynamically expanded then we need to relabel
2071  * the disk to use the new unallocated space.
2072  */
2073 static int
2074 zpool_relabel_disk(libzfs_handle_t *hdl, const char *path)
2075 {
2076         char errbuf[1024];
2077         int fd, error;
2078
             /* format errbuf up front; it was previously used uninitialized */
             (void) snprintf(errbuf, sizeof (errbuf),
                 dgettext(TEXT_DOMAIN, "cannot relabel '%s'"), path);

2079         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
2080                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2081                     "unable to open device"));
2082                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
2083         }
2084
2085         /*
2086          * It's possible that we might encounter an error if the device
2087          * does not have any unallocated space left. If so, we simply
2088          * ignore that error and continue on.
2089          */
2090         error = efi_use_whole_disk(fd);
2091         (void) close(fd);
2092         if (error && error != VT_ENOSPC) {
2093                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2094                     "unable to read disk capacity"));
2095                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
2096         }
2097         return (0);
2098 }
2099
2100 /*
2101  * Bring the specified vdev online.  The 'flags' parameter is a set of the
2102  * ZFS_ONLINE_* flags.
2103  */
2104 int
2105 zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
2106     vdev_state_t *newstate)
2107 {
2108         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2109         char msg[1024];
2110         nvlist_t *tgt;
2111         boolean_t avail_spare, l2cache, islog;
2112         libzfs_handle_t *hdl = zhp->zpool_hdl;
2113
2114         if (flags & ZFS_ONLINE_EXPAND) {
2115                 (void) snprintf(msg, sizeof (msg),
2116                     dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
2117         } else {
2118                 (void) snprintf(msg, sizeof (msg),
2119                     dgettext(TEXT_DOMAIN, "cannot online %s"), path);
2120         }
2121
2122         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2123         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2124             &islog)) == NULL)
2125                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2126
2127         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2128
2129         if (avail_spare)
2130                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2131
2132         if (flags & ZFS_ONLINE_EXPAND ||
2133             zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
2134                 char *pathname = NULL;
2135                 uint64_t wholedisk = 0;
2136
2137                 (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
2138                     &wholedisk);
2139                 verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
2140                     &pathname) == 0);
2141
2142                 /*
2143                  * XXX - L2ARC 1.0 devices can't support expansion.
2144                  */
2145                 if (l2cache) {
2146                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2147                             "cannot expand cache devices"));
2148                         return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
2149                 }
2150
2151                 if (wholedisk) {
2152                         pathname += strlen(DISK_ROOT) + 1;
2153                         (void) zpool_relabel_disk(hdl, pathname);
2154                 }
2155         }
2156
2157         zc.zc_cookie = VDEV_STATE_ONLINE;
2158         zc.zc_obj = flags;
2159
2160         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
2161                 if (errno == EINVAL) {
2162                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
2163                             "from this pool into a new one.  Use '%s' "
2164                             "instead"), "zpool detach");
2165                         return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
2166                 }
2167                 return (zpool_standard_error(hdl, errno, msg));
2168         }
2169
2170         *newstate = zc.zc_cookie;
2171         return (0);
2172 }
2173
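/*
 * Example usage (sketch): online a device and request that any newly
 * available capacity be used.
 *
 *	vdev_state_t newstate;
 *
 *	if (zpool_vdev_online(zhp, "sda", ZFS_ONLINE_EXPAND,
 *	    &newstate) == 0 && newstate != VDEV_STATE_HEALTHY)
 *		(void) printf("device onlined, but not healthy\n");
 */
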
2174 /*
2175  * Take the specified vdev offline
2176  */
2177 int
2178 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2179 {
2180         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2181         char msg[1024];
2182         nvlist_t *tgt;
2183         boolean_t avail_spare, l2cache;
2184         libzfs_handle_t *hdl = zhp->zpool_hdl;
2185
2186         (void) snprintf(msg, sizeof (msg),
2187             dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2188
2189         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2190         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2191             NULL)) == NULL)
2192                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2193
2194         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2195
2196         if (avail_spare)
2197                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2198
2199         zc.zc_cookie = VDEV_STATE_OFFLINE;
2200         zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2201
2202         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2203                 return (0);
2204
2205         switch (errno) {
2206         case EBUSY:
2208                 /*
2209                  * There are no other replicas of this device.
2210                  */
2211                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2212
2213         case EEXIST:
2214                 /*
2215                  * The log device has unplayed logs
2216                  */
2217                 return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2218
2219         default:
2220                 return (zpool_standard_error(hdl, errno, msg));
2221         }
2222 }
2223
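/*
 * Example usage (sketch; 'hdl' is the libzfs handle): temporarily
 * offline a device.  A temporary offline is not persisted across an
 * export/import cycle.
 *
 *	if (zpool_vdev_offline(zhp, "sda", B_TRUE) != 0)
 *		(void) fprintf(stderr, "%s\n",
 *		    libzfs_error_description(hdl));
 */
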
2224 /*
2225  * Mark the given vdev faulted.
2226  */
2227 int
2228 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2229 {
2230         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2231         char msg[1024];
2232         libzfs_handle_t *hdl = zhp->zpool_hdl;
2233
2234         (void) snprintf(msg, sizeof (msg),
2235             dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
2236
2237         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2238         zc.zc_guid = guid;
2239         zc.zc_cookie = VDEV_STATE_FAULTED;
2240         zc.zc_obj = aux;
2241
2242         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2243                 return (0);
2244
2245         switch (errno) {
2246         case EBUSY:
2248                 /*
2249                  * There are no other replicas of this device.
2250                  */
2251                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2252
2253         default:
2254                 return (zpool_standard_error(hdl, errno, msg));
2255         }
2257 }
2258
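/*
 * Example usage (sketch): fault a device by guid, e.g. from a fault
 * management agent, attributing the fault to excessive errors.
 *
 *	(void) zpool_vdev_fault(zhp, guid, VDEV_AUX_ERR_EXCEEDED);
 */
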
2259 /*
2260  * Mark the given vdev degraded.
2261  */
2262 int
2263 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2264 {
2265         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2266         char msg[1024];
2267         libzfs_handle_t *hdl = zhp->zpool_hdl;
2268
2269         (void) snprintf(msg, sizeof (msg),
2270             dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
2271
2272         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2273         zc.zc_guid = guid;
2274         zc.zc_cookie = VDEV_STATE_DEGRADED;
2275         zc.zc_obj = aux;
2276
2277         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2278                 return (0);
2279
2280         return (zpool_standard_error(hdl, errno, msg));
2281 }
2282
2283 /*
2284  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
2285  * a hot spare.
2286  */
2287 static boolean_t
2288 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
2289 {
2290         nvlist_t **child;
2291         uint_t c, children;
2292         char *type;
2293
2294         if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
2295             &children) == 0) {
2296                 verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
2297                     &type) == 0);
2298
2299                 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
2300                     children == 2 && child[which] == tgt)
2301                         return (B_TRUE);
2302
2303                 for (c = 0; c < children; c++)
2304                         if (is_replacing_spare(child[c], tgt, which))
2305                                 return (B_TRUE);
2306         }
2307
2308         return (B_FALSE);
2309 }
2310
2311 /*
2312  * Attach new_disk (fully described by nvroot) to old_disk.
2313  * If 'replacing' is specified, the new disk will replace the old one.
2314  */
2315 int
2316 zpool_vdev_attach(zpool_handle_t *zhp,
2317     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
2318 {
2319         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2320         char msg[1024];
2321         int ret;
2322         nvlist_t *tgt;
2323         boolean_t avail_spare, l2cache, islog;
2324         uint64_t val;
2325         char *newname;
2326         nvlist_t **child;
2327         uint_t children;
2328         nvlist_t *config_root;
2329         libzfs_handle_t *hdl = zhp->zpool_hdl;
2330         boolean_t rootpool = pool_is_bootable(zhp);
2331
2332         if (replacing)
2333                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2334                     "cannot replace %s with %s"), old_disk, new_disk);
2335         else
2336                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2337                     "cannot attach %s to %s"), new_disk, old_disk);
2338
2339         /*
2340          * If this is a root pool, make sure that we're not attaching an
2341          * EFI labeled device.
2342          */
2343         if (rootpool && pool_uses_efi(nvroot)) {
2344                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2345                     "EFI labeled devices are not supported on root pools."));
2346                 return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
2347         }
2348
2349         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2350         if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
2351             &islog)) == NULL)
2352                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2353
2354         if (avail_spare)
2355                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2356
2357         if (l2cache)
2358                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2359
2360         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2361         zc.zc_cookie = replacing;
2362
2363         if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
2364             &child, &children) != 0 || children != 1) {
2365                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2366                     "new device must be a single disk"));
2367                 return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
2368         }
2369
2370         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
2371             ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
2372
2373         if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL)
2374                 return (-1);
2375
2376         /*
2377          * If the target is a hot spare that has been swapped in, we can only
2378          * replace it with another hot spare.
2379          */
2380         if (replacing &&
2381             nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
2382             (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
2383             NULL) == NULL || !avail_spare) &&
2384             is_replacing_spare(config_root, tgt, 1)) {
2385                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2386                     "can only be replaced by another hot spare"));
2387                 free(newname);
2388                 return (zfs_error(hdl, EZFS_BADTARGET, msg));
2389         }
2390
2391         free(newname);
2392
2393         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
2394                 return (-1);
2395
2396         ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);
2397
2398         zcmd_free_nvlists(&zc);
2399
2400         if (ret == 0) {
2401                 if (rootpool) {
2402                         /*
2403                          * XXX need a better way to prevent user from
2404                          * booting up a half-baked vdev.
2405                          */
2406                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
2407                             "sure to wait until resilver is done "
2408                             "before rebooting.\n"));
2409                 }
2410                 return (0);
2411         }
2412
2413         switch (errno) {
2414         case ENOTSUP:
2415                 /*
2416                  * Can't attach to or replace this type of vdev.
2417                  */
2418                 if (replacing) {
2419                         uint64_t version = zpool_get_prop_int(zhp,
2420                             ZPOOL_PROP_VERSION, NULL);
2421
2422                         if (islog)
2423                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2424                                     "cannot replace a log with a spare"));
2425                         else if (version >= SPA_VERSION_MULTI_REPLACE)
2426                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2427                                     "already in replacing/spare config; wait "
2428                                     "for completion or use 'zpool detach'"));
2429                         else
2430                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2431                                     "cannot replace a replacing device"));
2432                 } else {
2433                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2434                             "can only attach to mirrors and top-level "
2435                             "disks"));
2436                 }
2437                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2438                 break;
2439
2440         case EINVAL:
2441                 /*
2442                  * The new device must be a single disk.
2443                  */
2444                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2445                     "new device must be a single disk"));
2446                 (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
2447                 break;
2448
2449         case EBUSY:
2450                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
2451                     new_disk);
2452                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2453                 break;
2454
2455         case EOVERFLOW:
2456                 /*
2457                  * The new device is too small.
2458                  */
2459                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2460                     "device is too small"));
2461                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2462                 break;
2463
2464         case EDOM:
2465                 /*
2466                  * The new device has a different alignment requirement.
2467                  */
2468                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2469                     "devices have different sector alignment"));
2470                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2471                 break;
2472
2473         case ENAMETOOLONG:
2474                 /*
2475                  * The resulting top-level vdev spec won't fit in the label.
2476                  */
2477                 (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
2478                 break;
2479
2480         default:
2481                 (void) zpool_standard_error(hdl, errno, msg);
2482         }
2483
2484         return (-1);
2485 }
2486
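/*
 * Sketch of a minimal 'nvroot' for zpool_vdev_attach().  The zpool
 * command builds this nvlist with its vdev-parsing code; the hand-built
 * equivalent below is illustrative only (whole-disk handling and
 * labeling are omitted):
 *
 *	nvlist_t *disk, *nvroot;
 *
 *	verify(nvlist_alloc(&disk, NV_UNIQUE_NAME, 0) == 0);
 *	verify(nvlist_add_string(disk, ZPOOL_CONFIG_TYPE,
 *	    VDEV_TYPE_DISK) == 0);
 *	verify(nvlist_add_string(disk, ZPOOL_CONFIG_PATH,
 *	    "/dev/sdb") == 0);
 *	verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
 *	verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
 *	    VDEV_TYPE_ROOT) == 0);
 *	verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
 *	    &disk, 1) == 0);
 *
 * after which zpool_vdev_attach(zhp, "sda", "/dev/sdb", nvroot, 1)
 * requests that /dev/sdb replace sda.
 */
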
2487 /*
2488  * Detach the specified device.
2489  */
2490 int
2491 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
2492 {
2493         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2494         char msg[1024];
2495         nvlist_t *tgt;
2496         boolean_t avail_spare, l2cache;
2497         libzfs_handle_t *hdl = zhp->zpool_hdl;
2498
2499         (void) snprintf(msg, sizeof (msg),
2500             dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
2501
2502         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2503         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2504             NULL)) == NULL)
2505                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2506
2507         if (avail_spare)
2508                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2509
2510         if (l2cache)
2511                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2512
2513         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2514
2515         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
2516                 return (0);
2517
2518         switch (errno) {
2519
2520         case ENOTSUP:
2521                 /*
2522                  * Can't detach from this type of vdev.
2523                  */
2524                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
2525                     "applicable to mirror and replacing vdevs"));
2526                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2527                 break;
2528
2529         case EBUSY:
2530                 /*
2531                  * There are no other replicas of this device.
2532                  */
2533                 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
2534                 break;
2535
2536         default:
2537                 (void) zpool_standard_error(hdl, errno, msg);
2538         }
2539
2540         return (-1);
2541 }
2542
2543 /*
2544  * Find a mirror vdev in the source nvlist.
2545  *
2546  * The mchild array contains a list of disks in one of the top-level mirrors
2547  * of the source pool.  The schild array contains a list of disks that the
2548  * user specified on the command line.  We loop over the mchild array to
2549  * see if any entry in the schild array matches.
2550  *
2551  * If a disk in the mchild array is found in the schild array, we return
2552  * the index of that entry.  Otherwise we return -1.
2553  */
2554 static int
2555 find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
2556     nvlist_t **schild, uint_t schildren)
2557 {
2558         uint_t mc;
2559
2560         for (mc = 0; mc < mchildren; mc++) {
2561                 uint_t sc;
2562                 char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2563                     mchild[mc], B_FALSE);
2564
2565                 for (sc = 0; sc < schildren; sc++) {
2566                         char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2567                             schild[sc], B_FALSE);
2568                         boolean_t result = (strcmp(mpath, spath) == 0);
2569
2570                         free(spath);
2571                         if (result) {
2572                                 free(mpath);
2573                                 return (mc);
2574                         }
2575                 }
2576
2577                 free(mpath);
2578         }
2579
2580         return (-1);
2581 }
2582
2583 /*
2584  * Split a mirror pool.  If newroot points to NULL, then a new nvlist
2585  * is generated and it is the caller's responsibility to free it.
2586  */
2587 int
2588 zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
2589     nvlist_t *props, splitflags_t flags)
2590 {
2591         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2592         char msg[1024];
2593         nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
2594         nvlist_t **varray = NULL, *zc_props = NULL;
2595         uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
2596         libzfs_handle_t *hdl = zhp->zpool_hdl;
2597         uint64_t vers;
2598         boolean_t freelist = B_FALSE, memory_err = B_TRUE;
2599         int retval = 0;
2600
2601         (void) snprintf(msg, sizeof (msg),
2602             dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
2603
2604         if (!zpool_name_valid(hdl, B_FALSE, newname))
2605                 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
2606
2607         if ((config = zpool_get_config(zhp, NULL)) == NULL) {
2608                 (void) fprintf(stderr, gettext("Internal error: unable to "
2609                     "retrieve pool configuration\n"));
2610                 return (-1);
2611         }
2612
2613         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
2614             == 0);
2615         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
2616
2617         if (props) {
2618                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
2619                 if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
2620                     props, vers, flags, msg)) == NULL)
2621                         return (-1);
2622         }
2623
2624         if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
2625             &children) != 0) {
2626                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2627                     "Source pool is missing vdev tree"));
2628                 if (zc_props)
2629                         nvlist_free(zc_props);
2630                 return (-1);
2631         }
2632
2633         varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
2634         vcount = 0;
2635
2636         if (*newroot == NULL ||
2637             nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
2638             &newchild, &newchildren) != 0)
2639                 newchildren = 0;
2640
2641         for (c = 0; c < children; c++) {
2642                 uint64_t is_log = B_FALSE, is_hole = B_FALSE;
2643                 char *type;
2644                 nvlist_t **mchild, *vdev;
2645                 uint_t mchildren;
2646                 int entry;
2647
2648                 /*
2649                  * Unlike cache & spares, slogs are stored in the
2650                  * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
2651                  */
2652                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
2653                     &is_log);
2654                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
2655                     &is_hole);
2656                 if (is_log || is_hole) {
2657                         /*
2658                          * Create a hole vdev and put it in the config.
2659                          */
2660                         if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
2661                                 goto out;
2662                         if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
2663                             VDEV_TYPE_HOLE) != 0)
2664                                 goto out;
2665                         if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
2666                             1) != 0)
2667                                 goto out;
2668                         if (lastlog == 0)
2669                                 lastlog = vcount;
2670                         varray[vcount++] = vdev;
2671                         continue;
2672                 }
2673                 lastlog = 0;
2674                 verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
2675                     == 0);
2676                 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
2677                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2678                             "Source pool must be composed only of mirrors\n"));
2679                         retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
2680                         goto out;
2681                 }
2682
2683                 verify(nvlist_lookup_nvlist_array(child[c],
2684                     ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
2685
2686                 /* find or add an entry for this top-level vdev */
2687                 if (newchildren > 0 &&
2688                     (entry = find_vdev_entry(zhp, mchild, mchildren,
2689                     newchild, newchildren)) >= 0) {
2690                         /* We found a disk that the user specified. */
2691                         vdev = mchild[entry];
2692                         ++found;
2693                 } else {
2694                         /* User didn't specify a disk for this vdev. */
2695                         vdev = mchild[mchildren - 1];
2696                 }
2697
2698                 if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
2699                         goto out;
2700         }
2701
2702         /* did we find every disk the user specified? */
2703         if (found != newchildren) {
2704                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
2705                     "include at most one disk from each mirror"));
2706                 retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
2707                 goto out;
2708         }
2709
2710         /* Prepare the nvlist for populating. */
2711         if (*newroot == NULL) {
2712                 if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
2713                         goto out;
2714                 freelist = B_TRUE;
2715                 if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
2716                     VDEV_TYPE_ROOT) != 0)
2717                         goto out;
2718         } else {
2719                 verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
2720         }
2721
2722         /* Add all the children we found */
2723         if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
2724             lastlog == 0 ? vcount : lastlog) != 0)
2725                 goto out;
2726
2727         /*
2728          * If we're just doing a dry run, exit now with success.
2729          */
2730         if (flags.dryrun) {
2731                 memory_err = B_FALSE;
2732                 freelist = B_FALSE;
2733                 goto out;
2734         }
2735
2736         /* now build up the config list & call the ioctl */
2737         if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
2738                 goto out;
2739
2740         if (nvlist_add_nvlist(newconfig,
2741             ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
2742             nvlist_add_string(newconfig,
2743             ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
2744             nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
2745                 goto out;
2746
2747         /*
2748          * The new pool is automatically part of the namespace unless we
2749          * explicitly export it.
2750          */
2751         if (!flags.import)
2752                 zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
2753         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2754         (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
2755         if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
2756                 goto out;
2757         if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
2758                 goto out;
2759
2760         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
2761                 retval = zpool_standard_error(hdl, errno, msg);
2762                 goto out;
2763         }
2764
2765         freelist = B_FALSE;
2766         memory_err = B_FALSE;
2767
2768 out:
2769         if (varray != NULL) {
2770                 int v;
2771
2772                 for (v = 0; v < vcount; v++)
2773                         nvlist_free(varray[v]);
2774                 free(varray);
2775         }
2776         zcmd_free_nvlists(&zc);
2777         if (zc_props)
2778                 nvlist_free(zc_props);
2779         if (newconfig)
2780                 nvlist_free(newconfig);
2781         if (freelist) {
2782                 nvlist_free(*newroot);
2783                 *newroot = NULL;
2784         }
2785
2786         if (retval != 0)
2787                 return (retval);
2788
2789         if (memory_err)
2790                 return (no_memory(hdl));
2791
2792         return (0);
2793 }
2794
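/*
 * Example usage (sketch): dry-run a split, letting the library pick the
 * last disk of each mirror, then inspect the proposed layout.
 *
 *	nvlist_t *newroot = NULL;
 *	splitflags_t flags = { 0 };
 *
 *	flags.dryrun = 1;
 *	if (zpool_vdev_split(zhp, "tank2", &newroot, NULL, flags) == 0) {
 *		nvlist_print(stdout, newroot);
 *		nvlist_free(newroot);
 *	}
 */
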
2795 /*
2796  * Remove the given device.  Currently, this is supported only for hot spares,
2797  * level 2 cache devices, and log devices.
2798  */
2799 int
2800 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
2801 {
2802         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2803         char msg[1024];
2804         nvlist_t *tgt;
2805         boolean_t avail_spare, l2cache, islog;
2806         libzfs_handle_t *hdl = zhp->zpool_hdl;
2807         uint64_t version;
2808
2809         (void) snprintf(msg, sizeof (msg),
2810             dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
2811
2812         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2813         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2814             &islog)) == NULL)
2815                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2816         /*
2817          * XXX - this should just go away.
2818          */
2819         if (!avail_spare && !l2cache && !islog) {
2820                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2821                     "only inactive hot spares, cache, top-level, "
2822                     "or log devices can be removed"));
2823                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2824         }
2825
2826         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
2827         if (islog && version < SPA_VERSION_HOLES) {
2828                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2829                     "pool must be upgraded to support log removal"));
2830                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
2831         }
2832
2833         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2834
2835         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
2836                 return (0);
2837
2838         return (zpool_standard_error(hdl, errno, msg));
2839 }
2840
2841 /*
2842  * Clear the errors for the pool, or the particular device if specified.
2843  */
2844 int
2845 zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
2846 {
2847         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2848         char msg[1024];
2849         nvlist_t *tgt;
2850         zpool_rewind_policy_t policy;
2851         boolean_t avail_spare, l2cache;
2852         libzfs_handle_t *hdl = zhp->zpool_hdl;
2853         nvlist_t *nvi = NULL;
2854         int error;
2855
2856         if (path)
2857                 (void) snprintf(msg, sizeof (msg),
2858                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
2859                     path);
2860         else
2861                 (void) snprintf(msg, sizeof (msg),
2862                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
2863                     zhp->zpool_name);
2864
2865         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2866         if (path) {
2867                 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
2868                     &l2cache, NULL)) == NULL)
2869                         return (zfs_error(hdl, EZFS_NODEVICE, msg));
2870
2871                 /*
2872                  * Don't allow error clearing for hot spares.  Do allow
2873                  * error clearing for l2cache devices.
2874                  */
2875                 if (avail_spare)
2876                         return (zfs_error(hdl, EZFS_ISSPARE, msg));
2877
2878                 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
2879                     &zc.zc_guid) == 0);
2880         }
2881
2882         zpool_get_rewind_policy(rewindnvl, &policy);
2883         zc.zc_cookie = policy.zrp_request;
2884
2885         if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
2886                 return (-1);
2887
2888         if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
2889                 return (-1);
2890
2891         while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
2892             errno == ENOMEM) {
2893                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
2894                         zcmd_free_nvlists(&zc);
2895                         return (-1);
2896                 }
2897         }
2898
2899         if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
2900             errno != EPERM && errno != EACCES)) {
2901                 if (policy.zrp_request &
2902                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
2903                         (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
2904                         zpool_rewind_exclaim(hdl, zc.zc_name,
2905                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
2906                             nvi);
2907                         nvlist_free(nvi);
2908                 }
2909                 zcmd_free_nvlists(&zc);
2910                 return (0);
2911         }
2912
2913         zcmd_free_nvlists(&zc);
2914         return (zpool_standard_error(hdl, errno, msg));
2915 }
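
/*
 * Illustrative sketch, assuming an open pool handle 'zhp': clear
 * errors pool-wide (NULL path) with an explicit no-rewind policy,
 * mirroring how the zpool(8) front end builds its policy nvlist.
 *
 *        nvlist_t *policy = NULL;
 *
 *        verify(nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) == 0);
 *        verify(nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST,
 *            ZPOOL_NO_REWIND) == 0);
 *        (void) zpool_clear(zhp, NULL, policy);
 *        nvlist_free(policy);
 */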
2916
2917 /*
2918  * Similar to zpool_clear(), but takes a GUID (used by fmd).
2919  */
2920 int
2921 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
2922 {
2923         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2924         char msg[1024];
2925         libzfs_handle_t *hdl = zhp->zpool_hdl;
2926
2927         (void) snprintf(msg, sizeof (msg),
2928             dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
2929             (u_longlong_t)guid);
2930
2931         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2932         zc.zc_guid = guid;
2933         zc.zc_cookie = ZPOOL_NO_REWIND;
2934
2935         if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
2936                 return (0);
2937
2938         return (zpool_standard_error(hdl, errno, msg));
2939 }
2940
2941 /*
2942  * Convert from a devid string to a path.
2943  */
2944 static char *
2945 devid_to_path(char *devid_str)
2946 {
2947         ddi_devid_t devid;
2948         char *minor;
2949         char *path;
2950         devid_nmlist_t *list = NULL;
2951         int ret;
2952
2953         if (devid_str_decode(devid_str, &devid, &minor) != 0)
2954                 return (NULL);
2955
2956         ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
2957
2958         devid_str_free(minor);
2959         devid_free(devid);
2960
2961         if (ret != 0)
2962                 return (NULL);
2963
2964         if ((path = strdup(list[0].devname)) == NULL)
2965                 return (NULL);
2966
2967         devid_free_nmlist(list);
2968
2969         return (path);
2970 }
2971
2972 /*
2973  * Convert from a path to a devid string.
2974  */
2975 static char *
2976 path_to_devid(const char *path)
2977 {
2978         int fd;
2979         ddi_devid_t devid;
2980         char *minor, *ret;
2981
2982         if ((fd = open(path, O_RDONLY)) < 0)
2983                 return (NULL);
2984
2985         minor = NULL;
2986         ret = NULL;
2987         if (devid_get(fd, &devid) == 0) {
2988                 if (devid_get_minor_name(fd, &minor) == 0)
2989                         ret = devid_str_encode(devid, minor);
2990                 if (minor != NULL)
2991                         devid_str_free(minor);
2992                 devid_free(devid);
2993         }
2994         (void) close(fd);
2995
2996         return (ret);
2997 }
2998
2999 /*
3000  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
3001  * ignore any failure here, since a common case is for an unprivileged user to
3002  * type 'zpool status', and we'll display the correct information anyway.
3003  */
3004 static void
3005 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
3006 {
3007         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3008
3009         (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3010         (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
3011         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3012             &zc.zc_guid) == 0);
3013
3014         (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
3015 }
3016
3017 /*
3018  * Remove partition suffix from a vdev path.  Partition suffixes may take three
3019  * forms: "-partX", "pX", or "X", where X is a string of digits.  The second
3020  * case only occurs when the suffix is preceded by a digit, i.e. "md0p0".  The
3021  * third case only occurs when preceded by a string matching the regular
3022  * expression "^[hs]d[a-z]+", i.e. a scsi or ide disk.
3023  */
3024 static char *
3025 strip_partition(libzfs_handle_t *hdl, char *path)
3026 {
3027         char *tmp = zfs_strdup(hdl, path);
3028         char *part = NULL, *d = NULL;
3029
3030         if ((part = strstr(tmp, "-part")) && part != tmp) {
3031                 d = part + 5;
3032         } else if ((part = strrchr(tmp, 'p')) &&
3033             part > tmp + 1 && isdigit(*(part-1))) {
3034                 d = part + 1;
3035         } else if ((tmp[0] == 'h' || tmp[0] == 's') && tmp[1] == 'd') {
3036                 for (d = &tmp[2]; isalpha(*d); part = ++d);
3037         }
3038         if (part && d && *d != '\0') {
3039                 for (; isdigit(*d); d++);
3040                 if (*d == '\0')
3041                         *part = '\0';
3042         }
3043         return (tmp);
3044 }
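
/*
 * Illustrative inputs and outputs for the stripping logic above:
 *
 *        "sda3"        -> "sda"     (digit suffix on a scsi/ide name)
 *        "md126p1"     -> "md126"   ("pX" suffix preceded by a digit)
 *        "dm-0-part1"  -> "dm-0"    ("-partX" suffix)
 *
 * The returned string is allocated with zfs_strdup() and must be
 * freed by the caller.
 */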
3045
3046 #define PATH_BUF_LEN    64
3047
3048 /*
3049  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
3050  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3051  * We also check if this is a whole disk, in which case we strip off the
3052  * trailing 's0' slice name.
3053  *
3054  * This routine is also responsible for identifying when disks have been
3055  * reconfigured in a new location.  The kernel will have opened the device by
3056  * devid, but the path will still refer to the old location.  To catch this, we
3057  * first do a path -> devid translation (which is fast for the common case).  If
3058  * the devid matches, we're done.  If not, we do a reverse devid -> path
3059  * translation and issue the appropriate ioctl() to update the path of the vdev.
3060  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
3061  * of these checks.
3062  */
3063 char *
3064 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
3065     boolean_t verbose)
3066 {
3067         char *path, *devid, *type;
3068         uint64_t value;
3069         char buf[PATH_BUF_LEN];
3070         vdev_stat_t *vs;
3071         uint_t vsc;
3072
3073         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
3074             &value) == 0) {
3075                 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3076                     &value) == 0);
3077                 (void) snprintf(buf, sizeof (buf), "%llu",
3078                     (u_longlong_t)value);
3079                 path = buf;
3080         } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
3081                 /*
3082                  * If the device is dead (faulted, offline, etc) then don't
3083                  * bother opening it.  Otherwise we may be forcing the user to
3084                  * open a misbehaving device, which can have undesirable
3085                  * effects.
3086                  */
3087                 if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
3088                     (uint64_t **)&vs, &vsc) != 0 ||
3089                     vs->vs_state >= VDEV_STATE_DEGRADED) &&
3090                     zhp != NULL &&
3091                     nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
3092                         /*
3093                          * Determine if the current path is correct.
3094                          */
3095                         char *newdevid = path_to_devid(path);
3096
3097                         if (newdevid == NULL ||
3098                             strcmp(devid, newdevid) != 0) {
3099                                 char *newpath;
3100
3101                                 if ((newpath = devid_to_path(devid)) != NULL) {
3102                                         /*
3103                                          * Update the path appropriately.
3104                                          */
3105                                         set_path(zhp, nv, newpath);
3106                                         if (nvlist_add_string(nv,
3107                                             ZPOOL_CONFIG_PATH, newpath) == 0)
3108                                                 verify(nvlist_lookup_string(nv,
3109                                                     ZPOOL_CONFIG_PATH,
3110                                                     &path) == 0);
3111                                         free(newpath);
3112                                 }
3113                         }
3114
3115                         if (newdevid)
3116                                 devid_str_free(newdevid);
3117                 }
3118
3119                 /*
3120                  * For a block device only use the name.
3121                  */
3122                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
3123                 if (strcmp(type, VDEV_TYPE_DISK) == 0) {
3124                         path = strrchr(path, '/');
3125                         path++;
3126                 }
3127
3128                 /*
3129                  * Remove the partition from the path if this is a whole disk.
3130                  */
3131                 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
3132                     &value) == 0 && value) {
3133                         return (strip_partition(hdl, path));
3134                 }
3135         } else {
3136                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
3137
3138                 /*
3139                  * If it's a raidz device, we need to stick in the parity level.
3140                  */
3141                 if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
3142                         char tmpbuf[PATH_BUF_LEN];
3143
3144                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
3145                             &value) == 0);
3146                         (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%llu", path,
3147                             (u_longlong_t)value);
3148                         path = tmpbuf;
3149                 }
3150
3151                 /*
3152                  * We identify each top-level vdev by using a <type-id>
3153                  * naming convention.
3154                  */
3155                 if (verbose) {
3156                         uint64_t id;
3157
3158                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
3159                             &id) == 0);
3160                         (void) snprintf(buf, sizeof (buf), "%s-%llu", path,
3161                             (u_longlong_t)id);
3162                         path = buf;
3163                 }
3164         }
3165
3166         return (zfs_strdup(hdl, path));
3167 }
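
/*
 * Illustrative sketch: the name returned above is allocated for the
 * caller, who must free() it.  Assuming 'nv' is a vdev nvlist taken
 * from the pool configuration:
 *
 *        char *name = zpool_vdev_name(hdl, zhp, nv, B_FALSE);
 *
 *        if (name != NULL) {
 *                (void) printf("vdev: %s\n", name);
 *                free(name);
 *        }
 */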
3168
3169 static int
3170 zbookmark_compare(const void *a, const void *b)
3171 {
3172         return (memcmp(a, b, sizeof (zbookmark_t)));
3173 }
3174
3175 /*
3176  * Retrieve the persistent error log, uniquify the members, and return to the
3177  * caller.
3178  */
3179 int
3180 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
3181 {
3182         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3183         uint64_t count;
3184         zbookmark_t *zb = NULL;
3185         int i;
3186
3187         /*
3188          * Retrieve the raw error list from the kernel.  If the number of errors
3189          * has increased, allocate more space and continue until we get the
3190          * entire list.
3191          */
3192         verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
3193             &count) == 0);
3194         if (count == 0)
3195                 return (0);
3196         if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
3197             count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
3198                 return (-1);
3199         zc.zc_nvlist_dst_size = count;
3200         (void) strcpy(zc.zc_name, zhp->zpool_name);
3201         for (;;) {
3202                 if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
3203                     &zc) != 0) {
3204                         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3205                         if (errno == ENOMEM) {
3206                                 count = zc.zc_nvlist_dst_size;
3207                                 if ((zc.zc_nvlist_dst = (uintptr_t)
3208                                     zfs_alloc(zhp->zpool_hdl, count *
3209                                     sizeof (zbookmark_t))) == (uintptr_t)NULL)
3210                                         return (-1);
3211                         } else {
3212                                 return (-1);
3213                         }
3214                 } else {
3215                         break;
3216                 }
3217         }
3218
3219         /*
3220          * Sort the resulting bookmarks.  This is a little confusing due to the
3221          * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
3222  * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
3223  * _not_ copied as part of the process.  So we point the start of our
3224  * array appropriately and decrement the total number of elements.
3225          */
3226         zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
3227             zc.zc_nvlist_dst_size;
3228         count -= zc.zc_nvlist_dst_size;
3229
3230         qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
3231
3232         verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
3233
3234         /*
3235          * Fill in the nverrlistp with nvlist's of dataset and object numbers.
3236          */
3237         for (i = 0; i < count; i++) {
3238                 nvlist_t *nv;
3239
3240                 /* ignoring zb_blkid and zb_level for now */
3241                 if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
3242                     zb[i-1].zb_object == zb[i].zb_object)
3243                         continue;
3244
3245                 if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
3246                         goto nomem;
3247                 if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
3248                     zb[i].zb_objset) != 0) {
3249                         nvlist_free(nv);
3250                         goto nomem;
3251                 }
3252                 if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
3253                     zb[i].zb_object) != 0) {
3254                         nvlist_free(nv);
3255                         goto nomem;
3256                 }
3257                 if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
3258                         nvlist_free(nv);
3259                         goto nomem;
3260                 }
3261                 nvlist_free(nv);
3262         }
3263
3264         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3265         return (0);
3266
3267 nomem:
3268         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3269         return (no_memory(zhp->zpool_hdl));
3270 }
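
/*
 * Illustrative sketch of walking the list built above; each element
 * is an nvlist carrying ZPOOL_ERR_DATASET and ZPOOL_ERR_OBJECT, which
 * can be resolved to a path with zpool_obj_to_path().  Note that the
 * list is left unallocated when the pool reports zero errors.
 *
 *        nvlist_t *nverrlist = NULL;
 *        nvpair_t *elem = NULL;
 *
 *        if (zpool_get_errlog(zhp, &nverrlist) == 0 && nverrlist != NULL) {
 *                while ((elem = nvlist_next_nvpair(nverrlist, elem))) {
 *                        nvlist_t *nv;
 *                        uint64_t dsobj, obj;
 *
 *                        verify(nvpair_value_nvlist(elem, &nv) == 0);
 *                        verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_DATASET,
 *                            &dsobj) == 0);
 *                        verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_OBJECT,
 *                            &obj) == 0);
 *                }
 *                nvlist_free(nverrlist);
 *        }
 */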
3271
3272 /*
3273  * Upgrade a ZFS pool to the latest on-disk version.
3274  */
3275 int
3276 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
3277 {
3278         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3279         libzfs_handle_t *hdl = zhp->zpool_hdl;
3280
3281         (void) strcpy(zc.zc_name, zhp->zpool_name);
3282         zc.zc_cookie = new_version;
3283
3284         if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
3285                 return (zpool_standard_error_fmt(hdl, errno,
3286                     dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
3287                     zhp->zpool_name));
3288         return (0);
3289 }
3290
3291 void
3292 zpool_set_history_str(const char *subcommand, int argc, char **argv,
3293     char *history_str)
3294 {
3295         int i;
3296
3297         (void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
3298         for (i = 1; i < argc; i++) {
3299                 if (strlen(history_str) + 1 + strlen(argv[i]) >
3300                     HIS_MAX_RECORD_LEN)
3301                         break;
3302                 (void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
3303                 (void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
3304         }
3305 }
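
/*
 * For example (illustrative values), a subcommand of "zpool" with an
 * argv of { "zpool", "create", "tank", "mirror", "sda", "sdb" }
 * produces the history string "zpool create tank mirror sda sdb".
 * Trailing arguments are dropped whole, rather than truncated
 * mid-word, once HIS_MAX_RECORD_LEN would be exceeded.
 */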
3306
3307 /*
3308  * Stage command history for logging.
3309  */
3310 int
3311 zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
3312 {
3313         if (history_str == NULL)
3314                 return (EINVAL);
3315
3316         if (strlen(history_str) > HIS_MAX_RECORD_LEN)
3317                 return (EINVAL);
3318
3319         if (hdl->libzfs_log_str != NULL)
3320                 free(hdl->libzfs_log_str);
3321
3322         if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
3323                 return (no_memory(hdl));
3324
3325         return (0);
3326 }
3327
3328 /*
3329  * Perform ioctl to get some command history of a pool.
3330  *
3331  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
3332  * logical offset of the history buffer to start reading from.
3333  *
3334  * Upon return, 'off' is the next logical offset to read from and
3335  * 'len' is the actual amount of bytes read into 'buf'.
3336  */
3337 static int
3338 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
3339 {
3340         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3341         libzfs_handle_t *hdl = zhp->zpool_hdl;
3342
3343         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3344
3345         zc.zc_history = (uint64_t)(uintptr_t)buf;
3346         zc.zc_history_len = *len;
3347         zc.zc_history_offset = *off;
3348
3349         if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
3350                 switch (errno) {
3351                 case EPERM:
3352                         return (zfs_error_fmt(hdl, EZFS_PERM,
3353                             dgettext(TEXT_DOMAIN,
3354                             "cannot show history for pool '%s'"),
3355                             zhp->zpool_name));
3356                 case ENOENT:
3357                         return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
3358                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3359                             "'%s'"), zhp->zpool_name));
3360                 case ENOTSUP:
3361                         return (zfs_error_fmt(hdl, EZFS_BADVERSION,
3362                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3363                             "'%s', pool must be upgraded"), zhp->zpool_name));
3364                 default:
3365                         return (zpool_standard_error_fmt(hdl, errno,
3366                             dgettext(TEXT_DOMAIN,
3367                             "cannot get history for '%s'"), zhp->zpool_name));
3368                 }
3369         }
3370
3371         *len = zc.zc_history_len;
3372         *off = zc.zc_history_offset;
3373
3374         return (0);
3375 }
3376
3377 /*
3378  * Process the buffer of nvlists, unpacking and storing each nvlist record
3379  * into 'records'.  'leftover' is set to the number of bytes that weren't
3380  * processed as there wasn't a complete record.
3381  */
3382 int
3383 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
3384     nvlist_t ***records, uint_t *numrecords)
3385 {
3386         uint64_t reclen;
3387         nvlist_t *nv;
3388         int i;
3389
3390         while (bytes_read > sizeof (reclen)) {
3391
3392                 /* get length of packed record (stored as little endian) */
3393                 for (i = 0, reclen = 0; i < sizeof (reclen); i++)
3394                         reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
3395
3396                 if (bytes_read < sizeof (reclen) + reclen)
3397                         break;
3398
3399                 /* unpack record */
3400                 if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
3401                         return (ENOMEM);
3402                 bytes_read -= sizeof (reclen) + reclen;
3403                 buf += sizeof (reclen) + reclen;
3404
3405                 /* add record to nvlist array */
3406                 (*numrecords)++;
3407                 if (ISP2(*numrecords + 1)) {
3408                         *records = realloc(*records,
3409                             *numrecords * 2 * sizeof (nvlist_t *));
3410                 }
3411                 (*records)[*numrecords - 1] = nv;
3412         }
3413
3414         *leftover = bytes_read;
3415         return (0);
3416 }
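
/*
 * Illustrative layout of the buffer consumed above: each record is an
 * 8-byte little-endian length followed by that many bytes of packed
 * nvlist, laid end to end.
 *
 *        +----------+----------------+----------+----------------+
 *        | len0 (8) | nvlist0 (len0) | len1 (8) | nvlist1 (len1) | ...
 *        +----------+----------------+----------+----------------+
 *
 * Any trailing partial record is reported through 'leftover' so the
 * caller can rewind its read offset and fetch it whole next time.
 */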
3417
3418 #define HIS_BUF_LEN     (128*1024)
3419
3420 /*
3421  * Retrieve the command history of a pool.
3422  */
3423 int
3424 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
3425 {
3426         char buf[HIS_BUF_LEN];
3427         uint64_t off = 0;
3428         nvlist_t **records = NULL;
3429         uint_t numrecords = 0;
3430         int err, i;
3431
3432         do {
3433                 uint64_t bytes_read = sizeof (buf);
3434                 uint64_t leftover;
3435
3436                 if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
3437                         break;
3438
3439                 /* if nothing else was read in, we're at EOF, just return */
3440                 if (!bytes_read)
3441                         break;
3442
3443                 if ((err = zpool_history_unpack(buf, bytes_read,
3444                     &leftover, &records, &numrecords)) != 0)
3445                         break;
3446                 off -= leftover;
3447
3448                 /* CONSTCOND */
3449         } while (1);
3450
3451         if (!err) {
3452                 verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
3453                 verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
3454                     records, numrecords) == 0);
3455         }
3456         for (i = 0; i < numrecords; i++)
3457                 nvlist_free(records[i]);
3458         free(records);
3459
3460         return (err);
3461 }
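
/*
 * Illustrative sketch, assuming an open pool handle: fetch the entire
 * history and walk the ZPOOL_HIST_RECORD array.
 *
 *        nvlist_t *nvhis = NULL;
 *        nvlist_t **records;
 *        uint_t nrecords, i;
 *
 *        if (zpool_get_history(zhp, &nvhis) == 0) {
 *                verify(nvlist_lookup_nvlist_array(nvhis,
 *                    ZPOOL_HIST_RECORD, &records, &nrecords) == 0);
 *                for (i = 0; i < nrecords; i++)
 *                        nvlist_print(stdout, records[i]);
 *                nvlist_free(nvhis);
 *        }
 */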
3462
3463 /*
3464  * Retrieve the next event.  If there is a new event available 'nvp' will
3465  * contain a newly allocated nvlist and 'dropped' will be set to the number
3466  * of missed events since the last call to this function.  When 'nvp' is
3467  * set to NULL it indicates no new events are available.  In either case
3468  * the function returns 0 and it is up to the caller to free 'nvp'.  In
3469  * the case of a fatal error the function will return a non-zero value.
3470  * When the function is called in blocking mode it will not return until
3471  * a new event is available.
3472  */
3473 int
3474 zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
3475     int *dropped, int block, int cleanup_fd)
3476 {
3477         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3478         int error = 0;
3479
3480         *nvp = NULL;
3481         *dropped = 0;
3482         zc.zc_cleanup_fd = cleanup_fd;
3483
3484         if (!block)
3485                 zc.zc_guid = ZEVENT_NONBLOCK;
3486
3487         if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
3488                 return (-1);
3489
3490 retry:
3491         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
3492                 switch (errno) {
3493                 case ESHUTDOWN:
3494                         error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
3495                             dgettext(TEXT_DOMAIN, "zfs shutdown"));
3496                         goto out;
3497                 case ENOENT:
3498                         /* ENOENT is expected only when not blocking */
3499                         if (block)
3500                                 error = zpool_standard_error_fmt(hdl, errno,
3501                                     dgettext(TEXT_DOMAIN, "cannot get event"));
3502
3503                         goto out;
3504                 case ENOMEM:
3505                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3506                                 error = zfs_error_fmt(hdl, EZFS_NOMEM,
3507                                     dgettext(TEXT_DOMAIN, "cannot get event"));
3508                                 goto out;
3509                         } else {
3510                                 goto retry;
3511                         }
3512                 default:
3513                         error = zpool_standard_error_fmt(hdl, errno,
3514                             dgettext(TEXT_DOMAIN, "cannot get event"));
3515                         goto out;
3516                 }
3517         }
3518
3519         error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
3520         if (error != 0)
3521                 goto out;
3522
3523         *dropped = (int)zc.zc_cookie;
3524 out:
3525         zcmd_free_nvlists(&zc);
3526
3527         return (error);
3528 }
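
/*
 * Illustrative consumer loop.  It assumes 'hdl' is valid and that
 * 'cleanup_fd' is a descriptor opened on ZFS_DEV, which identifies
 * this consumer's position in the event stream.
 *
 *        nvlist_t *nvl;
 *        int dropped;
 *
 *        for (;;) {
 *                if (zpool_events_next(hdl, &nvl, &dropped, 1,
 *                    cleanup_fd) != 0)
 *                        break;
 *                if (dropped > 0)
 *                        (void) fprintf(stderr, "missed %d events\n",
 *                            dropped);
 *                if (nvl != NULL) {
 *                        nvlist_print(stdout, nvl);
 *                        nvlist_free(nvl);
 *                }
 *        }
 */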
3529
3530 /*
3531  * Clear all events.
3532  */
3533 int
3534 zpool_events_clear(libzfs_handle_t *hdl, int *count)
3535 {
3536         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3537         char msg[1024];
3538
3539         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
3540             "cannot clear events"));
3541
3542         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
3543                 return (zpool_standard_error_fmt(hdl, errno, msg));
3544
3545         if (count != NULL)
3546                 *count = (int)zc.zc_cookie; /* # of events cleared */
3547
3548         return (0);
3549 }
3550
3551 void
3552 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
3553     char *pathname, size_t len)
3554 {
3555         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
3556         boolean_t mounted = B_FALSE;
3557         char *mntpnt = NULL;
3558         char dsname[MAXNAMELEN];
3559
3560         if (dsobj == 0) {
3561                 /* special case for the MOS */
3562                 (void) snprintf(pathname, len, "<metadata>:<0x%llx>", (longlong_t)obj);
3563                 return;
3564         }
3565
3566         /* get the dataset's name */
3567         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3568         zc.zc_obj = dsobj;
3569         if (ioctl(zhp->zpool_hdl->libzfs_fd,
3570             ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
3571                 /* just write out a path of two object numbers */
3572                 (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
3573                     (longlong_t)dsobj, (longlong_t)obj);
3574                 return;
3575         }
3576         (void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
3577
3578         /* find out if the dataset is mounted */
3579         mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
3580
3581         /* get the corrupted object's path */
3582         (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
3583         zc.zc_obj = obj;
3584         if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
3585             &zc) == 0) {
3586                 if (mounted) {
3587                         (void) snprintf(pathname, len, "%s%s", mntpnt,
3588                             zc.zc_value);
3589                 } else {
3590                         (void) snprintf(pathname, len, "%s:%s",
3591                             dsname, zc.zc_value);
3592                 }
3593         } else {
3594                 (void) snprintf(pathname, len, "%s:<0x%llx>", dsname, (longlong_t)obj);
3595         }
3596         free(mntpnt);
3597 }
3598
3599 /*
3600  * Read the EFI label from the config, if a label does not exist then
3601  * pass back the error to the caller. If the caller has passed a non-NULL
3602  * diskaddr argument then we set it to the starting address of the EFI
3603  * partition.
3604  */
3605 static int
3606 read_efi_label(nvlist_t *config, diskaddr_t *sb)
3607 {
3608         char *path;
3609         int fd;
3610         char diskname[MAXPATHLEN];
3611         int err = -1;
3612
3613         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
3614                 return (err);
3615
3616         (void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
3617             strrchr(path, '/'));
3618         if ((fd = open(diskname, O_RDWR|O_DIRECT)) >= 0) {
3619                 struct dk_gpt *vtoc;
3620
3621                 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
3622                         if (sb != NULL)
3623                                 *sb = vtoc->efi_parts[0].p_start;
3624                         efi_free(vtoc);
3625                 }
3626                 (void) close(fd);
3627         }
3628         return (err);
3629 }
3630
3631 /*
3632  * determine where a partition starts on a disk in the current
3633  * configuration
3634  */
3635 static diskaddr_t
3636 find_start_block(nvlist_t *config)
3637 {
3638         nvlist_t **child;
3639         uint_t c, children;
3640         diskaddr_t sb = MAXOFFSET_T;
3641         uint64_t wholedisk;
3642
3643         if (nvlist_lookup_nvlist_array(config,
3644             ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
3645                 if (nvlist_lookup_uint64(config,
3646                     ZPOOL_CONFIG_WHOLE_DISK,
3647                     &wholedisk) != 0 || !wholedisk) {
3648                         return (MAXOFFSET_T);
3649                 }
3650                 if (read_efi_label(config, &sb) < 0)
3651                         sb = MAXOFFSET_T;
3652                 return (sb);
3653         }
3654
3655         for (c = 0; c < children; c++) {
3656                 sb = find_start_block(child[c]);
3657                 if (sb != MAXOFFSET_T) {
3658                         return (sb);
3659                 }
3660         }
3661         return (MAXOFFSET_T);
3662 }
3663
3664 int
3665 zpool_label_disk_wait(char *path, int timeout)
3666 {
3667         struct stat64 statbuf;
3668         int i;
3669
3670         /*
3671          * Wait timeout milliseconds for a newly created device to be available
3672          * from the given path.  There is a small window when a /dev/ device
3673          * will exist and the udev link will not, so we must wait for the
3674          * symlink.  Depending on the udev rules this may take a few seconds.
3675          */
3676         for (i = 0; i < timeout; i++) {
3677                 usleep(1000);
3678
3679                 errno = 0;
3680                 if ((stat64(path, &statbuf) == 0) && (errno == 0))
3681                         return (0);
3682         }
3683
3684         return (ENOENT);
3685 }
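
/*
 * Illustrative: give udev up to three seconds to create the expected
 * partition link before giving up.
 *
 *        if (zpool_label_disk_wait("/dev/sdb1", 3000) != 0)
 *                (void) fprintf(stderr, "partition never appeared\n");
 */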
3686
3687 int
3688 zpool_label_disk_check(char *path)
3689 {
3690         struct dk_gpt *vtoc;
3691         int fd, err;
3692
3693         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0)
3694                 return (errno);
3695
3696         if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
3697                 (void) close(fd);
3698                 return (err);
3699         }
3700
3701         if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
3702                 efi_free(vtoc);
3703                 (void) close(fd);
3704                 return (EIDRM);
3705         }
3706
3707         efi_free(vtoc);
3708         (void) close(fd);
3709         return (0);
3710 }
3711
3712 /*
3713  * Label an individual disk.  The name provided is the short name,
3714  * stripped of any leading /dev path.
3715  */
3716 int
3717 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
3718 {
3719         char path[MAXPATHLEN];
3720         struct dk_gpt *vtoc;
3721         int rval, fd;
3722         size_t resv = EFI_MIN_RESV_SIZE;
3723         uint64_t slice_size;
3724         diskaddr_t start_block;
3725         char errbuf[1024];
3726
3727         /* prepare an error message just in case */
3728         (void) snprintf(errbuf, sizeof (errbuf),
3729             dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
3730
3731         if (zhp) {
3732                 nvlist_t *nvroot;
3733
3734                 if (pool_is_bootable(zhp)) {
3735                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3736                             "EFI labeled devices are not supported on root "
3737                             "pools."));
3738                         return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
3739                 }
3740
3741                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
3742                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
3743
3744                 if (zhp->zpool_start_block == 0)
3745                         start_block = find_start_block(nvroot);
3746                 else
3747                         start_block = zhp->zpool_start_block;
3748                 zhp->zpool_start_block = start_block;
3749         } else {
3750                 /* new pool */
3751                 start_block = NEW_START_BLOCK;
3752         }
3753
3754         (void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
3755             BACKUP_SLICE);
3756
3757         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
3758                 /*
3759                  * This shouldn't happen.  We've long since verified that this
3760                  * is a valid device.
3761                  */
3762                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3763                     "unable to open device '%s': %d"), path, errno);
3764                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
3765         }
3766
3767         if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
3768                 /*
3769                  * The only way this can fail is if we run out of memory, or we
3770                  * were unable to read the disk's capacity
3771                  */
3772                 if (errno == ENOMEM)
3773                         (void) no_memory(hdl);
3774
3775                 (void) close(fd);
3776                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3777                     "unable to read disk capacity of '%s'"), name);
3778
3779                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
3780         }
3781
3782         slice_size = vtoc->efi_last_u_lba + 1;
3783         slice_size -= EFI_MIN_RESV_SIZE;
3784         if (start_block == MAXOFFSET_T)
3785                 start_block = NEW_START_BLOCK;
3786         slice_size -= start_block;
3787
3788         vtoc->efi_parts[0].p_start = start_block;
3789         vtoc->efi_parts[0].p_size = slice_size;
3790
3791         /*
3792          * Why we use V_USR: V_BACKUP confuses users, and is considered
3793          * disposable by some EFI utilities (since EFI doesn't have a backup
3794          * slice).  V_UNASSIGNED is supposed to be used only for zero size
3795          * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
3796          * etc. were all pretty specific.  V_USR is as close to reality as we
3797          * can get, in the absence of V_OTHER.
3798          */
3799         vtoc->efi_parts[0].p_tag = V_USR;
3800         (void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
3801
3802         vtoc->efi_parts[8].p_start = slice_size + start_block;
3803         vtoc->efi_parts[8].p_size = resv;
3804         vtoc->efi_parts[8].p_tag = V_RESERVED;
3805
3806         if ((rval = efi_write(fd, vtoc)) != 0) {
3807                 /*
3808                  * Some block drivers (like pcata) may not support EFI
3809                  * GPT labels.  Print out a helpful error message
3810                  * directing the user to manually label the disk and give
3811                  * a specific slice.
3812                  */
3813                 (void) close(fd);
3814                 efi_free(vtoc);
3815
3816                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
3817                     "parted(8) and then provide a specific slice: %d"), rval);
3818                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
3819         }
3820
3821         (void) close(fd);
3822         efi_free(vtoc);
3823
3824         /* Wait for the first expected slice to appear. */
3825         (void) snprintf(path, sizeof (path), "%s/%s%s%s", DISK_ROOT, name,
3826             isdigit(name[strlen(name)-1]) ? "p" : "", FIRST_SLICE);
3827         rval = zpool_label_disk_wait(path, 3000);
3828         if (rval) {
3829                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
3830                     "detect device partitions on '%s': %d"), path, rval);
3831                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
3832         }
3833
3834         /* We can't be too paranoid.  Read the label back and verify it. */
3835         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
3836         rval = zpool_label_disk_check(path);
3837         if (rval) {
3838                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
3839                     "EFI label on '%s' is damaged.  Ensure\nthis device "
3840                     "is not in use, and is functioning properly: %d"),
3841                     path, rval);
3842                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
3843         }
3844
3845         return (0);
3846 }
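
/*
 * Illustrative end-to-end sketch, assuming a disk "sdb" that belongs
 * to no pool: label it for a new pool (NULL pool handle), leaving the
 * data slice in partition 1 and the small reserved slice in
 * partition 9.
 *
 *        libzfs_handle_t *hdl = libzfs_init();
 *
 *        if (zpool_label_disk(hdl, NULL, "sdb") != 0)
 *                (void) fprintf(stderr, "%s\n",
 *                    libzfs_error_description(hdl));
 *        libzfs_fini(hdl);
 */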