Fix zpool_read_label()
[zfs.git] / lib / libzfs / libzfs_import.c
index ee00648..9e79bd9 100644 (file)
@@ -20,6 +20,8 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 /*
@@ -85,6 +87,7 @@ typedef struct pool_entry {
 typedef struct name_entry {
        char                    *ne_name;
        uint64_t                ne_guid;
+       uint64_t                ne_order;       /* search order; lower preferred */
        struct name_entry       *ne_next;
 } name_entry_t;
 
@@ -130,7 +133,6 @@ fix_paths(nvlist_t *nv, name_entry_t *names)
        uint64_t guid;
        name_entry_t *ne, *best;
        char *path, *devid;
-       int matched;
 
        if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
            &child, &children) == 0) {
@@ -146,44 +148,33 @@ fix_paths(nvlist_t *nv, name_entry_t *names)
         * the path and see if we can calculate a new devid.
         *
         * There may be multiple names associated with a particular guid, in
-        * which case we have overlapping slices or multiple paths to the same
-        * disk.  If this is the case, then we want to pick the path that is
-        * the most similar to the original, where "most similar" is the number
-        * of matching characters starting from the end of the path.  This will
-        * preserve slice numbers even if the disks have been reorganized, and
-        * will also catch preferred disk names if multiple paths exist.
+        * which case we have overlapping partitions or multiple paths to the
+        * same disk.  In this case we prefer to use the path name which
+        * matches the ZPOOL_CONFIG_PATH.  If no matching entry is found we
+        * use the lowest order device which corresponds to the first match
+        * while traversing the ZPOOL_IMPORT_PATH search path.
         */
        verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
        if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
                path = NULL;
 
-       matched = 0;
        best = NULL;
        for (ne = names; ne != NULL; ne = ne->ne_next) {
                if (ne->ne_guid == guid) {
-                       const char *src, *dst;
-                       int count;
 
                        if (path == NULL) {
                                best = ne;
                                break;
                        }
 
-                       src = ne->ne_name + strlen(ne->ne_name) - 1;
-                       dst = path + strlen(path) - 1;
-                       for (count = 0; src >= ne->ne_name && dst >= path;
-                           src--, dst--, count++)
-                               if (*src != *dst)
-                                       break;
-
-                       /*
-                        * At this point, 'count' is the number of characters
-                        * matched from the end.
-                        */
-                       if (count > matched || best == NULL) {
+                       if ((strlen(path) == strlen(ne->ne_name)) &&
+                           !strncmp(path, ne->ne_name, strlen(path))) {
                                best = ne;
-                               matched = count;
+                               break;
                        }
+
+                       if (best == NULL || ne->ne_order < best->ne_order)
+                               best = ne;
                }
        }
 
@@ -209,7 +200,7 @@ fix_paths(nvlist_t *nv, name_entry_t *names)
  */
 static int
 add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
-    nvlist_t *config)
+    int order, nvlist_t *config)
 {
        uint64_t pool_guid, vdev_guid, top_guid, txg, state;
        pool_entry_t *pe;
@@ -234,6 +225,7 @@ add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
                        return (-1);
                }
                ne->ne_guid = vdev_guid;
+               ne->ne_order = order;
                ne->ne_next = pl->names;
                pl->names = ne;
                return (0);
@@ -335,6 +327,7 @@ add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
        }
 
        ne->ne_guid = vdev_guid;
+       ne->ne_order = order;
        ne->ne_next = pl->names;
        pl->names = ne;
 
@@ -441,8 +434,8 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
        uint_t i, nspares, nl2cache;
        boolean_t config_seen;
        uint64_t best_txg;
-       char *name, *hostname;
-       uint64_t version, guid;
+       char *name, *hostname = NULL;
+       uint64_t guid;
        uint_t children = 0;
        nvlist_t **child = NULL;
        uint_t holes;
@@ -528,47 +521,48 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
                                 * configuration:
                                 *
                                 *      version
-                                *      pool guid
-                                *      name
-                                *      pool state
+                                *      pool guid
+                                *      name
+                                *      comment (if available)
+                                *      pool state
                                 *      hostid (if available)
                                 *      hostname (if available)
                                 */
-                               uint64_t state;
+                               uint64_t state, version;
+                               char *comment = NULL;
+
+                               version = fnvlist_lookup_uint64(tmp,
+                                   ZPOOL_CONFIG_VERSION);
+                               fnvlist_add_uint64(config,
+                                   ZPOOL_CONFIG_VERSION, version);
+                               guid = fnvlist_lookup_uint64(tmp,
+                                   ZPOOL_CONFIG_POOL_GUID);
+                               fnvlist_add_uint64(config,
+                                   ZPOOL_CONFIG_POOL_GUID, guid);
+                               name = fnvlist_lookup_string(tmp,
+                                   ZPOOL_CONFIG_POOL_NAME);
+                               fnvlist_add_string(config,
+                                   ZPOOL_CONFIG_POOL_NAME, name);
+
+                               if (nvlist_lookup_string(tmp,
+                                   ZPOOL_CONFIG_COMMENT, &comment) == 0)
+                                       fnvlist_add_string(config,
+                                           ZPOOL_CONFIG_COMMENT, comment);
+
+                               state = fnvlist_lookup_uint64(tmp,
+                                   ZPOOL_CONFIG_POOL_STATE);
+                               fnvlist_add_uint64(config,
+                                   ZPOOL_CONFIG_POOL_STATE, state);
 
-                               verify(nvlist_lookup_uint64(tmp,
-                                   ZPOOL_CONFIG_VERSION, &version) == 0);
-                               if (nvlist_add_uint64(config,
-                                   ZPOOL_CONFIG_VERSION, version) != 0)
-                                       goto nomem;
-                               verify(nvlist_lookup_uint64(tmp,
-                                   ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
-                               if (nvlist_add_uint64(config,
-                                   ZPOOL_CONFIG_POOL_GUID, guid) != 0)
-                                       goto nomem;
-                               verify(nvlist_lookup_string(tmp,
-                                   ZPOOL_CONFIG_POOL_NAME, &name) == 0);
-                               if (nvlist_add_string(config,
-                                   ZPOOL_CONFIG_POOL_NAME, name) != 0)
-                                       goto nomem;
-                               verify(nvlist_lookup_uint64(tmp,
-                                   ZPOOL_CONFIG_POOL_STATE, &state) == 0);
-                               if (nvlist_add_uint64(config,
-                                   ZPOOL_CONFIG_POOL_STATE, state) != 0)
-                                       goto nomem;
                                hostid = 0;
                                if (nvlist_lookup_uint64(tmp,
                                    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
-                                       if (nvlist_add_uint64(config,
-                                           ZPOOL_CONFIG_HOSTID, hostid) != 0)
-                                               goto nomem;
-                                       verify(nvlist_lookup_string(tmp,
-                                           ZPOOL_CONFIG_HOSTNAME,
-                                           &hostname) == 0);
-                                       if (nvlist_add_string(config,
-                                           ZPOOL_CONFIG_HOSTNAME,
-                                           hostname) != 0)
-                                               goto nomem;
+                                       fnvlist_add_uint64(config,
+                                           ZPOOL_CONFIG_HOSTID, hostid);
+                                       hostname = fnvlist_lookup_string(tmp,
+                                           ZPOOL_CONFIG_HOSTNAME);
+                                       fnvlist_add_string(config,
+                                           ZPOOL_CONFIG_HOSTNAME, hostname);
                                }
 
                                config_seen = B_TRUE;
@@ -868,7 +862,7 @@ zpool_read_label(int fd, nvlist_t **config)
 
        *config = NULL;
 
-       if (fstat64(fd, &statbuf) == -1)
+       if (fstat64_blk(fd, &statbuf) == -1)
                return (0);
        size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
 
@@ -906,6 +900,36 @@ zpool_read_label(int fd, nvlist_t **config)
        return (0);
 }
 
+/*
+ * Given a file descriptor, clear (zero) the label information.  This function
+ * is used in the appliance stack as part of the ZFS sysevent module and to
+ * implement the "zpool labelclear" command.  Returns 0 on success or when the
+ * descriptor cannot be stat'ed, and -1 if allocation or any label write fails.
+ */
+int
+zpool_clear_label(int fd)
+{
+       struct stat64 statbuf;
+       vdev_label_t *label;
+       uint64_t size;
+       int l, err = 0;
+
+       if (fstat64_blk(fd, &statbuf) == -1)
+               return (0);
+       size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
+
+       if ((label = calloc(1, sizeof (vdev_label_t))) == NULL)
+               return (-1);
+
+       for (l = 0; l < VDEV_LABELS && err == 0; l++) {
+               if (pwrite64(fd, label, sizeof (vdev_label_t),
+                   label_offset(size, l)) != sizeof (vdev_label_t))
+                       err = -1;       /* stop; label is freed below */
+       }
+       free(label);
+       return (err);
+}
+
 #ifdef HAVE_LIBBLKID
 /*
  * Use libblkid to quickly search for zfs devices
@@ -962,7 +986,7 @@ zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools)
                }
 
                if (config != NULL) {
-                       err = add_config(hdl, pools, devname, config);
+                       err = add_config(hdl, pools, devname, 0, config);
                        if (err != 0)
                                goto err_blkid3;
                }
@@ -977,6 +1001,17 @@ err_blkid1:
 }
 #endif /* HAVE_LIBBLKID */
 
+char *         /* Default import search path, in priority order. */
+zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = {
+       "/dev/disk/by-vdev",    /* Custom rules, use first if they exist */
+       "/dev/mapper",          /* Use multipath devices before components */
+       "/dev/disk/by-uuid",    /* Single unique entry and persistent */
+       "/dev/disk/by-id",      /* May be multiple entries and persistent */
+       "/dev/disk/by-path",    /* Encodes physical location and persistent */
+       "/dev/disk/by-label",   /* Custom persistent labels */
+       "/dev"                  /* UNSAFE: names will change; tried last */
+};
+
 /*
  * Given a list of directories to search, find all pools stored on disk.  This
  * includes partial pools which are not available to import.  If no args are
@@ -995,7 +1030,6 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
        size_t pathleft;
        struct stat64 statbuf;
        nvlist_t *ret = NULL, *config;
-       static char *default_dir = DISK_ROOT;
        int fd;
        pool_list_t pools = { 0 };
        pool_entry_t *pe, *penext;
@@ -1015,8 +1049,9 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
                    dgettext(TEXT_DOMAIN, "blkid failure falling back "
                    "to manual probing"));
 #endif /* HAVE_LIBBLKID */
-               dirs = 1;
-               dir = &default_dir;
+
+               dir = zpool_default_import_path;
+               dirs = DEFAULT_IMPORT_PATH_SIZE;
        }
 
        /*
@@ -1030,6 +1065,12 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
 
                /* use realpath to normalize the path */
                if (realpath(dir[i], path) == 0) {
+
+                       /* it is safe to skip missing search paths */
+                       if (errno == ENOENT)
+                               continue;
+
+                       zfs_error_aux(hdl, strerror(errno));
                        (void) zfs_error_fmt(hdl, EZFS_BADPATH,
                            dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
                        goto error;
@@ -1078,6 +1119,10 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
                         * parport* - Parallel port interface.
                         * lp*      - Printer interface.
                         * fd*      - Floppy interface.
+                        * hpet     - High Precision Event Timer, crashes qemu
+                        *            when accessed from a virtual machine.
+                        * core     - Symlink to /proc/kcore, causes a crash
+                        *            when accessed from Xen dom0.
                         */
                        if ((strncmp(name, "watchdog", 8) == 0) ||
                            (strncmp(name, "fuse", 4) == 0)     ||
@@ -1086,22 +1131,22 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
                            (strncmp(name, "vcs", 3) == 0)      ||
                            (strncmp(name, "parport", 7) == 0)  ||
                            (strncmp(name, "lp", 2) == 0)       ||
-                           (strncmp(name, "fd", 2) == 0))
-                               continue;
-
-                       if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
+                           (strncmp(name, "fd", 2) == 0)       ||
+                           (strncmp(name, "hpet", 4) == 0)     ||
+                           (strncmp(name, "core", 4) == 0))
                                continue;
 
                        /*
                         * Ignore failed stats.  We only want regular
-                        * files and block devs.
+                        * files and block devices.
                         */
-                       if (fstat64(fd, &statbuf) != 0 ||
+                       if ((fstatat64(dfd, name, &statbuf, 0) != 0) ||
                            (!S_ISREG(statbuf.st_mode) &&
-                           !S_ISBLK(statbuf.st_mode))) {
-                               (void) close(fd);
+                           !S_ISBLK(statbuf.st_mode)))
+                               continue;
+
+                       if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
                                continue;
-                       }
 
                        if ((zpool_read_label(fd, &config)) != 0) {
                                (void) close(fd);
@@ -1113,14 +1158,15 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
 
                        if (config != NULL) {
                                boolean_t matched = B_TRUE;
+                               char *pname;
+
+                               if ((iarg->poolname != NULL) &&
+                                   (nvlist_lookup_string(config,
+                                   ZPOOL_CONFIG_POOL_NAME, &pname) == 0)) {
 
-                               if (iarg->poolname != NULL) {
-                                       char *pname;
+                                       if (strcmp(iarg->poolname, pname))
+                                              matched = B_FALSE;
 
-                                       matched = nvlist_lookup_string(config,
-                                           ZPOOL_CONFIG_POOL_NAME,
-                                           &pname) == 0 &&
-                                           strcmp(iarg->poolname, pname) == 0;
                                } else if (iarg->guid != 0) {
                                        uint64_t this_guid;
 
@@ -1136,7 +1182,7 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
                                }
                                /* use the non-raw path for the config */
                                (void) strlcpy(end, name, pathleft);
-                               if (add_config(hdl, &pools, path, config) != 0)
+                               if (add_config(hdl, &pools, path, i+1, config))
                                        goto error;
                        }
                }