*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
*/
/*
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/efi_partition.h>
-#include <thread_pool.h>
#include <sys/vdev_impl.h>
+#ifdef HAVE_LIBBLKID
+#include <blkid/blkid.h>
+#endif
#include "libzfs.h"
#include "libzfs_impl.h"
typedef struct name_entry {
char *ne_name; /* path name; compared against ZPOOL_CONFIG_PATH */
uint64_t ne_guid; /* vdev guid this name belongs to */
+ uint64_t ne_order; /* search rank; the lowest value is preferred */
struct name_entry *ne_next; /* next entry in the singly linked list */
} name_entry_t;
uint64_t guid;
name_entry_t *ne, *best;
char *path, *devid;
- int matched;
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) == 0) {
* the path and see if we can calculate a new devid.
*
* There may be multiple names associated with a particular guid, in
- * which case we have overlapping slices or multiple paths to the same
- * disk. If this is the case, then we want to pick the path that is
- * the most similar to the original, where "most similar" is the number
- * of matching characters starting from the end of the path. This will
- * preserve slice numbers even if the disks have been reorganized, and
- * will also catch preferred disk names if multiple paths exist.
+ * which case we have overlapping partitions or multiple paths to the
+ * same disk. In this case we prefer to use the path name which
+ * matches the ZPOOL_CONFIG_PATH. If no matching entry is found we
+ * use the lowest order device which corresponds to the first match
+ * while traversing the ZPOOL_IMPORT_PATH search path.
*/
verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
path = NULL;
- matched = 0;
best = NULL;
for (ne = names; ne != NULL; ne = ne->ne_next) {
if (ne->ne_guid == guid) {
- const char *src, *dst;
- int count;
if (path == NULL) {
best = ne;
break;
}
- src = ne->ne_name + strlen(ne->ne_name) - 1;
- dst = path + strlen(path) - 1;
- for (count = 0; src >= ne->ne_name && dst >= path;
- src--, dst--, count++)
- if (*src != *dst)
- break;
-
- /*
- * At this point, 'count' is the number of characters
- * matched from the end.
- */
- if (count > matched || best == NULL) {
+ if ((strlen(path) == strlen(ne->ne_name)) &&
+ !strncmp(path, ne->ne_name, strlen(path))) {
best = ne;
- matched = count;
+ break;
}
+
+ if (best == NULL || ne->ne_order < best->ne_order)
+ best = ne;
}
}
*/
static int
add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
- nvlist_t *config)
+ int order, nvlist_t *config)
{
uint64_t pool_guid, vdev_guid, top_guid, txg, state;
pool_entry_t *pe;
return (-1);
}
ne->ne_guid = vdev_guid;
+ ne->ne_order = order;
ne->ne_next = pl->names;
pl->names = ne;
return (0);
}
ne->ne_guid = vdev_guid;
+ ne->ne_order = order;
ne->ne_next = pl->names;
pl->names = ne;
uint_t i, nspares, nl2cache;
boolean_t config_seen;
uint64_t best_txg;
- char *name, *hostname;
+ char *name, *hostname, *comment;
uint64_t version, guid;
uint_t children = 0;
nvlist_t **child = NULL;
* version
* pool guid
* name
+ * comment (if available)
* pool state
* hostid (if available)
* hostname (if available)
if (nvlist_add_string(config,
ZPOOL_CONFIG_POOL_NAME, name) != 0)
goto nomem;
+
+ /*
+ * COMMENT is optional, don't bail if it's not
+ * there, instead, set it to NULL.
+ */
+ if (nvlist_lookup_string(tmp,
+ ZPOOL_CONFIG_COMMENT, &comment) != 0)
+ comment = NULL;
+ else if (nvlist_add_string(config,
+ ZPOOL_CONFIG_COMMENT, comment) != 0)
+ goto nomem;
+
verify(nvlist_lookup_uint64(tmp,
ZPOOL_CONFIG_POOL_STATE, &state) == 0);
if (nvlist_add_uint64(config,
ZPOOL_CONFIG_POOL_STATE, state) != 0)
goto nomem;
+
hostid = 0;
if (nvlist_lookup_uint64(tmp,
ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
return (0);
}
-typedef struct rdsk_node {
- char *rn_name;
- int rn_dfd;
- libzfs_handle_t *rn_hdl;
- nvlist_t *rn_config;
- avl_tree_t *rn_avl;
- avl_node_t rn_node;
- boolean_t rn_nozpool;
-} rdsk_node_t;
-
+#ifdef HAVE_LIBBLKID
+/*
+ * Use libblkid to quickly search for zfs devices
+ */
static int
-slice_cache_compare(const void *arg1, const void *arg2)
-{
- const char *nm1 = ((rdsk_node_t *)arg1)->rn_name;
- const char *nm2 = ((rdsk_node_t *)arg2)->rn_name;
- char *nm1slice, *nm2slice;
- int rv;
-
- /*
- * slices zero and two are the most likely to provide results,
- * so put those first
- */
- nm1slice = strstr(nm1, "s0");
- nm2slice = strstr(nm2, "s0");
- if (nm1slice && !nm2slice) {
- return (-1);
- }
- if (!nm1slice && nm2slice) {
- return (1);
- }
- nm1slice = strstr(nm1, "s2");
- nm2slice = strstr(nm2, "s2");
- if (nm1slice && !nm2slice) {
- return (-1);
- }
- if (!nm1slice && nm2slice) {
- return (1);
- }
-
- rv = strcmp(nm1, nm2);
- if (rv == 0)
- return (0);
- return (rv > 0 ? 1 : -1);
-}
-
-static void
-check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
- diskaddr_t size, uint_t blksz)
-{
- rdsk_node_t tmpnode;
- rdsk_node_t *node;
- char sname[MAXNAMELEN];
-
- tmpnode.rn_name = &sname[0];
- (void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u",
- diskname, partno);
- /*
- * protect against division by zero for disk labels that
- * contain a bogus sector size
- */
- if (blksz == 0)
- blksz = DEV_BSIZE;
- /* too small to contain a zpool? */
- if ((size < (SPA_MINDEVSIZE / blksz)) &&
- (node = avl_find(r, &tmpnode, NULL)))
- node->rn_nozpool = B_TRUE;
-}
-
-static void
-nozpool_all_slices(avl_tree_t *r, const char *sname)
-{
- char diskname[MAXNAMELEN];
- char *ptr;
- int i;
-
- (void) strncpy(diskname, sname, MAXNAMELEN);
- if (((ptr = strrchr(diskname, 's')) == NULL) &&
- ((ptr = strrchr(diskname, 'p')) == NULL))
- return;
- ptr[0] = 's';
- ptr[1] = '\0';
- for (i = 0; i < NDKMAP; i++)
- check_one_slice(r, diskname, i, 0, 1);
- ptr[0] = 'p';
- for (i = 0; i <= FD_NUMPART; i++)
- check_one_slice(r, diskname, i, 0, 1);
-}
-
-static void
-check_slices(avl_tree_t *r, int fd, const char *sname)
-{
- struct extvtoc vtoc;
- struct dk_gpt *gpt;
- char diskname[MAXNAMELEN];
- char *ptr;
- int i;
-
- (void) strncpy(diskname, sname, MAXNAMELEN);
- if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
- return;
- ptr[1] = '\0';
-
- if (read_extvtoc(fd, &vtoc) >= 0) {
- for (i = 0; i < NDKMAP; i++)
- check_one_slice(r, diskname, i,
- vtoc.v_part[i].p_size, vtoc.v_sectorsz);
- } else if (efi_alloc_and_read(fd, &gpt) >= 0) {
- /*
- * on x86 we'll still have leftover links that point
- * to slices s[9-15], so use NDKMAP instead
- */
- for (i = 0; i < NDKMAP; i++)
- check_one_slice(r, diskname, i,
- gpt->efi_parts[i].p_size, gpt->efi_lbasize);
- /* nodes p[1-4] are never used with EFI labels */
- ptr[0] = 'p';
- for (i = 1; i <= FD_NUMPART; i++)
- check_one_slice(r, diskname, i, 0, 1);
- efi_free(gpt);
- }
-}
-
-static void
-zpool_open_func(void *arg)
+zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools)
{
- rdsk_node_t *rn = arg;
- struct stat64 statbuf;
+ blkid_cache cache;
+ blkid_dev_iterate iter;
+ blkid_dev dev;
+ const char *devname;
nvlist_t *config;
- int fd;
+ int fd, err;
- if (rn->rn_nozpool)
- return;
- if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
- /* symlink to a device that's no longer there */
- if (errno == ENOENT)
- nozpool_all_slices(rn->rn_avl, rn->rn_name);
- return;
- }
- /*
- * Ignore failed stats. We only want regular
- * files, character devs and block devs.
- */
- if (fstat64(fd, &statbuf) != 0 ||
- (!S_ISREG(statbuf.st_mode) &&
- !S_ISCHR(statbuf.st_mode) &&
- !S_ISBLK(statbuf.st_mode))) {
- (void) close(fd);
- return;
- }
- /* this file is too small to hold a zpool */
- if (S_ISREG(statbuf.st_mode) &&
- statbuf.st_size < SPA_MINDEVSIZE) {
- (void) close(fd);
- return;
- } else if (!S_ISREG(statbuf.st_mode)) {
- /*
- * Try to read the disk label first so we don't have to
- * open a bunch of minor nodes that can't have a zpool.
- */
- check_slices(rn->rn_avl, fd, rn->rn_name);
+ /*
+ * Walk every device libblkid reports as TYPE=zfs, read its label and
+ * hand each config found to add_config() with order 0. Devices that
+ * cannot be opened, or that yield no config, are skipped.
+ *
+ * Returns 0 when the walk completed. Returns non-zero when libblkid
+ * could not be set up, a label read failed, or add_config() failed;
+ * the caller uses a non-zero return to fall back to manual probing.
+ */
+ err = blkid_get_cache(&cache, NULL);
+ if (err != 0) {
+ (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+ dgettext(TEXT_DOMAIN, "blkid_get_cache() %d"), err);
+ goto err_blkid1;
}
- if ((zpool_read_label(fd, &config)) != 0) {
- (void) close(fd);
- (void) no_memory(rn->rn_hdl);
- return;
+ err = blkid_probe_all(cache);
+ if (err != 0) {
+ (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+ dgettext(TEXT_DOMAIN, "blkid_probe_all() %d"), err);
+ goto err_blkid2;
}
- (void) close(fd);
+ iter = blkid_dev_iterate_begin(cache);
+ if (iter == NULL) {
+ /*
+ * A NULL iterator is the only failure indication here and err
+ * still holds 0 from blkid_probe_all(). Set it explicitly so
+ * the caller does not mistake this for a completed scan.
+ */
+ err = -1;
+ (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+ dgettext(TEXT_DOMAIN, "blkid_dev_iterate_begin()"));
+ goto err_blkid2;
+ }
- rn->rn_config = config;
- if (config != NULL) {
- assert(rn->rn_nozpool == B_FALSE);
+ /* Restrict the iteration to devices tagged TYPE=zfs. */
+ err = blkid_dev_set_search(iter, "TYPE", "zfs");
+ if (err != 0) {
+ (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+ dgettext(TEXT_DOMAIN, "blkid_dev_set_search() %d"), err);
+ goto err_blkid3;
}
-}
-/*
- * Given a file descriptor, clear (zero) the label information. This function
- * is currently only used in the appliance stack as part of the ZFS sysevent
- * module.
- */
-int
-zpool_clear_label(int fd)
-{
- struct stat64 statbuf;
- int l;
- vdev_label_t *label;
- uint64_t size;
+ while (blkid_dev_next(iter, &dev) == 0) {
+ devname = blkid_dev_devname(dev);
+ /* An unopenable device is skipped, not treated as an error. */
+ if ((fd = open64(devname, O_RDONLY)) < 0)
+ continue;
- if (fstat64(fd, &statbuf) == -1)
- return (0);
- size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
+ err = zpool_read_label(fd, &config);
+ (void) close(fd);
- if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
- return (-1);
+ if (err != 0) {
+ (void) no_memory(hdl);
+ goto err_blkid3;
+ }
- for (l = 0; l < VDEV_LABELS; l++) {
- if (pwrite64(fd, label, sizeof (vdev_label_t),
- label_offset(size, l)) != sizeof (vdev_label_t))
- return (-1);
+ /* A NULL config means no label was found on this device. */
+ if (config != NULL) {
+ err = add_config(hdl, pools, devname, 0, config);
+ if (err != 0)
+ goto err_blkid3;
+ }
}
- free(label);
- return (0);
+ /* Success also falls through the labels to release resources. */
+err_blkid3:
+ blkid_dev_iterate_end(iter);
+err_blkid2:
+ blkid_put_cache(cache);
+err_blkid1:
+ return (err);
}
+#endif /* HAVE_LIBBLKID */
+
+char *
+zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = { /* in search order */
+ "/dev/disk/by-vdev", /* Custom rules, use first if they exist */
+ "/dev/disk/zpool", /* Custom rules, use first if they exist */
+ "/dev/mapper", /* Use multipath devices before components */
+ "/dev/disk/by-uuid", /* Single unique entry and persistent */
+ "/dev/disk/by-id", /* May be multiple entries and persistent */
+ "/dev/disk/by-path", /* Encodes physical location and persistent */
+ "/dev/disk/by-label", /* Custom persistent labels */
+ "/dev" /* UNSAFE: device names will change */
+};
/*
* Given a list of directories to search, find all pools stored on disk. This
char path[MAXPATHLEN];
char *end, **dir = iarg->path;
size_t pathleft;
- nvlist_t *ret = NULL;
- static char *default_dir = "/dev/dsk";
+ struct stat64 statbuf;
+ nvlist_t *ret = NULL, *config;
+ int fd;
pool_list_t pools = { 0 };
pool_entry_t *pe, *penext;
vdev_entry_t *ve, *venext;
config_entry_t *ce, *cenext;
name_entry_t *ne, *nenext;
- avl_tree_t slice_cache;
- rdsk_node_t *slice;
- void *cookie;
+
+ verify(iarg->poolname == NULL || iarg->guid == 0);
if (dirs == 0) {
- dirs = 1;
- dir = &default_dir;
+#ifdef HAVE_LIBBLKID
+ /* Use libblkid to scan all devices for their type */
+ if (zpool_find_import_blkid(hdl, &pools) == 0)
+ goto skip_scanning;
+
+ (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+ dgettext(TEXT_DOMAIN, "blkid failure falling back "
+ "to manual probing"));
+#endif /* HAVE_LIBBLKID */
+
+ dir = zpool_default_import_path;
+ dirs = DEFAULT_IMPORT_PATH_SIZE;
}
/*
* and toplevel GUID.
*/
for (i = 0; i < dirs; i++) {
- tpool_t *t;
char *rdsk;
int dfd;
/* use realpath to normalize the path */
if (realpath(dir[i], path) == 0) {
+
+ /* it is safe to skip missing search paths */
+ if (errno == ENOENT)
+ continue;
+
+ zfs_error_aux(hdl, strerror(errno));
(void) zfs_error_fmt(hdl, EZFS_BADPATH,
dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
goto error;
goto error;
}
- avl_create(&slice_cache, slice_cache_compare,
- sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
/*
* This is not MT-safe, but we have no MT consumers of libzfs
*/
(name[1] == 0 || (name[1] == '.' && name[2] == 0)))
continue;
- slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
- slice->rn_name = zfs_strdup(hdl, name);
- slice->rn_avl = &slice_cache;
- slice->rn_dfd = dfd;
- slice->rn_hdl = hdl;
- slice->rn_nozpool = B_FALSE;
- avl_add(&slice_cache, slice);
- }
- /*
- * create a thread pool to do all of this in parallel;
- * rn_nozpool is not protected, so this is racy in that
- * multiple tasks could decide that the same slice can
- * not hold a zpool, which is benign. Also choose
- * double the number of processors; we hold a lot of
- * locks in the kernel, so going beyond this doesn't
- * buy us much.
- */
- t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
- 0, NULL);
- for (slice = avl_first(&slice_cache); slice;
- (slice = avl_walk(&slice_cache, slice,
- AVL_AFTER)))
- (void) tpool_dispatch(t, zpool_open_func, slice);
- tpool_wait(t);
- tpool_destroy(t);
-
- cookie = NULL;
- while ((slice = avl_destroy_nodes(&slice_cache,
- &cookie)) != NULL) {
- if (slice->rn_config != NULL) {
- nvlist_t *config = slice->rn_config;
+ /*
+ * Skip checking devices with well known prefixes:
+ * watchdog - A special close is required to avoid
+ * triggering it and resetting the system.
+ * fuse - Fuse control device.
+ * ppp - Generic PPP driver.
+ * tty* - Generic serial interface.
+ * vcs* - Virtual console memory.
+ * parport* - Parallel port interface.
+ * lp* - Printer interface.
+ * fd* - Floppy interface.
+ * hpet - High Precision Event Timer, crashes qemu
+ * when accessed from a virtual machine.
+ * core - Symlink to /proc/kcore, causes a crash
+ * when accessed from Xen dom0.
+ */
+ if ((strncmp(name, "watchdog", 8) == 0) ||
+ (strncmp(name, "fuse", 4) == 0) ||
+ (strncmp(name, "ppp", 3) == 0) ||
+ (strncmp(name, "tty", 3) == 0) ||
+ (strncmp(name, "vcs", 3) == 0) ||
+ (strncmp(name, "parport", 7) == 0) ||
+ (strncmp(name, "lp", 2) == 0) ||
+ (strncmp(name, "fd", 2) == 0) ||
+ (strncmp(name, "hpet", 4) == 0) ||
+ (strncmp(name, "core", 4) == 0))
+ continue;
+
+ /*
+ * Ignore failed stats. We only want regular
+ * files and block devices.
+ */
+ if ((fstatat64(dfd, name, &statbuf, 0) != 0) ||
+ (!S_ISREG(statbuf.st_mode) &&
+ !S_ISBLK(statbuf.st_mode)))
+ continue;
+
+ if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
+ continue;
+
+ if ((zpool_read_label(fd, &config)) != 0) {
+ (void) close(fd);
+ (void) no_memory(hdl);
+ goto error;
+ }
+
+ (void) close(fd);
+
+ if (config != NULL) {
boolean_t matched = B_TRUE;
+ char *pname;
+
+ if ((iarg->poolname != NULL) &&
+ (nvlist_lookup_string(config,
+ ZPOOL_CONFIG_POOL_NAME, &pname) == 0)) {
- if (iarg->poolname != NULL) {
- char *pname;
+ if (strcmp(iarg->poolname, pname))
+ matched = B_FALSE;
- matched = nvlist_lookup_string(config,
- ZPOOL_CONFIG_POOL_NAME,
- &pname) == 0 &&
- strcmp(iarg->poolname, pname) == 0;
} else if (iarg->guid != 0) {
uint64_t this_guid;
continue;
}
/* use the non-raw path for the config */
- (void) strlcpy(end, slice->rn_name, pathleft);
- if (add_config(hdl, &pools, path, config) != 0)
+ (void) strlcpy(end, name, pathleft);
+ if (add_config(hdl, &pools, path, i+1, config))
goto error;
}
- free(slice->rn_name);
- free(slice);
}
- avl_destroy(&slice_cache);
(void) closedir(dirp);
dirp = NULL;
}
+#ifdef HAVE_LIBBLKID
+skip_scanning:
+#endif
ret = get_configs(hdl, &pools, iarg->can_be_active);
error: