X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=lib%2Flibzfs%2Flibzfs_import.c;h=9e79bd9348999d13c4b977a8243ee3abe95bc6fc;hb=ff3510c1a562914902d78f3d56552e126103f5a4;hp=9629ab08385c26e8fe8528ceb2f3486b7e24cc33;hpb=2598c0012dc33496539ce893a0af601c66cbb7e2;p=zfs.git diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c index 9629ab0..9e79bd9 100644 --- a/lib/libzfs/libzfs_import.c +++ b/lib/libzfs/libzfs_import.c @@ -20,6 +20,8 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* @@ -52,9 +54,11 @@ #include #include #include -#include #include +#ifdef HAVE_LIBBLKID +#include +#endif #include "libzfs.h" #include "libzfs_impl.h" @@ -83,6 +87,7 @@ typedef struct pool_entry { typedef struct name_entry { char *ne_name; uint64_t ne_guid; + uint64_t ne_order; struct name_entry *ne_next; } name_entry_t; @@ -128,7 +133,6 @@ fix_paths(nvlist_t *nv, name_entry_t *names) uint64_t guid; name_entry_t *ne, *best; char *path, *devid; - int matched; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { @@ -144,44 +148,33 @@ fix_paths(nvlist_t *nv, name_entry_t *names) * the path and see if we can calculate a new devid. * * There may be multiple names associated with a particular guid, in - * which case we have overlapping slices or multiple paths to the same - * disk. If this is the case, then we want to pick the path that is - * the most similar to the original, where "most similar" is the number - * of matching characters starting from the end of the path. This will - * preserve slice numbers even if the disks have been reorganized, and - * will also catch preferred disk names if multiple paths exist. + * which case we have overlapping partitions or multiple paths to the + * same disk. In this case we prefer to use the path name which + * matches the ZPOOL_CONFIG_PATH. If no matching entry is found we + * use the lowest order device which corresponds to the first match + * while traversing the ZPOOL_IMPORT_PATH search path. */ verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0); if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) path = NULL; - matched = 0; best = NULL; for (ne = names; ne != NULL; ne = ne->ne_next) { if (ne->ne_guid == guid) { - const char *src, *dst; - int count; if (path == NULL) { best = ne; break; } - src = ne->ne_name + strlen(ne->ne_name) - 1; - dst = path + strlen(path) - 1; - for (count = 0; src >= ne->ne_name && dst >= path; - src--, dst--, count++) - if (*src != *dst) - break; - - /* - * At this point, 'count' is the number of characters - * matched from the end. 
- */ - if (count > matched || best == NULL) { + if ((strlen(path) == strlen(ne->ne_name)) && + !strncmp(path, ne->ne_name, strlen(path))) { best = ne; - matched = count; + break; } + + if (best == NULL || ne->ne_order < best->ne_order) + best = ne; } } @@ -207,7 +200,7 @@ fix_paths(nvlist_t *nv, name_entry_t *names) */ static int add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path, - nvlist_t *config) + int order, nvlist_t *config) { uint64_t pool_guid, vdev_guid, top_guid, txg, state; pool_entry_t *pe; @@ -232,6 +225,7 @@ add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path, return (-1); } ne->ne_guid = vdev_guid; + ne->ne_order = order; ne->ne_next = pl->names; pl->names = ne; return (0); @@ -333,6 +327,7 @@ add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path, } ne->ne_guid = vdev_guid; + ne->ne_order = order; ne->ne_next = pl->names; pl->names = ne; @@ -434,13 +429,13 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) pool_entry_t *pe; vdev_entry_t *ve; config_entry_t *ce; - nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot; + nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot; nvlist_t **spares, **l2cache; uint_t i, nspares, nl2cache; boolean_t config_seen; uint64_t best_txg; - char *name, *hostname; - uint64_t version, guid; + char *name, *hostname = NULL; + uint64_t guid; uint_t children = 0; nvlist_t **child = NULL; uint_t holes; @@ -526,47 +521,48 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) * configuration: * * version - * pool guid - * name - * pool state + * pool guid + * name + * comment (if available) + * pool state * hostid (if available) * hostname (if available) */ - uint64_t state; + uint64_t state, version; + char *comment = NULL; + + version = fnvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_VERSION); + fnvlist_add_uint64(config, + ZPOOL_CONFIG_VERSION, version); + guid = fnvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_POOL_GUID); + fnvlist_add_uint64(config, + ZPOOL_CONFIG_POOL_GUID, guid); + name = fnvlist_lookup_string(tmp, + ZPOOL_CONFIG_POOL_NAME); + fnvlist_add_string(config, + ZPOOL_CONFIG_POOL_NAME, name); + + if (nvlist_lookup_string(tmp, + ZPOOL_CONFIG_COMMENT, &comment) == 0) + fnvlist_add_string(config, + ZPOOL_CONFIG_COMMENT, comment); + + state = fnvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_POOL_STATE); + fnvlist_add_uint64(config, + ZPOOL_CONFIG_POOL_STATE, state); - verify(nvlist_lookup_uint64(tmp, - ZPOOL_CONFIG_VERSION, &version) == 0); - if (nvlist_add_uint64(config, - ZPOOL_CONFIG_VERSION, version) != 0) - goto nomem; - verify(nvlist_lookup_uint64(tmp, - ZPOOL_CONFIG_POOL_GUID, &guid) == 0); - if (nvlist_add_uint64(config, - ZPOOL_CONFIG_POOL_GUID, guid) != 0) - goto nomem; - verify(nvlist_lookup_string(tmp, - ZPOOL_CONFIG_POOL_NAME, &name) == 0); - if (nvlist_add_string(config, - ZPOOL_CONFIG_POOL_NAME, name) != 0) - goto nomem; - verify(nvlist_lookup_uint64(tmp, - ZPOOL_CONFIG_POOL_STATE, &state) == 0); - if (nvlist_add_uint64(config, - ZPOOL_CONFIG_POOL_STATE, state) != 0) - goto nomem; hostid = 0; if (nvlist_lookup_uint64(tmp, ZPOOL_CONFIG_HOSTID, &hostid) == 0) { - if (nvlist_add_uint64(config, - ZPOOL_CONFIG_HOSTID, hostid) != 0) - goto nomem; - verify(nvlist_lookup_string(tmp, - ZPOOL_CONFIG_HOSTNAME, - &hostname) == 0); - if (nvlist_add_string(config, - ZPOOL_CONFIG_HOSTNAME, - hostname) != 0) - goto nomem; + fnvlist_add_uint64(config, + ZPOOL_CONFIG_HOSTID, hostid); + hostname = fnvlist_lookup_string(tmp, + ZPOOL_CONFIG_HOSTNAME); + 
fnvlist_add_string(config, + ZPOOL_CONFIG_HOSTNAME, hostname); } config_seen = B_TRUE; @@ -866,7 +862,7 @@ zpool_read_label(int fd, nvlist_t **config) *config = NULL; - if (fstat64(fd, &statbuf) == -1) + if (fstat64_blk(fd, &statbuf) == -1) return (0); size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); @@ -904,186 +900,10 @@ zpool_read_label(int fd, nvlist_t **config) return (0); } -typedef struct rdsk_node { - char *rn_name; - int rn_dfd; - libzfs_handle_t *rn_hdl; - nvlist_t *rn_config; - avl_tree_t *rn_avl; - avl_node_t rn_node; - boolean_t rn_nozpool; -} rdsk_node_t; - -static int -slice_cache_compare(const void *arg1, const void *arg2) -{ - const char *nm1 = ((rdsk_node_t *)arg1)->rn_name; - const char *nm2 = ((rdsk_node_t *)arg2)->rn_name; - char *nm1slice, *nm2slice; - int rv; - - /* - * slices zero and two are the most likely to provide results, - * so put those first - */ - nm1slice = strstr(nm1, "s0"); - nm2slice = strstr(nm2, "s0"); - if (nm1slice && !nm2slice) { - return (-1); - } - if (!nm1slice && nm2slice) { - return (1); - } - nm1slice = strstr(nm1, "s2"); - nm2slice = strstr(nm2, "s2"); - if (nm1slice && !nm2slice) { - return (-1); - } - if (!nm1slice && nm2slice) { - return (1); - } - - rv = strcmp(nm1, nm2); - if (rv == 0) - return (0); - return (rv > 0 ? 1 : -1); -} - -static void -check_one_slice(avl_tree_t *r, char *diskname, uint_t partno, - diskaddr_t size, uint_t blksz) -{ - rdsk_node_t tmpnode; - rdsk_node_t *node; - char sname[MAXNAMELEN]; - - tmpnode.rn_name = &sname[0]; - (void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u", - diskname, partno); - /* - * protect against division by zero for disk labels that - * contain a bogus sector size - */ - if (blksz == 0) - blksz = DEV_BSIZE; - /* too small to contain a zpool? 
*/ - if ((size < (SPA_MINDEVSIZE / blksz)) && - (node = avl_find(r, &tmpnode, NULL))) - node->rn_nozpool = B_TRUE; -} - -static void -nozpool_all_slices(avl_tree_t *r, const char *sname) -{ - char diskname[MAXNAMELEN]; - char *ptr; - int i; - - (void) strncpy(diskname, sname, MAXNAMELEN); - if (((ptr = strrchr(diskname, 's')) == NULL) && - ((ptr = strrchr(diskname, 'p')) == NULL)) - return; - ptr[0] = 's'; - ptr[1] = '\0'; - for (i = 0; i < NDKMAP; i++) - check_one_slice(r, diskname, i, 0, 1); - ptr[0] = 'p'; - for (i = 0; i <= FD_NUMPART; i++) - check_one_slice(r, diskname, i, 0, 1); -} - -static void -check_slices(avl_tree_t *r, int fd, const char *sname) -{ - struct extvtoc vtoc; - struct dk_gpt *gpt; - char diskname[MAXNAMELEN]; - char *ptr; - int i; - - (void) strncpy(diskname, sname, MAXNAMELEN); - if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1])) - return; - ptr[1] = '\0'; - - if (read_extvtoc(fd, &vtoc) >= 0) { - for (i = 0; i < NDKMAP; i++) - check_one_slice(r, diskname, i, - vtoc.v_part[i].p_size, vtoc.v_sectorsz); - } else if (efi_alloc_and_read(fd, &gpt) >= 0) { - /* - * on x86 we'll still have leftover links that point - * to slices s[9-15], so use NDKMAP instead - */ - for (i = 0; i < NDKMAP; i++) - check_one_slice(r, diskname, i, - gpt->efi_parts[i].p_size, gpt->efi_lbasize); - /* nodes p[1-4] are never used with EFI labels */ - ptr[0] = 'p'; - for (i = 1; i <= FD_NUMPART; i++) - check_one_slice(r, diskname, i, 0, 1); - efi_free(gpt); - } -} - -static void -zpool_open_func(void *arg) -{ - rdsk_node_t *rn = arg; - struct stat64 statbuf; - nvlist_t *config; - int fd; - - if (rn->rn_nozpool) - return; - if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) { - /* symlink to a device that's no longer there */ - if (errno == ENOENT) - nozpool_all_slices(rn->rn_avl, rn->rn_name); - return; - } - /* - * Ignore failed stats. We only want regular - * files, character devs and block devs. - */ - if (fstat64(fd, &statbuf) != 0 || - (!S_ISREG(statbuf.st_mode) && - !S_ISCHR(statbuf.st_mode) && - !S_ISBLK(statbuf.st_mode))) { - (void) close(fd); - return; - } - /* this file is too small to hold a zpool */ - if (S_ISREG(statbuf.st_mode) && - statbuf.st_size < SPA_MINDEVSIZE) { - (void) close(fd); - return; - } else if (!S_ISREG(statbuf.st_mode)) { - /* - * Try to read the disk label first so we don't have to - * open a bunch of minor nodes that can't have a zpool. - */ - check_slices(rn->rn_avl, fd, rn->rn_name); - } - - if ((zpool_read_label(fd, &config)) != 0) { - (void) close(fd); - (void) no_memory(rn->rn_hdl); - return; - } - (void) close(fd); - - - rn->rn_config = config; - if (config != NULL) { - assert(rn->rn_nozpool == B_FALSE); - } -} - /* * Given a file descriptor, clear (zero) the label information. This function - * is currently only used in the appliance stack as part of the ZFS sysevent - * module. + * is used in the appliance stack as part of the ZFS sysevent module and + * to implement the "zpool labelclear" command. 
*/ int zpool_clear_label(int fd) @@ -1093,7 +913,7 @@ zpool_clear_label(int fd) vdev_label_t *label; uint64_t size; - if (fstat64(fd, &statbuf) == -1) + if (fstat64_blk(fd, &statbuf) == -1) return (0); size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); @@ -1110,6 +930,88 @@ zpool_clear_label(int fd) return (0); } +#ifdef HAVE_LIBBLKID +/* + * Use libblkid to quickly search for zfs devices + */ +static int +zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools) +{ + blkid_cache cache; + blkid_dev_iterate iter; + blkid_dev dev; + const char *devname; + nvlist_t *config; + int fd, err; + + err = blkid_get_cache(&cache, NULL); + if (err != 0) { + (void) zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid_get_cache() %d"), err); + goto err_blkid1; + } + + err = blkid_probe_all(cache); + if (err != 0) { + (void) zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid_probe_all() %d"), err); + goto err_blkid2; + } + + iter = blkid_dev_iterate_begin(cache); + if (iter == NULL) { + (void) zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid_dev_iterate_begin()")); + goto err_blkid2; + } + + err = blkid_dev_set_search(iter, "TYPE", "zfs"); + if (err != 0) { + (void) zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid_dev_set_search() %d"), err); + goto err_blkid3; + } + + while (blkid_dev_next(iter, &dev) == 0) { + devname = blkid_dev_devname(dev); + if ((fd = open64(devname, O_RDONLY)) < 0) + continue; + + err = zpool_read_label(fd, &config); + (void) close(fd); + + if (err != 0) { + (void) no_memory(hdl); + goto err_blkid3; + } + + if (config != NULL) { + err = add_config(hdl, pools, devname, 0, config); + if (err != 0) + goto err_blkid3; + } + } + +err_blkid3: + blkid_dev_iterate_end(iter); +err_blkid2: + blkid_put_cache(cache); +err_blkid1: + return err; +} +#endif /* HAVE_LIBBLKID */ + +char * +zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = { + "/dev/disk/by-vdev", /* Custom rules, use first if they exist */ + "/dev/mapper", /* Use multipath devices before components */ + "/dev/disk/by-uuid", /* Single unique entry and persistent */ + "/dev/disk/by-id", /* May be multiple entries and persistent */ + "/dev/disk/by-path", /* Encodes physical location and persistent */ + "/dev/disk/by-label", /* Custom persistent labels */ + "/dev" /* UNSAFE device names will change */ +}; + /* * Given a list of directories to search, find all pools stored on disk. This * includes partial pools which are not available to import. 
If no args are @@ -1126,20 +1028,30 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) char path[MAXPATHLEN]; char *end, **dir = iarg->path; size_t pathleft; - nvlist_t *ret = NULL; - static char *default_dir = "/dev/dsk"; + struct stat64 statbuf; + nvlist_t *ret = NULL, *config; + int fd; pool_list_t pools = { 0 }; pool_entry_t *pe, *penext; vdev_entry_t *ve, *venext; config_entry_t *ce, *cenext; name_entry_t *ne, *nenext; - avl_tree_t slice_cache; - rdsk_node_t *slice; - void *cookie; + + verify(iarg->poolname == NULL || iarg->guid == 0); if (dirs == 0) { - dirs = 1; - dir = &default_dir; +#ifdef HAVE_LIBBLKID + /* Use libblkid to scan all device for their type */ + if (zpool_find_import_blkid(hdl, &pools) == 0) + goto skip_scanning; + + (void) zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid failure falling back " + "to manual probing")); +#endif /* HAVE_LIBBLKID */ + + dir = zpool_default_import_path; + dirs = DEFAULT_IMPORT_PATH_SIZE; } /* @@ -1148,12 +1060,17 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) * and toplevel GUID. */ for (i = 0; i < dirs; i++) { - tpool_t *t; char *rdsk; int dfd; /* use realpath to normalize the path */ if (realpath(dir[i], path) == 0) { + + /* it is safe to skip missing search paths */ + if (errno == ENOENT) + continue; + + zfs_error_aux(hdl, strerror(errno)); (void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]); goto error; @@ -1182,8 +1099,6 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) goto error; } - avl_create(&slice_cache, slice_cache_compare, - sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node)); /* * This is not MT-safe, but we have no MT consumers of libzfs */ @@ -1193,46 +1108,65 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) (name[1] == 0 || (name[1] == '.' && name[2] == 0))) continue; - slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); - slice->rn_name = zfs_strdup(hdl, name); - slice->rn_avl = &slice_cache; - slice->rn_dfd = dfd; - slice->rn_hdl = hdl; - slice->rn_nozpool = B_FALSE; - avl_add(&slice_cache, slice); - } - /* - * create a thread pool to do all of this in parallel; - * rn_nozpool is not protected, so this is racy in that - * multiple tasks could decide that the same slice can - * not hold a zpool, which is benign. Also choose - * double the number of processors; we hold a lot of - * locks in the kernel, so going beyond this doesn't - * buy us much. - */ - t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN), - 0, NULL); - for (slice = avl_first(&slice_cache); slice; - (slice = avl_walk(&slice_cache, slice, - AVL_AFTER))) - (void) tpool_dispatch(t, zpool_open_func, slice); - tpool_wait(t); - tpool_destroy(t); - - cookie = NULL; - while ((slice = avl_destroy_nodes(&slice_cache, - &cookie)) != NULL) { - if (slice->rn_config != NULL) { - nvlist_t *config = slice->rn_config; + /* + * Skip checking devices with well known prefixes: + * watchdog - A special close is required to avoid + * triggering it and resetting the system. + * fuse - Fuse control device. + * ppp - Generic PPP driver. + * tty* - Generic serial interface. + * vcs* - Virtual console memory. + * parport* - Parallel port interface. + * lp* - Printer interface. + * fd* - Floppy interface. + * hpet - High Precision Event Timer, crashes qemu + * when accessed from a virtual machine. + * core - Symlink to /proc/kcore, causes a crash + * when access from Xen dom0. 
+ */ + if ((strncmp(name, "watchdog", 8) == 0) || + (strncmp(name, "fuse", 4) == 0) || + (strncmp(name, "ppp", 3) == 0) || + (strncmp(name, "tty", 3) == 0) || + (strncmp(name, "vcs", 3) == 0) || + (strncmp(name, "parport", 7) == 0) || + (strncmp(name, "lp", 2) == 0) || + (strncmp(name, "fd", 2) == 0) || + (strncmp(name, "hpet", 4) == 0) || + (strncmp(name, "core", 4) == 0)) + continue; + + /* + * Ignore failed stats. We only want regular + * files and block devices. + */ + if ((fstatat64(dfd, name, &statbuf, 0) != 0) || + (!S_ISREG(statbuf.st_mode) && + !S_ISBLK(statbuf.st_mode))) + continue; + + if ((fd = openat64(dfd, name, O_RDONLY)) < 0) + continue; + + if ((zpool_read_label(fd, &config)) != 0) { + (void) close(fd); + (void) no_memory(hdl); + goto error; + } + + (void) close(fd); + + if (config != NULL) { boolean_t matched = B_TRUE; + char *pname; + + if ((iarg->poolname != NULL) && + (nvlist_lookup_string(config, + ZPOOL_CONFIG_POOL_NAME, &pname) == 0)) { - if (iarg->poolname != NULL) { - char *pname; + if (strcmp(iarg->poolname, pname)) + matched = B_FALSE; - matched = nvlist_lookup_string(config, - ZPOOL_CONFIG_POOL_NAME, - &pname) == 0 && - strcmp(iarg->poolname, pname) == 0; } else if (iarg->guid != 0) { uint64_t this_guid; @@ -1247,19 +1181,19 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) continue; } /* use the non-raw path for the config */ - (void) strlcpy(end, slice->rn_name, pathleft); - if (add_config(hdl, &pools, path, config) != 0) + (void) strlcpy(end, name, pathleft); + if (add_config(hdl, &pools, path, i+1, config)) goto error; } - free(slice->rn_name); - free(slice); } - avl_destroy(&slice_cache); (void) closedir(dirp); dirp = NULL; } +#ifdef HAVE_LIBBLKID +skip_scanning: +#endif ret = get_configs(hdl, &pools, iarg->can_be_active); error:
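
The hunks above make several related changes; the sketches that follow
restate the new behavior in standalone form, using hypothetical helper
names where noted.

fix_paths() no longer scores candidate names by counting matching
characters from the end of the path.  Instead it prefers an exact match
on the vdev's recorded ZPOOL_CONFIG_PATH and otherwise falls back to the
entry with the lowest ne_order, i.e. the device found earliest in the
import search path (blkid hits are added with order 0, directory hits
with i+1).  A minimal sketch of that rule follows, assuming a
hypothetical pick_best_name() helper; only the ne_name/ne_guid/ne_order
fields mirror the real name_entry_t, and strcmp() stands in for the
patch's strlen()/strncmp() pair:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stddef.h>

typedef struct name_entry {
	char *ne_name;
	uint64_t ne_guid;
	uint64_t ne_order;	/* index into the import search path */
	struct name_entry *ne_next;
} name_entry_t;

/* Hypothetical helper illustrating the selection rule in fix_paths(). */
static name_entry_t *
pick_best_name(name_entry_t *names, uint64_t guid, const char *path)
{
	name_entry_t *ne, *best = NULL;

	for (ne = names; ne != NULL; ne = ne->ne_next) {
		if (ne->ne_guid != guid)
			continue;

		/* No recorded path: the first guid match wins. */
		if (path == NULL)
			return (ne);

		/* An exact match on ZPOOL_CONFIG_PATH always wins. */
		if (strcmp(path, ne->ne_name) == 0)
			return (ne);

		/* Otherwise prefer the earliest (lowest order) search path. */
		if (best == NULL || ne->ne_order < best->ne_order)
			best = ne;
	}

	return (best);
}

int
main(void)
{
	name_entry_t by_id = { "/dev/disk/by-id/ata-X-part1", 42, 4, NULL };
	name_entry_t plain = { "/dev/sdb1", 42, 7, &by_id };

	/* Exact match on the recorded path beats search-path order. */
	(void) printf("%s\n", pick_best_name(&plain, 42, "/dev/sdb1")->ne_name);

	/* No exact match: the lowest-order entry is chosen. */
	(void) printf("%s\n", pick_best_name(&plain, 42, "/dev/vdev1")->ne_name);

	return (0);
}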
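
get_configs() now assembles the merged config with the fnvlist_*()
wrappers, which assert success internally (they abort on allocation
failure or a missing key), so the verify()/nvlist_add_*()/goto nomem
error paths disappear.  Below is a standalone sketch of that calling
style, assuming only libnvpair (build with -lnvpair); the string keys
stand in for the ZPOOL_CONFIG_* macros used by the real code:

#include <stdio.h>
#include <libnvpair.h>

int
main(void)
{
	nvlist_t *label = fnvlist_alloc();	/* plays the role of 'tmp' */
	nvlist_t *config = fnvlist_alloc();

	fnvlist_add_uint64(label, "version", 28);
	fnvlist_add_string(label, "name", "tank");

	/* Copy fields the way the patched get_configs() does. */
	fnvlist_add_uint64(config, "version",
	    fnvlist_lookup_uint64(label, "version"));
	fnvlist_add_string(config, "name",
	    fnvlist_lookup_string(label, "name"));

	(void) printf("version=%llu name=%s\n",
	    (unsigned long long)fnvlist_lookup_uint64(config, "version"),
	    fnvlist_lookup_string(config, "name"));

	fnvlist_free(label);
	fnvlist_free(config);
	return (0);
}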
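
zpool_read_label() and zpool_clear_label() switch from fstat64() to
fstat64_blk().  The real wrapper is supplied by libspl; the sketch below
is a hypothetical stand-in (the fstat64_blk_sketch() name and its layout
are assumed, not taken from the patch) showing the motivation: on Linux,
stat on a block device reports st_size as zero, so the usable size has
to come from the BLKGETSIZE64 ioctl instead.

#define	_LARGEFILE64_SOURCE
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>

static int
fstat64_blk_sketch(int fd, struct stat64 *st)
{
	uint64_t size;

	if (fstat64(fd, st) == -1)
		return (-1);

	/* Block devices report a zero st_size; ask the kernel directly. */
	if (S_ISBLK(st->st_mode)) {
		if (ioctl(fd, BLKGETSIZE64, &size) != 0)
			return (-1);
		st->st_size = (off64_t)size;
	}

	return (0);
}

int
main(int argc, char **argv)
{
	struct stat64 st;
	int fd;

	if (argc != 2 || (fd = open64(argv[1], O_RDONLY)) < 0)
		return (1);

	if (fstat64_blk_sketch(fd, &st) == 0)
		(void) printf("%s: %llu bytes\n", argv[1],
		    (unsigned long long)st.st_size);

	(void) close(fd);
	return (0);
}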
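
When the caller supplies no search directories, the new
zpool_find_import_blkid() first asks libblkid for devices it has already
tagged as ZFS members, and only falls back to walking
zpool_default_import_path when that fails.  The loop below is a
standalone reduction of that code path (build with -lblkid); it simply
prints the matching device names where the real function hands each one
to zpool_read_label() and add_config() with order 0:

#include <stdio.h>
#include <blkid/blkid.h>

int
main(void)
{
	blkid_cache cache;
	blkid_dev_iterate iter;
	blkid_dev dev;

	if (blkid_get_cache(&cache, NULL) != 0)
		return (1);

	/* Refresh the cache so recently labeled devices are seen. */
	(void) blkid_probe_all(cache);

	iter = blkid_dev_iterate_begin(cache);
	if (iter == NULL) {
		blkid_put_cache(cache);
		return (1);
	}

	/* Restrict the walk to devices blkid identified as zfs members. */
	(void) blkid_dev_set_search(iter, "TYPE", "zfs");

	while (blkid_dev_next(iter, &dev) == 0)
		(void) printf("%s\n", blkid_dev_devname(dev));

	blkid_dev_iterate_end(iter);
	blkid_put_cache(cache);
	return (0);
}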
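
The manual directory scan now refuses to stat or open device names with
well-known non-storage prefixes (watchdog, hpet, core, and the rest of
the list in the comment above), since some of them misbehave when merely
opened.  Purely for illustration, the same check written table-driven;
skip_device() and skip_prefixes[] are hypothetical names, not part of
the patch:

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

/* Prefixes of device names that can never hold a pool label. */
static const char *skip_prefixes[] = {
	"watchdog", "fuse", "ppp", "tty", "vcs",
	"parport", "lp", "fd", "hpet", "core",
};

static bool
skip_device(const char *name)
{
	size_t i;

	for (i = 0; i < sizeof (skip_prefixes) / sizeof (skip_prefixes[0]); i++)
		if (strncmp(name, skip_prefixes[i],
		    strlen(skip_prefixes[i])) == 0)
			return (true);

	return (false);
}

int
main(void)
{
	const char *names[] = { "sda", "watchdog0", "tty1", "hpet" };
	size_t i;

	for (i = 0; i < sizeof (names) / sizeof (names[0]); i++)
		(void) printf("%-10s %s\n", names[i],
		    skip_device(names[i]) ? "skip" : "probe");

	return (0);
}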