X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=cmd%2Fzpool%2Fzpool_vdev.c;h=723e10b3da8fd3b605ee288c20294bbfa7a5689d;hb=0c28fb480836ab7bb1bbf8de6e572d2443273396;hp=ea887f8b9c77a236dcc727d99f59a5bf77954447;hpb=df30f56639f96175ba71d83b4456ccf410c46542;p=zfs.git diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index ea887f8..723e10b 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -67,6 +67,8 @@ #include #include #include +#include +#include #include #include #include @@ -82,6 +84,7 @@ #endif /* HAVE_LIBBLKID */ #include "zpool_util.h" +#include /* * For any given vdev specification, we can have multiple errors. The @@ -91,6 +94,115 @@ boolean_t error_seen; boolean_t is_force; +typedef struct vdev_disk_db_entry +{ + char id[24]; + int sector_size; +} vdev_disk_db_entry_t; + +/* + * Database of block devices that lie about physical sector sizes. The + * identification string must be precisely 24 characters to avoid false + * negatives + */ +static vdev_disk_db_entry_t vdev_disk_database[] = { + {"ATA Corsair Force 3 ", 8192}, + {"ATA INTEL SSDSA2CT04", 8192}, + {"ATA INTEL SSDSA2CW16", 8192}, + {"ATA INTEL SSDSC2CT18", 8192}, + {"ATA INTEL SSDSC2CW12", 8192}, + {"ATA KINGSTON SH100S3", 8192}, + {"ATA M4-CT064M4SSD2 ", 8192}, + {"ATA M4-CT128M4SSD2 ", 8192}, + {"ATA M4-CT256M4SSD2 ", 8192}, + {"ATA M4-CT512M4SSD2 ", 8192}, + {"ATA OCZ-AGILITY2 ", 8192}, + {"ATA OCZ-VERTEX2 3.5 ", 8192}, + {"ATA OCZ-VERTEX3 ", 8192}, + {"ATA OCZ-VERTEX3 LT ", 8192}, + {"ATA OCZ-VERTEX3 MI ", 8192}, + {"ATA SAMSUNG SSD 830 ", 8192}, + {"ATA Samsung SSD 840 ", 8192}, + {"ATA INTEL SSDSA2M040", 4096}, + {"ATA INTEL SSDSA2M080", 4096}, + {"ATA INTEL SSDSA2M160", 4096}, + /* Imported from Open Solaris*/ + {"ATA MARVELL SD88SA02", 4096}, + /* Advanced format Hard drives */ + {"ATA Hitachi HDS5C303", 4096}, + {"ATA SAMSUNG HD204UI ", 4096}, + {"ATA ST2000DL004 HD20", 4096}, + {"ATA WDC WD10EARS-00M", 4096}, + {"ATA WDC WD10EARS-00S", 4096}, + {"ATA WDC WD10EARS-00Z", 4096}, + {"ATA WDC WD15EARS-00M", 4096}, + {"ATA WDC WD15EARS-00S", 4096}, + {"ATA WDC WD15EARS-00Z", 4096}, + {"ATA WDC WD20EARS-00M", 4096}, + {"ATA WDC WD20EARS-00S", 4096}, + {"ATA WDC WD20EARS-00Z", 4096}, + /* Virtual disks: Assume zvols with default volblocksize */ +#if 0 + {"ATA QEMU HARDDISK ", 8192}, + {"IET VIRTUAL-DISK ", 8192}, + {"OI COMSTAR ", 8192}, +#endif +}; + +static const int vdev_disk_database_size = + sizeof (vdev_disk_database) / sizeof (vdev_disk_database[0]); + +#define INQ_REPLY_LEN 96 +#define INQ_CMD_LEN 6 + +static boolean_t +check_sector_size_database(char *path, int *sector_size) +{ + unsigned char inq_buff[INQ_REPLY_LEN]; + unsigned char sense_buffer[32]; + unsigned char inq_cmd_blk[INQ_CMD_LEN] = + {INQUIRY, 0, 0, 0, INQ_REPLY_LEN, 0}; + sg_io_hdr_t io_hdr; + int error; + int fd; + int i; + + /* Prepare INQUIRY command */ + memset(&io_hdr, 0, sizeof(sg_io_hdr_t)); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = sizeof(inq_cmd_blk); + io_hdr.mx_sb_len = sizeof(sense_buffer); + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; + io_hdr.dxfer_len = INQ_REPLY_LEN; + io_hdr.dxferp = inq_buff; + io_hdr.cmdp = inq_cmd_blk; + io_hdr.sbp = sense_buffer; + io_hdr.timeout = 10; /* 10 milliseconds is ample time */ + + if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0) + return (B_FALSE); + + error = ioctl(fd, SG_IO, (unsigned long) &io_hdr); + + (void) close(fd); + + if (error < 0) + return (B_FALSE); + + if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK) + return (B_FALSE); + + for (i = 0; i < vdev_disk_database_size; i++) { + if (memcmp(inq_buff + 8, vdev_disk_database[i].id, 24)) + continue; + + *sector_size = vdev_disk_database[i].sector_size; + return (B_TRUE); + } + + return (B_FALSE); +} + /*PRINTFLIKE1*/ static void vdev_error(const char *fmt, ...) @@ -188,19 +300,10 @@ check_error(int err) static int check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare) { - struct stat64 statbuf; int err; #ifdef HAVE_LIBBLKID char *value; -#endif /* HAVE_LIBBLKID */ - - if (stat64(path, &statbuf) != 0) { - vdev_error(gettext("cannot stat %s: %s\n"), - path, strerror(errno)); - return (-1); - } -#ifdef HAVE_LIBBLKID /* No valid type detected device is safe to use */ value = blkid_get_tag_value(cache, "TYPE", path); if (value == NULL) @@ -256,7 +359,7 @@ check_disk(const char *path, blkid_cache cache, int force, * not easily decode the MBR return a failure and prompt to the * user to use force option since we cannot check the partitions. */ - if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) { + if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0) { check_error(errno); return -1; } @@ -315,7 +418,7 @@ check_disk(const char *path, blkid_cache cache, int force, efi_free(vtoc); (void) close(fd); - return (err); + return (err); } static int @@ -362,7 +465,7 @@ is_whole_disk(const char *path) struct dk_gpt *label; int fd; - if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) + if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0) return (B_FALSE); if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) { (void) close(fd); @@ -375,16 +478,18 @@ is_whole_disk(const char *path) /* * This may be a shorthand device path or it could be total gibberish. - * Check to see if it's a known device in /dev/, /dev/disk/by-id, - * /dev/disk/by-label, /dev/disk/by-path, /dev/disk/by-uuid, or - * /dev/disk/zpool/. As part of this check, see if we've been given - * an entire disk (minus the slice number). + * Check to see if it is a known device available in zfs_vdev_paths. + * As part of this check, see if we've been given an entire disk + * (minus the slice number). */ static int is_shorthand_path(const char *arg, char *path, struct stat64 *statbuf, boolean_t *wholedisk) { - if (zfs_resolve_shortname(arg, path, MAXPATHLEN) == 0) { + int error; + + error = zfs_resolve_shortname(arg, path, MAXPATHLEN); + if (error == 0) { *wholedisk = is_whole_disk(path); if (*wholedisk || (stat64(path, statbuf) == 0)) return (0); @@ -394,7 +499,59 @@ is_shorthand_path(const char *arg, char *path, memset(statbuf, 0, sizeof(*statbuf)); *wholedisk = B_FALSE; - return (ENOENT); + return (error); +} + +/* + * Determine if the given path is a hot spare within the given configuration. + * If no configuration is given we rely solely on the label. + */ +static boolean_t +is_spare(nvlist_t *config, const char *path) +{ + int fd; + pool_state_t state; + char *name = NULL; + nvlist_t *label; + uint64_t guid, spareguid; + nvlist_t *nvroot; + nvlist_t **spares; + uint_t i, nspares; + boolean_t inuse; + + if ((fd = open(path, O_RDONLY)) < 0) + return (B_FALSE); + + if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 || + !inuse || + state != POOL_STATE_SPARE || + zpool_read_label(fd, &label) != 0) { + free(name); + (void) close(fd); + return (B_FALSE); + } + free(name); + (void) close(fd); + + if (config == NULL) + return (B_TRUE); + + verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0); + nvlist_free(label); + + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + for (i = 0; i < nspares; i++) { + verify(nvlist_lookup_uint64(spares[i], + ZPOOL_CONFIG_GUID, &spareguid) == 0); + if (spareguid == guid) + return (B_TRUE); + } + } + + return (B_FALSE); } /* @@ -402,9 +559,9 @@ is_shorthand_path(const char *arg, char *path, * device, fill in the device id to make a complete nvlist. Valid forms for a * leaf vdev are: * - * /dev/xxx Complete disk path - * /xxx Full path to file - * xxx Shorthand for /dev/disk/yyy/xxx + * /dev/xxx Complete disk path + * /xxx Full path to file + * xxx Shorthand for /xxx */ static nvlist_t * make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log) @@ -414,6 +571,7 @@ make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log) nvlist_t *vdev = NULL; char *type = NULL; boolean_t wholedisk = B_FALSE; + uint64_t ashift = 0; int err; /* @@ -499,19 +657,31 @@ make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log) verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, (uint64_t)wholedisk) == 0); + /* + * Override defaults if custom properties are provided. + */ if (props != NULL) { - uint64_t ashift = 0; char *value = NULL; if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) zfs_nicestrtonum(NULL, value, &ashift); + } - if (ashift > 0) - verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_ASHIFT, - ashift) == 0); + /* + * If the device is known to incorrectly report its physical sector + * size explicitly provide the known correct value. + */ + if (ashift == 0) { + int sector_size; + + if (check_sector_size_database(path, §or_size) == B_TRUE) + ashift = highbit(sector_size) - 1; } + if (ashift > 0) + nvlist_add_uint64(vdev, ZPOOL_CONFIG_ASHIFT, ashift); + return (vdev); } @@ -921,11 +1091,13 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) { nvlist_t **child; uint_t c, children; - char *type, *path, *diskname; + char *type, *path; char devpath[MAXPATHLEN]; char udevpath[MAXPATHLEN]; uint64_t wholedisk; struct stat64 statbuf; + int is_exclusive = 0; + int fd; int ret; verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); @@ -948,8 +1120,8 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) &wholedisk)); if (!wholedisk) { - ret = zero_label(path); - return (ret); + (void) zero_label(path); + return (0); } if (realpath(path, devpath) == NULL) { @@ -968,40 +1140,57 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) * deletes and recreates the link during which access attempts * will fail with ENOENT. */ - zfs_append_partition(path, udevpath, sizeof (udevpath)); - if ((strncmp(udevpath, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) && - (lstat64(udevpath, &statbuf) == 0) && - S_ISLNK(statbuf.st_mode)) - (void) unlink(udevpath); - - diskname = strrchr(devpath, '/'); - assert(diskname != NULL); - diskname++; - if (zpool_label_disk(g_zfs, zhp, diskname) == -1) - return (-1); + strncpy(udevpath, path, MAXPATHLEN); + (void) zfs_append_partition(udevpath, MAXPATHLEN); + + fd = open(devpath, O_RDWR|O_EXCL); + if (fd == -1) { + if (errno == EBUSY) + is_exclusive = 1; + } else { + (void) close(fd); + } /* - * Now we've labeled the disk and the partitions have been - * created. We still need to wait for udev to create the - * symlinks to those partitions. + * If the partition exists, contains a valid spare label, + * and is opened exclusively there is no need to partition + * it. Hot spares have already been partitioned and are + * held open exclusively by the kernel as a safety measure. + * + * If the provided path is for a /dev/disk/ device its + * symbolic link will be removed, partition table created, + * and then block until udev creates the new link. */ - if ((ret = zpool_label_disk_wait(udevpath, 1000)) != 0) { - (void) fprintf(stderr, - gettext( "cannot resolve path '%s'\n"), udevpath); - return (-1); + if (!is_exclusive || !is_spare(NULL, udevpath)) { + ret = strncmp(udevpath,UDISK_ROOT,strlen(UDISK_ROOT)); + if (ret == 0) { + ret = lstat64(udevpath, &statbuf); + if (ret == 0 && S_ISLNK(statbuf.st_mode)) + (void) unlink(udevpath); + } + + if (zpool_label_disk(g_zfs, zhp, + strrchr(devpath, '/') + 1) == -1) + return (-1); + + ret = zpool_label_disk_wait(udevpath, 1000); + if (ret) { + (void) fprintf(stderr, gettext("cannot " + "resolve path '%s': %d\n"), udevpath, ret); + return (-1); + } + + (void) zero_label(udevpath); } /* - * Update the path to refer to FIRST_SLICE. The presence of + * Update the path to refer to the partition. The presence of * the 'whole_disk' field indicates to the CLI that we should - * chop off the slice number when displaying the device in + * chop off the partition number when displaying the device in * future output. */ verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, udevpath) == 0); - /* Just in case this partition already existed. */ - (void) zero_label(udevpath); - return (0); } @@ -1025,54 +1214,6 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) } /* - * Determine if the given path is a hot spare within the given configuration. - */ -static boolean_t -is_spare(nvlist_t *config, const char *path) -{ - int fd; - pool_state_t state; - char *name = NULL; - nvlist_t *label; - uint64_t guid, spareguid; - nvlist_t *nvroot; - nvlist_t **spares; - uint_t i, nspares; - boolean_t inuse; - - if ((fd = open(path, O_RDONLY|O_EXCL)) < 0) - return (B_FALSE); - - if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 || - !inuse || - state != POOL_STATE_SPARE || - zpool_read_label(fd, &label) != 0) { - free(name); - (void) close(fd); - return (B_FALSE); - } - free(name); - (void) close(fd); - - verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0); - nvlist_free(label); - - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, - &spares, &nspares) == 0) { - for (i = 0; i < nspares; i++) { - verify(nvlist_lookup_uint64(spares[i], - ZPOOL_CONFIG_GUID, &spareguid) == 0); - if (spareguid == guid) - return (B_TRUE); - } - } - - return (B_FALSE); -} - -/* * Go through and find any devices that are in use. We rely on libdiskmgt for * the majority of this task. */ @@ -1103,11 +1244,12 @@ check_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force, * regardless of what libblkid or zpool_in_use() says. */ if (replacing) { - if (wholedisk) - (void) snprintf(buf, sizeof (buf), "%ss0", - path); - else - (void) strlcpy(buf, path, sizeof (buf)); + (void) strlcpy(buf, path, sizeof (buf)); + if (wholedisk) { + ret = zfs_append_partition(buf, sizeof (buf)); + if (ret == -1) + return (-1); + } if (is_spare(config, buf)) return (0);