From f74fae8b305ce3b264489e71adb49c2e4e9e33db Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 26 May 2011 16:48:16 -0700 Subject: [PATCH] Fix 4K sector support Yesterday I ran across a 3TB drive which exposed 4K sectors to Linux. While I thought I had gotten this support correct it turns out there were 2 subtle bugs which prevented it from working. sudo ./cmd/zpool/zpool create -f large-sector /dev/sda cannot create 'large-sector': one or more devices is currently unavailable 1) The first issue was that it was possible that bdev_capacity() would return the number of 512 byte sectors rather than the number of 4096 sectors. Internally, certain Linux functions only operate with 512 byte sectors so you need to be careful. To avoid any confusion in the future I've updated bdev_capacity() to simply return the device (or partition) capacity in bytes. The higher levels of ZFS want the value in bytes anyway so this is cleaner. 2) When creating a bio the ->bi_sector count must always be expressed in 512 byte sectors. The existing code would scale the byte offset by the logical sector size. Until now this was always 512 so it never caused problems. Trying a 4K sector drive clearly exposed the issue. The problem has been fixed by hard coding the 512 byte sector which is exactly what the bio code does internally. With these changes I'm now able to create ZFS pools using 4K sector drives. No issues were observed during fairly extensive testing. This is also a low risk change if your using 512b sectors devices because none of the logic changes. Closes #256 --- module/zfs/vdev_disk.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 39c00ba..4145dec 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -87,10 +87,10 @@ bdev_capacity(struct block_device *bdev) /* The partition capacity referenced by the block device */ if (part) - return part->nr_sects; + return (part->nr_sects << 9); /* Otherwise assume the full device capacity */ - return get_capacity(bdev->bd_disk); + return (get_capacity(bdev->bd_disk) << 9); } static void @@ -218,7 +218,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift) v->vdev_nowritecache = B_FALSE; /* Physical volume size in bytes */ - *psize = bdev_capacity(bdev) * block_size; + *psize = bdev_capacity(bdev); /* Based on the minimum sector size set the block size */ *ashift = highbit(MAX(block_size, SPA_MINBLOCKSIZE)) - 1; @@ -417,7 +417,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, caddr_t bio_ptr; uint64_t bio_offset; int bio_size, bio_count = 16; - int i = 0, error = 0, block_size; + int i = 0, error = 0; ASSERT3U(kbuf_offset + kbuf_size, <=, bdev->bd_inode->i_size); @@ -431,7 +431,6 @@ retry: dr->dr_zio = zio; dr->dr_rw = flags; - block_size = vdev_bdev_block_size(bdev); /* * When the IO size exceeds the maximum bio size for the request @@ -472,7 +471,7 @@ retry: vdev_disk_dio_get(dr); dr->dr_bio[i]->bi_bdev = bdev; - dr->dr_bio[i]->bi_sector = bio_offset / block_size; + dr->dr_bio[i]->bi_sector = bio_offset >> 9; dr->dr_bio[i]->bi_rw = dr->dr_rw; dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion; dr->dr_bio[i]->bi_private = dr; @@ -715,7 +714,7 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) if (IS_ERR(bdev)) return -PTR_ERR(bdev); - s = bdev_capacity(bdev) * vdev_bdev_block_size(bdev); + s = bdev_capacity(bdev); if (s == 0) { vdev_bdev_close(bdev, vdev_bdev_mode(FREAD)); return EIO; -- 1.8.3.1