From fadd0c4da1e2ccd6014800d8b1a0fd117dd323e8 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Thu, 25 Jul 2013 10:39:31 -0700
Subject: [PATCH 01/16] Evict meta data from ghost lists + l2arc headers

When the meta limit is exceeded the ARC evicts some meta data
buffers from the mfu+mru lists.  Unfortunately, for meta data
heavy workloads it's possible for these buffers to accumulate
on the ghost lists if arc_c doesn't exceed arc_size.

To handle this case arc_adjust_meta() has been extended to
explicitly evict meta data buffers from the ghost lists in
proportion to what was evicted from the mfu+mru lists.

If this is insufficient we request that the VFS release
some inodes and dentries.  This will result in the release
of some dnodes which are counted as 'other' metadata.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
---
 module/zfs/arc.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index ce4a023..32ad80b 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -2104,8 +2104,9 @@ arc_do_user_evicts(void)
 void
 arc_adjust_meta(int64_t adjustment, boolean_t may_prune)
 {
-	int64_t delta;
+	int64_t delta, tmp = adjustment;
 
+	/* Evict MRU+MFU meta data to ghost lists */
 	if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_METADATA] > 0) {
 		delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_METADATA], adjustment);
 		arc_evict(arc_mru, 0, delta, FALSE, ARC_BUFC_METADATA);
@@ -2115,9 +2116,24 @@ arc_adjust_meta(int64_t adjustment, boolean_t may_prune)
 	if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_METADATA] > 0) {
 		delta = MIN(arc_mfu->arcs_lsize[ARC_BUFC_METADATA], adjustment);
 		arc_evict(arc_mfu, 0, delta, FALSE, ARC_BUFC_METADATA);
+	}
+
+	/* Evict ghost MRU+MFU meta data */
+	adjustment = tmp;
+
+	if (adjustment > 0 && arc_mru_ghost->arcs_size > 0) {
+		delta = MIN(arc_mru_ghost->arcs_size, adjustment);
+		arc_evict_ghost(arc_mru_ghost, 0, delta, ARC_BUFC_METADATA);
+		adjustment -= delta;
+	}
+
+	if (adjustment > 0 && arc_mfu_ghost->arcs_size > 0) {
+		delta = MIN(arc_mfu_ghost->arcs_size, adjustment);
+		arc_evict_ghost(arc_mfu_ghost, 0, delta, ARC_BUFC_METADATA);
 		adjustment -= delta;
 	}
 
+	/* Request the VFS release some meta data */
 	if (may_prune && (adjustment > 0) && (arc_meta_used > arc_meta_limit))
 		arc_do_user_prune(zfs_arc_meta_prune);
 }
-- 
1.8.3.1


From 570d6edf1d94917aab49c5755027d05b3c7bcd43 Mon Sep 17 00:00:00 2001
From: Richard Yao <ryao@gentoo.org>
Date: Sun, 14 Jul 2013 12:59:24 -0400
Subject: [PATCH 02/16] Linux 3.8 compat: Support
 CONFIG_UIDGID_STRICT_TYPE_CHECKS

When CONFIG_UIDGID_STRICT_TYPE_CHECKS is enabled uid_t/gid_t are
replaced by kuid_t/kgid_t, which are structures instead of integral
types. This causes any code that uses an integral type to fail to build.
The User Namespace functionality introduced in Linux 3.8 requires
CONFIG_UIDGID_STRICT_TYPE_CHECKS, so we could not build against any
kernel that supported it.

We resolve this by converting between the new kuid_t/kgid_t structures
and the original uid_t/gid_t types.

Signed-off-by: Richard Yao <ryao@gentoo.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #1589
---
 module/zfs/zfs_ctldir.c | 4 ++--
 module/zfs/zfs_znode.c  | 4 ++--
 module/zfs/zpl_inode.c  | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c
index 168f853..b35f27d 100644
--- a/module/zfs/zfs_ctldir.c
+++ b/module/zfs/zfs_ctldir.c
@@ -200,8 +200,8 @@ zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id,
 	zp->z_is_stale = B_FALSE;
 	ip->i_ino = id;
 	ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO);
-	ip->i_uid = 0;
-	ip->i_gid = 0;
+	ip->i_uid = SUID_TO_KUID(0);
+	ip->i_gid = SGID_TO_KGID(0);
 	ip->i_blkbits = SPA_MINBLOCKSHIFT;
 	ip->i_atime = now;
 	ip->i_mtime = now;
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 5922d3a..aaf17e1 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -472,8 +472,8 @@ zfs_inode_update(znode_t *zp)
 
 	spin_lock(&ip->i_lock);
 	ip->i_generation = zp->z_gen;
-	ip->i_uid = zp->z_uid;
-	ip->i_gid = zp->z_gid;
+	ip->i_uid = SUID_TO_KUID(zp->z_uid);
+	ip->i_gid = SGID_TO_KGID(zp->z_gid);
 	set_nlink(ip, zp->z_links);
 	ip->i_mode = zp->z_mode;
 	ip->i_blkbits = SPA_MINBLOCKSHIFT;
diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c
index 8c8ec24..ab1fe68 100644
--- a/module/zfs/zpl_inode.c
+++ b/module/zfs/zpl_inode.c
@@ -74,7 +74,7 @@ zpl_vap_init(vattr_t *vap, struct inode *dir, zpl_umode_t mode, cred_t *cr)
 	vap->va_uid = crgetfsuid(cr);
 
 	if (dir && dir->i_mode & S_ISGID) {
-		vap->va_gid = dir->i_gid;
+		vap->va_gid = KGID_TO_SGID(dir->i_gid);
 		if (S_ISDIR(mode))
 			vap->va_mode |= S_ISGID;
 	} else {
@@ -235,8 +235,8 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
 	vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
 	vap->va_mask = ia->ia_valid & ATTR_IATTR_MASK;
 	vap->va_mode = ia->ia_mode;
-	vap->va_uid = ia->ia_uid;
-	vap->va_gid = ia->ia_gid;
+	vap->va_uid = KUID_TO_SUID(ia->ia_uid);
+	vap->va_gid = KGID_TO_SGID(ia->ia_gid);
 	vap->va_size = ia->ia_size;
 	vap->va_atime = ia->ia_atime;
 	vap->va_mtime = ia->ia_mtime;
-- 
1.8.3.1


From cb682a173a84813b2aeb5d18f58cff1a07531fb3 Mon Sep 17 00:00:00 2001
From: Matthew Ahrens <mahrens@delphix.com>
Date: Thu, 21 Mar 2013 14:47:36 -0800
Subject: [PATCH 03/16] Illumos #3618 ::zio dcmd does not show timestamp data

3618 ::zio dcmd does not show timestamp data
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: George Wilson <gwilson@zfsmail.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
Approved by: Dan McDonald <danmcd@nexenta.com>

References:
  http://www.illumos.org/issues/3618
  illumos/illumos-gate@c55e05cb35da47582b7afd38734d2f0d9c6deb40

Notes on porting to ZFS on Linux:

The original changeset mostly deals with mdb ::zio dcmd.
However, in order to provide the requested functionality
it modifies vdev and zio structures to keep the timing data
in nanoseconds instead of ticks. It is these changes that
are ported over in the commit in hand.

One visible change of this commit is that the default value
of 'zfs_vdev_time_shift' tunable is changed:

    zfs_vdev_time_shift = 6
        to
    zfs_vdev_time_shift = 29

The original value of 6 was inherited from OpenSolaris and
was suboptimal - since it shifted the raw tick value - it
didn't compensate for different tick frequencies on Linux and
OpenSolaris. The former has HZ=1000, while the latter HZ=100.

(Which itself led to other interesting performance anomalies
under non-trivial load. The deadline scheduler delays the IO
according to its priority - the lower priority the further
the deadline is set. The delay is measured in units of
"shifted ticks". Since the HZ value was 10 times higher,
the delay units were 10 times shorter. Thus really low
priority IO like resilver (delay is 10 units) and scrub
(delay is 20 units) were scheduled much sooner than intended.
The overall effect is that resilver and scrub IO consumed
more bandwidth at the expense of the other IO.)

Now that the bookkeeping is done in nanoseconds the shift
behaves correctly for any tick frequency (HZ).

Ported-by: Cyril Plisko <cyril.plisko@mountall.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #1643
---
 include/sys/vdev_impl.h |  4 ++--
 include/sys/zio.h       |  4 ++--
 module/zfs/vdev.c       |  8 ++++----
 module/zfs/vdev_queue.c | 13 ++++++++-----
 4 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index 711408e..e0669cc 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -105,8 +105,8 @@ struct vdev_queue {
 	avl_tree_t	vq_read_tree;
 	avl_tree_t	vq_write_tree;
 	avl_tree_t	vq_pending_tree;
-	uint64_t	vq_io_complete_ts;
-	uint64_t	vq_io_delta_ts;
+	hrtime_t	vq_io_complete_ts;
+	hrtime_t	vq_io_delta_ts;
 	list_t		vq_io_list;
 	kmutex_t	vq_lock;
 };
diff --git a/include/sys/zio.h b/include/sys/zio.h
index 0353033..189966b 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -410,8 +410,8 @@ struct zio {
 
 	uint64_t	io_offset;
 	uint64_t	io_deadline;	/* expires at timestamp + deadline */
-	uint64_t	io_timestamp;	/* submitted at (ticks) */
-	uint64_t	io_delta;	/* vdev queue service delta (ticks) */
+	hrtime_t	io_timestamp;	/* submitted at */
+	hrtime_t	io_delta;	/* vdev queue service delta */
 	uint64_t	io_delay;	/* vdev disk service delta (ticks) */
 	avl_node_t	io_offset_node;
 	avl_node_t	io_deadline_node;
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index d6b55ee..662a877 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -3219,10 +3219,10 @@ vdev_deadman(vdev_t *vd)
 			 * the spa_deadman_synctime we log a zevent.
 			 */
 			fio = avl_first(&vq->vq_pending_tree);
-			delta = ddi_get_lbolt64() - fio->io_timestamp;
-			if (delta > NSEC_TO_TICK(spa_deadman_synctime(spa))) {
-				zfs_dbgmsg("SLOW IO: zio timestamp %llu, "
-				    "delta %llu, last io %llu",
+			delta = gethrtime() - fio->io_timestamp;
+			if (delta > spa_deadman_synctime(spa)) {
+				zfs_dbgmsg("SLOW IO: zio timestamp %lluns, "
+				    "delta %lluns, last io %lluns",
 				    fio->io_timestamp, delta,
 				    vq->vq_io_complete_ts);
 				zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c
index 3f2793b..b2cc6b8 100644
--- a/module/zfs/vdev_queue.c
+++ b/module/zfs/vdev_queue.c
@@ -44,8 +44,11 @@
 int zfs_vdev_max_pending = 10;
 int zfs_vdev_min_pending = 4;
 
-/* deadline = pri + ddi_get_lbolt64() >> time_shift) */
-int zfs_vdev_time_shift = 6;
+/*
+ * The deadlines are grouped into buckets based on zfs_vdev_time_shift:
+ * deadline = pri + gethrtime() >> time_shift)
+ */
+int zfs_vdev_time_shift = 29; /* each bucket is 0.537 seconds */
 
 /* exponential I/O issue ramp-up rate */
 int zfs_vdev_ramp_rate = 2;
@@ -396,7 +399,7 @@ vdev_queue_io(zio_t *zio)
 
 	mutex_enter(&vq->vq_lock);
 
-	zio->io_timestamp = ddi_get_lbolt64();
+	zio->io_timestamp = gethrtime();
 	zio->io_deadline = (zio->io_timestamp >> zfs_vdev_time_shift) +
 	    zio->io_priority;
 
@@ -430,8 +433,8 @@ vdev_queue_io_done(zio_t *zio)
 
 	avl_remove(&vq->vq_pending_tree, zio);
 
-	zio->io_delta = ddi_get_lbolt64() - zio->io_timestamp;
-	vq->vq_io_complete_ts = ddi_get_lbolt64();
+	zio->io_delta = gethrtime() - zio->io_timestamp;
+	vq->vq_io_complete_ts = gethrtime();
 	vq->vq_io_delta_ts = vq->vq_io_complete_ts - zio->io_timestamp;
 
 	for (i = 0; i < zfs_vdev_ramp_rate; i++) {
-- 
1.8.3.1


From 105afebb1529c21c3fb1ef7101918ed9fb704942 Mon Sep 17 00:00:00 2001
From: Yuri Pankov <yuri.pankov@nexenta.com>
Date: Tue, 16 Jul 2013 15:54:29 -0500
Subject: [PATCH 04/16] Illumos #3098 zfs userspace/groupspace fail

3098 zfs userspace/groupspace fail without saying why when run as non-root
Reviewed by: Eric Schrock <eric.schrock@delphix.com>
Approved by: Richard Lowe <richlowe@richlowe.net>

References:
  https://www.illumos.org/issues/3098
  illumos/illumos-gate@70f56fa69343b013f47e010537cff8ef3a7a40a5

Ported-by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #1596
---
 cmd/zfs/zfs_main.c          | 19 +++++++++----------
 lib/libzfs/libzfs_dataset.c | 29 +++++++++++++++++------------
 2 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
index 9671c0c..cb5c871 100644
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -2288,10 +2288,8 @@ userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space)
 			if (!cb->cb_sid2posix) {
 				e = directory_name_from_sid(NULL, sid, &name,
 				    &classes);
-				if (e != NULL) {
+				if (e != NULL)
 					directory_error_free(e);
-					return (1);
-				}
 				if (name == NULL)
 					name = sid;
 			}
@@ -2543,7 +2541,7 @@ zfs_do_userspace(int argc, char **argv)
 	boolean_t prtnum = B_FALSE;
 	boolean_t parsable = B_FALSE;
 	boolean_t sid2posix = B_FALSE;
-	int error = 0;
+	int ret = 0;
 	int c;
 	zfs_sort_column_t *sortcol = NULL;
 	int types = USTYPE_PSX_USR | USTYPE_SMB_USR;
@@ -2688,18 +2686,19 @@ zfs_do_userspace(int argc, char **argv)
 		    !(types & (USTYPE_PSX_GRP | USTYPE_SMB_GRP))))
 			continue;
 		cb.cb_prop = p;
-		error = zfs_userspace(zhp, p, userspace_cb, &cb);
-		if (error)
-			break;
+		if ((ret = zfs_userspace(zhp, p, userspace_cb, &cb)) != 0)
+			return (ret);
 	}
 
 	/* Sort the list */
+	if ((node = uu_avl_first(avl_tree)) == NULL)
+		return (0);
+
 	us_populated = B_TRUE;
+
 	listpool = uu_list_pool_create("tmplist", sizeof (us_node_t),
 	    offsetof(us_node_t, usn_listnode), NULL, UU_DEFAULT);
 	list = uu_list_create(listpool, NULL, UU_DEFAULT);
-
-	node = uu_avl_first(avl_tree);
 	uu_list_node_init(node, &node->usn_listnode, listpool);
 
 	while (node != NULL) {
@@ -2740,7 +2739,7 @@ zfs_do_userspace(int argc, char **argv)
 	uu_avl_destroy(avl_tree);
 	uu_avl_pool_destroy(avl_pool);
 
-	return (error);
+	return (ret);
 }
 
 /*
diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c
index d4691dd..554b0ea 100644
--- a/lib/libzfs/libzfs_dataset.c
+++ b/lib/libzfs/libzfs_dataset.c
@@ -21,10 +21,10 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2010 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright (c) 2012 DEY Storage Systems, Inc.  All rights reserved.
  * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
  */
 
 #include <ctype.h>
@@ -4337,35 +4337,40 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
     zfs_userspace_cb_t func, void *arg)
 {
 	zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
-	int error;
 	zfs_useracct_t buf[100];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	int ret;
 
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 
 	zc.zc_objset_type = type;
 	zc.zc_nvlist_dst = (uintptr_t)buf;
 
-	/* CONSTCOND */
-	while (1) {
+	for (;;) {
 		zfs_useracct_t *zua = buf;
 
 		zc.zc_nvlist_dst_size = sizeof (buf);
-		error = ioctl(zhp->zfs_hdl->libzfs_fd,
-		    ZFS_IOC_USERSPACE_MANY, &zc);
-		if (error || zc.zc_nvlist_dst_size == 0)
+		if (zfs_ioctl(hdl, ZFS_IOC_USERSPACE_MANY, &zc) != 0) {
+			char errbuf[ZFS_MAXNAMELEN + 32];
+
+			(void) snprintf(errbuf, sizeof (errbuf),
+			    dgettext(TEXT_DOMAIN,
+			    "cannot get used/quota for %s"), zc.zc_name);
+			return (zfs_standard_error_fmt(hdl, errno, errbuf));
+		}
+		if (zc.zc_nvlist_dst_size == 0)
 			break;
 
 		while (zc.zc_nvlist_dst_size > 0) {
-			error = func(arg, zua->zu_domain, zua->zu_rid,
-			    zua->zu_space);
-			if (error != 0)
-				return (error);
+			if ((ret = func(arg, zua->zu_domain, zua->zu_rid,
+			    zua->zu_space)) != 0)
+				return (ret);
 			zua++;
 			zc.zc_nvlist_dst_size -= sizeof (zfs_useracct_t);
 		}
 	}
 
-	return (error);
+	return (0);
 }
 
 int
-- 
1.8.3.1


From fb5c53ea65b75c67c23f90ebbbb1134a5bb6c140 Mon Sep 17 00:00:00 2001
From: John Layman <jlayman@sagecloud.com>
Date: Tue, 13 Aug 2013 15:24:58 -0400
Subject: [PATCH 05/16] Fix for re-reading /etc/mtab in zfs_is_mounted()

When /etc/mtab is updated on Linux it's done atomically with
rename(2).  A new mtab is written, the existing mtab is unlinked,
and the new mtab is renamed to /etc/mtab.  This means that we
must close the old file and open the new file to get the updated
contents.  Using rewind(3) will just move the file pointer back
to the start of the file, freopen(3) will close and open the file.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #1611
---
 lib/libzfs/libzfs_dataset.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c
index 554b0ea..244b687 100644
--- a/lib/libzfs/libzfs_dataset.c
+++ b/lib/libzfs/libzfs_dataset.c
@@ -629,12 +629,15 @@ libzfs_mnttab_init(libzfs_handle_t *hdl)
 	    sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node));
 }
 
-void
+int
 libzfs_mnttab_update(libzfs_handle_t *hdl)
 {
 	struct mnttab entry;
 
-	rewind(hdl->libzfs_mnttab);
+	/* Reopen MNTTAB to prevent reading stale data from open file */
+	if (freopen(MNTTAB, "r", hdl->libzfs_mnttab) == NULL)
+		return (ENOENT);
+
 	while (getmntent(hdl->libzfs_mnttab, &entry) == 0) {
 		mnttab_node_t *mtn;
 
@@ -647,6 +650,8 @@ libzfs_mnttab_update(libzfs_handle_t *hdl)
 		mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, entry.mnt_mntopts);
 		avl_add(&hdl->libzfs_mnttab_cache, mtn);
 	}
+
+	return (0);
 }
 
 void
@@ -677,13 +682,18 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname,
 {
 	mnttab_node_t find;
 	mnttab_node_t *mtn;
+	int error;
 
 	if (!hdl->libzfs_mnttab_enable) {
 		struct mnttab srch = { 0 };
 
 		if (avl_numnodes(&hdl->libzfs_mnttab_cache))
 			libzfs_mnttab_fini(hdl);
-		rewind(hdl->libzfs_mnttab);
+
+		/* Reopen MNTTAB to prevent reading stale data from open file */
+		if (freopen(MNTTAB, "r", hdl->libzfs_mnttab) == NULL)
+			return (ENOENT);
+
 		srch.mnt_special = (char *)fsname;
 		srch.mnt_fstype = MNTTYPE_ZFS;
 		if (getmntany(hdl->libzfs_mnttab, entry, &srch) == 0)
@@ -693,7 +703,8 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname,
 	}
 
 	if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0)
-		libzfs_mnttab_update(hdl);
+		if ((error = libzfs_mnttab_update(hdl)) != 0)
+			return (error);
 
 	find.mtn_mt.mnt_special = (char *)fsname;
 	mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, NULL);
-- 
1.8.3.1


From f9e459d1435832caa69fdaf236ebaeacd93f1b3e Mon Sep 17 00:00:00 2001
From: Turbo Fredriksson <turbo@bayour.com>
Date: Tue, 4 Jun 2013 03:55:28 +0200
Subject: [PATCH 06/16] Use setmntent() OR fopen()

For the same reasons it's used in libzfs_init(), this was just
overlooked because zinject gets minimal use.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #1498
---
 cmd/zinject/translate.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cmd/zinject/translate.c b/cmd/zinject/translate.c
index fc16127..b2ccb67 100644
--- a/cmd/zinject/translate.c
+++ b/cmd/zinject/translate.c
@@ -115,7 +115,11 @@ parse_pathname(const char *inpath, char *dataset, char *relpath,
 		return (-1);
 	}
 
+#ifdef HAVE_SETMNTENT
+	if ((fp = setmntent(MNTTAB, "r")) == NULL) {
+#else
 	if ((fp = fopen(MNTTAB, "r")) == NULL) {
+#endif
 		(void) fprintf(stderr, "cannot open /etc/mtab\n");
 		return (-1);
 	}
-- 
1.8.3.1


From abbfdca483897d4406889e3517e77baf07573ecb Mon Sep 17 00:00:00 2001
From: Turbo Fredriksson <turbo@bayour.com>
Date: Tue, 4 Jun 2013 03:56:04 +0200
Subject: [PATCH 07/16] No point in rewind() mtab in zfs_unshare_proto(). We're
 not really reading the file, but instead use libzfs_mnttab_find() which does
 the necessary freopen() for us.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #1498
---
 lib/libzfs/libzfs_mount.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c
index 9a57ad9..bded1f0 100644
--- a/lib/libzfs/libzfs_mount.c
+++ b/lib/libzfs/libzfs_mount.c
@@ -877,7 +877,6 @@ zfs_unshare_proto(zfs_handle_t *zhp, const char *mountpoint,
 	char *mntpt = NULL;
 
 	/* check to see if need to unmount the filesystem */
-	rewind(zhp->zfs_hdl->libzfs_mnttab);
 	if (mountpoint != NULL)
 		mountpoint = mntpt = zfs_strdup(hdl, mountpoint);
 
-- 
1.8.3.1


From 0bc7a7a75429f3f5dc68e57dfad4c5fb84a48558 Mon Sep 17 00:00:00 2001
From: Turbo Fredriksson <turbo@bayour.com>
Date: Tue, 4 Jun 2013 03:54:44 +0200
Subject: [PATCH 08/16] Don't specifically open /etc/mtab - it is done in
 libzfs_init() a few lines further down and we can share the open file handle.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #1498
---
 cmd/zfs/zfs_main.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
index cb5c871..5753cce 100644
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -66,6 +66,7 @@
 #include "zfs_iter.h"
 #include "zfs_util.h"
 #include "zfs_comutil.h"
+#include "libzfs_impl.h"
 
 libzfs_handle_t *g_zfs;
 
@@ -6343,12 +6344,6 @@ main(int argc, char **argv)
 
 	opterr = 0;
 
-	if ((mnttab_file = fopen(MNTTAB, "r")) == NULL) {
-		(void) fprintf(stderr, gettext("internal error: unable to "
-		    "open %s\n"), MNTTAB);
-		return (1);
-	}
-
 	/*
 	 * Make sure the user has specified some command.
 	 */
@@ -6387,6 +6382,8 @@ main(int argc, char **argv)
 	if ((g_zfs = libzfs_init()) == NULL)
 		return (1);
 
+	mnttab_file = g_zfs->libzfs_mnttab;
+
 	zpool_set_history_str("zfs", argc, argv, history_str);
 	verify(zpool_stage_history(g_zfs, history_str) == 0);
 
@@ -6411,8 +6408,6 @@ main(int argc, char **argv)
 	}
 	libzfs_fini(g_zfs);
 
-	(void) fclose(mnttab_file);
-
 	/*
 	 * The 'ZFS_ABORT' environment variable causes us to dump core on exit
 	 * for the purposes of running ::findleaks.
-- 
1.8.3.1


From 34e143323e359b42bc9d06dd19cc4b1f13091283 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Wed, 14 Aug 2013 16:18:58 -0700
Subject: [PATCH 09/16] Fix z_wr_iss_h zio_execute() import hang

Because we need to be more frugal about our stack usage under
Linux, the __zio_execute() function was modified to re-dispatch
zios to a ZIO_TASKQ_ISSUE thread when we're in a context which
is known to be stack heavy.  Those two contexts are the sync
thread and whatever thread is performing spa initialization.

Unfortunately, this change introduced an unlikely bug which can
result in a zio being re-dispatched indefinitely and never being
executed.  If during spa initialization we handle a zio with
ZIO_PRIORITY_NOW it will be moved to the high priority queue.
When __zio_execute() is called again for the zio it will mis-
interpret the context and re-dispatch it again.  The system
will get stuck spinning re-dispatching the zio and making no
forward progress.

To fix this rare issue __zio_execute() has been updated not
to re-dispatch zios on either the ZIO_TASKQ_ISSUE or
ZIO_TASKQ_ISSUE_HIGH task queues.

In practice this issue was rarely reported and can usually
be fixed by rebooting the system and importing the pool again.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #1455
---
 module/zfs/zio.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 62a9082..ccefaf8 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -1309,12 +1309,14 @@ __zio_execute(zio_t *zio)
 		/*
 		 * If we executing in the context of the tx_sync_thread,
 		 * or we are performing pool initialization outside of a
-		 * zio_taskq[ZIO_TASKQ_ISSUE] context.  Then issue the zio
-		 * async to minimize stack usage for these deep call paths.
+		 * zio_taskq[ZIO_TASKQ_ISSUE|ZIO_TASKQ_ISSUE_HIGH] context.
+		 * Then issue the zio asynchronously to minimize stack usage
+		 * for these deep call paths.
 		 */
 		if ((dp && curthread == dp->dp_tx.tx_sync_thread) ||
 		    (dp && spa_is_initializing(dp->dp_spa) &&
-		    !zio_taskq_member(zio, ZIO_TASKQ_ISSUE))) {
+		    !zio_taskq_member(zio, ZIO_TASKQ_ISSUE) &&
+		    !zio_taskq_member(zio, ZIO_TASKQ_ISSUE_HIGH))) {
 			zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut);
 			return;
 		}
-- 
1.8.3.1


From 0f37d0c8bed442dd0d2c1b1dddd68653fa6eec66 Mon Sep 17 00:00:00 2001
From: Richard Yao <ryao@gentoo.org>
Date: Wed, 7 Aug 2013 08:53:45 -0400
Subject: [PATCH 10/16] Linux 3.11 compat: fops->iterate()

Commit torvalds/linux@2233f31aade393641f0eaed43a71110e629bb900
replaced ->readdir() with ->iterate() in struct file_operations.
All filesystems must now use the new ->iterate method.

To handle this the code was reworked to use the new ->iterate
interface.  Care was taken to keep the majority of changes
confined to the ZPL layer which is already Linux specific.
However, minor changes were required to the common zfs_readdir()
function.

Compatibility with older kernels was accomplished by adding
versions of the trivial dir_emit* helper functions.  Also the
various *_readdir() functions were reworked into wrappers
which create a dir_context structure to pass to the new
*_iterate() functions.

Unfortunately, the new dir_emit* functions prevent us from
passing a private pointer to the filldir function.  The xattr
directory code leveraged this ability through zfs_readdir()
to generate the list of xattr names.  Since we can no longer
use zfs_readdir() a simplified zpl_xattr_readdir() function
was added to perform the same task.

Signed-off-by: Richard Yao <ryao@cs.stonybrook.edu>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #1653
Issue #1591
---
 config/kernel-vfs-iterate.m4 |  43 ++++++++++
 config/kernel.m4             |   1 +
 include/sys/zfs_vnops.h      |   4 +-
 include/sys/zpl.h            |  59 +++++++++++++
 module/zfs/zfs_vnops.c       |  12 +--
 module/zfs/zpl_ctldir.c      | 193 ++++++++++++++++++++-----------------------
 module/zfs/zpl_file.c        |  23 +++++-
 module/zfs/zpl_xattr.c       |  49 +++++++++--
 8 files changed, 260 insertions(+), 124 deletions(-)
 create mode 100644 config/kernel-vfs-iterate.m4

diff --git a/config/kernel-vfs-iterate.m4 b/config/kernel-vfs-iterate.m4
new file mode 100644
index 0000000..c2c6562
--- /dev/null
+++ b/config/kernel-vfs-iterate.m4
@@ -0,0 +1,43 @@
+dnl #
+dnl # 3.11 API change
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_VFS_ITERATE], [
+	AC_MSG_CHECKING([whether fops->iterate() is available])
+	ZFS_LINUX_TRY_COMPILE([
+		#include <linux/fs.h>
+		int iterate(struct file *filp, struct dir_context * context)
+		    { return 0; }
+
+		static const struct file_operations fops
+		    __attribute__ ((unused)) = {
+			.iterate	 = iterate,
+		};
+	],[
+	],[
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_VFS_ITERATE, 1,
+		          [fops->iterate() is available])
+	],[
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether fops->readdir() is available])
+		ZFS_LINUX_TRY_COMPILE([
+			#include <linux/fs.h>
+			int readdir(struct file *filp, void *entry, filldir_t func)
+			    { return 0; }
+
+			static const struct file_operations fops
+			    __attribute__ ((unused)) = {
+				.readdir = readdir,
+			};
+		],[
+		],[
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_VFS_READDIR, 1,
+				  [fops->readdir() is available])
+		],[
+			AC_MSG_ERROR(no; file a bug report with ZFSOnLinux)
+		])
+
+	])
+])
diff --git a/config/kernel.m4 b/config/kernel.m4
index 8742bc5..74ce22c 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -80,6 +80,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
 	ZFS_AC_KERNEL_ELEVATOR_CHANGE
 	ZFS_AC_KERNEL_5ARG_SGET
 	ZFS_AC_KERNEL_LSEEK_EXECUTE
+	ZFS_AC_KERNEL_VFS_ITERATE
 
 	AS_IF([test "$LINUX_OBJ" != "$LINUX"], [
 		KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ"
diff --git a/include/sys/zfs_vnops.h b/include/sys/zfs_vnops.h
index 75f7c12..c9fecf8 100644
--- a/include/sys/zfs_vnops.h
+++ b/include/sys/zfs_vnops.h
@@ -31,6 +31,7 @@
 #include <sys/cred.h>
 #include <sys/fcntl.h>
 #include <sys/pathname.h>
+#include <sys/zpl.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -51,8 +52,7 @@ extern int zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
     struct inode **ipp, cred_t *cr, int flags, vsecattr_t *vsecp);
 extern int zfs_rmdir(struct inode *dip, char *name, struct inode *cwd,
     cred_t *cr, int flags);
-extern int zfs_readdir(struct inode *ip, void *dirent, filldir_t filldir,
-    loff_t *pos, cred_t *cr);
+extern int zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr);
 extern int zfs_fsync(struct inode *ip, int syncflag, cred_t *cr);
 extern int zfs_getattr(struct inode *ip, vattr_t *vap, int flag, cred_t *cr);
 extern int zfs_getattr_fast(struct inode *ip, struct kstat *sp);
diff --git a/include/sys/zpl.h b/include/sys/zpl.h
index 61a57ef..89cf824 100644
--- a/include/sys/zpl.h
+++ b/include/sys/zpl.h
@@ -89,4 +89,63 @@ extern const struct inode_operations zpl_ops_snapdirs;
 extern const struct file_operations zpl_fops_shares;
 extern const struct inode_operations zpl_ops_shares;
 
+#ifdef HAVE_VFS_ITERATE
+
+#define DIR_CONTEXT_INIT(_dirent, _actor, _pos) {	\
+	.actor = _actor,				\
+	.pos = _pos,					\
+}
+
+#else
+
+typedef struct dir_context {
+	void *dirent;
+	const filldir_t actor;
+	loff_t pos;
+} dir_context_t;
+
+#define DIR_CONTEXT_INIT(_dirent, _actor, _pos) {	\
+	.dirent = _dirent,				\
+	.actor = _actor,				\
+	.pos = _pos,					\
+}
+
+static inline bool
+dir_emit(struct dir_context *ctx, const char *name, int namelen,
+    uint64_t ino, unsigned type)
+{
+	return ctx->actor(ctx->dirent, name, namelen, ctx->pos, ino, type) == 0;
+}
+
+static inline bool
+dir_emit_dot(struct file *file, struct dir_context *ctx)
+{
+	return ctx->actor(ctx->dirent, ".", 1, ctx->pos,
+	    file->f_path.dentry->d_inode->i_ino, DT_DIR) == 0;
+}
+
+static inline bool
+dir_emit_dotdot(struct file *file, struct dir_context *ctx)
+{
+	return ctx->actor(ctx->dirent, "..", 2, ctx->pos,
+	    parent_ino(file->f_path.dentry), DT_DIR) == 0;
+}
+
+static inline bool
+dir_emit_dots(struct file *file, struct dir_context *ctx)
+{
+	if (ctx->pos == 0) {
+		if (!dir_emit_dot(file, ctx))
+			return false;
+		ctx->pos = 1;
+	}
+	if (ctx->pos == 1) {
+		if (!dir_emit_dotdot(file, ctx))
+			return false;
+		ctx->pos = 2;
+	}
+	return true;
+}
+#endif /* HAVE_VFS_ITERATE */
+
 #endif	/* _SYS_ZPL_H */
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index db5d385..876d44b 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -1997,8 +1997,7 @@ EXPORT_SYMBOL(zfs_rmdir);
  */
 /* ARGSUSED */
 int
-zfs_readdir(struct inode *ip, void *dirent, filldir_t filldir,
-    loff_t *pos, cred_t *cr)
+zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr)
 {
 	znode_t		*zp = ITOZ(ip);
 	zfs_sb_t	*zsb = ITOZSB(ip);
@@ -2010,6 +2009,7 @@ zfs_readdir(struct inode *ip, void *dirent, filldir_t filldir,
 	uint8_t		prefetch;
 	int		done = 0;
 	uint64_t	parent;
+	loff_t		*pos = &(ctx->pos);
 
 	ZFS_ENTER(zsb);
 	ZFS_VERIFY_ZP(zp);
@@ -2098,11 +2098,11 @@ zfs_readdir(struct inode *ip, void *dirent, filldir_t filldir,
 
 			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
 		}
-		done = filldir(dirent, zap.za_name, strlen(zap.za_name),
-			       *pos, objnum, ZFS_DIRENT_TYPE(zap.za_first_integer));
-		if (done) {
+
+		done = !dir_emit(ctx, zap.za_name, strlen(zap.za_name),
+		    objnum, ZFS_DIRENT_TYPE(zap.za_first_integer));
+		if (done)
 			break;
-		}
 
 		/* Prefetch znode */
 		if (prefetch) {
diff --git a/module/zfs/zpl_ctldir.c b/module/zfs/zpl_ctldir.c
index 0897017..1bb646f 100644
--- a/module/zfs/zpl_ctldir.c
+++ b/module/zfs/zpl_ctldir.c
@@ -46,79 +46,34 @@ zpl_common_open(struct inode *ip, struct file *filp)
 	return generic_file_open(ip, filp);
 }
 
-static int
-zpl_common_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
-	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *ip = dentry->d_inode;
-	int error = 0;
-
-	switch (filp->f_pos) {
-	case 0:
-		error = filldir(dirent, ".", 1, 0, ip->i_ino, DT_DIR);
-		if (error)
-			break;
-
-		filp->f_pos++;
-		/* fall-thru */
-	case 1:
-		error = filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR);
-		if (error)
-			break;
-
-		filp->f_pos++;
-		/* fall-thru */
-	default:
-		break;
-	}
-
-	return (error);
-}
-
 /*
  * Get root directory contents.
  */
 static int
-zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
+zpl_root_iterate(struct file *filp, struct dir_context *ctx)
 {
-	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *ip = dentry->d_inode;
-	zfs_sb_t *zsb = ITOZSB(ip);
+	zfs_sb_t *zsb = ITOZSB(filp->f_path.dentry->d_inode);
 	int error = 0;
 
 	ZFS_ENTER(zsb);
 
-	switch (filp->f_pos) {
-	case 0:
-		error = filldir(dirent, ".", 1, 0, ip->i_ino, DT_DIR);
-		if (error)
-			goto out;
+	if (!dir_emit_dots(filp, ctx))
+		goto out;
 
-		filp->f_pos++;
-		/* fall-thru */
-	case 1:
-		error = filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR);
-		if (error)
+	if (ctx->pos == 2) {
+		if (!dir_emit(ctx, ZFS_SNAPDIR_NAME, strlen(ZFS_SNAPDIR_NAME),
+		    ZFSCTL_INO_SNAPDIR, DT_DIR))
 			goto out;
 
-		filp->f_pos++;
-		/* fall-thru */
-	case 2:
-		error = filldir(dirent, ZFS_SNAPDIR_NAME,
-		    strlen(ZFS_SNAPDIR_NAME), 2, ZFSCTL_INO_SNAPDIR, DT_DIR);
-		if (error)
-			goto out;
+		ctx->pos++;
+	}
 
-		filp->f_pos++;
-		/* fall-thru */
-	case 3:
-		error = filldir(dirent, ZFS_SHAREDIR_NAME,
-		    strlen(ZFS_SHAREDIR_NAME), 3, ZFSCTL_INO_SHARES, DT_DIR);
-		if (error)
+	if (ctx->pos == 3) {
+		if (!dir_emit(ctx, ZFS_SHAREDIR_NAME, strlen(ZFS_SHAREDIR_NAME),
+		    ZFSCTL_INO_SHARES, DT_DIR))
 			goto out;
 
-		filp->f_pos++;
-		/* fall-thru */
+		ctx->pos++;
 	}
 out:
 	ZFS_EXIT(zsb);
@@ -126,6 +81,20 @@ out:
 	return (error);
 }
 
+#if !defined(HAVE_VFS_ITERATE)
+static int
+zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
+	int error;
+
+	error = zpl_root_iterate(filp, &ctx);
+	filp->f_pos = ctx.pos;
+
+	return (error);
+}
+#endif /* HAVE_VFS_ITERATE */
+
 /*
  * Get root directory attributes.
  */
@@ -175,7 +144,11 @@ const struct file_operations zpl_fops_root = {
 	.open		= zpl_common_open,
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
+#ifdef HAVE_VFS_ITERATE
+	.iterate	= zpl_root_iterate,
+#else
 	.readdir	= zpl_root_readdir,
+#endif
 };
 
 const struct inode_operations zpl_ops_root = {
@@ -273,50 +246,29 @@ zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
 	return d_splice_alias(ip, dentry);
 }
 
-/* ARGSUSED */
 static int
-zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+zpl_snapdir_iterate(struct file *filp, struct dir_context *ctx)
 {
-	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *dip = dentry->d_inode;
-	zfs_sb_t *zsb = ITOZSB(dip);
+	zfs_sb_t *zsb = ITOZSB(filp->f_path.dentry->d_inode);
 	char snapname[MAXNAMELEN];
-	uint64_t id, cookie;
 	boolean_t case_conflict;
+	uint64_t id;
 	int error = 0;
 
 	ZFS_ENTER(zsb);
 
-	cookie = filp->f_pos;
-	switch (filp->f_pos) {
-	case 0:
-		error = filldir(dirent, ".", 1, 0, dip->i_ino, DT_DIR);
-		if (error)
-			goto out;
+	if (!dir_emit_dots(filp, ctx))
+		goto out;
 
-		filp->f_pos++;
-		/* fall-thru */
-	case 1:
-		error = filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR);
+	while (error == 0) {
+		error = -dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN,
+		    snapname, &id, &(ctx->pos), &case_conflict);
 		if (error)
 			goto out;
 
-		filp->f_pos++;
-		/* fall-thru */
-	default:
-		while (error == 0) {
-			error = -dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN,
-			    snapname, &id, &cookie, &case_conflict);
-			if (error)
-				goto out;
-
-			error = filldir(dirent, snapname, strlen(snapname),
-			    filp->f_pos, ZFSCTL_INO_SHARES - id, DT_DIR);
-			if (error)
-				goto out;
-
-			filp->f_pos = cookie;
-		}
+		if (!dir_emit(ctx, snapname, strlen(snapname),
+		    ZFSCTL_INO_SHARES - id, DT_DIR))
+			goto out;
 	}
 out:
 	ZFS_EXIT(zsb);
@@ -327,6 +279,20 @@ out:
 	return (error);
 }
 
+#if !defined(HAVE_VFS_ITERATE)
+static int
+zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
+	int error;
+
+	error = zpl_snapdir_iterate(filp, &ctx);
+	filp->f_pos = ctx.pos;
+
+	return (error);
+}
+#endif /* HAVE_VFS_ITERATE */
+
 int
 zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry,
     struct inode *tdip, struct dentry *tdentry)
@@ -413,7 +379,12 @@ const struct file_operations zpl_fops_snapdir = {
 	.open		= zpl_common_open,
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
+#ifdef HAVE_VFS_ITERATE
+	.iterate	= zpl_snapdir_iterate,
+#else
 	.readdir	= zpl_snapdir_readdir,
+#endif
+
 };
 
 /*
@@ -458,42 +429,51 @@ zpl_shares_lookup(struct inode *dip, struct dentry *dentry,
 	return d_splice_alias(ip, dentry);
 }
 
-/* ARGSUSED */
 static int
-zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir)
+zpl_shares_iterate(struct file *filp, struct dir_context *ctx)
 {
 	cred_t *cr = CRED();
-	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *ip = dentry->d_inode;
-	zfs_sb_t *zsb = ITOZSB(ip);
+	zfs_sb_t *zsb = ITOZSB(filp->f_path.dentry->d_inode);
 	znode_t *dzp;
-	int error;
+	int error = 0;
 
 	ZFS_ENTER(zsb);
 
 	if (zsb->z_shares_dir == 0) {
-		error = zpl_common_readdir(filp, dirent, filldir);
-		ZFS_EXIT(zsb);
-		return (error);
+		dir_emit_dots(filp, ctx);
+		goto out;
 	}
 
 	error = -zfs_zget(zsb, zsb->z_shares_dir, &dzp);
-	if (error) {
-		ZFS_EXIT(zsb);
-		return (error);
-	}
+	if (error)
+		goto out;
 
 	crhold(cr);
-	error = -zfs_readdir(ZTOI(dzp), dirent, filldir, &filp->f_pos, cr);
+	error = -zfs_readdir(ZTOI(dzp), ctx, cr);
 	crfree(cr);
 
 	iput(ZTOI(dzp));
+out:
 	ZFS_EXIT(zsb);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
 }
 
+#if !defined(HAVE_VFS_ITERATE)
+static int
+zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
+	int error;
+
+	error = zpl_shares_iterate(filp, &ctx);
+	filp->f_pos = ctx.pos;
+
+	return (error);
+}
+#endif /* HAVE_VFS_ITERATE */
+
 /* ARGSUSED */
 static int
 zpl_shares_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -532,7 +512,12 @@ const struct file_operations zpl_fops_shares = {
 	.open		= zpl_common_open,
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
+#ifdef HAVE_VFS_ITERATE
+	.iterate	= zpl_shares_iterate,
+#else
 	.readdir	= zpl_shares_readdir,
+#endif
+
 };
 
 /*
diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c
index 2995891..6598c17 100644
--- a/module/zfs/zpl_file.c
+++ b/module/zfs/zpl_file.c
@@ -64,21 +64,34 @@ zpl_release(struct inode *ip, struct file *filp)
 }
 
 static int
-zpl_readdir(struct file *filp, void *dirent, filldir_t filldir)
+zpl_iterate(struct file *filp, struct dir_context *ctx)
 {
 	struct dentry *dentry = filp->f_path.dentry;
 	cred_t *cr = CRED();
 	int error;
 
 	crhold(cr);
-	error = -zfs_readdir(dentry->d_inode, dirent, filldir,
-	    &filp->f_pos, cr);
+	error = -zfs_readdir(dentry->d_inode, ctx, cr);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
 }
 
+#if !defined(HAVE_VFS_ITERATE)
+static int
+zpl_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
+	int error;
+
+	error = zpl_iterate(filp, &ctx);
+	filp->f_pos = ctx.pos;
+
+	return (error);
+}
+#endif /* HAVE_VFS_ITERATE */
+
 #if defined(HAVE_FSYNC_WITH_DENTRY)
 /*
  * Linux 2.6.x - 2.6.34 API,
@@ -506,7 +519,11 @@ const struct file_operations zpl_file_operations = {
 const struct file_operations zpl_dir_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
+#ifdef HAVE_VFS_ITERATE
+	.iterate	= zpl_iterate,
+#else
 	.readdir	= zpl_readdir,
+#endif
 	.fsync		= zpl_fsync,
 	.unlocked_ioctl = zpl_ioctl,
 #ifdef CONFIG_COMPAT
diff --git a/module/zfs/zpl_xattr.c b/module/zfs/zpl_xattr.c
index eb2c00d..dca1ad6 100644
--- a/module/zfs/zpl_xattr.c
+++ b/module/zfs/zpl_xattr.c
@@ -80,6 +80,7 @@
 #include <sys/zfs_vfsops.h>
 #include <sys/zfs_vnops.h>
 #include <sys/zfs_znode.h>
+#include <sys/zap.h>
 #include <sys/vfs.h>
 #include <sys/zpl.h>
 
@@ -91,11 +92,8 @@ typedef struct xattr_filldir {
 } xattr_filldir_t;
 
 static int
-zpl_xattr_filldir(void *arg, const char *name, int name_len,
-    loff_t offset, uint64_t objnum, unsigned int d_type)
+zpl_xattr_filldir(xattr_filldir_t *xf, const char *name, int name_len)
 {
-	xattr_filldir_t *xf = arg;
-
 	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
 		if (!(ITOZSB(xf->inode)->z_flags & ZSB_XATTR))
 			return (0);
@@ -118,12 +116,46 @@ zpl_xattr_filldir(void *arg, const char *name, int name_len,
 	return (0);
 }
 
+/*
+ * Read as many directory entry names as will fit in to the provided buffer,
+ * or when no buffer is provided calculate the required buffer size.
+ */
+int
+zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf)
+{
+	zap_cursor_t zc;
+	zap_attribute_t	zap;
+	int error;
+
+	zap_cursor_init(&zc, ITOZSB(dxip)->z_os, ITOZ(dxip)->z_id);
+
+	while ((error = -zap_cursor_retrieve(&zc, &zap)) == 0) {
+
+		if (zap.za_integer_length != 8 || zap.za_num_integers != 1) {
+			error = -ENXIO;
+			break;
+		}
+
+		error = zpl_xattr_filldir(xf, zap.za_name, strlen(zap.za_name));
+		if (error)
+			break;
+
+		zap_cursor_advance(&zc);
+	}
+
+	zap_cursor_fini(&zc);
+
+	if (error == -ENOENT)
+		error = 0;
+
+	return (error);
+}
+
 static ssize_t
 zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr)
 {
 	struct inode *ip = xf->inode;
 	struct inode *dxip = NULL;
-	loff_t pos = 3;  /* skip '.', '..', and '.zfs' entries. */
 	int error;
 
 	/* Lookup the xattr directory */
@@ -135,8 +167,7 @@ zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr)
 		return (error);
 	}
 
-	/* Fill provided buffer via zpl_zattr_filldir helper */
-	error = -zfs_readdir(dxip, (void *)xf, zpl_xattr_filldir, &pos, cr);
+	error = zpl_xattr_readdir(dxip, xf);
 	iput(dxip);
 
 	return (error);
@@ -162,8 +193,8 @@ zpl_xattr_list_sa(xattr_filldir_t *xf)
 	while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
 		ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
 
-		error = zpl_xattr_filldir((void *)xf, nvpair_name(nvp),
-		     strlen(nvpair_name(nvp)), 0, 0, 0);
+		error = zpl_xattr_filldir(xf, nvpair_name(nvp),
+		     strlen(nvpair_name(nvp)));
 		if (error)
 			return (error);
 	}
-- 
1.8.3.1


From bff32e0972bbc07ba5f2b9ce5b965813d8edcf78 Mon Sep 17 00:00:00 2001
From: Richard Yao <ryao@gentoo.org>
Date: Sat, 10 Aug 2013 08:24:40 -0400
Subject: [PATCH 11/16] Implement database to workaround misreported physical
 sector sizes

This implements check_sector_size_database(). It alters the detected
sector size of any device listed in a database of drives known to lie
about their physical sector sizes.

This is based on "6931570 Add flash devices' VID/PID to disk table to
advertising 4K physical sector size" from Open Solaris and on
sg_simple4.c from sg3_utils. About two dozen lines are taken from
sg_simple4.c, which is GPLv2 licensed. However, sg_simple4.c is
analogous to a Hello World program and is safe for us to use. We
requested that Douglas Gilbert, the author of sg_simple4.c, confirm that
this is the case. A cutdown version of his response is as follows:

```
I would consider a SCSI INQUIRY example using the Linux sg
driver interface (also written by me) as the equivalent of an
"hello world" program in C.
```

The database was created with the help of the freenode and ZFSOnLinux
communities.

Some notes:

1. The following drives were confirmed to lie via reports in IRC, and
their identifiers contain capacity information:

INTEL SSDSA2M080
INTEL SSDSA2M160
M4-CT256M4SSD2
WDC WD15EARS-00S
WDC WD15EARS-00Z
WDC WD20EARS-00M

The identifiers for different capacity models were extrapolated and
added under the assumption that those models also lie. Google was used
to verify that the extrapolated drive identifiers existed prior to their
inclusion.

2. The OCZ-VERTEX2 3.5 identifier applies to two drives that differ
solely in page size (and slightly in capacity). One uses 4096-byte pages
and the other uses 8192-byte pages. Both are set to use 8192-byte pages.
We could detect the page size by checking the capacity, but that would
unnecessarily complicate the code.

3. It is possible for updated drive firmware to correctly report the
sector size. There were reports of a few advanced format drives doing
that. One report stated that the vendor changed the identification
string while another was unclear on this. Both reports involved WDC
models.

4. Google was used to determine the size of pages in the listed flash
devices. Reports of 8192-byte pages took precedence over reports of
4096-byte pages.

5. Devices behind USB adapters can have their identification strings
altered. Identification strings obtained across USB adapters are
omitted and no attempt is made to correct for alterations made by USB
adapters when doing comparisons against the database. Two entries in the
Open Solaris database that appear to have been altered by a USB
adapter were omitted.

Signed-off-by: Richard Yao <ryao@gentoo.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #1652
---
 cmd/zpool/zpool_vdev.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 129 insertions(+), 4 deletions(-)

diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c
index b0b17b1..723e10b 100644
--- a/cmd/zpool/zpool_vdev.c
+++ b/cmd/zpool/zpool_vdev.c
@@ -67,6 +67,8 @@
 #include <libintl.h>
 #include <libnvpair.h>
 #include <limits.h>
+#include <scsi/scsi.h>
+#include <scsi/sg.h>
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
@@ -82,6 +84,7 @@
 #endif /* HAVE_LIBBLKID */
 
 #include "zpool_util.h"
+#include <sys/zfs_context.h>
 
 /*
  * For any given vdev specification, we can have multiple errors.  The
@@ -91,6 +94,115 @@
 boolean_t error_seen;
 boolean_t is_force;
 
+typedef struct vdev_disk_db_entry
+{
+	char id[24];
+	int sector_size;
+} vdev_disk_db_entry_t;
+
+/*
+ * Database of block devices that lie about physical sector sizes.  The
+ * identification string must be precisely 24 characters to avoid false
+ * negatives
+ */
+static vdev_disk_db_entry_t vdev_disk_database[] = {
+	{"ATA     Corsair Force 3 ", 8192},
+	{"ATA     INTEL SSDSA2CT04", 8192},
+	{"ATA     INTEL SSDSA2CW16", 8192},
+	{"ATA     INTEL SSDSC2CT18", 8192},
+	{"ATA     INTEL SSDSC2CW12", 8192},
+	{"ATA     KINGSTON SH100S3", 8192},
+	{"ATA     M4-CT064M4SSD2  ", 8192},
+	{"ATA     M4-CT128M4SSD2  ", 8192},
+	{"ATA     M4-CT256M4SSD2  ", 8192},
+	{"ATA     M4-CT512M4SSD2  ", 8192},
+	{"ATA     OCZ-AGILITY2    ", 8192},
+	{"ATA     OCZ-VERTEX2 3.5 ", 8192},
+	{"ATA     OCZ-VERTEX3     ", 8192},
+	{"ATA     OCZ-VERTEX3 LT  ", 8192},
+	{"ATA     OCZ-VERTEX3 MI  ", 8192},
+	{"ATA     SAMSUNG SSD 830 ", 8192},
+	{"ATA     Samsung SSD 840 ", 8192},
+	{"ATA     INTEL SSDSA2M040", 4096},
+	{"ATA     INTEL SSDSA2M080", 4096},
+	{"ATA     INTEL SSDSA2M160", 4096},
+	/* Imported from Open Solaris*/
+	{"ATA     MARVELL SD88SA02", 4096},
+	/* Advanced format Hard drives */
+	{"ATA     Hitachi HDS5C303", 4096},
+	{"ATA     SAMSUNG HD204UI ", 4096},
+	{"ATA     ST2000DL004 HD20", 4096},
+	{"ATA     WDC WD10EARS-00M", 4096},
+	{"ATA     WDC WD10EARS-00S", 4096},
+	{"ATA     WDC WD10EARS-00Z", 4096},
+	{"ATA     WDC WD15EARS-00M", 4096},
+	{"ATA     WDC WD15EARS-00S", 4096},
+	{"ATA     WDC WD15EARS-00Z", 4096},
+	{"ATA     WDC WD20EARS-00M", 4096},
+	{"ATA     WDC WD20EARS-00S", 4096},
+	{"ATA     WDC WD20EARS-00Z", 4096},
+	/* Virtual disks: Assume zvols with default volblocksize */
+#if 0
+	{"ATA     QEMU HARDDISK   ", 8192},
+	{"IET     VIRTUAL-DISK    ", 8192},
+	{"OI      COMSTAR         ", 8192},
+#endif
+};
+
+static const int vdev_disk_database_size =
+	sizeof (vdev_disk_database) / sizeof (vdev_disk_database[0]);
+
+#define	INQ_REPLY_LEN	96
+#define	INQ_CMD_LEN	6
+
+static boolean_t
+check_sector_size_database(char *path, int *sector_size)
+{
+	unsigned char inq_buff[INQ_REPLY_LEN];
+	unsigned char sense_buffer[32];
+	unsigned char inq_cmd_blk[INQ_CMD_LEN] =
+	    {INQUIRY, 0, 0, 0, INQ_REPLY_LEN, 0};
+	sg_io_hdr_t io_hdr;
+	int error;
+	int fd;
+	int i;
+
+	/* Prepare INQUIRY command */
+	memset(&io_hdr, 0, sizeof(sg_io_hdr_t));
+	io_hdr.interface_id = 'S';
+	io_hdr.cmd_len = sizeof(inq_cmd_blk);
+	io_hdr.mx_sb_len = sizeof(sense_buffer);
+	io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+	io_hdr.dxfer_len = INQ_REPLY_LEN;
+	io_hdr.dxferp = inq_buff;
+	io_hdr.cmdp = inq_cmd_blk;
+	io_hdr.sbp = sense_buffer;
+	io_hdr.timeout = 10;        /* 10 milliseconds is ample time */
+
+	if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
+		return (B_FALSE);
+
+	error = ioctl(fd, SG_IO, (unsigned long) &io_hdr);
+
+	(void) close(fd);
+
+	if (error < 0)
+		return (B_FALSE);
+
+	if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK)
+		return (B_FALSE);
+
+	for (i = 0; i < vdev_disk_database_size; i++) {
+		if (memcmp(inq_buff + 8, vdev_disk_database[i].id, 24))
+			continue;
+
+		*sector_size = vdev_disk_database[i].sector_size;
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
 /*PRINTFLIKE1*/
 static void
 vdev_error(const char *fmt, ...)
@@ -459,6 +571,7 @@ make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
 	nvlist_t *vdev = NULL;
 	char *type = NULL;
 	boolean_t wholedisk = B_FALSE;
+	uint64_t ashift = 0;
 	int err;
 
 	/*
@@ -544,19 +657,31 @@ make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
 		verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
 		    (uint64_t)wholedisk) == 0);
 
+	/*
+	 * Override defaults if custom properties are provided.
+	 */
 	if (props != NULL) {
-		uint64_t ashift = 0;
 		char *value = NULL;
 
 		if (nvlist_lookup_string(props,
 		    zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0)
 			zfs_nicestrtonum(NULL, value, &ashift);
+	}
 
-		if (ashift > 0)
-			verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_ASHIFT,
-			    ashift) == 0);
+	/*
+	 * If the device is known to incorrectly report its physical sector
+	 * size explicitly provide the known correct value.
+	 */
+	if (ashift == 0) {
+		int sector_size;
+
+		if (check_sector_size_database(path, &sector_size) == B_TRUE)
+			ashift = highbit(sector_size) - 1;
 	}
 
+	if (ashift > 0)
+		nvlist_add_uint64(vdev, ZPOOL_CONFIG_ASHIFT, ashift);
+
 	return (vdev);
 }
 
-- 
1.8.3.1


From c273d60d80958dea8edc3c6f5702c9c81ffbd8ea Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Thu, 22 Aug 2013 12:14:26 -0700
Subject: [PATCH 12/16] Revert "Evict meta data from ghost lists + l2arc
 headers"

This reverts commit fadd0c4da1e2ccd6014800d8b1a0fd117dd323e8 which
introduced a regression in honoring the meta limit.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Close #1660
---
 module/zfs/arc.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 32ad80b..ce4a023 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -2104,9 +2104,8 @@ arc_do_user_evicts(void)
 void
 arc_adjust_meta(int64_t adjustment, boolean_t may_prune)
 {
-	int64_t delta, tmp = adjustment;
+	int64_t delta;
 
-	/* Evict MRU+MFU meta data to ghost lists */
 	if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_METADATA] > 0) {
 		delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_METADATA], adjustment);
 		arc_evict(arc_mru, 0, delta, FALSE, ARC_BUFC_METADATA);
@@ -2116,24 +2115,9 @@ arc_adjust_meta(int64_t adjustment, boolean_t may_prune)
 	if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_METADATA] > 0) {
 		delta = MIN(arc_mfu->arcs_lsize[ARC_BUFC_METADATA], adjustment);
 		arc_evict(arc_mfu, 0, delta, FALSE, ARC_BUFC_METADATA);
-	}
-
-	/* Evict ghost MRU+MFU meta data */
-	adjustment = tmp;
-
-	if (adjustment > 0 && arc_mru_ghost->arcs_size > 0) {
-		delta = MIN(arc_mru_ghost->arcs_size, adjustment);
-		arc_evict_ghost(arc_mru_ghost, 0, delta, ARC_BUFC_METADATA);
-		adjustment -= delta;
-	}
-
-	if (adjustment > 0 && arc_mfu_ghost->arcs_size > 0) {
-		delta = MIN(arc_mfu_ghost->arcs_size, adjustment);
-		arc_evict_ghost(arc_mfu_ghost, 0, delta, ARC_BUFC_METADATA);
 		adjustment -= delta;
 	}
 
-	/* Request the VFS release some meta data */
 	if (may_prune && (adjustment > 0) && (arc_meta_used > arc_meta_limit))
 		arc_do_user_prune(zfs_arc_meta_prune);
 }
-- 
1.8.3.1


From 6a7c0ccca44ad02c476a111d8f7911fc8b12fff7 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Thu, 22 Aug 2013 13:06:33 -0700
Subject: [PATCH 13/16] Use directory xattrs for symlinks

There is currently a subtle bug in the SA implementation which
can crop up and prevent us from safely using multiple variable
length SAs in one object.

Fortunately, the only existing use case for this is symlinks with
SA based xattrs.  Therefore, until the root cause in the SA code
can be identified and fixed, we prevent adding SA xattrs to symlinks.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #1468
---
 module/zfs/zpl_xattr.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/module/zfs/zpl_xattr.c b/module/zfs/zpl_xattr.c
index dca1ad6..d79d35b 100644
--- a/module/zfs/zpl_xattr.c
+++ b/module/zfs/zpl_xattr.c
@@ -438,6 +438,10 @@ zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
 		if (error == -ENOENT)
 			error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr);
 	} else {
+		/* Do not allow SA xattrs in symlinks (issue #1648) */
+		if (S_ISLNK(ip->i_mode))
+			return (-EMLINK);
+
 		/* Limited to 32k to keep nvpair memory allocations small */
 		if (size > DXATTR_MAX_ENTRY_SIZE)
 			return (-EFBIG);
-- 
1.8.3.1


From 0c28fb480836ab7bb1bbf8de6e572d2443273396 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Fri, 16 Aug 2013 15:20:07 -0700
Subject: [PATCH 14/16] Tag zfs-0.6.2

META file and release log updated.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
---
 META                         | 2 +-
 rpm/generic/zfs-dkms.spec.in | 2 ++
 rpm/generic/zfs-kmod.spec.in | 2 ++
 rpm/generic/zfs.spec.in      | 2 ++
 4 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/META b/META
index a65dc29..4b8a111 100644
--- a/META
+++ b/META
@@ -1,7 +1,7 @@
 Meta:         1
 Name:         zfs
 Branch:       1.0
-Version:      0.6.1
+Version:      0.6.2
 Release:      1
 Release-Tags: relext
 License:      CDDL
diff --git a/rpm/generic/zfs-dkms.spec.in b/rpm/generic/zfs-dkms.spec.in
index c758baa..a4b0b36 100644
--- a/rpm/generic/zfs-dkms.spec.in
+++ b/rpm/generic/zfs-dkms.spec.in
@@ -68,5 +68,7 @@ dkms remove -m %{module} -v %{version} --all --rpm_safe_upgrade
 exit 0
 
 %changelog
+* Wed Aug 21 2013 Brian Behlendorf <behlendorf1@llnl.gov> - 0.6.2-1
+- Released 0.6.2-1
 * Fri Mar 22 2013 Brian Behlendorf <behlendorf1@llnl.gov> - 0.6.1-1
 - First official stable release.
diff --git a/rpm/generic/zfs-kmod.spec.in b/rpm/generic/zfs-kmod.spec.in
index 2ea4bcb..4eb25e0 100644
--- a/rpm/generic/zfs-kmod.spec.in
+++ b/rpm/generic/zfs-kmod.spec.in
@@ -166,5 +166,7 @@ chmod u+x ${RPM_BUILD_ROOT}%{kmodinstdir_prefix}/*/extra/*/*/*
 rm -rf $RPM_BUILD_ROOT
 
 %changelog
+* Wed Aug 21 2013 Brian Behlendorf <behlendorf1@llnl.gov> - 0.6.2-1
+- Released 0.6.2-1
 * Fri Mar 22 2013 Brian Behlendorf <behlendorf1@llnl.gov> - 0.6.1-1
 - First official stable release.
diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in
index c832404..0797124 100644
--- a/rpm/generic/zfs.spec.in
+++ b/rpm/generic/zfs.spec.in
@@ -153,5 +153,7 @@ exit 0
 %{_dracutdir}/modules.d/*
 
 %changelog
+* Wed Aug 21 2013 Brian Behlendorf <behlendorf1@llnl.gov> - 0.6.2-1
+- Released 0.6.2-1
 * Fri Mar 22 2013 Brian Behlendorf <behlendorf1@llnl.gov> - 0.6.1-1
 - First official stable release.
-- 
1.8.3.1


From a36bf1149d7408fde273851350c0ef6c0e4fe989 Mon Sep 17 00:00:00 2001
From: Ralf Ertzinger <ralf@skytale.net>
Date: Wed, 23 Oct 2013 10:33:33 +0200
Subject: [PATCH 15/16] Introduce zpool_get_prop_literal interface

This change introduces zpool_get_prop_literal. It's an expanded version
of zpool_get_prop taking one additional boolean parameter. With this
parameter set to B_FALSE it will behave identically to zpool_get_prop.
Setting it to B_TRUE will return full precision numbers for the
following properties:

ZPOOL_PROP_SIZE
ZPOOL_PROP_ALLOCATED
ZPOOL_PROP_FREE
ZPOOL_PROP_FREEING
ZPOOL_PROP_EXPANDSZ
ZPOOL_PROP_ASHIFT

zpool_get_prop itself is now a thin compatibility wrapper that calls
zpool_get_prop_literal with the new parameter set to B_FALSE.
---
 include/libzfs.h         |  2 ++
 lib/libzfs/libzfs_pool.c | 20 +++++++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/include/libzfs.h b/include/libzfs.h
index 3472b76..145b5a3 100644
--- a/include/libzfs.h
+++ b/include/libzfs.h
@@ -272,6 +272,8 @@ extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
 extern int zpool_set_prop(zpool_handle_t *, const char *, const char *);
 extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *,
     size_t proplen, zprop_source_t *);
+extern int zpool_get_prop_literal(zpool_handle_t *, zpool_prop_t, char *,
+    size_t proplen, zprop_source_t *, boolean_t literal);
 extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t,
     zprop_source_t *);
 
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index a6cacd3..468243c 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -235,13 +235,23 @@ zpool_pool_state_to_name(pool_state_t state)
 }
 
 /*
- * Get a zpool property value for 'prop' and return the value in
- * a pre-allocated buffer.
+ * API compatibility wrapper around zpool_get_prop_literal
  */
 int
 zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
     zprop_source_t *srctype)
 {
+    return zpool_get_prop_literal(zhp, prop, buf, len, srctype, B_FALSE);
+}
+
+/*
+ * Get a zpool property value for 'prop' and return the value in
+ * a pre-allocated buffer.
+ */
+int
+zpool_get_prop_literal(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
+    zprop_source_t *srctype, boolean_t literal)
+{
 	uint64_t intval;
 	const char *strval;
 	zprop_source_t src = ZPROP_SRC_NONE;
@@ -307,7 +317,11 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
 		case ZPOOL_PROP_FREEING:
 		case ZPOOL_PROP_EXPANDSZ:
 		case ZPOOL_PROP_ASHIFT:
-			(void) zfs_nicenum(intval, buf, len);
+			if (literal)
+				(void) snprintf(buf, len, "%llu",
+					(u_longlong_t)intval);
+			else
+				(void) zfs_nicenum(intval, buf, len);
 			break;
 
 		case ZPOOL_PROP_CAPACITY:
-- 
1.8.3.1


From f27eacc7529a5e397382425219f1fb20602563ea Mon Sep 17 00:00:00 2001
From: Ralf Ertzinger <ralf@skytale.net>
Date: Wed, 23 Oct 2013 10:50:48 +0200
Subject: [PATCH 16/16] Add -p switch to "zpool get"

This works the same as the -p switch to "zfs get", displaying full
resolution values for appropriate attributes.
---
 cmd/zpool/zpool_main.c | 34 +++++++++++++++++++++++++++-------
 man/man8/zpool.8       | 15 +++++++++++++--
 2 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index b96fbe4..2632e8c 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -265,7 +265,7 @@ get_usage(zpool_help_t idx) {
 	case HELP_EVENTS:
 		return (gettext("\tevents [-vHfc]\n"));
 	case HELP_GET:
-		return (gettext("\tget <\"all\" | property[,...]> "
+		return (gettext("\tget [-p] <\"all\" | property[,...]> "
 		    "<pool> ...\n"));
 	case HELP_SET:
 		return (gettext("\tset <property=value> <pool> \n"));
@@ -3098,6 +3098,9 @@ zpool_do_list(int argc, char **argv)
 
 	argc -= optind;
 	argv += optind;
+	fprintf(stderr, "argc = %d\n", argc);
+	for (c=0; c<argc; c++)
+		fprintf(stderr, "argv[%d] = %s\n", argc, argv[c]);
 
 	get_interval_count(&argc, argv, &interval, &count);
 
@@ -5466,8 +5469,8 @@ get_callback(zpool_handle_t *zhp, void *data)
 				    NULL, NULL);
 			}
 		} else {
-			if (zpool_get_prop(zhp, pl->pl_prop, value,
-			    sizeof (value), &srctype) != 0)
+			if (zpool_get_prop_literal(zhp, pl->pl_prop, value,
+			    sizeof (value), &srctype, cbp->cb_literal) != 0)
 				continue;
 
 			zprop_print_one_property(zpool_get_name(zhp), cbp,
@@ -5483,9 +5486,26 @@ zpool_do_get(int argc, char **argv)
 {
 	zprop_get_cbdata_t cb = { 0 };
 	zprop_list_t fake_name = { 0 };
-	int ret;
+	int c, ret;
 
-	if (argc < 2) {
+	/* check options */
+	while ((c = getopt(argc, argv, "p")) != -1) {
+		switch (c) {
+		case 'p':
+			cb.cb_literal = B_TRUE;
+			break;
+
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing property "
 		    "argument\n"));
 		usage(B_FALSE);
@@ -5499,7 +5519,7 @@ zpool_do_get(int argc, char **argv)
 	cb.cb_columns[3] = GET_COL_SOURCE;
 	cb.cb_type = ZFS_TYPE_POOL;
 
-	if (zprop_get_list(g_zfs, argv[1], &cb.cb_proplist,
+	if (zprop_get_list(g_zfs, argv[0], &cb.cb_proplist,
 	    ZFS_TYPE_POOL) != 0)
 		usage(B_FALSE);
 
@@ -5510,7 +5530,7 @@ zpool_do_get(int argc, char **argv)
 		cb.cb_proplist = &fake_name;
 	}
 
-	ret = for_each_pool(argc - 2, argv + 2, B_TRUE, &cb.cb_proplist,
+	ret = for_each_pool(argc - 1, argv + 1, B_TRUE, &cb.cb_proplist,
 	    get_callback, &cb);
 
 	if (cb.cb_proplist == &fake_name)
diff --git a/man/man8/zpool.8 b/man/man8/zpool.8
index b4b0f46..82ef204 100644
--- a/man/man8/zpool.8
+++ b/man/man8/zpool.8
@@ -62,7 +62,7 @@ zpool \- configures ZFS storage pools
 
 .LP
 .nf
-\fBzpool get\fR "\fIall\fR" | \fIproperty\fR[,...] \fIpool\fR ...
+\fBzpool get\fR [\fB-p\fR] "\fIall\fR" | \fIproperty\fR[,...] \fIpool\fR ...
 .fi
 
 .LP
@@ -1036,7 +1036,7 @@ This command will forcefully export the pool even if it has a shared spare that
 .ne 2
 .mk
 .na
-\fB\fBzpool get\fR "\fIall\fR" | \fIproperty\fR[,...] \fIpool\fR ...\fR
+\fB\fBzpool get\fR [\fB-p\fR] "\fIall\fR" | \fIproperty\fR[,...] \fIpool\fR ...\fR
 .ad
 .sp .6
 .RS 4n
@@ -1053,6 +1053,17 @@ Retrieves the given list of properties (or all properties if "\fBall\fR" is used
 .sp
 
 See the "Properties" section for more information on the available pool properties.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-p\fR\fR
+.ad
+.RS 6n
+.rt
+Display numbers in parseable (exact) values.
+.RE
+
 .RE
 
 .sp
-- 
1.8.3.1