X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=module%2Fzfs%2Fzfs_znode.c;h=17a5821eb415c7e8e4db019b2850281b436070bf;hb=f0b2486034f6a4d8cd1a139ff4604e6b9629a64e;hp=56ac2ab8c3a0a7dd10c6d6dc0921b415bc28ec0a;hpb=d8fd10545b677cb4b770674372c1e116b7c22c64;p=zfs.git

diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 56ac2ab..17a5821 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -174,7 +174,7 @@ zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx)
 	vattr.va_uid = crgetuid(kcred);
 	vattr.va_gid = crgetgid(kcred);
 
-	sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
+	sharezp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
 	sharezp->z_moved = 0;
 	sharezp->z_unlinked = 0;
 	sharezp->z_atime_dirty = 0;
@@ -206,40 +206,6 @@ zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx)
 #endif /* HAVE_SHARE */
 }
 
-/*
- * define a couple of values we need available
- * for both 64 and 32 bit environments.
- */
-#ifndef NBITSMINOR64
-#define	NBITSMINOR64	32
-#endif
-#ifndef MAXMAJ64
-#define	MAXMAJ64	0xffffffffUL
-#endif
-#ifndef	MAXMIN64
-#define	MAXMIN64	0xffffffffUL
-#endif
-
-/*
- * Create special expldev for ZFS private use.
- * Can't use standard expldev since it doesn't do
- * what we want.  The standard expldev() takes a
- * dev32_t in LP64 and expands it to a long dev_t.
- * We need an interface that takes a dev32_t in ILP32
- * and expands it to a long dev_t.
- */
-static uint64_t
-zfs_expldev(dev_t dev)
-{
-#ifndef _LP64
-	major_t major = (major_t)dev >> NBITSMINOR32 & MAXMAJ32;
-	return (((uint64_t)major << NBITSMINOR64) |
-	    ((minor_t)dev & MAXMIN32));
-#else
-	return (dev);
-#endif
-}
-
 static void
 zfs_znode_sa_init(zfs_sb_t *zsb, znode_t *zp,
     dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
@@ -282,7 +248,7 @@ zfs_inode_alloc(struct super_block *sb, struct inode **ip)
 {
 	znode_t *zp;
 
-	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
+	zp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
 	*ip = ZTOI(zp);
 
 	return (0);
@@ -312,7 +278,7 @@ zfs_inode_destroy(struct inode *ip)
 static void
 zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip)
 {
-	uint64_t rdev;
+	uint64_t rdev = 0;
 
 	switch (ip->i_mode & S_IFMT) {
 	case S_IFREG:
@@ -331,12 +297,16 @@ zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip)
 		ip->i_op = &zpl_symlink_inode_operations;
 		break;
 
+	/*
+	 * rdev is stored in a SA only for device files.
+	 */
 	case S_IFCHR:
 	case S_IFBLK:
-	case S_IFIFO:
-	case S_IFSOCK:
 		VERIFY(sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zsb),
 		    &rdev, sizeof (rdev)) == 0);
+		/*FALLTHROUGH*/
+	case S_IFIFO:
+	case S_IFSOCK:
 		init_special_inode(ip, ip->i_mode, rdev);
 		ip->i_op = &zpl_special_inode_operations;
 		break;
@@ -356,7 +326,8 @@ zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip)
  */
 static znode_t *
 zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
-    dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl)
+    dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl,
+    struct dentry *dentry)
 {
 	znode_t	*zp;
 	struct inode *ip;
@@ -381,6 +352,7 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
 	zp->z_blksz = blksz;
 	zp->z_seq = 0x7A4653;
 	zp->z_sync_cnt = 0;
+	zp->z_is_zvol = 0;
 
 	zfs_znode_sa_init(zsb, zp, db, obj_type, hdl);
 
@@ -405,13 +377,15 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
 	}
 
 	ip->i_ino = obj;
-	ip->i_mode = zp->z_mode;
-	ip->i_mtime = ip->i_atime = ip->i_ctime = CURRENT_TIME_SEC;
+	zfs_inode_update(zp);
 	zfs_inode_set_ops(zsb, ip);
 
 	if (insert_inode_locked(ip))
 		goto error;
 
+	if (dentry)
+		d_instantiate(dentry, ip);
+
 	mutex_enter(&zsb->z_znodes_lock);
 	list_insert_tail(&zsb->z_all_znodes, zp);
 	membar_producer();
@@ -586,9 +560,8 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
 		size = links = 0;
 	}
 
-	if (S_ISBLK(vap->va_mode)  || S_ISCHR(vap->va_mode) ||
-	    S_ISFIFO(vap->va_mode) || S_ISSOCK(vap->va_mode))
-		rdev = zfs_expldev(vap->va_rdev);
+	if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
+		rdev = vap->va_rdev;
 
 	parent = dzp->z_id;
 	mode = acl_ids->z_mode;
@@ -678,8 +651,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
 		    &empty_xattr, 8);
 	}
 	if (obj_type == DMU_OT_ZNODE ||
-	    (S_ISBLK(vap->va_mode)  || S_ISCHR(vap->va_mode) ||
-	     S_ISFIFO(vap->va_mode) || S_ISSOCK(vap->va_mode))) {
+	    (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb),
 		    NULL, &rdev, 8);
 	}
@@ -708,7 +680,8 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
 	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
 
 	if (!(flag & IS_ROOT_NODE)) {
-		*zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl);
+		*zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl,
+		    vap->va_dentry);
 		ASSERT(*zpp != NULL);
 		ASSERT(dzp != NULL);
 		err = zpl_xattr_security_init(ZTOI(*zpp), ZTOI(dzp));
@@ -735,6 +708,102 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
 	ZFS_OBJ_HOLD_EXIT(zsb, obj);
 }
 
+/*
+ * zfs_xvattr_set only updates the in-core attributes;
+ * it is assumed the caller will be doing an sa_bulk_update
+ * to push the changes out.
+ */
+void
+zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
+{
+	xoptattr_t *xoap;
+
+	xoap = xva_getxoptattr(xvap);
+	ASSERT(xoap);
+
+	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
+		uint64_t times[2];
+		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
+		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
+		    &times, sizeof (times), tx);
+		XVA_SET_RTN(xvap, XAT_CREATETIME);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
+		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_READONLY);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
+		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_HIDDEN);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
+		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_SYSTEM);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
+		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_ARCHIVE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
+		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
+		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_NOUNLINK);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
+		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_APPENDONLY);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
+		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_NODUMP);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
+		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_OPAQUE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
+		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
+		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
+		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
+		zfs_sa_set_scanstamp(zp, xvap, tx);
+		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
+		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_REPARSE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
+		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_OFFLINE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
+		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_SPARSE);
+	}
+}
+
 int
 zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
 {
@@ -743,14 +812,19 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
 	znode_t		*zp;
 	int err;
 	sa_handle_t	*hdl;
+	struct inode	*ip;
 
 	*zpp = NULL;
 
+again:
+	ip = ilookup(zsb->z_sb, obj_num);
+
 	ZFS_OBJ_HOLD_ENTER(zsb, obj_num);
 
 	err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
 	if (err) {
 		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+		iput(ip);
 		return (err);
 	}
 
@@ -761,13 +835,27 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
 		sa_buf_rele(db, NULL);
 		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+		iput(ip);
 		return (EINVAL);
 	}
 
 	hdl = dmu_buf_get_user(db);
 	if (hdl != NULL) {
-		zp  = sa_get_userdata(hdl);
+		if (ip == NULL) {
+			/*
+			 * ilookup returned NULL, which means
+			 * the znode is dying - but the SA handle isn't
+			 * quite dead yet, we need to drop any locks
+			 * we're holding, re-schedule the task and try again.
+			 */
+			sa_buf_rele(db, NULL);
+			ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+
+			schedule();
+			goto again;
+		}
 
+		zp = sa_get_userdata(hdl);
 
 		/*
 		 * Since "SA" does immediate eviction we
@@ -789,9 +877,12 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
 		sa_buf_rele(db, NULL);
 		mutex_exit(&zp->z_lock);
 		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+		iput(ip);
 		return (err);
 	}
 
+	ASSERT3P(ip, ==, NULL);
+
 	/*
 	 * Not found create new znode/vnode but only if file exists.
 	 *
@@ -803,7 +894,7 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
 	 * bonus buffer.
 	 */
 	zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size,
-	    doi.doi_bonus_type, obj_num, NULL);
+	    doi.doi_bonus_type, obj_num, NULL, NULL);
 	if (zp == NULL) {
 		err = ENOENT;
 	} else {
@@ -917,13 +1008,24 @@ zfs_zinactive(znode_t *zp)
 {
 	zfs_sb_t *zsb = ZTOZSB(zp);
 	uint64_t z_id = zp->z_id;
+	boolean_t drop_mutex = 0;
 
 	ASSERT(zp->z_sa_hdl);
 
 	/*
-	 * Don't allow a zfs_zget() while were trying to release this znode
+	 * Don't allow a zfs_zget() while we're trying to release this znode.
+	 *
+	 * Linux allows direct memory reclaim which means that any KM_SLEEP
+	 * allocation may trigger inode eviction.  This can lead to a deadlock
+	 * through the ->shrink_icache_memory()->evict()->zfs_inactive()->
+	 * zfs_zinactive() call path.  To avoid this deadlock the process
+	 * must not reacquire the mutex when it is already holding it.
 	 */
-	ZFS_OBJ_HOLD_ENTER(zsb, z_id);
+	if (!ZFS_OBJ_HOLD_OWNED(zsb, z_id)) {
+		ZFS_OBJ_HOLD_ENTER(zsb, z_id);
+		drop_mutex = 1;
+	}
+
 	mutex_enter(&zp->z_lock);
 
 	/*
@@ -932,14 +1034,19 @@ zfs_zinactive(znode_t *zp)
 	 */
 	if (zp->z_unlinked) {
 		mutex_exit(&zp->z_lock);
-		ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+
+		if (drop_mutex)
+			ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+
 		zfs_rmnode(zp);
 		return;
 	}
 
 	mutex_exit(&zp->z_lock);
 	zfs_znode_dmu_fini(zp);
-	ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+
+	if (drop_mutex)
+		ZFS_OBJ_HOLD_EXIT(zsb, z_id);
 }
 
 void
@@ -1012,22 +1119,6 @@ zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
 	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
 }
 
-#ifdef HAVE_MMAP
-/*
- * This is a dummy interface used when pvn_vplist_dirty() should *not*
- * be calling back into the fs for a putpage().  E.g.: when truncating
- * a file, the pages being "thrown away* don't need to be written out.
- */
-/* ARGSUSED */
-static int
-zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
-    int flags, cred_t *cr)
-{
-	ASSERT(0);
-	return (0);
-}
-#endif /* HAVE_MMAP */
-
 /*
  * Increase the file length
  *