X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzfs_znode.c;h=076161a8576b8e85aef0efffb69b137db2950a8e;hb=5c03efc379693f992ebe39c6a00c7297c4a304ea;hp=1017414f7b1cc72a7584c639192605683bb0d986;hpb=691f6ac4c2858d64afc2a0dc1bd2b8c041d68502;p=zfs.git diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 1017414..076161a 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -278,7 +278,7 @@ zfs_inode_destroy(struct inode *ip) static void zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip) { - uint64_t rdev; + uint64_t rdev = 0; switch (ip->i_mode & S_IFMT) { case S_IFREG: @@ -297,12 +297,16 @@ zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip) ip->i_op = &zpl_symlink_inode_operations; break; + /* + * rdev is stored in an SA only for device files. + */ case S_IFCHR: case S_IFBLK: - case S_IFIFO: - case S_IFSOCK: VERIFY(sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zsb), &rdev, sizeof (rdev)) == 0); + /*FALLTHROUGH*/ + case S_IFIFO: + case S_IFSOCK: init_special_inode(ip, ip->i_mode, rdev); ip->i_op = &zpl_special_inode_operations; break; @@ -322,7 +326,8 @@ zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip) */ static znode_t * zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, - dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl) + dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl, + struct dentry *dentry, struct inode *dip) { znode_t *zp; struct inode *ip; @@ -372,13 +377,19 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, } ip->i_ino = obj; - ip->i_mode = zp->z_mode; - ip->i_mtime = ip->i_atime = ip->i_ctime = CURRENT_TIME_SEC; + zfs_inode_update(zp); zfs_inode_set_ops(zsb, ip); if (insert_inode_locked(ip)) goto error; + if (dentry) { + if (zpl_xattr_security_init(ip, dip, &dentry->d_name)) + goto error; + + d_instantiate(dentry, ip); + } + mutex_enter(&zsb->z_znodes_lock); list_insert_tail(&zsb->z_all_znodes, zp); membar_producer(); @@ -553,8 +564,7 @@ zfs_mknode(znode_t 
*dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, size = links = 0; } - if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode) || - S_ISFIFO(vap->va_mode) || S_ISSOCK(vap->va_mode)) + if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode)) rdev = vap->va_rdev; parent = dzp->z_id; @@ -645,8 +655,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, &empty_xattr, 8); } if (obj_type == DMU_OT_ZNODE || - (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode) || - S_ISFIFO(vap->va_mode) || S_ISSOCK(vap->va_mode))) { + (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb), NULL, &rdev, 8); } @@ -675,11 +684,10 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); if (!(flag & IS_ROOT_NODE)) { - *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl); + *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl, + vap->va_dentry, ZTOI(dzp)); ASSERT(*zpp != NULL); ASSERT(dzp != NULL); - err = zpl_xattr_security_init(ZTOI(*zpp), ZTOI(dzp)); - ASSERT3S(err, ==, 0); } else { /* * If we are creating the root node, the "parent" we @@ -806,14 +814,19 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp) znode_t *zp; int err; sa_handle_t *hdl; + struct inode *ip; *zpp = NULL; +again: + ip = ilookup(zsb->z_sb, obj_num); + ZFS_OBJ_HOLD_ENTER(zsb, obj_num); err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db); if (err) { ZFS_OBJ_HOLD_EXIT(zsb, obj_num); + iput(ip); return (err); } @@ -824,13 +837,27 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp) doi.doi_bonus_size < sizeof (znode_phys_t)))) { sa_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zsb, obj_num); + iput(ip); return (EINVAL); } hdl = dmu_buf_get_user(db); if (hdl != NULL) { - zp = sa_get_userdata(hdl); + if (ip == NULL) { + /* + * ilookup returned NULL, which means + * the znode is dying - but the SA handle isn't + * quite dead yet, we need to drop any locks + * we're holding, re-schedule the 
task and try again. + */ + sa_buf_rele(db, NULL); + ZFS_OBJ_HOLD_EXIT(zsb, obj_num); + + schedule(); + goto again; + } + zp = sa_get_userdata(hdl); /* * Since "SA" does immediate eviction we @@ -852,9 +879,12 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp) sa_buf_rele(db, NULL); mutex_exit(&zp->z_lock); ZFS_OBJ_HOLD_EXIT(zsb, obj_num); + iput(ip); return (err); } + ASSERT3P(ip, ==, NULL); + /* * Not found create new znode/vnode but only if file exists. * @@ -866,7 +896,7 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp) * bonus buffer. */ zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size, - doi.doi_bonus_type, obj_num, NULL); + doi.doi_bonus_type, obj_num, NULL, NULL, NULL); if (zp == NULL) { err = ENOENT; } else { @@ -980,13 +1010,24 @@ zfs_zinactive(znode_t *zp) { zfs_sb_t *zsb = ZTOZSB(zp); uint64_t z_id = zp->z_id; + boolean_t drop_mutex = 0; ASSERT(zp->z_sa_hdl); /* - * Don't allow a zfs_zget() while were trying to release this znode + * Don't allow a zfs_zget() while we're trying to release this znode. + * + * Linux allows direct memory reclaim which means that any KM_SLEEP + * allocation may trigger inode eviction. This can lead to a deadlock + * through the ->shrink_icache_memory()->evict()->zfs_inactive()-> + * zfs_zinactive() call path. To avoid this deadlock the process + * must not reacquire the mutex when it is already holding it. 
*/ - ZFS_OBJ_HOLD_ENTER(zsb, z_id); + if (!ZFS_OBJ_HOLD_OWNED(zsb, z_id)) { + ZFS_OBJ_HOLD_ENTER(zsb, z_id); + drop_mutex = 1; + } + mutex_enter(&zp->z_lock); /* @@ -995,14 +1036,19 @@ zfs_zinactive(znode_t *zp) */ if (zp->z_unlinked) { mutex_exit(&zp->z_lock); - ZFS_OBJ_HOLD_EXIT(zsb, z_id); + + if (drop_mutex) + ZFS_OBJ_HOLD_EXIT(zsb, z_id); + zfs_rmnode(zp); return; } mutex_exit(&zp->z_lock); zfs_znode_dmu_fini(zp); - ZFS_OBJ_HOLD_EXIT(zsb, z_id); + + if (drop_mutex) + ZFS_OBJ_HOLD_EXIT(zsb, z_id); } void @@ -1075,22 +1121,6 @@ zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); } -#ifdef HAVE_MMAP -/* - * This is a dummy interface used when pvn_vplist_dirty() should *not* - * be calling back into the fs for a putpage(). E.g.: when truncating - * a file, the pages being "thrown away* don't need to be written out. - */ -/* ARGSUSED */ -static int -zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, - int flags, cred_t *cr) -{ - ASSERT(0); - return (0); -} -#endif /* HAVE_MMAP */ - /* * Increase the file length *