vattr.va_uid = crgetuid(kcred);
vattr.va_gid = crgetgid(kcred);
- sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
+ sharezp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
sharezp->z_moved = 0;
sharezp->z_unlinked = 0;
sharezp->z_atime_dirty = 0;
{
znode_t *zp;
- zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
+ zp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
*ip = ZTOI(zp);
return (0);
static void
zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip)
{
- uint64_t rdev;
+ uint64_t rdev = 0;
switch (ip->i_mode & S_IFMT) {
case S_IFREG:
ip->i_op = &zpl_symlink_inode_operations;
break;
+ /*
+ * rdev is stored in an SA only for device files.
+ */
case S_IFCHR:
case S_IFBLK:
- case S_IFIFO:
- case S_IFSOCK:
VERIFY(sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zsb),
&rdev, sizeof (rdev)) == 0);
+ /*FALLTHROUGH*/
+ case S_IFIFO:
+ case S_IFSOCK:
init_special_inode(ip, ip->i_mode, rdev);
ip->i_op = &zpl_special_inode_operations;
break;
*/
static znode_t *
zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
- dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl)
+ dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl,
+ struct dentry *dentry, struct inode *dip)
{
znode_t *zp;
struct inode *ip;
zp->z_blksz = blksz;
zp->z_seq = 0x7A4653;
zp->z_sync_cnt = 0;
+ zp->z_is_zvol = 0;
zfs_znode_sa_init(zsb, zp, db, obj_type, hdl);
}
ip->i_ino = obj;
- ip->i_mode = zp->z_mode;
- ip->i_mtime = ip->i_atime = ip->i_ctime = CURRENT_TIME_SEC;
+ zfs_inode_update(zp);
zfs_inode_set_ops(zsb, ip);
if (insert_inode_locked(ip))
goto error;
+ if (dentry) {
+ if (zpl_xattr_security_init(ip, dip, &dentry->d_name))
+ goto error;
+
+ d_instantiate(dentry, ip);
+ }
+
mutex_enter(&zsb->z_znodes_lock);
list_insert_tail(&zsb->z_all_znodes, zp);
membar_producer();
size = links = 0;
}
- if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode) ||
- S_ISFIFO(vap->va_mode) || S_ISSOCK(vap->va_mode))
+ if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
rdev = vap->va_rdev;
parent = dzp->z_id;
&empty_xattr, 8);
}
if (obj_type == DMU_OT_ZNODE ||
- (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode) ||
- S_ISFIFO(vap->va_mode) || S_ISSOCK(vap->va_mode))) {
+ (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb),
NULL, &rdev, 8);
}
VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
if (!(flag & IS_ROOT_NODE)) {
- *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl);
+ *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl,
+ vap->va_dentry, ZTOI(dzp));
ASSERT(*zpp != NULL);
ASSERT(dzp != NULL);
- err = zpl_xattr_security_init(ZTOI(*zpp), ZTOI(dzp));
- ASSERT3S(err, ==, 0);
} else {
/*
* If we are creating the root node, the "parent" we
ZFS_OBJ_HOLD_EXIT(zsb, obj);
}
+/*
+ * zfs_xvattr_set - apply the optional attributes requested in xvap to zp.
+ *
+ *	zp	- znode whose attributes are being changed
+ *	xvap	- extended vattr holding the requested optional attributes
+ *	tx	- transaction handed to each attribute update
+ *
+ * zfs_xvattr_set only updates the in-core attributes.  It is assumed
+ * that the caller will be doing an sa_bulk_update() to push the
+ * changes out.
+ *
+ * Each attribute is handled only when XVA_ISSET_REQ() reports it as
+ * requested, and XVA_SET_RTN() is invoked for it afterwards
+ * (presumably to report the attribute back to the caller as handled).
+ *
+ * NOTE(review): unlike the flag attributes, the creation time is
+ * written with sa_update() directly and its return value is discarded.
+ */
+void
+zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
+{
+	xoptattr_t *xoap;
+
+	xoap = xva_getxoptattr(xvap);
+	ASSERT(xoap);
+
+	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
+		/* Encoded form of xoa_createtime, as two 64-bit words. */
+		uint64_t times[2];
+		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
+		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
+		    &times, sizeof (times), tx);
+		XVA_SET_RTN(xvap, XAT_CREATETIME);
+	}
+	/* The remaining flag attributes are all applied to zp->z_pflags. */
+	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
+		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_READONLY);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
+		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_HIDDEN);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
+		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_SYSTEM);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
+		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_ARCHIVE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
+		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
+		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_NOUNLINK);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
+		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_APPENDONLY);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
+		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_NODUMP);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
+		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_OPAQUE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
+		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
+		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
+		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
+		/* The scanstamp is not a z_pflags bit; a helper stores it. */
+		zfs_sa_set_scanstamp(zp, xvap, tx);
+		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
+		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_REPARSE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
+		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_OFFLINE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
+		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_SPARSE);
+	}
+}
+
int
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
{
znode_t *zp;
int err;
sa_handle_t *hdl;
+ struct inode *ip;
*zpp = NULL;
+again:
+ ip = ilookup(zsb->z_sb, obj_num);
+
ZFS_OBJ_HOLD_ENTER(zsb, obj_num);
err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
if (err) {
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ iput(ip);
return (err);
}
doi.doi_bonus_size < sizeof (znode_phys_t)))) {
sa_buf_rele(db, NULL);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ iput(ip);
return (EINVAL);
}
hdl = dmu_buf_get_user(db);
if (hdl != NULL) {
- zp = sa_get_userdata(hdl);
+ if (ip == NULL) {
+ /*
+ * ilookup returned NULL, which means
+ * the znode is dying - but the SA handle isn't
+ * quite dead yet, we need to drop any locks
+ * we're holding, re-schedule the task and try again.
+ */
+ sa_buf_rele(db, NULL);
+ ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+
+ schedule();
+ goto again;
+ }
+ zp = sa_get_userdata(hdl);
/*
* Since "SA" does immediate eviction we
sa_buf_rele(db, NULL);
mutex_exit(&zp->z_lock);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ iput(ip);
return (err);
}
+ ASSERT3P(ip, ==, NULL);
+
/*
* Not found create new znode/vnode but only if file exists.
*
* bonus buffer.
*/
zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size,
- doi.doi_bonus_type, obj_num, NULL);
+ doi.doi_bonus_type, obj_num, NULL, NULL, NULL);
if (zp == NULL) {
err = ENOENT;
} else {
{
zfs_sb_t *zsb = ZTOZSB(zp);
uint64_t z_id = zp->z_id;
+ boolean_t drop_mutex = 0;
ASSERT(zp->z_sa_hdl);
/*
- * Don't allow a zfs_zget() while were trying to release this znode
+ * Don't allow a zfs_zget() while we're trying to release this znode.
+ *
+ * Linux allows direct memory reclaim which means that any KM_SLEEP
+ * allocation may trigger inode eviction. This can lead to a deadlock
+ * through the ->shrink_icache_memory()->evict()->zfs_inactive()->
+ * zfs_zinactive() call path. To avoid this deadlock the process
+ * must not reacquire the mutex when it is already holding it.
*/
- ZFS_OBJ_HOLD_ENTER(zsb, z_id);
+ if (!ZFS_OBJ_HOLD_OWNED(zsb, z_id)) {
+ ZFS_OBJ_HOLD_ENTER(zsb, z_id);
+ drop_mutex = 1;
+ }
+
mutex_enter(&zp->z_lock);
/*
*/
if (zp->z_unlinked) {
mutex_exit(&zp->z_lock);
- ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+
+ if (drop_mutex)
+ ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+
zfs_rmnode(zp);
return;
}
mutex_exit(&zp->z_lock);
zfs_znode_dmu_fini(zp);
- ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+
+ if (drop_mutex)
+ ZFS_OBJ_HOLD_EXIT(zsb, z_id);
}
void
dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
}
-#ifdef HAVE_MMAP
-/*
- * This is a dummy interface used when pvn_vplist_dirty() should *not*
- * be calling back into the fs for a putpage(). E.g.: when truncating
- * a file, the pages being "thrown away* don't need to be written out.
- */
-/* ARGSUSED */
-static int
-zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
- int flags, cred_t *cr)
-{
- ASSERT(0);
- return (0);
-}
-#endif /* HAVE_MMAP */
-
/*
* Increase the file length
*
int
zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
{
-#ifdef HAVE_MANDLOCKS
struct inode *ip = ZTOI(zp);
-#endif /* HAVE_MANDLOCKS */
dmu_tx_t *tx;
zfs_sb_t *zsb = ZTOZSB(zp);
zilog_t *zilog = zsb->z_log;
return (error);
}
-#ifdef HAVE_MANDLOCKS
/*
* Check for any locks in the region to be freed.
*/
-
- if (MANDLOCK(ip, (mode_t)mode)) {
+ if (ip->i_flock && mandatory_lock(ip)) {
uint64_t length = (len ? len : zp->z_size - off);
- if (error = chklock(ip, FWRITE, off, length, flag, NULL))
- return (error);
+ if (!lock_may_write(ip, off, length))
+ return (EAGAIN);
}
-#endif /* HAVE_MANDLOCKS */
if (len == 0) {
error = zfs_trunc(zp, off);