* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
* pushing cached pages (which acquires range locks) and syncing out
* cached atime changes. Third, zfs_zinactive() may require a new tx,
* which could deadlock the system if you were already holding one.
+ * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
*
* (3) All range locks must be grabbed before calling dmu_tx_assign(),
* as they can span dmu_tx_assign() calls.
znode_t *zp = VTOZ(vp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+ /*
+ * Clean up any locks held by this process on the vp.
+ */
+ cleanlocks(vp, ddi_get_pid(), 0);
+ cleanshares(vp, ddi_get_pid());
+
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
if ((flag & (FSYNC | FDSYNC)) && (count == 1))
atomic_dec_32(&zp->z_sync_cnt);
- /*
- * Clean up any locks held by this process on the vp.
- */
- cleanlocks(vp, ddi_get_pid(), 0);
- cleanshares(vp, ddi_get_pid());
-
if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
ZTOV(zp)->v_type == VREG &&
!(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) &&
*
* On Write: If we find a memory mapped page, we write to *both*
* the page and the dmu buffer.
- *
- * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
- * the file is memory mapped.
*/
-static int
-mappedwrite(vnode_t *vp, int nbytes, uio_t *uio, dmu_tx_t *tx)
+/*
+ * Refresh any cached (memory-mapped) pages covering [start, start+len)
+ * from the dmu object, so the page cache stays coherent with data that
+ * was just written through the dmu.
+ */
+static void
+update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid)
{
- znode_t *zp = VTOZ(vp);
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- int64_t start, off;
- int len = nbytes;
- int error = 0;
+ int64_t off;
- start = uio->uio_loffset;
off = start & PAGEOFFSET;
for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
page_t *pp;
- uint64_t bytes = MIN(PAGESIZE - off, len);
- uint64_t woff = uio->uio_loffset;
+ uint64_t nbytes = MIN(PAGESIZE - off, len);
- /*
- * We don't want a new page to "appear" in the middle of
- * the file update (because it may not get the write
- * update data), so we grab a lock to block
- * zfs_getpage().
- */
- rw_enter(&zp->z_map_lock, RW_WRITER);
if (pp = page_lookup(vp, start, SE_SHARED)) {
caddr_t va;
- rw_exit(&zp->z_map_lock);
va = zfs_map_page(pp, S_WRITE);
+ /* copy the authoritative dmu data into the cached page */
- error = uiomove(va+off, bytes, UIO_WRITE, uio);
- if (error == 0) {
- dmu_write(zfsvfs->z_os, zp->z_id,
- woff, bytes, va+off, tx);
- }
+ (void) dmu_read(os, oid, start+off, nbytes, va+off,
+ DMU_READ_PREFETCH);
zfs_unmap_page(pp, va);
page_unlock(pp);
- } else {
- error = dmu_write_uio(zfsvfs->z_os, zp->z_id,
- uio, bytes, tx);
- rw_exit(&zp->z_map_lock);
}
- len -= bytes;
+ len -= nbytes;
off = 0;
- if (error)
- break;
}
- return (error);
}
/*
int max_blksz = zfsvfs->z_max_blksz;
uint64_t pflags;
int error;
+ arc_buf_t *abuf;
/*
* Fasttrack empty write
* and allows us to do more fine-grained space accounting.
*/
while (n > 0) {
+ abuf = NULL;
+ woff = uio->uio_loffset;
+
+again:
+ if (zfs_usergroup_overquota(zfsvfs,
+ B_FALSE, zp->z_phys->zp_uid) ||
+ zfs_usergroup_overquota(zfsvfs,
+ B_TRUE, zp->z_phys->zp_gid)) {
+ if (abuf != NULL)
+ dmu_return_arcbuf(abuf);
+ error = EDQUOT;
+ break;
+ }
+
+ /*
+ * If dmu_assign_arcbuf() is expected to execute with minimum
+ * overhead, loan an arc buffer and copy user data to it before
+ * we enter a txg. This avoids holding a txg forever while we
+ * pagefault on a hanging NFS server mapping.
+ */
+ if (abuf == NULL && n >= max_blksz &&
+ woff >= zp->z_phys->zp_size &&
+ P2PHASE(woff, max_blksz) == 0 &&
+ zp->z_blksz == max_blksz) {
+ size_t cbytes;
+
+ abuf = dmu_request_arcbuf(zp->z_dbuf, max_blksz);
+ ASSERT(abuf != NULL);
+ ASSERT(arc_buf_size(abuf) == max_blksz);
+ if (error = uiocopy(abuf->b_data, max_blksz,
+ UIO_WRITE, uio, &cbytes)) {
+ dmu_return_arcbuf(abuf);
+ break;
+ }
+ ASSERT(cbytes == max_blksz);
+ }
+
/*
* Start a transaction.
*/
- woff = uio->uio_loffset;
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_bonus(tx, zp->z_id);
dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
- continue;
+ goto again;
}
dmu_tx_abort(tx);
+ if (abuf != NULL)
+ dmu_return_arcbuf(abuf);
break;
}
* Perhaps we should use SPA_MAXBLOCKSIZE chunks?
*/
nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));
- rw_enter(&zp->z_map_lock, RW_READER);
- tx_bytes = uio->uio_resid;
- if (vn_has_cached_data(vp)) {
- rw_exit(&zp->z_map_lock);
- error = mappedwrite(vp, nbytes, uio, tx);
+ if (abuf == NULL) {
+ tx_bytes = uio->uio_resid;
+ error = dmu_write_uio(zfsvfs->z_os, zp->z_id, uio,
+ nbytes, tx);
+ tx_bytes -= uio->uio_resid;
} else {
- error = dmu_write_uio(zfsvfs->z_os, zp->z_id,
- uio, nbytes, tx);
- rw_exit(&zp->z_map_lock);
+ tx_bytes = nbytes;
+ ASSERT(tx_bytes == max_blksz);
+ dmu_assign_arcbuf(zp->z_dbuf, woff, abuf, tx);
+ ASSERT(tx_bytes <= uio->uio_resid);
+ uioskip(uio, tx_bytes);
+ }
+ if (tx_bytes && vn_has_cached_data(vp)) {
+ update_pages(vp, woff,
+ tx_bytes, zfsvfs->z_os, zp->z_id);
}
- tx_bytes -= uio->uio_resid;
/*
* If we made no progress, we're done. If we made even
zgd_t *zgd = (zgd_t *)vzgd;
rl_t *rl = zgd->zgd_rl;
vnode_t *vp = ZTOV(rl->r_zp);
+ objset_t *os = rl->r_zp->z_zfsvfs->z_os;
dmu_buf_rele(db, vzgd);
zfs_range_unlock(rl);
- VN_RELE(vp);
+ /*
+ * Release the vnode asynchronously as we currently have the
+ * txg stopped from syncing.
+ */
+ VN_RELE_ASYNC(vp, dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
kmem_free(zgd, sizeof (zgd_t));
}
+#ifdef DEBUG
+static int zil_fault_io = 0;
+#endif
+
/*
* Get data to generate a TX_WRITE intent log record.
*/
if (zfs_zget(zfsvfs, lr->lr_foid, &zp) != 0)
return (ENOENT);
if (zp->z_unlinked) {
- VN_RELE(ZTOV(zp));
+ /*
+ * Release the vnode asynchronously as we currently have the
+ * txg stopped from syncing.
+ */
+ VN_RELE_ASYNC(ZTOV(zp),
+ dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
return (ENOENT);
}
error = ENOENT;
goto out;
}
- VERIFY(0 == dmu_read(os, lr->lr_foid, off, dlen, buf));
+ VERIFY(0 == dmu_read(os, lr->lr_foid, off, dlen, buf,
+ DMU_READ_NO_PREFETCH));
} else { /* indirect write */
uint64_t boff; /* block starting offset */
zgd->zgd_rl = rl;
zgd->zgd_zilog = zfsvfs->z_log;
zgd->zgd_bp = &lr->lr_blkptr;
- VERIFY(0 == dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db));
+#ifdef DEBUG
+ if (zil_fault_io) {
+ error = EIO;
+ zil_fault_io = 0;
+ } else {
+ error = dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db);
+ }
+#else
+ error = dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db);
+#endif
+ if (error != 0) {
+ kmem_free(zgd, sizeof (zgd_t));
+ goto out;
+ }
+
ASSERT(boff == db->db_offset);
lr->lr_blkoff = off - boff;
error = dmu_sync(zio, db, &lr->lr_blkptr,
}
out:
zfs_range_unlock(rl);
- VN_RELE(ZTOV(zp));
+ /*
+ * Release the vnode asynchronously as we currently have the
+ * txg stopped from syncing.
+ */
+ VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
return (error);
}
}
/*
+ * If vnode is for a device return a specfs vnode instead.
+ */
+static int
+specvp_check(vnode_t **vpp, cred_t *cr)
+{
+ int error = 0;
+
+ if (IS_DEVVP(*vpp)) {
+ struct vnode *svp;
+
+ /*
+ * Swap in the specfs vnode and drop the hold on the
+ * original; note *vpp is set to NULL if specvp() fails.
+ */
+ svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
+ VN_RELE(*vpp);
+ if (svp == NULL)
+ error = ENOSYS;
+ *vpp = svp;
+ }
+ return (error);
+}
+
+
+/*
* Lookup an entry in a directory, or an extended attribute directory.
* If it exists, return a held vnode reference for it.
*
{
znode_t *zdp = VTOZ(dvp);
zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
- int error;
+ int error = 0;
+
+ /* fast path */
+ if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {
+
+ if (dvp->v_type != VDIR) {
+ return (ENOTDIR);
+ } else if (zdp->z_dbuf == NULL) {
+ return (EIO);
+ }
+
+ if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
+ error = zfs_fastaccesschk_execute(zdp, cr);
+ if (!error) {
+ *vpp = dvp;
+ VN_HOLD(*vpp);
+ return (0);
+ }
+ return (error);
+ } else {
+ vnode_t *tvp = dnlc_lookup(dvp, nm);
+
+ if (tvp) {
+ error = zfs_fastaccesschk_execute(zdp, cr);
+ if (error) {
+ VN_RELE(tvp);
+ return (error);
+ }
+ if (tvp == DNLC_NO_VNODE) {
+ VN_RELE(tvp);
+ return (ENOENT);
+ } else {
+ *vpp = tvp;
+ return (specvp_check(vpp, cr));
+ }
+ }
+ }
+ }
+
+ DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zdp);
}
error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp);
- if (error == 0) {
- /*
- * Convert device special files
- */
- if (IS_DEVVP(*vpp)) {
- vnode_t *svp;
-
- svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
- VN_RELE(*vpp);
- if (svp == NULL)
- error = ENOSYS;
- else
- *vpp = svp;
- }
- }
+ if (error == 0)
+ error = specvp_check(vpp, cr);
ZFS_EXIT(zfsvfs);
return (error);
zfs_dirlock_t *dl;
dmu_tx_t *tx;
int error;
- zfs_acl_t *aclp = NULL;
- zfs_fuid_info_t *fuidp = NULL;
ksid_t *ksid;
uid_t uid;
gid_t gid = crgetgid(cr);
+ zfs_acl_ids_t acl_ids;
+ boolean_t fuid_dirtied;
/*
* If we have an ephemeral id, ACL, or XVATTR then
if (strcmp(name, "..") == 0)
error = EISDIR;
ZFS_EXIT(zfsvfs);
- if (aclp)
- zfs_acl_free(aclp);
- return (error);
- }
- }
- if (vsecp && aclp == NULL) {
- error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, &aclp);
- if (error) {
- ZFS_EXIT(zfsvfs);
- if (dl)
- zfs_dirent_unlock(dl);
return (error);
}
}
-
if (zp == NULL) {
uint64_t txtype;
goto out;
}
+ if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp,
+ &acl_ids)) != 0)
+ goto out;
+ if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
+ zfs_acl_ids_free(&acl_ids);
+ error = EDQUOT;
+ goto out;
+ }
+
tx = dmu_tx_create(os);
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- if ((aclp && aclp->z_has_fuids) || IS_EPHEMERAL(uid) ||
- IS_EPHEMERAL(gid)) {
- if (zfsvfs->z_fuid_obj == 0) {
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
- FUID_SIZE_ESTIMATE(zfsvfs));
- dmu_tx_hold_zap(tx, MASTER_NODE_OBJ,
- FALSE, NULL);
- } else {
- dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
- dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
- FUID_SIZE_ESTIMATE(zfsvfs));
- }
- }
+ fuid_dirtied = zfsvfs->z_fuid_dirty;
+ if (fuid_dirtied)
+ zfs_fuid_txhold(zfsvfs, tx);
dmu_tx_hold_bonus(tx, dzp->z_id);
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
- if ((dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) || aclp) {
+ if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, SPA_MAXBLOCKSIZE);
}
error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
+ zfs_acl_ids_free(&acl_ids);
zfs_dirent_unlock(dl);
if (error == ERESTART) {
dmu_tx_wait(tx);
}
dmu_tx_abort(tx);
ZFS_EXIT(zfsvfs);
- if (aclp)
- zfs_acl_free(aclp);
return (error);
}
- zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, aclp, &fuidp);
+ zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);
+
+ if (fuid_dirtied)
+ zfs_fuid_sync(zfsvfs, tx);
+
(void) zfs_link_create(dl, zp, tx, ZNEW);
+
txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
if (flag & FIGNORECASE)
txtype |= TX_CI;
zfs_log_create(zilog, tx, txtype, dzp, zp, name,
- vsecp, fuidp, vap);
- if (fuidp)
- zfs_fuid_info_free(fuidp);
+ vsecp, acl_ids.z_fuidp, vap);
+ zfs_acl_ids_free(&acl_ids);
dmu_tx_commit(tx);
} else {
int aflags = (flag & FAPPEND) ? V_APPEND : 0;
VN_RELE(ZTOV(zp));
} else {
*vpp = ZTOV(zp);
- /*
- * If vnode is for a device return a specfs vnode instead.
- */
- if (IS_DEVVP(*vpp)) {
- struct vnode *svp;
-
- svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
- VN_RELE(*vpp);
- if (svp == NULL) {
- error = ENOSYS;
- }
- *vpp = svp;
- }
+ error = specvp_check(vpp, cr);
}
- if (aclp)
- zfs_acl_free(aclp);
ZFS_EXIT(zfsvfs);
return (error);
uint64_t txtype;
dmu_tx_t *tx;
int error;
- zfs_acl_t *aclp = NULL;
- zfs_fuid_info_t *fuidp = NULL;
int zf = ZNEW;
ksid_t *ksid;
uid_t uid;
gid_t gid = crgetgid(cr);
+ zfs_acl_ids_t acl_ids;
+ boolean_t fuid_dirtied;
ASSERT(vap->va_type == VDIR);
return (error);
}
- if (vsecp && aclp == NULL) {
- error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, &aclp);
- if (error) {
- zfs_dirent_unlock(dl);
- ZFS_EXIT(zfsvfs);
- return (error);
- }
+ if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp,
+ &acl_ids)) != 0) {
+ zfs_dirent_unlock(dl);
+ ZFS_EXIT(zfsvfs);
+ return (error);
}
+ if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
+ zfs_acl_ids_free(&acl_ids);
+ zfs_dirent_unlock(dl);
+ ZFS_EXIT(zfsvfs);
+ return (EDQUOT);
+ }
+
/*
* Add a new entry to the directory.
*/
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
- if ((aclp && aclp->z_has_fuids) || IS_EPHEMERAL(uid) ||
- IS_EPHEMERAL(gid)) {
- if (zfsvfs->z_fuid_obj == 0) {
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
- FUID_SIZE_ESTIMATE(zfsvfs));
- dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
- } else {
- dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
- dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
- FUID_SIZE_ESTIMATE(zfsvfs));
- }
- }
- if ((dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) || aclp)
+ fuid_dirtied = zfsvfs->z_fuid_dirty;
+ if (fuid_dirtied)
+ zfs_fuid_txhold(zfsvfs, tx);
+ if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE)
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, SPA_MAXBLOCKSIZE);
error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
+ zfs_acl_ids_free(&acl_ids);
zfs_dirent_unlock(dl);
if (error == ERESTART) {
dmu_tx_wait(tx);
}
dmu_tx_abort(tx);
ZFS_EXIT(zfsvfs);
- if (aclp)
- zfs_acl_free(aclp);
return (error);
}
/*
* Create new node.
*/
- zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, aclp, &fuidp);
-
- if (aclp)
- zfs_acl_free(aclp);
+ zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);
+ if (fuid_dirtied)
+ zfs_fuid_sync(zfsvfs, tx);
/*
* Now put new name in parent dir.
*/
txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap);
if (flags & FIGNORECASE)
txtype |= TX_CI;
- zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, fuidp, vap);
+ zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
+ acl_ids.z_fuidp, vap);
- if (fuidp)
- zfs_fuid_info_free(fuidp);
+ zfs_acl_ids_free(&acl_ids);
dmu_tx_commit(tx);
zfs_dirent_unlock(dl);
}
}
+ if (flags & V_RDDIR_ACCFILTER) {
+ /*
+ * If we have no access at all, don't include
+ * this entry in the returned information
+ */
+ znode_t *ezp;
+ if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
+ goto skip_entry;
+ if (!zfs_has_access(ezp, cr)) {
+ VN_RELE(ZTOV(ezp));
+ goto skip_entry;
+ }
+ VN_RELE(ZTOV(ezp));
+ }
+
if (flags & V_RDDIR_ENTFLAGS)
reclen = EDIRENT_RECLEN(strlen(zap.za_name));
else
if (prefetch)
dmu_prefetch(os, objnum, 0, 0);
+ skip_entry:
/*
* Move to the next entry, fill in the previous offset.
*/
ZFS_VERIFY_ZP(zp);
pzp = zp->z_phys;
- mutex_enter(&zp->z_lock);
-
/*
* If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
* Also, if we are the owner don't bother, since owner should
(pzp->zp_uid != crgetuid(cr))) {
if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
skipaclchk, cr)) {
- mutex_exit(&zp->z_lock);
ZFS_EXIT(zfsvfs);
return (error);
}
* than to determine whether we were asked the question.
*/
+ mutex_enter(&zp->z_lock);
vap->va_type = vp->v_type;
vap->va_mode = pzp->zp_mode & MODEMASK;
zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
uint_t saved_mask;
int trim_mask = 0;
uint64_t new_mode;
+ uint64_t new_uid, new_gid;
znode_t *attrzp;
int need_policy = FALSE;
int err;
xoptattr_t *xoap;
zfs_acl_t *aclp = NULL;
boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
+ boolean_t fuid_dirtied = B_FALSE;
if (mask == 0)
return (0);
top:
attrzp = NULL;
+ /* Can this be moved to before the top label? */
if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
ZFS_EXIT(zfsvfs);
return (EROFS);
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_bonus(tx, zp->z_id);
- if (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
- ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid))) {
- if (zfsvfs->z_fuid_obj == 0) {
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
- FUID_SIZE_ESTIMATE(zfsvfs));
- dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
- } else {
- dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
- dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
- FUID_SIZE_ESTIMATE(zfsvfs));
- }
- }
if (mask & AT_MODE) {
uint64_t pmode = pzp->zp_mode;
new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
- if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) {
- dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
- return (err);
- }
+ if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
+ goto out;
if (pzp->zp_acl.z_acl_extern_obj) {
/* Are we upgrading ACL from old V0 format to new V1 */
if (zfsvfs->z_version <= ZPL_VERSION_FUID &&
}
}
- if ((mask & (AT_UID | AT_GID)) && pzp->zp_xattr != 0) {
- err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp);
- if (err) {
- dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
- if (aclp)
- zfs_acl_free(aclp);
- return (err);
+ if (mask & (AT_UID | AT_GID)) {
+ if (pzp->zp_xattr) {
+ err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp);
+ if (err)
+ goto out;
+ dmu_tx_hold_bonus(tx, attrzp->z_id);
+ }
+ if (mask & AT_UID) {
+ new_uid = zfs_fuid_create(zfsvfs,
+ (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
+ if (new_uid != pzp->zp_uid &&
+ zfs_usergroup_overquota(zfsvfs, B_FALSE, new_uid)) {
+ err = EDQUOT;
+ goto out;
+ }
+ }
+
+ if (mask & AT_GID) {
+ new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
+ cr, ZFS_GROUP, &fuidp);
+ if (new_gid != pzp->zp_gid &&
+ zfs_usergroup_overquota(zfsvfs, B_TRUE, new_gid)) {
+ err = EDQUOT;
+ goto out;
+ }
+ }
+ fuid_dirtied = zfsvfs->z_fuid_dirty;
+ if (fuid_dirtied) {
+ if (zfsvfs->z_fuid_obj == 0) {
+ dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
+ dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
+ FUID_SIZE_ESTIMATE(zfsvfs));
+ dmu_tx_hold_zap(tx, MASTER_NODE_OBJ,
+ FALSE, NULL);
+ } else {
+ dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
+ dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
+ FUID_SIZE_ESTIMATE(zfsvfs));
+ }
}
- dmu_tx_hold_bonus(tx, attrzp->z_id);
}
err = dmu_tx_assign(tx, TXG_NOWAIT);
if (err) {
- if (attrzp)
- VN_RELE(ZTOV(attrzp));
-
- if (aclp) {
- zfs_acl_free(aclp);
- aclp = NULL;
- }
-
- if (err == ERESTART) {
+ if (err == ERESTART)
dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- goto top;
- }
- dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
- return (err);
+ goto out;
}
dmu_buf_will_dirty(zp->z_dbuf, tx);
if (mask & AT_MODE) {
mutex_enter(&zp->z_acl_lock);
zp->z_phys->zp_mode = new_mode;
- err = zfs_aclset_common(zp, aclp, cr, &fuidp, tx);
+ err = zfs_aclset_common(zp, aclp, cr, tx);
ASSERT3U(err, ==, 0);
+ zp->z_acl_cached = aclp;
+ aclp = NULL;
mutex_exit(&zp->z_acl_lock);
}
mutex_enter(&attrzp->z_lock);
if (mask & AT_UID) {
- pzp->zp_uid = zfs_fuid_create(zfsvfs,
- vap->va_uid, cr, ZFS_OWNER, tx, &fuidp);
- if (attrzp) {
- attrzp->z_phys->zp_uid = zfs_fuid_create(zfsvfs,
- vap->va_uid, cr, ZFS_OWNER, tx, &fuidp);
- }
+ pzp->zp_uid = new_uid;
+ if (attrzp)
+ attrzp->z_phys->zp_uid = new_uid;
}
if (mask & AT_GID) {
- pzp->zp_gid = zfs_fuid_create(zfsvfs, vap->va_gid,
- cr, ZFS_GROUP, tx, &fuidp);
+ pzp->zp_gid = new_gid;
if (attrzp)
- attrzp->z_phys->zp_gid = zfs_fuid_create(zfsvfs,
- vap->va_gid, cr, ZFS_GROUP, tx, &fuidp);
+ attrzp->z_phys->zp_gid = new_gid;
}
- if (aclp)
- zfs_acl_free(aclp);
-
if (attrzp)
mutex_exit(&attrzp->z_lock);
zfs_xvattr_set(zp, xvap);
}
+ if (fuid_dirtied)
+ zfs_fuid_sync(zfsvfs, tx);
+
if (mask != 0)
zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
- if (fuidp)
- zfs_fuid_info_free(fuidp);
mutex_exit(&zp->z_lock);
+out:
if (attrzp)
VN_RELE(ZTOV(attrzp));
- dmu_tx_commit(tx);
+ if (aclp)
+ zfs_acl_free(aclp);
+
+ if (fuidp) {
+ zfs_fuid_info_free(fuidp);
+ fuidp = NULL;
+ }
+
+ if (err)
+ dmu_tx_abort(tx);
+ else
+ dmu_tx_commit(tx);
+
+ if (err == ERESTART)
+ goto top;
ZFS_EXIT(zfsvfs);
return (err);
int len = strlen(link);
int error;
int zflg = ZNEW;
- zfs_fuid_info_t *fuidp = NULL;
+ zfs_acl_ids_t acl_ids;
+ boolean_t fuid_dirtied;
ASSERT(vap->va_type == VLNK);
return (error);
}
+ VERIFY(0 == zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids));
+ if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
+ zfs_acl_ids_free(&acl_ids);
+ zfs_dirent_unlock(dl);
+ ZFS_EXIT(zfsvfs);
+ return (EDQUOT);
+ }
tx = dmu_tx_create(zfsvfs->z_os);
+ fuid_dirtied = zfsvfs->z_fuid_dirty;
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
dmu_tx_hold_bonus(tx, dzp->z_id);
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
- if (dzp->z_phys->zp_flags & ZFS_INHERIT_ACE)
+ if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE)
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE);
- if (IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr))) {
- if (zfsvfs->z_fuid_obj == 0) {
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
- FUID_SIZE_ESTIMATE(zfsvfs));
- dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
- } else {
- dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
- dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
- FUID_SIZE_ESTIMATE(zfsvfs));
- }
- }
+ if (fuid_dirtied)
+ zfs_fuid_txhold(zfsvfs, tx);
error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
+ zfs_acl_ids_free(&acl_ids);
zfs_dirent_unlock(dl);
if (error == ERESTART) {
dmu_tx_wait(tx);
* otherwise, store it just like any other file data.
*/
if (sizeof (znode_phys_t) + len <= dmu_bonus_max()) {
- zfs_mknode(dzp, vap, tx, cr, 0, &zp, len, NULL, &fuidp);
+ zfs_mknode(dzp, vap, tx, cr, 0, &zp, len, &acl_ids);
if (len != 0)
bcopy(link, zp->z_phys + 1, len);
} else {
dmu_buf_t *dbp;
- zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, NULL, &fuidp);
+ zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);
+
+ if (fuid_dirtied)
+ zfs_fuid_sync(zfsvfs, tx);
/*
* Nothing can access the znode yet so no locking needed
* for growing the znode's blocksize.
* Insert the new object into the directory.
*/
(void) zfs_link_create(dl, zp, tx, ZNEW);
-out:
if (error == 0) {
uint64_t txtype = TX_SYMLINK;
if (flags & FIGNORECASE)
txtype |= TX_CI;
zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
}
- if (fuidp)
- zfs_fuid_info_free(fuidp);
+
+ zfs_acl_ids_free(&acl_ids);
dmu_tx_commit(tx);
{
znode_t *zp = VTOZ(vp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- zilog_t *zilog = zfsvfs->z_log;
dmu_tx_t *tx;
- rl_t *rl;
u_offset_t off, koff;
size_t len, klen;
uint64_t filesz;
* a read-modify-write).
*/
if (off < filesz && zp->z_blksz > PAGESIZE) {
- if (!ISP2(zp->z_blksz)) {
- /* Only one block in the file. */
- klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE);
- koff = 0;
- } else {
- klen = zp->z_blksz;
- koff = P2ALIGN(off, (u_offset_t)klen);
- }
+ klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE);
+ koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0;
ASSERT(koff <= filesz);
if (koff + klen > filesz)
klen = P2ROUNDUP(filesz - koff, (uint64_t)PAGESIZE);
pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags);
}
ASSERT3U(btop(len), ==, btopr(len));
-top:
- rl = zfs_range_lock(zp, off, len, RL_WRITER);
+
/*
* Can't push pages past end-of-file.
*/
- filesz = zp->z_phys->zp_size;
if (off >= filesz) {
/* ignore all pages */
err = 0;
len = filesz - off;
}
+ if (zfs_usergroup_overquota(zfsvfs, B_FALSE, zp->z_phys->zp_uid) ||
+ zfs_usergroup_overquota(zfsvfs, B_TRUE, zp->z_phys->zp_gid)) {
+ err = EDQUOT;
+ goto out;
+ }
+top:
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_write(tx, zp->z_id, off, len);
dmu_tx_hold_bonus(tx, zp->z_id);
err = dmu_tx_assign(tx, TXG_NOWAIT);
if (err != 0) {
if (err == ERESTART) {
- zfs_range_unlock(rl);
dmu_tx_wait(tx);
dmu_tx_abort(tx);
- err = 0;
goto top;
}
dmu_tx_abort(tx);
if (err == 0) {
zfs_time_stamper(zp, CONTENT_MODIFIED, tx);
- zfs_log_write(zilog, tx, TX_WRITE, zp, off, len, 0);
- dmu_tx_commit(tx);
+ zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
}
+ dmu_tx_commit(tx);
out:
- zfs_range_unlock(rl);
pvn_write_done(pp, (err ? B_ERROR : 0) | flags);
if (offp)
*offp = off;
page_t *pp;
size_t io_len;
u_offset_t io_off;
- uint64_t filesz;
+ uint_t blksz;
+ rl_t *rl;
int error = 0;
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
- if (len == 0) {
+ /*
+ * Align this request to the file block size in case we kluster.
+ * XXX - this can result in pretty aggressive locking, which can
+ * impact simultaneous read/write access. One option might be
+ * to break up long requests (len == 0) into block-by-block
+ * operations to get narrower locking.
+ */
+ blksz = zp->z_blksz;
+ if (ISP2(blksz))
+ io_off = P2ALIGN_TYPED(off, blksz, u_offset_t);
+ else
+ io_off = 0;
+ if (len > 0 && ISP2(blksz))
+ io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t);
+ else
+ io_len = 0;
+
+ if (io_len == 0) {
/*
- * Search the entire vp list for pages >= off.
+ * Search the entire vp list for pages >= io_off.
*/
- error = pvn_vplist_dirty(vp, (u_offset_t)off, zfs_putapage,
- flags, cr);
+ rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER);
+ error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr);
goto out;
}
+ rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER);
- filesz = zp->z_phys->zp_size; /* get consistent copy of zp_size */
- if (off > filesz) {
+ if (off > zp->z_phys->zp_size) {
/* past end of file */
+ zfs_range_unlock(rl);
ZFS_EXIT(zfsvfs);
return (0);
}
- len = MIN(len, filesz - off);
+ len = MIN(io_len, P2ROUNDUP(zp->z_phys->zp_size, PAGESIZE) - io_off);
- for (io_off = off; io_off < off + len; io_off += io_len) {
+ for (off = io_off; io_off < off + len; io_off += io_len) {
if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
pp = page_lookup(vp, io_off,
(flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED);
}
}
out:
+ zfs_range_unlock(rl);
if ((flags & B_ASYNC) == 0)
zil_commit(zfsvfs->z_log, UINT64_MAX, zp->z_id);
ZFS_EXIT(zfsvfs);
/*
* If we can't find a page in the cache, we will create a new page
* and fill it with file data. For efficiency, we may try to fill
- * multiple pages at once (klustering).
+ * multiple pages at once (klustering) to fill up the supplied page
+ * list. Note that the pages to be filled are held with an exclusive
+ * lock to prevent access by other threads while they are being filled.
*/
static int
zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg,
znode_t *zp = VTOZ(vp);
page_t *pp, *cur_pp;
objset_t *os = zp->z_zfsvfs->z_os;
- caddr_t va;
u_offset_t io_off, total;
- uint64_t oid = zp->z_id;
size_t io_len;
- uint64_t filesz;
int err;
- /*
- * If we are only asking for a single page don't bother klustering.
- */
- filesz = zp->z_phys->zp_size; /* get consistent copy of zp_size */
- if (off >= filesz)
- return (EFAULT);
if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) {
+ /*
+ * We only have a single page, don't bother klustering
+ */
io_off = off;
io_len = PAGESIZE;
- pp = page_create_va(vp, io_off, io_len, PG_WAIT, seg, addr);
+ pp = page_create_va(vp, io_off, io_len,
+ PG_EXCL | PG_WAIT, seg, addr);
} else {
/*
- * Try to fill a kluster of pages (a blocks worth).
+ * Try to find enough pages to fill the page list
*/
- size_t klen;
- u_offset_t koff;
-
- if (!ISP2(zp->z_blksz)) {
- /* Only one block in the file. */
- klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE);
- koff = 0;
- } else {
- /*
- * It would be ideal to align our offset to the
- * blocksize but doing so has resulted in some
- * strange application crashes. For now, we
- * leave the offset as is and only adjust the
- * length if we are off the end of the file.
- */
- koff = off;
- klen = plsz;
- }
- ASSERT(koff <= filesz);
- if (koff + klen > filesz)
- klen = P2ROUNDUP(filesz, (uint64_t)PAGESIZE) - koff;
- ASSERT3U(off, >=, koff);
- ASSERT3U(off, <, koff + klen);
pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
- &io_len, koff, klen, 0);
+ &io_len, off, plsz, 0);
}
if (pp == NULL) {
/*
- * Some other thread entered the page before us.
- * Return to zfs_getpage to retry the lookup.
+ * The page already exists, nothing to do here.
*/
*pl = NULL;
return (0);
*/
cur_pp = pp;
for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
+ caddr_t va;
+
ASSERT3U(io_off, ==, cur_pp->p_offset);
va = zfs_map_page(cur_pp, S_WRITE);
- err = dmu_read(os, oid, io_off, PAGESIZE, va);
+ err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
+ DMU_READ_PREFETCH);
zfs_unmap_page(cur_pp, va);
if (err) {
/* On error, toss the entire kluster */
}
cur_pp = cur_pp->p_next;
}
-out:
+
/*
- * Fill in the page list array from the kluster. If
- * there are too many pages in the kluster, return
- * as many pages as possible starting from the desired
- * offset `off'.
+ * Fill in the page list array from the kluster starting
+ * from the desired offset `off'.
* NOTE: the page list will always be null terminated.
*/
pvn_plist_init(pp, pl, plsz, off, io_len, rw);
+ ASSERT(pl == NULL || (*pl)->p_offset == off);
return (0);
}
/*
* Return pointers to the pages for the file region [off, off + len]
* in the pl array. If plsz is greater than len, this function may
- * also return page pointers from before or after the specified
- * region (i.e. some region [off', off' + plsz]). These additional
- * pages are only returned if they are already in the cache, or were
- * created as part of a klustered read.
+ * also return page pointers from after the specified region
+ * (i.e. the region [off, off + plsz]). These additional pages are
+ * only returned if they are already in the cache, or were created as
+ * part of a klustered read.
*
* IN: vp - vnode of file to get data from.
* off - position in file to get data from.
{
znode_t *zp = VTOZ(vp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- page_t *pp, **pl0 = pl;
- int need_unlock = 0, err = 0;
- offset_t orig_off;
+ page_t **pl0 = pl;
+ int err = 0;
+
+ /* we do our own caching, faultahead is unnecessary */
+ if (pl == NULL)
+ return (0);
+ else if (len > plsz)
+ len = plsz;
+ else
+ len = P2ROUNDUP(len, PAGESIZE);
+ ASSERT(plsz >= len);
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
if (protp)
*protp = PROT_ALL;
- /* no faultahead (for now) */
- if (pl == NULL) {
- ZFS_EXIT(zfsvfs);
- return (0);
- }
-
- /* can't fault past EOF */
- if (off >= zp->z_phys->zp_size) {
- ZFS_EXIT(zfsvfs);
- return (EFAULT);
- }
- orig_off = off;
-
/*
- * If we already own the lock, then we must be page faulting
- * in the middle of a write to this file (i.e., we are writing
- * to this file using data from a mapped region of the file).
- */
- if (rw_owner(&zp->z_map_lock) != curthread) {
- rw_enter(&zp->z_map_lock, RW_WRITER);
- need_unlock = TRUE;
- }
-
- /*
- * Loop through the requested range [off, off + len] looking
+ * Loop through the requested range [off, off + len) looking
* for pages. If we don't find a page, we will need to create
* a new page and fill it with data from the file.
*/
while (len > 0) {
- if (plsz < PAGESIZE)
- break;
- if (pp = page_lookup(vp, off, SE_SHARED)) {
- *pl++ = pp;
+ if (*pl = page_lookup(vp, off, SE_SHARED))
+ *(pl+1) = NULL;
+ else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw))
+ goto out;
+ while (*pl) {
+ ASSERT3U((*pl)->p_offset, ==, off);
off += PAGESIZE;
addr += PAGESIZE;
- len -= PAGESIZE;
- plsz -= PAGESIZE;
- } else {
- err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw);
- if (err)
- goto out;
- /*
- * klustering may have changed our region
- * to be block aligned.
- */
- if (((pp = *pl) != 0) && (off != pp->p_offset)) {
- int delta = off - pp->p_offset;
- len += delta;
- off -= delta;
- addr -= delta;
- }
- while (*pl) {
- pl++;
- off += PAGESIZE;
- addr += PAGESIZE;
- plsz -= PAGESIZE;
- if (len > PAGESIZE)
- len -= PAGESIZE;
- else
- len = 0;
+ if (len > 0) {
+ ASSERT3U(len, >=, PAGESIZE);
+ len -= PAGESIZE;
}
+ ASSERT3U(plsz, >=, PAGESIZE);
+ plsz -= PAGESIZE;
+ pl++;
}
}
/*
* Fill out the page array with any pages already in the cache.
*/
- while (plsz > 0) {
- pp = page_lookup_nowait(vp, off, SE_SHARED);
- if (pp == NULL)
- break;
- *pl++ = pp;
- off += PAGESIZE;
- plsz -= PAGESIZE;
+ while (plsz > 0 &&
+ (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) {
+ off += PAGESIZE;
+ plsz -= PAGESIZE;
}
-
- ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
out:
- /*
- * We can't grab the range lock for the page as reader which would
- * stop truncation as this leads to deadlock. So we need to recheck
- * the file size.
- */
- if (orig_off >= zp->z_phys->zp_size)
- err = EFAULT;
if (err) {
/*
* Release any pages we have previously locked.
*/
while (pl > pl0)
page_unlock(*--pl);
+ } else {
+ ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
}
*pl = NULL;
- if (need_unlock)
- rw_exit(&zp->z_map_lock);
-
ZFS_EXIT(zfsvfs);
return (err);
}
(vp->v_type == VREG || vp->v_type == VDIR);
return (0);
+ case _PC_ACCESS_FILTERING:
+ *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) &&
+ vp->v_type == VDIR;
+ return (0);
+
case _PC_ACL_ENABLED:
*valp = _ACL_ACE_ENABLED;
return (0);
};
/*
+ * special share hidden files vnode operations template
+ */
+vnodeops_t *zfs_sharevnodeops;
+/*
+ * Only attribute, access-control and identity operations are provided
+ * for the hidden share vnode; data operations (read/write/lookup) are
+ * deliberately not wired up here.
+ */
+const fs_operation_def_t zfs_sharevnodeops_template[] = {
+ VOPNAME_GETATTR, { .vop_getattr = zfs_getattr },
+ VOPNAME_ACCESS, { .vop_access = zfs_access },
+ VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive },
+ VOPNAME_FID, { .vop_fid = zfs_fid },
+ VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf },
+ VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr },
+ VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr },
+ VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
+ NULL, NULL
+};
+
+/*
* Extended attribute directory vnode operations template
* This template is identical to the directory vnodes
* operation template except for restricted operations: