int
zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx)
{
-#ifdef HAVE_SHARE
+#ifdef HAVE_SMB_SHARE
zfs_acl_ids_t acl_ids;
vattr_t vattr;
znode_t *sharezp;
vattr.va_uid = crgetuid(kcred);
vattr.va_gid = crgetgid(kcred);
- sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
+ sharezp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
sharezp->z_moved = 0;
sharezp->z_unlinked = 0;
sharezp->z_atime_dirty = 0;
return (error);
#else
return (0);
-#endif /* HAVE_SHARE */
-}
-
-/*
- * define a couple of values we need available
- * for both 64 and 32 bit environments.
- */
-#ifndef NBITSMINOR64
-#define NBITSMINOR64 32
-#endif
-#ifndef MAXMAJ64
-#define MAXMAJ64 0xffffffffUL
-#endif
-#ifndef MAXMIN64
-#define MAXMIN64 0xffffffffUL
-#endif
-
-/*
- * Create special expldev for ZFS private use.
- * Can't use standard expldev since it doesn't do
- * what we want. The standard expldev() takes a
- * dev32_t in LP64 and expands it to a long dev_t.
- * We need an interface that takes a dev32_t in ILP32
- * and expands it to a long dev_t.
- */
-static uint64_t
-zfs_expldev(dev_t dev)
-{
-#ifndef _LP64
- major_t major = (major_t)dev >> NBITSMINOR32 & MAXMAJ32;
- return (((uint64_t)major << NBITSMINOR64) |
- ((minor_t)dev & MAXMIN32));
-#else
- return (dev);
-#endif
+#endif /* HAVE_SMB_SHARE */
}
static void
{
znode_t *zp;
- zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
+ zp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
*ip = ZTOI(zp);
return (0);
static void
zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip)
{
- uint64_t rdev;
+ uint64_t rdev = 0;
switch (ip->i_mode & S_IFMT) {
case S_IFREG:
ip->i_op = &zpl_symlink_inode_operations;
break;
+ /*
+ * rdev is stored in an SA only for device files.
+ */
case S_IFCHR:
case S_IFBLK:
- case S_IFIFO:
- case S_IFSOCK:
VERIFY(sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zsb),
&rdev, sizeof (rdev)) == 0);
+ /*FALLTHROUGH*/
+ case S_IFIFO:
+ case S_IFSOCK:
init_special_inode(ip, ip->i_mode, rdev);
ip->i_op = &zpl_special_inode_operations;
break;
*/
static znode_t *
zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
- dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl)
+ dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl,
+ struct dentry *dentry, struct inode *dip)
{
znode_t *zp;
struct inode *ip;
zp->z_blksz = blksz;
zp->z_seq = 0x7A4653;
zp->z_sync_cnt = 0;
+ zp->z_is_zvol = 0;
zfs_znode_sa_init(zsb, zp, db, obj_type, hdl);
}
ip->i_ino = obj;
- ip->i_mode = zp->z_mode;
- ip->i_mtime = ip->i_atime = ip->i_ctime = CURRENT_TIME_SEC;
+ zfs_inode_update(zp);
zfs_inode_set_ops(zsb, ip);
if (insert_inode_locked(ip))
goto error;
+ if (dentry) {
+ if (zpl_xattr_security_init(ip, dip, &dentry->d_name))
+ goto error;
+
+ d_instantiate(dentry, ip);
+ }
+
mutex_enter(&zsb->z_znodes_lock);
list_insert_tail(&zsb->z_all_znodes, zp);
membar_producer();
size = links = 0;
}
- if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode) ||
- S_ISFIFO(vap->va_mode) || S_ISSOCK(vap->va_mode))
- rdev = zfs_expldev(vap->va_rdev);
+ if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
+ rdev = vap->va_rdev;
parent = dzp->z_id;
mode = acl_ids->z_mode;
&empty_xattr, 8);
}
if (obj_type == DMU_OT_ZNODE ||
- (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode) ||
- S_ISFIFO(vap->va_mode) || S_ISSOCK(vap->va_mode))) {
+ (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb),
NULL, &rdev, 8);
}
VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
if (!(flag & IS_ROOT_NODE)) {
- *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl);
+ *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl,
+ vap->va_dentry, ZTOI(dzp));
ASSERT(*zpp != NULL);
ASSERT(dzp != NULL);
- err = zpl_xattr_security_init(ZTOI(*zpp), ZTOI(dzp));
- ASSERT3S(err, ==, 0);
} else {
/*
* If we are creating the root node, the "parent" we
ZFS_OBJ_HOLD_EXIT(zsb, obj);
}
+/*
+ * Apply the optional attributes requested in xvap to the znode zp.
+ *
+ * For each attribute flagged as requested (XVA_ISSET_REQ) the matching
+ * value from the xoptattr_t is applied and the attribute is then marked
+ * as returned (XVA_SET_RTN).  Flag-style attributes are applied to
+ * zp->z_pflags through ZFS_ATTR_SET, XAT_CREATETIME is encoded and
+ * written with sa_update(), and XAT_AV_SCANSTAMP is handed to
+ * zfs_sa_set_scanstamp().
+ *
+ * zfs_xvattr_set only updates the in-core attributes
+ * it is assumed the caller will be doing an sa_bulk_update
+ * to push the changes out
+ *
+ * NOTE(review): the XAT_CREATETIME branch calls sa_update() directly and
+ * ignores its return value, so the "in-core only" statement above does
+ * not describe that branch - confirm the intended contract.
+ */
+void
+zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
+{
+ xoptattr_t *xoap;
+
+ xoap = xva_getxoptattr(xvap);
+ ASSERT(xoap);
+
+ if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
+ uint64_t times[2];
+ ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
+ /* Pass the address of the encoded time pair as the SA buffer. */
+ (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
+ &times, sizeof (times), tx);
+ XVA_SET_RTN(xvap, XAT_CREATETIME);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
+ ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
+ zp->z_pflags, tx);
+ XVA_SET_RTN(xvap, XAT_READONLY);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
+ ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
+ zp->z_pflags, tx);
+ XVA_SET_RTN(xvap, XAT_HIDDEN);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
+ ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
+ zp->z_pflags, tx);
+ XVA_SET_RTN(xvap, XAT_SYSTEM);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
+ ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
+ zp->z_pflags, tx);
+ XVA_SET_RTN(xvap, XAT_ARCHIVE);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
+ ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
+ zp->z_pflags, tx);
+ XVA_SET_RTN(xvap, XAT_IMMUTABLE);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
+ ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
+ zp->z_pflags, tx);
+ XVA_SET_RTN(xvap, XAT_NOUNLINK);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
+ ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
+ zp->z_pflags, tx);
+ XVA_SET_RTN(xvap, XAT_APPENDONLY);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
+ ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
+ zp->z_pflags, tx);
+ XVA_SET_RTN(xvap, XAT_NODUMP);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
+ ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
+ zp->z_pflags, tx);
+ XVA_SET_RTN(xvap, XAT_OPAQUE);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
+ ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
+ xoap->xoa_av_quarantined, zp->z_pflags, tx);
+ XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
+ ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
+ zp->z_pflags, tx);
+ XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
+ zfs_sa_set_scanstamp(zp, xvap, tx);
+ XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
+ ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
+ zp->z_pflags, tx);
+ XVA_SET_RTN(xvap, XAT_REPARSE);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
+ ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
+ zp->z_pflags, tx);
+ XVA_SET_RTN(xvap, XAT_OFFLINE);
+ }
+ if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
+ ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
+ zp->z_pflags, tx);
+ XVA_SET_RTN(xvap, XAT_SPARSE);
+ }
+}
+
int
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
{
znode_t *zp;
int err;
sa_handle_t *hdl;
+ struct inode *ip;
*zpp = NULL;
+again:
+ ip = ilookup(zsb->z_sb, obj_num);
+
ZFS_OBJ_HOLD_ENTER(zsb, obj_num);
err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
if (err) {
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ iput(ip);
return (err);
}
doi.doi_bonus_size < sizeof (znode_phys_t)))) {
sa_buf_rele(db, NULL);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ iput(ip);
return (EINVAL);
}
hdl = dmu_buf_get_user(db);
if (hdl != NULL) {
- zp = sa_get_userdata(hdl);
+ if (ip == NULL) {
+ /*
+ * ilookup returned NULL, which means the znode
+ * is dying but the SA handle isn't quite dead
+ * yet.  Drop any locks we're holding, reschedule
+ * the task, and try again.
+ */
+ sa_buf_rele(db, NULL);
+ ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+
+ schedule();
+ goto again;
+ }
+ zp = sa_get_userdata(hdl);
/*
* Since "SA" does immediate eviction we
sa_buf_rele(db, NULL);
mutex_exit(&zp->z_lock);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ iput(ip);
return (err);
}
+ ASSERT3P(ip, ==, NULL);
+
/*
* Not found create new znode/vnode but only if file exists.
*
* bonus buffer.
*/
zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size,
- doi.doi_bonus_type, obj_num, NULL);
+ doi.doi_bonus_type, obj_num, NULL, NULL, NULL);
if (zp == NULL) {
err = ENOENT;
} else {
{
zfs_sb_t *zsb = ZTOZSB(zp);
uint64_t z_id = zp->z_id;
+ boolean_t drop_mutex = 0;
ASSERT(zp->z_sa_hdl);
/*
- * Don't allow a zfs_zget() while were trying to release this znode
+ * Don't allow a zfs_zget() while we're trying to release this znode.
+ *
+ * Linux allows direct memory reclaim which means that any KM_SLEEP
+ * allocation may trigger inode eviction. This can lead to a deadlock
+ * through the ->shrink_icache_memory()->evict()->zfs_inactive()->
+ * zfs_zinactive() call path. To avoid this deadlock the process
+ * must not reacquire the mutex when it is already holding it.
*/
- ZFS_OBJ_HOLD_ENTER(zsb, z_id);
+ if (!ZFS_OBJ_HOLD_OWNED(zsb, z_id)) {
+ ZFS_OBJ_HOLD_ENTER(zsb, z_id);
+ drop_mutex = 1;
+ }
+
mutex_enter(&zp->z_lock);
/*
*/
if (zp->z_unlinked) {
mutex_exit(&zp->z_lock);
- ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+
+ if (drop_mutex)
+ ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+
zfs_rmnode(zp);
return;
}
mutex_exit(&zp->z_lock);
zfs_znode_dmu_fini(zp);
- ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+
+ if (drop_mutex)
+ ZFS_OBJ_HOLD_EXIT(zsb, z_id);
}
void
dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
}
-#ifdef HAVE_MMAP
-/*
- * This is a dummy interface used when pvn_vplist_dirty() should *not*
- * be calling back into the fs for a putpage(). E.g.: when truncating
- * a file, the pages being "thrown away* don't need to be written out.
- */
-/* ARGSUSED */
-static int
-zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
- int flags, cred_t *cr)
-{
- ASSERT(0);
- return (0);
-}
-#endif /* HAVE_MMAP */
-
/*
* Increase the file length
*
int
zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
{
-#ifdef HAVE_MANDLOCKS
struct inode *ip = ZTOI(zp);
-#endif /* HAVE_MANDLOCKS */
dmu_tx_t *tx;
zfs_sb_t *zsb = ZTOZSB(zp);
zilog_t *zilog = zsb->z_log;
return (error);
}
-#ifdef HAVE_MANDLOCKS
/*
* Check for any locks in the region to be freed.
*/
-
- if (MANDLOCK(ip, (mode_t)mode)) {
+ if (ip->i_flock && mandatory_lock(ip)) {
uint64_t length = (len ? len : zp->z_size - off);
- if (error = chklock(ip, FWRITE, off, length, flag, NULL))
- return (error);
+ if (!lock_may_write(ip, off, length))
+ return (EAGAIN);
}
-#endif /* HAVE_MANDLOCKS */
if (len == 0) {
error = zfs_trunc(zp, off);
void
zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
{
+ struct super_block *sb;
+ zfs_sb_t *zsb;
uint64_t moid, obj, sa_obj, version;
+ uint64_t sense = ZFS_CASE_SENSITIVE;
uint64_t norm = 0;
nvpair_t *elem;
int error;
- timestruc_t now;
- dmu_buf_t *db;
- znode_phys_t *pzp;
+ int i;
+ znode_t *rootzp = NULL;
+ vattr_t vattr;
+ znode_t *zp;
+ zfs_acl_ids_t acl_ids;
/*
* First attempt to create master node.
ASSERT(error == 0);
if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
norm = val;
+ else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
+ sense = val;
}
ASSERT(version != 0);
error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
ASSERT(error == 0);
/*
- * Create root znode with code free of VFS dependencies. This
- * is important because without a registered filesystem and super
- * block all the required VFS hooks will be missing. The critical
- * thing is to just crete the required root znode.
+ * Create root znode. Create minimal znode/inode/zsb/sb
+ * to allow zfs_mknode to work.
*/
- obj = zap_create_norm(os, norm, DMU_OT_DIRECTORY_CONTENTS,
- DMU_OT_ZNODE, sizeof (znode_phys_t), tx);
+ vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID;
+ vattr.va_mode = S_IFDIR|0755;
+ vattr.va_uid = crgetuid(cr);
+ vattr.va_gid = crgetgid(cr);
+
+ rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
+ rootzp->z_moved = 0;
+ rootzp->z_unlinked = 0;
+ rootzp->z_atime_dirty = 0;
+ rootzp->z_is_sa = USE_SA(version, os);
+
+ zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP);
+ zsb->z_os = os;
+ zsb->z_parent = zsb;
+ zsb->z_version = version;
+ zsb->z_use_fuids = USE_FUIDS(version, os);
+ zsb->z_use_sa = USE_SA(version, os);
+ zsb->z_norm = norm;
+
+ sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP);
+ sb->s_fs_info = zsb;
- VERIFY(0 == dmu_bonus_hold(os, obj, FTAG, &db));
- dmu_buf_will_dirty(db, tx);
+ ZTOI(rootzp)->i_sb = sb;
+
+ error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
+ &zsb->z_attr_table);
+
+ ASSERT(error == 0);
/*
- * Initialize the znode physical data to zero.
+ * Fold case on file systems that are always or sometimes case
+ * insensitive.
*/
- ASSERT(db->db_size >= sizeof (znode_phys_t));
- bzero(db->db_data, db->db_size);
- pzp = db->db_data;
+ if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
+ zsb->z_norm |= U8_TEXTPREP_TOUPPER;
- if (USE_FUIDS(version, os))
- pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
+ mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&zsb->z_all_znodes, sizeof (znode_t),
+ offsetof(znode_t, z_link_node));
- pzp->zp_size = 2; /* "." and ".." */
- pzp->zp_links = 2;
- pzp->zp_parent = obj;
- pzp->zp_gen = dmu_tx_get_txg(tx);
- pzp->zp_mode = S_IFDIR | 0755;
- pzp->zp_flags = ZFS_ACL_TRIVIAL;
+ for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
+ mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
- gethrestime(&now);
+ VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
+ cr, NULL, &acl_ids));
+ zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
+ ASSERT3P(zp, ==, rootzp);
+ error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
+ ASSERT(error == 0);
+ zfs_acl_ids_free(&acl_ids);
- ZFS_TIME_ENCODE(&now, pzp->zp_crtime);
- ZFS_TIME_ENCODE(&now, pzp->zp_ctime);
- ZFS_TIME_ENCODE(&now, pzp->zp_atime);
- ZFS_TIME_ENCODE(&now, pzp->zp_mtime);
+ atomic_set(&ZTOI(rootzp)->i_count, 0);
+ sa_handle_destroy(rootzp->z_sa_hdl);
+ kmem_free(sb, sizeof (struct super_block));
+ kmem_free(zsb, sizeof (zfs_sb_t));
+ kmem_cache_free(znode_cache, rootzp);
+
+ /*
+ * Create shares directory
+ */
+
+ error = zfs_create_share_dir(zsb, tx);
- error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &obj, tx);
ASSERT(error == 0);
- dmu_buf_rele(db, FTAG);
+ for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
+ mutex_destroy(&zsb->z_hold_mtx[i]);
}
-
#endif /* _KERNEL */
static int