X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzfs_znode.c;h=aaf17e18f4951969b932acb7989d75b7fc33552d;hb=refs%2Fheads%2Frertzinger%2Ffeature-zpool-get--p;hp=076161a8576b8e85aef0efffb69b137db2950a8e;hpb=5c03efc379693f992ebe39c6a00c7297c4a304ea;p=zfs.git diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 076161a..aaf17e1 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* Portions Copyright 2007 Jeremy Teo */ @@ -52,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -106,6 +108,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL); mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL); mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL); avl_create(&zp->z_range_avl, zfs_range_compare, @@ -113,6 +116,8 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) zp->z_dirlocks = NULL; zp->z_acl_cached = NULL; + zp->z_xattr_cached = NULL; + zp->z_xattr_parent = NULL; zp->z_moved = 0; return (0); } @@ -128,11 +133,14 @@ zfs_znode_cache_destructor(void *buf, void *arg) rw_destroy(&zp->z_parent_lock); rw_destroy(&zp->z_name_lock); mutex_destroy(&zp->z_acl_lock); + rw_destroy(&zp->z_xattr_lock); avl_destroy(&zp->z_range_avl); mutex_destroy(&zp->z_range_lock); ASSERT(zp->z_dirlocks == NULL); ASSERT(zp->z_acl_cached == NULL); + ASSERT(zp->z_xattr_cached == NULL); + ASSERT(zp->z_xattr_parent == NULL); } void @@ -161,7 +169,7 @@ zfs_znode_fini(void) int zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx) { -#ifdef HAVE_SHARE +#ifdef HAVE_SMB_SHARE zfs_acl_ids_t acl_ids; vattr_t vattr; znode_t *sharezp; @@ -203,7 +211,7 @@ zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx) return (error); #else return (0); -#endif /* HAVE_SHARE */ +#endif /* HAVE_SMB_SHARE */ } static void @@ -263,8 +271,14 @@ zfs_inode_destroy(struct inode *ip) znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ZTOZSB(zp); + if (zfsctl_is_node(ip)) + zfsctl_inode_destroy(ip); + mutex_enter(&zsb->z_znodes_lock); - list_remove(&zsb->z_all_znodes, zp); + if (list_link_active(&zp->z_link_node)) { + list_remove(&zsb->z_all_znodes, zp); + zsb->z_nr_znodes--; + } mutex_exit(&zsb->z_znodes_lock); if (zp->z_acl_cached) { @@ -272,6 +286,16 @@ zfs_inode_destroy(struct inode *ip) zp->z_acl_cached = NULL; } + if (zp->z_xattr_cached) { + nvlist_free(zp->z_xattr_cached); + zp->z_xattr_cached = NULL; + } + + if (zp->z_xattr_parent) { + iput(ZTOI(zp->z_xattr_parent)); + zp->z_xattr_parent = NULL; + } + kmem_cache_free(znode_cache, zp); } @@ -327,7 +351,7 @@ zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip) static znode_t * zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl, - struct dentry *dentry, struct inode *dip) + struct inode *dip) { znode_t *zp; struct inode *ip; @@ -343,6 +367,9 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, zp = ITOZ(ip); ASSERT(zp->z_dirlocks == NULL); + ASSERT3P(zp->z_acl_cached, ==, NULL); + ASSERT3P(zp->z_xattr_cached, ==, NULL); + ASSERT3P(zp->z_xattr_parent, ==, NULL); zp->z_moved = 0; zp->z_sa_hdl = NULL; zp->z_unlinked = 0; @@ -352,7 +379,10 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, zp->z_blksz = blksz; zp->z_seq = 0x7A4653; zp->z_sync_cnt = 0; - zp->z_is_zvol = 0; + zp->z_is_zvol = B_FALSE; + zp->z_is_mapped = B_FALSE; + zp->z_is_ctldir = B_FALSE; + zp->z_is_stale = B_FALSE; zfs_znode_sa_init(zsb, zp, db, obj_type, hdl); @@ -376,22 +406,31 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, goto error; } + /* + * xattr znodes hold a reference on their unique parent + */ + if (dip && zp->z_pflags & ZFS_XATTR) { + igrab(dip); + zp->z_xattr_parent = ITOZ(dip); + } + ip->i_ino = obj; zfs_inode_update(zp); zfs_inode_set_ops(zsb, ip); - if (insert_inode_locked(ip)) - goto error; - - if (dentry) { - if (zpl_xattr_security_init(ip, dip, &dentry->d_name)) - goto error; - - d_instantiate(dentry, ip); - } + /* + * The only way insert_inode_locked() can fail is if the ip->i_ino + * number is already hashed for this super block. This can never + * happen because the inode numbers map 1:1 with the object numbers. + * + * The one exception is rolling back a mounted file system, but in + * this case all the active inode are unhashed during the rollback. + */ + VERIFY3S(insert_inode_locked(ip), ==, 0); mutex_enter(&zsb->z_znodes_lock); list_insert_tail(&zsb->z_all_znodes, zp); + zsb->z_nr_znodes++; membar_producer(); mutex_exit(&zsb->z_znodes_lock); @@ -423,15 +462,19 @@ zfs_inode_update(znode_t *zp) zsb = ZTOZSB(zp); ip = ZTOI(zp); + /* Skip .zfs control nodes which do not exist on disk. */ + if (zfsctl_is_node(ip)) + return; + sa_lookup(zp->z_sa_hdl, SA_ZPL_ATIME(zsb), &atime, 16); sa_lookup(zp->z_sa_hdl, SA_ZPL_MTIME(zsb), &mtime, 16); sa_lookup(zp->z_sa_hdl, SA_ZPL_CTIME(zsb), &ctime, 16); spin_lock(&ip->i_lock); ip->i_generation = zp->z_gen; - ip->i_uid = zp->z_uid; - ip->i_gid = zp->z_gid; - ip->i_nlink = zp->z_links; + ip->i_uid = SUID_TO_KUID(zp->z_uid); + ip->i_gid = SGID_TO_KGID(zp->z_gid); + set_nlink(ip, zp->z_links); ip->i_mode = zp->z_mode; ip->i_blkbits = SPA_MINBLOCKSHIFT; dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, @@ -513,7 +556,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, err = zap_create_claim_norm(zsb->z_os, obj, zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS, obj_type, bonuslen, tx); - ASSERT3U(err, ==, 0); + ASSERT0(err); } else { obj = zap_create_norm(zsb->z_os, zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS, @@ -524,7 +567,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, err = dmu_object_claim(zsb->z_os, obj, DMU_OT_PLAIN_FILE_CONTENTS, 0, obj_type, bonuslen, tx); - ASSERT3U(err, ==, 0); + ASSERT0(err); } else { obj = dmu_object_alloc(zsb->z_os, DMU_OT_PLAIN_FILE_CONTENTS, 0, @@ -604,7 +647,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, * order for DMU_OT_ZNODE is critical since it needs to be constructed * in the old znode_phys_t format. Don't change this ordering */ - sa_attrs = kmem_alloc(sizeof(sa_bulk_attr_t) * ZPL_END, KM_SLEEP); + sa_attrs = kmem_alloc(sizeof(sa_bulk_attr_t) * ZPL_END, KM_PUSHPAGE); if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb), @@ -685,9 +728,9 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, if (!(flag & IS_ROOT_NODE)) { *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl, - vap->va_dentry, ZTOI(dzp)); - ASSERT(*zpp != NULL); - ASSERT(dzp != NULL); + ZTOI(dzp)); + VERIFY(*zpp != NULL); + VERIFY(dzp != NULL); } else { /* * If we are creating the root node, the "parent" we @@ -704,7 +747,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, if (obj_type == DMU_OT_ZNODE || acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx); - ASSERT3S(err, ==, 0); + ASSERT0(err); } kmem_free(sa_attrs, sizeof(sa_bulk_attr_t) * ZPL_END); ZFS_OBJ_HOLD_EXIT(zsb, obj); @@ -896,7 +939,7 @@ again: * bonus buffer. */ zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size, - doi.doi_bonus_type, obj_num, NULL, NULL, NULL); + doi.doi_bonus_type, obj_num, NULL, NULL); if (zp == NULL) { err = ENOENT; } else { @@ -926,8 +969,20 @@ zfs_rezget(znode_t *zp) zfs_acl_free(zp->z_acl_cached); zp->z_acl_cached = NULL; } - mutex_exit(&zp->z_acl_lock); + + rw_enter(&zp->z_xattr_lock, RW_WRITER); + if (zp->z_xattr_cached) { + nvlist_free(zp->z_xattr_cached); + zp->z_xattr_cached = NULL; + } + + if (zp->z_xattr_parent) { + iput(ZTOI(zp->z_xattr_parent)); + zp->z_xattr_parent = NULL; + } + rw_exit(&zp->z_xattr_lock); + ASSERT(zp->z_sa_hdl == NULL); err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db); if (err) { @@ -981,6 +1036,7 @@ zfs_rezget(znode_t *zp) zp->z_unlinked = (zp->z_links == 0); zp->z_blksz = doi.doi_data_block_size; + zfs_inode_update(zp); ZFS_OBJ_HOLD_EXIT(zsb, obj_num); @@ -1115,7 +1171,7 @@ zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) if (error == ENOTSUP) return; - ASSERT3U(error, ==, 0); + ASSERT0(error); /* What blocksize did we actually get? */ dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); @@ -1324,9 +1380,7 @@ top: int zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) { -#ifdef HAVE_MANDLOCKS struct inode *ip = ZTOI(zp); -#endif /* HAVE_MANDLOCKS */ dmu_tx_t *tx; zfs_sb_t *zsb = ZTOZSB(zp); zilog_t *zilog = zsb->z_log; @@ -1348,17 +1402,14 @@ zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) return (error); } -#ifdef HAVE_MANDLOCKS /* * Check for any locks in the region to be freed. */ - - if (MANDLOCK(ip, (mode_t)mode)) { + if (ip->i_flock && mandatory_lock(ip)) { uint64_t length = (len ? len : zp->z_size - off); - if (error = chklock(ip, FWRITE, off, length, flag, NULL)) - return (error); + if (!lock_may_write(ip, off, length)) + return (EAGAIN); } -#endif /* HAVE_MANDLOCKS */ if (len == 0) { error = zfs_trunc(zp, off); @@ -1402,13 +1453,18 @@ log: void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) { + struct super_block *sb; + zfs_sb_t *zsb; uint64_t moid, obj, sa_obj, version; + uint64_t sense = ZFS_CASE_SENSITIVE; uint64_t norm = 0; nvpair_t *elem; int error; - timestruc_t now; - dmu_buf_t *db; - znode_phys_t *pzp; + int i; + znode_t *rootzp = NULL; + vattr_t vattr; + znode_t *zp; + zfs_acl_ids_t acl_ids; /* * First attempt to create master node. @@ -1444,6 +1500,8 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) ASSERT(error == 0); if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) norm = val; + else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) + sense = val; } ASSERT(version != 0); error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); @@ -1469,47 +1527,76 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) ASSERT(error == 0); /* - * Create root znode with code free of VFS dependencies. This - * is important because without a registered filesystem and super - * block all the required VFS hooks will be missing. The critical - * thing is to just crete the required root znode. + * Create root znode. Create minimal znode/inode/zsb/sb + * to allow zfs_mknode to work. */ - obj = zap_create_norm(os, norm, DMU_OT_DIRECTORY_CONTENTS, - DMU_OT_ZNODE, sizeof (znode_phys_t), tx); + vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID; + vattr.va_mode = S_IFDIR|0755; + vattr.va_uid = crgetuid(cr); + vattr.va_gid = crgetgid(cr); + + rootzp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE); + rootzp->z_moved = 0; + rootzp->z_unlinked = 0; + rootzp->z_atime_dirty = 0; + rootzp->z_is_sa = USE_SA(version, os); + + zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_PUSHPAGE | KM_NODEBUG); + zsb->z_os = os; + zsb->z_parent = zsb; + zsb->z_version = version; + zsb->z_use_fuids = USE_FUIDS(version, os); + zsb->z_use_sa = USE_SA(version, os); + zsb->z_norm = norm; - VERIFY(0 == dmu_bonus_hold(os, obj, FTAG, &db)); - dmu_buf_will_dirty(db, tx); + sb = kmem_zalloc(sizeof (struct super_block), KM_PUSHPAGE); + sb->s_fs_info = zsb; + + ZTOI(rootzp)->i_sb = sb; + + error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, + &zsb->z_attr_table); + + ASSERT(error == 0); /* - * Initialize the znode physical data to zero. + * Fold case on file systems that are always or sometimes case + * insensitive. */ - ASSERT(db->db_size >= sizeof (znode_phys_t)); - bzero(db->db_data, db->db_size); - pzp = db->db_data; + if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) + zsb->z_norm |= U8_TEXTPREP_TOUPPER; - if (USE_FUIDS(version, os)) - pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; + mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zsb->z_all_znodes, sizeof (znode_t), + offsetof(znode_t, z_link_node)); - pzp->zp_size = 2; /* "." and ".." */ - pzp->zp_links = 2; - pzp->zp_parent = obj; - pzp->zp_gen = dmu_tx_get_txg(tx); - pzp->zp_mode = S_IFDIR | 0755; - pzp->zp_flags = ZFS_ACL_TRIVIAL; + for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) + mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); - gethrestime(&now); + VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, + cr, NULL, &acl_ids)); + zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); + ASSERT3P(zp, ==, rootzp); + error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); + ASSERT(error == 0); + zfs_acl_ids_free(&acl_ids); - ZFS_TIME_ENCODE(&now, pzp->zp_crtime); - ZFS_TIME_ENCODE(&now, pzp->zp_ctime); - ZFS_TIME_ENCODE(&now, pzp->zp_atime); - ZFS_TIME_ENCODE(&now, pzp->zp_mtime); + atomic_set(&ZTOI(rootzp)->i_count, 0); + sa_handle_destroy(rootzp->z_sa_hdl); + kmem_cache_free(znode_cache, rootzp); - error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &obj, tx); + /* + * Create shares directory + */ + error = zfs_create_share_dir(zsb, tx); ASSERT(error == 0); - dmu_buf_rele(db, FTAG); -} + for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) + mutex_destroy(&zsb->z_hold_mtx[i]); + kmem_free(sb, sizeof (struct super_block)); + kmem_free(zsb, sizeof (zfs_sb_t)); +} #endif /* _KERNEL */ static int @@ -1528,12 +1615,12 @@ zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) static int zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, - dmu_buf_t **db) + dmu_buf_t **db, void *tag) { dmu_object_info_t doi; int error; - if ((error = sa_buf_hold(osp, obj, FTAG, db)) != 0) + if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) return (error); dmu_object_info_from_db(*db, &doi); @@ -1541,13 +1628,13 @@ zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, doi.doi_bonus_type != DMU_OT_ZNODE) || (doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t))) { - sa_buf_rele(*db, FTAG); + sa_buf_rele(*db, tag); return (ENOTSUP); } error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); if (error != 0) { - sa_buf_rele(*db, FTAG); + sa_buf_rele(*db, tag); return (error); } @@ -1555,10 +1642,10 @@ zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, } void -zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db) +zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) { sa_handle_destroy(hdl); - sa_buf_rele(db, FTAG); + sa_buf_rele(db, tag); } /* @@ -1635,7 +1722,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, int is_xattrdir; if (prevdb) - zfs_release_sa_handle(prevhdl, prevdb); + zfs_release_sa_handle(prevhdl, prevdb, FTAG); if ((error = zfs_obj_to_pobj(sa_hdl, sa_table, &pobj, &is_xattrdir)) != 0) @@ -1667,7 +1754,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, prevhdl = sa_hdl; prevdb = sa_db; } - error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db); + error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); if (error != 0) { sa_hdl = prevhdl; sa_db = prevdb; @@ -1677,7 +1764,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, if (sa_hdl != NULL && sa_hdl != hdl) { ASSERT(sa_db != NULL); - zfs_release_sa_handle(sa_hdl, sa_db); + zfs_release_sa_handle(sa_hdl, sa_db, FTAG); } if (error == 0) @@ -1698,13 +1785,13 @@ zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) if (error != 0) return (error); - error = zfs_grab_sa_handle(osp, obj, &hdl, &db); + error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); if (error != 0) return (error); error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - zfs_release_sa_handle(hdl, db); + zfs_release_sa_handle(hdl, db, FTAG); return (error); } @@ -1724,19 +1811,19 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, if (error != 0) return (error); - error = zfs_grab_sa_handle(osp, obj, &hdl, &db); + error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); if (error != 0) return (error); error = zfs_obj_to_stats_impl(hdl, sa_table, sb); if (error != 0) { - zfs_release_sa_handle(hdl, db); + zfs_release_sa_handle(hdl, db, FTAG); return (error); } error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - zfs_release_sa_handle(hdl, db); + zfs_release_sa_handle(hdl, db, FTAG); return (error); }