X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=module%2Fzfs%2Fzfs_znode.c;h=72a077d0804f2568585df52ffd2b23a95e3c3e71;hb=10c6047ea56445a87d8ce1397ac2d2caa97719ca;hp=24bd3ddcdd8275a3e363a7f7c578c1939dc27e59;hpb=428870ff734fdaccc342b33fc53cf94724409a46;p=zfs.git diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 24bd3dd..72a077d 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "zfs_prop.h" #include "zfs_comutil.h" @@ -81,9 +82,6 @@ #define ZNODE_STAT_ADD(stat) /* nothing */ #endif /* ZNODE_STATS */ -#define POINTER_IS_VALID(p) (!((uintptr_t)(p) & 0x3)) -#define POINTER_INVALIDATE(pp) (*(pp) = (void *)((uintptr_t)(*(pp)) | 0x1)) - /* * Functions needed for userland (ie: libzpool) are not put under * #ifdef_KERNEL; the rest of the functions have dependencies @@ -99,17 +97,6 @@ krwlock_t zfsvfs_lock; static kmem_cache_t *znode_cache = NULL; /*ARGSUSED*/ -static void -znode_evict_error(dmu_buf_t *dbuf, void *user_ptr) -{ - /* - * We should never drop all dbuf refs without first clearing - * the eviction callback. - */ - panic("evicting znode %p\n", user_ptr); -} - -/*ARGSUSED*/ static int zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) { @@ -136,6 +123,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) zp->z_dirlocks = NULL; zp->z_acl_cached = NULL; + zp->z_moved = 0; return (0); } @@ -160,184 +148,6 @@ zfs_znode_cache_destructor(void *buf, void *arg) ASSERT(zp->z_acl_cached == NULL); } -#ifdef ZNODE_STATS -static struct { - uint64_t zms_zfsvfs_invalid; - uint64_t zms_zfsvfs_recheck1; - uint64_t zms_zfsvfs_unmounted; - uint64_t zms_zfsvfs_recheck2; - uint64_t zms_obj_held; - uint64_t zms_vnode_locked; - uint64_t zms_not_only_dnlc; -} znode_move_stats; -#endif /* ZNODE_STATS */ - -static void -zfs_znode_move_impl(znode_t *ozp, znode_t *nzp) -{ - vnode_t *vp; - - /* Copy fields. */ - nzp->z_zfsvfs = ozp->z_zfsvfs; - - /* Swap vnodes. */ - vp = nzp->z_vnode; - nzp->z_vnode = ozp->z_vnode; - ozp->z_vnode = vp; /* let destructor free the overwritten vnode */ - ZTOV(ozp)->v_data = ozp; - ZTOV(nzp)->v_data = nzp; - - nzp->z_id = ozp->z_id; - ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */ - ASSERT(avl_numnodes(&ozp->z_range_avl) == 0); - nzp->z_unlinked = ozp->z_unlinked; - nzp->z_atime_dirty = ozp->z_atime_dirty; - nzp->z_zn_prefetch = ozp->z_zn_prefetch; - nzp->z_blksz = ozp->z_blksz; - nzp->z_seq = ozp->z_seq; - nzp->z_mapcnt = ozp->z_mapcnt; - nzp->z_last_itx = ozp->z_last_itx; - nzp->z_gen = ozp->z_gen; - nzp->z_sync_cnt = ozp->z_sync_cnt; - nzp->z_is_sa = ozp->z_is_sa; - nzp->z_sa_hdl = ozp->z_sa_hdl; - bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2); - nzp->z_links = ozp->z_links; - nzp->z_size = ozp->z_size; - nzp->z_pflags = ozp->z_pflags; - nzp->z_uid = ozp->z_uid; - nzp->z_gid = ozp->z_gid; - nzp->z_mode = ozp->z_mode; - - /* - * Since this is just an idle znode and kmem is already dealing with - * memory pressure, release any cached ACL. - */ - if (ozp->z_acl_cached) { - zfs_acl_free(ozp->z_acl_cached); - ozp->z_acl_cached = NULL; - } - - sa_set_userp(nzp->z_sa_hdl, nzp); - - /* - * Invalidate the original znode by clearing fields that provide a - * pointer back to the znode. Set the low bit of the vfs pointer to - * ensure that zfs_znode_move() recognizes the znode as invalid in any - * subsequent callback. - */ - ozp->z_sa_hdl = NULL; - POINTER_INVALIDATE(&ozp->z_zfsvfs); -} - -/*ARGSUSED*/ -static kmem_cbrc_t -zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg) -{ - znode_t *ozp = buf, *nzp = newbuf; - zfsvfs_t *zfsvfs; - vnode_t *vp; - - /* - * The znode is on the file system's list of known znodes if the vfs - * pointer is valid. We set the low bit of the vfs pointer when freeing - * the znode to invalidate it, and the memory patterns written by kmem - * (baddcafe and deadbeef) set at least one of the two low bits. A newly - * created znode sets the vfs pointer last of all to indicate that the - * znode is known and in a valid state to be moved by this function. - */ - zfsvfs = ozp->z_zfsvfs; - if (!POINTER_IS_VALID(zfsvfs)) { - ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid); - return (KMEM_CBRC_DONT_KNOW); - } - - /* - * Close a small window in which it's possible that the filesystem could - * be unmounted and freed, and zfsvfs, though valid in the previous - * statement, could point to unrelated memory by the time we try to - * prevent the filesystem from being unmounted. - */ - rw_enter(&zfsvfs_lock, RW_WRITER); - if (zfsvfs != ozp->z_zfsvfs) { - rw_exit(&zfsvfs_lock); - ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1); - return (KMEM_CBRC_DONT_KNOW); - } - - /* - * If the znode is still valid, then so is the file system. We know that - * no valid file system can be freed while we hold zfsvfs_lock, so we - * can safely ensure that the filesystem is not and will not be - * unmounted. The next statement is equivalent to ZFS_ENTER(). - */ - rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); - if (zfsvfs->z_unmounted) { - ZFS_EXIT(zfsvfs); - rw_exit(&zfsvfs_lock); - ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted); - return (KMEM_CBRC_DONT_KNOW); - } - rw_exit(&zfsvfs_lock); - - mutex_enter(&zfsvfs->z_znodes_lock); - /* - * Recheck the vfs pointer in case the znode was removed just before - * acquiring the lock. - */ - if (zfsvfs != ozp->z_zfsvfs) { - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2); - return (KMEM_CBRC_DONT_KNOW); - } - - /* - * At this point we know that as long as we hold z_znodes_lock, the - * znode cannot be freed and fields within the znode can be safely - * accessed. Now, prevent a race with zfs_zget(). - */ - if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) { - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - ZNODE_STAT_ADD(znode_move_stats.zms_obj_held); - return (KMEM_CBRC_LATER); - } - - vp = ZTOV(ozp); - if (mutex_tryenter(&vp->v_lock) == 0) { - ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked); - return (KMEM_CBRC_LATER); - } - - /* Only move znodes that are referenced _only_ by the DNLC. */ - if (vp->v_count != 1 || !vn_in_dnlc(vp)) { - mutex_exit(&vp->v_lock); - ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc); - return (KMEM_CBRC_LATER); - } - - /* - * The znode is known and in a valid state to move. We're holding the - * locks needed to execute the critical section. - */ - zfs_znode_move_impl(ozp, nzp); - mutex_exit(&vp->v_lock); - ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); - - list_link_replace(&ozp->z_link_node, &nzp->z_link_node); - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - - return (KMEM_CBRC_YES); -} - void zfs_znode_init(void) { @@ -349,7 +159,6 @@ zfs_znode_init(void) znode_cache = kmem_cache_create("zfs_znode_cache", sizeof (znode_t), 0, zfs_znode_cache_constructor, zfs_znode_cache_destructor, NULL, NULL, NULL, 0); - kmem_cache_set_move(znode_cache, zfs_znode_move); } void @@ -358,7 +167,9 @@ zfs_znode_fini(void) /* * Cleanup vfs & vnode ops */ +#ifdef HAVE_ZPL zfs_remove_op_tables(); +#endif /* HAVE_ZPL */ /* * Cleanup zcache @@ -369,6 +180,7 @@ zfs_znode_fini(void) rw_destroy(&zfsvfs_lock); } +#ifdef HAVE_ZPL struct vnodeops *zfs_dvnodeops; struct vnodeops *zfs_fvnodeops; struct vnodeops *zfs_symvnodeops; @@ -464,6 +276,7 @@ zfs_create_op_tables() int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) { +#ifdef HAVE_SHARE zfs_acl_ids_t acl_ids; vattr_t vattr; znode_t *sharezp; @@ -478,6 +291,8 @@ zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) vattr.va_gid = crgetgid(kcred); sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP); + ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs)); + sharezp->z_moved = 0; sharezp->z_unlinked = 0; sharezp->z_atime_dirty = 0; sharezp->z_zfsvfs = zfsvfs; @@ -503,6 +318,9 @@ zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) kmem_cache_free(znode_cache, sharezp); return (error); +#else + return (0); +#endif /* HAVE_SHARE */ } /* @@ -519,6 +337,8 @@ zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) #define MAXMIN64 0xffffffffUL #endif +#endif /* HAVE_ZPL */ + /* * Create special expldev for ZFS private use. * Can't use standard expldev since it doesn't do @@ -616,17 +436,19 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, dmu_object_type_t obj_type, sa_handle_t *hdl) { znode_t *zp; +#ifdef HAVE_ZPL vnode_t *vp; uint64_t mode; uint64_t parent; - uint64_t uid, gid; sa_bulk_attr_t bulk[9]; int count = 0; +#endif /* HAVE_ZPL */ zp = kmem_cache_alloc(znode_cache, KM_SLEEP); ASSERT(zp->z_dirlocks == NULL); ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); + zp->z_moved = 0; /* * Defer setting z_zfsvfs until the znode is ready to be a candidate for @@ -636,12 +458,12 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, zp->z_unlinked = 0; zp->z_atime_dirty = 0; zp->z_mapcnt = 0; - zp->z_last_itx = 0; zp->z_id = db->db_object; zp->z_blksz = blksz; zp->z_seq = 0x7A4653; zp->z_sync_cnt = 0; +#ifdef HAVE_ZPL vp = ZTOV(zp); vn_reinit(vp); @@ -659,9 +481,9 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &zp->z_atime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, - &uid, 8); + &zp->z_uid, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, - &gid, 8); + &zp->z_gid, 8); if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) { if (hdl == NULL) @@ -670,8 +492,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, return (NULL); } - zp->z_uid = zfs_fuid_map_id(zfsvfs, uid, CRED(), ZFS_OWNER); - zp->z_gid = zfs_fuid_map_id(zfsvfs, gid, CRED(), ZFS_GROUP); zp->z_mode = mode; vp->v_vfsp = zfsvfs->z_parent->z_vfs; @@ -705,7 +525,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, case VREG: vp->v_flag |= VMODSORT; if (parent == zfsvfs->z_shares_dir) { - ASSERT(uid == 0 && gid == 0); + ASSERT(zp->z_uid == 0 && zp->z_gid == 0); vn_setops(vp, zfs_sharevnodeops); } else { vn_setops(vp, zfs_fvnodeops); @@ -718,7 +538,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, vn_setops(vp, zfs_evnodeops); break; } - +#endif /* HAVE_ZPL */ mutex_enter(&zfsvfs->z_znodes_lock); list_insert_tail(&zfsvfs->z_all_znodes, zp); membar_producer(); @@ -759,7 +579,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, { uint64_t crtime[2], atime[2], mtime[2], ctime[2]; uint64_t mode, size, links, parent, pflags; - uint64_t dzp_pflags = 0; + uint64_t dzp_pflags = 0; uint64_t rdev = 0; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; dmu_buf_t *db; @@ -769,7 +589,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, int bonuslen; sa_handle_t *sa_hdl; dmu_object_type_t obj_type; - sa_bulk_attr_t sa_attrs[ZPL_END]; + sa_bulk_attr_t *sa_attrs; int cnt = 0; zfs_acl_locator_cb_t locate = { 0 }; @@ -794,7 +614,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, */ /* * There's currently no mechanism for pre-reading the blocks that will - * be to needed allocate a new object, so we accept the small chance + * be needed to allocate a new object, so we accept the small chance * that there will be an i/o error and we will fail one of the * assertions below. */ @@ -895,6 +715,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, * order for DMU_OT_ZNODE is critical since it needs to be constructed * in the old znode_phys_t format. Don't change this ordering */ + sa_attrs = kmem_alloc(sizeof(sa_bulk_attr_t) * ZPL_END, KM_SLEEP); if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), @@ -996,8 +817,9 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, if (obj_type == DMU_OT_ZNODE || acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx); - ASSERT3P(err, ==, 0); + ASSERT3S(err, ==, 0); } + kmem_free(sa_attrs, sizeof(sa_bulk_attr_t) * ZPL_END); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); } @@ -1009,6 +831,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) { +#ifdef HAVE_XVATTR xoptattr_t *xoap; xoap = xva_getxoptattr(xvap); @@ -1085,6 +908,17 @@ zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_REPARSE); } + if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { + ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_OFFLINE); + } + if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { + ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_SPARSE); + } +#endif /* HAVE_XVATTR */ } int @@ -1174,7 +1008,6 @@ zfs_rezget(znode_t *zp) dmu_buf_t *db; uint64_t obj_num = zp->z_id; uint64_t mode; - uint64_t uid, gid; sa_bulk_attr_t bulk[8]; int err; int count = 0; @@ -1220,28 +1053,26 @@ zfs_rezget(znode_t *zp) SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &zp->z_atime, sizeof (zp->z_atime)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, - &uid, sizeof (uid)); + &zp->z_uid, sizeof (zp->z_uid)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, - &gid, sizeof (gid)); + &zp->z_gid, sizeof (zp->z_gid)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, sizeof (mode)); - zp->z_mode = mode; - if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (EIO); } + zp->z_mode = mode; + if (gen != zp->z_gen) { zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (EIO); } - zp->z_uid = zfs_fuid_map_id(zfsvfs, uid, CRED(), ZFS_OWNER); - zp->z_gid = zfs_fuid_map_id(zfsvfs, gid, CRED(), ZFS_GROUP); zp->z_unlinked = (zp->z_links == 0); zp->z_blksz = doi.doi_data_block_size; @@ -1256,11 +1087,13 @@ zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os = zfsvfs->z_os; uint64_t obj = zp->z_id; - uint64_t acl_obj = ZFS_EXTERNAL_ACL(zp); + uint64_t acl_obj = zfs_external_acl(zp); ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); - if (acl_obj) + if (acl_obj) { + VERIFY(!zp->z_is_sa); VERIFY(0 == dmu_object_free(os, acl_obj, tx)); + } VERIFY(0 == dmu_object_free(os, obj, tx)); zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); @@ -1414,6 +1247,7 @@ zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); } +#ifdef HAVE_ZPL /* * This is a dummy interface used when pvn_vplist_dirty() should *not* * be calling back into the fs for a putpage(). E.g.: when truncating @@ -1427,6 +1261,7 @@ zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, ASSERT(0); return (0); } +#endif /* HAVE_ZPL */ /* * Increase the file length @@ -1558,10 +1393,14 @@ static int zfs_trunc(znode_t *zp, uint64_t end) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; +#ifdef HAVE_ZPL vnode_t *vp = ZTOV(zp); +#endif /* HAVE_ZPL */ dmu_tx_t *tx; rl_t *rl; int error; + sa_bulk_attr_t bulk[2]; + int count = 0; /* * We will change zp_size, lock the whole file. @@ -1598,12 +1437,19 @@ top: } zp->z_size = end; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), + NULL, &zp->z_size, sizeof (zp->z_size)); - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs), - &zp->z_size, sizeof (zp->z_size), tx)); + if (end == 0) { + zp->z_pflags &= ~ZFS_SPARSE; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + NULL, &zp->z_pflags, 8); + } + VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); dmu_tx_commit(tx); +#ifdef HAVE_ZPL /* * Clear any mapped pages in the truncated region. This has to * happen outside of the transaction to avoid the possibility of @@ -1627,6 +1473,7 @@ top: B_INVAL | B_TRUNC, NULL); ASSERT(error == 0); } +#endif /* HAVE_ZPL */ zfs_range_unlock(rl); @@ -1648,7 +1495,9 @@ top: int zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) { +#ifdef HAVE_ZPL vnode_t *vp = ZTOV(zp); +#endif /* HAVE_ZPL */ dmu_tx_t *tx; zfsvfs_t *zfsvfs = zp->z_zfsvfs; zilog_t *zilog = zfsvfs->z_log; @@ -1670,6 +1519,7 @@ zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) return (error); } +#ifdef HAVE_ZPL /* * Check for any locks in the region to be freed. */ @@ -1679,6 +1529,7 @@ zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) if (error = chklock(vp, FWRITE, off, length, flag, NULL)) return (error); } +#endif /* HAVE_ZPL */ if (len == 0) { error = zfs_trunc(zp, off); @@ -1721,18 +1572,24 @@ log: void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) { - zfsvfs_t zfsvfs; uint64_t moid, obj, sa_obj, version; uint64_t sense = ZFS_CASE_SENSITIVE; uint64_t norm = 0; nvpair_t *elem; int error; +#ifdef HAVE_ZPL + zfsvfs_t zfsvfs; int i; znode_t *rootzp = NULL; vnode_t *vp; vattr_t vattr; znode_t *zp; zfs_acl_ids_t acl_ids; +#else + timestruc_t now; + dmu_buf_t *db; + znode_phys_t *pzp; +#endif /* HAVE_ZPL */ /* * First attempt to create master node. @@ -1794,6 +1651,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); ASSERT(error == 0); +#ifdef HAVE_ZPL /* * Create root znode. Create minimal znode/vnode/zfsvfs * to allow zfs_mknode to work. @@ -1805,6 +1663,8 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) vattr.va_gid = crgetgid(cr); rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); + ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs)); + rootzp->z_moved = 0; rootzp->z_unlinked = 0; rootzp->z_atime_dirty = 0; rootzp->z_is_sa = USE_SA(version, os); @@ -1822,7 +1682,10 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) zfsvfs.z_use_sa = USE_SA(version, os); zfsvfs.z_norm = norm; - zfsvfs.z_attr_table = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END); + error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, + &zfsvfs.z_attr_table); + + ASSERT(error == 0); /* * Fold case on file systems that are always or sometimes case @@ -1831,6 +1694,8 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) zfsvfs.z_norm |= U8_TEXTPREP_TOUPPER; + /* XXX - This must be destroyed but I'm not quite sure yet so + * I'm just annotating that fact when it's an issue. -Brian */ mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs.z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); @@ -1838,7 +1703,6 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_init(&zfsvfs.z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); - ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs)); rootzp->z_zfsvfs = &zfsvfs; VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, cr, NULL, &acl_ids)); @@ -1853,93 +1717,168 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) ZTOV(rootzp)->v_count = 0; sa_handle_destroy(rootzp->z_sa_hdl); kmem_cache_free(znode_cache, rootzp); + error = zfs_create_share_dir(&zfsvfs, tx); + + for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) + mutex_destroy(&zfsvfs.z_hold_mtx[i]); +#else + /* + * Create root znode with code free of VFS dependencies + */ + obj = zap_create_norm(os, norm, DMU_OT_DIRECTORY_CONTENTS, + DMU_OT_ZNODE, sizeof (znode_phys_t), tx); + + VERIFY(0 == dmu_bonus_hold(os, obj, FTAG, &db)); + dmu_buf_will_dirty(db, tx); /* - * Create shares directory + * Initialize the znode physical data to zero. */ + ASSERT(db->db_size >= sizeof (znode_phys_t)); + bzero(db->db_data, db->db_size); + pzp = db->db_data; - error = zfs_create_share_dir(&zfsvfs, tx); + if (USE_FUIDS(version, os)) + pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; + pzp->zp_size = 2; /* "." and ".." */ + pzp->zp_links = 2; + pzp->zp_parent = obj; + pzp->zp_gen = dmu_tx_get_txg(tx); + pzp->zp_mode = S_IFDIR | 0755; + pzp->zp_flags = ZFS_ACL_TRIVIAL; + + gethrestime(&now); + + ZFS_TIME_ENCODE(&now, pzp->zp_crtime); + ZFS_TIME_ENCODE(&now, pzp->zp_ctime); + ZFS_TIME_ENCODE(&now, pzp->zp_atime); + ZFS_TIME_ENCODE(&now, pzp->zp_mtime); + + error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &obj, tx); ASSERT(error == 0); - for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) - mutex_destroy(&zfsvfs.z_hold_mtx[i]); + dmu_buf_rele(db, FTAG); +#endif /* HAVE_ZPL */ } #endif /* _KERNEL */ -/* - * Given an object number, return its parent object number and whether - * or not the object is an extended attribute directory. - */ static int -zfs_obj_to_pobj(objset_t *osp, uint64_t obj, uint64_t *pobjp, int *is_xattrdir, - sa_attr_type_t *sa_table) +zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) +{ + uint64_t sa_obj = 0; + int error; + + error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); + if (error != 0 && error != ENOENT) + return (error); + + error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); + return (error); +} + +static int +zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, + dmu_buf_t **db) { - dmu_buf_t *db; dmu_object_info_t doi; int error; - uint64_t parent; - uint64_t pflags; - uint64_t mode; - sa_bulk_attr_t bulk[3]; - sa_handle_t *hdl; - int count = 0; - if ((error = sa_buf_hold(osp, obj, FTAG, &db)) != 0) + if ((error = sa_buf_hold(osp, obj, FTAG, db)) != 0) return (error); - dmu_object_info_from_db(db, &doi); + dmu_object_info_from_db(*db, &doi); if ((doi.doi_bonus_type != DMU_OT_SA && doi.doi_bonus_type != DMU_OT_ZNODE) || - doi.doi_bonus_type == DMU_OT_ZNODE && - doi.doi_bonus_size < sizeof (znode_phys_t)) { - sa_buf_rele(db, FTAG); - return (EINVAL); + (doi.doi_bonus_type == DMU_OT_ZNODE && + doi.doi_bonus_size < sizeof (znode_phys_t))) { + sa_buf_rele(*db, FTAG); + return (ENOTSUP); } - if ((error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, - &hdl)) != 0) { - sa_buf_rele(db, FTAG); + error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); + if (error != 0) { + sa_buf_rele(*db, FTAG); return (error); } - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], - NULL, &parent, 8); + return (0); +} + +void +zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db) +{ + sa_handle_destroy(hdl); + sa_buf_rele(db, FTAG); +} + +/* + * Given an object number, return its parent object number and whether + * or not the object is an extended attribute directory. + */ +static int +zfs_obj_to_pobj(sa_handle_t *hdl, sa_attr_type_t *sa_table, uint64_t *pobjp, + int *is_xattrdir) +{ + uint64_t parent; + uint64_t pflags; + uint64_t mode; + sa_bulk_attr_t bulk[3]; + int count = 0; + int error; + + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, + &parent, sizeof (parent)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, - &pflags, 8); + &pflags, sizeof (pflags)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, - &mode, 8); + &mode, sizeof (mode)); - if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) { - sa_buf_rele(db, FTAG); - sa_handle_destroy(hdl); + if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) return (error); - } + *pobjp = parent; *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); - sa_handle_destroy(hdl); - sa_buf_rele(db, FTAG); return (0); } -int -zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) +/* + * Given an object number, return some zpl level statistics + */ +static int +zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, + zfs_stat_t *sb) { + sa_bulk_attr_t bulk[4]; + int count = 0; + + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, + &sb->zs_mode, sizeof (sb->zs_mode)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, + &sb->zs_gen, sizeof (sb->zs_gen)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, + &sb->zs_links, sizeof (sb->zs_links)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, + &sb->zs_ctime, sizeof (sb->zs_ctime)); + + return (sa_bulk_lookup(hdl, bulk, count)); +} + +static int +zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, + sa_attr_type_t *sa_table, char *buf, int len) +{ + sa_handle_t *sa_hdl; + sa_handle_t *prevhdl = NULL; + dmu_buf_t *prevdb = NULL; + dmu_buf_t *sa_db = NULL; char *path = buf + len - 1; - sa_attr_type_t *sa_table; int error; - uint64_t sa_obj = 0; *path = '\0'; - - error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); - - if (error != 0 && error != ENOENT) - return (error); - - sa_table = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END); + sa_hdl = hdl; for (;;) { uint64_t pobj; @@ -1947,8 +1886,11 @@ zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) size_t complen; int is_xattrdir; - if ((error = zfs_obj_to_pobj(osp, obj, &pobj, - &is_xattrdir, sa_table)) != 0) + if (prevdb) + zfs_release_sa_handle(prevhdl, prevdb); + + if ((error = zfs_obj_to_pobj(sa_hdl, sa_table, &pobj, + &is_xattrdir)) != 0) break; if (pobj == obj) { @@ -1972,6 +1914,22 @@ zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) ASSERT(path >= buf); bcopy(component, path, complen); obj = pobj; + + if (sa_hdl != hdl) { + prevhdl = sa_hdl; + prevdb = sa_db; + } + error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db); + if (error != 0) { + sa_hdl = prevhdl; + sa_db = prevdb; + break; + } + } + + if (sa_hdl != NULL && sa_hdl != hdl) { + ASSERT(sa_db != NULL); + zfs_release_sa_handle(sa_hdl, sa_db); } if (error == 0) @@ -1979,3 +1937,62 @@ zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) return (error); } + +int +zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) +{ + sa_attr_type_t *sa_table; + sa_handle_t *hdl; + dmu_buf_t *db; + int error; + + error = zfs_sa_setup(osp, &sa_table); + if (error != 0) + return (error); + + error = zfs_grab_sa_handle(osp, obj, &hdl, &db); + if (error != 0) + return (error); + + error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); + + zfs_release_sa_handle(hdl, db); + return (error); +} + +int +zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, + char *buf, int len) +{ + char *path = buf + len - 1; + sa_attr_type_t *sa_table; + sa_handle_t *hdl; + dmu_buf_t *db; + int error; + + *path = '\0'; + + error = zfs_sa_setup(osp, &sa_table); + if (error != 0) + return (error); + + error = zfs_grab_sa_handle(osp, obj, &hdl, &db); + if (error != 0) + return (error); + + error = zfs_obj_to_stats_impl(hdl, sa_table, sb); + if (error != 0) { + zfs_release_sa_handle(hdl, db); + return (error); + } + + error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); + + zfs_release_sa_handle(hdl, db); + return (error); +} + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(zfs_create_fs); +EXPORT_SYMBOL(zfs_obj_to_path); +#endif