X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzfs_znode.c;h=137a0681044a85744e7b9c11f3ae9c781c4c497a;hb=9ee7fac5312f9c6b33155aed77e50b0dfd68a77d;hp=e1e4e9e03ac26666820e3fcf467ef00693d005ed;hpb=572e285762521df27fe5b026f409ba1a21abb7ac;p=zfs.git diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index e1e4e9e..137a068 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -88,6 +88,7 @@ * (such as VFS logic) that will not compile easily in userland. */ #ifdef _KERNEL +#ifdef HAVE_ZPL /* * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to * be freed before it can be safely accessed. @@ -159,189 +160,6 @@ zfs_znode_cache_destructor(void *buf, void *arg) ASSERT(zp->z_acl_cached == NULL); } -#ifdef ZNODE_STATS -static struct { - uint64_t zms_zfsvfs_invalid; - uint64_t zms_zfsvfs_recheck1; - uint64_t zms_zfsvfs_unmounted; - uint64_t zms_zfsvfs_recheck2; - uint64_t zms_obj_held; - uint64_t zms_vnode_locked; - uint64_t zms_not_only_dnlc; -} znode_move_stats; -#endif /* ZNODE_STATS */ - -static void -zfs_znode_move_impl(znode_t *ozp, znode_t *nzp) -{ - vnode_t *vp; - - /* Copy fields. */ - nzp->z_zfsvfs = ozp->z_zfsvfs; - - /* Swap vnodes. */ - vp = nzp->z_vnode; - nzp->z_vnode = ozp->z_vnode; - ozp->z_vnode = vp; /* let destructor free the overwritten vnode */ - ZTOV(ozp)->v_data = ozp; - ZTOV(nzp)->v_data = nzp; - - nzp->z_id = ozp->z_id; - ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */ - ASSERT(avl_numnodes(&ozp->z_range_avl) == 0); - nzp->z_unlinked = ozp->z_unlinked; - nzp->z_atime_dirty = ozp->z_atime_dirty; - nzp->z_zn_prefetch = ozp->z_zn_prefetch; - nzp->z_blksz = ozp->z_blksz; - nzp->z_seq = ozp->z_seq; - nzp->z_mapcnt = ozp->z_mapcnt; - nzp->z_gen = ozp->z_gen; - nzp->z_sync_cnt = ozp->z_sync_cnt; - nzp->z_is_sa = ozp->z_is_sa; - nzp->z_sa_hdl = ozp->z_sa_hdl; - bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2); - nzp->z_links = ozp->z_links; - nzp->z_size = ozp->z_size; - nzp->z_pflags = ozp->z_pflags; - nzp->z_uid = ozp->z_uid; - nzp->z_gid = ozp->z_gid; - nzp->z_mode = ozp->z_mode; - - /* - * Since this is just an idle znode and kmem is already dealing with - * memory pressure, release any cached ACL. - */ - if (ozp->z_acl_cached) { - zfs_acl_free(ozp->z_acl_cached); - ozp->z_acl_cached = NULL; - } - - sa_set_userp(nzp->z_sa_hdl, nzp); - - /* - * Invalidate the original znode by clearing fields that provide a - * pointer back to the znode. Set the low bit of the vfs pointer to - * ensure that zfs_znode_move() recognizes the znode as invalid in any - * subsequent callback. - */ - ozp->z_sa_hdl = NULL; - POINTER_INVALIDATE(&ozp->z_zfsvfs); - - /* - * Mark the znode. - */ - nzp->z_moved = 1; - ozp->z_moved = (uint8_t)-1; -} - -/*ARGSUSED*/ -static kmem_cbrc_t -zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg) -{ - znode_t *ozp = buf, *nzp = newbuf; - zfsvfs_t *zfsvfs; - vnode_t *vp; - - /* - * The znode is on the file system's list of known znodes if the vfs - * pointer is valid. We set the low bit of the vfs pointer when freeing - * the znode to invalidate it, and the memory patterns written by kmem - * (baddcafe and deadbeef) set at least one of the two low bits. A newly - * created znode sets the vfs pointer last of all to indicate that the - * znode is known and in a valid state to be moved by this function. - */ - zfsvfs = ozp->z_zfsvfs; - if (!POINTER_IS_VALID(zfsvfs)) { - ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid); - return (KMEM_CBRC_DONT_KNOW); - } - - /* - * Close a small window in which it's possible that the filesystem could - * be unmounted and freed, and zfsvfs, though valid in the previous - * statement, could point to unrelated memory by the time we try to - * prevent the filesystem from being unmounted. - */ - rw_enter(&zfsvfs_lock, RW_WRITER); - if (zfsvfs != ozp->z_zfsvfs) { - rw_exit(&zfsvfs_lock); - ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1); - return (KMEM_CBRC_DONT_KNOW); - } - - /* - * If the znode is still valid, then so is the file system. We know that - * no valid file system can be freed while we hold zfsvfs_lock, so we - * can safely ensure that the filesystem is not and will not be - * unmounted. The next statement is equivalent to ZFS_ENTER(). - */ - rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); - if (zfsvfs->z_unmounted) { - ZFS_EXIT(zfsvfs); - rw_exit(&zfsvfs_lock); - ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted); - return (KMEM_CBRC_DONT_KNOW); - } - rw_exit(&zfsvfs_lock); - - mutex_enter(&zfsvfs->z_znodes_lock); - /* - * Recheck the vfs pointer in case the znode was removed just before - * acquiring the lock. - */ - if (zfsvfs != ozp->z_zfsvfs) { - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2); - return (KMEM_CBRC_DONT_KNOW); - } - - /* - * At this point we know that as long as we hold z_znodes_lock, the - * znode cannot be freed and fields within the znode can be safely - * accessed. Now, prevent a race with zfs_zget(). - */ - if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) { - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - ZNODE_STAT_ADD(znode_move_stats.zms_obj_held); - return (KMEM_CBRC_LATER); - } - - vp = ZTOV(ozp); - if (mutex_tryenter(&vp->v_lock) == 0) { - ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked); - return (KMEM_CBRC_LATER); - } - - /* Only move znodes that are referenced _only_ by the DNLC. */ - if (vp->v_count != 1 || !vn_in_dnlc(vp)) { - mutex_exit(&vp->v_lock); - ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc); - return (KMEM_CBRC_LATER); - } - - /* - * The znode is known and in a valid state to move. We're holding the - * locks needed to execute the critical section. - */ - zfs_znode_move_impl(ozp, nzp); - mutex_exit(&vp->v_lock); - ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); - - list_link_replace(&ozp->z_link_node, &nzp->z_link_node); - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - - return (KMEM_CBRC_YES); -} - void zfs_znode_init(void) { @@ -353,7 +171,6 @@ zfs_znode_init(void) znode_cache = kmem_cache_create("zfs_znode_cache", sizeof (znode_t), 0, zfs_znode_cache_constructor, zfs_znode_cache_destructor, NULL, NULL, NULL, 0); - kmem_cache_set_move(znode_cache, zfs_znode_move); } void @@ -468,6 +285,7 @@ zfs_create_op_tables() int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) { +#ifdef HAVE_SHARE zfs_acl_ids_t acl_ids; vattr_t vattr; znode_t *sharezp; @@ -509,6 +327,9 @@ zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) kmem_cache_free(znode_cache, sharezp); return (error); +#else + return (0); +#endif /* HAVE_SHARE */ } /* @@ -772,7 +593,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, int bonuslen; sa_handle_t *sa_hdl; dmu_object_type_t obj_type; - sa_bulk_attr_t sa_attrs[ZPL_END]; + sa_bulk_attr_t *sa_attrs; int cnt = 0; zfs_acl_locator_cb_t locate = { 0 }; @@ -898,6 +719,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, * order for DMU_OT_ZNODE is critical since it needs to be constructed * in the old znode_phys_t format. Don't change this ordering */ + sa_attrs = kmem_alloc(sizeof(sa_bulk_attr_t) * ZPL_END, KM_SLEEP); if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), @@ -999,8 +821,9 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, if (obj_type == DMU_OT_ZNODE || acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx); - ASSERT3P(err, ==, 0); + ASSERT3S(err, ==, 0); } + kmem_free(sa_attrs, sizeof(sa_bulk_attr_t) * ZPL_END); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); } @@ -1737,22 +1560,29 @@ log: dmu_tx_commit(tx); return (0); } +#endif /* HAVE_ZPL */ void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) { - zfsvfs_t zfsvfs; uint64_t moid, obj, sa_obj, version; uint64_t sense = ZFS_CASE_SENSITIVE; uint64_t norm = 0; nvpair_t *elem; int error; +#ifdef HAVE_ZPL + zfsvfs_t zfsvfs; int i; znode_t *rootzp = NULL; vnode_t *vp; vattr_t vattr; znode_t *zp; zfs_acl_ids_t acl_ids; +#else + timestruc_t now; + dmu_buf_t *db; + znode_phys_t *pzp; +#endif /* HAVE_ZPL */ /* * First attempt to create master node. @@ -1814,6 +1644,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); ASSERT(error == 0); +#ifdef HAVE_ZPL /* * Create root znode. Create minimal znode/vnode/zfsvfs * to allow zfs_mknode to work. @@ -1856,6 +1687,8 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) zfsvfs.z_norm |= U8_TEXTPREP_TOUPPER; + /* XXX - This must be destroyed but I'm not quite sure yet so + * I'm just annotating that fact when it's an issue. -Brian */ mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs.z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); @@ -1877,17 +1710,49 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) ZTOV(rootzp)->v_count = 0; sa_handle_destroy(rootzp->z_sa_hdl); kmem_cache_free(znode_cache, rootzp); + error = zfs_create_share_dir(&zfsvfs, tx); + for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) + mutex_destroy(&zfsvfs.z_hold_mtx[i]); +#else /* - * Create shares directory + * Create root znode with code free of VFS dependencies */ + obj = zap_create_norm(os, norm, DMU_OT_DIRECTORY_CONTENTS, + DMU_OT_ZNODE, sizeof (znode_phys_t), tx); - error = zfs_create_share_dir(&zfsvfs, tx); + VERIFY(0 == dmu_bonus_hold(os, obj, FTAG, &db)); + dmu_buf_will_dirty(db, tx); + + /* + * Initialize the znode physical data to zero. + */ + ASSERT(db->db_size >= sizeof (znode_phys_t)); + bzero(db->db_data, db->db_size); + pzp = db->db_data; + if (USE_FUIDS(version, os)) + pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; + + pzp->zp_size = 2; /* "." and ".." */ + pzp->zp_links = 2; + pzp->zp_parent = obj; + pzp->zp_gen = dmu_tx_get_txg(tx); + pzp->zp_mode = S_IFDIR | 0755; + pzp->zp_flags = ZFS_ACL_TRIVIAL; + + gethrestime(&now); + + ZFS_TIME_ENCODE(&now, pzp->zp_crtime); + ZFS_TIME_ENCODE(&now, pzp->zp_ctime); + ZFS_TIME_ENCODE(&now, pzp->zp_atime); + ZFS_TIME_ENCODE(&now, pzp->zp_mtime); + + error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &obj, tx); ASSERT(error == 0); - for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) - mutex_destroy(&zfsvfs.z_hold_mtx[i]); + dmu_buf_rele(db, FTAG); +#endif /* HAVE_ZPL */ } #endif /* _KERNEL */ @@ -1919,8 +1784,8 @@ zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, dmu_object_info_from_db(*db, &doi); if ((doi.doi_bonus_type != DMU_OT_SA && doi.doi_bonus_type != DMU_OT_ZNODE) || - doi.doi_bonus_type == DMU_OT_ZNODE && - doi.doi_bonus_size < sizeof (znode_phys_t)) { + (doi.doi_bonus_type == DMU_OT_ZNODE && + doi.doi_bonus_size < sizeof (znode_phys_t))) { sa_buf_rele(*db, FTAG); return (ENOTSUP); } @@ -2119,3 +1984,8 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, zfs_release_sa_handle(hdl, db); return (error); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(zfs_create_fs); +EXPORT_SYMBOL(zfs_obj_to_path); +#endif