Merge branch 'zpl'

[zfs.git] / module / zfs / zfs_dir.c
diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c

index 1ec4932..f54ed19 100644 (file)
--- a/module/zfs/zfs_dir.c
+++ b/module/zfs/zfs_dir.c
@@ -19,10 +19,10 @@
   * CDDL HEADER END
   */
  /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   */
  
+
  #include <sys/types.h>
  #include <sys/param.h>
  #include <sys/time.h>
@@ -50,8 +50,9 @@
  #include <sys/zap.h>
  #include <sys/dmu.h>
  #include <sys/atomic.h>
-#include <sys/zfs_ctldir.h>
  #include <sys/zfs_fuid.h>
+#include <sys/sa.h>
+#include <sys/zfs_sa.h>
  #include <sys/dnlc.h>
  #include <sys/extdirent.h>
  
@@ -60,12 +61,12 @@
   * of names after deciding which is the appropriate lookup interface.
   */
  static int
-zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact,
+zfs_match_find(zfs_sb_t *zsb, znode_t *dzp, char *name, boolean_t exact,
      boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid)
  {
         int error;
  
-       if (zfsvfs->z_norm) {
+       if (zsb->z_norm) {
                 matchtype_t mt = MT_FIRST;
                 boolean_t conflict = B_FALSE;
                 size_t bufsz = 0;
@@ -81,17 +82,19 @@ zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact,
                  * In the non-mixed case we only expect there would ever
                  * be one match, but we need to use the normalizing lookup.
                  */
-               error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
+               error = zap_lookup_norm(zsb->z_os, dzp->z_id, name, 8, 1,
                     zoid, mt, buf, bufsz, &conflict);
                 if (!error && deflags)
                         *deflags = conflict ? ED_CASE_CONFLICT : 0;
         } else {
-               error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
+               error = zap_lookup(zsb->z_os, dzp->z_id, name, 8, 1, zoid);
         }
         *zoid = ZFS_DIRENT_OBJ(*zoid);
  
+#ifdef HAVE_DNLC
         if (error == ENOENT && update)
-               dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE);
+               dnlc_update(ZTOI(dzp), name, DNLC_NO_VNODE);
+#endif /* HAVE_DNLC */
  
         return (error);
  }
@@ -114,6 +117,8 @@ zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact,
   *               ZCIEXACT: On a purely case-insensitive file system,
   *                         this lookup should be case-sensitive.
   *               ZRENAMING: we are locking for renaming, force narrow locks
+ *               ZHAVELOCK: Don't grab the z_name_lock for this call. The
+ *                          current thread already holds it.
   *
   * Output arguments:
   *     zpp     - pointer to the znode for the entry (NULL if there isn't one)
@@ -133,12 +138,14 @@ int
  zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
      int flag, int *direntflags, pathname_t *realpnp)
  {
-       zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
+       zfs_sb_t        *zsb = ZTOZSB(dzp);
         zfs_dirlock_t   *dl;
         boolean_t       update;
         boolean_t       exact;
         uint64_t        zoid;
+#ifdef HAVE_DNLC
         vnode_t         *vp = NULL;
+#endif /* HAVE_DNLC */
         int             error = 0;
         int             cmpflags;
  
@@ -148,15 +155,15 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
         /*
          * Verify that we are not trying to lock '.', '..', or '.zfs'
          */
-       if (name[0] == '.' &&
-           (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) ||
-           zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)
+       if ((name[0] == '.' &&
+           (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
+           (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
                 return (EEXIST);
  
         /*
          * Case sensitivity and normalization preferences are set when
          * the file system is created.  These are stored in the
-        * zfsvfs->z_case and zfsvfs->z_norm fields.  These choices
+        * zsb->z_case and zsb->z_norm fields.  These choices
          * affect what vnodes can be cached in the DNLC, how we
          * perform zap lookups, and the "width" of our dirlocks.
          *
@@ -176,8 +183,8 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
          * access.
          */
         exact =
-           ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
-           ((zfsvfs->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));
+           ((zsb->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
+           ((zsb->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));
  
         /*
          * Only look in or update the DNLC if we are looking for the
@@ -189,9 +196,9 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
          * Maybe can add TO-UPPERed version of name to dnlc in ci-only
          * case for performance improvement?
          */
-       update = !zfsvfs->z_norm ||
-           ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
-           !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));
+       update = !zsb->z_norm ||
+           ((zsb->z_case == ZFS_CASE_MIXED) &&
+           !(zsb->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));
  
         /*
          * ZRENAMING indicates we are in a situation where we should
@@ -204,17 +211,24 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
         if (flag & ZRENAMING)
                 cmpflags = 0;
         else
-               cmpflags = zfsvfs->z_norm;
+               cmpflags = zsb->z_norm;
  
         /*
          * Wait until there are no locks on this name.
+        *
+        * Don't grab the the lock if it is already held. However, cannot
+        * have both ZSHARED and ZHAVELOCK together.
          */
-       rw_enter(&dzp->z_name_lock, RW_READER);
+       ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
+       if (!(flag & ZHAVELOCK))
+               rw_enter(&dzp->z_name_lock, RW_READER);
+
         mutex_enter(&dzp->z_lock);
         for (;;) {
                 if (dzp->z_unlinked) {
                         mutex_exit(&dzp->z_lock);
-                       rw_exit(&dzp->z_name_lock);
+                       if (!(flag & ZHAVELOCK))
+                               rw_exit(&dzp->z_name_lock);
                         return (ENOENT);
                 }
                 for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
@@ -224,7 +238,8 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
                 }
                 if (error != 0) {
                         mutex_exit(&dzp->z_lock);
-                       rw_exit(&dzp->z_name_lock);
+                       if (!(flag & ZHAVELOCK))
+                               rw_exit(&dzp->z_name_lock);
                         return (ENOENT);
                 }
                 if (dl == NULL) {
@@ -235,6 +250,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
                         cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
                         dl->dl_name = name;
                         dl->dl_sharecnt = 0;
+                       dl->dl_namelock = 0;
                         dl->dl_namesize = 0;
                         dl->dl_dzp = dzp;
                         dl->dl_next = dzp->z_dirlocks;
@@ -246,6 +262,12 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
                 cv_wait(&dl->dl_cv, &dzp->z_lock);
         }
  
+       /*
+        * If the z_name_lock was NOT held for this dirlock record it.
+        */
+       if (flag & ZHAVELOCK)
+               dl->dl_namelock = 1;
+
         if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
                 /*
                  * We're the second shared reference to dl.  Make a copy of
@@ -269,27 +291,34 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
          * See if there's an object by this name; if so, put a hold on it.
          */
         if (flag & ZXATTR) {
-               zoid = dzp->z_phys->zp_xattr;
-               error = (zoid == 0 ? ENOENT : 0);
+               error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zsb), &zoid,
+                   sizeof (zoid));
+               if (error == 0)
+                       error = (zoid == 0 ? ENOENT : 0);
         } else {
+#ifdef HAVE_DNLC
                 if (update)
-                       vp = dnlc_lookup(ZTOV(dzp), name);
+                       vp = dnlc_lookup(ZTOI(dzp), name);
                 if (vp == DNLC_NO_VNODE) {
-                       VN_RELE(vp);
+                       iput(vp);
                         error = ENOENT;
                 } else if (vp) {
                         if (flag & ZNEW) {
                                 zfs_dirent_unlock(dl);
-                               VN_RELE(vp);
+                               iput(vp);
                                 return (EEXIST);
                         }
                         *dlpp = dl;
                         *zpp = VTOZ(vp);
                         return (0);
                 } else {
-                       error = zfs_match_find(zfsvfs, dzp, name, exact,
+                       error = zfs_match_find(zsb, dzp, name, exact,
                             update, direntflags, realpnp, &zoid);
                 }
+#else
+               error = zfs_match_find(zsb, dzp, name, exact,
+                   update, direntflags, realpnp, &zoid);
+#endif /* HAVE_DNLC */
         }
         if (error) {
                 if (error != ENOENT || (flag & ZEXISTS)) {
@@ -301,13 +330,15 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
                         zfs_dirent_unlock(dl);
                         return (EEXIST);
                 }
-               error = zfs_zget(zfsvfs, zoid, zpp);
+               error = zfs_zget(zsb, zoid, zpp);
                 if (error) {
                         zfs_dirent_unlock(dl);
                         return (error);
                 }
+#ifdef HAVE_DNLC
                 if (!(flag & ZXATTR) && update)
-                       dnlc_update(ZTOV(dzp), name, ZTOV(*zpp));
+                       dnlc_update(ZTOI(dzp), name, ZTOI(*zpp));
+#endif /* HAVE_DNLC */
         }
  
         *dlpp = dl;
@@ -325,7 +356,10 @@ zfs_dirent_unlock(zfs_dirlock_t *dl)
         zfs_dirlock_t **prev_dl, *cur_dl;
  
         mutex_enter(&dzp->z_lock);
-       rw_exit(&dzp->z_name_lock);
+
+       if (!dl->dl_namelock)
+               rw_exit(&dzp->z_name_lock);
+
         if (dl->dl_sharecnt > 1) {
                 dl->dl_sharecnt--;
                 mutex_exit(&dzp->z_lock);
@@ -353,36 +387,44 @@ zfs_dirent_unlock(zfs_dirlock_t *dl)
   *     special pseudo-directory.
   */
  int
-zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags,
+zfs_dirlook(znode_t *dzp, char *name, struct inode **ipp, int flags,
      int *deflg, pathname_t *rpnp)
  {
         zfs_dirlock_t *dl;
         znode_t *zp;
         int error = 0;
+       uint64_t parent;
  
         if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
-               *vpp = ZTOV(dzp);
-               VN_HOLD(*vpp);
+               *ipp = ZTOI(dzp);
+               igrab(*ipp);
         } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
-               zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
+               zfs_sb_t *zsb = ZTOZSB(dzp);
+
                 /*
                  * If we are a snapshot mounted under .zfs, return
                  * the vp for the snapshot directory.
                  */
-               if (dzp->z_phys->zp_parent == dzp->z_id &&
-                   zfsvfs->z_parent != zfsvfs) {
-                       error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
-                           "snapshot", vpp, NULL, 0, NULL, kcred,
+               if ((error = sa_lookup(dzp->z_sa_hdl,
+                   SA_ZPL_PARENT(zsb), &parent, sizeof (parent))) != 0)
+                       return (error);
+#ifdef HAVE_SNAPSHOT
+               if (parent == dzp->z_id && zsb->z_parent != zsb) {
+                       error = zfsctl_root_lookup(zsb->z_parent->z_ctldir,
+                           "snapshot", ipp, NULL, 0, NULL, kcred,
                             NULL, NULL, NULL);
                         return (error);
                 }
+#endif /* HAVE_SNAPSHOT */
                 rw_enter(&dzp->z_parent_lock, RW_READER);
-               error = zfs_zget(zfsvfs, dzp->z_phys->zp_parent, &zp);
+               error = zfs_zget(zsb, parent, &zp);
                 if (error == 0)
-                       *vpp = ZTOV(zp);
+                       *ipp = ZTOI(zp);
                 rw_exit(&dzp->z_parent_lock);
+#ifdef HAVE_SNAPSHOT
         } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
-               *vpp = zfsctl_root(dzp);
+               *ipp = zfsctl_root(dzp);
+#endif /* HAVE_SNAPSHOT */
         } else {
                 int zf;
  
@@ -392,7 +434,7 @@ zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags,
  
                 error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp);
                 if (error == 0) {
-                       *vpp = ZTOV(zp);
+                       *ipp = ZTOI(zp);
                         zfs_dirent_unlock(dl);
                         dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
                 }
@@ -422,13 +464,13 @@ zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags,
  void
  zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
  {
-       zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+       zfs_sb_t *zsb = ZTOZSB(zp);
  
         ASSERT(zp->z_unlinked);
-       ASSERT3U(zp->z_phys->zp_links, ==, 0);
+       ASSERT(zp->z_links == 0);
  
         VERIFY3U(0, ==,
-           zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
+           zap_add_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx));
  }
  
  /*
@@ -436,7 +478,7 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
   * (force) umounted the file system.
   */
  void
-zfs_unlinked_drain(zfsvfs_t *zfsvfs)
+zfs_unlinked_drain(zfs_sb_t *zsb)
  {
         zap_cursor_t    zc;
         zap_attribute_t zap;
@@ -447,7 +489,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
         /*
          * Interate over the contents of the unlinked set.
          */
-       for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
+       for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj);
             zap_cursor_retrieve(&zc, &zap) == 0;
             zap_cursor_advance(&zc)) {
  
@@ -455,8 +497,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
                  * See what kind of object we have in list
                  */
  
-               error = dmu_object_info(zfsvfs->z_os,
-                   zap.za_first_integer, &doi);
+               error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi);
                 if (error != 0)
                         continue;
  
@@ -466,7 +507,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
                  * We need to re-mark these list entries for deletion,
                  * so we pull them back into core and set zp->z_unlinked.
                  */
-               error = zfs_zget(zfsvfs, zap.za_first_integer, &zp);
+               error = zfs_zget(zsb, zap.za_first_integer, &zp);
  
                 /*
                  * We may pick up znodes that are already marked for deletion.
@@ -478,7 +519,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
                         continue;
  
                 zp->z_unlinked = B_TRUE;
-               VN_RELE(ZTOV(zp));
+               iput(ZTOI(zp));
         }
         zap_cursor_fini(&zc);
  }
@@ -501,33 +542,34 @@ zfs_purgedir(znode_t *dzp)
         zap_attribute_t zap;
         znode_t         *xzp;
         dmu_tx_t        *tx;
-       zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
+       zfs_sb_t        *zsb = ZTOZSB(dzp);
         zfs_dirlock_t   dl;
         int skipped = 0;
         int error;
  
-       for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
+       for (zap_cursor_init(&zc, zsb->z_os, dzp->z_id);
             (error = zap_cursor_retrieve(&zc, &zap)) == 0;
             zap_cursor_advance(&zc)) {
-               error = zfs_zget(zfsvfs,
+               error = zfs_zget(zsb,
                     ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
                 if (error) {
                         skipped += 1;
                         continue;
                 }
  
-               ASSERT((ZTOV(xzp)->v_type == VREG) ||
-                   (ZTOV(xzp)->v_type == VLNK));
+               ASSERT(S_ISREG(ZTOI(xzp)->i_mode)||S_ISLNK(ZTOI(xzp)->i_mode));
  
-               tx = dmu_tx_create(zfsvfs->z_os);
-               dmu_tx_hold_bonus(tx, dzp->z_id);
+               tx = dmu_tx_create(zsb->z_os);
+               dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
                 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
-               dmu_tx_hold_bonus(tx, xzp->z_id);
-               dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+               dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
+               dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
+               /* Is this really needed ? */
+               zfs_sa_upgrade_txholds(tx, xzp);
                 error = dmu_tx_assign(tx, TXG_WAIT);
                 if (error) {
                         dmu_tx_abort(tx);
-                       VN_RELE(ZTOV(xzp));
+                       iput(ZTOI(xzp));
                         skipped += 1;
                         continue;
                 }
@@ -540,7 +582,7 @@ zfs_purgedir(znode_t *dzp)
                         skipped += 1;
                 dmu_tx_commit(tx);
  
-               VN_RELE(ZTOV(xzp));
+               iput(ZTOI(xzp));
         }
         zap_cursor_fini(&zc);
         if (error != ENOENT)
@@ -551,45 +593,29 @@ zfs_purgedir(znode_t *dzp)
  void
  zfs_rmnode(znode_t *zp)
  {
-       zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
-       objset_t        *os = zfsvfs->z_os;
+       zfs_sb_t        *zsb = ZTOZSB(zp);
+       objset_t        *os = zsb->z_os;
         znode_t         *xzp = NULL;
         dmu_tx_t        *tx;
         uint64_t        acl_obj;
+       uint64_t        xattr_obj;
         int             error;
  
-       ASSERT(ZTOV(zp)->v_count == 0);
-       ASSERT(zp->z_phys->zp_links == 0);
-
-       /*
-        * If this is a ZIL replay then leave the object in the unlinked set.
-        * Otherwise we can get a deadlock, because the delete can be
-        * quite large and span multiple tx's and txgs, but each replay
-        * creates a tx to atomically run the replay function and mark the
-        * replay record as complete. We deadlock trying to start a tx in
-        * a new txg to further the deletion but can't because the replay
-        * tx hasn't finished.
-        *
-        * We actually delete the object if we get a failure to create an
-        * object in zil_replay_log_record(), or after calling zil_replay().
-        */
-       if (zfsvfs->z_assign >= TXG_INITIAL) {
-               zfs_znode_dmu_fini(zp);
-               zfs_znode_free(zp);
-               return;
-       }
+       ASSERT(zp->z_links == 0);
+       ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0);
  
         /*
          * If this is an attribute directory, purge its contents.
          */
-       if (ZTOV(zp)->v_type == VDIR && (zp->z_phys->zp_flags & ZFS_XATTR)) {
+       if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
                 if (zfs_purgedir(zp) != 0) {
                         /*
                          * Not enough space to delete some xattrs.
                          * Leave it in the unlinked set.
                          */
                         zfs_znode_dmu_fini(zp);
-                       zfs_znode_free(zp);
+                       zfs_inode_destroy(ZTOI(zp));
+
                         return;
                 }
         }
@@ -603,7 +629,7 @@ zfs_rmnode(znode_t *zp)
                  * Not enough space.  Leave the file in the unlinked set.
                  */
                 zfs_znode_dmu_fini(zp);
-               zfs_znode_free(zp);
+               zfs_inode_destroy(ZTOI(zp));
                 return;
         }
  
@@ -611,25 +637,29 @@ zfs_rmnode(znode_t *zp)
          * If the file has extended attributes, we're going to unlink
          * the xattr dir.
          */
-       if (zp->z_phys->zp_xattr) {
-               error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
+       error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
+           &xattr_obj, sizeof (xattr_obj));
+       if (error == 0 && xattr_obj) {
+               error = zfs_zget(zsb, xattr_obj, &xzp);
                 ASSERT(error == 0);
         }
  
-       acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;
+       acl_obj = zfs_external_acl(zp);
  
         /*
          * Set up the final transaction.
          */
         tx = dmu_tx_create(os);
         dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
-       dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+       dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
         if (xzp) {
-               dmu_tx_hold_bonus(tx, xzp->z_id);
-               dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
+               dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, TRUE, NULL);
+               dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
         }
         if (acl_obj)
                 dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
+
+       zfs_sa_upgrade_txholds(tx, zp);
         error = dmu_tx_assign(tx, TXG_WAIT);
         if (error) {
                 /*
@@ -639,37 +669,40 @@ zfs_rmnode(znode_t *zp)
                  */
                 dmu_tx_abort(tx);
                 zfs_znode_dmu_fini(zp);
-               zfs_znode_free(zp);
+               zfs_inode_destroy(ZTOI(zp));
                 goto out;
         }
  
         if (xzp) {
-               dmu_buf_will_dirty(xzp->z_dbuf, tx);
+               ASSERT(error == 0);
                 mutex_enter(&xzp->z_lock);
                 xzp->z_unlinked = B_TRUE;       /* mark xzp for deletion */
-               xzp->z_phys->zp_links = 0;      /* no more links to it */
+               xzp->z_links = 0;       /* no more links to it */
+               VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb),
+                   &xzp->z_links, sizeof (xzp->z_links), tx));
                 mutex_exit(&xzp->z_lock);
                 zfs_unlinked_add(xzp, tx);
         }
  
         /* Remove this znode from the unlinked set */
         VERIFY3U(0, ==,
-           zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
+           zap_remove_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx));
  
         zfs_znode_delete(zp, tx);
  
         dmu_tx_commit(tx);
  out:
         if (xzp)
-               VN_RELE(ZTOV(xzp));
+               iput(ZTOI(xzp));
  }
  
  static uint64_t
-zfs_dirent(znode_t *zp)
+zfs_dirent(znode_t *zp, uint64_t mode)
  {
         uint64_t de = zp->z_id;
-       if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE)
-               de |= IFTODT((zp)->z_phys->zp_mode) << 60;
+
+       if (ZTOZSB(zp)->z_version >= ZPL_VERSION_DIRENT_TYPE)
+               de |= IFTODT(mode) << 60;
         return (de);
  }
  
@@ -680,12 +713,14 @@ int
  zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
  {
         znode_t *dzp = dl->dl_dzp;
-       vnode_t *vp = ZTOV(zp);
+       zfs_sb_t *zsb = ZTOZSB(zp);
         uint64_t value;
-       int zp_is_dir = (vp->v_type == VDIR);
+       int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
+       sa_bulk_attr_t bulk[5];
+       uint64_t mtime[2], ctime[2];
+       int count = 0;
         int error;
  
-       dmu_buf_will_dirty(zp->z_dbuf, tx);
         mutex_enter(&zp->z_lock);
  
         if (!(flag & ZRENAMING)) {
@@ -694,31 +729,82 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
                         mutex_exit(&zp->z_lock);
                         return (ENOENT);
                 }
-               zp->z_phys->zp_links++;
+               zp->z_links++;
+               SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
+                   &zp->z_links, sizeof (zp->z_links));
+
+       }
+       SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL,
+           &dzp->z_id, sizeof (dzp->z_id));
+       SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
+           &zp->z_pflags, sizeof (zp->z_pflags));
+
+       if (!(flag & ZNEW)) {
+               SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
+                   ctime, sizeof (ctime));
+               zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
+                   ctime, B_TRUE);
         }
-       zp->z_phys->zp_parent = dzp->z_id;      /* dzp is now zp's parent */
+       error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+       ASSERT(error == 0);
  
-       if (!(flag & ZNEW))
-               zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
         mutex_exit(&zp->z_lock);
  
-       dmu_buf_will_dirty(dzp->z_dbuf, tx);
         mutex_enter(&dzp->z_lock);
-       dzp->z_phys->zp_size++;                 /* one dirent added */
-       dzp->z_phys->zp_links += zp_is_dir;     /* ".." link from zp */
-       zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx);
+       dzp->z_size++;
+       dzp->z_links += zp_is_dir;
+       count = 0;
+       SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL,
+           &dzp->z_size, sizeof (dzp->z_size));
+       SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
+           &dzp->z_links, sizeof (dzp->z_links));
+       SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL,
+           mtime, sizeof (mtime));
+       SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
+           ctime, sizeof (ctime));
+       SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
+           &dzp->z_pflags, sizeof (dzp->z_pflags));
+       zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
+       error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
+       ASSERT(error == 0);
         mutex_exit(&dzp->z_lock);
  
-       value = zfs_dirent(zp);
-       error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name,
+       value = zfs_dirent(zp, zp->z_mode);
+       error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
             8, 1, &value, tx);
         ASSERT(error == 0);
  
-       dnlc_update(ZTOV(dzp), dl->dl_name, vp);
+#ifdef HAVE_DNLC
+       dnlc_update(ZTOI(dzp), dl->dl_name, vp);
+#endif /* HAVE_DNLC */
  
         return (0);
  }
  
+static int
+zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx,
+    int flag)
+{
+       int error;
+
+       if (ZTOZSB(zp)->z_norm) {
+               if (((ZTOZSB(zp)->z_case == ZFS_CASE_INSENSITIVE) &&
+                   (flag & ZCIEXACT)) ||
+                   ((ZTOZSB(zp)->z_case == ZFS_CASE_MIXED) &&
+                   !(flag & ZCILOOK)))
+                       error = zap_remove_norm(ZTOZSB(zp)->z_os,
+                           dzp->z_id, dl->dl_name, MT_EXACT, tx);
+               else
+                       error = zap_remove_norm(ZTOZSB(zp)->z_os,
+                           dzp->z_id, dl->dl_name, MT_FIRST, tx);
+       } else {
+               error = zap_remove(ZTOZSB(zp)->z_os,
+                   dzp->z_id, dl->dl_name, tx);
+       }
+
+       return (error);
+}
+
  /*
   * Unlink zp from dl, and mark zp for deletion if this was the last link.
   * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST).
@@ -731,71 +817,84 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
         boolean_t *unlinkedp)
  {
         znode_t *dzp = dl->dl_dzp;
-       vnode_t *vp = ZTOV(zp);
-       int zp_is_dir = (vp->v_type == VDIR);
+       zfs_sb_t *zsb = ZTOZSB(dzp);
+       int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
         boolean_t unlinked = B_FALSE;
+       sa_bulk_attr_t bulk[5];
+       uint64_t mtime[2], ctime[2];
+       int count = 0;
         int error;
  
-       dnlc_remove(ZTOV(dzp), dl->dl_name);
+#ifdef HAVE_DNLC
+       dnlc_remove(ZTOI(dzp), dl->dl_name);
+#endif /* HAVE_DNLC */
  
         if (!(flag & ZRENAMING)) {
-               dmu_buf_will_dirty(zp->z_dbuf, tx);
-
-               if (vn_vfswlock(vp))            /* prevent new mounts on zp */
-                       return (EBUSY);
+               mutex_enter(&zp->z_lock);
  
-               if (vn_ismntpt(vp)) {           /* don't remove mount point */
-                       vn_vfsunlock(vp);
-                       return (EBUSY);
+               if (zp_is_dir && !zfs_dirempty(zp)) {
+                       mutex_exit(&zp->z_lock);
+                       return (EEXIST);
                 }
  
-               mutex_enter(&zp->z_lock);
-               if (zp_is_dir && !zfs_dirempty(zp)) {   /* dir not empty */
+               /*
+                * If we get here, we are going to try to remove the object.
+                * First try removing the name from the directory; if that
+                * fails, return the error.
+                */
+               error = zfs_dropname(dl, zp, dzp, tx, flag);
+               if (error != 0) {
                         mutex_exit(&zp->z_lock);
-                       vn_vfsunlock(vp);
-                       return (EEXIST);
+                       return (error);
                 }
-               if (zp->z_phys->zp_links <= zp_is_dir) {
-                       zfs_panic_recover("zfs: link count on %s is %u, "
-                           "should be at least %u",
-                           zp->z_vnode->v_path ? zp->z_vnode->v_path :
-                           "<unknown>", (int)zp->z_phys->zp_links,
-                           zp_is_dir + 1);
-                       zp->z_phys->zp_links = zp_is_dir + 1;
+
+               if (zp->z_links <= zp_is_dir) {
+                       zfs_panic_recover("zfs: link count on %lu is %u, "
+                           "should be at least %u", zp->z_id,
+                           (int)zp->z_links, zp_is_dir + 1);
+                       zp->z_links = zp_is_dir + 1;
                 }
-               if (--zp->z_phys->zp_links == zp_is_dir) {
+               if (--zp->z_links == zp_is_dir) {
                         zp->z_unlinked = B_TRUE;
-                       zp->z_phys->zp_links = 0;
+                       zp->z_links = 0;
                         unlinked = B_TRUE;
                 } else {
-                       zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
+                       SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb),
+                           NULL, &ctime, sizeof (ctime));
+                       SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
+                           NULL, &zp->z_pflags, sizeof (zp->z_pflags));
+                       zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
+                           B_TRUE);
                 }
+               SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb),
+                   NULL, &zp->z_links, sizeof (zp->z_links));
+               error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+               count = 0;
+               ASSERT(error == 0);
                 mutex_exit(&zp->z_lock);
-               vn_vfsunlock(vp);
+       } else {
+               error = zfs_dropname(dl, zp, dzp, tx, flag);
+               if (error != 0)
+                       return (error);
         }
  
-       dmu_buf_will_dirty(dzp->z_dbuf, tx);
         mutex_enter(&dzp->z_lock);
-       dzp->z_phys->zp_size--;                 /* one dirent removed */
-       dzp->z_phys->zp_links -= zp_is_dir;     /* ".." link from zp */
-       zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx);
-       mutex_exit(&dzp->z_lock);
-
-       if (zp->z_zfsvfs->z_norm) {
-               if (((zp->z_zfsvfs->z_case == ZFS_CASE_INSENSITIVE) &&
-                   (flag & ZCIEXACT)) ||
-                   ((zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) &&
-                   !(flag & ZCILOOK)))
-                       error = zap_remove_norm(zp->z_zfsvfs->z_os,
-                           dzp->z_id, dl->dl_name, MT_EXACT, tx);
-               else
-                       error = zap_remove_norm(zp->z_zfsvfs->z_os,
-                           dzp->z_id, dl->dl_name, MT_FIRST, tx);
-       } else {
-               error = zap_remove(zp->z_zfsvfs->z_os,
-                   dzp->z_id, dl->dl_name, tx);
-       }
+       dzp->z_size--;          /* one dirent removed */
+       dzp->z_links -= zp_is_dir;      /* ".." link from zp */
+       SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb),
+           NULL, &dzp->z_links, sizeof (dzp->z_links));
+       SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb),
+           NULL, &dzp->z_size, sizeof (dzp->z_size));
+       SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb),
+           NULL, ctime, sizeof (ctime));
+       SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb),
+           NULL, mtime, sizeof (mtime));
+       SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
+           NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
+       zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
+       error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
         ASSERT(error == 0);
+       mutex_exit(&dzp->z_lock);
  
         if (unlinkedp != NULL)
                 *unlinkedp = unlinked;
@@ -813,57 +912,76 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
  boolean_t
  zfs_dirempty(znode_t *dzp)
  {
-       return (dzp->z_phys->zp_size == 2 && dzp->z_dirlocks == 0);
+       return (dzp->z_size == 2 && dzp->z_dirlocks == 0);
  }
  
  int
-zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
+zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr)
  {
-       zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+       zfs_sb_t *zsb = ZTOZSB(zp);
         znode_t *xzp;
         dmu_tx_t *tx;
         int error;
-       zfs_fuid_info_t *fuidp = NULL;
+       zfs_acl_ids_t acl_ids;
+       boolean_t fuid_dirtied;
+#ifdef DEBUG
+       uint64_t parent;
+#endif
  
-       *xvpp = NULL;
+       *xipp = NULL;
  
-       if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr))
+       if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)))
                 return (error);
  
-       tx = dmu_tx_create(zfsvfs->z_os);
-       dmu_tx_hold_bonus(tx, zp->z_id);
-       dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
-       if (IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr))) {
-               if (zfsvfs->z_fuid_obj == 0) {
-                       dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
-                       dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
-                           FUID_SIZE_ESTIMATE(zfsvfs));
-                       dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
-               } else {
-                       dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
-                       dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
-                           FUID_SIZE_ESTIMATE(zfsvfs));
-               }
+       if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
+           &acl_ids)) != 0)
+               return (error);
+       if (zfs_acl_ids_overquota(zsb, &acl_ids)) {
+               zfs_acl_ids_free(&acl_ids);
+               return (EDQUOT);
         }
-       error = dmu_tx_assign(tx, zfsvfs->z_assign);
+
+top:
+       tx = dmu_tx_create(zsb->z_os);
+       dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+           ZFS_SA_BASE_ATTR_SIZE);
+       dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+       dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
+       fuid_dirtied = zsb->z_fuid_dirty;
+       if (fuid_dirtied)
+               zfs_fuid_txhold(zsb, tx);
+       error = dmu_tx_assign(tx, TXG_NOWAIT);
         if (error) {
-               if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
+               if (error == ERESTART) {
                         dmu_tx_wait(tx);
+                       dmu_tx_abort(tx);
+                       goto top;
+               }
+               zfs_acl_ids_free(&acl_ids);
                 dmu_tx_abort(tx);
                 return (error);
         }
-       zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, 0, NULL, &fuidp);
-       ASSERT(xzp->z_phys->zp_parent == zp->z_id);
-       dmu_buf_will_dirty(zp->z_dbuf, tx);
-       zp->z_phys->zp_xattr = xzp->z_id;
-
-       (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
-           xzp, "", NULL, fuidp, vap);
-       if (fuidp)
-               zfs_fuid_info_free(fuidp);
+       zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);
+
+       if (fuid_dirtied)
+               zfs_fuid_sync(zsb, tx);
+
+#ifdef DEBUG
+       error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zsb),
+           &parent, sizeof (parent));
+       ASSERT(error == 0 && parent == zp->z_id);
+#endif
+
+       VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xzp->z_id,
+           sizeof (xzp->z_id), tx));
+
+       (void) zfs_log_create(zsb->z_log, tx, TX_MKXATTR, zp,
+           xzp, "", NULL, acl_ids.z_fuidp, vap);
+
+       zfs_acl_ids_free(&acl_ids);
         dmu_tx_commit(tx);
  
-       *xvpp = ZTOV(xzp);
+       *xipp = ZTOI(xzp);
  
         return (0);
  }
@@ -876,15 +994,15 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
   *             cr      - credentials of caller
   *             flags   - flags from the VOP_LOOKUP call
   *
- *     OUT:    xzpp    - pointer to extended attribute znode
+ *     OUT:    xipp    - pointer to extended attribute znode
   *
   *     RETURN: 0 on success
   *             error number on failure
   */
  int
-zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags)
+zfs_get_xattrdir(znode_t *zp, struct inode **xipp, cred_t *cr, int flags)
  {
-       zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
+       zfs_sb_t        *zsb = ZTOZSB(zp);
         znode_t         *xzp;
         zfs_dirlock_t   *dl;
         vattr_t         va;
@@ -895,19 +1013,17 @@ top:
                 return (error);
  
         if (xzp != NULL) {
-               *xvpp = ZTOV(xzp);
+               *xipp = ZTOI(xzp);
                 zfs_dirent_unlock(dl);
                 return (0);
         }
  
-       ASSERT(zp->z_phys->zp_xattr == 0);
-
         if (!(flags & CREATE_XATTR_DIR)) {
                 zfs_dirent_unlock(dl);
                 return (ENOENT);
         }
  
-       if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
+       if (zsb->z_vfs->mnt_flags & MNT_READONLY) {
                 zfs_dirent_unlock(dl);
                 return (EROFS);
         }
@@ -922,15 +1038,14 @@ top:
          * Once in a directory the ability to read/write attributes
          * is controlled by the permissions on the attribute file.
          */
-       va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
-       va.va_type = VDIR;
+       va.va_mask = ATTR_MODE | ATTR_UID | ATTR_GID;
         va.va_mode = S_IFDIR | S_ISVTX | 0777;
         zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);
  
-       error = zfs_make_xattrdir(zp, &va, xvpp, cr);
+       error = zfs_make_xattrdir(zp, &va, xipp, cr);
         zfs_dirent_unlock(dl);
  
-       if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+       if (error == ERESTART) {
                 /* NB: we already did dmu_tx_wait() if necessary */
                 goto top;
         }
@@ -954,22 +1069,22 @@ top:
  int
  zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
  {
-       uid_t           uid;
+       uid_t           uid;
         uid_t           downer;
         uid_t           fowner;
-       zfsvfs_t        *zfsvfs = zdp->z_zfsvfs;
+       zfs_sb_t        *zsb = ZTOZSB(zdp);
  
-       if (zdp->z_zfsvfs->z_assign >= TXG_INITIAL)     /* ZIL replay */
+       if (zsb->z_replay)
                 return (0);
  
-       if ((zdp->z_phys->zp_mode & S_ISVTX) == 0)
+       if ((zdp->z_mode & S_ISVTX) == 0)
                 return (0);
  
-       downer = zfs_fuid_map_id(zfsvfs, zdp->z_phys->zp_uid, cr, ZFS_OWNER);
-       fowner = zfs_fuid_map_id(zfsvfs, zp->z_phys->zp_uid, cr, ZFS_OWNER);
+       downer = zfs_fuid_map_id(zsb, zdp->z_uid, cr, ZFS_OWNER);
+       fowner = zfs_fuid_map_id(zsb, zp->z_uid, cr, ZFS_OWNER);
  
         if ((uid = crgetuid(cr)) == downer || uid == fowner ||
-           (ZTOV(zp)->v_type == VREG &&
+           (S_ISDIR(ZTOI(zp)->i_mode) &&
             zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0))
                 return (0);
         else