Fix zfs_sb_teardown/zfs_resume_fs NULL dereference
[zfs.git] / module / zfs / zfs_vfsops.c
index 175dca8..602c332 100644 (file)
@@ -49,6 +49,7 @@
 #include <sys/spa.h>
 #include <sys/zap.h>
 #include <sys/sa.h>
+#include <sys/sa_impl.h>
 #include <sys/varargs.h>
 #include <sys/policy.h>
 #include <sys/atomic.h>
@@ -63,7 +64,6 @@
 #include <sys/dnlc.h>
 #include <sys/dmu_objset.h>
 #include <sys/spa_boot.h>
-#include <sys/sa.h>
 #include <sys/zpl.h>
 #include "zfs_comutil.h"
 
@@ -300,7 +300,6 @@ static int
 zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
     uint64_t *userp, uint64_t *groupp)
 {
-       znode_phys_t *znp = data;
        int error = 0;
 
        /*
@@ -319,20 +318,18 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
                return (EEXIST);
 
        if (bonustype == DMU_OT_ZNODE) {
+               znode_phys_t *znp = data;
                *userp = znp->zp_uid;
                *groupp = znp->zp_gid;
        } else {
                int hdrsize;
+               sa_hdr_phys_t *sap = data;
+               sa_hdr_phys_t sa = *sap;
+               boolean_t swap = B_FALSE;
 
                ASSERT(bonustype == DMU_OT_SA);
-               hdrsize = sa_hdrsize(data);
 
-               if (hdrsize != 0) {
-                       *userp = *((uint64_t *)((uintptr_t)data + hdrsize +
-                           SA_UID_OFFSET));
-                       *groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
-                           SA_GID_OFFSET));
-               } else {
+               if (sa.sa_magic == 0) {
                        /*
                         * This should only happen for newly created
                         * files that haven't had the znode data filled
@@ -340,6 +337,25 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
                         */
                        *userp = 0;
                        *groupp = 0;
+                       return (0);
+               }
+               if (sa.sa_magic == BSWAP_32(SA_MAGIC)) {
+                       sa.sa_magic = SA_MAGIC;
+                       sa.sa_layout_info = BSWAP_16(sa.sa_layout_info);
+                       swap = B_TRUE;
+               } else {
+                       VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
+               }
+
+               hdrsize = sa_hdrsize(&sa);
+               VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t));
+               *userp = *((uint64_t *)((uintptr_t)data + hdrsize +
+                   SA_UID_OFFSET));
+               *groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
+                   SA_GID_OFFSET));
+               if (swap) {
+                       *userp = BSWAP_64(*userp);
+                       *groupp = BSWAP_64(*groupp);
                }
        }
        return (error);
@@ -1016,7 +1032,7 @@ EXPORT_SYMBOL(zfs_sb_prune);
 #endif /* HAVE_SHRINK */
 
 /*
- * Teardown the zfs_sb_t::z_os.
+ * Teardown the zfs_sb_t.
  *
  * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
  * and 'z_teardown_inactive_lock' held.
@@ -1037,10 +1053,17 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
                 * for non-snapshots.
                 */
                shrink_dcache_sb(zsb->z_parent->z_sb);
-               (void) spl_invalidate_inodes(zsb->z_parent->z_sb, 0);
        }
 
        /*
+        * If someone has not already unmounted this file system,
+        * drain the iput_taskq to ensure all active references to the
+        * zfs_sb_t have been handled; only then can it be safely destroyed.
+        */
+       if (zsb->z_os)
+               taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os)));
+
+       /*
         * Close the zil. NB: Can't close the zil while zfs_inactive
         * threads are blocked as zil_close can call zfs_inactive.
         */
@@ -1063,25 +1086,26 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
        }
 
        /*
-        * At this point there are no vops active, and any new vops will
-        * fail with EIO since we have z_teardown_lock for writer (only
-        * relavent for forced unmount).
+        * At this point there are no VFS ops active, and any new VFS ops
+        * will fail with EIO since we have z_teardown_lock for writer (only
+        * relevant for forced unmount).
         *
         * Release all holds on dbufs.
         */
        mutex_enter(&zsb->z_znodes_lock);
        for (zp = list_head(&zsb->z_all_znodes); zp != NULL;
-           zp = list_next(&zsb->z_all_znodes, zp))
+           zp = list_next(&zsb->z_all_znodes, zp)) {
                if (zp->z_sa_hdl) {
                        ASSERT(atomic_read(&ZTOI(zp)->i_count) > 0);
                        zfs_znode_dmu_fini(zp);
                }
+       }
        mutex_exit(&zsb->z_znodes_lock);
 
        /*
-        * If we are unmounting, set the unmounted flag and let new vops
+        * If we are unmounting, set the unmounted flag and let new VFS ops
         * unblock.  zfs_inactive will have the unmounted behavior, and all
-        * other vops will fail with EIO.
+        * other VFS ops will fail with EIO.
         */
        if (unmounting) {
                zsb->z_unmounted = B_TRUE;
@@ -1166,6 +1190,9 @@ zfs_domount(struct super_block *sb, void *data, int silent)
        sb->s_op = &zpl_super_operations;
        sb->s_xattr = zpl_xattr_handlers;
        sb->s_export_op = &zpl_export_operations;
+#ifdef HAVE_S_D_OP
+       sb->s_d_op = &zpl_dentry_operations;
+#endif /* HAVE_S_D_OP */
 
        /* Set features for file system. */
        zfs_set_fuid_feature(zsb);
@@ -1376,7 +1403,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 EXPORT_SYMBOL(zfs_vget);
 
 /*
- * Block out VOPs and close zfs_sb_t::z_os
+ * Block out VFS ops and close zfs_sb_t
  *
  * Note, if successful, then we return with the 'z_teardown_lock' and
  * 'z_teardown_inactive_lock' write held.
@@ -1388,6 +1415,7 @@ zfs_suspend_fs(zfs_sb_t *zsb)
 
        if ((error = zfs_sb_teardown(zsb, B_FALSE)) != 0)
                return (error);
+
        dmu_objset_disown(zsb->z_os, zsb);
 
        return (0);
@@ -1395,7 +1423,7 @@ zfs_suspend_fs(zfs_sb_t *zsb)
 EXPORT_SYMBOL(zfs_suspend_fs);
 
 /*
- * Reopen zfs_sb_t::z_os and release VOPs.
+ * Reopen zfs_sb_t and release VFS ops.
  */
 int
 zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
@@ -1424,33 +1452,41 @@ zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
                        goto bail;
 
                VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0);
+               zsb->z_rollback_time = jiffies;
 
                /*
-                * Attempt to re-establish all the active znodes with
-                * their dbufs.  If a zfs_rezget() fails, then we'll let
-                * any potential callers discover that via ZFS_ENTER_VERIFY_VP
-                * when they try to use their znode.
+                * Attempt to re-establish all the active inodes with their
+                * dbufs.  If a zfs_rezget() fails, then we unhash the inode
+                * and mark it stale.  This prevents a collision if a new
+                * inode/object is created which must use the same inode
+                * number.  The stale inode will be released when the
+                * VFS prunes the dentry holding the remaining references
+                * on the stale inode.
                 */
                mutex_enter(&zsb->z_znodes_lock);
                for (zp = list_head(&zsb->z_all_znodes); zp;
                    zp = list_next(&zsb->z_all_znodes, zp)) {
-                       (void) zfs_rezget(zp);
+                       err2 = zfs_rezget(zp);
+                       if (err2) {
+                               remove_inode_hash(ZTOI(zp));
+                               zp->z_is_stale = B_TRUE;
+                       }
                }
                mutex_exit(&zsb->z_znodes_lock);
-
        }
 
 bail:
-       /* release the VOPs */
+       /* release the VFS ops */
        rw_exit(&zsb->z_teardown_inactive_lock);
        rrw_exit(&zsb->z_teardown_lock, FTAG);
 
        if (err) {
                /*
-                * Since we couldn't reopen zfs_sb_t::z_os, force
-                * unmount this file system.
+                * Since we couldn't reopen zfs_sb_t or set up the
+                * sa framework, force unmount this file system.
                 */
-               (void) zfs_umount(zsb->z_sb);
+               if (zsb->z_os)
+                       (void) zfs_umount(zsb->z_sb);
        }
        return (err);
 }
@@ -1504,7 +1540,7 @@ zfs_set_version(zfs_sb_t *zsb, uint64_t newvers)
 
                error = zap_add(os, MASTER_NODE_OBJ,
                    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
-               ASSERT3U(error, ==, 0);
+               ASSERT0(error);
 
                VERIFY(0 == sa_set_sa_object(os, sa_obj));
                sa_register_update_callback(os, zfs_sa_upgrade);