Fix zfsctl_expire_snapshot() deadlock

[zfs.git] / module / zfs / zfs_ctldir.c
diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c

index 01bf52f..168f853 100644 (file)
--- a/module/zfs/zfs_ctldir.c
+++ b/module/zfs/zfs_ctldir.c
@@ -90,6 +90,11 @@
   */
  int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;
  
+/*
+ * Dedicated task queue for unmounting snapshots.
+ */
+static taskq_t *zfs_expire_taskq;
+
  static zfs_snapentry_t *
  zfsctl_sep_alloc(void)
  {
@@ -112,16 +117,15 @@ zfsctl_sep_free(zfs_snapentry_t *sep)
  static void
  zfsctl_expire_snapshot(void *data)
  {
-       zfs_snapentry_t *sep;
-       zfs_sb_t *zsb;
+       zfs_snapentry_t *sep = (zfs_snapentry_t *)data;
+       zfs_sb_t *zsb = ITOZSB(sep->se_inode);
         int error;
  
-       sep = spl_get_work_data(data, zfs_snapentry_t, se_work.work);
-       zsb = ITOZSB(sep->se_inode);
-
         error = zfsctl_unmount_snapshot(zsb, sep->se_name, MNT_EXPIRE);
         if (error == EBUSY)
-               schedule_delayed_work(&sep->se_work, zfs_expire_snapshot * HZ);
+               sep->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq,
+                   zfsctl_expire_snapshot, sep, TQ_SLEEP,
+                   ddi_get_lbolt() + zfs_expire_snapshot * HZ);
  }
  
  int
@@ -193,6 +197,7 @@ zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id,
         zp->z_is_mapped = B_FALSE;
         zp->z_is_ctldir = B_TRUE;
         zp->z_is_sa = B_FALSE;
+       zp->z_is_stale = B_FALSE;
         ip->i_ino = id;
         ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO);
         ip->i_uid = 0;
@@ -212,6 +217,7 @@ zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id,
  
         mutex_enter(&zsb->z_znodes_lock);
         list_insert_tail(&zsb->z_all_znodes, zp);
+       zsb->z_nr_znodes++;
         membar_producer();
         mutex_exit(&zsb->z_znodes_lock);
  
@@ -224,13 +230,13 @@ zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id,
   * Lookup the inode with given id, it will be allocated if needed.
   */
  static struct inode *
-zfsctl_inode_lookup(zfs_sb_t *zsb, unsigned long id,
+zfsctl_inode_lookup(zfs_sb_t *zsb, uint64_t id,
      const struct file_operations *fops, const struct inode_operations *ops)
  {
         struct inode *ip = NULL;
  
         while (ip == NULL) {
-               ip = ilookup(zsb->z_sb, id);
+               ip = ilookup(zsb->z_sb, (unsigned long)id);
                 if (ip)
                         break;
  
@@ -266,10 +272,14 @@ zfsctl_inode_inactive(struct inode *ip)
   * therefore checks against a vfs_count of 2 instead of 1.  This reference
   * is removed when the ctldir is destroyed in the unmount.  All other entities
   * under the '.zfs' directory are created dynamically as needed.
+ *
+ * Because the dynamically created '.zfs' directory entries assume the use
+ * of 64-bit inode numbers, this support must be disabled on 32-bit systems.
   */
  int
  zfsctl_create(zfs_sb_t *zsb)
  {
+#if defined(CONFIG_64BIT)
         ASSERT(zsb->z_ctldir == NULL);
  
         zsb->z_ctldir = zfsctl_inode_alloc(zsb, ZFSCTL_INO_ROOT,
@@ -278,6 +288,9 @@ zfsctl_create(zfs_sb_t *zsb)
                 return (ENOENT);
  
         return (0);
+#else
+       return (EOPNOTSUPP);
+#endif /* CONFIG_64BIT */
  }
  
  /*
@@ -430,7 +443,7 @@ zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp,
  
         ZFS_ENTER(zsb);
  
-       error = dmu_snapshot_id(zsb->z_os, name, &id);
+       error = dmu_snapshot_lookup(zsb->z_os, name, &id);
         if (error) {
                 ZFS_EXIT(zsb);
                 return (error);
@@ -653,7 +666,7 @@ zfsctl_snapdir_inactive(struct inode *ip)
  
                 if (sep->se_inode == ip) {
                         avl_remove(&zsb->z_ctldir_snaps, sep);
-                       cancel_delayed_work_sync(&sep->se_work);
+                       taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid);
                         zfsctl_sep_free(sep);
                         break;
                 }
@@ -673,7 +686,7 @@ zfsctl_snapdir_inactive(struct inode *ip)
         "exec 0</dev/null " \
         "     1>/dev/null " \
         "     2>/dev/null; " \
-       "umount -t zfs -n %s%s"
+       "umount -t zfs -n %s'%s'"
  
  static int
  __zfsctl_unmount_snapshot(zfs_snapentry_t *sep, int flags)
@@ -684,7 +697,7 @@ __zfsctl_unmount_snapshot(zfs_snapentry_t *sep, int flags)
  
         argv[2] = kmem_asprintf(SET_UNMOUNT_CMD,
             flags & MNT_FORCE ? "-f " : "", sep->se_path);
-       error = call_usermodehelper(argv[0], argv, envp, 1);
+       error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
         strfree(argv[2]);
  
         /*
@@ -700,7 +713,8 @@ __zfsctl_unmount_snapshot(zfs_snapentry_t *sep, int flags)
          * to prevent zfsctl_expire_snapshot() from attempting a unmount.
          */
         if ((error == 0) && !(flags & MNT_EXPIRE))
-               cancel_delayed_work(&sep->se_work);
+               taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid);
+
  
         return (error);
  }
@@ -718,7 +732,11 @@ zfsctl_unmount_snapshot(zfs_sb_t *zsb, char *name, int flags)
         sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL);
         if (sep) {
                 avl_remove(&zsb->z_ctldir_snaps, sep);
+               mutex_exit(&zsb->z_ctldir_lock);
+
                 error = __zfsctl_unmount_snapshot(sep, flags);
+
+               mutex_enter(&zsb->z_ctldir_lock);
                 if (error == EBUSY)
                         avl_add(&zsb->z_ctldir_snaps, sep);
                 else
@@ -753,7 +771,11 @@ zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count)
         while (sep != NULL) {
                 next = AVL_NEXT(&zsb->z_ctldir_snaps, sep);
                 avl_remove(&zsb->z_ctldir_snaps, sep);
+               mutex_exit(&zsb->z_ctldir_lock);
+
                 error = __zfsctl_unmount_snapshot(sep, flags);
+
+               mutex_enter(&zsb->z_ctldir_lock);
                 if (error == EBUSY) {
                         avl_add(&zsb->z_ctldir_snaps, sep);
                         (*count)++;
@@ -773,7 +795,7 @@ zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count)
         "exec 0</dev/null " \
         "     1>/dev/null " \
         "     2>/dev/null; " \
-       "mount -t zfs -n %s %s"
+       "mount -t zfs -n '%s' '%s'"
  
  int
  zfsctl_mount_snapshot(struct path *path, int flags)
@@ -809,7 +831,7 @@ zfsctl_mount_snapshot(struct path *path, int flags)
          * to safely abort the automount.  This should be very rare.
          */
         argv[2] = kmem_asprintf(SET_MOUNT_CMD, full_name, full_path);
-       error = call_usermodehelper(argv[0], argv, envp, 1);
+       error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
         strfree(argv[2]);
         if (error) {
                 printk("ZFS: Unable to automount %s at %s: %d\n",
@@ -829,7 +851,7 @@ zfsctl_mount_snapshot(struct path *path, int flags)
         sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL);
         if (sep) {
                 avl_remove(&zsb->z_ctldir_snaps, sep);
-               cancel_delayed_work_sync(&sep->se_work);
+               taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid);
                 zfsctl_sep_free(sep);
         }
  
@@ -839,8 +861,9 @@ zfsctl_mount_snapshot(struct path *path, int flags)
         sep->se_inode = ip;
         avl_add(&zsb->z_ctldir_snaps, sep);
  
-        spl_init_delayed_work(&sep->se_work, zfsctl_expire_snapshot, sep);
-       schedule_delayed_work(&sep->se_work, zfs_expire_snapshot * HZ);
+       sep->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq,
+           zfsctl_expire_snapshot, sep, TQ_SLEEP,
+           ddi_get_lbolt() + zfs_expire_snapshot * HZ);
  
         mutex_exit(&zsb->z_ctldir_lock);
  error:
@@ -894,7 +917,7 @@ zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp)
          */
         sep = avl_first(&zsb->z_ctldir_snaps);
         while (sep != NULL) {
-               error = dmu_snapshot_id(zsb->z_os, sep->se_name, &id);
+               error = dmu_snapshot_lookup(zsb->z_os, sep->se_name, &id);
                 if (error)
                         goto out;
  
@@ -912,8 +935,8 @@ zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp)
                  * race cannot occur to an expired mount point because
                  * we hold the zsb->z_ctldir_lock to prevent the race.
                  */
-               sbp = sget(&zpl_fs_type, zfsctl_test_super,
-                   zfsctl_set_super, &id);
+               sbp = zpl_sget(&zpl_fs_type, zfsctl_test_super,
+                   zfsctl_set_super, 0, &id);
                 if (IS_ERR(sbp)) {
                         error = -PTR_ERR(sbp);
                 } else {
@@ -944,7 +967,7 @@ zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
  
         if (zsb->z_shares_dir == 0) {
                 ZFS_EXIT(zsb);
-               return (-ENOTSUP);
+               return (ENOTSUP);
         }
  
         error = zfs_zget(zsb, zsb->z_shares_dir, &dzp);
@@ -969,6 +992,8 @@ zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
  void
  zfsctl_init(void)
  {
+       zfs_expire_taskq = taskq_create("z_unmount", 1, maxclsyspri,
+           1, 8, TASKQ_PREPOPULATE);
  }
  
  /*
@@ -978,6 +1003,7 @@ zfsctl_init(void)
  void
  zfsctl_fini(void)
  {
+       taskq_destroy(zfs_expire_taskq);
  }
  
  module_param(zfs_expire_snapshot, int, 0644);