Move cv_destroy() outside zp->z_range_lock()
authorBrian Behlendorf <behlendorf1@llnl.gov>
Fri, 4 Feb 2011 22:38:11 +0000 (14:38 -0800)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Thu, 10 Feb 2011 17:27:21 +0000 (09:27 -0800)
With the recent SPL change (d599e4fa) that forces cv_destroy()
to block until all waiters have been woken.  It is now unsafe
to call cv_destroy() under the zp->z_range_lock() because it
is used as the condition variable mutex.  If there are waiters
cv_destroy() will block until they wake up and aquire the mutex.
However, they will never aquire the mutex because cv_destroy()
will not return allowing it's caller to drop the lock.  Deadlock.

To avoid this cv_destroy() is now run asynchronously in a taskq.
This solves two problems:

1) It is no longer run under the zp->z_range_lock so no deadlock.
2) Since cv_destroy() may now block we don't want this slowing
   down zfs_range_unlock() and throttling the system.

This was not as much of an issue under OpenSolaris because their
cv_destroy() implementation does not do anything.  They do however
risk a bad paging request if cv_destroy() returns, the memory holding
the condition variable is free'd, and then the waiters wake up and
try to reference it.  It's a very small unlikely race, but it is
possible.

module/zfs/zfs_rlock.c

index 9362fb4..26ad58d 100644 (file)
@@ -453,6 +453,20 @@ zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
        return (new);
 }
 
+static void
+zfs_range_free(void *arg)
+{
+       rl_t *rl = arg;
+
+       if (rl->r_write_wanted)
+               cv_destroy(&rl->r_wr_cv);
+
+       if (rl->r_read_wanted)
+               cv_destroy(&rl->r_rd_cv);
+
+       kmem_free(rl, sizeof (rl_t));
+}
+
 /*
  * Unlock a reader lock
  */
@@ -472,14 +486,14 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
         */
        if (remove->r_cnt == 1) {
                avl_remove(tree, remove);
-               if (remove->r_write_wanted) {
+               mutex_exit(&zp->z_range_lock);
+               if (remove->r_write_wanted)
                        cv_broadcast(&remove->r_wr_cv);
-                       cv_destroy(&remove->r_wr_cv);
-               }
-               if (remove->r_read_wanted) {
+
+               if (remove->r_read_wanted)
                        cv_broadcast(&remove->r_rd_cv);
-                       cv_destroy(&remove->r_rd_cv);
-               }
+
+               taskq_dispatch(system_taskq, zfs_range_free, remove, 0);
        } else {
                ASSERT3U(remove->r_cnt, ==, 0);
                ASSERT3U(remove->r_write_wanted, ==, 0);
@@ -505,19 +519,21 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
                        rl->r_cnt--;
                        if (rl->r_cnt == 0) {
                                avl_remove(tree, rl);
-                               if (rl->r_write_wanted) {
+
+                               if (rl->r_write_wanted)
                                        cv_broadcast(&rl->r_wr_cv);
-                                       cv_destroy(&rl->r_wr_cv);
-                               }
-                               if (rl->r_read_wanted) {
+
+                               if (rl->r_read_wanted)
                                        cv_broadcast(&rl->r_rd_cv);
-                                       cv_destroy(&rl->r_rd_cv);
-                               }
-                               kmem_free(rl, sizeof (rl_t));
+
+                               taskq_dispatch(system_taskq,
+                                   zfs_range_free, rl, 0);
                        }
                }
+
+               mutex_exit(&zp->z_range_lock);
+               kmem_free(remove, sizeof (rl_t));
        }
-       kmem_free(remove, sizeof (rl_t));
 }
 
 /*
@@ -537,22 +553,19 @@ zfs_range_unlock(rl_t *rl)
                /* writer locks can't be shared or split */
                avl_remove(&zp->z_range_avl, rl);
                mutex_exit(&zp->z_range_lock);
-               if (rl->r_write_wanted) {
+               if (rl->r_write_wanted)
                        cv_broadcast(&rl->r_wr_cv);
-                       cv_destroy(&rl->r_wr_cv);
-               }
-               if (rl->r_read_wanted) {
+
+               if (rl->r_read_wanted)
                        cv_broadcast(&rl->r_rd_cv);
-                       cv_destroy(&rl->r_rd_cv);
-               }
-               kmem_free(rl, sizeof (rl_t));
+
+               taskq_dispatch(system_taskq, zfs_range_free, rl, 0);
        } else {
                /*
                 * lock may be shared, let zfs_range_unlock_reader()
-                * release the lock and free the rl_t
+                * release the zp->z_range_lock lock and free the rl_t
                 */
                zfs_range_unlock_reader(zp, rl);
-               mutex_exit(&zp->z_range_lock);
        }
 }