Illumos #3006
[zfs.git] / module / zfs / zfs_rlock.c
index 26ad58d..136972b 100644 (file)
@@ -22,6 +22,9 @@
  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
 
 /*
  * This file contains the code to implement file range locking in
@@ -199,7 +202,7 @@ zfs_range_proxify(avl_tree_t *tree, rl_t *rl)
        rl->r_cnt = 0;
 
        /* create a proxy range lock */
-       proxy = kmem_alloc(sizeof (rl_t), KM_SLEEP);
+       proxy = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE);
        proxy->r_off = rl->r_off;
        proxy->r_len = rl->r_len;
        proxy->r_cnt = 1;
@@ -228,7 +231,7 @@ zfs_range_split(avl_tree_t *tree, rl_t *rl, uint64_t off)
        ASSERT(rl->r_read_wanted == B_FALSE);
 
        /* create the rear proxy range lock */
-       rear = kmem_alloc(sizeof (rl_t), KM_SLEEP);
+       rear = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE);
        rear->r_off = off;
        rear->r_len = rl->r_off + rl->r_len - off;
        rear->r_cnt = rl->r_cnt;
@@ -253,7 +256,7 @@ zfs_range_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len)
        rl_t *rl;
 
        ASSERT(len);
-       rl = kmem_alloc(sizeof (rl_t), KM_SLEEP);
+       rl = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE);
        rl->r_off = off;
        rl->r_len = len;
        rl->r_cnt = 1;
@@ -426,7 +429,7 @@ zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
 
        ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);
 
-       new = kmem_alloc(sizeof (rl_t), KM_SLEEP);
+       new = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE);
        new->r_zp = zp;
        new->r_off = off;
        if (len + off < off)    /* overflow */
@@ -471,7 +474,7 @@ zfs_range_free(void *arg)
  * Unlock a reader lock
  */
 static void
-zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
+zfs_range_unlock_reader(znode_t *zp, rl_t *remove, list_t *free_list)
 {
        avl_tree_t *tree = &zp->z_range_avl;
        rl_t *rl, *next = NULL;
@@ -486,18 +489,18 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
         */
        if (remove->r_cnt == 1) {
                avl_remove(tree, remove);
-               mutex_exit(&zp->z_range_lock);
+
                if (remove->r_write_wanted)
                        cv_broadcast(&remove->r_wr_cv);
 
                if (remove->r_read_wanted)
                        cv_broadcast(&remove->r_rd_cv);
 
-               taskq_dispatch(system_taskq, zfs_range_free, remove, 0);
+               list_insert_tail(free_list, remove);
        } else {
-               ASSERT3U(remove->r_cnt, ==, 0);
-               ASSERT3U(remove->r_write_wanted, ==, 0);
-               ASSERT3U(remove->r_read_wanted, ==, 0);
+               ASSERT0(remove->r_cnt);
+               ASSERT0(remove->r_write_wanted);
+               ASSERT0(remove->r_read_wanted);
                /*
                 * Find start proxy representing this reader lock,
                 * then decrement ref count on all proxies
@@ -526,12 +529,10 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
                                if (rl->r_read_wanted)
                                        cv_broadcast(&rl->r_rd_cv);
 
-                               taskq_dispatch(system_taskq,
-                                   zfs_range_free, rl, 0);
+                               list_insert_tail(free_list, rl);
                        }
                }
 
-               mutex_exit(&zp->z_range_lock);
                kmem_free(remove, sizeof (rl_t));
        }
 }
@@ -543,30 +544,40 @@ void
 zfs_range_unlock(rl_t *rl)
 {
        znode_t *zp = rl->r_zp;
+       list_t free_list;
+       rl_t *free_rl;
 
        ASSERT(rl->r_type == RL_WRITER || rl->r_type == RL_READER);
        ASSERT(rl->r_cnt == 1 || rl->r_cnt == 0);
        ASSERT(!rl->r_proxy);
+       list_create(&free_list, sizeof (rl_t), offsetof(rl_t, rl_node));
 
        mutex_enter(&zp->z_range_lock);
        if (rl->r_type == RL_WRITER) {
                /* writer locks can't be shared or split */
                avl_remove(&zp->z_range_avl, rl);
-               mutex_exit(&zp->z_range_lock);
                if (rl->r_write_wanted)
                        cv_broadcast(&rl->r_wr_cv);
 
                if (rl->r_read_wanted)
                        cv_broadcast(&rl->r_rd_cv);
 
-               taskq_dispatch(system_taskq, zfs_range_free, rl, 0);
+               list_insert_tail(&free_list, rl);
        } else {
                /*
                 * lock may be shared, let zfs_range_unlock_reader()
                 * release the zp->z_range_lock lock and free the rl_t
                 */
-               zfs_range_unlock_reader(zp, rl);
+               zfs_range_unlock_reader(zp, rl, &free_list);
+       }
+       mutex_exit(&zp->z_range_lock);
+
+       while ((free_rl = list_head(&free_list)) != NULL) {
+               list_remove(&free_list, free_rl);
+               zfs_range_free(free_rl);
        }
+
+       list_destroy(&free_list);
 }
 
 /*
@@ -590,11 +601,13 @@ zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len)
        mutex_enter(&zp->z_range_lock);
        rl->r_off = off;
        rl->r_len = len;
-       mutex_exit(&zp->z_range_lock);
+
        if (rl->r_write_wanted)
                cv_broadcast(&rl->r_wr_cv);
        if (rl->r_read_wanted)
                cv_broadcast(&rl->r_rd_cv);
+
+       mutex_exit(&zp->z_range_lock);
 }
 
 /*