Illumos #3006
[zfs.git] / module/zfs/zfs_rlock.c
index 7fd8f60..136972b 100644
@@ -22,6 +22,9 @@
  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
 
 /*
  * This file contains the code to implement file range locking in
@@ -112,14 +115,14 @@ zfs_range_lock_writer(znode_t *zp, rl_t *new)
                 * Range locking is also used by zvol and uses a
                 * dummied up znode. However, for zvol, we don't need to
                 * append or grow blocksize, and besides we don't have
-                * a "sa" data or z_zfsvfs - so skip that processing.
+                * "sa" data or a zfs_sb_t - so skip that processing.
                 *
                 * Yes, this is ugly, and would be solved by not handling
                 * grow or append in range lock code. If that was done then
                 * we could make the range locking code generically available
                 * to other non-zfs consumers.
                 */
-               if (zp->z_vnode) { /* caller is ZPL */
+               if (!zp->z_is_zvol) { /* caller is ZPL */
                        /*
                         * If in append mode pick up the current end of file.
                         * This is done under z_range_lock to avoid races.
@@ -134,7 +137,7 @@ zfs_range_lock_writer(znode_t *zp, rl_t *new)
                         */
                        end_size = MAX(zp->z_size, new->r_off + len);
                        if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
-                           zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) {
+                           zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {
                                new->r_off = 0;
                                new->r_len = UINT64_MAX;
                        }
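
The ZPL-caller test changes from zp->z_vnode to !zp->z_is_zvol because the
Linux port has no vnodes; the dummied up zvol znode is instead tagged with an
explicit flag. A minimal sketch of how such a flag might be set when the dummy
znode is built (the zvol_znode_init() helper here is hypothetical, shown only
to illustrate the assumption):

        /* Hypothetical sketch, not part of this patch. */
        static void
        zvol_znode_init(znode_t *zp)
        {
                mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
                avl_create(&zp->z_range_avl, zfs_range_compare,
                    sizeof (rl_t), offsetof(rl_t, r_node));
                zp->z_is_zvol = B_TRUE; /* range locking skips ZPL-only work */
        }
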
@@ -199,7 +202,7 @@ zfs_range_proxify(avl_tree_t *tree, rl_t *rl)
        rl->r_cnt = 0;
 
        /* create a proxy range lock */
-       proxy = kmem_alloc(sizeof (rl_t), KM_SLEEP);
+       proxy = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE);
        proxy->r_off = rl->r_off;
        proxy->r_len = rl->r_len;
        proxy->r_cnt = 1;
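
The KM_SLEEP to KM_PUSHPAGE conversions in this and the next three hunks
address a Linux-specific hazard: a KM_SLEEP allocation made on the write path
can enter direct reclaim, and reclaim can re-enter the filesystem to write out
dirty pages, deadlocking against the very range-lock state this thread is
constructing. KM_PUSHPAGE still allows the allocation to sleep, but forbids it
from initiating filesystem I/O. Roughly (an illustrative call chain, not taken
from this patch):

        kmem_alloc(sizeof (rl_t), KM_SLEEP)     /* memory is tight */
          -> direct reclaim
            -> writepage() back into ZFS        /* needs a range lock */
              -> blocks behind the lock this thread is mid-way through taking
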
@@ -228,7 +231,7 @@ zfs_range_split(avl_tree_t *tree, rl_t *rl, uint64_t off)
        ASSERT(rl->r_read_wanted == B_FALSE);
 
        /* create the rear proxy range lock */
-       rear = kmem_alloc(sizeof (rl_t), KM_SLEEP);
+       rear = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE);
        rear->r_off = off;
        rear->r_len = rl->r_off + rl->r_len - off;
        rear->r_cnt = rl->r_cnt;
@@ -253,7 +256,7 @@ zfs_range_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len)
        rl_t *rl;
 
        ASSERT(len);
-       rl = kmem_alloc(sizeof (rl_t), KM_SLEEP);
+       rl = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE);
        rl->r_off = off;
        rl->r_len = len;
        rl->r_cnt = 1;
@@ -426,7 +429,7 @@ zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
 
        ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);
 
-       new = kmem_alloc(sizeof (rl_t), KM_SLEEP);
+       new = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE);
        new->r_zp = zp;
        new->r_off = off;
        if (len + off < off)    /* overflow */
@@ -453,14 +456,28 @@ zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
        return (new);
 }
 
+static void
+zfs_range_free(void *arg)
+{
+       rl_t *rl = arg;
+
+       if (rl->r_write_wanted)
+               cv_destroy(&rl->r_wr_cv);
+
+       if (rl->r_read_wanted)
+               cv_destroy(&rl->r_rd_cv);
+
+       kmem_free(rl, sizeof (rl_t));
+}
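
zfs_range_free() is one half of a deliberate split: wake-ups happen while
z_range_lock is held, while cv_destroy() and kmem_free() are deferred to a
free_list and run only after the mutex is dropped. A waiter woken by
cv_broadcast() still touches the condition variable on its way out of
cv_wait(), so destroying the cv immediately after the broadcast, as the old
code did, risks a use-after-free. The list is linked through a list_node_t
field in rl_t, assumed here to be named rl_node to match the list_create()
call later in this patch. Condensed, the pattern is:

        list_t free_list;
        rl_t *free_rl;

        list_create(&free_list, sizeof (rl_t), offsetof(rl_t, rl_node));

        mutex_enter(&zp->z_range_lock);
        /* avl_remove(), cv_broadcast(), list_insert_tail(&free_list, rl) */
        mutex_exit(&zp->z_range_lock);

        while ((free_rl = list_head(&free_list)) != NULL) {
                list_remove(&free_list, free_rl);
                zfs_range_free(free_rl);        /* cv_destroy() + kmem_free() */
        }
        list_destroy(&free_list);
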
+
 /*
  * Unlock a reader lock
  */
 static void
-zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
+zfs_range_unlock_reader(znode_t *zp, rl_t *remove, list_t *free_list)
 {
        avl_tree_t *tree = &zp->z_range_avl;
-       rl_t *rl, *next;
+       rl_t *rl, *next = NULL;
        uint64_t len;
 
        /*
@@ -472,18 +489,18 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
         */
        if (remove->r_cnt == 1) {
                avl_remove(tree, remove);
-               if (remove->r_write_wanted) {
+
+               if (remove->r_write_wanted)
                        cv_broadcast(&remove->r_wr_cv);
-                       cv_destroy(&remove->r_wr_cv);
-               }
-               if (remove->r_read_wanted) {
+
+               if (remove->r_read_wanted)
                        cv_broadcast(&remove->r_rd_cv);
-                       cv_destroy(&remove->r_rd_cv);
-               }
+
+               list_insert_tail(free_list, remove);
        } else {
-               ASSERT3U(remove->r_cnt, ==, 0);
-               ASSERT3U(remove->r_write_wanted, ==, 0);
-               ASSERT3U(remove->r_read_wanted, ==, 0);
+               ASSERT0(remove->r_cnt);
+               ASSERT0(remove->r_write_wanted);
+               ASSERT0(remove->r_read_wanted);
                /*
                 * Find start proxy representing this reader lock,
                 * then decrement ref count on all proxies
@@ -505,19 +522,19 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
                        rl->r_cnt--;
                        if (rl->r_cnt == 0) {
                                avl_remove(tree, rl);
-                               if (rl->r_write_wanted) {
+
+                               if (rl->r_write_wanted)
                                        cv_broadcast(&rl->r_wr_cv);
-                                       cv_destroy(&rl->r_wr_cv);
-                               }
-                               if (rl->r_read_wanted) {
+
+                               if (rl->r_read_wanted)
                                        cv_broadcast(&rl->r_rd_cv);
-                                       cv_destroy(&rl->r_rd_cv);
-                               }
-                               kmem_free(rl, sizeof (rl_t));
+
+                               list_insert_tail(free_list, rl);
                        }
                }
+
+               kmem_free(remove, sizeof (rl_t));
        }
-       kmem_free(remove, sizeof (rl_t));
 }
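
Overlapping reader locks are stored as proxy entries carrying reference
counts, which is what the loop above walks. A worked example (illustrative,
not from the patch): with readers A = [0, 100) and B = [50, 150) held
concurrently, the AVL tree contains

        [0, 50) r_cnt=1    [50, 100) r_cnt=2    [100, 150) r_cnt=1

Unlocking A decrements r_cnt across [0, 100); the [0, 50) proxy drops to
zero, is removed from the tree, and is queued on the caller's free_list,
while [50, 100) survives with r_cnt=1 on behalf of B.
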
 
 /*
@@ -527,33 +544,40 @@ void
 zfs_range_unlock(rl_t *rl)
 {
        znode_t *zp = rl->r_zp;
+       list_t free_list;
+       rl_t *free_rl;
 
        ASSERT(rl->r_type == RL_WRITER || rl->r_type == RL_READER);
        ASSERT(rl->r_cnt == 1 || rl->r_cnt == 0);
        ASSERT(!rl->r_proxy);
+       list_create(&free_list, sizeof (rl_t), offsetof(rl_t, rl_node));
 
        mutex_enter(&zp->z_range_lock);
        if (rl->r_type == RL_WRITER) {
                /* writer locks can't be shared or split */
                avl_remove(&zp->z_range_avl, rl);
-               mutex_exit(&zp->z_range_lock);
-               if (rl->r_write_wanted) {
+               if (rl->r_write_wanted)
                        cv_broadcast(&rl->r_wr_cv);
-                       cv_destroy(&rl->r_wr_cv);
-               }
-               if (rl->r_read_wanted) {
+
+               if (rl->r_read_wanted)
                        cv_broadcast(&rl->r_rd_cv);
-                       cv_destroy(&rl->r_rd_cv);
-               }
-               kmem_free(rl, sizeof (rl_t));
+
+               list_insert_tail(&free_list, rl);
        } else {
                /*
                 * lock may be shared, let zfs_range_unlock_reader()
-                * release the lock and free the rl_t
+                * move any rl_t's to be freed onto the local free_list
                 */
-               zfs_range_unlock_reader(zp, rl);
-               mutex_exit(&zp->z_range_lock);
+               zfs_range_unlock_reader(zp, rl, &free_list);
        }
+       mutex_exit(&zp->z_range_lock);
+
+       while ((free_rl = list_head(&free_list)) != NULL) {
+               list_remove(&free_list, free_rl);
+               zfs_range_free(free_rl);
+       }
+
+       list_destroy(&free_list);
 }
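
The caller-visible API is unchanged by this rework. A minimal usage sketch
(the surrounding write logic is elided; off and len are the caller's byte
range):

        rl_t *rl;

        rl = zfs_range_lock(zp, off, len, RL_WRITER);
        /* ... perform the I/O covered by [off, off + len) ... */
        zfs_range_unlock(rl);   /* broadcasts under the mutex, frees after it */
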
 
 /*
@@ -577,11 +601,13 @@ zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len)
        mutex_enter(&zp->z_range_lock);
        rl->r_off = off;
        rl->r_len = len;
-       mutex_exit(&zp->z_range_lock);
+
        if (rl->r_write_wanted)
                cv_broadcast(&rl->r_wr_cv);
        if (rl->r_read_wanted)
                cv_broadcast(&rl->r_rd_cv);
+
+       mutex_exit(&zp->z_range_lock);
 }
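
Moving the broadcasts in zfs_range_reduce() under z_range_lock follows the
convention the unlock paths above now use: condition variables attached to an
rl_t are signalled only while the mutex guarding that rl_t is held, so a
wake-up can never interleave with another thread observing, or tearing down,
the structure after the mutex is dropped. In miniature (a sketch of the
convention, not additional patch code):

        mutex_enter(&zp->z_range_lock);
        /* ... mutate rl->r_off and rl->r_len ... */
        if (rl->r_write_wanted)
                cv_broadcast(&rl->r_wr_cv);     /* still under the mutex */
        mutex_exit(&zp->z_range_lock);
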
 
 /*