X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzfs_rlock.c;h=136972b32d03dc7bb9554939b641234efb26e040;hb=refs%2Fheads%2Frertzinger%2Ffeature-zpool-get--p;hp=f0a75b5fa0d71f0e0f21ce16110d39623bffb0fb;hpb=172bb4bd5e4afef721dd4d2972d8680d983f144b;p=zfs.git diff --git a/module/zfs/zfs_rlock.c b/module/zfs/zfs_rlock.c index f0a75b5..136972b 100644 --- a/module/zfs/zfs_rlock.c +++ b/module/zfs/zfs_rlock.c @@ -19,11 +19,12 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ - -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ /* * This file contains the code to implement file range locking in @@ -114,29 +115,29 @@ zfs_range_lock_writer(znode_t *zp, rl_t *new) * Range locking is also used by zvol and uses a * dummied up znode. However, for zvol, we don't need to * append or grow blocksize, and besides we don't have - * a z_phys or z_zfsvfs - so skip that processing. + * a "sa" data or zfs_sb_t - so skip that processing. * * Yes, this is ugly, and would be solved by not handling * grow or append in range lock code. If that was done then * we could make the range locking code generically available * to other non-zfs consumers. */ - if (zp->z_vnode) { /* caller is ZPL */ + if (!zp->z_is_zvol) { /* caller is ZPL */ /* * If in append mode pick up the current end of file. * This is done under z_range_lock to avoid races. */ if (new->r_type == RL_APPEND) - new->r_off = zp->z_phys->zp_size; + new->r_off = zp->z_size; /* * If we need to grow the block size then grab the whole * file range. This is also done under z_range_lock to * avoid races. */ - end_size = MAX(zp->z_phys->zp_size, new->r_off + len); + end_size = MAX(zp->z_size, new->r_off + len); if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) || - zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) { + zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) { new->r_off = 0; new->r_len = UINT64_MAX; } @@ -201,7 +202,7 @@ zfs_range_proxify(avl_tree_t *tree, rl_t *rl) rl->r_cnt = 0; /* create a proxy range lock */ - proxy = kmem_alloc(sizeof (rl_t), KM_SLEEP); + proxy = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE); proxy->r_off = rl->r_off; proxy->r_len = rl->r_len; proxy->r_cnt = 1; @@ -230,7 +231,7 @@ zfs_range_split(avl_tree_t *tree, rl_t *rl, uint64_t off) ASSERT(rl->r_read_wanted == B_FALSE); /* create the rear proxy range lock */ - rear = kmem_alloc(sizeof (rl_t), KM_SLEEP); + rear = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE); rear->r_off = off; rear->r_len = rl->r_off + rl->r_len - off; rear->r_cnt = rl->r_cnt; @@ -255,7 +256,7 @@ zfs_range_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len) rl_t *rl; ASSERT(len); - rl = kmem_alloc(sizeof (rl_t), KM_SLEEP); + rl = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE); rl->r_off = off; rl->r_len = len; rl->r_cnt = 1; @@ -428,9 +429,11 @@ zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type) ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND); - new = kmem_alloc(sizeof (rl_t), KM_SLEEP); + new = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE); new->r_zp = zp; new->r_off = off; + if (len + off < off) /* overflow */ + len = UINT64_MAX - off; new->r_len = len; new->r_cnt = 1; /* assume it's going to be in the tree */ new->r_type = type; @@ -453,14 +456,28 @@ zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type) return (new); } +static void +zfs_range_free(void *arg) +{ + rl_t *rl = arg; + + if (rl->r_write_wanted) + cv_destroy(&rl->r_wr_cv); + + if (rl->r_read_wanted) + cv_destroy(&rl->r_rd_cv); + + kmem_free(rl, sizeof (rl_t)); +} + /* * Unlock a reader lock */ static void -zfs_range_unlock_reader(znode_t *zp, rl_t *remove) +zfs_range_unlock_reader(znode_t *zp, rl_t *remove, list_t *free_list) { avl_tree_t *tree = &zp->z_range_avl; - rl_t *rl, *next; + rl_t *rl, *next = NULL; uint64_t len; /* @@ -472,18 +489,18 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove) */ if (remove->r_cnt == 1) { avl_remove(tree, remove); - if (remove->r_write_wanted) { + + if (remove->r_write_wanted) cv_broadcast(&remove->r_wr_cv); - cv_destroy(&remove->r_wr_cv); - } - if (remove->r_read_wanted) { + + if (remove->r_read_wanted) cv_broadcast(&remove->r_rd_cv); - cv_destroy(&remove->r_rd_cv); - } + + list_insert_tail(free_list, remove); } else { - ASSERT3U(remove->r_cnt, ==, 0); - ASSERT3U(remove->r_write_wanted, ==, 0); - ASSERT3U(remove->r_read_wanted, ==, 0); + ASSERT0(remove->r_cnt); + ASSERT0(remove->r_write_wanted); + ASSERT0(remove->r_read_wanted); /* * Find start proxy representing this reader lock, * then decrement ref count on all proxies @@ -505,19 +522,19 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove) rl->r_cnt--; if (rl->r_cnt == 0) { avl_remove(tree, rl); - if (rl->r_write_wanted) { + + if (rl->r_write_wanted) cv_broadcast(&rl->r_wr_cv); - cv_destroy(&rl->r_wr_cv); - } - if (rl->r_read_wanted) { + + if (rl->r_read_wanted) cv_broadcast(&rl->r_rd_cv); - cv_destroy(&rl->r_rd_cv); - } - kmem_free(rl, sizeof (rl_t)); + + list_insert_tail(free_list, rl); } } + + kmem_free(remove, sizeof (rl_t)); } - kmem_free(remove, sizeof (rl_t)); } /* @@ -527,33 +544,40 @@ void zfs_range_unlock(rl_t *rl) { znode_t *zp = rl->r_zp; + list_t free_list; + rl_t *free_rl; ASSERT(rl->r_type == RL_WRITER || rl->r_type == RL_READER); ASSERT(rl->r_cnt == 1 || rl->r_cnt == 0); ASSERT(!rl->r_proxy); + list_create(&free_list, sizeof(rl_t), offsetof(rl_t, rl_node)); mutex_enter(&zp->z_range_lock); if (rl->r_type == RL_WRITER) { /* writer locks can't be shared or split */ avl_remove(&zp->z_range_avl, rl); - mutex_exit(&zp->z_range_lock); - if (rl->r_write_wanted) { + if (rl->r_write_wanted) cv_broadcast(&rl->r_wr_cv); - cv_destroy(&rl->r_wr_cv); - } - if (rl->r_read_wanted) { + + if (rl->r_read_wanted) cv_broadcast(&rl->r_rd_cv); - cv_destroy(&rl->r_rd_cv); - } - kmem_free(rl, sizeof (rl_t)); + + list_insert_tail(&free_list, rl); } else { /* * lock may be shared, let zfs_range_unlock_reader() - * release the lock and free the rl_t + * release the zp->z_range_lock lock and free the rl_t */ - zfs_range_unlock_reader(zp, rl); - mutex_exit(&zp->z_range_lock); + zfs_range_unlock_reader(zp, rl, &free_list); } + mutex_exit(&zp->z_range_lock); + + while ((free_rl = list_head(&free_list)) != NULL) { + list_remove(&free_list, free_rl); + zfs_range_free(free_rl); + } + + list_destroy(&free_list); } /* @@ -577,11 +601,13 @@ zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len) mutex_enter(&zp->z_range_lock); rl->r_off = off; rl->r_len = len; - mutex_exit(&zp->z_range_lock); + if (rl->r_write_wanted) cv_broadcast(&rl->r_wr_cv); if (rl->r_read_wanted) cv_broadcast(&rl->r_rd_cv); + + mutex_exit(&zp->z_range_lock); } /*