X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Farc.c;h=89f5fe5f0272e9706a1e6cd838c10831b2a47c44;hb=57b650b86f15793195f9bf2185e1161c7c67904d;hp=6ec9f04b772766998f26bd8a997637f76a2abe4e;hpb=b68503fb30dfed0823b1fe7d547d82c3df04d187;p=zfs.git diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 6ec9f04..89f5fe5 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -80,9 +80,9 @@ * types of locks: 1) the hash table lock array, and 2) the * arc list locks. * - * Buffers do not have their own mutexs, rather they rely on the - * hash table mutexs for the bulk of their protection (i.e. most - * fields in the arc_buf_hdr_t are protected by these mutexs). + * Buffers do not have their own mutexes, rather they rely on the + * hash table mutexes for the bulk of their protection (i.e. most + * fields in the arc_buf_hdr_t are protected by these mutexes). * * buf_hash_find() returns the appropriate mutex (held) when it * locates the requested buffer in the hash table. It returns @@ -148,7 +148,7 @@ static kcondvar_t arc_reclaim_thr_cv; /* used to signal reclaim thr */ static uint8_t arc_thread_exit; /* number of bytes to prune from caches when at arc_meta_limit is reached */ -uint_t arc_meta_prune = 1048576; +int zfs_arc_meta_prune = 1048576; typedef enum arc_reclaim_strategy { ARC_RECLAIM_AGGR, /* Aggressive reclaim strategy */ @@ -156,25 +156,31 @@ typedef enum arc_reclaim_strategy { } arc_reclaim_strategy_t; /* number of seconds before growing cache again */ -static int arc_grow_retry = 5; - -/* expiration time for arc_no_grow */ -static clock_t arc_grow_time = 0; +int zfs_arc_grow_retry = 5; /* shift of arc_c for calculating both min and max arc_p */ -static int arc_p_min_shift = 4; +int zfs_arc_p_min_shift = 4; /* log2(fraction of arc to reclaim) */ -static int arc_shrink_shift = 5; +int zfs_arc_shrink_shift = 5; /* * minimum lifespan of a prefetch block in clock ticks * (initialized in arc_init()) */ -static int arc_min_prefetch_lifespan; +int zfs_arc_min_prefetch_lifespan = HZ; + +/* disable arc proactive arc throttle due to low memory */ +int zfs_arc_memory_throttle_disable = 1; + +/* disable duplicate buffer eviction */ +int zfs_disable_dup_eviction = 0; static int arc_dead; +/* expiration time for arc_no_grow */ +static clock_t arc_grow_time = 0; + /* * The arc has filled available memory and has now warmed up. */ @@ -186,10 +192,6 @@ static boolean_t arc_warm; unsigned long zfs_arc_max = 0; unsigned long zfs_arc_min = 0; unsigned long zfs_arc_meta_limit = 0; -int zfs_arc_grow_retry = 0; -int zfs_arc_shrink_shift = 0; -int zfs_arc_p_min_shift = 0; -int zfs_arc_meta_prune = 0; /* * Note that buffers can be in one of 6 states: @@ -307,6 +309,9 @@ typedef struct arc_stats { kstat_named_t arcstat_l2_size; kstat_named_t arcstat_l2_hdr_size; kstat_named_t arcstat_memory_throttle_count; + kstat_named_t arcstat_duplicate_buffers; + kstat_named_t arcstat_duplicate_buffers_size; + kstat_named_t arcstat_duplicate_reads; kstat_named_t arcstat_memory_direct_count; kstat_named_t arcstat_memory_indirect_count; kstat_named_t arcstat_no_grow; @@ -387,6 +392,9 @@ static arc_stats_t arc_stats = { { "l2_size", KSTAT_DATA_UINT64 }, { "l2_hdr_size", KSTAT_DATA_UINT64 }, { "memory_throttle_count", KSTAT_DATA_UINT64 }, + { "duplicate_buffers", KSTAT_DATA_UINT64 }, + { "duplicate_buffers_size", KSTAT_DATA_UINT64 }, + { "duplicate_reads", KSTAT_DATA_UINT64 }, { "memory_direct_count", KSTAT_DATA_UINT64 }, { "memory_indirect_count", KSTAT_DATA_UINT64 }, { "arc_no_grow", KSTAT_DATA_UINT64 }, @@ -493,7 +501,6 @@ struct arc_buf_hdr { kmutex_t b_freeze_lock; zio_cksum_t *b_freeze_cksum; - void *b_thawed; arc_buf_hdr_t *b_hash_next; arc_buf_t *b_buf; @@ -632,7 +639,7 @@ unsigned long l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ unsigned long l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */ int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ int l2arc_feed_again = B_TRUE; /* turbo warmup */ -int l2arc_norw = B_TRUE; /* no reads during writes */ +int l2arc_norw = B_FALSE; /* no reads during writes */ /* * L2ARC Internals @@ -876,7 +883,6 @@ buf_cons(void *vbuf, void *unused, int kmflag) bzero(buf, sizeof (arc_buf_t)); mutex_init(&buf->b_evict_lock, NULL, MUTEX_DEFAULT, NULL); - rw_init(&buf->b_data_lock, NULL, RW_DEFAULT, NULL); arc_space_consume(sizeof (arc_buf_t), ARC_SPACE_HDRS); return (0); @@ -906,7 +912,6 @@ buf_dest(void *vbuf, void *unused) arc_buf_t *buf = vbuf; mutex_destroy(&buf->b_evict_lock); - rw_destroy(&buf->b_data_lock); arc_space_return(sizeof (arc_buf_t), ARC_SPACE_HDRS); } @@ -1027,12 +1032,6 @@ arc_buf_thaw(arc_buf_t *buf) buf->b_hdr->b_freeze_cksum = NULL; } - if (zfs_flags & ZFS_DEBUG_MODIFY) { - if (buf->b_hdr->b_thawed) - kmem_free(buf->b_hdr->b_thawed, 1); - buf->b_hdr->b_thawed = kmem_alloc(1, KM_SLEEP); - } - mutex_exit(&buf->b_hdr->b_freeze_lock); } @@ -1069,7 +1068,7 @@ add_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag) ASSERT(list_link_active(&ab->b_arc_node)); list_remove(list, ab); if (GHOST_STATE(ab->b_state)) { - ASSERT3U(ab->b_datacnt, ==, 0); + ASSERT0(ab->b_datacnt); ASSERT3P(ab->b_buf, ==, NULL); delta = ab->b_size; } @@ -1254,23 +1253,6 @@ arc_space_return(uint64_t space, arc_space_type_t type) atomic_add_64(&arc_size, -space); } -void * -arc_data_buf_alloc(uint64_t size) -{ - if (arc_evict_needed(ARC_BUFC_DATA)) - cv_signal(&arc_reclaim_thr_cv); - atomic_add_64(&arc_size, size); - return (zio_data_buf_alloc(size)); -} - -void -arc_data_buf_free(void *buf, uint64_t size) -{ - zio_data_buf_free(buf, size); - ASSERT(arc_size >= size); - atomic_add_64(&arc_size, -size); -} - arc_buf_t * arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type) { @@ -1369,6 +1351,17 @@ arc_buf_clone(arc_buf_t *from) hdr->b_buf = buf; arc_get_data_buf(buf); bcopy(from->b_data, buf->b_data, size); + + /* + * This buffer already exists in the arc so create a duplicate + * copy for the caller. If the buffer is associated with user data + * then track the size and number of duplicates. These stats will be + * updated as duplicate buffers are created and destroyed. + */ + if (hdr->b_type == ARC_BUFC_DATA) { + ARCSTAT_BUMP(arcstat_duplicate_buffers); + ARCSTAT_INCR(arcstat_duplicate_buffers_size, size); + } hdr->b_datacnt += 1; return (buf); } @@ -1467,6 +1460,16 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all) ASSERT3U(state->arcs_size, >=, size); atomic_add_64(&state->arcs_size, -size); buf->b_data = NULL; + + /* + * If we're destroying a duplicate buffer make sure + * that the appropriate statistics are updated. + */ + if (buf->b_hdr->b_datacnt > 1 && + buf->b_hdr->b_type == ARC_BUFC_DATA) { + ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers); + ARCSTAT_INCR(arcstat_duplicate_buffers_size, -size); + } ASSERT(buf->b_hdr->b_datacnt > 0); buf->b_hdr->b_datacnt -= 1; } @@ -1518,6 +1521,7 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr) list_remove(l2hdr->b_dev->l2ad_buflist, hdr); ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size); kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t)); + arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS); if (hdr->b_state == arc_l2c_only) l2arc_hdr_stat_remove(); hdr->b_l2hdr = NULL; @@ -1553,10 +1557,6 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr) kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t)); hdr->b_freeze_cksum = NULL; } - if (hdr->b_thawed) { - kmem_free(hdr->b_thawed, 1); - hdr->b_thawed = NULL; - } ASSERT(!list_link_active(&hdr->b_arc_node)); ASSERT3P(hdr->b_hash_next, ==, NULL); @@ -1615,7 +1615,7 @@ int arc_buf_remove_ref(arc_buf_t *buf, void* tag) { arc_buf_hdr_t *hdr = buf->b_hdr; - kmutex_t *hash_lock = HDR_LOCK(hdr); + kmutex_t *hash_lock = NULL; int no_callback = (buf->b_efunc == NULL); if (hdr->b_state == arc_anon) { @@ -1624,6 +1624,7 @@ arc_buf_remove_ref(arc_buf_t *buf, void* tag) return (no_callback); } + hash_lock = HDR_LOCK(hdr); mutex_enter(hash_lock); hdr = buf->b_hdr; ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); @@ -1652,6 +1653,48 @@ arc_buf_size(arc_buf_t *buf) } /* + * Called from the DMU to determine if the current buffer should be + * evicted. In order to ensure proper locking, the eviction must be initiated + * from the DMU. Return true if the buffer is associated with user data and + * duplicate buffers still exist. + */ +boolean_t +arc_buf_eviction_needed(arc_buf_t *buf) +{ + arc_buf_hdr_t *hdr; + boolean_t evict_needed = B_FALSE; + + if (zfs_disable_dup_eviction) + return (B_FALSE); + + mutex_enter(&buf->b_evict_lock); + hdr = buf->b_hdr; + if (hdr == NULL) { + /* + * We are in arc_do_user_evicts(); let that function + * perform the eviction. + */ + ASSERT(buf->b_data == NULL); + mutex_exit(&buf->b_evict_lock); + return (B_FALSE); + } else if (buf->b_data == NULL) { + /* + * We have already been added to the arc eviction list; + * recommend eviction. + */ + ASSERT3P(hdr, ==, &arc_eviction_hdr); + mutex_exit(&buf->b_evict_lock); + return (B_TRUE); + } + + if (hdr->b_datacnt > 1 && hdr->b_type == ARC_BUFC_DATA) + evict_needed = B_TRUE; + + mutex_exit(&buf->b_evict_lock); + return (evict_needed); +} + +/* * Evict buffers from list until we've removed the specified number of * bytes. Move the removed buffers to the appropriate evict state. * If the recycle flag is set, then attempt to "recycle" a buffer: @@ -1690,7 +1733,7 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle, (spa && ab->b_spa != spa) || (ab->b_flags & (ARC_PREFETCH|ARC_INDIRECT) && ddi_get_lbolt() - ab->b_arc_access < - arc_min_prefetch_lifespan)) { + zfs_arc_min_prefetch_lifespan)) { skipped++; continue; } @@ -1701,7 +1744,7 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle, hash_lock = HDR_LOCK(ab); have_lock = MUTEX_HELD(hash_lock); if (have_lock || mutex_tryenter(hash_lock)) { - ASSERT3U(refcount_count(&ab->b_refcnt), ==, 0); + ASSERT0(refcount_count(&ab->b_refcnt)); ASSERT(ab->b_datacnt > 0); while (ab->b_buf) { arc_buf_t *buf = ab->b_buf; @@ -2041,7 +2084,7 @@ arc_adjust_meta(int64_t adjustment, boolean_t may_prune) } if (may_prune && (adjustment > 0) && (arc_meta_used > arc_meta_limit)) - arc_do_user_prune(arc_meta_prune); + arc_do_user_prune(zfs_arc_meta_prune); } /* @@ -2092,14 +2135,14 @@ arc_shrink(uint64_t bytes) if (arc_c > arc_c_min) { uint64_t to_free; - to_free = bytes ? bytes : arc_c >> arc_shrink_shift; + to_free = bytes ? bytes : arc_c >> zfs_arc_shrink_shift; if (arc_c > arc_c_min + to_free) atomic_add_64(&arc_c, -to_free); else arc_c = arc_c_min; - atomic_add_64(&arc_p, -(arc_p >> arc_shrink_shift)); + atomic_add_64(&arc_p, -(arc_p >> zfs_arc_shrink_shift)); if (arc_c > arc_size) arc_c = MAX(arc_size, arc_c_min); if (arc_p > arc_c) @@ -2178,7 +2221,7 @@ arc_adapt_thread(void) } /* reset the growth delay for every reclaim */ - arc_grow_time = ddi_get_lbolt()+(arc_grow_retry * hz); + arc_grow_time = ddi_get_lbolt()+(zfs_arc_grow_retry * hz); arc_kmem_reap_now(last_reclaim, 0); arc_warm = B_TRUE; @@ -2208,6 +2251,26 @@ arc_adapt_thread(void) (void) cv_timedwait_interruptible(&arc_reclaim_thr_cv, &arc_reclaim_thr_lock, (ddi_get_lbolt() + hz)); CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_thr_lock); + + + /* Allow the module options to be changed */ + if (zfs_arc_max > 64 << 20 && + zfs_arc_max < physmem * PAGESIZE && + zfs_arc_max != arc_c_max) + arc_c_max = zfs_arc_max; + + if (zfs_arc_min > 0 && + zfs_arc_min < arc_c_max && + zfs_arc_min != arc_c_min) + arc_c_min = zfs_arc_min; + + if (zfs_arc_meta_limit > 0 && + zfs_arc_meta_limit <= arc_c_max && + zfs_arc_meta_limit != arc_meta_limit) + arc_meta_limit = zfs_arc_meta_limit; + + + } arc_thread_exit = 0; @@ -2328,7 +2391,7 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc) ARCSTAT_BUMP(arcstat_memory_indirect_count); } else { arc_no_grow = B_TRUE; - arc_grow_time = ddi_get_lbolt() + (arc_grow_retry * hz); + arc_grow_time = ddi_get_lbolt() + (zfs_arc_grow_retry * hz); ARCSTAT_BUMP(arcstat_memory_direct_count); } @@ -2350,7 +2413,7 @@ static void arc_adapt(int bytes, arc_state_t *state) { int mult; - uint64_t arc_p_min = (arc_c >> arc_p_min_shift); + uint64_t arc_p_min = (arc_c >> zfs_arc_p_min_shift); if (state == arc_l2c_only) return; @@ -2647,7 +2710,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) * This is a prefetch access... * move this block back to the MRU state. */ - ASSERT3U(refcount_count(&buf->b_refcnt), ==, 0); + ASSERT0(refcount_count(&buf->b_refcnt)); new_state = arc_mru; } @@ -2729,10 +2792,12 @@ arc_read_done(zio_t *zio) callback_list = hdr->b_acb; ASSERT(callback_list != NULL); if (BP_SHOULD_BYTESWAP(zio->io_bp) && zio->io_error == 0) { - arc_byteswap_func_t *func = BP_GET_LEVEL(zio->io_bp) > 0 ? - byteswap_uint64_array : - dmu_ot[BP_GET_TYPE(zio->io_bp)].ot_byteswap; - func(buf->b_data, hdr->b_size); + dmu_object_byteswap_t bswap = + DMU_OT_BYTESWAP(BP_GET_TYPE(zio->io_bp)); + if (BP_GET_LEVEL(zio->io_bp) > 0) + byteswap_uint64_array(buf->b_data, hdr->b_size); + else + dmu_ot_byteswap[bswap].ob_func(buf->b_data, hdr->b_size); } arc_cksum_compute(buf, B_FALSE); @@ -2751,8 +2816,10 @@ arc_read_done(zio_t *zio) abuf = buf; for (acb = callback_list; acb; acb = acb->acb_next) { if (acb->acb_done) { - if (abuf == NULL) + if (abuf == NULL) { + ARCSTAT_BUMP(arcstat_duplicate_reads); abuf = arc_buf_clone(buf); + } acb->acb_buf = abuf; abuf = NULL; } @@ -2816,7 +2883,7 @@ arc_read_done(zio_t *zio) } /* - * "Read" the block block at the specified DVA (in bp) via the + * "Read" the block at the specified DVA (in bp) via the * cache. If the block is found in the cache, invoke the provided * callback immediately and return. Note that the `zio' parameter * in the callback will be NULL in this case, since no IO was @@ -2832,42 +2899,11 @@ arc_read_done(zio_t *zio) * * arc_read_done() will invoke all the requested "done" functions * for readers of this block. - * - * Normal callers should use arc_read and pass the arc buffer and offset - * for the bp. But if you know you don't need locking, you can use - * arc_read_bp. */ int -arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *pbuf, - arc_done_func_t *done, void *private, int priority, int zio_flags, - uint32_t *arc_flags, const zbookmark_t *zb) -{ - int err; - - if (pbuf == NULL) { - /* - * XXX This happens from traverse callback funcs, for - * the objset_phys_t block. - */ - return (arc_read_nolock(pio, spa, bp, done, private, priority, - zio_flags, arc_flags, zb)); - } - - ASSERT(!refcount_is_zero(&pbuf->b_hdr->b_refcnt)); - ASSERT3U((char *)bp - (char *)pbuf->b_data, <, pbuf->b_hdr->b_size); - rw_enter(&pbuf->b_data_lock, RW_READER); - - err = arc_read_nolock(pio, spa, bp, done, private, priority, - zio_flags, arc_flags, zb); - rw_exit(&pbuf->b_data_lock); - - return (err); -} - -int -arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp, - arc_done_func_t *done, void *private, int priority, int zio_flags, - uint32_t *arc_flags, const zbookmark_t *zb) +arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, + void *private, int priority, int zio_flags, uint32_t *arc_flags, + const zbookmark_t *zb) { arc_buf_hdr_t *hdr; arc_buf_t *buf = NULL; @@ -2986,7 +3022,7 @@ top: /* this block is in the ghost cache */ ASSERT(GHOST_STATE(hdr->b_state)); ASSERT(!HDR_IO_IN_PROGRESS(hdr)); - ASSERT3U(refcount_count(&hdr->b_refcnt), ==, 0); + ASSERT0(refcount_count(&hdr->b_refcnt)); ASSERT(hdr->b_buf == NULL); /* if this is a prefetch, we don't have a reference */ @@ -3166,6 +3202,34 @@ arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private) } /* + * Notify the arc that a block was freed, and thus will never be used again. + */ +void +arc_freed(spa_t *spa, const blkptr_t *bp) +{ + arc_buf_hdr_t *hdr; + kmutex_t *hash_lock; + uint64_t guid = spa_load_guid(spa); + + hdr = buf_hash_find(guid, BP_IDENTITY(bp), BP_PHYSICAL_BIRTH(bp), + &hash_lock); + if (hdr == NULL) + return; + if (HDR_BUF_AVAILABLE(hdr)) { + arc_buf_t *buf = hdr->b_buf; + add_reference(hdr, hash_lock, FTAG); + hdr->b_flags &= ~ARC_BUF_AVAILABLE; + mutex_exit(hash_lock); + + arc_release(buf, FTAG); + (void) arc_buf_remove_ref(buf, FTAG); + } else { + mutex_exit(hash_lock); + } + +} + +/* * This is used by the DMU to let the ARC know that a buffer is * being evicted, so the ARC should clean up. If this arc buf * is not yet in the evicted state, it will be put there. @@ -3322,6 +3386,16 @@ arc_release(arc_buf_t *buf, void *tag) ASSERT3U(*size, >=, hdr->b_size); atomic_add_64(size, -hdr->b_size); } + + /* + * We're releasing a duplicate user data buffer, update + * our statistics accordingly. + */ + if (hdr->b_type == ARC_BUFC_DATA) { + ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers); + ARCSTAT_INCR(arcstat_duplicate_buffers_size, + -hdr->b_size); + } hdr->b_datacnt -= 1; arc_cksum_verify(buf); @@ -3362,24 +3436,12 @@ arc_release(arc_buf_t *buf, void *tag) if (l2hdr) { list_remove(l2hdr->b_dev->l2ad_buflist, hdr); kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t)); + arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS); ARCSTAT_INCR(arcstat_l2_size, -buf_size); mutex_exit(&l2arc_buflist_mtx); } } -/* - * Release this buffer. If it does not match the provided BP, fill it - * with that block's contents. - */ -/* ARGSUSED */ -int -arc_release_bp(arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa, - zbookmark_t *zb) -{ - arc_release(buf, tag); - return (0); -} - int arc_released(arc_buf_t *buf) { @@ -3549,6 +3611,9 @@ arc_memory_throttle(uint64_t reserve, uint64_t inflight_data, uint64_t txg) #ifdef _KERNEL uint64_t available_memory; + if (zfs_arc_memory_throttle_disable) + return (0); + /* Easily reclaimable memory (free + inactive + arc-evictable) */ available_memory = ptob(spl_kmem_availrmem()) + arc_evictable_memory(); @@ -3683,7 +3748,7 @@ arc_init(void) cv_init(&arc_reclaim_thr_cv, NULL, CV_DEFAULT, NULL); /* Convert seconds to clock ticks */ - arc_min_prefetch_lifespan = 1 * hz; + zfs_arc_min_prefetch_lifespan = 1 * hz; /* Start out with 1/8 of all memory */ arc_c = physmem * PAGESIZE / 8; @@ -3731,18 +3796,6 @@ arc_init(void) if (arc_c_min < arc_meta_limit / 2 && zfs_arc_min == 0) arc_c_min = arc_meta_limit / 2; - if (zfs_arc_grow_retry > 0) - arc_grow_retry = zfs_arc_grow_retry; - - if (zfs_arc_shrink_shift > 0) - arc_shrink_shift = zfs_arc_shrink_shift; - - if (zfs_arc_p_min_shift > 0) - arc_p_min_shift = zfs_arc_p_min_shift; - - if (zfs_arc_meta_prune > 0) - arc_meta_prune = zfs_arc_meta_prune; - /* if kmem_flags are set, lets try to use less memory */ if (kmem_debugging()) arc_c = arc_c / 2; @@ -4071,14 +4124,14 @@ l2arc_write_interval(clock_t began, uint64_t wanted, uint64_t wrote) static void l2arc_hdr_stat_add(void) { - ARCSTAT_INCR(arcstat_l2_hdr_size, HDR_SIZE + L2HDR_SIZE); + ARCSTAT_INCR(arcstat_l2_hdr_size, HDR_SIZE); ARCSTAT_INCR(arcstat_hdr_size, -HDR_SIZE); } static void l2arc_hdr_stat_remove(void) { - ARCSTAT_INCR(arcstat_l2_hdr_size, -(HDR_SIZE + L2HDR_SIZE)); + ARCSTAT_INCR(arcstat_l2_hdr_size, -HDR_SIZE); ARCSTAT_INCR(arcstat_hdr_size, HDR_SIZE); } @@ -4222,6 +4275,7 @@ l2arc_write_done(zio_t *zio) abl2 = ab->b_l2hdr; ab->b_l2hdr = NULL; kmem_free(abl2, sizeof (l2arc_buf_hdr_t)); + arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS); ARCSTAT_INCR(arcstat_l2_size, -ab->b_size); } @@ -4468,6 +4522,7 @@ top: abl2 = ab->b_l2hdr; ab->b_l2hdr = NULL; kmem_free(abl2, sizeof (l2arc_buf_hdr_t)); + arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS); ARCSTAT_INCR(arcstat_l2_size, -ab->b_size); } list_remove(buflist, ab); @@ -4593,6 +4648,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) KM_PUSHPAGE); hdrl2->b_dev = dev; hdrl2->b_daddr = dev->l2ad_hand; + arc_space_consume(L2HDR_SIZE, ARC_SPACE_L2HDRS); ab->b_flags |= ARC_L2_WRITING; ab->b_l2hdr = hdrl2; @@ -4635,7 +4691,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) mutex_exit(&l2arc_buflist_mtx); if (pio == NULL) { - ASSERT3U(write_sz, ==, 0); + ASSERT0(write_sz); kmem_cache_free(hdr_cache, head); return (0); } @@ -4937,49 +4993,58 @@ EXPORT_SYMBOL(arc_getbuf_func); EXPORT_SYMBOL(arc_add_prune_callback); EXPORT_SYMBOL(arc_remove_prune_callback); -module_param(zfs_arc_min, ulong, 0444); +module_param(zfs_arc_min, ulong, 0644); MODULE_PARM_DESC(zfs_arc_min, "Min arc size"); -module_param(zfs_arc_max, ulong, 0444); +module_param(zfs_arc_max, ulong, 0644); MODULE_PARM_DESC(zfs_arc_max, "Max arc size"); -module_param(zfs_arc_meta_limit, ulong, 0444); +module_param(zfs_arc_meta_limit, ulong, 0644); MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size"); -module_param(zfs_arc_meta_prune, int, 0444); +module_param(zfs_arc_meta_prune, int, 0644); MODULE_PARM_DESC(zfs_arc_meta_prune, "Bytes of meta data to prune"); -module_param(zfs_arc_grow_retry, int, 0444); +module_param(zfs_arc_grow_retry, int, 0644); MODULE_PARM_DESC(zfs_arc_grow_retry, "Seconds before growing arc size"); -module_param(zfs_arc_shrink_shift, int, 0444); +module_param(zfs_arc_shrink_shift, int, 0644); MODULE_PARM_DESC(zfs_arc_shrink_shift, "log2(fraction of arc to reclaim)"); -module_param(zfs_arc_p_min_shift, int, 0444); +module_param(zfs_arc_p_min_shift, int, 0644); MODULE_PARM_DESC(zfs_arc_p_min_shift, "arc_c shift to calc min/max arc_p"); -module_param(l2arc_write_max, ulong, 0444); +module_param(zfs_disable_dup_eviction, int, 0644); +MODULE_PARM_DESC(zfs_disable_dup_eviction, "disable duplicate buffer eviction"); + +module_param(zfs_arc_memory_throttle_disable, int, 0644); +MODULE_PARM_DESC(zfs_arc_memory_throttle_disable, "disable memory throttle"); + +module_param(zfs_arc_min_prefetch_lifespan, int, 0644); +MODULE_PARM_DESC(zfs_arc_min_prefetch_lifespan, "Min life of prefetch block"); + +module_param(l2arc_write_max, ulong, 0644); MODULE_PARM_DESC(l2arc_write_max, "Max write bytes per interval"); -module_param(l2arc_write_boost, ulong, 0444); +module_param(l2arc_write_boost, ulong, 0644); MODULE_PARM_DESC(l2arc_write_boost, "Extra write bytes during device warmup"); -module_param(l2arc_headroom, ulong, 0444); +module_param(l2arc_headroom, ulong, 0644); MODULE_PARM_DESC(l2arc_headroom, "Number of max device writes to precache"); -module_param(l2arc_feed_secs, ulong, 0444); +module_param(l2arc_feed_secs, ulong, 0644); MODULE_PARM_DESC(l2arc_feed_secs, "Seconds between L2ARC writing"); -module_param(l2arc_feed_min_ms, ulong, 0444); +module_param(l2arc_feed_min_ms, ulong, 0644); MODULE_PARM_DESC(l2arc_feed_min_ms, "Min feed interval in milliseconds"); -module_param(l2arc_noprefetch, int, 0444); +module_param(l2arc_noprefetch, int, 0644); MODULE_PARM_DESC(l2arc_noprefetch, "Skip caching prefetched buffers"); -module_param(l2arc_feed_again, int, 0444); +module_param(l2arc_feed_again, int, 0644); MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup"); -module_param(l2arc_norw, int, 0444); +module_param(l2arc_norw, int, 0644); MODULE_PARM_DESC(l2arc_norw, "No reads during writes"); #endif