static uint8_t arc_thread_exit;
/* number of bytes to prune from caches when at arc_meta_limit is reached */
-uint_t arc_meta_prune = 1048576;
+int zfs_arc_meta_prune = 1048576;
typedef enum arc_reclaim_strategy {
ARC_RECLAIM_AGGR, /* Aggressive reclaim strategy */
} arc_reclaim_strategy_t;
/* number of seconds before growing cache again */
-static int arc_grow_retry = 5;
-
-/* expiration time for arc_no_grow */
-static clock_t arc_grow_time = 0;
+int zfs_arc_grow_retry = 5;
/* shift of arc_c for calculating both min and max arc_p */
-static int arc_p_min_shift = 4;
+int zfs_arc_p_min_shift = 4;
/* log2(fraction of arc to reclaim) */
-static int arc_shrink_shift = 5;
+int zfs_arc_shrink_shift = 5;
/*
* minimum lifespan of a prefetch block in clock ticks
* (initialized in arc_init())
*/
-static int arc_min_prefetch_lifespan;
+int zfs_arc_min_prefetch_lifespan = HZ;
+
+/* disable proactive arc throttle due to low memory */
+int zfs_arc_memory_throttle_disable = 1;
+
+/* disable duplicate buffer eviction */
+int zfs_disable_dup_eviction = 0;
static int arc_dead;
+/* expiration time for arc_no_grow */
+static clock_t arc_grow_time = 0;
+
/*
* The arc has filled available memory and has now warmed up.
*/
unsigned long zfs_arc_max = 0;
unsigned long zfs_arc_min = 0;
unsigned long zfs_arc_meta_limit = 0;
-int zfs_arc_grow_retry = 0;
-int zfs_arc_shrink_shift = 0;
-int zfs_arc_p_min_shift = 0;
-int zfs_arc_memory_throttle_disable = 1;
-int zfs_disable_dup_eviction = 0;
-int zfs_arc_meta_prune = 0;
/*
* Note that buffers can be in one of 6 states:
kmutex_t b_freeze_lock;
zio_cksum_t *b_freeze_cksum;
- void *b_thawed;
arc_buf_hdr_t *b_hash_next;
arc_buf_t *b_buf;
unsigned long l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */
int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */
int l2arc_feed_again = B_TRUE; /* turbo warmup */
-int l2arc_norw = B_TRUE; /* no reads during writes */
+int l2arc_norw = B_FALSE; /* no reads during writes */
/*
* L2ARC Internals
bzero(buf, sizeof (arc_buf_t));
mutex_init(&buf->b_evict_lock, NULL, MUTEX_DEFAULT, NULL);
- rw_init(&buf->b_data_lock, NULL, RW_DEFAULT, NULL);
arc_space_consume(sizeof (arc_buf_t), ARC_SPACE_HDRS);
return (0);
arc_buf_t *buf = vbuf;
mutex_destroy(&buf->b_evict_lock);
- rw_destroy(&buf->b_data_lock);
arc_space_return(sizeof (arc_buf_t), ARC_SPACE_HDRS);
}
buf->b_hdr->b_freeze_cksum = NULL;
}
- if (zfs_flags & ZFS_DEBUG_MODIFY) {
- if (buf->b_hdr->b_thawed)
- kmem_free(buf->b_hdr->b_thawed, 1);
- buf->b_hdr->b_thawed = kmem_alloc(1, KM_SLEEP);
- }
-
mutex_exit(&buf->b_hdr->b_freeze_lock);
}
atomic_add_64(&arc_size, -space);
}
-void *
-arc_data_buf_alloc(uint64_t size)
-{
- if (arc_evict_needed(ARC_BUFC_DATA))
- cv_signal(&arc_reclaim_thr_cv);
- atomic_add_64(&arc_size, size);
- return (zio_data_buf_alloc(size));
-}
-
-void
-arc_data_buf_free(void *buf, uint64_t size)
-{
- zio_data_buf_free(buf, size);
- ASSERT(arc_size >= size);
- atomic_add_64(&arc_size, -size);
-}
-
arc_buf_t *
arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
{
list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
+ arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS);
if (hdr->b_state == arc_l2c_only)
l2arc_hdr_stat_remove();
hdr->b_l2hdr = NULL;
kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t));
hdr->b_freeze_cksum = NULL;
}
- if (hdr->b_thawed) {
- kmem_free(hdr->b_thawed, 1);
- hdr->b_thawed = NULL;
- }
ASSERT(!list_link_active(&hdr->b_arc_node));
ASSERT3P(hdr->b_hash_next, ==, NULL);
arc_buf_remove_ref(arc_buf_t *buf, void* tag)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
- kmutex_t *hash_lock = HDR_LOCK(hdr);
+ kmutex_t *hash_lock = NULL;
int no_callback = (buf->b_efunc == NULL);
if (hdr->b_state == arc_anon) {
return (no_callback);
}
+ hash_lock = HDR_LOCK(hdr);
mutex_enter(hash_lock);
hdr = buf->b_hdr;
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
(spa && ab->b_spa != spa) ||
(ab->b_flags & (ARC_PREFETCH|ARC_INDIRECT) &&
ddi_get_lbolt() - ab->b_arc_access <
- arc_min_prefetch_lifespan)) {
+ zfs_arc_min_prefetch_lifespan)) {
skipped++;
continue;
}
}
if (may_prune && (adjustment > 0) && (arc_meta_used > arc_meta_limit))
- arc_do_user_prune(arc_meta_prune);
+ arc_do_user_prune(zfs_arc_meta_prune);
}
/*
if (arc_c > arc_c_min) {
uint64_t to_free;
- to_free = bytes ? bytes : arc_c >> arc_shrink_shift;
+ to_free = bytes ? bytes : arc_c >> zfs_arc_shrink_shift;
if (arc_c > arc_c_min + to_free)
atomic_add_64(&arc_c, -to_free);
else
arc_c = arc_c_min;
- atomic_add_64(&arc_p, -(arc_p >> arc_shrink_shift));
+ atomic_add_64(&arc_p, -(arc_p >> zfs_arc_shrink_shift));
if (arc_c > arc_size)
arc_c = MAX(arc_size, arc_c_min);
if (arc_p > arc_c)
}
/* reset the growth delay for every reclaim */
- arc_grow_time = ddi_get_lbolt()+(arc_grow_retry * hz);
+ arc_grow_time = ddi_get_lbolt()+(zfs_arc_grow_retry * hz);
arc_kmem_reap_now(last_reclaim, 0);
arc_warm = B_TRUE;
(void) cv_timedwait_interruptible(&arc_reclaim_thr_cv,
&arc_reclaim_thr_lock, (ddi_get_lbolt() + hz));
CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_thr_lock);
+
+
+ /* Allow the module options to be changed */
+ if (zfs_arc_max > 64 << 20 &&
+ zfs_arc_max < physmem * PAGESIZE &&
+ zfs_arc_max != arc_c_max)
+ arc_c_max = zfs_arc_max;
+
+ if (zfs_arc_min > 0 &&
+ zfs_arc_min < arc_c_max &&
+ zfs_arc_min != arc_c_min)
+ arc_c_min = zfs_arc_min;
+
+ if (zfs_arc_meta_limit > 0 &&
+ zfs_arc_meta_limit <= arc_c_max &&
+ zfs_arc_meta_limit != arc_meta_limit)
+ arc_meta_limit = zfs_arc_meta_limit;
+
+
+
}
arc_thread_exit = 0;
*/
if (pages > 0) {
arc_kmem_reap_now(ARC_RECLAIM_AGGR, ptob(sc->nr_to_scan));
- pages = btop(arc_evictable_memory());
} else {
arc_kmem_reap_now(ARC_RECLAIM_CONS, ptob(sc->nr_to_scan));
- pages = -1;
}
/*
ARCSTAT_BUMP(arcstat_memory_indirect_count);
} else {
arc_no_grow = B_TRUE;
- arc_grow_time = ddi_get_lbolt() + (arc_grow_retry * hz);
+ arc_grow_time = ddi_get_lbolt() + (zfs_arc_grow_retry * hz);
ARCSTAT_BUMP(arcstat_memory_direct_count);
}
mutex_exit(&arc_reclaim_thr_lock);
- return (pages);
+ return (-1);
}
SPL_SHRINKER_CALLBACK_WRAPPER(arc_shrinker_func);
arc_adapt(int bytes, arc_state_t *state)
{
int mult;
- uint64_t arc_p_min = (arc_c >> arc_p_min_shift);
+ uint64_t arc_p_min = (arc_c >> zfs_arc_p_min_shift);
if (state == arc_l2c_only)
return;
*
* arc_read_done() will invoke all the requested "done" functions
* for readers of this block.
- *
- * Normal callers should use arc_read and pass the arc buffer and offset
- * for the bp. But if you know you don't need locking, you can use
- * arc_read_bp.
*/
int
-arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *pbuf,
- arc_done_func_t *done, void *private, int priority, int zio_flags,
- uint32_t *arc_flags, const zbookmark_t *zb)
-{
- int err;
-
- if (pbuf == NULL) {
- /*
- * XXX This happens from traverse callback funcs, for
- * the objset_phys_t block.
- */
- return (arc_read_nolock(pio, spa, bp, done, private, priority,
- zio_flags, arc_flags, zb));
- }
-
- ASSERT(!refcount_is_zero(&pbuf->b_hdr->b_refcnt));
- ASSERT3U((char *)bp - (char *)pbuf->b_data, <, pbuf->b_hdr->b_size);
- rw_enter(&pbuf->b_data_lock, RW_READER);
-
- err = arc_read_nolock(pio, spa, bp, done, private, priority,
- zio_flags, arc_flags, zb);
- rw_exit(&pbuf->b_data_lock);
-
- return (err);
-}
-
-int
-arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp,
- arc_done_func_t *done, void *private, int priority, int zio_flags,
- uint32_t *arc_flags, const zbookmark_t *zb)
+arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
+ void *private, int priority, int zio_flags, uint32_t *arc_flags,
+ const zbookmark_t *zb)
{
arc_buf_hdr_t *hdr;
arc_buf_t *buf = NULL;
}
/*
+ * Notify the arc that a block was freed, and thus will never be used again.
+ *
+ * Looks up the header for bp (keyed by the spa load guid, BP_IDENTITY and
+ * BP_PHYSICAL_BIRTH) and returns immediately if none is found.  If the
+ * header is marked HDR_BUF_AVAILABLE, a temporary FTAG reference is taken,
+ * the ARC_BUF_AVAILABLE flag is cleared, and the buffer is handed to
+ * arc_release() and then arc_buf_remove_ref().  Otherwise only the hash
+ * lock is dropped.
+ */
+void
+arc_freed(spa_t *spa, const blkptr_t *bp)
+{
+ arc_buf_hdr_t *hdr;
+ kmutex_t *hash_lock;
+ uint64_t guid = spa_load_guid(spa);
+
+ /*
+ * NOTE(review): hash_lock is released on both paths below, so
+ * buf_hash_find() presumably returns with it held when a header is
+ * found -- confirm against buf_hash_find().
+ */
+ hdr = buf_hash_find(guid, BP_IDENTITY(bp), BP_PHYSICAL_BIRTH(bp),
+ &hash_lock);
+ if (hdr == NULL)
+ return;
+ if (HDR_BUF_AVAILABLE(hdr)) {
+ arc_buf_t *buf = hdr->b_buf;
+ /* Take our own reference and claim the buffer before unlocking. */
+ add_reference(hdr, hash_lock, FTAG);
+ hdr->b_flags &= ~ARC_BUF_AVAILABLE;
+ mutex_exit(hash_lock);
+
+ /* Called without hash_lock held; then drop the FTAG reference. */
+ arc_release(buf, FTAG);
+ (void) arc_buf_remove_ref(buf, FTAG);
+ } else {
+ /* No available buffer on this header: nothing to do but unlock. */
+ mutex_exit(hash_lock);
+ }
+
+}
+
+/*
* This is used by the DMU to let the ARC know that a buffer is
* being evicted, so the ARC should clean up. If this arc buf
* is not yet in the evicted state, it will be put there.
if (l2hdr) {
list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
+ arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS);
ARCSTAT_INCR(arcstat_l2_size, -buf_size);
mutex_exit(&l2arc_buflist_mtx);
}
}
-/*
- * Release this buffer. If it does not match the provided BP, fill it
- * with that block's contents.
- */
-/* ARGSUSED */
-int
-arc_release_bp(arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa,
- zbookmark_t *zb)
-{
- arc_release(buf, tag);
- return (0);
-}
-
int
arc_released(arc_buf_t *buf)
{
cv_init(&arc_reclaim_thr_cv, NULL, CV_DEFAULT, NULL);
/* Convert seconds to clock ticks */
- arc_min_prefetch_lifespan = 1 * hz;
+ zfs_arc_min_prefetch_lifespan = 1 * hz;
/* Start out with 1/8 of all memory */
arc_c = physmem * PAGESIZE / 8;
if (arc_c_min < arc_meta_limit / 2 && zfs_arc_min == 0)
arc_c_min = arc_meta_limit / 2;
- if (zfs_arc_grow_retry > 0)
- arc_grow_retry = zfs_arc_grow_retry;
-
- if (zfs_arc_shrink_shift > 0)
- arc_shrink_shift = zfs_arc_shrink_shift;
-
- if (zfs_arc_p_min_shift > 0)
- arc_p_min_shift = zfs_arc_p_min_shift;
-
- if (zfs_arc_meta_prune > 0)
- arc_meta_prune = zfs_arc_meta_prune;
-
/* if kmem_flags are set, lets try to use less memory */
if (kmem_debugging())
arc_c = arc_c / 2;
+/*
+ * Move HDR_SIZE bytes of header accounting from arcstat_hdr_size to
+ * arcstat_l2_hdr_size.  L2HDR_SIZE is not included here; the l2 header
+ * itself is accounted through arc_space_consume()/arc_space_return()
+ * with ARC_SPACE_L2HDRS where it is allocated and freed.
+ */
static void
l2arc_hdr_stat_add(void)
{
- ARCSTAT_INCR(arcstat_l2_hdr_size, HDR_SIZE + L2HDR_SIZE);
+ ARCSTAT_INCR(arcstat_l2_hdr_size, HDR_SIZE);
ARCSTAT_INCR(arcstat_hdr_size, -HDR_SIZE);
}
+/*
+ * Move HDR_SIZE bytes of header accounting from arcstat_l2_hdr_size back
+ * to arcstat_hdr_size (the inverse of l2arc_hdr_stat_add()).  L2HDR_SIZE
+ * is not included here; it is returned separately via
+ * arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS) when the l2 header is
+ * freed.
+ */
static void
l2arc_hdr_stat_remove(void)
{
- ARCSTAT_INCR(arcstat_l2_hdr_size, -(HDR_SIZE + L2HDR_SIZE));
+ ARCSTAT_INCR(arcstat_l2_hdr_size, -HDR_SIZE);
ARCSTAT_INCR(arcstat_hdr_size, HDR_SIZE);
}
abl2 = ab->b_l2hdr;
ab->b_l2hdr = NULL;
kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
+ arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS);
ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
}
abl2 = ab->b_l2hdr;
ab->b_l2hdr = NULL;
kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
+ arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS);
ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
}
list_remove(buflist, ab);
KM_PUSHPAGE);
hdrl2->b_dev = dev;
hdrl2->b_daddr = dev->l2ad_hand;
+ arc_space_consume(L2HDR_SIZE, ARC_SPACE_L2HDRS);
ab->b_flags |= ARC_L2_WRITING;
ab->b_l2hdr = hdrl2;
EXPORT_SYMBOL(arc_add_prune_callback);
EXPORT_SYMBOL(arc_remove_prune_callback);
-module_param(zfs_arc_min, ulong, 0444);
+module_param(zfs_arc_min, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_min, "Min arc size");
-module_param(zfs_arc_max, ulong, 0444);
+module_param(zfs_arc_max, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_max, "Max arc size");
-module_param(zfs_arc_meta_limit, ulong, 0444);
+module_param(zfs_arc_meta_limit, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size");
-module_param(zfs_arc_meta_prune, int, 0444);
+module_param(zfs_arc_meta_prune, int, 0644);
MODULE_PARM_DESC(zfs_arc_meta_prune, "Bytes of meta data to prune");
-module_param(zfs_arc_grow_retry, int, 0444);
+module_param(zfs_arc_grow_retry, int, 0644);
MODULE_PARM_DESC(zfs_arc_grow_retry, "Seconds before growing arc size");
-module_param(zfs_arc_shrink_shift, int, 0444);
+module_param(zfs_arc_shrink_shift, int, 0644);
MODULE_PARM_DESC(zfs_arc_shrink_shift, "log2(fraction of arc to reclaim)");
-module_param(zfs_arc_p_min_shift, int, 0444);
+module_param(zfs_arc_p_min_shift, int, 0644);
MODULE_PARM_DESC(zfs_arc_p_min_shift, "arc_c shift to calc min/max arc_p");
module_param(zfs_disable_dup_eviction, int, 0644);
module_param(zfs_arc_memory_throttle_disable, int, 0644);
MODULE_PARM_DESC(zfs_arc_memory_throttle_disable, "disable memory throttle");
-module_param(l2arc_write_max, ulong, 0444);
+module_param(zfs_arc_min_prefetch_lifespan, int, 0644);
+MODULE_PARM_DESC(zfs_arc_min_prefetch_lifespan, "Min life of prefetch block");
+
+module_param(l2arc_write_max, ulong, 0644);
MODULE_PARM_DESC(l2arc_write_max, "Max write bytes per interval");
-module_param(l2arc_write_boost, ulong, 0444);
+module_param(l2arc_write_boost, ulong, 0644);
MODULE_PARM_DESC(l2arc_write_boost, "Extra write bytes during device warmup");
-module_param(l2arc_headroom, ulong, 0444);
+module_param(l2arc_headroom, ulong, 0644);
MODULE_PARM_DESC(l2arc_headroom, "Number of max device writes to precache");
-module_param(l2arc_feed_secs, ulong, 0444);
+module_param(l2arc_feed_secs, ulong, 0644);
MODULE_PARM_DESC(l2arc_feed_secs, "Seconds between L2ARC writing");
-module_param(l2arc_feed_min_ms, ulong, 0444);
+module_param(l2arc_feed_min_ms, ulong, 0644);
MODULE_PARM_DESC(l2arc_feed_min_ms, "Min feed interval in milliseconds");
-module_param(l2arc_noprefetch, int, 0444);
+module_param(l2arc_noprefetch, int, 0644);
MODULE_PARM_DESC(l2arc_noprefetch, "Skip caching prefetched buffers");
-module_param(l2arc_feed_again, int, 0444);
+module_param(l2arc_feed_again, int, 0644);
MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup");
-module_param(l2arc_norw, int, 0444);
+module_param(l2arc_norw, int, 0644);
MODULE_PARM_DESC(l2arc_norw, "No reads during writes");
#endif