X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Farc.c;h=ff631e61b2eaa2eaf04d2ad1712f6f17db554060;hb=1ef5e8296a07a0d208d0663ae966c8b0206f7e6a;hp=63b659b9a8c56ae79dcfa4baaa1aae4d596cd071;hpb=1834f2d8b715d25bafbb0e4a099994f45c3211ae;p=zfs.git diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 63b659b..ff631e6 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -180,6 +180,7 @@ unsigned long zfs_arc_meta_limit = 0; int zfs_arc_grow_retry = 0; int zfs_arc_shrink_shift = 0; int zfs_arc_p_min_shift = 0; +int zfs_arc_reduce_dnlc_percent = 0; /* * Note that buffers can be in one of 6 states: @@ -282,6 +283,8 @@ typedef struct arc_stats { kstat_named_t arcstat_l2_size; kstat_named_t arcstat_l2_hdr_size; kstat_named_t arcstat_memory_throttle_count; + kstat_named_t arcstat_memory_direct_count; + kstat_named_t arcstat_memory_indirect_count; kstat_named_t arcstat_no_grow; kstat_named_t arcstat_tempreserve; kstat_named_t arcstat_loaned_bytes; @@ -344,6 +347,8 @@ static arc_stats_t arc_stats = { { "l2_size", KSTAT_DATA_UINT64 }, { "l2_hdr_size", KSTAT_DATA_UINT64 }, { "memory_throttle_count", KSTAT_DATA_UINT64 }, + { "memory_direct_count", KSTAT_DATA_UINT64 }, + { "memory_indirect_count", KSTAT_DATA_UINT64 }, { "arc_no_grow", KSTAT_DATA_UINT64 }, { "arc_tempreserve", KSTAT_DATA_UINT64 }, { "arc_loaned_bytes", KSTAT_DATA_UINT64 }, @@ -577,14 +582,14 @@ uint64_t zfs_crc64_table[256]; /* * L2ARC Performance Tunables */ -uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* default max write size */ -uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra write during warmup */ -uint64_t l2arc_headroom = L2ARC_HEADROOM; /* number of dev writes */ -uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ -uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval milliseconds */ -boolean_t l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ -boolean_t l2arc_feed_again = B_TRUE; /* turbo warmup */ -boolean_t l2arc_norw = B_TRUE; /* no reads during writes */ +unsigned long l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */ +unsigned long l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */ +unsigned long l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */ +unsigned long l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ +unsigned long l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */ +int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ +int l2arc_feed_again = B_TRUE; /* turbo warmup */ +int l2arc_norw = B_TRUE; /* no reads during writes */ /* * L2ARC Internals @@ -2080,14 +2085,16 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat) kmem_cache_t *prev_data_cache = NULL; extern kmem_cache_t *zio_buf_cache[]; extern kmem_cache_t *zio_data_buf_cache[]; - #ifdef _KERNEL - if (arc_meta_used >= arc_meta_limit) { + int retry = 0; + + while ((arc_meta_used >= arc_meta_limit) && (retry < 10)) { /* * We are exceeding our meta-data cache limit. * Purge some DNLC entries to release holds on meta-data. */ dnlc_reduce_cache((void *)(uintptr_t)arc_reduce_dnlc_percent); + retry++; } #if defined(__i386) /* @@ -2153,6 +2160,10 @@ arc_reclaim_thread(void) arc_no_grow = FALSE; } + /* Keep meta data usage within limits */ + if (arc_meta_used >= arc_meta_limit) + arc_kmem_reap_now(ARC_RECLAIM_CONS); + arc_adjust(); if (arc_eviction_list != NULL) @@ -2171,6 +2182,59 @@ arc_reclaim_thread(void) thread_exit(); } +#ifdef _KERNEL +/* + * Under Linux the arc shrinker may be called for synchronous (direct) + * reclaim, or asynchronous (indirect) reclaim. When called by kswapd + * for indirect reclaim we take a conservative approach and just reap + * free slabs from the ARC caches. If this proves to be insufficient + * direct reclaim will be trigger. In direct reclaim a more aggressive + * strategy is used, data is evicted from the ARC and free slabs reaped. + */ +static int +__arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc) +{ + arc_reclaim_strategy_t strategy; + int arc_reclaim; + + /* Return number of reclaimable pages based on arc_shrink_shift */ + arc_reclaim = MAX(btop(((int64_t)arc_size - (int64_t)arc_c_min)) + >> arc_shrink_shift, 0); + if (sc->nr_to_scan == 0) + return (arc_reclaim); + + /* Prevent reclaim below arc_c_min */ + if (arc_reclaim <= 0) + return (-1); + + /* Not allowed to perform filesystem reclaim */ + if (!(sc->gfp_mask & __GFP_FS)) + return (-1); + + /* Reclaim in progress */ + if (mutex_tryenter(&arc_reclaim_thr_lock) == 0) + return (-1); + + if (current_is_kswapd()) { + strategy = ARC_RECLAIM_CONS; + ARCSTAT_INCR(arcstat_memory_indirect_count, 1); + } else { + strategy = ARC_RECLAIM_AGGR; + ARCSTAT_INCR(arcstat_memory_direct_count, 1); + } + + arc_kmem_reap_now(strategy); + arc_reclaim = MAX(btop(((int64_t)arc_size - (int64_t)arc_c_min)) + >> arc_shrink_shift, 0); + mutex_exit(&arc_reclaim_thr_lock); + + return (arc_reclaim); +} +SPL_SHRINKER_CALLBACK_WRAPPER(arc_shrinker_func); + +SPL_SHRINKER_DECLARE(arc_shrinker, arc_shrinker_func, DEFAULT_SEEKS); +#endif /* _KERNEL */ + /* * Adapt arc info given the number of bytes we are trying to add and * the state that we are comming from. This function is only called @@ -3485,6 +3549,12 @@ arc_init(void) * need to limit the cache to 1/8 of VM size. */ arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8); + /* + * Register a shrinker to support synchronous (direct) memory + * reclaim from the arc. This is done to prevent kswapd from + * swapping out pages when it is preferable to shrink the arc. + */ + spl_register_shrinker(&arc_shrinker); #endif /* set min cache to 1/32 of all memory, or 64MB, whichever is more */ @@ -3528,6 +3598,9 @@ arc_init(void) if (zfs_arc_p_min_shift > 0) arc_p_min_shift = zfs_arc_p_min_shift; + if (zfs_arc_reduce_dnlc_percent > 0) + arc_reduce_dnlc_percent = zfs_arc_reduce_dnlc_percent; + /* if kmem_flags are set, lets try to use less memory */ if (kmem_debugging()) arc_c = arc_c / 2; @@ -3602,6 +3675,10 @@ void arc_fini(void) { mutex_enter(&arc_reclaim_thr_lock); +#ifdef _KERNEL + spl_unregister_shrinker(&arc_shrinker); +#endif /* _KERNEL */ + arc_thread_exit = 1; while (arc_thread_exit != 0) cv_wait(&arc_reclaim_thr_cv, &arc_reclaim_thr_lock); @@ -4698,12 +4775,49 @@ EXPORT_SYMBOL(arc_read); EXPORT_SYMBOL(arc_buf_remove_ref); EXPORT_SYMBOL(arc_getbuf_func); -module_param(zfs_arc_min, ulong, 0644); -MODULE_PARM_DESC(zfs_arc_min, "Minimum arc size"); +module_param(zfs_arc_min, ulong, 0444); +MODULE_PARM_DESC(zfs_arc_min, "Min arc size"); -module_param(zfs_arc_max, ulong, 0644); -MODULE_PARM_DESC(zfs_arc_max, "Maximum arc size"); +module_param(zfs_arc_max, ulong, 0444); +MODULE_PARM_DESC(zfs_arc_max, "Max arc size"); -module_param(zfs_arc_meta_limit, ulong, 0644); +module_param(zfs_arc_meta_limit, ulong, 0444); MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size"); + +module_param(zfs_arc_reduce_dnlc_percent, int, 0444); +MODULE_PARM_DESC(zfs_arc_reduce_dnlc_percent, "Meta reclaim percentage"); + +module_param(zfs_arc_grow_retry, int, 0444); +MODULE_PARM_DESC(zfs_arc_grow_retry, "Seconds before growing arc size"); + +module_param(zfs_arc_shrink_shift, int, 0444); +MODULE_PARM_DESC(zfs_arc_shrink_shift, "log2(fraction of arc to reclaim)"); + +module_param(zfs_arc_p_min_shift, int, 0444); +MODULE_PARM_DESC(zfs_arc_p_min_shift, "arc_c shift to calc min/max arc_p"); + +module_param(l2arc_write_max, ulong, 0444); +MODULE_PARM_DESC(l2arc_write_max, "Max write bytes per interval"); + +module_param(l2arc_write_boost, ulong, 0444); +MODULE_PARM_DESC(l2arc_write_boost, "Extra write bytes during device warmup"); + +module_param(l2arc_headroom, ulong, 0444); +MODULE_PARM_DESC(l2arc_headroom, "Number of max device writes to precache"); + +module_param(l2arc_feed_secs, ulong, 0444); +MODULE_PARM_DESC(l2arc_feed_secs, "Seconds between L2ARC writing"); + +module_param(l2arc_feed_min_ms, ulong, 0444); +MODULE_PARM_DESC(l2arc_feed_min_ms, "Min feed interval in milliseconds"); + +module_param(l2arc_noprefetch, int, 0444); +MODULE_PARM_DESC(l2arc_noprefetch, "Skip caching prefetched buffers"); + +module_param(l2arc_feed_again, int, 0444); +MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup"); + +module_param(l2arc_norw, int, 0444); +MODULE_PARM_DESC(l2arc_norw, "No reads during writes"); + #endif