Make arc+l2arc module options writable

[zfs.git] / module / zfs / arc.c
diff --git a/module/zfs/arc.c b/module/zfs/arc.c

index 55f1909..97c897d 100644 (file)
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -80,9 +80,9 @@
   * types of locks: 1) the hash table lock array, and 2) the
   * arc list locks.
   *
- * Buffers do not have their own mutexs, rather they rely on the
- * hash table mutexs for the bulk of their protection (i.e. most
- * fields in the arc_buf_hdr_t are protected by these mutexs).
+ * Buffers do not have their own mutexes, rather they rely on the
+ * hash table mutexes for the bulk of their protection (i.e. most
+ * fields in the arc_buf_hdr_t are protected by these mutexes).
   *
   * buf_hash_find() returns the appropriate mutex (held) when it
   * locates the requested buffer in the hash table.  It returns
@@ -147,12 +147,8 @@ static kmutex_t            arc_reclaim_thr_lock;
  static kcondvar_t      arc_reclaim_thr_cv;     /* used to signal reclaim thr */
  static uint8_t         arc_thread_exit;
  
-extern int zfs_write_limit_shift;
-extern uint64_t zfs_write_limit_max;
-extern kmutex_t zfs_write_limit_lock;
-
  /* number of bytes to prune from caches when at arc_meta_limit is reached */
-uint_t arc_meta_prune = 1048576;
+int zfs_arc_meta_prune = 1048576;
  
  typedef enum arc_reclaim_strategy {
         ARC_RECLAIM_AGGR,               /* Aggressive reclaim strategy */
@@ -160,25 +156,31 @@ typedef enum arc_reclaim_strategy {
  } arc_reclaim_strategy_t;
  
  /* number of seconds before growing cache again */
-static int             arc_grow_retry = 5;
-
-/* expiration time for arc_no_grow */
-static clock_t         arc_grow_time = 0;
+int zfs_arc_grow_retry = 5;
  
  /* shift of arc_c for calculating both min and max arc_p */
-static int             arc_p_min_shift = 4;
+int zfs_arc_p_min_shift = 4;
  
  /* log2(fraction of arc to reclaim) */
-static int             arc_shrink_shift = 5;
+int zfs_arc_shrink_shift = 5;
  
  /*
   * minimum lifespan of a prefetch block in clock ticks
   * (initialized in arc_init())
   */
-static int             arc_min_prefetch_lifespan;
+int zfs_arc_min_prefetch_lifespan = HZ;
+
+/* disable the proactive arc throttle due to low memory */
+int zfs_arc_memory_throttle_disable = 1;
+
+/* disable duplicate buffer eviction */
+int zfs_disable_dup_eviction = 0;
  
  static int arc_dead;
  
+/* expiration time for arc_no_grow */
+static clock_t arc_grow_time = 0;
+
  /*
   * The arc has filled available memory and has now warmed up.
   */
@@ -190,10 +192,6 @@ static boolean_t arc_warm;
  unsigned long zfs_arc_max = 0;
  unsigned long zfs_arc_min = 0;
  unsigned long zfs_arc_meta_limit = 0;
-int zfs_arc_grow_retry = 0;
-int zfs_arc_shrink_shift = 0;
-int zfs_arc_p_min_shift = 0;
-int zfs_arc_meta_prune = 0;
  
  /*
   * Note that buffers can be in one of 6 states:
@@ -311,6 +309,9 @@ typedef struct arc_stats {
         kstat_named_t arcstat_l2_size;
         kstat_named_t arcstat_l2_hdr_size;
         kstat_named_t arcstat_memory_throttle_count;
+       kstat_named_t arcstat_duplicate_buffers;
+       kstat_named_t arcstat_duplicate_buffers_size;
+       kstat_named_t arcstat_duplicate_reads;
         kstat_named_t arcstat_memory_direct_count;
         kstat_named_t arcstat_memory_indirect_count;
         kstat_named_t arcstat_no_grow;
@@ -391,6 +392,9 @@ static arc_stats_t arc_stats = {
         { "l2_size",                    KSTAT_DATA_UINT64 },
         { "l2_hdr_size",                KSTAT_DATA_UINT64 },
         { "memory_throttle_count",      KSTAT_DATA_UINT64 },
+       { "duplicate_buffers",          KSTAT_DATA_UINT64 },
+       { "duplicate_buffers_size",     KSTAT_DATA_UINT64 },
+       { "duplicate_reads",            KSTAT_DATA_UINT64 },
         { "memory_direct_count",        KSTAT_DATA_UINT64 },
         { "memory_indirect_count",      KSTAT_DATA_UINT64 },
         { "arc_no_grow",                KSTAT_DATA_UINT64 },
@@ -636,7 +640,7 @@ unsigned long l2arc_feed_secs = L2ARC_FEED_SECS;    /* interval seconds */
  unsigned long l2arc_feed_min_ms = L2ARC_FEED_MIN_MS;   /* min interval msecs */
  int l2arc_noprefetch = B_TRUE;                 /* don't cache prefetch bufs */
  int l2arc_feed_again = B_TRUE;                 /* turbo warmup */
-int l2arc_norw = B_TRUE;                       /* no reads during writes */
+int l2arc_norw = B_FALSE;                      /* no reads during writes */
  
  /*
   * L2ARC Internals
@@ -880,7 +884,6 @@ buf_cons(void *vbuf, void *unused, int kmflag)
  
         bzero(buf, sizeof (arc_buf_t));
         mutex_init(&buf->b_evict_lock, NULL, MUTEX_DEFAULT, NULL);
-       rw_init(&buf->b_data_lock, NULL, RW_DEFAULT, NULL);
         arc_space_consume(sizeof (arc_buf_t), ARC_SPACE_HDRS);
  
         return (0);
@@ -910,7 +913,6 @@ buf_dest(void *vbuf, void *unused)
         arc_buf_t *buf = vbuf;
  
         mutex_destroy(&buf->b_evict_lock);
-       rw_destroy(&buf->b_data_lock);
         arc_space_return(sizeof (arc_buf_t), ARC_SPACE_HDRS);
  }
  
@@ -1073,7 +1075,7 @@ add_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag)
                 ASSERT(list_link_active(&ab->b_arc_node));
                 list_remove(list, ab);
                 if (GHOST_STATE(ab->b_state)) {
-                       ASSERT3U(ab->b_datacnt, ==, 0);
+                       ASSERT0(ab->b_datacnt);
                         ASSERT3P(ab->b_buf, ==, NULL);
                         delta = ab->b_size;
                 }
@@ -1373,6 +1375,17 @@ arc_buf_clone(arc_buf_t *from)
         hdr->b_buf = buf;
         arc_get_data_buf(buf);
         bcopy(from->b_data, buf->b_data, size);
+
+       /*
+        * This buffer already exists in the arc so create a duplicate
+        * copy for the caller.  If the buffer is associated with user data
+        * then track the size and number of duplicates.  These stats will be
+        * updated as duplicate buffers are created and destroyed.
+        */
+       if (hdr->b_type == ARC_BUFC_DATA) {
+               ARCSTAT_BUMP(arcstat_duplicate_buffers);
+               ARCSTAT_INCR(arcstat_duplicate_buffers_size, size);
+       }
         hdr->b_datacnt += 1;
         return (buf);
  }
@@ -1471,6 +1484,16 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
                 ASSERT3U(state->arcs_size, >=, size);
                 atomic_add_64(&state->arcs_size, -size);
                 buf->b_data = NULL;
+
+               /*
+                * If we're destroying a duplicate buffer make sure
+                * that the appropriate statistics are updated.
+                */
+               if (buf->b_hdr->b_datacnt > 1 &&
+                   buf->b_hdr->b_type == ARC_BUFC_DATA) {
+                       ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
+                       ARCSTAT_INCR(arcstat_duplicate_buffers_size, -size);
+               }
                 ASSERT(buf->b_hdr->b_datacnt > 0);
                 buf->b_hdr->b_datacnt -= 1;
         }
@@ -1522,6 +1545,7 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
                         list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
                         ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
                         kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
+                       arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS);
                         if (hdr->b_state == arc_l2c_only)
                                 l2arc_hdr_stat_remove();
                         hdr->b_l2hdr = NULL;
@@ -1619,7 +1643,7 @@ int
  arc_buf_remove_ref(arc_buf_t *buf, void* tag)
  {
         arc_buf_hdr_t *hdr = buf->b_hdr;
-       kmutex_t *hash_lock = HDR_LOCK(hdr);
+       kmutex_t *hash_lock = NULL;
         int no_callback = (buf->b_efunc == NULL);
  
         if (hdr->b_state == arc_anon) {
@@ -1628,6 +1652,7 @@ arc_buf_remove_ref(arc_buf_t *buf, void* tag)
                 return (no_callback);
         }
  
+       hash_lock = HDR_LOCK(hdr);
         mutex_enter(hash_lock);
         hdr = buf->b_hdr;
         ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
@@ -1656,6 +1681,48 @@ arc_buf_size(arc_buf_t *buf)
  }
  
  /*
+ * Called from the DMU to determine if the current buffer should be
+ * evicted. In order to ensure proper locking, the eviction must be initiated
+ * from the DMU. Return true if the buffer is associated with user data and
+ * duplicate buffers still exist.
+ */
+boolean_t
+arc_buf_eviction_needed(arc_buf_t *buf)
+{
+       arc_buf_hdr_t *hdr;
+       boolean_t evict_needed = B_FALSE;
+
+       if (zfs_disable_dup_eviction)
+               return (B_FALSE);
+       /* Hold b_evict_lock while b_hdr and b_data are examined. */
+       mutex_enter(&buf->b_evict_lock);
+       hdr = buf->b_hdr;
+       if (hdr == NULL) {
+               /*
+                * b_hdr is NULL: we are in arc_do_user_evicts(); let that
+                * function perform the eviction.
+                */
+               ASSERT(buf->b_data == NULL);
+               mutex_exit(&buf->b_evict_lock);
+               return (B_FALSE);
+       } else if (buf->b_data == NULL) {
+               /*
+                * b_data is NULL but b_hdr is set: we have already been
+                * added to the arc eviction list; recommend eviction.
+                */
+               ASSERT3P(hdr, ==, &arc_eviction_hdr);
+               mutex_exit(&buf->b_evict_lock);
+               return (B_TRUE);
+       }
+       /* Recommend eviction only for user data that still has duplicates. */
+       if (hdr->b_datacnt > 1 && hdr->b_type == ARC_BUFC_DATA)
+               evict_needed = B_TRUE;
+
+       mutex_exit(&buf->b_evict_lock);
+       return (evict_needed);
+}
+
+/*
   * Evict buffers from list until we've removed the specified number of
   * bytes.  Move the removed buffers to the appropriate evict state.
   * If the recycle flag is set, then attempt to "recycle" a buffer:
@@ -1694,7 +1761,7 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
                     (spa && ab->b_spa != spa) ||
                     (ab->b_flags & (ARC_PREFETCH|ARC_INDIRECT) &&
                     ddi_get_lbolt() - ab->b_arc_access <
-                   arc_min_prefetch_lifespan)) {
+                   zfs_arc_min_prefetch_lifespan)) {
                         skipped++;
                         continue;
                 }
@@ -1705,7 +1772,7 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
                 hash_lock = HDR_LOCK(ab);
                 have_lock = MUTEX_HELD(hash_lock);
                 if (have_lock || mutex_tryenter(hash_lock)) {
-                       ASSERT3U(refcount_count(&ab->b_refcnt), ==, 0);
+                       ASSERT0(refcount_count(&ab->b_refcnt));
                         ASSERT(ab->b_datacnt > 0);
                         while (ab->b_buf) {
                                 arc_buf_t *buf = ab->b_buf;
@@ -2045,7 +2112,7 @@ arc_adjust_meta(int64_t adjustment, boolean_t may_prune)
         }
  
         if (may_prune && (adjustment > 0) && (arc_meta_used > arc_meta_limit))
-               arc_do_user_prune(arc_meta_prune);
+               arc_do_user_prune(zfs_arc_meta_prune);
  }
  
  /*
@@ -2096,14 +2163,14 @@ arc_shrink(uint64_t bytes)
         if (arc_c > arc_c_min) {
                 uint64_t to_free;
  
-               to_free = bytes ? bytes : arc_c >> arc_shrink_shift;
+               to_free = bytes ? bytes : arc_c >> zfs_arc_shrink_shift;
  
                 if (arc_c > arc_c_min + to_free)
                         atomic_add_64(&arc_c, -to_free);
                 else
                         arc_c = arc_c_min;
  
-               atomic_add_64(&arc_p, -(arc_p >> arc_shrink_shift));
+               atomic_add_64(&arc_p, -(arc_p >> zfs_arc_shrink_shift));
                 if (arc_c > arc_size)
                         arc_c = MAX(arc_size, arc_c_min);
                 if (arc_p > arc_c)
@@ -2182,7 +2249,7 @@ arc_adapt_thread(void)
                         }
  
                         /* reset the growth delay for every reclaim */
-                       arc_grow_time = ddi_get_lbolt()+(arc_grow_retry * hz);
+                       arc_grow_time = ddi_get_lbolt()+(zfs_arc_grow_retry * hz);
  
                         arc_kmem_reap_now(last_reclaim, 0);
                         arc_warm = B_TRUE;
@@ -2212,6 +2279,26 @@ arc_adapt_thread(void)
                 (void) cv_timedwait_interruptible(&arc_reclaim_thr_cv,
                     &arc_reclaim_thr_lock, (ddi_get_lbolt() + hz));
                 CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_thr_lock);
+
+
+               /* Allow the module options to be changed */
+               if (zfs_arc_max > 64 << 20 &&
+                   zfs_arc_max < physmem * PAGESIZE &&
+                   zfs_arc_max != arc_c_max)
+                       arc_c_max = zfs_arc_max;
+
+               if (zfs_arc_min > 0 &&
+                   zfs_arc_min < arc_c_max &&
+                   zfs_arc_min != arc_c_min)
+                       arc_c_min = zfs_arc_min;
+
+               if (zfs_arc_meta_limit > 0 &&
+                   zfs_arc_meta_limit <= arc_c_max &&
+                   zfs_arc_meta_limit != arc_meta_limit)
+                       arc_meta_limit = zfs_arc_meta_limit;
+
+
+
         }
  
         arc_thread_exit = 0;
@@ -2332,7 +2419,7 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
                 ARCSTAT_BUMP(arcstat_memory_indirect_count);
         } else {
                 arc_no_grow = B_TRUE;
-               arc_grow_time = ddi_get_lbolt() + (arc_grow_retry * hz);
+               arc_grow_time = ddi_get_lbolt() + (zfs_arc_grow_retry * hz);
                 ARCSTAT_BUMP(arcstat_memory_direct_count);
         }
  
@@ -2354,7 +2441,7 @@ static void
  arc_adapt(int bytes, arc_state_t *state)
  {
         int mult;
-       uint64_t arc_p_min = (arc_c >> arc_p_min_shift);
+       uint64_t arc_p_min = (arc_c >> zfs_arc_p_min_shift);
  
         if (state == arc_l2c_only)
                 return;
@@ -2418,18 +2505,6 @@ arc_evict_needed(arc_buf_contents_t type)
         if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit)
                 return (1);
  
-#ifdef _KERNEL
-       /*
-        * If zio data pages are being allocated out of a separate heap segment,
-        * then enforce that the size of available vmem for this area remains
-        * above about 1/32nd free.
-        */
-       if (type == ARC_BUFC_DATA && zio_arena != NULL &&
-           vmem_size(zio_arena, VMEM_FREE) <
-           (vmem_size(zio_arena, VMEM_ALLOC) >> 5))
-               return (1);
-#endif
-
         if (arc_no_grow)
                 return (1);
  
@@ -2663,7 +2738,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock)
                          * This is a prefetch access...
                          * move this block back to the MRU state.
                          */
-                       ASSERT3U(refcount_count(&buf->b_refcnt), ==, 0);
+                       ASSERT0(refcount_count(&buf->b_refcnt));
                         new_state = arc_mru;
                 }
  
@@ -2745,10 +2820,12 @@ arc_read_done(zio_t *zio)
         callback_list = hdr->b_acb;
         ASSERT(callback_list != NULL);
         if (BP_SHOULD_BYTESWAP(zio->io_bp) && zio->io_error == 0) {
-               arc_byteswap_func_t *func = BP_GET_LEVEL(zio->io_bp) > 0 ?
-                   byteswap_uint64_array :
-                   dmu_ot[BP_GET_TYPE(zio->io_bp)].ot_byteswap;
-               func(buf->b_data, hdr->b_size);
+               dmu_object_byteswap_t bswap =
+                   DMU_OT_BYTESWAP(BP_GET_TYPE(zio->io_bp));
+               if (BP_GET_LEVEL(zio->io_bp) > 0)
+                   byteswap_uint64_array(buf->b_data, hdr->b_size);
+               else
+                   dmu_ot_byteswap[bswap].ob_func(buf->b_data, hdr->b_size);
         }
  
         arc_cksum_compute(buf, B_FALSE);
@@ -2767,8 +2844,10 @@ arc_read_done(zio_t *zio)
         abuf = buf;
         for (acb = callback_list; acb; acb = acb->acb_next) {
                 if (acb->acb_done) {
-                       if (abuf == NULL)
+                       if (abuf == NULL) {
+                               ARCSTAT_BUMP(arcstat_duplicate_reads);
                                 abuf = arc_buf_clone(buf);
+                       }
                         acb->acb_buf = abuf;
                         abuf = NULL;
                 }
@@ -2832,7 +2911,7 @@ arc_read_done(zio_t *zio)
  }
  
  /*
- * "Read" the block block at the specified DVA (in bp) via the
+ * "Read" the block at the specified DVA (in bp) via the
   * cache.  If the block is found in the cache, invoke the provided
   * callback immediately and return.  Note that the `zio' parameter
   * in the callback will be NULL in this case, since no IO was
@@ -2848,42 +2927,11 @@ arc_read_done(zio_t *zio)
   *
   * arc_read_done() will invoke all the requested "done" functions
   * for readers of this block.
- *
- * Normal callers should use arc_read and pass the arc buffer and offset
- * for the bp.  But if you know you don't need locking, you can use
- * arc_read_bp.
   */
  int
-arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *pbuf,
-    arc_done_func_t *done, void *private, int priority, int zio_flags,
-    uint32_t *arc_flags, const zbookmark_t *zb)
-{
-       int err;
-
-       if (pbuf == NULL) {
-               /*
-                * XXX This happens from traverse callback funcs, for
-                * the objset_phys_t block.
-                */
-               return (arc_read_nolock(pio, spa, bp, done, private, priority,
-                   zio_flags, arc_flags, zb));
-       }
-
-       ASSERT(!refcount_is_zero(&pbuf->b_hdr->b_refcnt));
-       ASSERT3U((char *)bp - (char *)pbuf->b_data, <, pbuf->b_hdr->b_size);
-       rw_enter(&pbuf->b_data_lock, RW_READER);
-
-       err = arc_read_nolock(pio, spa, bp, done, private, priority,
-           zio_flags, arc_flags, zb);
-       rw_exit(&pbuf->b_data_lock);
-
-       return (err);
-}
-
-int
-arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp,
-    arc_done_func_t *done, void *private, int priority, int zio_flags,
-    uint32_t *arc_flags, const zbookmark_t *zb)
+arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
+    void *private, int priority, int zio_flags, uint32_t *arc_flags,
+    const zbookmark_t *zb)
  {
         arc_buf_hdr_t *hdr;
         arc_buf_t *buf = NULL;
@@ -3002,7 +3050,7 @@ top:
                         /* this block is in the ghost cache */
                         ASSERT(GHOST_STATE(hdr->b_state));
                         ASSERT(!HDR_IO_IN_PROGRESS(hdr));
-                       ASSERT3U(refcount_count(&hdr->b_refcnt), ==, 0);
+                       ASSERT0(refcount_count(&hdr->b_refcnt));
                         ASSERT(hdr->b_buf == NULL);
  
                         /* if this is a prefetch, we don't have a reference */
@@ -3182,6 +3230,34 @@ arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private)
  }
  
  /*
+ * Notify the arc that a block was freed, and thus will never be used again.
+ */
+void
+arc_freed(spa_t *spa, const blkptr_t *bp)
+{
+       arc_buf_hdr_t *hdr;
+       kmutex_t *hash_lock;
+       uint64_t guid = spa_load_guid(spa);
+
+       hdr = buf_hash_find(guid, BP_IDENTITY(bp), BP_PHYSICAL_BIRTH(bp),
+           &hash_lock);
+       if (hdr == NULL)
+               return;
+       if (HDR_BUF_AVAILABLE(hdr)) {
+               arc_buf_t *buf = hdr->b_buf;
+               add_reference(hdr, hash_lock, FTAG);
+               hdr->b_flags &= ~ARC_BUF_AVAILABLE;
+               mutex_exit(hash_lock);
+               /* Lock dropped: release buf, then drop the FTAG reference. */
+               arc_release(buf, FTAG);
+               (void) arc_buf_remove_ref(buf, FTAG);
+       } else {
+               mutex_exit(hash_lock);
+       }
+
+}
+
+/*
   * This is used by the DMU to let the ARC know that a buffer is
   * being evicted, so the ARC should clean up.  If this arc buf
   * is not yet in the evicted state, it will be put there.
@@ -3338,6 +3414,16 @@ arc_release(arc_buf_t *buf, void *tag)
                         ASSERT3U(*size, >=, hdr->b_size);
                         atomic_add_64(size, -hdr->b_size);
                 }
+
+               /*
+                * We're releasing a duplicate user data buffer, update
+                * our statistics accordingly.
+                */
+               if (hdr->b_type == ARC_BUFC_DATA) {
+                       ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
+                       ARCSTAT_INCR(arcstat_duplicate_buffers_size,
+                           -hdr->b_size);
+               }
                 hdr->b_datacnt -= 1;
                 arc_cksum_verify(buf);
  
@@ -3378,24 +3464,12 @@ arc_release(arc_buf_t *buf, void *tag)
         if (l2hdr) {
                 list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
                 kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
+               arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS);
                 ARCSTAT_INCR(arcstat_l2_size, -buf_size);
                 mutex_exit(&l2arc_buflist_mtx);
         }
  }
  
-/*
- * Release this buffer.  If it does not match the provided BP, fill it
- * with that block's contents.
- */
-/* ARGSUSED */
-int
-arc_release_bp(arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa,
-    zbookmark_t *zb)
-{
-       arc_release(buf, tag);
-       return (0);
-}
-
  int
  arc_released(arc_buf_t *buf)
  {
@@ -3565,12 +3639,11 @@ arc_memory_throttle(uint64_t reserve, uint64_t inflight_data, uint64_t txg)
  #ifdef _KERNEL
         uint64_t available_memory;
  
+       if (zfs_arc_memory_throttle_disable)
+               return (0);
+
         /* Easily reclaimable memory (free + inactive + arc-evictable) */
         available_memory = ptob(spl_kmem_availrmem()) + arc_evictable_memory();
-#if defined(__i386)
-       available_memory =
-           MIN(available_memory, vmem_size(heap_arena, VMEM_FREE));
-#endif
  
         if (available_memory <= zfs_write_limit_max) {
                 ARCSTAT_INCR(arcstat_memory_throttle_count, 1);
@@ -3703,7 +3776,7 @@ arc_init(void)
         cv_init(&arc_reclaim_thr_cv, NULL, CV_DEFAULT, NULL);
  
         /* Convert seconds to clock ticks */
-       arc_min_prefetch_lifespan = 1 * hz;
+       zfs_arc_min_prefetch_lifespan = 1 * hz;
  
         /* Start out with 1/8 of all memory */
         arc_c = physmem * PAGESIZE / 8;
@@ -3751,18 +3824,6 @@ arc_init(void)
         if (arc_c_min < arc_meta_limit / 2 && zfs_arc_min == 0)
                 arc_c_min = arc_meta_limit / 2;
  
-       if (zfs_arc_grow_retry > 0)
-               arc_grow_retry = zfs_arc_grow_retry;
-
-       if (zfs_arc_shrink_shift > 0)
-               arc_shrink_shift = zfs_arc_shrink_shift;
-
-       if (zfs_arc_p_min_shift > 0)
-               arc_p_min_shift = zfs_arc_p_min_shift;
-
-       if (zfs_arc_meta_prune > 0)
-               arc_meta_prune = zfs_arc_meta_prune;
-
         /* if kmem_flags are set, lets try to use less memory */
         if (kmem_debugging())
                 arc_c = arc_c / 2;
@@ -4091,14 +4152,14 @@ l2arc_write_interval(clock_t began, uint64_t wanted, uint64_t wrote)
  static void
  l2arc_hdr_stat_add(void)
  {
-       ARCSTAT_INCR(arcstat_l2_hdr_size, HDR_SIZE + L2HDR_SIZE);
+       ARCSTAT_INCR(arcstat_l2_hdr_size, HDR_SIZE);
         ARCSTAT_INCR(arcstat_hdr_size, -HDR_SIZE);
  }
  
  static void
  l2arc_hdr_stat_remove(void)
  {
-       ARCSTAT_INCR(arcstat_l2_hdr_size, -(HDR_SIZE + L2HDR_SIZE));
+       ARCSTAT_INCR(arcstat_l2_hdr_size, -HDR_SIZE);
         ARCSTAT_INCR(arcstat_hdr_size, HDR_SIZE);
  }
  
@@ -4242,6 +4303,7 @@ l2arc_write_done(zio_t *zio)
                         abl2 = ab->b_l2hdr;
                         ab->b_l2hdr = NULL;
                         kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
+                       arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS);
                         ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
                 }
  
@@ -4488,6 +4550,7 @@ top:
                                 abl2 = ab->b_l2hdr;
                                 ab->b_l2hdr = NULL;
                                 kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
+                               arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS);
                                 ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
                         }
                         list_remove(buflist, ab);
@@ -4613,6 +4676,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
                                             KM_PUSHPAGE);
                         hdrl2->b_dev = dev;
                         hdrl2->b_daddr = dev->l2ad_hand;
+                       arc_space_consume(L2HDR_SIZE, ARC_SPACE_L2HDRS);
  
                         ab->b_flags |= ARC_L2_WRITING;
                         ab->b_l2hdr = hdrl2;
@@ -4655,7 +4719,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
         mutex_exit(&l2arc_buflist_mtx);
  
         if (pio == NULL) {
-               ASSERT3U(write_sz, ==, 0);
+               ASSERT0(write_sz);
                 kmem_cache_free(hdr_cache, head);
                 return (0);
         }
@@ -4957,49 +5021,58 @@ EXPORT_SYMBOL(arc_getbuf_func);
  EXPORT_SYMBOL(arc_add_prune_callback);
  EXPORT_SYMBOL(arc_remove_prune_callback);
  
-module_param(zfs_arc_min, ulong, 0444);
+module_param(zfs_arc_min, ulong, 0644);
  MODULE_PARM_DESC(zfs_arc_min, "Min arc size");
  
-module_param(zfs_arc_max, ulong, 0444);
+module_param(zfs_arc_max, ulong, 0644);
  MODULE_PARM_DESC(zfs_arc_max, "Max arc size");
  
-module_param(zfs_arc_meta_limit, ulong, 0444);
+module_param(zfs_arc_meta_limit, ulong, 0644);
  MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size");
  
-module_param(zfs_arc_meta_prune, int, 0444);
+module_param(zfs_arc_meta_prune, int, 0644);
  MODULE_PARM_DESC(zfs_arc_meta_prune, "Bytes of meta data to prune");
  
-module_param(zfs_arc_grow_retry, int, 0444);
+module_param(zfs_arc_grow_retry, int, 0644);
  MODULE_PARM_DESC(zfs_arc_grow_retry, "Seconds before growing arc size");
  
-module_param(zfs_arc_shrink_shift, int, 0444);
+module_param(zfs_arc_shrink_shift, int, 0644);
  MODULE_PARM_DESC(zfs_arc_shrink_shift, "log2(fraction of arc to reclaim)");
  
-module_param(zfs_arc_p_min_shift, int, 0444);
+module_param(zfs_arc_p_min_shift, int, 0644);
  MODULE_PARM_DESC(zfs_arc_p_min_shift, "arc_c shift to calc min/max arc_p");
  
-module_param(l2arc_write_max, ulong, 0444);
+module_param(zfs_disable_dup_eviction, int, 0644);
+MODULE_PARM_DESC(zfs_disable_dup_eviction, "disable duplicate buffer eviction");
+
+module_param(zfs_arc_memory_throttle_disable, int, 0644);
+MODULE_PARM_DESC(zfs_arc_memory_throttle_disable, "disable memory throttle");
+
+module_param(zfs_arc_min_prefetch_lifespan, int, 0644);
+MODULE_PARM_DESC(zfs_arc_min_prefetch_lifespan, "Min life of prefetch block");
+
+module_param(l2arc_write_max, ulong, 0644);
  MODULE_PARM_DESC(l2arc_write_max, "Max write bytes per interval");
  
-module_param(l2arc_write_boost, ulong, 0444);
+module_param(l2arc_write_boost, ulong, 0644);
  MODULE_PARM_DESC(l2arc_write_boost, "Extra write bytes during device warmup");
  
-module_param(l2arc_headroom, ulong, 0444);
+module_param(l2arc_headroom, ulong, 0644);
  MODULE_PARM_DESC(l2arc_headroom, "Number of max device writes to precache");
  
-module_param(l2arc_feed_secs, ulong, 0444);
+module_param(l2arc_feed_secs, ulong, 0644);
  MODULE_PARM_DESC(l2arc_feed_secs, "Seconds between L2ARC writing");
  
-module_param(l2arc_feed_min_ms, ulong, 0444);
+module_param(l2arc_feed_min_ms, ulong, 0644);
  MODULE_PARM_DESC(l2arc_feed_min_ms, "Min feed interval in milliseconds");
  
-module_param(l2arc_noprefetch, int, 0444);
+module_param(l2arc_noprefetch, int, 0644);
  MODULE_PARM_DESC(l2arc_noprefetch, "Skip caching prefetched buffers");
  
-module_param(l2arc_feed_again, int, 0444);
+module_param(l2arc_feed_again, int, 0644);
  MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup");
  
-module_param(l2arc_norw, int, 0444);
+module_param(l2arc_norw, int, 0644);
  MODULE_PARM_DESC(l2arc_norw, "No reads during writes");
  
  #endif