Fix stack lzjb
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 8adb54d..5ac73e1 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -794,6 +794,8 @@ hdr_cons(void *vbuf, void *unused, int kmflag)
        refcount_create(&buf->b_refcnt);
        cv_init(&buf->b_cv, NULL, CV_DEFAULT, NULL);
        mutex_init(&buf->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
+       list_link_init(&buf->b_arc_node);
+       list_link_init(&buf->b_l2node);
        arc_space_consume(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS);
 
        return (0);
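
The two list_link_init() calls give the embedded list nodes a well-defined
"not on any list" state the moment a header comes out of the constructor, so
later checks and debug assertions on the links are not reading whatever the
slab allocator left behind. A minimal user-space sketch of why that matters;
the helpers below are simplified stand-ins for the kernel list API, not the
real implementation:

    #include <assert.h>
    #include <stddef.h>

    /* Hypothetical two-pointer link, standing in for list_node_t. */
    typedef struct list_node {
            struct list_node *next;
            struct list_node *prev;
    } list_node_t;

    static void
    list_link_init(list_node_t *ln)
    {
            /* A link that points nowhere is "not on any list". */
            ln->next = NULL;
            ln->prev = NULL;
    }

    static int
    list_link_active(const list_node_t *ln)
    {
            return (ln->next != NULL);
    }

    int
    main(void)
    {
            list_node_t node;

            /*
             * Without the explicit init, node.next is indeterminate and
             * this check could randomly claim the node is already listed.
             */
            list_link_init(&node);
            assert(!list_link_active(&node));
            return (0);
    }
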
@@ -952,11 +954,6 @@ arc_cksum_compute(arc_buf_t *buf, boolean_t force)
 void
 arc_buf_thaw(arc_buf_t *buf)
 {
-       kmutex_t *hash_lock;
-
-       hash_lock = HDR_LOCK(buf->b_hdr);
-       mutex_enter(hash_lock);
-
        if (zfs_flags & ZFS_DEBUG_MODIFY) {
                if (buf->b_hdr->b_state != arc_anon)
                        panic("modifying non-anon buffer!");
@@ -978,7 +975,6 @@ arc_buf_thaw(arc_buf_t *buf)
        }
 
        mutex_exit(&buf->b_hdr->b_freeze_lock);
-       mutex_exit(hash_lock);
 }
 
 void
@@ -1149,6 +1145,8 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
        ASSERT(type >= 0 && type < ARC_SPACE_NUMTYPES);
 
        switch (type) {
+       default:
+               break;
        case ARC_SPACE_DATA:
                ARCSTAT_INCR(arcstat_data_size, space);
                break;
@@ -1173,6 +1171,8 @@ arc_space_return(uint64_t space, arc_space_type_t type)
        ASSERT(type >= 0 && type < ARC_SPACE_NUMTYPES);
 
        switch (type) {
+       default:
+               break;
        case ARC_SPACE_DATA:
                ARCSTAT_INCR(arcstat_data_size, -space);
                break;
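
Both switch statements gain a do-nothing default: arm. This is compiler
hygiene rather than a behavior change: a switch over an enum that leaves a
named value unhandled (here, presumably the ARC_SPACE_NUMTYPES sentinel used
only in the ASSERT above) trips gcc's -Wswitch diagnostic under -Wall, and a
default: arm satisfies it. A self-contained illustration with a made-up enum
(compile with gcc -Wall -c):

    #include <stdio.h>

    typedef enum {
            SPACE_DATA,
            SPACE_OTHER,
            SPACE_NUMTYPES          /* sentinel, never passed in */
    } space_type_t;

    /* gcc -Wall: "enumeration value 'SPACE_NUMTYPES' not handled in switch" */
    static const char *
    name_noisy(space_type_t type)
    {
            switch (type) {
            case SPACE_DATA:
                    return ("data");
            case SPACE_OTHER:
                    return ("other");
            }
            return ("?");
    }

    /* The default: arm silences -Wswitch without changing behavior. */
    static const char *
    name_quiet(space_type_t type)
    {
            switch (type) {
            default:
                    break;
            case SPACE_DATA:
                    return ("data");
            case SPACE_OTHER:
                    return ("other");
            }
            return ("?");
    }

    int
    main(void)
    {
            printf("%s %s\n", name_noisy(SPACE_DATA), name_quiet(SPACE_OTHER));
            return (0);
    }
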
@@ -1432,10 +1432,11 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
 static void
 arc_hdr_destroy(arc_buf_hdr_t *hdr)
 {
+       l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
+
        ASSERT(refcount_is_zero(&hdr->b_refcnt));
        ASSERT3P(hdr->b_state, ==, arc_anon);
        ASSERT(!HDR_IO_IN_PROGRESS(hdr));
-       l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
 
        if (l2hdr != NULL) {
                boolean_t buflist_held = MUTEX_HELD(&l2arc_buflist_mtx);
@@ -1709,7 +1710,7 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
        mutex_exit(&state->arcs_mtx);
 
        if (bytes_evicted < bytes)
-               dprintf("only evicted %lld bytes from %x",
+               dprintf("only evicted %lld bytes from %x\n",
                    (longlong_t)bytes_evicted, state);
 
        if (skipped)
@@ -1730,12 +1731,12 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
                if (mru_over > 0 && arc_mru_ghost->arcs_lsize[type] > 0) {
                        int64_t todelete =
                            MIN(arc_mru_ghost->arcs_lsize[type], mru_over);
-                       arc_evict_ghost(arc_mru_ghost, NULL, todelete);
+                       arc_evict_ghost(arc_mru_ghost, 0, todelete);
                } else if (arc_mfu_ghost->arcs_lsize[type] > 0) {
                        int64_t todelete = MIN(arc_mfu_ghost->arcs_lsize[type],
                            arc_mru_ghost->arcs_size +
                            arc_mfu_ghost->arcs_size - arc_c);
-                       arc_evict_ghost(arc_mfu_ghost, NULL, todelete);
+                       arc_evict_ghost(arc_mfu_ghost, 0, todelete);
                }
        }
 
@@ -1750,18 +1751,25 @@ static void
 arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes)
 {
        arc_buf_hdr_t *ab, *ab_prev;
+       arc_buf_hdr_t marker;
        list_t *list = &state->arcs_list[ARC_BUFC_DATA];
        kmutex_t *hash_lock;
        uint64_t bytes_deleted = 0;
        uint64_t bufs_skipped = 0;
 
        ASSERT(GHOST_STATE(state));
+       bzero(&marker, sizeof (marker));
 top:
        mutex_enter(&state->arcs_mtx);
        for (ab = list_tail(list); ab; ab = ab_prev) {
                ab_prev = list_prev(list, ab);
                if (spa && ab->b_spa != spa)
                        continue;
+
+               /* ignore markers */
+               if (ab->b_spa == 0)
+                       continue;
+
                hash_lock = HDR_LOCK(ab);
                /* caller may be trying to modify this buffer, skip it */
                if (MUTEX_HELD(hash_lock))
@@ -1788,15 +1796,21 @@ top:
                        DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, ab);
                        if (bytes >= 0 && bytes_deleted >= bytes)
                                break;
-               } else {
-                       if (bytes < 0) {
-                               mutex_exit(&state->arcs_mtx);
-                               mutex_enter(hash_lock);
-                               mutex_exit(hash_lock);
-                               goto top;
-                       }
+               } else if (bytes < 0) {
+                       /*
+                        * Insert a list marker and then wait for the
+                        * hash lock to become available. Once it's
+                        * available, restart from where we left off.
+                        */
+                       list_insert_after(list, ab, &marker);
+                       mutex_exit(&state->arcs_mtx);
+                       mutex_enter(hash_lock);
+                       mutex_exit(hash_lock);
+                       mutex_enter(&state->arcs_mtx);
+                       ab_prev = list_prev(list, &marker);
+                       list_remove(list, &marker);
+               } else
                        bufs_skipped += 1;
-               }
        }
        mutex_exit(&state->arcs_mtx);
 
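
The rewritten branch replaces the old goto top restart with a marker node: a
zeroed arc_buf_hdr_t (hence b_spa == 0, which is exactly what the new "ignore
markers" test skips) is linked in at the current position before arcs_mtx is
dropped, and once the hash lock has been cycled the walk resumes from whatever
now precedes the marker. Concurrent insertions and removals during the
unlocked window can no longer leave ab_prev dangling, and the list does not
have to be rescanned from the tail. A compact user-space sketch of the same
idea on a plain circular doubly linked list; the helpers and the retried flag
(standing in for the contended hash lock) are illustrative, not the kernel
list API:

    #include <stdio.h>

    /* Illustrative circular doubly linked list with a sentinel head. */
    typedef struct node {
            struct node *prev, *next;
            int id;                         /* 0 means "this is a marker" */
    } node_t;

    static void
    insert_after(node_t *pos, node_t *n)
    {
            n->prev = pos;
            n->next = pos->next;
            pos->next->prev = n;
            pos->next = n;
    }

    static void
    remove_node(node_t *n)
    {
            n->prev->next = n->next;
            n->next->prev = n->prev;
            n->prev = n->next = NULL;
    }

    int
    main(void)
    {
            node_t head = { &head, &head, -1 };     /* empty list */
            node_t bufs[4];
            node_t marker = { NULL, NULL, 0 };
            node_t *ab, *ab_prev;
            int retried = 0;
            int i;

            for (i = 0; i < 4; i++) {
                    bufs[i].id = i + 1;
                    insert_after(&head, &bufs[i]);
            }

            /* Walk from the tail toward the head, as arc_evict_ghost() does. */
            for (ab = head.prev; ab != &head; ab = ab_prev) {
                    ab_prev = ab->prev;

                    if (ab->id == 0)        /* other walkers skip markers */
                            continue;

                    if (ab->id == 2 && !retried) {
                            /*
                             * Park the marker next to 'ab', then pretend to
                             * drop the list lock, block on the contended
                             * hash lock, and re-take the list lock.  Whatever
                             * happened to the list in between, the marker
                             * still records where to resume; 'ab' itself is
                             * simply retried.
                             */
                            insert_after(ab, &marker);
                            retried = 1;
                            /* ...lock dance would happen here... */
                            ab_prev = marker.prev;
                            remove_node(&marker);
                            continue;
                    }
                    printf("visit %d\n", ab->id);
            }
            return (0);
    }
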
@@ -1812,7 +1826,7 @@ top:
        }
 
        if (bytes_deleted < bytes)
-               dprintf("only deleted %lld bytes from %p",
+               dprintf("only deleted %lld bytes from %p\n",
                    (longlong_t)bytes_deleted, state);
 }
 
@@ -1825,18 +1839,19 @@ arc_adjust(void)
         * Adjust MRU size
         */
 
-       adjustment = MIN(arc_size - arc_c,
-           arc_anon->arcs_size + arc_mru->arcs_size + arc_meta_used - arc_p);
+       adjustment = MIN((int64_t)(arc_size - arc_c),
+           (int64_t)(arc_anon->arcs_size + arc_mru->arcs_size + arc_meta_used -
+           arc_p));
 
        if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_DATA] > 0) {
                delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_DATA], adjustment);
-               (void) arc_evict(arc_mru, NULL, delta, FALSE, ARC_BUFC_DATA);
+               (void) arc_evict(arc_mru, 0, delta, FALSE, ARC_BUFC_DATA);
                adjustment -= delta;
        }
 
        if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_METADATA] > 0) {
                delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_METADATA], adjustment);
-               (void) arc_evict(arc_mru, NULL, delta, FALSE,
+               (void) arc_evict(arc_mru, 0, delta, FALSE,
                    ARC_BUFC_METADATA);
        }
 
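
The new casts matter because arc_size, arc_c, arcs_size, and arc_meta_used
are unsigned 64-bit counters. When the cache is already below its target,
arc_size - arc_c wraps to a huge positive value, an unsigned MIN() then
returns the other (positive) operand, and the adjustment > 0 test requests
an eviction that should not happen at all. Forcing both operands to int64_t
keeps the comparison signed. A tiny demonstration; the MIN macro matches the
usual sysmacros.h definition:

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    #define MIN(a, b)       ((a) < (b) ? (a) : (b))

    int
    main(void)
    {
            uint64_t arc_size = 100;        /* cache currently below target */
            uint64_t arc_c = 1000;
            uint64_t other = 50;

            /* Unsigned: 100 - 1000 wraps to 2^64 - 900, so MIN() picks 50. */
            int64_t bad = MIN(arc_size - arc_c, other);

            /* Signed: -900 < 50, so no adjustment is requested. */
            int64_t good = MIN((int64_t)(arc_size - arc_c), (int64_t)other);

            printf("unsigned MIN -> %" PRId64 " (> 0, would evict)\n", bad);
            printf("signed   MIN -> %" PRId64 " (<= 0, nothing to do)\n", good);
            return (0);
    }
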
@@ -1848,14 +1863,14 @@ arc_adjust(void)
 
        if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_DATA] > 0) {
                delta = MIN(adjustment, arc_mfu->arcs_lsize[ARC_BUFC_DATA]);
-               (void) arc_evict(arc_mfu, NULL, delta, FALSE, ARC_BUFC_DATA);
+               (void) arc_evict(arc_mfu, 0, delta, FALSE, ARC_BUFC_DATA);
                adjustment -= delta;
        }
 
        if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_METADATA] > 0) {
                int64_t delta = MIN(adjustment,
                    arc_mfu->arcs_lsize[ARC_BUFC_METADATA]);
-               (void) arc_evict(arc_mfu, NULL, delta, FALSE,
+               (void) arc_evict(arc_mfu, 0, delta, FALSE,
                    ARC_BUFC_METADATA);
        }
 
@@ -1867,7 +1882,7 @@ arc_adjust(void)
 
        if (adjustment > 0 && arc_mru_ghost->arcs_size > 0) {
                delta = MIN(arc_mru_ghost->arcs_size, adjustment);
-               arc_evict_ghost(arc_mru_ghost, NULL, delta);
+               arc_evict_ghost(arc_mru_ghost, 0, delta);
        }
 
        adjustment =
@@ -1875,7 +1890,7 @@ arc_adjust(void)
 
        if (adjustment > 0 && arc_mfu_ghost->arcs_size > 0) {
                delta = MIN(arc_mfu_ghost->arcs_size, adjustment);
-               arc_evict_ghost(arc_mfu_ghost, NULL, delta);
+               arc_evict_ghost(arc_mfu_ghost, 0, delta);
        }
 }
 
@@ -1976,9 +1991,8 @@ arc_shrink(void)
 static int
 arc_reclaim_needed(void)
 {
-       uint64_t extra;
-
 #ifdef _KERNEL
+       uint64_t extra;
 
        if (needfree)
                return (1);
@@ -2113,9 +2127,7 @@ arc_reclaim_thread(void)
                        arc_no_grow = FALSE;
                }
 
-               if (2 * arc_c < arc_size +
-                   arc_mru_ghost->arcs_size + arc_mfu_ghost->arcs_size)
-                       arc_adjust();
+               arc_adjust();
 
                if (arc_eviction_list != NULL)
                        arc_do_user_evicts();
@@ -2159,6 +2171,7 @@ arc_adapt(int bytes, arc_state_t *state)
        if (state == arc_mru_ghost) {
                mult = ((arc_mru_ghost->arcs_size >= arc_mfu_ghost->arcs_size) ?
                    1 : (arc_mfu_ghost->arcs_size/arc_mru_ghost->arcs_size));
+               mult = MIN(mult, 10); /* avoid wild arc_p adjustment */
 
                arc_p = MIN(arc_c - arc_p_min, arc_p + bytes * mult);
        } else if (state == arc_mfu_ghost) {
@@ -2166,6 +2179,7 @@ arc_adapt(int bytes, arc_state_t *state)
 
                mult = ((arc_mfu_ghost->arcs_size >= arc_mru_ghost->arcs_size) ?
                    1 : (arc_mru_ghost->arcs_size/arc_mfu_ghost->arcs_size));
+               mult = MIN(mult, 10);
 
                delta = MIN(bytes * mult, arc_p);
                arc_p = MAX(arc_p_min, arc_p - delta);
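
The MIN(mult, 10) clamp added to both ghost-hit paths bounds how far a single
hit can move arc_p. mult is the ratio of the opposite ghost list's size to
this one's, and when one ghost list is nearly empty that ratio, and with it
the arc_p step, can run into the thousands. Rough arithmetic with made-up
list sizes:

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    #define MIN(a, b)       ((a) < (b) ? (a) : (b))

    int
    main(void)
    {
            /* Made-up sizes: the MFU ghost list dwarfs the MRU ghost list. */
            uint64_t mru_ghost_size = 1ULL << 20;           /* 1 MiB */
            uint64_t mfu_ghost_size = 4ULL << 30;           /* 4 GiB */
            uint64_t bytes = 128 * 1024;                    /* one ghost hit */

            uint64_t mult = (mru_ghost_size >= mfu_ghost_size) ?
                1 : (mfu_ghost_size / mru_ghost_size);      /* 4096 */

            printf("unclamped: arc_p moves by %" PRIu64 " KiB on one hit\n",
                (bytes * mult) >> 10);
            mult = MIN(mult, 10);
            printf("clamped:   arc_p moves by %" PRIu64 " KiB on one hit\n",
                (bytes * mult) >> 10);
            return (0);
    }
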
@@ -2295,7 +2309,7 @@ arc_get_data_buf(arc_buf_t *buf)
                state =  (arc_mru->arcs_lsize[type] >= size &&
                    mfu_space > arc_mfu->arcs_size) ? arc_mru : arc_mfu;
        }
-       if ((buf->b_data = arc_evict(state, NULL, size, TRUE, type)) == NULL) {
+       if ((buf->b_data = arc_evict(state, 0, size, TRUE, type)) == NULL) {
                if (type == ARC_BUFC_METADATA) {
                        buf->b_data = zio_buf_alloc(size);
                        arc_space_consume(size, ARC_SPACE_DATA);
@@ -2666,7 +2680,7 @@ arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp,
     uint32_t *arc_flags, const zbookmark_t *zb)
 {
        arc_buf_hdr_t *hdr;
-       arc_buf_t *buf;
+       arc_buf_t *buf = NULL;
        kmutex_t *hash_lock;
        zio_t *rzio;
        uint64_t guid = spa_guid(spa);
@@ -2748,7 +2762,7 @@ top:
                uint64_t size = BP_GET_LSIZE(bp);
                arc_callback_t  *acb;
                vdev_t *vd = NULL;
-               uint64_t addr;
+               daddr_t addr = -1;
                boolean_t devw = B_FALSE;
 
                if (hdr == NULL) {
@@ -3026,7 +3040,7 @@ arc_release(arc_buf_t *buf, void *tag)
        arc_buf_hdr_t *hdr;
        kmutex_t *hash_lock = NULL;
        l2arc_buf_hdr_t *l2hdr;
-       uint64_t buf_size;
+       uint64_t buf_size = 0;
 
        /*
         * It would be nice to assert that if it's DMU metadata (level >
@@ -3401,7 +3415,7 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
         * in order to compress/encrypt/etc the data.  We therefore need to
         * make sure that there is sufficient available memory for this.
         */
-       if (error = arc_memory_throttle(reserve, anon_size, txg))
+       if ((error = arc_memory_throttle(reserve, anon_size, txg)))
                return (error);
 
        /*
@@ -3870,7 +3884,7 @@ out:
  * Free buffers that were tagged for destruction.
  */
 static void
-l2arc_do_free_on_write()
+l2arc_do_free_on_write(void)
 {
        list_t *buflist;
        l2arc_data_free_t *df, *df_prev;
@@ -4050,7 +4064,7 @@ l2arc_read_done(zio_t *zio)
 static list_t *
 l2arc_list_locked(int list_num, kmutex_t **lock)
 {
-       list_t *list;
+       list_t *list = NULL;
 
        ASSERT(list_num >= 0 && list_num <= 3);
 
@@ -4223,11 +4237,12 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
        list_t *list;
        uint64_t passed_sz, write_sz, buf_sz, headroom;
        void *buf_data;
-       kmutex_t *hash_lock, *list_lock;
+       kmutex_t *hash_lock, *list_lock = NULL;
        boolean_t have_lock, full;
        l2arc_write_callback_t *cb;
        zio_t *pio, *wzio;
        uint64_t guid = spa_guid(spa);
+       int try;
 
        ASSERT(dev->l2ad_vdev != NULL);
 
@@ -4241,7 +4256,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
         * Copy buffers for L2ARC writing.
         */
        mutex_enter(&l2arc_buflist_mtx);
-       for (int try = 0; try <= 3; try++) {
+       for (try = 0; try <= 3; try++) {
                list = l2arc_list_locked(try, &list_lock);
                passed_sz = 0;
 
@@ -4438,6 +4453,16 @@ l2arc_feed_thread(void)
                ASSERT(spa != NULL);
 
                /*
+                * If the pool is read-only then force the feed thread to
+                * sleep a little longer.
+                */
+               if (!spa_writeable(spa)) {
+                       next = ddi_get_lbolt() + 5 * l2arc_feed_secs * hz;
+                       spa_config_exit(spa, SCL_L2ARC, dev);
+                       continue;
+               }
+
+               /*
                 * Avoid contributing to memory pressure.
                 */
                if (arc_reclaim_needed()) {
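
For a pool imported read-only there is nothing to feed to the cache device,
so instead of scanning the ARC lists the thread just pushes its next wakeup
out to five feed intervals, drops the config lock, and goes back to sleep. A
sketch of the tick arithmetic, with placeholder values standing in for hz,
the l2arc_feed_secs tunable, and the current lbolt (the single-interval
writeable case is shown only for comparison and is an assumption about the
surrounding code):

    #include <stdio.h>

    int
    main(void)
    {
            int hz = 100;                   /* clock ticks per second */
            int l2arc_feed_secs = 1;        /* feed interval, seconds */
            long lbolt = 123456;            /* current tick count */

            long next_rw = lbolt + l2arc_feed_secs * hz;        /* writeable */
            long next_ro = lbolt + 5 * l2arc_feed_secs * hz;    /* read-only */

            printf("writeable pool: next feed at tick %ld\n", next_rw);
            printf("read-only pool: next feed at tick %ld\n", next_ro);
            return (0);
    }
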
@@ -4514,6 +4539,7 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
        adddev->l2ad_evict = adddev->l2ad_start;
        adddev->l2ad_first = B_TRUE;
        adddev->l2ad_writing = B_FALSE;
+       list_link_init(&adddev->l2ad_node);
        ASSERT3U(adddev->l2ad_write, >, 0);
 
        /*