X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Farc.c;h=5ac73e1158ca6033ce54a25d03ad4b4d4f0bae4d;hb=c9c0d073da561bcbefbdf09c87fc75b227415619;hp=8adb54dc6e195748d56578f3aa1e8eb213a42f86;hpb=428870ff734fdaccc342b33fc53cf94724409a46;p=zfs.git diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 8adb54d..5ac73e1 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -794,6 +794,8 @@ hdr_cons(void *vbuf, void *unused, int kmflag) refcount_create(&buf->b_refcnt); cv_init(&buf->b_cv, NULL, CV_DEFAULT, NULL); mutex_init(&buf->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); + list_link_init(&buf->b_arc_node); + list_link_init(&buf->b_l2node); arc_space_consume(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS); return (0); @@ -952,11 +954,6 @@ arc_cksum_compute(arc_buf_t *buf, boolean_t force) void arc_buf_thaw(arc_buf_t *buf) { - kmutex_t *hash_lock; - - hash_lock = HDR_LOCK(buf->b_hdr); - mutex_enter(hash_lock); - if (zfs_flags & ZFS_DEBUG_MODIFY) { if (buf->b_hdr->b_state != arc_anon) panic("modifying non-anon buffer!"); @@ -978,7 +975,6 @@ arc_buf_thaw(arc_buf_t *buf) } mutex_exit(&buf->b_hdr->b_freeze_lock); - mutex_exit(hash_lock); } void @@ -1149,6 +1145,8 @@ arc_space_consume(uint64_t space, arc_space_type_t type) ASSERT(type >= 0 && type < ARC_SPACE_NUMTYPES); switch (type) { + default: + break; case ARC_SPACE_DATA: ARCSTAT_INCR(arcstat_data_size, space); break; @@ -1173,6 +1171,8 @@ arc_space_return(uint64_t space, arc_space_type_t type) ASSERT(type >= 0 && type < ARC_SPACE_NUMTYPES); switch (type) { + default: + break; case ARC_SPACE_DATA: ARCSTAT_INCR(arcstat_data_size, -space); break; @@ -1432,10 +1432,11 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all) static void arc_hdr_destroy(arc_buf_hdr_t *hdr) { + l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr; + ASSERT(refcount_is_zero(&hdr->b_refcnt)); ASSERT3P(hdr->b_state, ==, arc_anon); ASSERT(!HDR_IO_IN_PROGRESS(hdr)); - l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr; if (l2hdr != NULL) { boolean_t buflist_held = MUTEX_HELD(&l2arc_buflist_mtx); @@ -1709,7 +1710,7 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle, mutex_exit(&state->arcs_mtx); if (bytes_evicted < bytes) - dprintf("only evicted %lld bytes from %x", + dprintf("only evicted %lld bytes from %x\n", (longlong_t)bytes_evicted, state); if (skipped) @@ -1730,12 +1731,12 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle, if (mru_over > 0 && arc_mru_ghost->arcs_lsize[type] > 0) { int64_t todelete = MIN(arc_mru_ghost->arcs_lsize[type], mru_over); - arc_evict_ghost(arc_mru_ghost, NULL, todelete); + arc_evict_ghost(arc_mru_ghost, 0, todelete); } else if (arc_mfu_ghost->arcs_lsize[type] > 0) { int64_t todelete = MIN(arc_mfu_ghost->arcs_lsize[type], arc_mru_ghost->arcs_size + arc_mfu_ghost->arcs_size - arc_c); - arc_evict_ghost(arc_mfu_ghost, NULL, todelete); + arc_evict_ghost(arc_mfu_ghost, 0, todelete); } } @@ -1750,18 +1751,25 @@ static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes) { arc_buf_hdr_t *ab, *ab_prev; + arc_buf_hdr_t marker; list_t *list = &state->arcs_list[ARC_BUFC_DATA]; kmutex_t *hash_lock; uint64_t bytes_deleted = 0; uint64_t bufs_skipped = 0; ASSERT(GHOST_STATE(state)); + bzero(&marker, sizeof(marker)); top: mutex_enter(&state->arcs_mtx); for (ab = list_tail(list); ab; ab = ab_prev) { ab_prev = list_prev(list, ab); if (spa && ab->b_spa != spa) continue; + + /* ignore markers */ + if (ab->b_spa == 0) + continue; + hash_lock = HDR_LOCK(ab); /* caller may be trying to modify this buffer, skip it */ if (MUTEX_HELD(hash_lock)) @@ -1788,15 +1796,21 @@ top: DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, ab); if (bytes >= 0 && bytes_deleted >= bytes) break; - } else { - if (bytes < 0) { - mutex_exit(&state->arcs_mtx); - mutex_enter(hash_lock); - mutex_exit(hash_lock); - goto top; - } + } else if (bytes < 0) { + /* + * Insert a list marker and then wait for the + * hash lock to become available. Once its + * available, restart from where we left off. + */ + list_insert_after(list, ab, &marker); + mutex_exit(&state->arcs_mtx); + mutex_enter(hash_lock); + mutex_exit(hash_lock); + mutex_enter(&state->arcs_mtx); + ab_prev = list_prev(list, &marker); + list_remove(list, &marker); + } else bufs_skipped += 1; - } } mutex_exit(&state->arcs_mtx); @@ -1812,7 +1826,7 @@ top: } if (bytes_deleted < bytes) - dprintf("only deleted %lld bytes from %p", + dprintf("only deleted %lld bytes from %p\n", (longlong_t)bytes_deleted, state); } @@ -1825,18 +1839,19 @@ arc_adjust(void) * Adjust MRU size */ - adjustment = MIN(arc_size - arc_c, - arc_anon->arcs_size + arc_mru->arcs_size + arc_meta_used - arc_p); + adjustment = MIN((int64_t)(arc_size - arc_c), + (int64_t)(arc_anon->arcs_size + arc_mru->arcs_size + arc_meta_used - + arc_p)); if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_DATA] > 0) { delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_DATA], adjustment); - (void) arc_evict(arc_mru, NULL, delta, FALSE, ARC_BUFC_DATA); + (void) arc_evict(arc_mru, 0, delta, FALSE, ARC_BUFC_DATA); adjustment -= delta; } if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_METADATA] > 0) { delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_METADATA], adjustment); - (void) arc_evict(arc_mru, NULL, delta, FALSE, + (void) arc_evict(arc_mru, 0, delta, FALSE, ARC_BUFC_METADATA); } @@ -1848,14 +1863,14 @@ arc_adjust(void) if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_DATA] > 0) { delta = MIN(adjustment, arc_mfu->arcs_lsize[ARC_BUFC_DATA]); - (void) arc_evict(arc_mfu, NULL, delta, FALSE, ARC_BUFC_DATA); + (void) arc_evict(arc_mfu, 0, delta, FALSE, ARC_BUFC_DATA); adjustment -= delta; } if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_METADATA] > 0) { int64_t delta = MIN(adjustment, arc_mfu->arcs_lsize[ARC_BUFC_METADATA]); - (void) arc_evict(arc_mfu, NULL, delta, FALSE, + (void) arc_evict(arc_mfu, 0, delta, FALSE, ARC_BUFC_METADATA); } @@ -1867,7 +1882,7 @@ arc_adjust(void) if (adjustment > 0 && arc_mru_ghost->arcs_size > 0) { delta = MIN(arc_mru_ghost->arcs_size, adjustment); - arc_evict_ghost(arc_mru_ghost, NULL, delta); + arc_evict_ghost(arc_mru_ghost, 0, delta); } adjustment = @@ -1875,7 +1890,7 @@ arc_adjust(void) if (adjustment > 0 && arc_mfu_ghost->arcs_size > 0) { delta = MIN(arc_mfu_ghost->arcs_size, adjustment); - arc_evict_ghost(arc_mfu_ghost, NULL, delta); + arc_evict_ghost(arc_mfu_ghost, 0, delta); } } @@ -1976,9 +1991,8 @@ arc_shrink(void) static int arc_reclaim_needed(void) { - uint64_t extra; - #ifdef _KERNEL + uint64_t extra; if (needfree) return (1); @@ -2113,9 +2127,7 @@ arc_reclaim_thread(void) arc_no_grow = FALSE; } - if (2 * arc_c < arc_size + - arc_mru_ghost->arcs_size + arc_mfu_ghost->arcs_size) - arc_adjust(); + arc_adjust(); if (arc_eviction_list != NULL) arc_do_user_evicts(); @@ -2159,6 +2171,7 @@ arc_adapt(int bytes, arc_state_t *state) if (state == arc_mru_ghost) { mult = ((arc_mru_ghost->arcs_size >= arc_mfu_ghost->arcs_size) ? 1 : (arc_mfu_ghost->arcs_size/arc_mru_ghost->arcs_size)); + mult = MIN(mult, 10); /* avoid wild arc_p adjustment */ arc_p = MIN(arc_c - arc_p_min, arc_p + bytes * mult); } else if (state == arc_mfu_ghost) { @@ -2166,6 +2179,7 @@ arc_adapt(int bytes, arc_state_t *state) mult = ((arc_mfu_ghost->arcs_size >= arc_mru_ghost->arcs_size) ? 1 : (arc_mru_ghost->arcs_size/arc_mfu_ghost->arcs_size)); + mult = MIN(mult, 10); delta = MIN(bytes * mult, arc_p); arc_p = MAX(arc_p_min, arc_p - delta); @@ -2295,7 +2309,7 @@ arc_get_data_buf(arc_buf_t *buf) state = (arc_mru->arcs_lsize[type] >= size && mfu_space > arc_mfu->arcs_size) ? arc_mru : arc_mfu; } - if ((buf->b_data = arc_evict(state, NULL, size, TRUE, type)) == NULL) { + if ((buf->b_data = arc_evict(state, 0, size, TRUE, type)) == NULL) { if (type == ARC_BUFC_METADATA) { buf->b_data = zio_buf_alloc(size); arc_space_consume(size, ARC_SPACE_DATA); @@ -2666,7 +2680,7 @@ arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp, uint32_t *arc_flags, const zbookmark_t *zb) { arc_buf_hdr_t *hdr; - arc_buf_t *buf; + arc_buf_t *buf = NULL; kmutex_t *hash_lock; zio_t *rzio; uint64_t guid = spa_guid(spa); @@ -2748,7 +2762,7 @@ top: uint64_t size = BP_GET_LSIZE(bp); arc_callback_t *acb; vdev_t *vd = NULL; - uint64_t addr; + daddr_t addr = -1; boolean_t devw = B_FALSE; if (hdr == NULL) { @@ -3026,7 +3040,7 @@ arc_release(arc_buf_t *buf, void *tag) arc_buf_hdr_t *hdr; kmutex_t *hash_lock = NULL; l2arc_buf_hdr_t *l2hdr; - uint64_t buf_size; + uint64_t buf_size = 0; /* * It would be nice to assert that if it's DMU metadata (level > @@ -3401,7 +3415,7 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg) * in order to compress/encrypt/etc the data. We therefor need to * make sure that there is sufficient available memory for this. */ - if (error = arc_memory_throttle(reserve, anon_size, txg)) + if ((error = arc_memory_throttle(reserve, anon_size, txg))) return (error); /* @@ -3870,7 +3884,7 @@ out: * Free buffers that were tagged for destruction. */ static void -l2arc_do_free_on_write() +l2arc_do_free_on_write(void) { list_t *buflist; l2arc_data_free_t *df, *df_prev; @@ -4050,7 +4064,7 @@ l2arc_read_done(zio_t *zio) static list_t * l2arc_list_locked(int list_num, kmutex_t **lock) { - list_t *list; + list_t *list = NULL; ASSERT(list_num >= 0 && list_num <= 3); @@ -4223,11 +4237,12 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) list_t *list; uint64_t passed_sz, write_sz, buf_sz, headroom; void *buf_data; - kmutex_t *hash_lock, *list_lock; + kmutex_t *hash_lock, *list_lock = NULL; boolean_t have_lock, full; l2arc_write_callback_t *cb; zio_t *pio, *wzio; uint64_t guid = spa_guid(spa); + int try; ASSERT(dev->l2ad_vdev != NULL); @@ -4241,7 +4256,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) * Copy buffers for L2ARC writing. */ mutex_enter(&l2arc_buflist_mtx); - for (int try = 0; try <= 3; try++) { + for (try = 0; try <= 3; try++) { list = l2arc_list_locked(try, &list_lock); passed_sz = 0; @@ -4438,6 +4453,16 @@ l2arc_feed_thread(void) ASSERT(spa != NULL); /* + * If the pool is read-only then force the feed thread to + * sleep a little longer. + */ + if (!spa_writeable(spa)) { + next = ddi_get_lbolt() + 5 * l2arc_feed_secs * hz; + spa_config_exit(spa, SCL_L2ARC, dev); + continue; + } + + /* * Avoid contributing to memory pressure. */ if (arc_reclaim_needed()) { @@ -4514,6 +4539,7 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd) adddev->l2ad_evict = adddev->l2ad_start; adddev->l2ad_first = B_TRUE; adddev->l2ad_writing = B_FALSE; + list_link_init(&adddev->l2ad_node); ASSERT3U(adddev->l2ad_write, >, 0); /*