X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzil.c;h=c1796937b568389806c04a969fdaf796e698cb83;hb=refs%2Fheads%2Frertzinger%2Ffeature-zpool-get--p;hp=292aea27d219679eee93fba49127cf6f41888d8f;hpb=b8d06fca089fae4680c3a552fc55c512bfb02202;p=zfs.git diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 292aea2..c179693 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -38,6 +38,7 @@ #include #include #include +#include /* * The zfs intent log (ZIL) saves transaction records of system calls @@ -211,7 +212,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); - error = dsl_read_nolock(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, + error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); if (error == 0) { @@ -287,7 +288,7 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); - error = arc_read_nolock(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, + error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); if (error == 0) { @@ -451,13 +452,14 @@ zil_free_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t claim_txg) } static lwb_t * -zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, uint64_t txg) +zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, uint64_t txg, boolean_t fastwrite) { lwb_t *lwb; lwb = kmem_cache_alloc(zil_lwb_cache, KM_PUSHPAGE); lwb->lwb_zilog = zilog; lwb->lwb_blk = *bp; + lwb->lwb_fastwrite = fastwrite; lwb->lwb_buf = zio_buf_alloc(BP_GET_LSIZE(bp)); lwb->lwb_max_txg = txg; lwb->lwb_zio = NULL; @@ -478,6 +480,38 @@ zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, uint64_t txg) } /* + * Called when we create in-memory log transactions so that we know + * to cleanup the itxs at the end of spa_sync(). + */ +void +zilog_dirty(zilog_t *zilog, uint64_t txg) +{ + dsl_pool_t *dp = zilog->zl_dmu_pool; + dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); + + if (dsl_dataset_is_snapshot(ds)) + panic("dirtying snapshot!"); + + if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg) == 0) { + /* up the hold count until we can be written out */ + dmu_buf_add_ref(ds->ds_dbuf, zilog); + } +} + +boolean_t +zilog_is_dirty(zilog_t *zilog) +{ + dsl_pool_t *dp = zilog->zl_dmu_pool; + int t; + + for (t = 0; t < TXG_SIZE; t++) { + if (txg_list_member(&dp->dp_dirty_zilogs, zilog, t)) + return (B_TRUE); + } + return (B_FALSE); +} + +/* * Create an on-disk intent log. */ static lwb_t * @@ -489,6 +523,7 @@ zil_create(zilog_t *zilog) dmu_tx_t *tx = NULL; blkptr_t blk; int error = 0; + boolean_t fastwrite = FALSE; /* * Wait for any previous destroy to complete. @@ -516,8 +551,9 @@ zil_create(zilog_t *zilog) BP_ZERO(&blk); } - error = zio_alloc_zil(zilog->zl_spa, txg, &blk, NULL, - ZIL_MIN_BLKSZ, zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); + error = zio_alloc_zil(zilog->zl_spa, txg, &blk, + ZIL_MIN_BLKSZ, B_TRUE); + fastwrite = TRUE; if (error == 0) zil_init_log_chain(zilog, &blk); @@ -527,7 +563,7 @@ zil_create(zilog_t *zilog) * Allocate a log write buffer (lwb) for the first log block. */ if (error == 0) - lwb = zil_alloc_lwb(zilog, &blk, txg); + lwb = zil_alloc_lwb(zilog, &blk, txg, fastwrite); /* * If we just allocated the first log block, commit our transaction @@ -586,6 +622,10 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first) ASSERT(zh->zh_claim_txg == 0); VERIFY(!keep_first); while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { + ASSERT(lwb->lwb_zio == NULL); + if (lwb->lwb_fastwrite) + metaslab_fastwrite_unmark(zilog->zl_spa, + &lwb->lwb_blk); list_remove(&zilog->zl_lwb_list, lwb); if (lwb->lwb_buf != NULL) zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); @@ -593,14 +633,21 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first) kmem_cache_free(zil_lwb_cache, lwb); } } else if (!keep_first) { - (void) zil_parse(zilog, zil_free_log_block, - zil_free_log_record, tx, zh->zh_claim_txg); + zil_destroy_sync(zilog, tx); } mutex_exit(&zilog->zl_lock); dmu_tx_commit(tx); } +void +zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx) +{ + ASSERT(list_is_empty(&zilog->zl_lwb_list)); + (void) zil_parse(zilog, zil_free_log_block, + zil_free_log_record, tx, zilog->zl_header->zh_claim_txg); +} + int zil_claim(const char *osname, void *txarg) { @@ -826,6 +873,8 @@ zil_lwb_write_done(zio_t *zio) */ zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); mutex_enter(&zilog->zl_lock); + lwb->lwb_zio = NULL; + lwb->lwb_fastwrite = FALSE; lwb->lwb_buf = NULL; lwb->lwb_tx = NULL; mutex_exit(&zilog->zl_lock); @@ -854,12 +903,21 @@ zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb) zilog->zl_root_zio = zio_root(zilog->zl_spa, NULL, NULL, ZIO_FLAG_CANFAIL); } + + /* Lock so zil_sync() doesn't fastwrite_unmark after zio is created */ + mutex_enter(&zilog->zl_lock); if (lwb->lwb_zio == NULL) { + if (!lwb->lwb_fastwrite) { + metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk); + lwb->lwb_fastwrite = 1; + } lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa, 0, &lwb->lwb_blk, lwb->lwb_buf, BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb, ZIO_PRIORITY_LOG_WRITE, - ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE, &zb); + ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | + ZIO_FLAG_FASTWRITE, &zb); } + mutex_exit(&zilog->zl_lock); } /* @@ -876,14 +934,13 @@ uint64_t zil_block_buckets[] = { }; /* - * Use the slog as long as the logbias is 'latency' and the current commit size - * is less than the limit or the total list size is less than 2X the limit. - * Limit checking is disabled by setting zil_slog_limit to UINT64_MAX. + * Use the slog as long as the current commit size is less than the + * limit or the total list size is less than 2X the limit. Limit + * checking is disabled by setting zil_slog_limit to UINT64_MAX. */ unsigned long zil_slog_limit = 1024 * 1024; -#define USE_SLOG(zilog) (((zilog)->zl_logbias == ZFS_LOGBIAS_LATENCY) && \ - (((zilog)->zl_cur_used < zil_slog_limit) || \ - ((zilog)->zl_itx_list_sz < (zil_slog_limit << 1)))) +#define USE_SLOG(zilog) (((zilog)->zl_cur_used < zil_slog_limit) || \ + ((zilog)->zl_itx_list_sz < (zil_slog_limit << 1))) /* * Start a log block write and advance to the next log block. @@ -956,10 +1013,8 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1); BP_ZERO(bp); - /* pass the old blkptr in order to spread log blocks across devs */ use_slog = USE_SLOG(zilog); - error = zio_alloc_zil(spa, txg, bp, &lwb->lwb_blk, zil_blksz, - use_slog); + error = zio_alloc_zil(spa, txg, bp, zil_blksz, USE_SLOG(zilog)); if (use_slog) { ZIL_STAT_BUMP(zil_itx_metaslab_slog_count); @@ -978,7 +1033,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) /* * Allocate a new log write buffer (lwb). */ - nlwb = zil_alloc_lwb(zilog, bp, txg); + nlwb = zil_alloc_lwb(zilog, bp, txg, TRUE); /* Record the block for later vdev flushing */ zil_add_block(zilog, &lwb->lwb_blk); @@ -1026,6 +1081,8 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb) return (NULL); ASSERT(lwb->lwb_buf != NULL); + ASSERT(zilog_is_dirty(zilog) || + spa_freeze_txg(zilog->zl_spa) != UINT64_MAX); if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) dlen = P2ROUNDUP_TYPED( @@ -1106,7 +1163,7 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb) lwb->lwb_nused += reclen + dlen; lwb->lwb_max_txg = MAX(lwb->lwb_max_txg, txg); ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz); - ASSERT3U(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)), ==, 0); + ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t))); return (lwb); } @@ -1256,7 +1313,7 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_RENAME) zil_async_to_sync(zilog, itx->itx_oid); - if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) + if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) txg = ZILTEST_TXG; else txg = dmu_tx_get_txg(tx); @@ -1307,6 +1364,7 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) } itx->itx_lr.lrc_txg = dmu_tx_get_txg(tx); + zilog_dirty(zilog, txg); mutex_exit(&itxg->itxg_lock); /* Release the old itxs now we've dropped the lock */ @@ -1316,7 +1374,10 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) /* * If there are any in-memory intent log transactions which have now been - * synced then start up a taskq to free them. + * synced then start up a taskq to free them. We should only do this after we + * have written out the uberblocks (i.e. txg has been comitted) so that + * don't inadvertently clean out in-memory log records that would be required + * by zil_commit(). */ void zil_clean(zilog_t *zilog, uint64_t synced_txg) @@ -1560,13 +1621,14 @@ zil_commit(zilog_t *zilog, uint64_t foid) zil_commit_writer(zilog); zilog->zl_com_batch = mybatch; zilog->zl_writer = B_FALSE; - mutex_exit(&zilog->zl_lock); /* wake up one thread to become the next writer */ cv_signal(&zilog->zl_cv_batch[(mybatch+1) & 1]); /* wake up all threads waiting for this batch to be committed */ cv_broadcast(&zilog->zl_cv_batch[mybatch & 1]); + + mutex_exit(&zilog->zl_lock); } /* @@ -1624,6 +1686,9 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) zh->zh_log = lwb->lwb_blk; if (lwb->lwb_buf != NULL || lwb->lwb_max_txg > txg) break; + + ASSERT(lwb->lwb_zio == NULL); + list_remove(&zilog->zl_lwb_list, lwb); zio_free_zil(spa, txg, &lwb->lwb_blk); kmem_cache_free(zil_lwb_cache, lwb); @@ -1637,6 +1702,19 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) if (list_head(&zilog->zl_lwb_list) == NULL) BP_ZERO(&zh->zh_log); } + + /* + * Remove fastwrite on any blocks that have been pre-allocated for + * the next commit. This prevents fastwrite counter pollution by + * unused, long-lived LWBs. + */ + for (; lwb != NULL; lwb = list_next(&zilog->zl_lwb_list, lwb)) { + if (lwb->lwb_fastwrite && !lwb->lwb_zio) { + metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk); + lwb->lwb_fastwrite = 0; + } + } + mutex_exit(&zilog->zl_lock); } @@ -1804,6 +1882,7 @@ zil_close(zilog_t *zilog) mutex_exit(&zilog->zl_lock); if (txg) txg_wait_synced(zilog->zl_dmu_pool, txg); + ASSERT(!zilog_is_dirty(zilog)); taskq_destroy(zilog->zl_clean_taskq); zilog->zl_clean_taskq = NULL; @@ -1816,6 +1895,9 @@ zil_close(zilog_t *zilog) lwb = list_head(&zilog->zl_lwb_list); if (lwb != NULL) { ASSERT(lwb == list_tail(&zilog->zl_lwb_list)); + ASSERT(lwb->lwb_zio == NULL); + if (lwb->lwb_fastwrite) + metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk); list_remove(&zilog->zl_lwb_list, lwb); zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); kmem_cache_free(zil_lwb_cache, lwb); @@ -1874,7 +1956,7 @@ zil_resume(zilog_t *zilog) } typedef struct zil_replay_arg { - zil_replay_func_t **zr_replay; + zil_replay_func_t *zr_replay; void *zr_arg; boolean_t zr_byteswap; char *zr_lr; @@ -1993,7 +2075,7 @@ zil_incr_blks(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) * If this dataset has a non-empty intent log, replay it and destroy it. */ void -zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE]) +zil_replay(objset_t *os, void *arg, zil_replay_func_t replay_func[TX_MAX_TYPE]) { zilog_t *zilog = dmu_objset_zil(os); const zil_header_t *zh = zilog->zl_header;