diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
index 81c6dfe..fd71413 100644
--- a/module/zfs/dmu_tx.c
+++ b/module/zfs/dmu_tx.c
@@ -20,9 +20,8 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-/*
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/dmu.h>
@@ -302,6 +301,7 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
 		delta = P2NPHASE(off, dn->dn_datablksz);
 	}
 
+	min_ibs = max_ibs = dn->dn_indblkshift;
 	if (dn->dn_maxblkid > 0) {
 		/*
 		 * The blocksize can't change,
@@ -309,13 +309,6 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
 		 */
 		ASSERT(dn->dn_datablkshift != 0);
 		min_bs = max_bs = dn->dn_datablkshift;
-		min_ibs = max_ibs = dn->dn_indblkshift;
-	} else if (dn->dn_indblkshift > max_ibs) {
-		/*
-		 * This ensures that if we reduce DN_MAX_INDBLKSHIFT,
-		 * the code will still work correctly on older pools.
-		 */
-		min_ibs = max_ibs = dn->dn_indblkshift;
 	}
 
 	/*
@@ -447,6 +440,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
 	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
 	spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
 	int epbs;
+	uint64_t l0span = 0, nl1blks = 0;
 
 	if (dn->dn_nlevels == 0)
 		return;
@@ -479,6 +473,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
 			nblks = dn->dn_maxblkid - blkid;
 	}
 
+	l0span = nblks; /* save for later use to calc level > 1 overhead */
 	if (dn->dn_nlevels == 1) {
 		int i;
 		for (i = 0; i < nblks; i++) {
@@ -491,24 +486,10 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
 			}
 			unref += BP_GET_ASIZE(bp);
 		}
+		nl1blks = 1;
 		nblks = 0;
 	}
 
-	/*
-	 * Add in memory requirements of higher-level indirects.
-	 * This assumes a worst-possible scenario for dn_nlevels.
-	 */
-	{
-		uint64_t blkcnt = 1 + ((nblks >> epbs) >> epbs);
-		int level = (dn->dn_nlevels > 1) ? 2 : 1;
-
-		while (level++ < DN_MAX_LEVELS) {
-			txh->txh_memory_tohold += blkcnt << dn->dn_indblkshift;
-			blkcnt = 1 + (blkcnt >> epbs);
-		}
-		ASSERT(blkcnt <= dn->dn_nblkptr);
-	}
-
 	lastblk = blkid + nblks - 1;
 	while (nblks) {
 		dmu_buf_impl_t *dbuf;
@@ -579,11 +560,35 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
 		}
 		dbuf_rele(dbuf, FTAG);
 
+		++nl1blks;
 		blkid += tochk;
 		nblks -= tochk;
 	}
 	rw_exit(&dn->dn_struct_rwlock);
 
+	/*
+	 * Add in memory requirements of higher-level indirects.
+	 * This assumes a worst-possible scenario for dn_nlevels and a
+	 * worst-possible distribution of l1-blocks over the region to free.
+	 */
+	{
+		uint64_t blkcnt = 1 + ((l0span >> epbs) >> epbs);
+		int level = 2;
+		/*
+		 * Here we don't use DN_MAX_LEVELS, but calculate it with the
+		 * given datablkshift and indblkshift. This makes the
+		 * difference between 19 and 8 on large files.
+		 */
+		int maxlevel = 2 + (DN_MAX_OFFSET_SHIFT - dn->dn_datablkshift) /
+		    (dn->dn_indblkshift - SPA_BLKPTRSHIFT);
+
+		while (level++ < maxlevel) {
+			txh->txh_memory_tohold += MAX(MIN(blkcnt, nl1blks), 1)
+			    << dn->dn_indblkshift;
+			blkcnt = 1 + (blkcnt >> epbs);
+		}
+	}
+
 	/* account for new level 1 indirect blocks that might show up */
 	if (skipped > 0) {
 		txh->txh_fudge += skipped << dn->dn_indblkshift;
@@ -693,7 +698,7 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
 		return;
 	}
 
-	ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap);
+	ASSERT3U(DMU_OT_BYTESWAP(dn->dn_type), ==, DMU_BSWAP_ZAP);
 
 	if (dn->dn_maxblkid == 0 && !add) {
 		blkptr_t *bp;
@@ -768,12 +773,13 @@ void
 dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space)
 {
 	dmu_tx_hold_t *txh;
+
 	ASSERT(tx->tx_txg == 0);
 
 	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
 	    DMU_NEW_OBJECT, THT_SPACE, space, 0);
-
-	txh->txh_space_towrite += space;
+	if (txh)
+		txh->txh_space_towrite += space;
 }
 
 int
@@ -918,7 +924,7 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
 	uint64_t memory, asize, fsize, usize;
 	uint64_t towrite, tofree, tooverwrite, tounref, tohold, fudge;
 
-	ASSERT3U(tx->tx_txg, ==, 0);
+	ASSERT0(tx->tx_txg);
 
 	if (tx->tx_err) {
 		DMU_TX_STAT_BUMP(dmu_tx_error);
@@ -1084,12 +1090,15 @@ dmu_tx_unassign(dmu_tx_t *tx)
 int
 dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
 {
+	hrtime_t before, after;
 	int err;
 
 	ASSERT(tx->tx_txg == 0);
 	ASSERT(txg_how != 0);
 	ASSERT(!dsl_pool_sync_context(tx->tx_pool));
 
+	before = gethrtime();
+
 	while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
 		dmu_tx_unassign(tx);
 
@@ -1101,6 +1110,11 @@ dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
 
 	txg_rele_to_quiesce(&tx->tx_txgh);
 
+	after = gethrtime();
+
+	dsl_pool_tx_assign_add_usecs(tx->tx_pool,
+	    (after - before) / NSEC_PER_USEC);
+
 	return (0);
 }
 
@@ -1307,6 +1321,8 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
 
 	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object,
 	    THT_SPILL, 0, 0);
+	if (txh == NULL)
+		return;
 
 	dn = txh->txh_dnode;
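
A worked example of the new worst-case accounting: the dmu_tx_count_free() hunk above bounds txh_memory_tohold by walking levels 2..maxlevel and charging at most MIN(blkcnt, nl1blks) indirect blocks (but at least one) per level, with maxlevel derived from the actual block sizes rather than the fixed DN_MAX_LEVELS. The stand-alone sketch below is illustrative, not part of the patch: DN_MAX_OFFSET_SHIFT (64) and SPA_BLKPTRSHIFT (7, a blkptr_t is 128 bytes) mirror the real ZFS constants, while worst_case_tohold() and its example inputs are made up for demonstration.

#include <stdio.h>
#include <stdint.h>

#define	DN_MAX_OFFSET_SHIFT	64	/* objects are at most 2^64 bytes */
#define	SPA_BLKPTRSHIFT		7	/* sizeof (blkptr_t) == 128 */
#define	MAX(a, b)		((a) > (b) ? (a) : (b))
#define	MIN(a, b)		((a) < (b) ? (a) : (b))

/*
 * Upper bound on indirect-block memory held when freeing 'l0span'
 * level-0 blocks that touch 'nl1blks' level-1 blocks, following the
 * logic of the new dmu_tx_count_free() hunk.
 */
static uint64_t
worst_case_tohold(uint64_t l0span, uint64_t nl1blks,
    int datablkshift, int indblkshift)
{
	int epbs = indblkshift - SPA_BLKPTRSHIFT; /* blkptrs per indirect */
	uint64_t blkcnt = 1 + ((l0span >> epbs) >> epbs);
	uint64_t tohold = 0;
	int level = 2;
	/*
	 * Deepest possible tree for these block sizes: 128K data blocks
	 * (shift 17) under 16K indirects (shift 14) give
	 * 2 + (64 - 17) / (14 - 7) = 8 levels, versus the fixed 19
	 * the old code assumed via DN_MAX_LEVELS.
	 */
	int maxlevel = 2 + (DN_MAX_OFFSET_SHIFT - datablkshift) /
	    (indblkshift - SPA_BLKPTRSHIFT);

	while (level++ < maxlevel) {
		tohold += MAX(MIN(blkcnt, nl1blks), 1) << indblkshift;
		blkcnt = 1 + (blkcnt >> epbs);
	}
	return (tohold);
}

int
main(void)
{
	/* e.g. freeing 2^20 level-0 blocks spread over 64 L1 blocks */
	printf("tohold = %llu bytes\n", (unsigned long long)
	    worst_case_tohold(1ULL << 20, 64, 17, 14));
	return (0);
}

For large-block files this caps the estimate at a few 16K indirects per level over far fewer levels, which is exactly the over-reservation the patch trims.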
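The dmu_tx_assign() hunks add simple latency instrumentation: gethrtime() is sampled before the dmu_tx_try_assign() retry loop and after the quiesce release, and the delta is accumulated per pool in microseconds via dsl_pool_tx_assign_add_usecs(). A minimal user-space sketch of the same pattern, with clock_gettime(CLOCK_MONOTONIC) standing in for the kernel's gethrtime(); the stub and its names are illustrative, not ZFS interfaces:

#include <stdio.h>
#include <stdint.h>
#include <time.h>

#define	NSEC_PER_USEC	1000LL	/* matches the kernel constant */

/* stand-in for the kernel's monotonic nanosecond clock */
static int64_t
gethrtime_stub(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec);
}

int
main(void)
{
	int64_t before = gethrtime_stub();
	/* ... the work being timed, e.g. the assign retry loop ... */
	int64_t after = gethrtime_stub();

	/* same usec conversion the patch feeds into the pool stats */
	printf("tx assign took %lld usec\n",
	    (long long)((after - before) / NSEC_PER_USEC));
	return (0);
}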