X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fdmu_tx.c;h=e47d858ddfe6551cfdd76bbd232328c1ce5de3ec;hb=36f86f73f68548f46eb3229c8adf583d59fa9988;hp=50207bdbb6a36818e2d2b20993fa5c538ee59a63;hpb=1c5de20ae2511124613926e4f780572634818218;p=zfs.git diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index 50207bd..e47d858 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -20,6 +20,8 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -60,7 +62,7 @@ static kstat_t *dmu_tx_ksp; dmu_tx_t * dmu_tx_create_dd(dsl_dir_t *dd) { - dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP); + dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_PUSHPAGE); tx->tx_dir = dd; if (dd) tx->tx_pool = dd->dd_pool; @@ -138,7 +140,7 @@ dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object, } } - txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP); + txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_PUSHPAGE); txh->txh_tx = tx; txh->txh_dnode = dn; #ifdef DEBUG_DMU_TX @@ -444,6 +446,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; spa_t *spa = txh->txh_tx->tx_pool->dp_spa; int epbs; + uint64_t l0span = 0, nl1blks = 0; if (dn->dn_nlevels == 0) return; @@ -476,6 +479,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) nblks = dn->dn_maxblkid - blkid; } + l0span = nblks; /* save for later use to calc level > 1 overhead */ if (dn->dn_nlevels == 1) { int i; for (i = 0; i < nblks; i++) { @@ -488,24 +492,10 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) } unref += BP_GET_ASIZE(bp); } + nl1blks = 1; nblks = 0; } - /* - * Add in memory requirements of higher-level indirects. - * This assumes a worst-possible scenario for dn_nlevels. - */ - { - uint64_t blkcnt = 1 + ((nblks >> epbs) >> epbs); - int level = (dn->dn_nlevels > 1) ? 2 : 1; - - while (level++ < DN_MAX_LEVELS) { - txh->txh_memory_tohold += blkcnt << dn->dn_indblkshift; - blkcnt = 1 + (blkcnt >> epbs); - } - ASSERT(blkcnt <= dn->dn_nblkptr); - } - lastblk = blkid + nblks - 1; while (nblks) { dmu_buf_impl_t *dbuf; @@ -576,11 +566,35 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) } dbuf_rele(dbuf, FTAG); + ++nl1blks; blkid += tochk; nblks -= tochk; } rw_exit(&dn->dn_struct_rwlock); + /* + * Add in memory requirements of higher-level indirects. + * This assumes a worst-possible scenario for dn_nlevels and a + * worst-possible distribution of l1-blocks over the region to free. + */ + { + uint64_t blkcnt = 1 + ((l0span >> epbs) >> epbs); + int level = 2; + /* + * Here we don't use DN_MAX_LEVEL, but calculate it with the + * given datablkshift and indblkshift. This makes the + * difference between 19 and 8 on large files. + */ + int maxlevel = 2 + (DN_MAX_OFFSET_SHIFT - dn->dn_datablkshift) / + (dn->dn_indblkshift - SPA_BLKPTRSHIFT); + + while (level++ < maxlevel) { + txh->txh_memory_tohold += MAX(MIN(blkcnt, nl1blks), 1) + << dn->dn_indblkshift; + blkcnt = 1 + (blkcnt >> epbs); + } + } + /* account for new level 1 indirect blocks that might show up */ if (skipped > 0) { txh->txh_fudge += skipped << dn->dn_indblkshift; @@ -690,9 +704,11 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name) return; } - ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap); + ASSERT3U(DMU_OT_BYTESWAP(dn->dn_type), ==, DMU_BSWAP_ZAP); if (dn->dn_maxblkid == 0 && !add) { + blkptr_t *bp; + /* * If there is only one block (i.e. this is a micro-zap) * and we are not adding anything, the accounting is simple. @@ -707,14 +723,13 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name) * Use max block size here, since we don't know how much * the size will change between now and the dbuf dirty call. */ + bp = &dn->dn_phys->dn_blkptr[0]; if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, - &dn->dn_phys->dn_blkptr[0], - dn->dn_phys->dn_blkptr[0].blk_birth)) { + bp, bp->blk_birth)) txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE; - } else { + else txh->txh_space_towrite += SPA_MAXBLOCKSIZE; - } - if (dn->dn_phys->dn_blkptr[0].blk_birth) + if (!BP_IS_HOLE(bp)) txh->txh_space_tounref += SPA_MAXBLOCKSIZE; return; } @@ -1237,7 +1252,7 @@ dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data) { dmu_tx_callback_t *dcb; - dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_SLEEP); + dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_PUSHPAGE); dcb->dcb_func = func; dcb->dcb_data = data; @@ -1300,7 +1315,6 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object) { dnode_t *dn; dmu_tx_hold_t *txh; - blkptr_t *bp; txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_SPILL, 0, 0); @@ -1311,17 +1325,18 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object) return; /* If blkptr doesn't exist then add space to towrite */ - bp = &dn->dn_phys->dn_spill; - if (BP_IS_HOLE(bp)) { + if (!(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) { txh->txh_space_towrite += SPA_MAXBLOCKSIZE; - txh->txh_space_tounref = 0; } else { + blkptr_t *bp; + + bp = &dn->dn_phys->dn_spill; if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, bp, bp->blk_birth)) txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE; else txh->txh_space_towrite += SPA_MAXBLOCKSIZE; - if (bp->blk_birth) + if (!BP_IS_HOLE(bp)) txh->txh_space_tounref += SPA_MAXBLOCKSIZE; } }