X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fdmu_tx.c;h=e47d858ddfe6551cfdd76bbd232328c1ce5de3ec;hb=36f86f73f68548f46eb3229c8adf583d59fa9988;hp=50207bdbb6a36818e2d2b20993fa5c538ee59a63;hpb=1c5de20ae2511124613926e4f780572634818218;p=zfs.git

diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
index 50207bd..e47d858 100644
--- a/module/zfs/dmu_tx.c
+++ b/module/zfs/dmu_tx.c
@@ -20,6 +20,8 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/dmu.h>
@@ -60,7 +62,7 @@ static kstat_t *dmu_tx_ksp;
 dmu_tx_t *
 dmu_tx_create_dd(dsl_dir_t *dd)
 {
-	dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP);
+	dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_PUSHPAGE);
 	tx->tx_dir = dd;
 	if (dd)
 		tx->tx_pool = dd->dd_pool;
@@ -138,7 +140,7 @@ dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
 		}
 	}
 
-	txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP);
+	txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_PUSHPAGE);
 	txh->txh_tx = tx;
 	txh->txh_dnode = dn;
 #ifdef DEBUG_DMU_TX
@@ -444,6 +446,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
 	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
 	spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
 	int epbs;
+	uint64_t l0span = 0, nl1blks = 0;
 
 	if (dn->dn_nlevels == 0)
 		return;
@@ -476,6 +479,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
 			nblks = dn->dn_maxblkid - blkid;
 
 	}
+	l0span = nblks;    /* save for later use to calc level > 1 overhead */
 	if (dn->dn_nlevels == 1) {
 		int i;
 		for (i = 0; i < nblks; i++) {
@@ -488,24 +492,10 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
 			}
 			unref += BP_GET_ASIZE(bp);
 		}
+		nl1blks = 1;
 		nblks = 0;
 	}
 
-	/*
-	 * Add in memory requirements of higher-level indirects.
-	 * This assumes a worst-possible scenario for dn_nlevels.
-	 */
-	{
-		uint64_t blkcnt = 1 + ((nblks >> epbs) >> epbs);
-		int level = (dn->dn_nlevels > 1) ? 2 : 1;
-
-		while (level++ < DN_MAX_LEVELS) {
-			txh->txh_memory_tohold += blkcnt << dn->dn_indblkshift;
-			blkcnt = 1 + (blkcnt >> epbs);
-		}
-		ASSERT(blkcnt <= dn->dn_nblkptr);
-	}
-
 	lastblk = blkid + nblks - 1;
 	while (nblks) {
 		dmu_buf_impl_t *dbuf;
@@ -576,11 +566,35 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
 		}
 		dbuf_rele(dbuf, FTAG);
 
+		++nl1blks;
 		blkid += tochk;
 		nblks -= tochk;
 	}
 	rw_exit(&dn->dn_struct_rwlock);
 
+	/*
+	 * Add in memory requirements of higher-level indirects.
+	 * This assumes a worst-possible scenario for dn_nlevels and a
+	 * worst-possible distribution of l1-blocks over the region to free.
+	 */
+	{
+		uint64_t blkcnt = 1 + ((l0span >> epbs) >> epbs);
+		int level = 2;
+		/*
+		 * Here we don't use DN_MAX_LEVEL, but calculate it with the
+		 * given datablkshift and indblkshift. This makes the
+		 * difference between 19 and 8 on large files.
+		 */
+		int maxlevel = 2 + (DN_MAX_OFFSET_SHIFT - dn->dn_datablkshift) /
+		    (dn->dn_indblkshift - SPA_BLKPTRSHIFT);
+
+		while (level++ < maxlevel) {
+			txh->txh_memory_tohold += MAX(MIN(blkcnt, nl1blks), 1)
+			    << dn->dn_indblkshift;
+			blkcnt = 1 + (blkcnt >> epbs);
+		}
+	}
+
 	/* account for new level 1 indirect blocks that might show up */
 	if (skipped > 0) {
 		txh->txh_fudge += skipped << dn->dn_indblkshift;
@@ -690,9 +704,11 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
 		return;
 	}
 
-	ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap);
+	ASSERT3U(DMU_OT_BYTESWAP(dn->dn_type), ==, DMU_BSWAP_ZAP);
 
 	if (dn->dn_maxblkid == 0 && !add) {
+		blkptr_t *bp;
+
 		/*
 		 * If there is only one block  (i.e. this is a micro-zap)
 		 * and we are not adding anything, the accounting is simple.
@@ -707,14 +723,13 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
 		 * Use max block size here, since we don't know how much
 		 * the size will change between now and the dbuf dirty call.
 		 */
+		bp = &dn->dn_phys->dn_blkptr[0];
 		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
-		    &dn->dn_phys->dn_blkptr[0],
-		    dn->dn_phys->dn_blkptr[0].blk_birth)) {
+		    bp, bp->blk_birth))
 			txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
-		} else {
+		else
 			txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
-		}
-		if (dn->dn_phys->dn_blkptr[0].blk_birth)
+		if (!BP_IS_HOLE(bp))
 			txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
 		return;
 	}
@@ -1237,7 +1252,7 @@ dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data)
 {
 	dmu_tx_callback_t *dcb;
 
-	dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_SLEEP);
+	dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_PUSHPAGE);
 
 	dcb->dcb_func = func;
 	dcb->dcb_data = data;
@@ -1300,7 +1315,6 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
 {
 	dnode_t *dn;
 	dmu_tx_hold_t *txh;
-	blkptr_t *bp;
 
 	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object,
 	    THT_SPILL, 0, 0);
@@ -1311,17 +1325,18 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
 		return;
 
 	/* If blkptr doesn't exist then add space to towrite */
-	bp = &dn->dn_phys->dn_spill;
-	if (BP_IS_HOLE(bp)) {
+	if (!(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) {
 		txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
-		txh->txh_space_tounref = 0;
 	} else {
+		blkptr_t *bp;
+
+		bp = &dn->dn_phys->dn_spill;
 		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
 		    bp, bp->blk_birth))
 			txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
 		else
 			txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
-		if (bp->blk_birth)
+		if (!BP_IS_HOLE(bp))
 			txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
 	}
 }