/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
#include <sys/dmu.h>
#include <sys/dmu_impl.h>
typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
uint64_t arg1, uint64_t arg2);
+dmu_tx_stats_t dmu_tx_stats = {
+ { "dmu_tx_assigned", KSTAT_DATA_UINT64 },
+ { "dmu_tx_delay", KSTAT_DATA_UINT64 },
+ { "dmu_tx_error", KSTAT_DATA_UINT64 },
+ { "dmu_tx_suspended", KSTAT_DATA_UINT64 },
+ { "dmu_tx_group", KSTAT_DATA_UINT64 },
+ { "dmu_tx_how", KSTAT_DATA_UINT64 },
+ { "dmu_tx_memory_reserve", KSTAT_DATA_UINT64 },
+ { "dmu_tx_memory_reclaim", KSTAT_DATA_UINT64 },
+ { "dmu_tx_memory_inflight", KSTAT_DATA_UINT64 },
+ { "dmu_tx_dirty_throttle", KSTAT_DATA_UINT64 },
+ { "dmu_tx_write_limit", KSTAT_DATA_UINT64 },
+ { "dmu_tx_quota", KSTAT_DATA_UINT64 },
+};
+
+static kstat_t *dmu_tx_ksp;
dmu_tx_t *
dmu_tx_create_dd(dsl_dir_t *dd)
{
- dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP);
+ dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_PUSHPAGE);
tx->tx_dir = dd;
if (dd)
tx->tx_pool = dd->dd_pool;
offsetof(dmu_tx_hold_t, txh_node));
list_create(&tx->tx_callbacks, sizeof (dmu_tx_callback_t),
offsetof(dmu_tx_callback_t, dcb_node));
-#ifdef ZFS_DEBUG
+#ifdef DEBUG_DMU_TX
refcount_create(&tx->tx_space_written);
refcount_create(&tx->tx_space_freed);
#endif
}
}
- txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP);
+ txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_PUSHPAGE);
txh->txh_tx = tx;
txh->txh_dnode = dn;
-#ifdef ZFS_DEBUG
+#ifdef DEBUG_DMU_TX
txh->txh_type = type;
txh->txh_arg1 = arg1;
txh->txh_arg2 = arg2;
uint64_t start, end, i;
int min_bs, max_bs, min_ibs, max_ibs, epbs, bits;
int err = 0;
+ int l;
if (len == 0)
return;
* we need to account for overwrites/unref.
*/
if (start <= dn->dn_maxblkid) {
- for (int l = 0; l < DN_MAX_LEVELS; l++)
+ for (l = 0; l < DN_MAX_LEVELS; l++)
history[l] = -1ULL;
}
while (start <= dn->dn_maxblkid) {
ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap);
if (dn->dn_maxblkid == 0 && !add) {
+ blkptr_t *bp;
+
/*
* If there is only one block (i.e. this is a micro-zap)
* and we are not adding anything, the accounting is simple.
* Use max block size here, since we don't know how much
* the size will change between now and the dbuf dirty call.
*/
+ bp = &dn->dn_phys->dn_blkptr[0];
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
- &dn->dn_phys->dn_blkptr[0],
- dn->dn_phys->dn_blkptr[0].blk_birth)) {
+ bp, bp->blk_birth))
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
- } else {
+ else
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
- }
- if (dn->dn_phys->dn_blkptr[0].blk_birth)
+ if (!BP_IS_HOLE(bp))
txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
return;
}
return (holds);
}
-#ifdef ZFS_DEBUG
+#ifdef DEBUG_DMU_TX
void
dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
{
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
+ ASSERT(dn != NULL);
ASSERT(tx->tx_txg != 0);
ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset);
ASSERT3U(dn->dn_object, ==, db->db.db_object);
for (txh = list_head(&tx->tx_holds); txh;
txh = list_next(&tx->tx_holds, txh)) {
- ASSERT(dn == NULL || dn->dn_assigned_txg == tx->tx_txg);
+ ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
if (txh->txh_dnode == dn && txh->txh_type != THT_NEWOBJECT)
match_object = TRUE;
if (txh->txh_dnode == NULL || txh->txh_dnode == dn) {
ASSERT3U(tx->tx_txg, ==, 0);
- if (tx->tx_err)
+ if (tx->tx_err) {
+ DMU_TX_STAT_BUMP(dmu_tx_error);
return (tx->tx_err);
+ }
if (spa_suspended(spa)) {
+ DMU_TX_STAT_BUMP(dmu_tx_suspended);
+
/*
* If the user has indicated a blocking failure mode
* then return ERESTART which will block in dmu_tx_wait().
if (dn->dn_assigned_txg == tx->tx_txg - 1) {
mutex_exit(&dn->dn_mtx);
tx->tx_needassign_txh = txh;
+ DMU_TX_STAT_BUMP(dmu_tx_group);
return (ERESTART);
}
if (dn->dn_assigned_txg == 0)
* NB: This check must be after we've held the dnodes, so that
* the dmu_tx_unassign() logic will work properly
*/
- if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
+ if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg) {
+ DMU_TX_STAT_BUMP(dmu_tx_how);
return (ERESTART);
+ }
/*
* If a snapshot has been taken since we made our estimates,
/* calculate memory footprint estimate */
memory = towrite + tooverwrite + tohold;
-#ifdef ZFS_DEBUG
+#ifdef DEBUG_DMU_TX
/*
* Add in 'tohold' to account for our dirty holds on this memory
* XXX - the "fudge" factor is to account for skipped blocks that
return (err);
}
+ DMU_TX_STAT_BUMP(dmu_tx_assigned);
+
return (0);
}
void
dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta)
{
-#ifdef ZFS_DEBUG
+#ifdef DEBUG_DMU_TX
if (tx->tx_dir == NULL || delta == 0)
return;
ASSERT(tx->tx_txg != 0);
- while (txh = list_head(&tx->tx_holds)) {
+ while ((txh = list_head(&tx->tx_holds))) {
dnode_t *dn = txh->txh_dnode;
list_remove(&tx->tx_holds, txh);
list_destroy(&tx->tx_callbacks);
list_destroy(&tx->tx_holds);
-#ifdef ZFS_DEBUG
+#ifdef DEBUG_DMU_TX
dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n",
tx->tx_space_towrite, refcount_count(&tx->tx_space_written),
tx->tx_space_tofree, refcount_count(&tx->tx_space_freed));
ASSERT(tx->tx_txg == 0);
- while (txh = list_head(&tx->tx_holds)) {
+ while ((txh = list_head(&tx->tx_holds))) {
dnode_t *dn = txh->txh_dnode;
list_remove(&tx->tx_holds, txh);
list_destroy(&tx->tx_callbacks);
list_destroy(&tx->tx_holds);
-#ifdef ZFS_DEBUG
+#ifdef DEBUG_DMU_TX
refcount_destroy_many(&tx->tx_space_written,
refcount_count(&tx->tx_space_written));
refcount_destroy_many(&tx->tx_space_freed,
{
dmu_tx_callback_t *dcb;
- dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_SLEEP);
+ dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_PUSHPAGE);
dcb->dcb_func = func;
dcb->dcb_data = data;
{
dmu_tx_callback_t *dcb;
- while (dcb = list_head(cb_list)) {
+ while ((dcb = list_head(cb_list))) {
list_remove(cb_list, dcb);
dcb->dcb_func(dcb->dcb_data, error);
kmem_free(dcb, sizeof (dmu_tx_callback_t));
{
dnode_t *dn;
dmu_tx_hold_t *txh;
- blkptr_t *bp;
txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object,
THT_SPILL, 0, 0);
return;
/* If blkptr doesn't exist then add space to towrite */
- bp = &dn->dn_phys->dn_spill;
- if (BP_IS_HOLE(bp)) {
+ if (!(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) {
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
- txh->txh_space_tounref = 0;
} else {
+ blkptr_t *bp;
+
+ bp = &dn->dn_phys->dn_spill;
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
bp, bp->blk_birth))
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
else
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
- if (bp->blk_birth)
+ if (!BP_IS_HOLE(bp))
txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
}
}
DB_DNODE_EXIT(db);
}
}
+
+void
+dmu_tx_init(void)
+{
+ dmu_tx_ksp = kstat_create("zfs", 0, "dmu_tx", "misc",
+ KSTAT_TYPE_NAMED, sizeof (dmu_tx_stats) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL);
+
+ if (dmu_tx_ksp != NULL) {
+ dmu_tx_ksp->ks_data = &dmu_tx_stats;
+ kstat_install(dmu_tx_ksp);
+ }
+}
+
+void
+dmu_tx_fini(void)
+{
+ if (dmu_tx_ksp != NULL) {
+ kstat_delete(dmu_tx_ksp);
+ dmu_tx_ksp = NULL;
+ }
+}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dmu_tx_create);
+EXPORT_SYMBOL(dmu_tx_hold_write);
+EXPORT_SYMBOL(dmu_tx_hold_free);
+EXPORT_SYMBOL(dmu_tx_hold_zap);
+EXPORT_SYMBOL(dmu_tx_hold_bonus);
+EXPORT_SYMBOL(dmu_tx_abort);
+EXPORT_SYMBOL(dmu_tx_assign);
+EXPORT_SYMBOL(dmu_tx_wait);
+EXPORT_SYMBOL(dmu_tx_commit);
+EXPORT_SYMBOL(dmu_tx_get_txg);
+EXPORT_SYMBOL(dmu_tx_callback_register);
+EXPORT_SYMBOL(dmu_tx_do_callbacks);
+EXPORT_SYMBOL(dmu_tx_hold_spill);
+EXPORT_SYMBOL(dmu_tx_hold_sa_create);
+EXPORT_SYMBOL(dmu_tx_hold_sa);
+#endif