* Use is subject to license terms.
*/
-#pragma ident "@(#)dsl_dataset.c 1.42 08/04/28 SMI"
-
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
#include <sys/spa.h>
+#include <sys/zfs_znode.h>
#include <sys/sunddi.h>
+static char *dsl_reaper = "the grim reaper";
+
static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
static dsl_checkfunc_t dsl_dataset_rollback_check;
#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE
-/*
- * We use weighted reference counts to express the various forms of exclusion
- * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open
- * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
- * This makes the exclusion logic simple: the total refcnt for all opens cannot
- * exceed DS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their
- * weight (DS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume
- * just over half of the refcnt space, so there can't be more than one, but it
- * can peacefully coexist with any number of STANDARD opens.
- */
-static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
- 0, /* DS_MODE_NONE - invalid */
- 1, /* DS_MODE_STANDARD - unlimited number */
- (DS_REF_MAX >> 1) + 1, /* DS_MODE_PRIMARY - only one of these */
- DS_REF_MAX /* DS_MODE_EXCLUSIVE - no other opens */
-};
+#define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper)
+
/*
* Figure out how much of this delta should be propogated to the dsl_dir
* dsl_dir.
*/
ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
- dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
+ dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
used, compressed, uncompressed, tx);
dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
return;
}
dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ mutex_enter(&ds->ds_dir->dd_lock);
mutex_enter(&ds->ds_lock);
delta = parent_delta(ds, used);
ds->ds_phys->ds_used_bytes += used;
ds->ds_phys->ds_uncompressed_bytes += uncompressed;
ds->ds_phys->ds_unique_bytes += used;
mutex_exit(&ds->ds_lock);
- dsl_dir_diduse_space(ds->ds_dir, delta, compressed, uncompressed, tx);
+ dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
+ compressed, uncompressed, tx);
+ dsl_dir_transfer_space(ds->ds_dir, used - delta,
+ DD_USED_REFRSRV, DD_USED_HEAD, tx);
+ mutex_exit(&ds->ds_dir->dd_lock);
}
-void
+int
dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
dmu_tx_t *tx)
{
int compressed = BP_GET_PSIZE(bp);
int uncompressed = BP_GET_UCSIZE(bp);
+ ASSERT(pio != NULL);
ASSERT(dmu_tx_is_syncing(tx));
/* No block pointer => nothing to free */
if (BP_IS_HOLE(bp))
- return;
+ return (0);
ASSERT(used > 0);
if (ds == NULL) {
* Account for the meta-objset space in its placeholder
* dataset.
*/
- err = arc_free(pio, tx->tx_pool->dp_spa,
- tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
+ err = dsl_free(pio, tx->tx_pool,
+ tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT);
ASSERT(err == 0);
- dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
+ dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
-used, -compressed, -uncompressed, tx);
dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
- return;
+ return (used);
}
ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
+ ASSERT(!dsl_dataset_is_snapshot(ds));
dmu_buf_will_dirty(ds->ds_dbuf, tx);
if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
int64_t delta;
dprintf_bp(bp, "freeing: %s", "");
- err = arc_free(pio, tx->tx_pool->dp_spa,
- tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
+ err = dsl_free(pio, tx->tx_pool,
+ tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT);
ASSERT(err == 0);
+ mutex_enter(&ds->ds_dir->dd_lock);
mutex_enter(&ds->ds_lock);
ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
!DS_UNIQUE_IS_ACCURATE(ds));
delta = parent_delta(ds, -used);
ds->ds_phys->ds_unique_bytes -= used;
mutex_exit(&ds->ds_lock);
- dsl_dir_diduse_space(ds->ds_dir,
+ dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
delta, -compressed, -uncompressed, tx);
+ dsl_dir_transfer_space(ds->ds_dir, -used - delta,
+ DD_USED_REFRSRV, DD_USED_HEAD, tx);
+ mutex_exit(&ds->ds_dir->dd_lock);
} else {
dprintf_bp(bp, "putting on dead list: %s", "");
VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
ds->ds_prev->ds_phys->ds_unique_bytes += used;
mutex_exit(&ds->ds_prev->ds_lock);
}
+ if (bp->blk_birth > ds->ds_origin_txg) {
+ dsl_dir_transfer_space(ds->ds_dir, used,
+ DD_USED_HEAD, DD_USED_SNAP, tx);
+ }
}
mutex_enter(&ds->ds_lock);
ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
mutex_exit(&ds->ds_lock);
+
+ return (used);
}
uint64_t
{
dsl_dataset_t *ds = dsv;
- /* open_refcount == DS_REF_MAX when deleting */
- ASSERT(ds->ds_open_refcount == 0 ||
- ds->ds_open_refcount == DS_REF_MAX);
+ ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));
dprintf_ds(ds, "evicting %s\n", "");
ds->ds_user_evict_func(ds, ds->ds_user_ptr);
if (ds->ds_prev) {
- dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
+ dsl_dataset_drop_ref(ds->ds_prev, ds);
ds->ds_prev = NULL;
}
bplist_close(&ds->ds_deadlist);
- dsl_dir_close(ds->ds_dir, ds);
+ if (ds->ds_dir)
+ dsl_dir_close(ds->ds_dir, ds);
ASSERT(!list_link_active(&ds->ds_synced_link));
mutex_destroy(&ds->ds_lock);
mutex_destroy(&ds->ds_opening_lock);
mutex_destroy(&ds->ds_deadlist.bpl_lock);
+ rw_destroy(&ds->ds_rwlock);
+ cv_destroy(&ds->ds_exclusive_cv);
kmem_free(ds, sizeof (dsl_dataset_t));
}
}
static int
-dsl_dataset_snap_lookup(objset_t *os, uint64_t flags,
- uint64_t snapnames_zapobj, const char *name, uint64_t *value)
+dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
{
+ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
+ uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
matchtype_t mt;
int err;
- if (flags & DS_FLAG_CI_DATASET)
+ if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
mt = MT_FIRST;
else
mt = MT_EXACT;
- err = zap_lookup_norm(os, snapnames_zapobj, name, 8, 1,
+ err = zap_lookup_norm(mos, snapobj, name, 8, 1,
value, mt, NULL, 0, NULL);
if (err == ENOTSUP && mt == MT_FIRST)
- err = zap_lookup(os, snapnames_zapobj, name, 8, 1, value);
+ err = zap_lookup(mos, snapobj, name, 8, 1, value);
return (err);
}
static int
-dsl_dataset_snap_remove(objset_t *os, uint64_t flags,
- uint64_t snapnames_zapobj, char *name, dmu_tx_t *tx)
+dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
{
+ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
+ uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
matchtype_t mt;
int err;
- if (flags & DS_FLAG_CI_DATASET)
+ if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
mt = MT_FIRST;
else
mt = MT_EXACT;
- err = zap_remove_norm(os, snapnames_zapobj, name, mt, tx);
+ err = zap_remove_norm(mos, snapobj, name, mt, tx);
if (err == ENOTSUP && mt == MT_FIRST)
- err = zap_remove(os, snapnames_zapobj, name, tx);
+ err = zap_remove(mos, snapobj, name, tx);
return (err);
}
-int
-dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
- int mode, void *tag, dsl_dataset_t **dsp)
+static int
+dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
+ dsl_dataset_t **dsp)
{
- uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
objset_t *mos = dp->dp_meta_objset;
dmu_buf_t *dbuf;
dsl_dataset_t *ds;
mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
NULL);
+ rw_init(&ds->ds_rwlock, 0, 0, 0);
+ cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);
err = bplist_open(&ds->ds_deadlist,
mos, ds->ds_phys->ds_deadlist_obj);
mutex_destroy(&ds->ds_lock);
mutex_destroy(&ds->ds_opening_lock);
mutex_destroy(&ds->ds_deadlist.bpl_lock);
+ rw_destroy(&ds->ds_rwlock);
+ cv_destroy(&ds->ds_exclusive_cv);
kmem_free(ds, sizeof (dsl_dataset_t));
dmu_buf_rele(dbuf, tag);
return (err);
}
- if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
+ if (!dsl_dataset_is_snapshot(ds)) {
ds->ds_snapname[0] = '\0';
if (ds->ds_phys->ds_prev_snap_obj) {
- err = dsl_dataset_open_obj(dp,
- ds->ds_phys->ds_prev_snap_obj, NULL,
- DS_MODE_NONE, ds, &ds->ds_prev);
+ err = dsl_dataset_get_ref(dp,
+ ds->ds_phys->ds_prev_snap_obj,
+ ds, &ds->ds_prev);
}
- } else {
- if (snapname) {
-#ifdef ZFS_DEBUG
- dsl_dataset_phys_t *headphys;
- dmu_buf_t *headdbuf;
- err = dmu_bonus_hold(mos,
- ds->ds_dir->dd_phys->dd_head_dataset_obj,
- FTAG, &headdbuf);
+
+ if (err == 0 && dsl_dir_is_clone(ds->ds_dir)) {
+ dsl_dataset_t *origin;
+
+ err = dsl_dataset_hold_obj(dp,
+ ds->ds_dir->dd_phys->dd_origin_obj,
+ FTAG, &origin);
if (err == 0) {
- uint64_t foundobj;
-
- headphys = headdbuf->db_data;
- err = dsl_dataset_snap_lookup(
- dp->dp_meta_objset,
- headphys->ds_flags,
- headphys->ds_snapnames_zapobj,
- snapname, &foundobj);
- ASSERT3U(foundobj, ==, dsobj);
- dmu_buf_rele(headdbuf, FTAG);
+ ds->ds_origin_txg =
+ origin->ds_phys->ds_creation_txg;
+ dsl_dataset_rele(origin, FTAG);
}
-#endif
- (void) strcat(ds->ds_snapname, snapname);
- } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
- err = dsl_dataset_get_snapname(ds);
}
+ } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
+ err = dsl_dataset_get_snapname(ds);
}
- if (!dsl_dataset_is_snapshot(ds)) {
+ if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
/*
* In sync context, we're called with either no lock
* or with the write lock. If we're not syncing,
if (need_lock)
rw_enter(&dp->dp_config_rwlock, RW_READER);
- err = dsl_prop_get_ds_locked(ds->ds_dir,
+ err = dsl_prop_get_ds(ds,
"refreservation", sizeof (uint64_t), 1,
&ds->ds_reserved, NULL);
if (err == 0) {
- err = dsl_prop_get_ds_locked(ds->ds_dir,
+ err = dsl_prop_get_ds(ds,
"refquota", sizeof (uint64_t), 1,
&ds->ds_quota, NULL);
}
}
if (err || winner) {
bplist_close(&ds->ds_deadlist);
- if (ds->ds_prev) {
- dsl_dataset_close(ds->ds_prev,
- DS_MODE_NONE, ds);
- }
+ if (ds->ds_prev)
+ dsl_dataset_drop_ref(ds->ds_prev, ds);
dsl_dir_close(ds->ds_dir, ds);
mutex_destroy(&ds->ds_lock);
mutex_destroy(&ds->ds_opening_lock);
mutex_destroy(&ds->ds_deadlist.bpl_lock);
+ rw_destroy(&ds->ds_rwlock);
+ cv_destroy(&ds->ds_exclusive_cv);
kmem_free(ds, sizeof (dsl_dataset_t));
if (err) {
dmu_buf_rele(dbuf, tag);
}
ASSERT3P(ds->ds_dbuf, ==, dbuf);
ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
-
+ ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
+ spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
+ dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
mutex_enter(&ds->ds_lock);
- if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
- (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
- !DS_MODE_IS_INCONSISTENT(mode)) ||
- (ds->ds_open_refcount + weight > DS_REF_MAX)) {
+ if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
mutex_exit(&ds->ds_lock);
- dsl_dataset_close(ds, DS_MODE_NONE, tag);
- return (EBUSY);
+ dmu_buf_rele(ds->ds_dbuf, tag);
+ return (ENOENT);
}
- ds->ds_open_refcount += weight;
mutex_exit(&ds->ds_lock);
-
*dsp = ds;
return (0);
}
+/*
+ * Wait until the dataset is accessible, i.e. any in-progress destroy
+ * has either finished or been abandoned.  Returns 0 on success, or
+ * ENOENT if the dataset was destroyed while we waited -- in that case
+ * the caller's reference (from dsl_dataset_get_ref()) has already been
+ * dropped via dsl_dataset_drop_ref() and must not be released again.
+ */
+static int
+dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
+{
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+
+	/*
+	 * In syncing context we don't want the rwlock lock: there
+	 * may be an existing writer waiting for sync phase to
+	 * finish. We don't need to worry about such writers, since
+	 * sync phase is single-threaded, so the writer can't be
+	 * doing anything while we are active.
+	 */
+	if (dsl_pool_sync_context(dp)) {
+		ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
+		return (0);
+	}
+
+	/*
+	 * Normal users will hold the ds_rwlock as a READER until they
+	 * are finished (i.e., call dsl_dataset_rele()).  "Owners" will
+	 * drop their READER lock after they set the ds_owner field.
+	 *
+	 * If the dataset is being destroyed, the destroy thread will
+	 * obtain a WRITER lock for exclusive access after it's done its
+	 * open-context work and then change the ds_owner to
+	 * dsl_reaper once destruction is assured.  So threads
+	 * may block here temporarily, until the "destructability" of
+	 * the dataset is determined.
+	 */
+	ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
+	mutex_enter(&ds->ds_lock);
+	while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
+		/*
+		 * Drop dp_config_rwlock while we sleep so the writer
+		 * (destroy) can make progress; retake it before retrying.
+		 */
+		rw_exit(&dp->dp_config_rwlock);
+		cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
+		if (DSL_DATASET_IS_DESTROYED(ds)) {
+			mutex_exit(&ds->ds_lock);
+			dsl_dataset_drop_ref(ds, tag);
+			rw_enter(&dp->dp_config_rwlock, RW_READER);
+			return (ENOENT);
+		}
+		rw_enter(&dp->dp_config_rwlock, RW_READER);
+	}
+	mutex_exit(&ds->ds_lock);
+	return (0);
+}
+
+/*
+ * Hold the dataset identified by object number 'dsobj': take a
+ * reference (dsl_dataset_get_ref()) and wait for it to become
+ * accessible (dsl_dataset_hold_ref()).  On success *dsp is valid and
+ * must later be released with dsl_dataset_rele(*dsp, tag).
+ */
int
-dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
-    void *tag, dsl_dataset_t **dsp)
+dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
+    dsl_dataset_t **dsp)
+{
+	int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);
+
+	if (err)
+		return (err);
+	return (dsl_dataset_hold_ref(*dsp, tag));
+}
+
+/*
+ * Like dsl_dataset_hold_obj(), but additionally take ownership of the
+ * dataset via dsl_dataset_tryown().  Returns EBUSY if the dataset is
+ * already owned, or is inconsistent and 'flags' does not permit that.
+ */
+int
+dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, int flags, void *owner,
+    dsl_dataset_t **dsp)
+{
+	int err = dsl_dataset_hold_obj(dp, dsobj, owner, dsp);
+
+	/* NOTE(review): presumably DS_MODE_USER holds may not own -- confirm */
+	ASSERT(DS_MODE_TYPE(flags) != DS_MODE_USER);
+
+	if (err)
+		return (err);
+	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
+		dsl_dataset_rele(*dsp, owner);
+		return (EBUSY);
+	}
+	return (0);
+}
+
+int
+dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
{
dsl_dir_t *dd;
dsl_pool_t *dp;
- const char *tail;
+ const char *snapname;
uint64_t obj;
- dsl_dataset_t *ds = NULL;
int err = 0;
- err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
+ err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
if (err)
return (err);
dp = dd->dd_pool;
obj = dd->dd_phys->dd_head_dataset_obj;
rw_enter(&dp->dp_config_rwlock, RW_READER);
- if (obj == 0) {
- /* A dataset with no associated objset */
+ if (obj)
+ err = dsl_dataset_get_ref(dp, obj, tag, dsp);
+ else
err = ENOENT;
+ if (err)
goto out;
- }
- if (tail != NULL) {
- objset_t *mos = dp->dp_meta_objset;
- uint64_t flags;
+ err = dsl_dataset_hold_ref(*dsp, tag);
- err = dsl_dataset_open_obj(dp, obj, NULL,
- DS_MODE_NONE, tag, &ds);
- if (err)
- goto out;
- flags = ds->ds_phys->ds_flags;
- obj = ds->ds_phys->ds_snapnames_zapobj;
- dsl_dataset_close(ds, DS_MODE_NONE, tag);
- ds = NULL;
+ /* we may be looking for a snapshot */
+ if (err == 0 && snapname != NULL) {
+ dsl_dataset_t *ds = NULL;
- if (tail[0] != '@') {
+ if (*snapname++ != '@') {
+ dsl_dataset_rele(*dsp, tag);
err = ENOENT;
goto out;
}
- tail++;
- /* Look for a snapshot */
- if (!DS_MODE_IS_READONLY(mode)) {
- err = EROFS;
- goto out;
+ dprintf("looking for snapshot '%s'\n", snapname);
+ err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
+ if (err == 0)
+ err = dsl_dataset_get_ref(dp, obj, tag, &ds);
+ dsl_dataset_rele(*dsp, tag);
+
+ ASSERT3U((err == 0), ==, (ds != NULL));
+
+ if (ds) {
+ mutex_enter(&ds->ds_lock);
+ if (ds->ds_snapname[0] == 0)
+ (void) strlcpy(ds->ds_snapname, snapname,
+ sizeof (ds->ds_snapname));
+ mutex_exit(&ds->ds_lock);
+ err = dsl_dataset_hold_ref(ds, tag);
+ *dsp = err ? NULL : ds;
}
- dprintf("looking for snapshot '%s'\n", tail);
- err = dsl_dataset_snap_lookup(mos, flags, obj, tail, &obj);
- if (err)
- goto out;
}
- err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds);
-
out:
rw_exit(&dp->dp_config_rwlock);
dsl_dir_close(dd, FTAG);
-
- ASSERT3U((err == 0), ==, (ds != NULL));
- /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */
-
- *dsp = ds;
return (err);
}
+/*
+ * Look up a dataset by name and take ownership of it.  Non-read-only
+ * ownership is refused (EROFS) when ds_num_children > 0 (presumably
+ * indicating a snapshot -- TODO confirm); EBUSY if already owned.
+ */
int
-dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
+dsl_dataset_own(const char *name, int flags, void *owner, dsl_dataset_t **dsp)
{
-	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
+	int err = dsl_dataset_hold(name, owner, dsp);
+	if (err)
+		return (err);
+	if ((*dsp)->ds_phys->ds_num_children > 0 &&
+	    !DS_MODE_IS_READONLY(flags)) {
+		dsl_dataset_rele(*dsp, owner);
+		return (EROFS);
+	}
+	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
+		dsl_dataset_rele(*dsp, owner);
+		return (EBUSY);
+	}
+	return (0);
}
void
VERIFY(0 == dsl_dataset_get_snapname(ds));
if (ds->ds_snapname[0]) {
(void) strcat(name, "@");
+ /*
+ * We use a "recursive" mutex so that we
+ * can call dprintf_ds() with ds_lock held.
+ */
if (!MUTEX_HELD(&ds->ds_lock)) {
- /*
- * We use a "recursive" mutex so that we
- * can call dprintf_ds() with ds_lock held.
- */
mutex_enter(&ds->ds_lock);
(void) strcat(name, ds->ds_snapname);
mutex_exit(&ds->ds_lock);
if (ds->ds_snapname[0]) {
++result; /* adding one for the @-sign */
if (!MUTEX_HELD(&ds->ds_lock)) {
- /* see dsl_datset_name */
mutex_enter(&ds->ds_lock);
result += strlen(ds->ds_snapname);
mutex_exit(&ds->ds_lock);
}
+/*
+ * Release a bare reference obtained via dsl_dataset_get_ref().  Does
+ * not touch ds_rwlock -- use dsl_dataset_rele() for a full hold.
+ */
void
-dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
+dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
{
-	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
-	mutex_enter(&ds->ds_lock);
-	ASSERT3U(ds->ds_open_refcount, >=, weight);
-	ds->ds_open_refcount -= weight;
-	mutex_exit(&ds->ds_lock);
-
	dmu_buf_rele(ds->ds_dbuf, tag);
}
+/*
+ * Release a hold obtained via dsl_dataset_hold*(): drop the READER
+ * lock (not taken in sync context) and then the underlying reference.
+ */
void
-dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode)
+dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
+{
+	if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) {
+		rw_exit(&ds->ds_rwlock);
+	}
+	dsl_dataset_drop_ref(ds, tag);
+}
+
+/*
+ * Relinquish ownership of the dataset.  If the destroy path has
+ * already reassigned ownership to dsl_reaper and cleared ds_dbuf,
+ * evict the in-core structure directly; otherwise drop our reference.
+ * Wakes threads blocked in dsl_dataset_hold_ref() if we held the
+ * rwlock as WRITER.
+ */
+void
+dsl_dataset_disown(dsl_dataset_t *ds, void *owner)
{
-	uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)];
-	uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)];
+	ASSERT((ds->ds_owner == owner && ds->ds_dbuf) ||
+	    (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));
+
	mutex_enter(&ds->ds_lock);
-	ASSERT3U(ds->ds_open_refcount, >=, oldweight);
-	ASSERT3U(oldweight, >=, newweight);
-	ds->ds_open_refcount -= oldweight;
-	ds->ds_open_refcount += newweight;
+	ds->ds_owner = NULL;
+	if (RW_WRITE_HELD(&ds->ds_rwlock)) {
+		rw_exit(&ds->ds_rwlock);
+		cv_broadcast(&ds->ds_exclusive_cv);
+	}
	mutex_exit(&ds->ds_lock);
+	if (ds->ds_dbuf)
+		dsl_dataset_drop_ref(ds, owner);
+	else
+		dsl_dataset_evict(ds->ds_dbuf, ds);
}
+/*
+ * Attempt to become the dataset's owner.  The caller must already hold
+ * a reference (with the READER lock held outside sync context); on
+ * success ds_owner is set, the READER lock is dropped, and TRUE is
+ * returned.  Fails (FALSE) if already owned, or if the dataset is
+ * inconsistent and 'inconsistentok' is not set.
+ */
boolean_t
-dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode)
+dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *owner)
{
-	boolean_t rv;
-	uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)];
-	uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)];
+	boolean_t gotit = FALSE;
+
	mutex_enter(&ds->ds_lock);
-	ASSERT3U(ds->ds_open_refcount, >=, oldweight);
-	ASSERT3U(newweight, >=, oldweight);
-	if (ds->ds_open_refcount - oldweight + newweight > DS_REF_MAX) {
-		rv = B_FALSE;
-	} else {
-		ds->ds_open_refcount -= oldweight;
-		ds->ds_open_refcount += newweight;
-		rv = B_TRUE;
+	if (ds->ds_owner == NULL &&
+	    (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
+		ds->ds_owner = owner;
+		if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
+			rw_exit(&ds->ds_rwlock);
+		gotit = TRUE;
	}
	mutex_exit(&ds->ds_lock);
-	return (rv);
+	return (gotit);
}
void
-dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
+dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
{
- objset_t *mos = dp->dp_meta_objset;
- dmu_buf_t *dbuf;
- dsl_dataset_phys_t *dsphys;
- dsl_dataset_t *ds;
- uint64_t dsobj;
- dsl_dir_t *dd;
-
- dsl_dir_create_root(mos, ddobjp, tx);
- VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));
-
- dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
- DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
- VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
- dmu_buf_will_dirty(dbuf, tx);
- dsphys = dbuf->db_data;
- dsphys->ds_dir_obj = dd->dd_object;
- dsphys->ds_fsid_guid = unique_create();
- (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
- sizeof (dsphys->ds_guid));
- dsphys->ds_snapnames_zapobj =
- zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
- DMU_OT_NONE, 0, tx);
- dsphys->ds_creation_time = gethrestime_sec();
- dsphys->ds_creation_txg = tx->tx_txg;
- dsphys->ds_deadlist_obj =
- bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
- if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
- dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
- dmu_buf_rele(dbuf, FTAG);
-
- dmu_buf_will_dirty(dd->dd_dbuf, tx);
- dd->dd_phys->dd_head_dataset_obj = dsobj;
- dsl_dir_close(dd, FTAG);
-
- VERIFY(0 ==
- dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
- (void) dmu_objset_create_impl(dp->dp_spa, ds,
- &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx);
- dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
+ ASSERT3P(owner, ==, ds->ds_owner);
+ if (!RW_WRITE_HELD(&ds->ds_rwlock))
+ rw_enter(&ds->ds_rwlock, RW_WRITER);
}
uint64_t
-dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin,
+dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
uint64_t flags, dmu_tx_t *tx)
{
dsl_pool_t *dp = dd->dd_pool;
uint64_t dsobj;
objset_t *mos = dp->dp_meta_objset;
+ if (origin == NULL)
+ origin = dp->dp_origin_snap;
+
ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
ASSERT(dmu_tx_is_syncing(tx));
VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
dmu_buf_will_dirty(dbuf, tx);
dsphys = dbuf->db_data;
+ bzero(dsphys, sizeof (dsl_dataset_phys_t));
dsphys->ds_dir_obj = dd->dd_object;
dsphys->ds_flags = flags;
dsphys->ds_fsid_guid = unique_create();
zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
DMU_OT_NONE, 0, tx);
dsphys->ds_creation_time = gethrestime_sec();
- dsphys->ds_creation_txg = tx->tx_txg;
+ dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;
dsphys->ds_deadlist_obj =
bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
dmu_buf_will_dirty(origin->ds_dbuf, tx);
origin->ds_phys->ds_num_children++;
+ if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
+ if (origin->ds_phys->ds_next_clones_obj == 0) {
+ origin->ds_phys->ds_next_clones_obj =
+ zap_create(mos,
+ DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
+ }
+ VERIFY(0 == zap_add_int(mos,
+ origin->ds_phys->ds_next_clones_obj,
+ dsobj, tx));
+ }
+
dmu_buf_will_dirty(dd->dd_dbuf, tx);
dd->dd_phys->dd_origin_obj = origin->ds_object;
}
ASSERT(lastname[0] != '@');
- ddobj = dsl_dir_create_sync(pdd, lastname, tx);
+ ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
- dsobj = dsl_dataset_create_sync_impl(dd, origin, flags, tx);
+ dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx);
dsl_deleg_set_create_perms(dd, tx, cr);
(void) strcat(name, "@");
(void) strcat(name, da->snapname);
- err = dsl_dataset_open(name,
- DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
+ err = dsl_dataset_own(name, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
da->dstg, &ds);
cp = strchr(name, '@');
*cp = '\0';
- if (err == ENOENT)
- return (0);
- if (err) {
+ if (err == 0) {
+ dsl_dataset_make_exclusive(ds, da->dstg);
+ if (ds->ds_user_ptr) {
+ ds->ds_user_evict_func(ds, ds->ds_user_ptr);
+ ds->ds_user_ptr = NULL;
+ }
+ dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
+ dsl_dataset_destroy_sync, ds, da->dstg, 0);
+ } else if (err == ENOENT) {
+ err = 0;
+ } else {
(void) strcpy(da->failed, name);
- return (err);
}
-
- dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
- dsl_dataset_destroy_sync, ds, da->dstg, 0);
- return (0);
+ return (err);
}
/*
for (dst = list_head(&da.dstg->dstg_tasks); dst;
dst = list_next(&da.dstg->dstg_tasks, dst)) {
dsl_dataset_t *ds = dst->dst_arg1;
+ /*
+ * Return the file system name that triggered the error
+ */
if (dst->dst_err) {
dsl_dataset_name(ds, fsname);
*strchr(fsname, '@') = '\0';
}
- /*
- * If it was successful, destroy_sync would have
- * closed the ds
- */
- if (err)
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, da.dstg);
+ dsl_dataset_disown(ds, da.dstg);
}
dsl_sync_task_group_destroy(da.dstg);
}
/*
- * ds must be opened EXCLUSIVE or PRIMARY. on return (whether
- * successful or not), ds will be closed and caller can no longer
- * dereference it.
+ * ds must be opened as OWNER. On return (whether successful or not),
+ * ds will be closed and caller can no longer dereference it.
*/
int
dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
dsl_dir_t *dd;
uint64_t obj;
- if (ds->ds_open_refcount != DS_REF_MAX) {
- if (dsl_dataset_tryupgrade(ds, DS_MODE_PRIMARY,
- DS_MODE_EXCLUSIVE) == 0) {
- dsl_dataset_close(ds, DS_MODE_PRIMARY, tag);
- return (EBUSY);
- }
- }
-
if (dsl_dataset_is_snapshot(ds)) {
/* Destroying a snapshot is simpler */
+ dsl_dataset_make_exclusive(ds, tag);
+
+ if (ds->ds_user_ptr) {
+ ds->ds_user_evict_func(ds, ds->ds_user_ptr);
+ ds->ds_user_ptr = NULL;
+ }
err = dsl_sync_task_do(ds->ds_dir->dd_pool,
dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
ds, tag, 0);
*/
for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
ds->ds_phys->ds_prev_snap_txg)) {
- dmu_tx_t *tx = dmu_tx_create(os);
- dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
- dmu_tx_hold_bonus(tx, obj);
- err = dmu_tx_assign(tx, TXG_WAIT);
- if (err) {
- /*
- * Perhaps there is not enough disk
- * space. Just deal with it from
- * dsl_dataset_destroy_sync().
- */
- dmu_tx_abort(tx);
- continue;
- }
- VERIFY(0 == dmu_object_free(os, obj, tx));
- dmu_tx_commit(tx);
+ /*
+ * Ignore errors, if there is not enough disk space
+ * we will deal with it in dsl_dataset_destroy_sync().
+ */
+ (void) dmu_free_object(os, obj);
}
- /* Make sure it's not dirty before we finish destroying it. */
- txg_wait_synced(dd->dd_pool, 0);
dmu_objset_close(os);
if (err != ESRCH)
goto out;
- if (ds->ds_user_ptr) {
- ds->ds_user_evict_func(ds, ds->ds_user_ptr);
- ds->ds_user_ptr = NULL;
- }
-
rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
rw_exit(&dd->dd_pool->dp_config_rwlock);
if (err)
goto out;
+ if (ds->ds_user_ptr) {
+ /*
+ * We need to sync out all in-flight IO before we try
+ * to evict (the dataset evict func is trying to clear
+ * the cached entries for this dataset in the ARC).
+ */
+ txg_wait_synced(dd->dd_pool, 0);
+ }
+
/*
* Blow away the dsl_dir + head dataset.
*/
+ dsl_dataset_make_exclusive(ds, tag);
+ if (ds->ds_user_ptr) {
+ ds->ds_user_evict_func(ds, ds->ds_user_ptr);
+ ds->ds_user_ptr = NULL;
+ }
dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
dsl_dataset_destroy_sync, ds, tag, 0);
dsl_dir_destroy_sync, dd, FTAG, 0);
err = dsl_sync_task_group_wait(dstg);
dsl_sync_task_group_destroy(dstg);
- /* if it is successful, *destroy_sync will close the ds+dd */
+ /* if it is successful, dsl_dir_destroy_sync will close the dd */
if (err)
dsl_dir_close(dd, FTAG);
out:
- if (err)
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
+ dsl_dataset_disown(ds, tag);
return (err);
}
int
dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost)
{
- ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
+ int err;
+
+ ASSERT(ds->ds_owner);
- return (dsl_sync_task_do(ds->ds_dir->dd_pool,
+ dsl_dataset_make_exclusive(ds, ds->ds_owner);
+ err = dsl_sync_task_do(ds->ds_dir->dd_pool,
dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
- ds, &ost, 0));
+ ds, &ost, 0);
+ /* drop exclusive access */
+ mutex_enter(&ds->ds_lock);
+ rw_exit(&ds->ds_rwlock);
+ cv_broadcast(&ds->ds_exclusive_cv);
+ mutex_exit(&ds->ds_lock);
+ return (err);
}
void *
}
+/* Context passed to the kill_blkptr() traversal callback. */
struct killarg {
-	int64_t *usedp;
-	int64_t *compressedp;
-	int64_t *uncompressedp;
+	dsl_dataset_t *ds;
	zio_t *zio;
	dmu_tx_t *tx;
};
+/*
+ * traverse_dataset() callback: free each visited block via
+ * dsl_dataset_block_kill().  Entries with no block pointer are
+ * ignored; all visited blocks are asserted to have been born after
+ * the previous snapshot.
+ */
+/* ARGSUSED */
static int
-kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
+kill_blkptr(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
+    const dnode_phys_t *dnp, void *arg)
{
	struct killarg *ka = arg;
-	blkptr_t *bp = &bc->bc_blkptr;
-	ASSERT3U(bc->bc_errno, ==, 0);
+	if (bp == NULL)
+		return (0);
+
+	ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
+	(void) dsl_dataset_block_kill(ka->ds, bp, ka->zio, ka->tx);
-	/*
-	 * Since this callback is not called concurrently, no lock is
-	 * needed on the accounting values.
-	 */
-	*ka->usedp += bp_get_dasize(spa, bp);
-	*ka->compressedp += BP_GET_PSIZE(bp);
-	*ka->uncompressedp += BP_GET_UCSIZE(bp);
-	/* XXX check for EIO? */
-	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
-	    ARC_NOWAIT);
	return (0);
}
return (EINVAL);
/*
- * If we made changes this txg, traverse_dsl_dataset won't find
+ * If we made changes this txg, traverse_dataset won't find
* them. Try again.
*/
if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
* We need to make sure that the objset_impl_t is reopened after
* we do the rollback, otherwise it will have the wrong
* objset_phys_t. Normally this would happen when this
- * DS_MODE_EXCLUSIVE dataset-open is closed, thus causing the
+ * dataset-open is closed, thus causing the
* dataset to be immediately evicted. But when doing "zfs recv
* -F", we reopen the objset before that, so that there is no
* window where the dataset is closed and inconsistent.
ds->ds_user_ptr = NULL;
}
+ /* Transfer space that was freed since last snap back to the head. */
+ {
+ uint64_t used;
+
+ VERIFY(0 == bplist_space_birthrange(&ds->ds_deadlist,
+ ds->ds_origin_txg, UINT64_MAX, &used));
+ dsl_dir_transfer_space(ds->ds_dir, used,
+ DD_USED_SNAP, DD_USED_HEAD, tx);
+ }
+
/* Zero out the deadlist. */
bplist_close(&ds->ds_deadlist);
bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
{
/* Free blkptrs that we gave birth to */
zio_t *zio;
- int64_t used = 0, compressed = 0, uncompressed = 0;
struct killarg ka;
- int64_t delta;
zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
ZIO_FLAG_MUSTSUCCEED);
- ka.usedp = &used;
- ka.compressedp = &compressed;
- ka.uncompressedp = &uncompressed;
+ ka.ds = ds;
ka.zio = zio;
ka.tx = tx;
- (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
- ADVANCE_POST, kill_blkptr, &ka);
+ (void) traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
+ TRAVERSE_POST, kill_blkptr, &ka);
(void) zio_wait(zio);
-
- /* only deduct space beyond any refreservation */
- delta = parent_delta(ds, -used);
- dsl_dir_diduse_space(ds->ds_dir,
- delta, -compressed, -uncompressed, tx);
}
- if (ds->ds_prev) {
+ ASSERT(!(ds->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) ||
+ ds->ds_phys->ds_unique_bytes == 0);
+
+ if (ds->ds_prev && ds->ds_prev != ds->ds_dir->dd_pool->dp_origin_snap) {
/* Change our contents to that of the prev snapshot */
+
ASSERT3U(ds->ds_prev->ds_object, ==,
ds->ds_phys->ds_prev_snap_obj);
+ ASSERT3U(ds->ds_phys->ds_used_bytes, <=,
+ ds->ds_prev->ds_phys->ds_used_bytes);
+
ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
ds->ds_phys->ds_used_bytes =
ds->ds_prev->ds_phys->ds_used_bytes;
ds->ds_phys->ds_uncompressed_bytes =
ds->ds_prev->ds_phys->ds_uncompressed_bytes;
ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
- ds->ds_phys->ds_unique_bytes = 0;
if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
ds->ds_prev->ds_phys->ds_unique_bytes = 0;
}
} else {
- /* Zero out our contents, recreate objset */
+ objset_impl_t *osi;
+
+ ASSERT3U(ds->ds_phys->ds_used_bytes, ==, 0);
+ ASSERT3U(ds->ds_phys->ds_compressed_bytes, ==, 0);
+ ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, ==, 0);
+
bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t));
- ds->ds_phys->ds_used_bytes = 0;
- ds->ds_phys->ds_compressed_bytes = 0;
- ds->ds_phys->ds_uncompressed_bytes = 0;
ds->ds_phys->ds_flags = 0;
ds->ds_phys->ds_unique_bytes = 0;
- (void) dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
+ if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
+ SPA_VERSION_UNIQUE_ACCURATE)
+ ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
+
+ osi = dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
&ds->ds_phys->ds_bp, *ost, tx);
+#ifdef _KERNEL
+ zfs_create_fs(&osi->os, kcred, NULL, tx);
+#endif
}
spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa,
{
dsl_dataset_t *ds = arg1;
+ /* we have an owner hold, so noone else can destroy us */
+ ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
+
/* Can't delete a branch point. */
if (ds->ds_phys->ds_num_children > 1)
return (EEXIST);
return (0);
}
+/*
+ * Handshake state between dsl_dataset_drain_refs() and the
+ * dsl_dataset_refs_gone() callback: 'gone' is set under 'lock' and
+ * announced via 'cv' once the dbuf user-eviction callback fires.
+ */
+struct refsarg {
+	kmutex_t lock;
+	boolean_t gone;
+	kcondvar_t cv;
+};
+
+/*
+ * dmu_buf user-eviction callback: record that the dataset's dbuf has
+ * no remaining holds and wake the waiter in dsl_dataset_drain_refs().
+ */
+/* ARGSUSED */
+static void
+dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
+{
+	struct refsarg *arg = argv;
+
+	mutex_enter(&arg->lock);
+	arg->gone = TRUE;
+	cv_signal(&arg->cv);
+	mutex_exit(&arg->lock);
+}
+
+/*
+ * Drop our own hold on the dataset's dbuf and then block until every
+ * other hold is gone (signalled through dsl_dataset_refs_gone()).
+ * On return ds_dbuf/ds_phys are cleared; the dataset must not be
+ * dereferenced through them afterwards.
+ */
+static void
+dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
+{
+	struct refsarg arg;
+
+	mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
+	arg.gone = FALSE;
+	/* redirect the dbuf's user-eviction callback at our handshake */
+	(void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
+	    dsl_dataset_refs_gone);
+	dmu_buf_rele(ds->ds_dbuf, tag);
+	mutex_enter(&arg.lock);
+	while (!arg.gone)
+		cv_wait(&arg.cv, &arg.lock);
+	ASSERT(arg.gone);
+	mutex_exit(&arg.lock);
+	ds->ds_dbuf = NULL;
+	ds->ds_phys = NULL;
+	mutex_destroy(&arg.lock);
+	cv_destroy(&arg.cv);
+}
+
void
dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
- int64_t used = 0, compressed = 0, uncompressed = 0;
zio_t *zio;
int err;
int after_branch_point = FALSE;
dsl_dataset_t *ds_prev = NULL;
uint64_t obj;
- ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
+ ASSERT(ds->ds_owner);
ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
ASSERT(ds->ds_prev == NULL ||
ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
+ /* signal any waiters that this dataset is going away */
+ mutex_enter(&ds->ds_lock);
+ ds->ds_owner = dsl_reaper;
+ cv_broadcast(&ds->ds_exclusive_cv);
+ mutex_exit(&ds->ds_lock);
+
/* Remove our reservation */
if (ds->ds_reserved != 0) {
uint64_t val = 0;
ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
+ dsl_pool_ds_destroyed(ds, tx);
+
obj = ds->ds_object;
if (ds->ds_phys->ds_prev_snap_obj != 0) {
if (ds->ds_prev) {
ds_prev = ds->ds_prev;
} else {
- VERIFY(0 == dsl_dataset_open_obj(dp,
- ds->ds_phys->ds_prev_snap_obj, NULL,
- DS_MODE_NONE, FTAG, &ds_prev));
+ VERIFY(0 == dsl_dataset_hold_obj(dp,
+ ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
}
after_branch_point =
(ds_prev->ds_phys->ds_next_snap_obj != obj);
dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
if (after_branch_point &&
+ ds_prev->ds_phys->ds_next_clones_obj != 0) {
+ VERIFY(0 == zap_remove_int(mos,
+ ds_prev->ds_phys->ds_next_clones_obj, obj, tx));
+ if (ds->ds_phys->ds_next_snap_obj != 0) {
+ VERIFY(0 == zap_add_int(mos,
+ ds_prev->ds_phys->ds_next_clones_obj,
+ ds->ds_phys->ds_next_snap_obj, tx));
+ }
+ }
+ if (after_branch_point &&
ds->ds_phys->ds_next_snap_obj == 0) {
/* This clone is toast. */
ASSERT(ds_prev->ds_phys->ds_num_children > 1);
dsl_dataset_t *ds_next;
uint64_t itor = 0;
uint64_t old_unique;
+ int64_t used = 0, compressed = 0, uncompressed = 0;
- spa_scrub_restart(dp->dp_spa, tx->tx_txg);
-
- VERIFY(0 == dsl_dataset_open_obj(dp,
- ds->ds_phys->ds_next_snap_obj, NULL,
- DS_MODE_NONE, FTAG, &ds_next));
+ VERIFY(0 == dsl_dataset_hold_obj(dp,
+ ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
old_unique = dsl_dataset_unique(ds_next);
*
* XXX we're doing this long task with the config lock held
*/
- while (bplist_iterate(&ds_next->ds_deadlist, &itor,
- &bp) == 0) {
+ while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) {
if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
&bp, tx));
compressed += BP_GET_PSIZE(&bp);
uncompressed += BP_GET_UCSIZE(&bp);
/* XXX check return value? */
- (void) arc_free(zio, dp->dp_spa, tx->tx_txg,
+ (void) dsl_free(zio, dp, tx->tx_txg,
&bp, NULL, NULL, ARC_NOWAIT);
}
}
+ ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
+
+ /* change snapused */
+ dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
+ -used, -compressed, -uncompressed, tx);
+
/* free next's deadlist */
bplist_close(&ds_next->ds_deadlist);
bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
/* set next's deadlist to our deadlist */
+ bplist_close(&ds->ds_deadlist);
ds_next->ds_phys->ds_deadlist_obj =
ds->ds_phys->ds_deadlist_obj;
VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
* config lock held
*/
dsl_dataset_t *ds_after_next;
+ uint64_t space;
- VERIFY(0 == dsl_dataset_open_obj(dp,
- ds_next->ds_phys->ds_next_snap_obj, NULL,
- DS_MODE_NONE, FTAG, &ds_after_next));
- itor = 0;
- while (bplist_iterate(&ds_after_next->ds_deadlist,
- &itor, &bp) == 0) {
- if (bp.blk_birth >
- ds->ds_phys->ds_prev_snap_txg &&
- bp.blk_birth <=
- ds->ds_phys->ds_creation_txg) {
- ds_next->ds_phys->ds_unique_bytes +=
- bp_get_dasize(dp->dp_spa, &bp);
- }
- }
+ VERIFY(0 == dsl_dataset_hold_obj(dp,
+ ds_next->ds_phys->ds_next_snap_obj,
+ FTAG, &ds_after_next));
- dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
+ VERIFY(0 ==
+ bplist_space_birthrange(&ds_after_next->ds_deadlist,
+ ds->ds_phys->ds_prev_snap_txg,
+ ds->ds_phys->ds_creation_txg, &space));
+ ds_next->ds_phys->ds_unique_bytes += space;
+
+ dsl_dataset_rele(ds_after_next, FTAG);
ASSERT3P(ds_next->ds_prev, ==, NULL);
} else {
ASSERT3P(ds_next->ds_prev, ==, ds);
- dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
- ds_next);
+ dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
+ ds_next->ds_prev = NULL;
if (ds_prev) {
- VERIFY(0 == dsl_dataset_open_obj(dp,
- ds->ds_phys->ds_prev_snap_obj, NULL,
- DS_MODE_NONE, ds_next, &ds_next->ds_prev));
- } else {
- ds_next->ds_prev = NULL;
+ VERIFY(0 == dsl_dataset_get_ref(dp,
+ ds->ds_phys->ds_prev_snap_obj,
+ ds_next, &ds_next->ds_prev));
}
dsl_dataset_recalc_head_uniq(ds_next);
ASSERT(old_unique <= new_unique);
mrsdelta = MIN(new_unique - old_unique,
ds_next->ds_reserved - old_unique);
- dsl_dir_diduse_space(ds->ds_dir, -mrsdelta,
- 0, 0, tx);
+ dsl_dir_diduse_space(ds->ds_dir,
+ DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
}
}
- dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);
-
- /*
- * NB: unique_bytes might not be accurate for the head objset.
- * Before SPA_VERSION 9, we didn't update its value when we
- * deleted the most recent snapshot.
- */
- ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
+ dsl_dataset_rele(ds_next, FTAG);
} else {
/*
* There's no next snapshot, so this is a head dataset.
* Free everything that we point to (that's born after
* the previous snapshot, if we are a clone)
*
- * XXX we're doing this long task with the config lock held
+ * NB: this should be very quick, because we already
+ * freed all the objects in open context.
*/
- ka.usedp = &used;
- ka.compressedp = &compressed;
- ka.uncompressedp = &uncompressed;
+ ka.ds = ds;
ka.zio = zio;
ka.tx = tx;
- err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
- ADVANCE_POST, kill_blkptr, &ka);
+ err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
+ TRAVERSE_POST, kill_blkptr, &ka);
ASSERT3U(err, ==, 0);
- ASSERT(spa_version(dp->dp_spa) <
- SPA_VERSION_UNIQUE_ACCURATE ||
- used == ds->ds_phys->ds_unique_bytes);
+ ASSERT(spa_version(dp->dp_spa) < SPA_VERSION_UNIQUE_ACCURATE ||
+ ds->ds_phys->ds_unique_bytes == 0);
}
err = zio_wait(zio);
ASSERT3U(err, ==, 0);
- dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx);
-
- if (ds->ds_phys->ds_snapnames_zapobj) {
- err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
- ASSERT(err == 0);
- }
-
if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
- /* Erase the link in the dataset */
+ /* Erase the link in the dir */
dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
- /*
- * dsl_dir_sync_destroy() called us, they'll destroy
- * the dataset.
- */
+ ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
+ err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
+ ASSERT(err == 0);
} else {
/* remove from snapshot namespace */
dsl_dataset_t *ds_head;
- VERIFY(0 == dsl_dataset_open_obj(dp,
- ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL,
- DS_MODE_NONE, FTAG, &ds_head));
+ ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
+ VERIFY(0 == dsl_dataset_hold_obj(dp,
+ ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
VERIFY(0 == dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
{
uint64_t val;
- err = dsl_dataset_snap_lookup(mos,
- ds_head->ds_phys->ds_flags,
- ds_head->ds_phys->ds_snapnames_zapobj,
+ err = dsl_dataset_snap_lookup(ds_head,
ds->ds_snapname, &val);
ASSERT3U(err, ==, 0);
ASSERT3U(val, ==, obj);
}
#endif
- err = dsl_dataset_snap_remove(mos,
- ds_head->ds_phys->ds_flags,
- ds_head->ds_phys->ds_snapnames_zapobj,
- ds->ds_snapname, tx);
+ err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
ASSERT(err == 0);
- dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
+ dsl_dataset_rele(ds_head, FTAG);
}
if (ds_prev && ds->ds_prev != ds_prev)
- dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
+ dsl_dataset_rele(ds_prev, FTAG);
spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
cr, "dataset = %llu", ds->ds_object);
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
+ if (ds->ds_phys->ds_next_clones_obj != 0) {
+ uint64_t count;
+ ASSERT(0 == zap_count(mos,
+ ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
+ VERIFY(0 == dmu_object_free(mos,
+ ds->ds_phys->ds_next_clones_obj, tx));
+ }
+ if (ds->ds_phys->ds_props_obj != 0)
+ VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
+ dsl_dir_close(ds->ds_dir, ds);
+ ds->ds_dir = NULL;
+ dsl_dataset_drain_refs(ds, tag);
VERIFY(0 == dmu_object_free(mos, obj, tx));
-
}
static int
{
dsl_dataset_t *ds = arg1;
const char *snapname = arg2;
- objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
int err;
uint64_t value;
/*
* Check for conflicting name snapshot name.
*/
- err = dsl_dataset_snap_lookup(mos, ds->ds_phys->ds_flags,
- ds->ds_phys->ds_snapnames_zapobj, snapname, &value);
+ err = dsl_dataset_snap_lookup(ds, snapname, &value);
if (err == 0)
return (EEXIST);
if (err != ENOENT)
dsl_pool_t *dp = ds->ds_dir->dd_pool;
dmu_buf_t *dbuf;
dsl_dataset_phys_t *dsphys;
- uint64_t dsobj;
+ uint64_t dsobj, crtxg;
objset_t *mos = dp->dp_meta_objset;
int err;
- spa_scrub_restart(dp->dp_spa, tx->tx_txg);
ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
+ /*
+ * The origin's ds_creation_txg has to be < TXG_INITIAL
+ */
+ if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
+ crtxg = 1;
+ else
+ crtxg = tx->tx_txg;
+
dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
dmu_buf_will_dirty(dbuf, tx);
dsphys = dbuf->db_data;
+ bzero(dsphys, sizeof (dsl_dataset_phys_t));
dsphys->ds_dir_obj = ds->ds_dir->dd_object;
dsphys->ds_fsid_guid = unique_create();
(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
dsphys->ds_next_snap_obj = ds->ds_object;
dsphys->ds_num_children = 1;
dsphys->ds_creation_time = gethrestime_sec();
- dsphys->ds_creation_txg = tx->tx_txg;
+ dsphys->ds_creation_txg = crtxg;
dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
if (ds->ds_prev) {
+ uint64_t next_clones_obj =
+ ds->ds_prev->ds_phys->ds_next_clones_obj;
ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
ds->ds_object ||
ds->ds_prev->ds_phys->ds_num_children > 1);
ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
ds->ds_prev->ds_phys->ds_creation_txg);
ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
+ } else if (next_clones_obj != 0) {
+ VERIFY3U(0, ==, zap_remove_int(mos,
+ next_clones_obj, dsphys->ds_next_snap_obj, tx));
+ VERIFY3U(0, ==, zap_add_int(mos,
+ next_clones_obj, dsobj, tx));
}
}
*/
if (ds->ds_reserved) {
int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
- dsl_dir_diduse_space(ds->ds_dir, add, 0, 0, tx);
+ dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
+ add, 0, 0, tx);
}
bplist_close(&ds->ds_deadlist);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
ds->ds_phys->ds_prev_snap_obj = dsobj;
- ds->ds_phys->ds_prev_snap_txg = tx->tx_txg;
+ ds->ds_phys->ds_prev_snap_txg = crtxg;
ds->ds_phys->ds_unique_bytes = 0;
if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
ASSERT(err == 0);
if (ds->ds_prev)
- dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
- VERIFY(0 == dsl_dataset_open_obj(dp,
- ds->ds_phys->ds_prev_snap_obj, snapname,
- DS_MODE_NONE, ds, &ds->ds_prev));
+ dsl_dataset_drop_ref(ds->ds_prev, ds);
+ VERIFY(0 == dsl_dataset_get_ref(dp,
+ ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
+
+ dsl_pool_ds_snapshotted(ds, tx);
spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
"dataset = %llu", dsobj);
ds->ds_quota);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
ds->ds_reserved);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
+ ds->ds_phys->ds_guid);
if (ds->ds_phys->ds_next_snap_obj) {
/*
/* clone origin is really a dsl_dir thing... */
rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
- if (ds->ds_dir->dd_phys->dd_origin_obj) {
+ if (dsl_dir_is_clone(ds->ds_dir)) {
dsl_dataset_t *ods;
- VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool,
- ds->ds_dir->dd_phys->dd_origin_obj,
- NULL, DS_MODE_NONE, FTAG, &ods));
+ VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
+ ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
dsl_dataset_name(ods, stat->dds_origin);
- dsl_dataset_close(ods, DS_MODE_NONE, FTAG);
+ dsl_dataset_drop_ref(ods, FTAG);
}
rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
}
dsl_dataset_t *ds = arg1;
char *newsnapname = arg2;
dsl_dir_t *dd = ds->ds_dir;
- objset_t *mos = dd->dd_pool->dp_meta_objset;
dsl_dataset_t *hds;
uint64_t val;
int err;
- err = dsl_dataset_open_obj(dd->dd_pool,
- dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds);
+ err = dsl_dataset_hold_obj(dd->dd_pool,
+ dd->dd_phys->dd_head_dataset_obj, FTAG, &hds);
if (err)
return (err);
/* new name better not be in use */
- err = dsl_dataset_snap_lookup(mos, hds->ds_phys->ds_flags,
- hds->ds_phys->ds_snapnames_zapobj, newsnapname, &val);
- dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
+ err = dsl_dataset_snap_lookup(hds, newsnapname, &val);
+ dsl_dataset_rele(hds, FTAG);
if (err == 0)
err = EEXIST;
ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
- VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
- dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds));
+ VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
+ dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
VERIFY(0 == dsl_dataset_get_snapname(ds));
- err = dsl_dataset_snap_remove(mos, hds->ds_phys->ds_flags,
- hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, tx);
+ err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
ASSERT3U(err, ==, 0);
mutex_enter(&ds->ds_lock);
(void) strcpy(ds->ds_snapname, newsnapname);
spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
cr, "dataset = %llu", ds->ds_object);
- dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
+ dsl_dataset_rele(hds, FTAG);
}
struct renamesnaparg {
* For recursive snapshot renames the parent won't be changing
* so we just pass name for both the to/from argument.
*/
- if (err = zfs_secpolicy_rename_perms(name, name, CRED())) {
- (void) strcpy(ra->failed, name);
- return (err);
- }
-
- err = dsl_dataset_open(name, DS_MODE_READONLY | DS_MODE_STANDARD,
- ra->dstg, &ds);
+ err = zfs_secpolicy_rename_perms(name, name, CRED());
if (err == ENOENT) {
- *cp = '\0';
return (0);
- }
- if (err) {
+ } else if (err) {
(void) strcpy(ra->failed, name);
- *cp = '\0';
- dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg);
return (err);
}
#ifdef _KERNEL
- /* for all filesystems undergoing rename, we'll need to unmount it */
+ /*
+ * For all filesystems undergoing rename, we'll need to unmount it.
+ */
(void) zfs_unmount_snap(name, NULL);
#endif
-
+ err = dsl_dataset_hold(name, ra->dstg, &ds);
*cp = '\0';
+ if (err == ENOENT) {
+ return (0);
+ } else if (err) {
+ (void) strcpy(ra->failed, name);
+ return (err);
+ }
dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);
(void) strcat(ra->failed, "@");
(void) strcat(ra->failed, ra->newsnap);
}
- dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg);
+ dsl_dataset_rele(ds, ra->dstg);
}
if (err)
#pragma weak dmu_objset_rename = dsl_dataset_rename
int
-dsl_dataset_rename(char *oldname, const char *newname,
- boolean_t recursive)
+dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive)
{
dsl_dir_t *dd;
dsl_dataset_t *ds;
if (tail == NULL) {
int delta = strlen(newname) - strlen(oldname);
- /* if we're growing, validate child size lengths */
+ /* if we're growing, validate child name lengths */
if (delta > 0)
err = dmu_objset_find(oldname, dsl_valid_rename,
&delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
if (recursive) {
err = dsl_recursive_rename(oldname, newname);
} else {
- err = dsl_dataset_open(oldname,
- DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds);
+ err = dsl_dataset_hold(oldname, FTAG, &ds);
if (err)
return (err);
dsl_dataset_snapshot_rename_check,
dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
- dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
+ dsl_dataset_rele(ds, FTAG);
}
return (err);
}
+/* One snapshot on a promotion snaplist (built by snaplist_make()). */
+struct promotenode {
+ list_node_t link;
+ dsl_dataset_t *ds;
+};
+
struct promotearg {
- uint64_t used, comp, uncomp, unique;
- uint64_t ds_flags, newnext_obj, snapnames_obj;
+ /* snaplists assembled by dsl_dataset_promote() before the sync task */
+ list_t shared_snaps, origin_snaps, clone_snaps;
+ dsl_dataset_t *origin_origin, *origin_head;
+ /* space totals computed in dsl_dataset_promote_check() */
+ uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
};
+static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
+
/* ARGSUSED */
static int
dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dataset_t *hds = arg1;
struct promotearg *pa = arg2;
- dsl_dir_t *dd = hds->ds_dir;
- dsl_pool_t *dp = hds->ds_dir->dd_pool;
- dsl_dir_t *odd = NULL;
- dsl_dataset_t *ds = NULL;
- dsl_dataset_t *origin_ds = NULL;
- dsl_dataset_t *newnext_ds = NULL;
+ struct promotenode *snap = list_head(&pa->shared_snaps);
+ dsl_dataset_t *origin_ds = snap->ds;
int err;
- char *name = NULL;
- uint64_t itor = 0;
- blkptr_t bp;
- bzero(pa, sizeof (*pa));
-
- /* Check that it is a clone */
- if (dd->dd_phys->dd_origin_obj == 0)
+ /* Check that it is a real clone */
+ if (!dsl_dir_is_clone(hds->ds_dir))
return (EINVAL);
/* Since this is so expensive, don't do the preliminary check */
if (!dmu_tx_is_syncing(tx))
return (0);
- if (err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj,
- NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds))
- goto out;
- odd = origin_ds->ds_dir;
-
- {
- dsl_dataset_t *phds;
- if (err = dsl_dataset_open_obj(dd->dd_pool,
- odd->dd_phys->dd_head_dataset_obj,
- NULL, DS_MODE_NONE, FTAG, &phds))
- goto out;
- pa->ds_flags = phds->ds_phys->ds_flags;
- pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj;
- dsl_dataset_close(phds, DS_MODE_NONE, FTAG);
- }
-
- if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
- err = EXDEV;
- goto out;
- }
-
- /* find origin's new next ds */
- VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
- NULL, DS_MODE_NONE, FTAG, &newnext_ds));
- while (newnext_ds->ds_phys->ds_prev_snap_obj != origin_ds->ds_object) {
- dsl_dataset_t *prev;
-
- if (err = dsl_dataset_open_obj(dd->dd_pool,
- newnext_ds->ds_phys->ds_prev_snap_obj,
- NULL, DS_MODE_NONE, FTAG, &prev))
- goto out;
- dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
- newnext_ds = prev;
- }
- pa->newnext_obj = newnext_ds->ds_object;
+ if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)
+ return (EXDEV);
/* compute origin's new unique space */
- while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
- &itor, &bp)) == 0) {
- if (bp.blk_birth > origin_ds->ds_phys->ds_prev_snap_txg)
- pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
- }
- if (err != ENOENT)
- goto out;
+ snap = list_tail(&pa->clone_snaps);
+ ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
+ err = bplist_space_birthrange(&snap->ds->ds_deadlist,
+ origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique);
+ if (err)
+ return (err);
- /* Walk the snapshots that we are moving */
- name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- ds = origin_ds;
- /* CONSTCOND */
- while (TRUE) {
+ /*
+ * Walk the snapshots that we are moving
+ *
+ * Compute space to transfer. Consider the incremental changes
+ * to used for each snapshot:
+ * (my used) = (prev's used) + (blocks born) - (blocks killed)
+ * So each snapshot gave birth to:
+ * (blocks born) = (my used) - (prev's used) + (blocks killed)
+ * So a sequence would look like:
+ * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
+ * Which simplifies to:
+ * uN + kN + kN-1 + ... + k1 + k0
+ * Note however, if we stop before we reach the ORIGIN we get:
+ * uN + kN + kN-1 + ... + kM - uM-1
+ */
+ pa->used = origin_ds->ds_phys->ds_used_bytes;
+ pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
+ pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
+ for (snap = list_head(&pa->shared_snaps); snap;
+ snap = list_next(&pa->shared_snaps, snap)) {
uint64_t val, dlused, dlcomp, dluncomp;
- dsl_dataset_t *prev;
+ dsl_dataset_t *ds = snap->ds;
/* Check that the snapshot name does not conflict */
- dsl_dataset_name(ds, name);
- err = dsl_dataset_snap_lookup(dd->dd_pool->dp_meta_objset,
- hds->ds_phys->ds_flags, hds->ds_phys->ds_snapnames_zapobj,
- ds->ds_snapname, &val);
- if (err != ENOENT) {
- if (err == 0)
- err = EEXIST;
- goto out;
- }
-
- /*
- * compute space to transfer. Each snapshot gave birth to:
- * (my used) - (prev's used) + (deadlist's used)
- */
- pa->used += ds->ds_phys->ds_used_bytes;
- pa->comp += ds->ds_phys->ds_compressed_bytes;
- pa->uncomp += ds->ds_phys->ds_uncompressed_bytes;
+ VERIFY(0 == dsl_dataset_get_snapname(ds));
+ err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
+ if (err == 0)
+ return (EEXIST);
+ if (err != ENOENT)
+ return (err);
- /* If we reach the first snapshot, we're done. */
+ /* The very first snapshot does not have a deadlist */
if (ds->ds_phys->ds_prev_snap_obj == 0)
- break;
+ continue;
if (err = bplist_space(&ds->ds_deadlist,
&dlused, &dlcomp, &dluncomp))
- goto out;
- if (err = dsl_dataset_open_obj(dd->dd_pool,
- ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
- FTAG, &prev))
- goto out;
- pa->used += dlused - prev->ds_phys->ds_used_bytes;
- pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes;
- pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes;
+ return (err);
+ pa->used += dlused;
+ pa->comp += dlcomp;
+ pa->uncomp += dluncomp;
+ }
- /*
- * We could be a clone of a clone. If we reach our
- * parent's branch point, we're done.
- */
- if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
- dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
- break;
- }
- if (ds != origin_ds)
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
- ds = prev;
+ /*
+ * If we are a clone of a clone then we never reached ORIGIN,
+ * so we need to subtract out the clone origin's used space.
+ */
+ if (pa->origin_origin) {
+ pa->used -= pa->origin_origin->ds_phys->ds_used_bytes;
+ pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
+ pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
}
/* Check that there is enough space here */
- err = dsl_dir_transfer_possible(odd, dd, pa->used);
+ err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
+ pa->used);
+ if (err)
+ return (err);
-out:
- if (ds && ds != origin_ds)
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
- if (origin_ds)
- dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG);
- if (newnext_ds)
- dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
- if (name)
- kmem_free(name, MAXPATHLEN);
- return (err);
+ /*
+ * Compute the amounts of space that will be used by snapshots
+ * after the promotion (for both origin and clone). For each,
+ * it is the amount of space that will be on all of their
+ * deadlists (that was not born before their new origin).
+ */
+ if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
+ uint64_t space;
+
+ /*
+ * Note, typically this will not be a clone of a clone,
+ * so snap->ds->ds_origin_txg will be < TXG_INITIAL, so
+ * these snaplist_space() -> bplist_space_birthrange()
+ * calls will be fast because they do not have to
+ * iterate over all bps.
+ */
+ snap = list_head(&pa->origin_snaps);
+ err = snaplist_space(&pa->shared_snaps,
+ snap->ds->ds_origin_txg, &pa->cloneusedsnap);
+ if (err)
+ return (err);
+
+ err = snaplist_space(&pa->clone_snaps,
+ snap->ds->ds_origin_txg, &space);
+ if (err)
+ return (err);
+ pa->cloneusedsnap += space;
+ }
+ if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
+ err = snaplist_space(&pa->origin_snaps,
+ origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap);
+ if (err)
+ return (err);
+ }
+
+ return (0);
}
static void
{
dsl_dataset_t *hds = arg1;
struct promotearg *pa = arg2;
+ struct promotenode *snap = list_head(&pa->shared_snaps);
+ dsl_dataset_t *origin_ds = snap->ds;
+ dsl_dataset_t *origin_head;
dsl_dir_t *dd = hds->ds_dir;
dsl_pool_t *dp = hds->ds_dir->dd_pool;
dsl_dir_t *odd = NULL;
- dsl_dataset_t *ds, *origin_ds;
- char *name;
+ uint64_t oldnext_obj;
+ int64_t delta;
- ASSERT(dd->dd_phys->dd_origin_obj != 0);
ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
- VERIFY(0 == dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj,
- NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds));
+ snap = list_head(&pa->origin_snaps);
+ origin_head = snap->ds;
+
/*
* We need to explicitly open odd, since origin_ds's dd will be
* changing.
VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
NULL, FTAG, &odd));
- /* move snapshots to this dir */
- name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- ds = origin_ds;
- /* CONSTCOND */
- while (TRUE) {
- dsl_dataset_t *prev;
+ /* change origin's next snap */
+ dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
+ oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj;
+ snap = list_tail(&pa->clone_snaps);
+ ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
+ origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object;
+
+ /* change the origin's next clone */
+ if (origin_ds->ds_phys->ds_next_clones_obj) {
+ VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
+ origin_ds->ds_phys->ds_next_clones_obj,
+ origin_ds->ds_phys->ds_next_snap_obj, tx));
+ VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
+ origin_ds->ds_phys->ds_next_clones_obj,
+ oldnext_obj, tx));
+ }
+
+ /* change origin */
+ dmu_buf_will_dirty(dd->dd_dbuf, tx);
+ ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
+ dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
+ hds->ds_origin_txg = origin_head->ds_origin_txg;
+ dmu_buf_will_dirty(odd->dd_dbuf, tx);
+ odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
+ origin_head->ds_origin_txg = origin_ds->ds_phys->ds_creation_txg;
+ /* move snapshots to this dir */
+ for (snap = list_head(&pa->shared_snaps); snap;
+ snap = list_next(&pa->shared_snaps, snap)) {
+ dsl_dataset_t *ds = snap->ds;
+
+ /* unregister props as dsl_dir is changing */
+ if (ds->ds_user_ptr) {
+ ds->ds_user_evict_func(ds, ds->ds_user_ptr);
+ ds->ds_user_ptr = NULL;
+ }
/* move snap name entry */
- dsl_dataset_name(ds, name);
- VERIFY(0 == dsl_dataset_snap_remove(dp->dp_meta_objset,
- pa->ds_flags, pa->snapnames_obj, ds->ds_snapname, tx));
+ VERIFY(0 == dsl_dataset_get_snapname(ds));
+ VERIFY(0 == dsl_dataset_snap_remove(origin_head,
+ ds->ds_snapname, tx));
VERIFY(0 == zap_add(dp->dp_meta_objset,
hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
8, 1, &ds->ds_object, tx));
-
/* change containing dsl_dir */
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
NULL, ds, &ds->ds_dir));
ASSERT3U(dsl_prop_numcb(ds), ==, 0);
-
- if (ds->ds_phys->ds_prev_snap_obj == 0)
- break;
-
- VERIFY(0 == dsl_dataset_open_obj(dp,
- ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
- FTAG, &prev));
-
- if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
- dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
- break;
- }
- if (ds != origin_ds)
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
- ds = prev;
}
- if (ds != origin_ds)
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
- /* change origin's next snap */
- dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
- origin_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;
+ /*
+ * Change space accounting.
+ * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either
+ * both be valid, or both be 0 (resulting in delta == 0). This
+ * is true for each of {clone,origin} independently.
+ */
- /* change origin */
- dmu_buf_will_dirty(dd->dd_dbuf, tx);
- ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
- dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
- dmu_buf_will_dirty(odd->dd_dbuf, tx);
- odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
+ delta = pa->cloneusedsnap -
+ dd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
+ ASSERT3S(delta, >=, 0);
+ ASSERT3U(pa->used, >=, delta);
+ dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
+ dsl_dir_diduse_space(dd, DD_USED_HEAD,
+ pa->used - delta, pa->comp, pa->uncomp, tx);
+
+ delta = pa->originusedsnap -
+ odd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
+ ASSERT3S(delta, <=, 0);
+ ASSERT3U(pa->used, >=, -delta);
+ dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
+ dsl_dir_diduse_space(odd, DD_USED_HEAD,
+ -pa->used - delta, -pa->comp, -pa->uncomp, tx);
- /* change space accounting */
- dsl_dir_diduse_space(odd, -pa->used, -pa->comp, -pa->uncomp, tx);
- dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx);
origin_ds->ds_phys->ds_unique_bytes = pa->unique;
/* log history record */
spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
- cr, "dataset = %llu", ds->ds_object);
+ cr, "dataset = %llu", hds->ds_object);
dsl_dir_close(odd, FTAG);
- dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG);
- kmem_free(name, MAXPATHLEN);
}
+static char *snaplist_tag = "snaplist";
+/*
+ * Make a list of dsl_dataset_t's for the snapshots between first_obj
+ * (exclusive) and last_obj (inclusive). The list will be in reverse
+ * order (last_obj will be the list_head()). If first_obj == 0, do all
+ * snapshots back to this dataset's origin.
+ */
+static int
+snaplist_make(dsl_pool_t *dp, boolean_t own,
+ uint64_t first_obj, uint64_t last_obj, list_t *l)
+{
+ uint64_t obj = last_obj;
+
+ ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock));
+
+ list_create(l, sizeof (struct promotenode),
+ offsetof(struct promotenode, link));
+
+ /* walk backwards in time via the ds_prev_snap_obj links */
+ while (obj != first_obj) {
+ dsl_dataset_t *ds;
+ struct promotenode *snap;
+ int err;
+
+ if (own) {
+ err = dsl_dataset_own_obj(dp, obj,
+ 0, snaplist_tag, &ds);
+ if (err == 0)
+ dsl_dataset_make_exclusive(ds, snaplist_tag);
+ } else {
+ err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds);
+ }
+ if (err == ENOENT) {
+ /* lost race with snapshot destroy */
+ struct promotenode *last = list_tail(l);
+ ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj);
+ obj = last->ds->ds_phys->ds_prev_snap_obj;
+ continue;
+ } else if (err) {
+ /* caller cleans up the partial list (snaplist_destroy) */
+ return (err);
+ }
+
+ /* "do all snapshots": resolve the real stopping point lazily */
+ if (first_obj == 0)
+ first_obj = ds->ds_dir->dd_phys->dd_origin_obj;
+
+ snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP);
+ snap->ds = ds;
+ list_insert_tail(l, snap);
+ obj = ds->ds_phys->ds_prev_snap_obj;
+ }
+
+ return (0);
+}
+
+/*
+ * Sum into *spacep the deadlist space of every snapshot on 'l' within
+ * the birth-txg range [mintxg, UINT64_MAX].  Returns 0, or the first
+ * error from bplist_space_birthrange() (leaving *spacep partial).
+ */
+static int
+snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep)
+{
+ struct promotenode *snap;
+
+ *spacep = 0;
+ for (snap = list_head(l); snap; snap = list_next(l, snap)) {
+ uint64_t used;
+ int err = bplist_space_birthrange(&snap->ds->ds_deadlist,
+ mintxg, UINT64_MAX, &used);
+ if (err)
+ return (err);
+ *spacep += used;
+ }
+ return (0);
+}
+
+/*
+ * Release every snapshot on 'l' (disown if 'own', else rele), free the
+ * promotenodes, and destroy the list.  Safe to call on a list that was
+ * never list_create()d -- dsl_dataset_promote() zero-initializes its
+ * promotearg -- thanks to the list_link_active() check.
+ */
+static void
+snaplist_destroy(list_t *l, boolean_t own)
+{
+ struct promotenode *snap;
+
+ if (!list_link_active(&l->list_head))
+ return;
+
+ while ((snap = list_tail(l)) != NULL) {
+ list_remove(l, snap);
+ if (own)
+ dsl_dataset_disown(snap->ds, snaplist_tag);
+ else
+ dsl_dataset_rele(snap->ds, snaplist_tag);
+ kmem_free(snap, sizeof (struct promotenode));
+ }
+ list_destroy(l);
+}
+
+/*
+ * Promote a clone. Nomenclature note:
+ * "clone" or "cds": the original clone which is being promoted
+ * "origin" or "ods": the snapshot which is originally clone's origin
+ * "origin head" or "ohds": the dataset which is the head
+ * (filesystem/volume) for the origin
+ * "origin origin": the origin of the origin's filesystem (typically
+ * NULL, indicating that the clone is not a clone of a clone).
+ */
int
dsl_dataset_promote(const char *name)
{
dsl_dataset_t *ds;
- int err;
+ dsl_dir_t *dd;
+ dsl_pool_t *dp;
dmu_object_info_t doi;
- struct promotearg pa;
+ struct promotearg pa = { 0 };
+ struct promotenode *snap;
+ int err;
- err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds);
+ err = dsl_dataset_hold(name, FTAG, &ds);
if (err)
return (err);
+ dd = ds->ds_dir;
+ dp = dd->dd_pool;
- err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset,
+ err = dmu_object_info(dp->dp_meta_objset,
ds->ds_phys->ds_snapnames_zapobj, &doi);
if (err) {
- dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
+ dsl_dataset_rele(ds, FTAG);
return (err);
}
+ /* only a (non-snapshot) clone can be promoted */
+ if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) {
+ dsl_dataset_rele(ds, FTAG);
+ return (EINVAL);
+ }
+
+ /*
+ * We are going to inherit all the snapshots taken before our
+ * origin (i.e., our new origin will be our parent's origin).
+ * Take ownership of them so that we can rename them into our
+ * namespace.
+ */
+ rw_enter(&dp->dp_config_rwlock, RW_READER);
+
+ err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj,
+ &pa.shared_snaps);
+ if (err != 0)
+ goto out;
+
+ err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps);
+ if (err != 0)
+ goto out;
+
+ snap = list_head(&pa.shared_snaps);
+ ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj);
+ err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj,
+ snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps);
+ if (err != 0)
+ goto out;
+
+ /* clone of a clone: also own the origin's own origin snapshot */
+ if (dsl_dir_is_clone(snap->ds->ds_dir)) {
+ err = dsl_dataset_own_obj(dp,
+ snap->ds->ds_dir->dd_phys->dd_origin_obj,
+ 0, FTAG, &pa.origin_origin);
+ if (err != 0)
+ goto out;
+ }
+
+out:
+ rw_exit(&dp->dp_config_rwlock);
+
/*
* Add in 128x the snapnames zapobj size, since we will be moving
* a bunch of snapnames to the promoted ds, and dirtying their
* bonus buffers.
*/
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- dsl_dataset_promote_check,
- dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks);
- dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
+ /* a snaplist error skips the sync task but still runs the cleanup */
+ if (err == 0) {
+ err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
+ dsl_dataset_promote_sync, ds, &pa,
+ 2 + 2 * doi.doi_physical_blks);
+ }
+
+ snaplist_destroy(&pa.shared_snaps, B_TRUE);
+ snaplist_destroy(&pa.clone_snaps, B_FALSE);
+ snaplist_destroy(&pa.origin_snaps, B_FALSE);
+ if (pa.origin_origin)
+ dsl_dataset_disown(pa.origin_origin, FTAG);
+ dsl_dataset_rele(ds, FTAG);
return (err);
}
{
struct cloneswaparg *csa = arg1;
dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;
- uint64_t itor = 0;
- blkptr_t bp;
- uint64_t unique = 0;
- int err;
ASSERT(csa->cds->ds_reserved == 0);
ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota);
csa->ohds->ds_user_ptr = NULL;
}
- /* compute unique space */
- while ((err = bplist_iterate(&csa->cds->ds_deadlist,
- &itor, &bp)) == 0) {
- if (bp.blk_birth > csa->cds->ds_prev->ds_phys->ds_prev_snap_txg)
- unique += bp_get_dasize(dp->dp_spa, &bp);
- }
- VERIFY(err == ENOENT);
-
/* reset origin's unique bytes */
- csa->cds->ds_prev->ds_phys->ds_unique_bytes = unique;
+ VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist,
+ csa->cds->ds_prev->ds_phys->ds_prev_snap_txg, UINT64_MAX,
+ &csa->cds->ds_prev->ds_phys->ds_unique_bytes));
/* swap blkptrs */
{
uint64_t cdl_used, cdl_comp, cdl_uncomp;
uint64_t odl_used, odl_comp, odl_uncomp;
+ ASSERT3U(csa->cds->ds_dir->dd_phys->
+ dd_used_breakdown[DD_USED_SNAP], ==, 0);
+
VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used,
&cdl_comp, &cdl_uncomp));
VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used,
&odl_comp, &odl_uncomp));
+
dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
(csa->ohds->ds_phys->ds_used_bytes + odl_used);
dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
cdl_uncomp -
(csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
- dsl_dir_diduse_space(csa->ohds->ds_dir,
+ dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD,
dused, dcomp, duncomp, tx);
- dsl_dir_diduse_space(csa->cds->ds_dir,
+ dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD,
-dused, -dcomp, -duncomp, tx);
+
+ /*
+ * The difference in the space used by snapshots is the
+ * difference in snapshot space due to the head's
+ * deadlist (since that's the only thing that's
+ * changing that affects the snapused).
+ */
+ VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist,
+ csa->ohds->ds_origin_txg, UINT64_MAX, &cdl_used));
+ VERIFY(0 == bplist_space_birthrange(&csa->ohds->ds_deadlist,
+ csa->ohds->ds_origin_txg, UINT64_MAX, &odl_used));
+ dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
+ DD_USED_HEAD, DD_USED_SNAP, tx);
}
#define SWITCH64(x, y) \
csa->cds->ds_phys->ds_unique_bytes);
/* apply any parent delta for change in unconsumed refreservation */
- dsl_dir_diduse_space(csa->ohds->ds_dir, csa->unused_refres_delta,
- 0, 0, tx);
+ dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV,
+ csa->unused_refres_delta, 0, 0, tx);
/* swap deadlists */
bplist_close(&csa->cds->ds_deadlist);
csa->cds->ds_phys->ds_deadlist_obj));
VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
csa->ohds->ds_phys->ds_deadlist_obj));
+
+ dsl_pool_ds_clone_swapped(csa->ohds, csa->cds, tx);
}
/*
- * Swap 'clone' with its origin head file system.
+ * Swap 'clone' with its origin head file system. Used at the end
+ * of "online recv" to swizzle the file system to the new version.
*/
int
dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
boolean_t force)
{
struct cloneswaparg csa;
+ int error;
- ASSERT(clone->ds_open_refcount == DS_REF_MAX);
- ASSERT(origin_head->ds_open_refcount == DS_REF_MAX);
-
+ ASSERT(clone->ds_owner);
+ ASSERT(origin_head->ds_owner);
+retry:
+ /* Need exclusive access for the swap */
+ rw_enter(&clone->ds_rwlock, RW_WRITER);
+ if (!rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
+ rw_exit(&clone->ds_rwlock);
+ rw_enter(&origin_head->ds_rwlock, RW_WRITER);
+ if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) {
+ rw_exit(&origin_head->ds_rwlock);
+ goto retry;
+ }
+ }
csa.cds = clone;
csa.ohds = origin_head;
csa.force = force;
- return (dsl_sync_task_do(clone->ds_dir->dd_pool,
+ error = dsl_sync_task_do(clone->ds_dir->dd_pool,
dsl_dataset_clone_swap_check,
- dsl_dataset_clone_swap_sync, &csa, NULL, 9));
+ dsl_dataset_clone_swap_sync, &csa, NULL, 9);
+ return (error);
}
/*
{
spa_t *spa;
dsl_pool_t *dp;
- dsl_dataset_t *ds = NULL;
+ dsl_dataset_t *ds;
int error;
if ((error = spa_open(pname, &spa, FTAG)) != 0)
return (error);
dp = spa_get_dsl(spa);
rw_enter(&dp->dp_config_rwlock, RW_READER);
- if ((error = dsl_dataset_open_obj(dp, obj,
- NULL, DS_MODE_NONE, FTAG, &ds)) != 0) {
- rw_exit(&dp->dp_config_rwlock);
- spa_close(spa, FTAG);
- return (error);
+ if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) {
+ dsl_dataset_name(ds, buf);
+ dsl_dataset_rele(ds, FTAG);
}
- dsl_dataset_name(ds, buf);
- dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
rw_exit(&dp->dp_config_rwlock);
spa_close(spa, FTAG);
- return (0);
+ return (error);
}
int
dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
- uint64_t asize, uint64_t inflight, uint64_t *used,
- uint64_t *ref_rsrv)
+ uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv)
{
int error = 0;
dmu_buf_will_dirty(ds->ds_dbuf, tx);
- mutex_enter(&ds->ds_lock);
ds->ds_quota = new_quota;
- mutex_exit(&ds->ds_lock);
dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx);
spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa,
tx, cr, "%lld dataset = %llu ",
- (longlong_t)new_quota, ds->ds_dir->dd_phys->dd_head_dataset_obj);
+ (longlong_t)new_quota, ds->ds_object);
}
int
dsl_dataset_t *ds;
int err;
- err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds);
+ err = dsl_dataset_hold(dsname, FTAG, &ds);
if (err)
return (err);
dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync,
ds, "a, 0);
}
- dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
+ dsl_dataset_rele(ds, FTAG);
return (err);
}
dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ mutex_enter(&ds->ds_dir->dd_lock);
mutex_enter(&ds->ds_lock);
unique = dsl_dataset_unique(ds);
delta = MAX(0, (int64_t)(new_reservation - unique)) -
ds->ds_reserved = new_reservation;
mutex_exit(&ds->ds_lock);
+ dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
+ mutex_exit(&ds->ds_dir->dd_lock);
dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation",
new_reservation, cr, tx);
- dsl_dir_diduse_space(ds->ds_dir, delta, 0, 0, tx);
-
spa_history_internal_log(LOG_DS_REFRESERV,
ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu",
- (longlong_t)new_reservation,
- ds->ds_dir->dd_phys->dd_head_dataset_obj);
+ (longlong_t)new_reservation, ds->ds_object);
}
int
dsl_dataset_t *ds;
int err;
- err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds);
+ err = dsl_dataset_hold(dsname, FTAG, &ds);
if (err)
return (err);
err = dsl_sync_task_do(ds->ds_dir->dd_pool,
dsl_dataset_set_reservation_check,
dsl_dataset_set_reservation_sync, ds, &reservation, 0);
- dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
+ dsl_dataset_rele(ds, FTAG);
return (err);
}