#include <sys/zfs_ioctl.h>
#include <sys/spa.h>
#include <sys/zfs_znode.h>
+#include <sys/zfs_onexit.h>
#include <sys/zvol.h>
#include <sys/dsl_scan.h>
#include <sys/dsl_deadlist.h>
-/*
- * Enable/disable prefetching of dedup-ed blocks which are going to be freed.
- */
-int zfs_dedup_prefetch = 1;
-
static char *dsl_reaper = "the grim reaper";
static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
void
dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
{
- int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
- int compressed = BP_GET_PSIZE(bp);
- int uncompressed = BP_GET_UCSIZE(bp);
+ int used, compressed, uncompressed;
int64_t delta;
+ used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
+ compressed = BP_GET_PSIZE(bp);
+ uncompressed = BP_GET_UCSIZE(bp);
+
dprintf_bp(bp, "ds=%p", ds);
ASSERT(dmu_tx_is_syncing(tx));
dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
boolean_t async)
{
+ int used, compressed, uncompressed;
+
if (BP_IS_HOLE(bp))
return (0);
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(bp->blk_birth <= tx->tx_txg);
- int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
- int compressed = BP_GET_PSIZE(bp);
- int uncompressed = BP_GET_UCSIZE(bp);
+ used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
+ compressed = BP_GET_PSIZE(bp);
+ uncompressed = BP_GET_UCSIZE(bp);
ASSERT(used > 0);
if (ds == NULL) {
if (blk_birth <= dsl_dataset_prev_snap_txg(ds))
return (B_FALSE);
- if (zfs_dedup_prefetch && bp && BP_GET_DEDUP(bp))
- ddt_prefetch(dsl_dataset_get_spa(ds), bp);
+ ddt_prefetch(dsl_dataset_get_spa(ds), bp);
return (B_TRUE);
}
dmu_buf_t *dbuf;
dsl_dataset_t *ds;
int err;
+ dmu_object_info_t doi;
ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
dsl_pool_sync_context(dp));
err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
if (err)
return (err);
+
+	/* Make sure dsobj has the correct object type. */
+	dmu_object_info_from_db(dbuf, &doi);
+	if (doi.doi_type != DMU_OT_DSL_DATASET) {
+		/* Drop the bonus buffer hold taken above before bailing out. */
+		dmu_buf_rele(dbuf, tag);
+		return (EINVAL);
+	}
+
ds = dmu_buf_get_user(dbuf);
if (ds == NULL) {
- dsl_dataset_t *winner;
+ dsl_dataset_t *winner = NULL;
ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
ds->ds_dbuf = dbuf;
ds->ds_object = dsobj;
ds->ds_phys = dbuf->db_data;
+ list_link_init(&ds->ds_synced_link);
mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
- rw_init(&ds->ds_rwlock, 0, 0, 0);
+ rw_init(&ds->ds_rwlock, NULL, RW_DEFAULT, NULL);
cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);
bplist_create(&ds->ds_pending_deadlist);
dsl_dir_close(dd, FTAG);
+ /*
+ * If we are creating a clone, make sure we zero out any stale
+	 * data from the origin snapshot's ZIL header.
+ */
+ if (origin != NULL) {
+ dsl_dataset_t *ds;
+ objset_t *os;
+
+ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
+ VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));
+ bzero(&os->os_zil_header, sizeof (os->os_zil_header));
+ dsl_dataset_dirty(ds, tx);
+ dsl_dataset_rele(ds, FTAG);
+ }
+
return (dsobj);
}
dsl_dir_t *dd;
uint64_t obj;
struct dsl_ds_destroyarg dsda = { 0 };
- dsl_dataset_t dummy_ds = { 0 };
+ dsl_dataset_t *dummy_ds;
dsda.ds = ds;
}
dd = ds->ds_dir;
- dummy_ds.ds_dir = dd;
- dummy_ds.ds_object = ds->ds_object;
+ dummy_ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
+ dummy_ds->ds_dir = dd;
+ dummy_ds->ds_object = ds->ds_object;
/*
* Check for errors and mark this ds as inconsistent, in
err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
dsl_dataset_destroy_begin_sync, ds, NULL, 0);
if (err)
- goto out;
+ goto out_free;
err = dmu_objset_from_ds(ds, &os);
if (err)
- goto out;
+ goto out_free;
/*
* remove the objects in open context, so that we won't
*/
(void) dmu_free_object(os, obj);
}
+ if (err != ESRCH)
+ goto out_free;
/*
- * We need to sync out all in-flight IO before we try to evict
- * (the dataset evict func is trying to clear the cached entries
- * for this dataset in the ARC).
+ * Only the ZIL knows how to free log blocks.
+ */
+ zil_destroy(dmu_objset_zil(os), B_FALSE);
+
+ /*
+ * Sync out all in-flight IO.
*/
txg_wait_synced(dd->dd_pool, 0);
*/
if (ds->ds_phys->ds_bp.blk_fill == 0 &&
dmu_objset_userused_enabled(os)) {
- uint64_t count;
-
+ ASSERTV(uint64_t count);
ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
count == 0);
ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
count == 0);
}
- if (err != ESRCH)
- goto out;
-
rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
rw_exit(&dd->dd_pool->dp_config_rwlock);
if (err)
- goto out;
+ goto out_free;
/*
* Blow away the dsl_dir + head dataset.
err = dsl_dataset_origin_rm_prep(&dsda, tag);
if (err) {
dsl_dir_close(dd, FTAG);
- goto out;
+ goto out_free;
}
}
dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
dsl_dataset_destroy_sync, &dsda, tag, 0);
dsl_sync_task_create(dstg, dsl_dir_destroy_check,
- dsl_dir_destroy_sync, &dummy_ds, FTAG, 0);
+ dsl_dir_destroy_sync, dummy_ds, FTAG, 0);
err = dsl_sync_task_group_wait(dstg);
dsl_sync_task_group_destroy(dstg);
/* if it is successful, dsl_dir_destroy_sync will close the dd */
if (err)
dsl_dir_close(dd, FTAG);
+
+out_free:
+ kmem_free(dummy_ds, sizeof (dsl_dataset_t));
out:
dsl_dataset_disown(ds, tag);
return (err);
return (0);
}
+/*
+ * If you add new checks here, you may need to add
+ * additional checks to the "temporary" case in
+ * snapshot_check() in dmu_objset.c.
+ */
/* ARGSUSED */
int
dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx)
{
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
- uint64_t count;
int err;
+ ASSERTV(uint64_t count);
ASSERT(ds->ds_phys->ds_num_children >= 2);
err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx);
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
dsl_dataset_t *ds_prev = NULL;
+ boolean_t wont_destroy;
uint64_t obj;
- ASSERT(ds->ds_owner);
+ wont_destroy = (dsda->defer &&
+ (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1));
+
+ ASSERT(ds->ds_owner || wont_destroy);
ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1);
ASSERT(ds->ds_prev == NULL ||
ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
- if (dsda->defer) {
+ if (wont_destroy) {
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
- if (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1) {
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
- return;
- }
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
+ return;
}
/* signal any waiters that this dataset is going away */
cv_broadcast(&ds->ds_exclusive_cv);
mutex_exit(&ds->ds_lock);
- if (ds->ds_objset) {
- dmu_objset_evict(ds->ds_objset);
- ds->ds_objset = NULL;
- }
-
/* Remove our reservation */
if (ds->ds_reserved != 0) {
dsl_prop_setarg_t psa;
if (dsl_dataset_is_snapshot(ds_next)) {
dsl_dataset_t *ds_nextnext;
+ dsl_dataset_t *hds;
/*
* Update next's unique to include blocks which
ASSERT3P(ds_next->ds_prev, ==, NULL);
/* Collapse range in this head. */
- dsl_dataset_t *hds;
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
ds->ds_dir->dd_phys->dd_head_dataset_obj,
FTAG, &hds));
}
}
+ /*
+ * This must be done after the dsl_traverse(), because it will
+ * re-open the objset.
+ */
+ if (ds->ds_objset) {
+ dmu_objset_evict(ds->ds_objset);
+ ds->ds_objset = NULL;
+ }
+
if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
/* Erase the link in the dir */
dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
"dataset = %llu", ds->ds_object);
if (ds->ds_phys->ds_next_clones_obj != 0) {
- uint64_t count;
+ ASSERTV(uint64_t count);
ASSERT(0 == zap_count(mos,
ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
VERIFY(0 == dmu_object_free(mos,
*/
ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
- if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
+ if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
return (ENOSPC);
/*
boolean_t
dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
{
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool);
ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
dsl_pool_sync_context(dp));
if (ds->ds_prev == NULL)
return (B_FALSE);
if (ds->ds_phys->ds_bp.blk_birth >
- ds->ds_prev->ds_phys->ds_creation_txg)
- return (B_TRUE);
+ ds->ds_prev->ds_phys->ds_creation_txg) {
+ objset_t *os, *os_prev;
+ /*
+ * It may be that only the ZIL differs, because it was
+ * reset in the head. Don't count that as being
+ * modified.
+ */
+ if (dmu_objset_from_ds(ds, &os) != 0)
+ return (B_TRUE);
+ if (dmu_objset_from_ds(ds->ds_prev, &os_prev) != 0)
+ return (B_TRUE);
+ return (bcmp(&os->os_phys->os_meta_dnode,
+ &os_prev->os_phys->os_meta_dnode,
+ sizeof (os->os_phys->os_meta_dnode)) != 0);
+ }
return (B_FALSE);
}
};
static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
-static boolean_t snaplist_unstable(list_t *l);
static int
dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_t *dd;
dsl_pool_t *dp;
dmu_object_info_t doi;
- struct promotearg pa = { 0 };
+ struct promotearg pa;
struct promotenode *snap;
int err;
+	bzero(&pa, sizeof (struct promotearg));
err = dsl_dataset_hold(name, FTAG, &ds);
if (err)
return (err);
ASSERT(clone->ds_owner);
ASSERT(origin_head->ds_owner);
retry:
- /* Need exclusive access for the swap */
- rw_enter(&clone->ds_rwlock, RW_WRITER);
- if (!rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
+ /*
+ * Need exclusive access for the swap. If we're swapping these
+ * datasets back after an error, we already hold the locks.
+ */
+ if (!RW_WRITE_HELD(&clone->ds_rwlock))
+ rw_enter(&clone->ds_rwlock, RW_WRITER);
+ if (!RW_WRITE_HELD(&origin_head->ds_rwlock) &&
+ !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
rw_exit(&clone->ds_rwlock);
rw_enter(&origin_head->ds_rwlock, RW_WRITER);
if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) {
return (err);
}
-struct dsl_ds_holdarg {
- dsl_sync_task_group_t *dstg;
- char *htag;
- char *snapname;
- boolean_t recursive;
- boolean_t gotone;
- boolean_t temphold;
- char failed[MAXPATHLEN];
-};
+typedef struct zfs_hold_cleanup_arg {
+ dsl_pool_t *dp;
+ uint64_t dsobj;
+ char htag[MAXNAMELEN];
+} zfs_hold_cleanup_arg_t;
+
+static void
+dsl_dataset_user_release_onexit(void *arg)
+{
+ zfs_hold_cleanup_arg_t *ca = arg;
+
+ (void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag,
+ B_TRUE);
+ kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
+}
+
+void
+dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
+ minor_t minor)
+{
+ zfs_hold_cleanup_arg_t *ca;
+
+ ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP);
+ ca->dp = ds->ds_dir->dd_pool;
+ ca->dsobj = ds->ds_object;
+ (void) strlcpy(ca->htag, htag, sizeof (ca->htag));
+ VERIFY3U(0, ==, zfs_onexit_add_cb(minor,
+ dsl_dataset_user_release_onexit, ca, NULL));
+}
/*
- * The max length of a temporary tag prefix is the number of hex digits
- * required to express UINT64_MAX plus one for the hyphen.
+ * If you add new checks here, you may need to add
+ * additional checks to the "temporary" case in
+ * snapshot_check() in dmu_objset.c.
*/
-#define MAX_TAG_PREFIX_LEN 17
-
static int
dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
return (error);
}
-static void
+void
dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
}
int
+dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag,
+ boolean_t temphold)
+{
+ struct dsl_ds_holdarg *ha;
+ int error;
+
+ ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
+ ha->htag = htag;
+ ha->temphold = temphold;
+ error = dsl_sync_task_do(ds->ds_dir->dd_pool,
+ dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync,
+ ds, ha, 0);
+ kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+
+ return (error);
+}
+
+int
dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
- boolean_t recursive, boolean_t temphold)
+ boolean_t recursive, boolean_t temphold, int cleanup_fd)
{
struct dsl_ds_holdarg *ha;
dsl_sync_task_t *dst;
spa_t *spa;
int error;
+ minor_t minor = 0;
+
+ if (cleanup_fd != -1) {
+ /* Currently we only support cleanup-on-exit of tempholds. */
+ if (!temphold)
+ return (EINVAL);
+ error = zfs_onexit_fd_hold(cleanup_fd, &minor);
+ if (error)
+ return (error);
+ }
ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
error = spa_open(dsname, &spa, FTAG);
if (error) {
kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+ if (cleanup_fd != -1)
+ zfs_onexit_fd_rele(cleanup_fd);
return (error);
}
ha->snapname = snapname;
ha->recursive = recursive;
ha->temphold = temphold;
+
if (recursive) {
error = dmu_objset_find(dsname, dsl_dataset_user_hold_one,
ha, DS_FIND_CHILDREN);
if (dst->dst_err) {
dsl_dataset_name(ds, ha->failed);
*strchr(ha->failed, '@') = '\0';
+ } else if (error == 0 && minor != 0 && temphold) {
+ /*
+ * If this hold is to be released upon process exit,
+ * register that action now.
+ */
+ dsl_register_onexit_hold_cleanup(ds, htag, minor);
}
dsl_dataset_rele(ds, ha->dstg);
}
(void) strlcpy(dsname, ha->failed, sizeof (ha->failed));
dsl_sync_task_group_destroy(ha->dstg);
+
kmem_free(ha, sizeof (struct dsl_ds_holdarg));
spa_close(spa, FTAG);
+ if (cleanup_fd != -1)
+ zfs_onexit_fd_rele(cleanup_fd);
return (error);
}
uint64_t refs;
int error;
- if (ds->ds_objset) {
- dmu_objset_evict(ds->ds_objset);
- ds->ds_objset = NULL;
- }
-
mutex_enter(&ds->ds_lock);
ds->ds_userrefs--;
refs = ds->ds_userrefs;
}
/*
- * Called at spa_load time to release a stale temporary user hold.
+ * Called at spa_load time (with retry == B_FALSE) to release a stale
+ * temporary user hold. Also called by the onexit code (with retry == B_TRUE).
*/
int
-dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag)
+dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag,
+ boolean_t retry)
{
dsl_dataset_t *ds;
char *snap;
int namelen;
int error;
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
- rw_exit(&dp->dp_config_rwlock);
- if (error)
- return (error);
- namelen = dsl_dataset_namelen(ds)+1;
- name = kmem_alloc(namelen, KM_SLEEP);
- dsl_dataset_name(ds, name);
- dsl_dataset_rele(ds, FTAG);
+ do {
+ rw_enter(&dp->dp_config_rwlock, RW_READER);
+ error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+ rw_exit(&dp->dp_config_rwlock);
+ if (error)
+ return (error);
+ namelen = dsl_dataset_namelen(ds)+1;
+ name = kmem_alloc(namelen, KM_SLEEP);
+ dsl_dataset_name(ds, name);
+ dsl_dataset_rele(ds, FTAG);
- snap = strchr(name, '@');
- *snap = '\0';
- ++snap;
- return (dsl_dataset_user_release(name, snap, htag, B_FALSE));
+ snap = strchr(name, '@');
+ *snap = '\0';
+ ++snap;
+ error = dsl_dataset_user_release(name, snap, htag, B_FALSE);
+ kmem_free(name, namelen);
+
+ /*
+ * The object can't have been destroyed because we have a hold,
+ * but it might have been renamed, resulting in ENOENT. Retry
+ * if we've been requested to do so.
+ *
+ * It would be nice if we could use the dsobj all the way
+ * through and avoid ENOENT entirely. But we might need to
+ * unmount the snapshot, and there's currently no way to lookup
+ * a vfsp using a ZFS object id.
+ */
+ } while ((error == ENOENT) && retry);
+
+ return (error);
}
int