Illumos #1092: zfs refratio property
[zfs.git] / module / zfs / dsl_dataset.c
index ddd8357..26362c9 100644 (file)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 #include <sys/dmu_objset.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/spa.h>
 #include <sys/zfs_znode.h>
+#include <sys/zfs_onexit.h>
 #include <sys/zvol.h>
 #include <sys/dsl_scan.h>
 #include <sys/dsl_deadlist.h>
 
-/*
- * Enable/disable prefetching of dedup-ed blocks which are going to be freed.
- */
-int zfs_dedup_prefetch = 1;
-
 static char *dsl_reaper = "the grim reaper";
 
 static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
@@ -89,11 +86,13 @@ parent_delta(dsl_dataset_t *ds, int64_t delta)
 void
 dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
 {
-       int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
-       int compressed = BP_GET_PSIZE(bp);
-       int uncompressed = BP_GET_UCSIZE(bp);
+       int used, compressed, uncompressed;
        int64_t delta;
 
+       used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
+       compressed = BP_GET_PSIZE(bp);
+       uncompressed = BP_GET_UCSIZE(bp);
+
        dprintf_bp(bp, "ds=%p", ds);
 
        ASSERT(dmu_tx_is_syncing(tx));
@@ -134,15 +133,17 @@ int
 dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
     boolean_t async)
 {
+       int used, compressed, uncompressed;
+
        if (BP_IS_HOLE(bp))
                return (0);
 
        ASSERT(dmu_tx_is_syncing(tx));
        ASSERT(bp->blk_birth <= tx->tx_txg);
 
-       int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
-       int compressed = BP_GET_PSIZE(bp);
-       int uncompressed = BP_GET_UCSIZE(bp);
+       used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
+       compressed = BP_GET_PSIZE(bp);
+       uncompressed = BP_GET_UCSIZE(bp);
 
        ASSERT(used > 0);
        if (ds == NULL) {
@@ -253,8 +254,7 @@ dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
        if (blk_birth <= dsl_dataset_prev_snap_txg(ds))
                return (B_FALSE);
 
-       if (zfs_dedup_prefetch && bp && BP_GET_DEDUP(bp))
-               ddt_prefetch(dsl_dataset_get_spa(ds), bp);
+       ddt_prefetch(dsl_dataset_get_spa(ds), bp);
 
        return (B_TRUE);
 }
@@ -372,6 +372,7 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
        dmu_buf_t *dbuf;
        dsl_dataset_t *ds;
        int err;
+       dmu_object_info_t doi;
 
        ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
            dsl_pool_sync_context(dp));
@@ -379,19 +380,26 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
        err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
        if (err)
                return (err);
+
+       /* Make sure dsobj has the correct object type. */
+       dmu_object_info_from_db(dbuf, &doi);
+       if (doi.doi_type != DMU_OT_DSL_DATASET)
+               return (EINVAL);
+
        ds = dmu_buf_get_user(dbuf);
        if (ds == NULL) {
-               dsl_dataset_t *winner;
+               dsl_dataset_t *winner = NULL;
 
                ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
                ds->ds_dbuf = dbuf;
                ds->ds_object = dsobj;
                ds->ds_phys = dbuf->db_data;
+               list_link_init(&ds->ds_synced_link);
 
                mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
                mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL);
                mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
-               rw_init(&ds->ds_rwlock, 0, 0, 0);
+               rw_init(&ds->ds_rwlock, NULL, RW_DEFAULT, NULL);
                cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);
 
                bplist_create(&ds->ds_pending_deadlist);
@@ -881,6 +889,21 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
 
        dsl_dir_close(dd, FTAG);
 
+       /*
+        * If we are creating a clone, make sure we zero out any stale
+        * data from the origin snapshot's zil header.
+        */
+       if (origin != NULL) {
+               dsl_dataset_t *ds;
+               objset_t *os;
+
+               VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
+               VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));
+               bzero(&os->os_zil_header, sizeof (os->os_zil_header));
+               dsl_dataset_dirty(ds, tx);
+               dsl_dataset_rele(ds, FTAG);
+       }
+
        return (dsobj);
 }
 
@@ -1033,7 +1056,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
        dsl_dir_t *dd;
        uint64_t obj;
        struct dsl_ds_destroyarg dsda = { 0 };
-       dsl_dataset_t dummy_ds = { 0 };
+       dsl_dataset_t *dummy_ds;
 
        dsda.ds = ds;
 
@@ -1053,8 +1076,9 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
        }
 
        dd = ds->ds_dir;
-       dummy_ds.ds_dir = dd;
-       dummy_ds.ds_object = ds->ds_object;
+       dummy_ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
+       dummy_ds->ds_dir = dd;
+       dummy_ds->ds_object = ds->ds_object;
 
        /*
         * Check for errors and mark this ds as inconsistent, in
@@ -1063,11 +1087,11 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
        err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
            dsl_dataset_destroy_begin_sync, ds, NULL, 0);
        if (err)
-               goto out;
+               goto out_free;
 
        err = dmu_objset_from_ds(ds, &os);
        if (err)
-               goto out;
+               goto out_free;
 
        /*
         * remove the objects in open context, so that we won't
@@ -1081,11 +1105,16 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
                 */
                (void) dmu_free_object(os, obj);
        }
+       if (err != ESRCH)
+               goto out_free;
+
+       /*
+        * Only the ZIL knows how to free log blocks.
+        */
+       zil_destroy(dmu_objset_zil(os), B_FALSE);
 
        /*
-        * We need to sync out all in-flight IO before we try to evict
-        * (the dataset evict func is trying to clear the cached entries
-        * for this dataset in the ARC).
+        * Sync out all in-flight IO.
         */
        txg_wait_synced(dd->dd_pool, 0);
 
@@ -1095,23 +1124,19 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
         */
        if (ds->ds_phys->ds_bp.blk_fill == 0 &&
            dmu_objset_userused_enabled(os)) {
-               uint64_t count;
-
+               ASSERTV(uint64_t count);
                ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
                    count == 0);
                ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
                    count == 0);
        }
 
-       if (err != ESRCH)
-               goto out;
-
        rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
        err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
        rw_exit(&dd->dd_pool->dp_config_rwlock);
 
        if (err)
-               goto out;
+               goto out_free;
 
        /*
         * Blow away the dsl_dir + head dataset.
@@ -1127,7 +1152,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
                        err = dsl_dataset_origin_rm_prep(&dsda, tag);
                        if (err) {
                                dsl_dir_close(dd, FTAG);
-                               goto out;
+                               goto out_free;
                        }
                }
 
@@ -1135,7 +1160,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
                dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
                    dsl_dataset_destroy_sync, &dsda, tag, 0);
                dsl_sync_task_create(dstg, dsl_dir_destroy_check,
-                   dsl_dir_destroy_sync, &dummy_ds, FTAG, 0);
+                   dsl_dir_destroy_sync, dummy_ds, FTAG, 0);
                err = dsl_sync_task_group_wait(dstg);
                dsl_sync_task_group_destroy(dstg);
 
@@ -1158,6 +1183,9 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
        /* if it is successful, dsl_dir_destroy_sync will close the dd */
        if (err)
                dsl_dir_close(dd, FTAG);
+
+out_free:
+       kmem_free(dummy_ds, sizeof (dsl_dataset_t));
 out:
        dsl_dataset_disown(ds, tag);
        return (err);
@@ -1356,6 +1384,11 @@ dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag,
        return (0);
 }
 
+/*
+ * If you add new checks here, you may need to add
+ * additional checks to the "temporary" case in
+ * snapshot_check() in dmu_objset.c.
+ */
 /* ARGSUSED */
 int
 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
@@ -1467,8 +1500,8 @@ static void
 remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx)
 {
        objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
-       uint64_t count;
        int err;
+       ASSERTV(uint64_t count);
 
        ASSERT(ds->ds_phys->ds_num_children >= 2);
        err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx);
@@ -1597,21 +1630,23 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
        dsl_pool_t *dp = ds->ds_dir->dd_pool;
        objset_t *mos = dp->dp_meta_objset;
        dsl_dataset_t *ds_prev = NULL;
+       boolean_t wont_destroy;
        uint64_t obj;
 
-       ASSERT(ds->ds_owner);
+       wont_destroy = (dsda->defer &&
+           (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1));
+
+       ASSERT(ds->ds_owner || wont_destroy);
        ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1);
        ASSERT(ds->ds_prev == NULL ||
            ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
        ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
 
-       if (dsda->defer) {
+       if (wont_destroy) {
                ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
-               if (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1) {
-                       dmu_buf_will_dirty(ds->ds_dbuf, tx);
-                       ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
-                       return;
-               }
+               dmu_buf_will_dirty(ds->ds_dbuf, tx);
+               ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
+               return;
        }
 
        /* signal any waiters that this dataset is going away */
@@ -1620,11 +1655,6 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
        cv_broadcast(&ds->ds_exclusive_cv);
        mutex_exit(&ds->ds_lock);
 
-       if (ds->ds_objset) {
-               dmu_objset_evict(ds->ds_objset);
-               ds->ds_objset = NULL;
-       }
-
        /* Remove our reservation */
        if (ds->ds_reserved != 0) {
                dsl_prop_setarg_t psa;
@@ -1751,6 +1781,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
 
                if (dsl_dataset_is_snapshot(ds_next)) {
                        dsl_dataset_t *ds_nextnext;
+                       dsl_dataset_t *hds;
 
                        /*
                         * Update next's unique to include blocks which
@@ -1773,7 +1804,6 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
                        ASSERT3P(ds_next->ds_prev, ==, NULL);
 
                        /* Collapse range in this head. */
-                       dsl_dataset_t *hds;
                        VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
                            ds->ds_dir->dd_phys->dd_head_dataset_obj,
                            FTAG, &hds));
@@ -1850,6 +1880,15 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
                }
        }
 
+       /*
+        * This must be done after the dsl_traverse(), because it will
+        * re-open the objset.
+        */
+       if (ds->ds_objset) {
+               dmu_objset_evict(ds->ds_objset);
+               ds->ds_objset = NULL;
+       }
+
        if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
                /* Erase the link in the dir */
                dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
@@ -1887,7 +1926,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
            "dataset = %llu", ds->ds_object);
 
        if (ds->ds_phys->ds_next_clones_obj != 0) {
-               uint64_t count;
+               ASSERTV(uint64_t count);
                ASSERT(0 == zap_count(mos,
                    ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
                VERIFY(0 == dmu_object_free(mos,
@@ -1928,7 +1967,7 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
         */
        ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
        asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
-       if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
+       if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
                return (ENOSPC);
 
        /*
@@ -2115,7 +2154,7 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
 void
 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
 {
-       uint64_t refd, avail, uobjs, aobjs;
+       uint64_t refd, avail, uobjs, aobjs, ratio;
 
        dsl_dir_stats(ds->ds_dir, nv);
 
@@ -2142,6 +2181,11 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
        dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
            DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
 
+       ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
+           (ds->ds_phys->ds_uncompressed_bytes * 100 /
+           ds->ds_phys->ds_compressed_bytes);
+       dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
+
        if (ds->ds_phys->ds_next_snap_obj) {
                /*
                 * This is a snapshot; override the dd's space used with
@@ -2149,10 +2193,7 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
                 */
                dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
                    ds->ds_phys->ds_unique_bytes);
-               dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
-                   ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
-                   (ds->ds_phys->ds_uncompressed_bytes * 100 /
-                   ds->ds_phys->ds_compressed_bytes));
+               dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
        }
 }
 
@@ -2217,15 +2258,28 @@ dsl_dataset_space(dsl_dataset_t *ds,
 boolean_t
 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
 {
-       dsl_pool_t *dp = ds->ds_dir->dd_pool;
+       ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool);
 
        ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
            dsl_pool_sync_context(dp));
        if (ds->ds_prev == NULL)
                return (B_FALSE);
        if (ds->ds_phys->ds_bp.blk_birth >
-           ds->ds_prev->ds_phys->ds_creation_txg)
-               return (B_TRUE);
+           ds->ds_prev->ds_phys->ds_creation_txg) {
+               objset_t *os, *os_prev;
+               /*
+                * It may be that only the ZIL differs, because it was
+                * reset in the head.  Don't count that as being
+                * modified.
+                */
+               if (dmu_objset_from_ds(ds, &os) != 0)
+                       return (B_TRUE);
+               if (dmu_objset_from_ds(ds->ds_prev, &os_prev) != 0)
+                       return (B_TRUE);
+               return (bcmp(&os->os_phys->os_meta_dnode,
+                   &os_prev->os_phys->os_meta_dnode,
+                   sizeof (os->os_phys->os_meta_dnode)) != 0);
+       }
        return (B_FALSE);
 }
 
@@ -2319,7 +2373,8 @@ dsl_snapshot_rename_one(const char *name, void *arg)
                return (err == ENOENT ? 0 : err);
        }
 
-#ifdef _KERNEL
+/* XXX: Ignore for SPL version until mounting the FS is supported */
+#if defined(_KERNEL) && !defined(HAVE_SPL)
        /*
         * For all filesystems undergoing rename, we'll need to unmount it.
         */
@@ -2475,7 +2530,6 @@ struct promotearg {
 };
 
 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
-static boolean_t snaplist_unstable(list_t *l);
 
 static int
 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
@@ -2876,10 +2930,11 @@ dsl_dataset_promote(const char *name, char *conflsnap)
        dsl_dir_t *dd;
        dsl_pool_t *dp;
        dmu_object_info_t doi;
-       struct promotearg pa = { 0 };
+       struct promotearg pa;
        struct promotenode *snap;
        int err;
 
+       bzero(&pa, sizeof(struct promotearg));
        err = dsl_dataset_hold(name, FTAG, &ds);
        if (err)
                return (err);
@@ -3144,9 +3199,14 @@ dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
        ASSERT(clone->ds_owner);
        ASSERT(origin_head->ds_owner);
 retry:
-       /* Need exclusive access for the swap */
-       rw_enter(&clone->ds_rwlock, RW_WRITER);
-       if (!rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
+       /*
+        * Need exclusive access for the swap. If we're swapping these
+        * datasets back after an error, we already hold the locks.
+        */
+       if (!RW_WRITE_HELD(&clone->ds_rwlock))
+               rw_enter(&clone->ds_rwlock, RW_WRITER);
+       if (!RW_WRITE_HELD(&origin_head->ds_rwlock) &&
+           !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
                rw_exit(&clone->ds_rwlock);
                rw_enter(&origin_head->ds_rwlock, RW_WRITER);
                if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) {
@@ -3411,22 +3471,41 @@ dsl_dataset_set_reservation(const char *dsname, zprop_source_t source,
        return (err);
 }
 
-struct dsl_ds_holdarg {
-       dsl_sync_task_group_t *dstg;
-       char *htag;
-       char *snapname;
-       boolean_t recursive;
-       boolean_t gotone;
-       boolean_t temphold;
-       char failed[MAXPATHLEN];
-};
+typedef struct zfs_hold_cleanup_arg {
+       dsl_pool_t *dp;
+       uint64_t dsobj;
+       char htag[MAXNAMELEN];
+} zfs_hold_cleanup_arg_t;
+
+static void
+dsl_dataset_user_release_onexit(void *arg)
+{
+       zfs_hold_cleanup_arg_t *ca = arg;
+
+       (void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag,
+           B_TRUE);
+       kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
+}
+
+void
+dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
+    minor_t minor)
+{
+       zfs_hold_cleanup_arg_t *ca;
+
+       ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP);
+       ca->dp = ds->ds_dir->dd_pool;
+       ca->dsobj = ds->ds_object;
+       (void) strlcpy(ca->htag, htag, sizeof (ca->htag));
+       VERIFY3U(0, ==, zfs_onexit_add_cb(minor,
+           dsl_dataset_user_release_onexit, ca, NULL));
+}
 
 /*
- * The max length of a temporary tag prefix is the number of hex digits
- * required to express UINT64_MAX plus one for the hyphen.
+ * If you add new checks here, you may need to add
+ * additional checks to the "temporary" case in
+ * snapshot_check() in dmu_objset.c.
  */
-#define        MAX_TAG_PREFIX_LEN      17
-
 static int
 dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx)
 {
@@ -3461,7 +3540,7 @@ dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx)
        return (error);
 }
 
-static void
+void
 dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
        dsl_dataset_t *ds = arg1;
@@ -3524,13 +3603,41 @@ dsl_dataset_user_hold_one(const char *dsname, void *arg)
 }
 
 int
+dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag,
+    boolean_t temphold)
+{
+       struct dsl_ds_holdarg *ha;
+       int error;
+
+       ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
+       ha->htag = htag;
+       ha->temphold = temphold;
+       error = dsl_sync_task_do(ds->ds_dir->dd_pool,
+           dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync,
+           ds, ha, 0);
+       kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+
+       return (error);
+}
+
+int
 dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
-    boolean_t recursive, boolean_t temphold)
+    boolean_t recursive, boolean_t temphold, int cleanup_fd)
 {
        struct dsl_ds_holdarg *ha;
        dsl_sync_task_t *dst;
        spa_t *spa;
        int error;
+       minor_t minor = 0;
+
+       if (cleanup_fd != -1) {
+               /* Currently we only support cleanup-on-exit of tempholds. */
+               if (!temphold)
+                       return (EINVAL);
+               error = zfs_onexit_fd_hold(cleanup_fd, &minor);
+               if (error)
+                       return (error);
+       }
 
        ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
 
@@ -3539,6 +3646,8 @@ dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
        error = spa_open(dsname, &spa, FTAG);
        if (error) {
                kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+               if (cleanup_fd != -1)
+                       zfs_onexit_fd_rele(cleanup_fd);
                return (error);
        }
 
@@ -3547,6 +3656,7 @@ dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
        ha->snapname = snapname;
        ha->recursive = recursive;
        ha->temphold = temphold;
+
        if (recursive) {
                error = dmu_objset_find(dsname, dsl_dataset_user_hold_one,
                    ha, DS_FIND_CHILDREN);
@@ -3563,6 +3673,12 @@ dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
                if (dst->dst_err) {
                        dsl_dataset_name(ds, ha->failed);
                        *strchr(ha->failed, '@') = '\0';
+               } else if (error == 0 && minor != 0 && temphold) {
+                       /*
+                        * If this hold is to be released upon process exit,
+                        * register that action now.
+                        */
+                       dsl_register_onexit_hold_cleanup(ds, htag, minor);
                }
                dsl_dataset_rele(ds, ha->dstg);
        }
@@ -3574,8 +3690,11 @@ dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
                (void) strlcpy(dsname, ha->failed, sizeof (ha->failed));
 
        dsl_sync_task_group_destroy(ha->dstg);
+
        kmem_free(ha, sizeof (struct dsl_ds_holdarg));
        spa_close(spa, FTAG);
+       if (cleanup_fd != -1)
+               zfs_onexit_fd_rele(cleanup_fd);
        return (error);
 }
 
@@ -3667,11 +3786,6 @@ dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx)
        uint64_t refs;
        int error;
 
-       if (ds->ds_objset) {
-               dmu_objset_evict(ds->ds_objset);
-               ds->ds_objset = NULL;
-       }
-
        mutex_enter(&ds->ds_lock);
        ds->ds_userrefs--;
        refs = ds->ds_userrefs;
@@ -3831,10 +3945,12 @@ top:
 }
 
 /*
- * Called at spa_load time to release a stale temporary user hold.
+ * Called at spa_load time (with retry == B_FALSE) to release a stale
+ * temporary user hold. Also called by the onexit code (with retry == B_TRUE).
  */
 int
-dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag)
+dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag,
+    boolean_t retry)
 {
        dsl_dataset_t *ds;
        char *snap;
@@ -3842,20 +3958,36 @@ dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag)
        int namelen;
        int error;
 
-       rw_enter(&dp->dp_config_rwlock, RW_READER);
-       error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
-       rw_exit(&dp->dp_config_rwlock);
-       if (error)
-               return (error);
-       namelen = dsl_dataset_namelen(ds)+1;
-       name = kmem_alloc(namelen, KM_SLEEP);
-       dsl_dataset_name(ds, name);
-       dsl_dataset_rele(ds, FTAG);
+       do {
+               rw_enter(&dp->dp_config_rwlock, RW_READER);
+               error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+               rw_exit(&dp->dp_config_rwlock);
+               if (error)
+                       return (error);
+               namelen = dsl_dataset_namelen(ds)+1;
+               name = kmem_alloc(namelen, KM_SLEEP);
+               dsl_dataset_name(ds, name);
+               dsl_dataset_rele(ds, FTAG);
+
+               snap = strchr(name, '@');
+               *snap = '\0';
+               ++snap;
+               error = dsl_dataset_user_release(name, snap, htag, B_FALSE);
+               kmem_free(name, namelen);
 
-       snap = strchr(name, '@');
-       *snap = '\0';
-       ++snap;
-       return (dsl_dataset_user_release(name, snap, htag, B_FALSE));
+               /*
+                * The object can't have been destroyed because we have a hold,
+                * but it might have been renamed, resulting in ENOENT.  Retry
+                * if we've been requested to do so.
+                *
+                * It would be nice if we could use the dsobj all the way
+                * through and avoid ENOENT entirely.  But we might need to
+                * unmount the snapshot, and there's currently no way to
+                * look up a vfsp using a ZFS object id.
+                */
+       } while ((error == ENOENT) && retry);
+
+       return (error);
 }
 
 int
@@ -3908,3 +4040,51 @@ dsl_destroy_inconsistent(const char *dsname, void *arg)
        }
        return (0);
 }
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dsl_dataset_hold);
+EXPORT_SYMBOL(dsl_dataset_hold_obj);
+EXPORT_SYMBOL(dsl_dataset_own);
+EXPORT_SYMBOL(dsl_dataset_own_obj);
+EXPORT_SYMBOL(dsl_dataset_name);
+EXPORT_SYMBOL(dsl_dataset_rele);
+EXPORT_SYMBOL(dsl_dataset_disown);
+EXPORT_SYMBOL(dsl_dataset_drop_ref);
+EXPORT_SYMBOL(dsl_dataset_tryown);
+EXPORT_SYMBOL(dsl_dataset_make_exclusive);
+EXPORT_SYMBOL(dsl_dataset_create_sync);
+EXPORT_SYMBOL(dsl_dataset_create_sync_dd);
+EXPORT_SYMBOL(dsl_dataset_destroy);
+EXPORT_SYMBOL(dsl_snapshots_destroy);
+EXPORT_SYMBOL(dsl_dataset_destroy_check);
+EXPORT_SYMBOL(dsl_dataset_destroy_sync);
+EXPORT_SYMBOL(dsl_dataset_snapshot_check);
+EXPORT_SYMBOL(dsl_dataset_snapshot_sync);
+EXPORT_SYMBOL(dsl_dataset_rename);
+EXPORT_SYMBOL(dsl_dataset_promote);
+EXPORT_SYMBOL(dsl_dataset_clone_swap);
+EXPORT_SYMBOL(dsl_dataset_user_hold);
+EXPORT_SYMBOL(dsl_dataset_user_release);
+EXPORT_SYMBOL(dsl_dataset_user_release_tmp);
+EXPORT_SYMBOL(dsl_dataset_get_holds);
+EXPORT_SYMBOL(dsl_dataset_get_blkptr);
+EXPORT_SYMBOL(dsl_dataset_set_blkptr);
+EXPORT_SYMBOL(dsl_dataset_get_spa);
+EXPORT_SYMBOL(dsl_dataset_modified_since_lastsnap);
+EXPORT_SYMBOL(dsl_dataset_sync);
+EXPORT_SYMBOL(dsl_dataset_block_born);
+EXPORT_SYMBOL(dsl_dataset_block_kill);
+EXPORT_SYMBOL(dsl_dataset_block_freeable);
+EXPORT_SYMBOL(dsl_dataset_prev_snap_txg);
+EXPORT_SYMBOL(dsl_dataset_dirty);
+EXPORT_SYMBOL(dsl_dataset_stats);
+EXPORT_SYMBOL(dsl_dataset_fast_stat);
+EXPORT_SYMBOL(dsl_dataset_space);
+EXPORT_SYMBOL(dsl_dataset_fsid_guid);
+EXPORT_SYMBOL(dsl_dsobj_to_dsname);
+EXPORT_SYMBOL(dsl_dataset_check_quota);
+EXPORT_SYMBOL(dsl_dataset_set_quota);
+EXPORT_SYMBOL(dsl_dataset_set_quota_sync);
+EXPORT_SYMBOL(dsl_dataset_set_reservation);
+EXPORT_SYMBOL(dsl_destroy_inconsistent);
+#endif