+
+struct dsl_ds_holdarg {
+ dsl_sync_task_group_t *dstg;
+ char *htag;
+ char *snapname;
+ boolean_t recursive;
+ boolean_t gotone;
+ boolean_t temphold;
+ char failed[MAXPATHLEN];
+};
+
+/*
+ * The max length of a temporary tag prefix is the number of hex digits
+ * required to express UINT64_MAX plus one for the hyphen.
+ */
+#define MAX_TAG_PREFIX_LEN 17
+
+static int
+dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+ dsl_dataset_t *ds = arg1;
+ struct dsl_ds_holdarg *ha = arg2;
+ char *htag = ha->htag;
+ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
+ int error = 0;
+
+ if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
+ return (ENOTSUP);
+
+ if (!dsl_dataset_is_snapshot(ds))
+ return (EINVAL);
+
+ /* tags must be unique */
+ mutex_enter(&ds->ds_lock);
+ if (ds->ds_phys->ds_userrefs_obj) {
+ error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag,
+ 8, 1, tx);
+ if (error == 0)
+ error = EEXIST;
+ else if (error == ENOENT)
+ error = 0;
+ }
+ mutex_exit(&ds->ds_lock);
+
+ if (error == 0 && ha->temphold &&
+ strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
+ error = E2BIG;
+
+ return (error);
+}
+
+static void
+dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+ dsl_dataset_t *ds = arg1;
+ struct dsl_ds_holdarg *ha = arg2;
+ char *htag = ha->htag;
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ objset_t *mos = dp->dp_meta_objset;
+ uint64_t now = gethrestime_sec();
+ uint64_t zapobj;
+
+ mutex_enter(&ds->ds_lock);
+ if (ds->ds_phys->ds_userrefs_obj == 0) {
+ /*
+ * This is the first user hold for this dataset. Create
+ * the userrefs zap object.
+ */
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ zapobj = ds->ds_phys->ds_userrefs_obj =
+ zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
+ } else {
+ zapobj = ds->ds_phys->ds_userrefs_obj;
+ }
+ ds->ds_userrefs++;
+ mutex_exit(&ds->ds_lock);
+
+ VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx));
+
+ if (ha->temphold) {
+ VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object,
+ htag, &now, tx));
+ }
+
+ spa_history_log_internal(LOG_DS_USER_HOLD,
+ dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag,
+ (int)ha->temphold, ds->ds_object);
+}
+
+static int
+dsl_dataset_user_hold_one(const char *dsname, void *arg)
+{
+ struct dsl_ds_holdarg *ha = arg;
+ dsl_dataset_t *ds;
+ int error;
+ char *name;
+
+ /* alloc a buffer to hold dsname@snapname plus terminating NULL */
+ name = kmem_asprintf("%s@%s", dsname, ha->snapname);
+ error = dsl_dataset_hold(name, ha->dstg, &ds);
+ strfree(name);
+ if (error == 0) {
+ ha->gotone = B_TRUE;
+ dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check,
+ dsl_dataset_user_hold_sync, ds, ha, 0);
+ } else if (error == ENOENT && ha->recursive) {
+ error = 0;
+ } else {
+ (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
+ }
+ return (error);
+}
+
+int
+dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
+ boolean_t recursive, boolean_t temphold)
+{
+ struct dsl_ds_holdarg *ha;
+ dsl_sync_task_t *dst;
+ spa_t *spa;
+ int error;
+
+ ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
+
+ (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
+
+ error = spa_open(dsname, &spa, FTAG);
+ if (error) {
+ kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+ return (error);
+ }
+
+ ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
+ ha->htag = htag;
+ ha->snapname = snapname;
+ ha->recursive = recursive;
+ ha->temphold = temphold;
+ if (recursive) {
+ error = dmu_objset_find(dsname, dsl_dataset_user_hold_one,
+ ha, DS_FIND_CHILDREN);
+ } else {
+ error = dsl_dataset_user_hold_one(dsname, ha);
+ }
+ if (error == 0)
+ error = dsl_sync_task_group_wait(ha->dstg);
+
+ for (dst = list_head(&ha->dstg->dstg_tasks); dst;
+ dst = list_next(&ha->dstg->dstg_tasks, dst)) {
+ dsl_dataset_t *ds = dst->dst_arg1;
+
+ if (dst->dst_err) {
+ dsl_dataset_name(ds, ha->failed);
+ *strchr(ha->failed, '@') = '\0';
+ }
+ dsl_dataset_rele(ds, ha->dstg);
+ }
+
+ if (error == 0 && recursive && !ha->gotone)
+ error = ENOENT;
+
+ if (error)
+ (void) strlcpy(dsname, ha->failed, sizeof (ha->failed));
+
+ dsl_sync_task_group_destroy(ha->dstg);
+ kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+ spa_close(spa, FTAG);
+ return (error);
+}
+
+struct dsl_ds_releasearg {
+ dsl_dataset_t *ds;
+ const char *htag;
+ boolean_t own; /* do we own or just hold ds? */
+};
+
+static int
+dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag,
+ boolean_t *might_destroy)
+{
+ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
+ uint64_t zapobj;
+ uint64_t tmp;
+ int error;
+
+ *might_destroy = B_FALSE;
+
+ mutex_enter(&ds->ds_lock);
+ zapobj = ds->ds_phys->ds_userrefs_obj;
+ if (zapobj == 0) {
+ /* The tag can't possibly exist */
+ mutex_exit(&ds->ds_lock);
+ return (ESRCH);
+ }
+
+ /* Make sure the tag exists */
+ error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp);
+ if (error) {
+ mutex_exit(&ds->ds_lock);
+ if (error == ENOENT)
+ error = ESRCH;
+ return (error);
+ }
+
+ if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 &&
+ DS_IS_DEFER_DESTROY(ds))
+ *might_destroy = B_TRUE;
+
+ mutex_exit(&ds->ds_lock);
+ return (0);
+}
+
+static int
+dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx)
+{
+ struct dsl_ds_releasearg *ra = arg1;
+ dsl_dataset_t *ds = ra->ds;
+ boolean_t might_destroy;
+ int error;
+
+ if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
+ return (ENOTSUP);
+
+ error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy);
+ if (error)
+ return (error);
+
+ if (might_destroy) {
+ struct dsl_ds_destroyarg dsda = {0};
+
+ if (dmu_tx_is_syncing(tx)) {
+ /*
+ * If we're not prepared to remove the snapshot,
+ * we can't allow the release to happen right now.
+ */
+ if (!ra->own)
+ return (EBUSY);
+ }
+ dsda.ds = ds;
+ dsda.releasing = B_TRUE;
+ return (dsl_dataset_destroy_check(&dsda, tag, tx));
+ }
+
+ return (0);
+}
+
+static void
+dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx)
+{
+ struct dsl_ds_releasearg *ra = arg1;
+ dsl_dataset_t *ds = ra->ds;
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ objset_t *mos = dp->dp_meta_objset;
+ uint64_t zapobj;
+ uint64_t dsobj = ds->ds_object;
+ uint64_t refs;
+ int error;
+
+ if (ds->ds_objset) {
+ dmu_objset_evict(ds->ds_objset);
+ ds->ds_objset = NULL;
+ }
+
+ mutex_enter(&ds->ds_lock);
+ ds->ds_userrefs--;
+ refs = ds->ds_userrefs;
+ mutex_exit(&ds->ds_lock);
+ error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx);
+ VERIFY(error == 0 || error == ENOENT);
+ zapobj = ds->ds_phys->ds_userrefs_obj;
+ VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx));
+ if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 &&
+ DS_IS_DEFER_DESTROY(ds)) {
+ struct dsl_ds_destroyarg dsda = {0};
+
+ ASSERT(ra->own);
+ dsda.ds = ds;
+ dsda.releasing = B_TRUE;
+ /* We already did the destroy_check */
+ dsl_dataset_destroy_sync(&dsda, tag, tx);
+ }
+
+ spa_history_log_internal(LOG_DS_USER_RELEASE,
+ dp->dp_spa, tx, "<%s> %lld dataset = %llu",
+ ra->htag, (longlong_t)refs, dsobj);
+}
+
+static int
+dsl_dataset_user_release_one(const char *dsname, void *arg)
+{
+ struct dsl_ds_holdarg *ha = arg;
+ struct dsl_ds_releasearg *ra;
+ dsl_dataset_t *ds;
+ int error;
+ void *dtag = ha->dstg;
+ char *name;
+ boolean_t own = B_FALSE;
+ boolean_t might_destroy;
+
+ /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */
+ name = kmem_asprintf("%s@%s", dsname, ha->snapname);
+ error = dsl_dataset_hold(name, dtag, &ds);
+ strfree(name);
+ if (error == ENOENT && ha->recursive)
+ return (0);
+ (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
+ if (error)
+ return (error);
+
+ ha->gotone = B_TRUE;
+
+ ASSERT(dsl_dataset_is_snapshot(ds));
+
+ error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy);
+ if (error) {
+ dsl_dataset_rele(ds, dtag);
+ return (error);
+ }
+
+ if (might_destroy) {
+#ifdef _KERNEL
+ name = kmem_asprintf("%s@%s", dsname, ha->snapname);
+ error = zfs_unmount_snap(name, NULL);
+ strfree(name);
+ if (error) {
+ dsl_dataset_rele(ds, dtag);
+ return (error);
+ }
+#endif
+ if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) {
+ dsl_dataset_rele(ds, dtag);
+ return (EBUSY);
+ } else {
+ own = B_TRUE;
+ dsl_dataset_make_exclusive(ds, dtag);
+ }
+ }
+
+ ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP);
+ ra->ds = ds;
+ ra->htag = ha->htag;
+ ra->own = own;
+ dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check,
+ dsl_dataset_user_release_sync, ra, dtag, 0);
+
+ return (0);
+}
+
+int
+dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
+ boolean_t recursive)
+{
+ struct dsl_ds_holdarg *ha;
+ dsl_sync_task_t *dst;
+ spa_t *spa;
+ int error;
+
+top:
+ ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
+
+ (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
+
+ error = spa_open(dsname, &spa, FTAG);
+ if (error) {
+ kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+ return (error);
+ }
+
+ ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
+ ha->htag = htag;
+ ha->snapname = snapname;
+ ha->recursive = recursive;
+ if (recursive) {
+ error = dmu_objset_find(dsname, dsl_dataset_user_release_one,
+ ha, DS_FIND_CHILDREN);
+ } else {
+ error = dsl_dataset_user_release_one(dsname, ha);
+ }
+ if (error == 0)
+ error = dsl_sync_task_group_wait(ha->dstg);
+
+ for (dst = list_head(&ha->dstg->dstg_tasks); dst;
+ dst = list_next(&ha->dstg->dstg_tasks, dst)) {
+ struct dsl_ds_releasearg *ra = dst->dst_arg1;
+ dsl_dataset_t *ds = ra->ds;
+
+ if (dst->dst_err)
+ dsl_dataset_name(ds, ha->failed);
+
+ if (ra->own)
+ dsl_dataset_disown(ds, ha->dstg);
+ else
+ dsl_dataset_rele(ds, ha->dstg);
+
+ kmem_free(ra, sizeof (struct dsl_ds_releasearg));
+ }
+
+ if (error == 0 && recursive && !ha->gotone)
+ error = ENOENT;
+
+ if (error && error != EBUSY)
+ (void) strlcpy(dsname, ha->failed, sizeof (ha->failed));
+
+ dsl_sync_task_group_destroy(ha->dstg);
+ kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+ spa_close(spa, FTAG);
+
+ /*
+ * We can get EBUSY if we were racing with deferred destroy and
+ * dsl_dataset_user_release_check() hadn't done the necessary
+ * open context setup. We can also get EBUSY if we're racing
+ * with destroy and that thread is the ds_owner. Either way
+ * the busy condition should be transient, and we should retry
+ * the release operation.
+ */
+ if (error == EBUSY)
+ goto top;
+
+ return (error);
+}
+
+/*
+ * Called at spa_load time to release a stale temporary user hold.
+ */
+int
+dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag)
+{
+ dsl_dataset_t *ds;
+ char *snap;
+ char *name;
+ int namelen;
+ int error;
+
+ rw_enter(&dp->dp_config_rwlock, RW_READER);
+ error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+ rw_exit(&dp->dp_config_rwlock);
+ if (error)
+ return (error);
+ namelen = dsl_dataset_namelen(ds)+1;
+ name = kmem_alloc(namelen, KM_SLEEP);
+ dsl_dataset_name(ds, name);
+ dsl_dataset_rele(ds, FTAG);
+
+ snap = strchr(name, '@');
+ *snap = '\0';
+ ++snap;
+ return (dsl_dataset_user_release(name, snap, htag, B_FALSE));
+}
+
+int
+dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp)
+{
+ dsl_dataset_t *ds;
+ int err;
+
+ err = dsl_dataset_hold(dsname, FTAG, &ds);
+ if (err)
+ return (err);
+
+ VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP));
+ if (ds->ds_phys->ds_userrefs_obj != 0) {
+ zap_attribute_t *za;
+ zap_cursor_t zc;
+
+ za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
+ for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset,
+ ds->ds_phys->ds_userrefs_obj);
+ zap_cursor_retrieve(&zc, za) == 0;
+ zap_cursor_advance(&zc)) {
+ VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name,
+ za->za_first_integer));
+ }
+ zap_cursor_fini(&zc);
+ kmem_free(za, sizeof (zap_attribute_t));
+ }
+ dsl_dataset_rele(ds, FTAG);
+ return (0);
+}
+
+/*
+ * Note, this fuction is used as the callback for dmu_objset_find(). We
+ * always return 0 so that we will continue to find and process
+ * inconsistent datasets, even if we encounter an error trying to
+ * process one of them.
+ */
+/* ARGSUSED */
+int
+dsl_destroy_inconsistent(const char *dsname, void *arg)
+{
+ dsl_dataset_t *ds;
+
+ if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) {
+ if (DS_IS_INCONSISTENT(ds))
+ (void) dsl_dataset_destroy(ds, FTAG, B_FALSE);
+ else
+ dsl_dataset_disown(ds, FTAG);
+ }
+ return (0);
+}