+/*
+ * Tear down the synchronization state embedded in a ztest_ds_t: the
+ * directory-object mutex, the zilog rwlock, and every slot of the
+ * per-object and per-range lock arrays.
+ */
+static void
+ztest_zd_fini(ztest_ds_t *zd)
+{
+	int obj_slot = 0;
+	int range_slot = 0;
+
+	mutex_destroy(&zd->zd_dirobj_lock);
+	rw_destroy(&zd->zd_zilog_lock);
+
+	while (obj_slot < ZTEST_OBJECT_LOCKS) {
+		ztest_rll_destroy(&zd->zd_object_lock[obj_slot]);
+		obj_slot++;
+	}
+
+	while (range_slot < ZTEST_RANGE_LOCKS) {
+		ztest_rll_destroy(&zd->zd_range_lock[range_slot]);
+		range_slot++;
+	}
+}
+
+#define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT) /* TXG_NOWAIT when ztest_random(10) yields 0, TXG_WAIT otherwise */
+
+/*
+ * Try to assign tx to a transaction group.
+ *
+ * Returns the (nonzero) txg on success.  On failure the tx is aborted
+ * and 0 is returned: for ERESTART we first wait on the tx, for any
+ * other error (expected to be ENOSPC) the event is recorded against
+ * 'tag'.  Callers must not touch tx again after a 0 return.
+ */
+static uint64_t
+ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag)
+{
+	int error = dmu_tx_assign(tx, txg_how);
+
+	if (error == 0) {
+		uint64_t txg = dmu_tx_get_txg(tx);
+
+		ASSERT(txg != 0);
+		return (txg);
+	}
+
+	if (error != ERESTART) {
+		ASSERT3U(error, ==, ENOSPC);
+		ztest_record_enospc(tag);
+	} else {
+		/* Only a non-blocking assignment may be asked to restart. */
+		ASSERT(txg_how == TXG_NOWAIT);
+		dmu_tx_wait(tx);
+	}
+
+	dmu_tx_abort(tx);
+	return (0);
+}
+
+/*
+ * Fill a buffer with copies of a 64-bit pattern.
+ *
+ *	buf	destination, written as an array of uint64_t
+ *	size	length of buf in bytes
+ *	value	pattern stored into every word
+ *
+ * Only whole 64-bit words are written.  When size is not a multiple of
+ * sizeof (uint64_t) the trailing partial word is left untouched, so the
+ * function never stores beyond buf + size (a pointer-bound loop would
+ * write one full word past the end in that case).
+ */
+static void
+ztest_pattern_set(void *buf, uint64_t size, uint64_t value)
+{
+	uint64_t *ip = buf;
+	uint64_t nwords = size / sizeof (uint64_t);
+	uint64_t i;
+
+	for (i = 0; i < nwords; i++)
+		ip[i] = value;
+}
+
+#ifndef NDEBUG
+/*
+ * Check that a buffer consists solely of copies of a 64-bit pattern.
+ *
+ *	buf	buffer to inspect, read as an array of uint64_t
+ *	size	length of buf in bytes
+ *	value	pattern every word is expected to hold
+ *
+ * Returns true when every whole 64-bit word equals value.  Only whole
+ * words are examined: when size is not a multiple of sizeof (uint64_t)
+ * the trailing partial word is ignored, so the function never reads
+ * beyond buf + size.  Compiled only in debug builds (it is bracketed by
+ * #ifndef NDEBUG).
+ */
+static boolean_t
+ztest_pattern_match(void *buf, uint64_t size, uint64_t value)
+{
+	uint64_t *ip = buf;
+	uint64_t nwords = size / sizeof (uint64_t);
+	uint64_t diff = 0;
+	uint64_t i;
+
+	/* Accumulate differences; any mismatching word leaves diff != 0. */
+	for (i = 0; i < nwords; i++)
+		diff |= (value - ip[i]);
+
+	return (diff == 0);
+}
+#endif
+
+/*
+ * Fill in a block tag: the objset id of 'os', the object and offset the
+ * tag belongs to, and the generation / txg / creation-txg values given
+ * by the caller.  Every field of *bt named here is overwritten.
+ */
+static void
+ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
+    uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
+{
+	/* Location of the tagged block. */
+	bt->bt_objset = dmu_objset_id(os);
+	bt->bt_object = object;
+	bt->bt_offset = offset;
+
+	/* Version information. */
+	bt->bt_gen = gen;
+	bt->bt_txg = txg;
+	bt->bt_crtxg = crtxg;
+
+	/* Marks the structure as a ztest block tag. */
+	bt->bt_magic = BT_MAGIC;
+}
+
+static void
+ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
+ uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
+{ /*
+ * Assert that block tag bt is consistent with the caller's expectations.
+ * Magic, objset id, object, offset and creation txg must match exactly;
+ * gen and txg are upper bounds. The checks are ASSERTs, so whether they
+ * run at all depends on how ASSERT is defined for the build.
+ */
+ ASSERT(bt->bt_magic == BT_MAGIC);
+ ASSERT(bt->bt_objset == dmu_objset_id(os));
+ ASSERT(bt->bt_object == object);
+ ASSERT(bt->bt_offset == offset);
+ ASSERT(bt->bt_gen <= gen); /* tag may be older than gen, never newer */
+ ASSERT(bt->bt_txg <= txg); /* likewise for the txg */
+ ASSERT(bt->bt_crtxg == crtxg);
+}
+
+/*
+ * Locate the block tag stored in an object's bonus buffer.
+ *
+ * The tag occupies the last sizeof (ztest_block_tag_t) bytes of the
+ * bonus area, whose current length is taken from the object info of db.
+ * Returns a pointer into db->db_data; no hold is taken or released.
+ */
+static ztest_block_tag_t *
+ztest_bt_bonus(dmu_buf_t *db)
+{
+	ztest_block_tag_t *bt;
+	dmu_object_info_t doi;
+	char *bonus_end;
+
+	dmu_object_info_from_db(db, &doi);
+	ASSERT3U(doi.doi_bonus_size, <=, db->db_size);
+	ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt));
+
+	bonus_end = (char *)db->db_data + doi.doi_bonus_size;
+	bt = (void *)(bonus_end - sizeof (*bt));
+
+	return (bt);
+}
+
+/*
+ * ZIL logging ops
+ */
+
+#define lrz_type lr_mode /* object type handed to the create/claim calls */
+#define lrz_blocksize lr_uid /* block size given to dmu_object_set_blocksize() */
+#define lrz_ibshift lr_gid /* ibshift given to dmu_object_set_blocksize() */
+#define lrz_bonustype lr_rdev /* bonus type handed to the create/claim calls */
+#define lrz_bonuslen lr_crtime[1] /* bonus length handed to the create/claim calls */
+
+/*
+ * Log a TX_CREATE record for lr (and the NUL-terminated name stored
+ * directly behind it) in the dataset's ZIL.  Nothing is logged while
+ * the ZIL reports that it is replaying.
+ */
+static void
+ztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr)
+{
+	const char *name = (const char *)(lr + 1);	/* trails the record */
+	size_t reclen;
+	itx_t *itx;
+
+	if (zil_replaying(zd->zd_zilog, tx))
+		return;
+
+	/* Record plus name, including the terminating NUL. */
+	reclen = sizeof (*lr) + strlen(name) + 1;
+	itx = zil_itx_create(TX_CREATE, reclen);
+
+	/* Copy everything after the common header, which the itx owns. */
+	bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, reclen - sizeof (lr_t));
+
+	zil_itx_assign(zd->zd_zilog, itx, tx);
+}
+
+/*
+ * Log a TX_REMOVE record for lr (and the NUL-terminated name stored
+ * directly behind it), noting the removed object number in itx_oid.
+ * Nothing is logged while the ZIL reports that it is replaying.
+ */
+static void
+ztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object)
+{
+	const char *name = (const char *)(lr + 1);	/* trails the record */
+	size_t reclen;
+	itx_t *itx;
+
+	if (zil_replaying(zd->zd_zilog, tx))
+		return;
+
+	/* Record plus name, including the terminating NUL. */
+	reclen = sizeof (*lr) + strlen(name) + 1;
+	itx = zil_itx_create(TX_REMOVE, reclen);
+
+	/* Copy everything after the common header, which the itx owns. */
+	bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, reclen - sizeof (lr_t));
+	itx->itx_oid = object;
+
+	zil_itx_assign(zd->zd_zilog, itx, tx);
+}
+
+/*
+ * Log a TX_WRITE record for lr.
+ *
+ * The write state is picked at random, forced to WR_INDIRECT when the
+ * write is larger than ZIL_MAX_LOG_DATA.  For WR_COPIED the data is read
+ * back from the object into the itx; if that read fails the itx is
+ * rebuilt without a payload and downgraded to WR_NEED_COPY.  Nothing is
+ * logged while the ZIL reports that it is replaying.
+ */
+static void
+ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr)
+{
+	/* Drawn before the replay check, so a random value is always used. */
+	itx_wr_state_t write_state = ztest_random(WR_NUM_STATES);
+	uint64_t payload;
+	itx_t *itx;
+
+	if (zil_replaying(zd->zd_zilog, tx))
+		return;
+
+	if (lr->lr_length > ZIL_MAX_LOG_DATA)
+		write_state = WR_INDIRECT;
+
+	payload = (write_state == WR_COPIED) ? lr->lr_length : 0;
+	itx = zil_itx_create(TX_WRITE, sizeof (*lr) + payload);
+
+	if (write_state == WR_COPIED) {
+		void *copy_dst = ((lr_write_t *)&itx->itx_lr) + 1;
+
+		if (dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset,
+		    lr->lr_length, copy_dst, DMU_READ_NO_PREFETCH) != 0) {
+			zil_itx_destroy(itx);
+			itx = zil_itx_create(TX_WRITE, sizeof (*lr));
+			write_state = WR_NEED_COPY;
+		}
+	}
+
+	itx->itx_private = zd;
+	itx->itx_wr_state = write_state;
+	itx->itx_sync = (ztest_random(8) == 0);
+	if (write_state == WR_NEED_COPY)
+		itx->itx_sod += lr->lr_length;
+
+	/* Copy everything after the common header, which the itx owns. */
+	bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
+	    sizeof (*lr) - sizeof (lr_t));
+
+	zil_itx_assign(zd->zd_zilog, itx, tx);
+}
+
+/*
+ * Log a TX_TRUNCATE record for lr as a non-synchronous itx.  Nothing is
+ * logged while the ZIL reports that it is replaying.
+ */
+static void
+ztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr)
+{
+	size_t reclen = sizeof (*lr);
+	itx_t *itx;
+
+	if (zil_replaying(zd->zd_zilog, tx))
+		return;
+
+	itx = zil_itx_create(TX_TRUNCATE, reclen);
+
+	/* Copy everything after the common header, which the itx owns. */
+	bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, reclen - sizeof (lr_t));
+	itx->itx_sync = B_FALSE;
+
+	zil_itx_assign(zd->zd_zilog, itx, tx);
+}
+
+/*
+ * Log a TX_SETATTR record for lr as a non-synchronous itx.  Nothing is
+ * logged while the ZIL reports that it is replaying.
+ */
+static void
+ztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr)
+{
+	size_t reclen = sizeof (*lr);
+	itx_t *itx;
+
+	if (zil_replaying(zd->zd_zilog, tx))
+		return;
+
+	itx = zil_itx_create(TX_SETATTR, reclen);
+
+	/* Copy everything after the common header, which the itx owns. */
+	bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, reclen - sizeof (lr_t));
+	itx->itx_sync = B_FALSE;
+
+	zil_itx_assign(zd->zd_zilog, itx, tx);
+}
+
+/*
+ * ZIL replay ops
+ */
+static int
+ztest_replay_create(ztest_ds_t *zd, lr_create_t *lr, boolean_t byteswap)
+{ /*
+ * Create (lr_foid == 0) or claim (lr_foid != 0) the object described by
+ * lr, stamp a block tag into its bonus buffer, add it to directory
+ * lr_doid under the name stored behind lr, and log the create.
+ * On allocation lr->lr_foid is updated with the new object number.
+ * Returns 0 on success, ENOSPC if no txg could be assigned, or the
+ * error from the claim call (asserted to be EEXIST).
+ */
+ char *name = (void *)(lr + 1); /* name follows lr */
+ objset_t *os = zd->zd_os;
+ ztest_block_tag_t *bbt;
+ dmu_buf_t *db;
+ dmu_tx_t *tx;
+ uint64_t txg;
+ int error = 0;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ ASSERT(lr->lr_doid == ZTEST_DIROBJ);
+ ASSERT(name[0] != '\0');
+
+ tx = dmu_tx_create(os);
+
+ dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name); /* directory entry to be added */
+
+ if (lr->lrz_type == DMU_OT_ZAP_OTHER) { /* hold matches the kind of object created below */
+ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
+ } else {
+ dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
+ }
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+ if (txg == 0)
+ return (ENOSPC); /* ztest_tx_assign() has already aborted tx */
+
+ ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid); /* an object number is preset exactly when replaying */
+
+ if (lr->lrz_type == DMU_OT_ZAP_OTHER) {
+ if (lr->lr_foid == 0) { /* allocate a fresh ZAP object */
+ lr->lr_foid = zap_create(os,
+ lr->lrz_type, lr->lrz_bonustype,
+ lr->lrz_bonuslen, tx);
+ } else { /* claim the recorded object number */
+ error = zap_create_claim(os, lr->lr_foid,
+ lr->lrz_type, lr->lrz_bonustype,
+ lr->lrz_bonuslen, tx);
+ }
+ } else {
+ if (lr->lr_foid == 0) { /* allocate a fresh plain object */
+ lr->lr_foid = dmu_object_alloc(os,
+ lr->lrz_type, 0, lr->lrz_bonustype,
+ lr->lrz_bonuslen, tx);
+ } else { /* claim the recorded object number */
+ error = dmu_object_claim(os, lr->lr_foid,
+ lr->lrz_type, 0, lr->lrz_bonustype,
+ lr->lrz_bonuslen, tx);
+ }
+ }
+
+ if (error) { /* only the claim paths can set error */
+ ASSERT3U(error, ==, EEXIST);
+ ASSERT(zd->zd_zilog->zl_replay);
+ dmu_tx_commit(tx);
+ return (error);
+ }
+
+ ASSERT(lr->lr_foid != 0);
+
+ if (lr->lrz_type != DMU_OT_ZAP_OTHER)
+ VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid,
+ lr->lrz_blocksize, lr->lrz_ibshift, tx));
+
+ VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
+ bbt = ztest_bt_bonus(db);
+ dmu_buf_will_dirty(db, tx);
+ ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg); /* bonus tag: offset -1, txg and crtxg both this txg */
+ dmu_buf_rele(db, FTAG);
+
+ VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1,
+ &lr->lr_foid, tx));
+
+ (void) ztest_log_create(zd, tx, lr);
+
+ dmu_tx_commit(tx);
+
+ return (0);
+}
+
+/*
+ * Remove the object that directory lr_doid lists under the name stored
+ * behind lr: destroy the object (ZAP or plain, according to its type),
+ * delete the directory entry and log the removal, all in one tx while
+ * holding the object's lock as writer.
+ *
+ * Returns 0 on success or ENOSPC when no txg could be assigned.
+ */
+static int
+ztest_replay_remove(ztest_ds_t *zd, lr_remove_t *lr, boolean_t byteswap)
+{
+	objset_t *os = zd->zd_os;
+	char *name = (void *)(lr + 1);	/* name trails the record */
+	dmu_object_info_t doi;
+	uint64_t object;
+	uint64_t txg;
+	dmu_tx_t *tx;
+
+	if (byteswap)
+		byteswap_uint64_array(lr, sizeof (*lr));
+
+	ASSERT(lr->lr_doid == ZTEST_DIROBJ);
+	ASSERT(name[0] != '\0');
+
+	VERIFY3U(0, ==,
+	    zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object));
+	ASSERT(object != 0);
+
+	ztest_object_lock(zd, object, RL_WRITER);
+
+	VERIFY3U(0, ==, dmu_object_info(os, object, &doi));
+
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name);
+	dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
+
+	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+	if (txg != 0) {
+		if (doi.doi_type != DMU_OT_ZAP_OTHER) {
+			VERIFY3U(0, ==, dmu_object_free(os, object, tx));
+		} else {
+			VERIFY3U(0, ==, zap_destroy(os, object, tx));
+		}
+
+		VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx));
+
+		(void) ztest_log_remove(zd, tx, lr, object);
+
+		dmu_tx_commit(tx);
+	}
+
+	/* On failure ztest_tx_assign() has already aborted the tx. */
+	ztest_object_unlock(zd, object);
+
+	return (txg != 0 ? 0 : ENOSPC);
+}
+
+static int
+ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
+{ /*
+ * Apply the write described by lr (payload stored directly behind it)
+ * to object lr_foid and log it. Holds the object lock as reader and the
+ * affected range as writer for the duration. If the payload starts with
+ * a ztest block tag, the tag is checked and then regenerated from the
+ * object's bonus-buffer tag before the data is written.
+ * Returns 0 on success or ENOSPC when no txg could be assigned.
+ */
+ objset_t *os = zd->zd_os;
+ void *data = lr + 1; /* data follows lr */
+ uint64_t offset, length;
+ ztest_block_tag_t *bt = data;
+ ztest_block_tag_t *bbt;
+ uint64_t gen, txg, lrtxg, crtxg;
+ dmu_object_info_t doi;
+ dmu_tx_t *tx;
+ dmu_buf_t *db;
+ arc_buf_t *abuf = NULL;
+ rl_t *rl;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ offset = lr->lr_offset;
+ length = lr->lr_length;
+
+ /* If it's a dmu_sync() block, write the whole block */
+ if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
+ uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
+ if (length < blocksize) {
+ offset -= offset % blocksize; /* round down to the block start */
+ length = blocksize;
+ }
+ }
+
+ if (bt->bt_magic == BSWAP_64(BT_MAGIC)) /* tag stored in the opposite byte order */
+ byteswap_uint64_array(bt, sizeof (*bt));
+
+ if (bt->bt_magic != BT_MAGIC) /* payload does not begin with a block tag */
+ bt = NULL;
+
+ ztest_object_lock(zd, lr->lr_foid, RL_READER);
+ rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER);
+
+ VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
+
+ dmu_object_info_from_db(db, &doi);
+
+ bbt = ztest_bt_bonus(db);
+ ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
+ gen = bbt->bt_gen;
+ crtxg = bbt->bt_crtxg;
+ lrtxg = lr->lr_common.lrc_txg;
+
+ tx = dmu_tx_create(os);
+
+ dmu_tx_hold_write(tx, lr->lr_foid, offset, length);
+
+ if (ztest_random(8) == 0 && length == doi.doi_data_block_size &&
+ P2PHASE(offset, length) == 0) /* occasionally use an arc buf for an aligned whole-block write */
+ abuf = dmu_request_arcbuf(db, length);
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+ if (txg == 0) { /* tx already aborted; undo everything acquired above */
+ if (abuf != NULL)
+ dmu_return_arcbuf(abuf);
+ dmu_buf_rele(db, FTAG);
+ ztest_range_unlock(rl);
+ ztest_object_unlock(zd, lr->lr_foid);
+ return (ENOSPC);
+ }
+
+ if (bt != NULL) {
+ /*
+ * Usually, verify the old data before writing new data --
+ * but not always, because we also want to verify correct
+ * behavior when the data was not recently read into cache.
+ */
+ ASSERT(offset % doi.doi_data_block_size == 0);
+ if (ztest_random(4) != 0) {
+ int prefetch = ztest_random(2) ?
+ DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH;
+ ztest_block_tag_t rbt;
+
+ VERIFY(dmu_read(os, lr->lr_foid, offset,
+ sizeof (rbt), &rbt, prefetch) == 0);
+ if (rbt.bt_magic == BT_MAGIC) { /* only check blocks that currently hold a tag */
+ ztest_bt_verify(&rbt, os, lr->lr_foid,
+ offset, gen, txg, crtxg);
+ }
+ }
+
+ /*
+ * Writes can appear to be newer than the bonus buffer because
+ * the ztest_get_data() callback does a dmu_read() of the
+ * open-context data, which may be different than the data
+ * as it was when the write was generated.
+ */
+ if (zd->zd_zilog->zl_replay) {
+ ztest_bt_verify(bt, os, lr->lr_foid, offset,
+ MAX(gen, bt->bt_gen), MAX(txg, lrtxg),
+ bt->bt_crtxg);
+ }
+
+ /*
+ * Set the bt's gen/txg to the bonus buffer's gen/txg
+ * so that all of the usual ASSERTs will work.
+ */
+ ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg);
+ }
+
+ if (abuf == NULL) {
+ dmu_write(os, lr->lr_foid, offset, length, data, tx);
+ } else { /* fill the arc buf and hand it over to the DMU */
+ bcopy(data, abuf->b_data, length);
+ dmu_assign_arcbuf(db, offset, abuf, tx);
+ }
+
+ (void) ztest_log_write(zd, tx, lr);
+
+ dmu_buf_rele(db, FTAG);
+
+ dmu_tx_commit(tx);
+
+ ztest_range_unlock(rl);
+ ztest_object_unlock(zd, lr->lr_foid);
+
+ return (0);
+}
+
+/*
+ * Free the byte range [lr_offset, lr_offset + lr_length) of object
+ * lr_foid and log the truncate.  The object lock is held as reader and
+ * the range as writer for the duration.
+ *
+ * Returns 0 on success or ENOSPC when no txg could be assigned.
+ */
+static int
+ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)
+{
+	objset_t *os = zd->zd_os;
+	int result = ENOSPC;
+	uint64_t txg;
+	dmu_tx_t *tx;
+	rl_t *rl;
+
+	if (byteswap)
+		byteswap_uint64_array(lr, sizeof (*lr));
+
+	ztest_object_lock(zd, lr->lr_foid, RL_READER);
+	rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length,
+	    RL_WRITER);
+
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length);
+
+	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+	if (txg != 0) {
+		VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset,
+		    lr->lr_length, tx) == 0);
+
+		(void) ztest_log_truncate(zd, tx, lr);
+
+		dmu_tx_commit(tx);
+		result = 0;
+	}
+
+	/* On failure ztest_tx_assign() has already aborted the tx. */
+	ztest_range_unlock(rl);
+	ztest_object_unlock(zd, lr->lr_foid);
+
+	return (result);
+}
+
+static int
+ztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap)
+{ /*
+ * Resize the bonus buffer of object lr_foid to lr_size and rewrite its
+ * block tag with generation lr_mode, then log the setattr. When not
+ * replaying, lr_size and lr_mode are chosen here (random size, current
+ * generation + 1) and written back into lr. Holds the object lock as
+ * writer throughout.
+ * Returns 0 on success or ENOSPC when no txg could be assigned.
+ */
+ objset_t *os = zd->zd_os;
+ dmu_tx_t *tx;
+ dmu_buf_t *db;
+ ztest_block_tag_t *bbt;
+ uint64_t txg, lrtxg, crtxg;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ ztest_object_lock(zd, lr->lr_foid, RL_WRITER);
+
+ VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
+
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_bonus(tx, lr->lr_foid);
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+ if (txg == 0) { /* tx already aborted; drop the hold and the lock */
+ dmu_buf_rele(db, FTAG);
+ ztest_object_unlock(zd, lr->lr_foid);
+ return (ENOSPC);
+ }
+
+ bbt = ztest_bt_bonus(db);
+ ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
+ crtxg = bbt->bt_crtxg;
+ lrtxg = lr->lr_common.lrc_txg;
+
+ if (zd->zd_zilog->zl_replay) { /* replayed records must arrive fully populated */
+ ASSERT(lr->lr_size != 0);
+ ASSERT(lr->lr_mode != 0);
+ ASSERT(lrtxg != 0);
+ } else {
+ /*
+ * Randomly change the size and increment the generation.
+ * The size is a whole number of block tags, at least one and
+ * at most db->db_size / sizeof (*bbt).
+ */
+ lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) *
+ sizeof (*bbt);
+ lr->lr_mode = bbt->bt_gen + 1;
+ ASSERT(lrtxg == 0);
+ }
+
+ /*
+ * Verify that the current bonus buffer is not newer than our txg.
+ */
+ ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode,
+ MAX(txg, lrtxg), crtxg);
+
+ dmu_buf_will_dirty(db, tx);
+
+ ASSERT3U(lr->lr_size, >=, sizeof (*bbt));
+ ASSERT3U(lr->lr_size, <=, db->db_size);
+ VERIFY3U(dmu_set_bonus(db, lr->lr_size, tx), ==, 0);
+ bbt = ztest_bt_bonus(db); /* the tag sits at the end of the bonus area, so re-locate it after the resize */
+
+ ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg);
+
+ dmu_buf_rele(db, FTAG);
+
+ (void) ztest_log_setattr(zd, tx, lr);
+
+ dmu_tx_commit(tx);
+
+ ztest_object_unlock(zd, lr->lr_foid);
+
+ return (0);
+}
+
+zil_replay_func_t ztest_replay_vector[TX_MAX_TYPE] = { /* handed to zil_replay(); one slot per transaction type in the order noted on each line, NULL where ztest supplies no handler */
+ NULL, /* 0 no such transaction type */
+ (zil_replay_func_t)ztest_replay_create, /* TX_CREATE */
+ NULL, /* TX_MKDIR */
+ NULL, /* TX_MKXATTR */
+ NULL, /* TX_SYMLINK */
+ (zil_replay_func_t)ztest_replay_remove, /* TX_REMOVE */
+ NULL, /* TX_RMDIR */
+ NULL, /* TX_LINK */
+ NULL, /* TX_RENAME */
+ (zil_replay_func_t)ztest_replay_write, /* TX_WRITE */
+ (zil_replay_func_t)ztest_replay_truncate, /* TX_TRUNCATE */
+ (zil_replay_func_t)ztest_replay_setattr, /* TX_SETATTR */
+ NULL, /* TX_ACL */
+ NULL, /* TX_CREATE_ACL */
+ NULL, /* TX_CREATE_ATTR */
+ NULL, /* TX_CREATE_ACL_ATTR */
+ NULL, /* TX_MKDIR_ACL */
+ NULL, /* TX_MKDIR_ATTR */
+ NULL, /* TX_MKDIR_ACL_ATTR */
+ NULL, /* TX_WRITE2 */
+};
+
+/*
+ * ZIL get_data callbacks
+ */
+
+/*
+ * Release everything a zgd_t set up by ztest_get_data() is holding:
+ * the data buffer hold (if any), the range lock and the object lock.
+ * When error is 0 and a block pointer was recorded, the block is also
+ * added to the ZIL via zil_add_block().  The zgd itself is freed last.
+ */
+static void
+ztest_get_done(zgd_t *zgd, int error)
+{
+	ztest_ds_t *zd = zgd->zgd_private;
+	/* Fetch the object number before the range lock is released. */
+	uint64_t object = zgd->zgd_rl->rl_object;
+
+	if (zgd->zgd_db != NULL)
+		dmu_buf_rele(zgd->zgd_db, zgd);
+
+	ztest_range_unlock(zgd->zgd_rl);
+	ztest_object_unlock(zd, object);
+
+	if (error == 0) {
+		if (zgd->zgd_bp != NULL)
+			zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
+	}
+
+	umem_free(zgd, sizeof (*zgd));
+}
+
+static int
+ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
+{ /*
+ * ZIL get_data callback (it is the function handed to zil_open() in
+ * ztest_zil_remount()). arg is the ztest_ds_t. With a non-NULL buf the
+ * data for lr is read straight into buf; with a NULL buf the block
+ * containing lr_offset is held and passed to dmu_sync(), which is given
+ * lr's block pointer to fill in.
+ * Returns ENOENT when the object's creation txg is 0 or later than the
+ * record's txg, otherwise the first error encountered, or 0.
+ */
+ ztest_ds_t *zd = arg;
+ objset_t *os = zd->zd_os;
+ uint64_t object = lr->lr_foid;
+ uint64_t offset = lr->lr_offset;
+ uint64_t size = lr->lr_length;
+ blkptr_t *bp = &lr->lr_blkptr;
+ uint64_t txg = lr->lr_common.lrc_txg;
+ uint64_t crtxg;
+ dmu_object_info_t doi;
+ dmu_buf_t *db;
+ zgd_t *zgd;
+ int error;
+
+ ztest_object_lock(zd, object, RL_READER);
+ error = dmu_bonus_hold(os, object, FTAG, &db);
+ if (error) {
+ ztest_object_unlock(zd, object);
+ return (error);
+ }
+
+ crtxg = ztest_bt_bonus(db)->bt_crtxg;
+
+ if (crtxg == 0 || crtxg > txg) { /* object carries no creation txg, or one later than this record */
+ dmu_buf_rele(db, FTAG);
+ ztest_object_unlock(zd, object);
+ return (ENOENT);
+ }
+
+ dmu_object_info_from_db(db, &doi);
+ dmu_buf_rele(db, FTAG);
+ db = NULL;
+
+ zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL); /* zeroed, so zgd_db and zgd_bp start out NULL */
+ zgd->zgd_zilog = zd->zd_zilog;
+ zgd->zgd_private = zd;
+
+ if (buf != NULL) { /* immediate write */
+ zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
+ RL_READER);
+
+ error = dmu_read(os, object, offset, size, buf,
+ DMU_READ_NO_PREFETCH);
+ ASSERT(error == 0);
+ } else {
+ size = doi.doi_data_block_size; /* operate on the whole containing block */
+ if (ISP2(size)) {
+ offset = P2ALIGN(offset, size);
+ } else { /* non-power-of-two block size: expect the offset inside the first block */
+ ASSERT(offset < size);
+ offset = 0;
+ }
+
+ zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
+ RL_READER);
+
+ error = dmu_buf_hold(os, object, offset, zgd, &db,
+ DMU_READ_NO_PREFETCH);
+
+ if (error == 0) {
+ zgd->zgd_db = db;
+ zgd->zgd_bp = bp;
+
+ ASSERT(db->db_offset == offset);
+ ASSERT(db->db_size == size);
+
+ error = dmu_sync(zio, lr->lr_common.lrc_txg,
+ ztest_get_done, zgd);
+
+ if (error == 0)
+ return (0); /* no cleanup here: zgd and ztest_get_done were handed to dmu_sync() (presumably invoked on completion -- confirm) */
+ }
+ }
+
+ ztest_get_done(zgd, error); /* immediate-write path and every failure: release locks/holds and free zgd now */
+
+ return (error);
+}
+
+/*
+ * Allocate a zero-filled log record of lrsize bytes.  When name is not
+ * NULL, room for it is appended and the string (with its terminating
+ * NUL) is copied in directly behind the record.  Free the result with
+ * ztest_lr_free() using the same lrsize and name.
+ */
+static void *
+ztest_lr_alloc(size_t lrsize, char *name)
+{
+	size_t namesize = 0;
+	char *rec;
+
+	if (name != NULL)
+		namesize = strlen(name) + 1;
+
+	rec = umem_zalloc(lrsize + namesize, UMEM_NOFAIL);
+
+	if (name != NULL)
+		bcopy(name, rec + lrsize, namesize);
+
+	return (rec);
+}
+
+/*
+ * Free a log record obtained from ztest_lr_alloc().  lrsize and name
+ * must be the values used for the allocation, because the freed size
+ * is recomputed from them.
+ */
+void
+ztest_lr_free(void *lr, size_t lrsize, char *name)
+{
+	size_t namesize = 0;
+
+	if (name != NULL)
+		namesize = strlen(name) + 1;
+
+	umem_free(lr, lrsize + namesize);
+}
+
+/*
+ * Look up each of the 'count' object descriptors in its directory.
+ *
+ * For every name that is found, od_object, od_type, od_blocksize and
+ * od_gen are filled in from the object and its bonus-buffer tag; for a
+ * missing name od_object is left at 0.  The caller must hold
+ * zd_dirobj_lock.  Returns the number of objects not found.
+ */
+static int
+ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count)
+{
+	int missing = 0;
+	int i;
+
+	ASSERT(mutex_held(&zd->zd_dirobj_lock));
+
+	for (i = 0; i < count; i++, od++) {
+		dmu_object_info_t doi;
+		ztest_block_tag_t *bbt;
+		dmu_buf_t *db;
+		int error;
+
+		od->od_object = 0;
+		error = zap_lookup(zd->zd_os, od->od_dir, od->od_name,
+		    sizeof (uint64_t), 1, &od->od_object);
+
+		if (error != 0) {
+			ASSERT(error == ENOENT);
+			ASSERT(od->od_object == 0);
+			missing++;
+			continue;
+		}
+
+		ASSERT(od->od_object != 0);
+		ASSERT(missing == 0);	/* there should be no gaps */
+
+		/* Snapshot the object's attributes under its lock. */
+		ztest_object_lock(zd, od->od_object, RL_READER);
+		VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os,
+		    od->od_object, FTAG, &db));
+		dmu_object_info_from_db(db, &doi);
+		bbt = ztest_bt_bonus(db);
+		ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
+		od->od_type = doi.doi_type;
+		od->od_blocksize = doi.doi_data_block_size;
+		od->od_gen = bbt->bt_gen;
+		dmu_buf_rele(db, FTAG);
+		ztest_object_unlock(zd, od->od_object);
+	}
+
+	return (missing);
+}
+
+/*
+ * Create the objects described by the 'count' descriptors, in order.
+ *
+ * Each object is created by building a create record and running it
+ * through ztest_replay_create().  After the first failure no further
+ * creates are attempted; that descriptor and all later ones get
+ * od_object = 0.  The caller must hold zd_dirobj_lock.  Returns the
+ * number of objects that were not created.
+ */
+static int
+ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count)
+{
+	int missing = 0;
+	int i;
+
+	ASSERT(mutex_held(&zd->zd_dirobj_lock));
+
+	for (i = 0; i < count; i++, od++) {
+		lr_create_t *lr;
+
+		if (missing != 0) {
+			/* An earlier create failed: skip the rest. */
+			od->od_object = 0;
+			missing++;
+			continue;
+		}
+
+		lr = ztest_lr_alloc(sizeof (*lr), od->od_name);
+
+		lr->lr_doid = od->od_dir;
+		lr->lr_foid = 0;	/* 0 to allocate, > 0 to claim */
+		lr->lrz_type = od->od_crtype;
+		lr->lrz_blocksize = od->od_crblocksize;
+		lr->lrz_ibshift = ztest_random_ibshift();
+		lr->lrz_bonustype = DMU_OT_UINT64_OTHER;
+		lr->lrz_bonuslen = dmu_bonus_max();
+		lr->lr_gen = od->od_crgen;
+		lr->lr_crtime[0] = time(NULL);
+
+		if (ztest_replay_create(zd, lr, B_FALSE) == 0) {
+			/* Record what was actually created. */
+			od->od_object = lr->lr_foid;
+			od->od_type = od->od_crtype;
+			od->od_blocksize = od->od_crblocksize;
+			od->od_gen = od->od_crgen;
+			ASSERT(od->od_object != 0);
+		} else {
+			ASSERT(missing == 0);
+			od->od_object = 0;
+			missing++;
+		}
+
+		ztest_lr_free(lr, sizeof (*lr), od->od_name);
+	}
+
+	return (missing);
+}
+
+/*
+ * Remove the objects described by the 'count' descriptors, walking the
+ * array from the last entry to the first.
+ *
+ * Descriptors whose od_object is 0 are skipped.  After the first failed
+ * removal no further removals are attempted and every remaining
+ * descriptor is counted as missing.  The caller must hold
+ * zd_dirobj_lock.  Returns the number of descriptors counted as
+ * missing (0 when everything was removed).
+ */
+static int
+ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count)
+{
+	ztest_od_t *cur;
+	int missing = 0;
+	int error;
+	int i;
+
+	ASSERT(mutex_held(&zd->zd_dirobj_lock));
+
+	cur = od + (count - 1);
+
+	for (i = count; i > 0; i--, cur--) {
+		lr_remove_t *lr;
+
+		if (missing != 0) {
+			missing++;
+			continue;
+		}
+
+		if (cur->od_object == 0)
+			continue;
+
+		lr = ztest_lr_alloc(sizeof (*lr), cur->od_name);
+		lr->lr_doid = cur->od_dir;
+
+		error = ztest_replay_remove(zd, lr, B_FALSE);
+		if (error == 0) {
+			cur->od_object = 0;
+		} else {
+			ASSERT3U(error, ==, ENOSPC);
+			missing++;
+		}
+
+		ztest_lr_free(lr, sizeof (*lr), cur->od_name);
+	}
+
+	return (missing);
+}
+
+/*
+ * Write 'size' bytes from 'data' to 'object' at 'offset' by building a
+ * write record (payload appended) and running it through
+ * ztest_replay_write().  Returns that function's result.
+ */
+static int
+ztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size,
+    void *data)
+{
+	size_t lrsize = sizeof (lr_write_t) + size;
+	lr_write_t *lr = ztest_lr_alloc(lrsize, NULL);
+	int error;
+
+	lr->lr_foid = object;
+	lr->lr_offset = offset;
+	lr->lr_length = size;
+	lr->lr_blkoff = 0;
+	BP_ZERO(&lr->lr_blkptr);
+
+	/* The payload lives directly behind the record. */
+	bcopy(data, lr + 1, size);
+
+	error = ztest_replay_write(zd, lr, B_FALSE);
+
+	ztest_lr_free(lr, lrsize, NULL);
+
+	return (error);
+}
+
+/*
+ * Free 'size' bytes of 'object' starting at 'offset' by building a
+ * truncate record and running it through ztest_replay_truncate().
+ * Returns that function's result.
+ */
+static int
+ztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
+{
+	lr_truncate_t *lr = ztest_lr_alloc(sizeof (*lr), NULL);
+	int error;
+
+	lr->lr_foid = object;
+	lr->lr_offset = offset;
+	lr->lr_length = size;
+
+	error = ztest_replay_truncate(zd, lr, B_FALSE);
+
+	ztest_lr_free(lr, sizeof (*lr), NULL);
+
+	return (error);
+}
+
+/*
+ * Change the attributes of 'object' by building a setattr record with
+ * size and mode left at 0 and running it through
+ * ztest_replay_setattr(), which picks the actual values when not
+ * replaying.  Returns that function's result.
+ */
+static int
+ztest_setattr(ztest_ds_t *zd, uint64_t object)
+{
+	lr_setattr_t *lr = ztest_lr_alloc(sizeof (*lr), NULL);
+	int error;
+
+	lr->lr_foid = object;
+	lr->lr_size = 0;
+	lr->lr_mode = 0;
+
+	error = ztest_replay_setattr(zd, lr, B_FALSE);
+
+	ztest_lr_free(lr, sizeof (*lr), NULL);
+
+	return (error);
+}
+
+/*
+ * Preallocate the range [offset, offset + size) of 'object'.
+ *
+ * Waits for the pool to sync first, then preallocates in a single tx
+ * and waits for that txg to sync.  If no txg can be assigned, the range
+ * is freed instead (best effort; the result is ignored).  The object
+ * lock is held as reader and the range as writer throughout.
+ */
+static void
+ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
+{
+	objset_t *os = zd->zd_os;
+	uint64_t txg;
+	dmu_tx_t *tx;
+	rl_t *rl;
+
+	txg_wait_synced(dmu_objset_pool(os), 0);
+
+	ztest_object_lock(zd, object, RL_READER);
+	rl = ztest_range_lock(zd, object, offset, size, RL_WRITER);
+
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_write(tx, object, offset, size);
+
+	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+	if (txg == 0) {
+		(void) dmu_free_long_range(os, object, offset, size);
+	} else {
+		dmu_prealloc(os, object, offset, size, tx);
+		dmu_tx_commit(tx);
+		txg_wait_synced(dmu_objset_pool(os), txg);
+	}
+
+	ztest_range_unlock(rl);
+	ztest_object_unlock(zd, object);
+}
+
+/*
+ * Perform one randomly chosen i/o operation on 'object' at 'offset':
+ * write a block tag, write a byte pattern, write zeroes, truncate, or
+ * setattr.  Half of the time the choice is overridden with "write a
+ * block tag".  The operation runs with zd_zilog_lock held as reader and
+ * its result is ignored.
+ */
+static void
+ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
+{
+	enum ztest_io_type io_type;
+	dmu_object_info_t doi;
+	ztest_block_tag_t wbt;
+	uint64_t blocksize;
+	void *data;
+
+	VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0);
+	blocksize = doi.doi_data_block_size;
+	data = umem_alloc(blocksize, UMEM_NOFAIL);
+
+	/*
+	 * Choose the operation; the second draw biases the choice toward
+	 * writing block tags.
+	 */
+	io_type = ztest_random(ZTEST_IO_TYPES);
+	if (ztest_random(2) == 0)
+		io_type = ZTEST_IO_WRITE_TAG;
+
+	(void) rw_enter(&zd->zd_zilog_lock, RW_READER);
+
+	if (io_type == ZTEST_IO_WRITE_TAG) {
+		ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0);
+		(void) ztest_write(zd, object, offset, sizeof (wbt), &wbt);
+	} else if (io_type == ZTEST_IO_WRITE_PATTERN) {
+		(void) memset(data, 'a' + (object + offset) % 5, blocksize);
+		if (ztest_random(2) == 0) {
+			/*
+			 * Flip the top bit of two words so that blocks with
+			 * colliding fletcher2 checksums are produced; this
+			 * makes sure zio_ddt_collision() detects and resolves
+			 * them when fletcher2-verify is used for dedup.
+			 */
+			((uint64_t *)data)[0] ^= 1ULL << 63;
+			((uint64_t *)data)[4] ^= 1ULL << 63;
+		}
+		(void) ztest_write(zd, object, offset, blocksize, data);
+	} else if (io_type == ZTEST_IO_WRITE_ZEROES) {
+		bzero(data, blocksize);
+		(void) ztest_write(zd, object, offset, blocksize, data);
+	} else if (io_type == ZTEST_IO_TRUNCATE) {
+		(void) ztest_truncate(zd, object, offset, blocksize);
+	} else if (io_type == ZTEST_IO_SETATTR) {
+		(void) ztest_setattr(zd, object);
+	}
+
+	(void) rw_exit(&zd->zd_zilog_lock);
+
+	umem_free(data, blocksize);
+}
+
+/*
+ * Initialize an object description template.
+ *
+ * The lookup results (od_object, od_type, od_blocksize, od_gen) are
+ * cleared; the creation parameters are taken from the arguments, with a
+ * random block size substituted when 'blocksize' is 0.  The object name
+ * is built from tag, id and index.
+ */
+static void
+ztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index,
+    dmu_object_type_t type, uint64_t blocksize, uint64_t gen)
+{
+	/* Nothing has been looked up or created yet. */
+	od->od_object = 0;
+	od->od_type = DMU_OT_NONE;
+	od->od_blocksize = 0;
+	od->od_gen = 0;
+
+	/* Parameters to use when the object gets created. */
+	od->od_dir = ZTEST_DIROBJ;
+	od->od_crtype = type;
+	od->od_crgen = gen;
+	if (blocksize != 0)
+		od->od_crblocksize = blocksize;
+	else
+		od->od_crblocksize = ztest_random_blocksize();
+
+	(void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]",
+	    tag, (longlong_t)id, (u_longlong_t)index);
+}
+
+/*
+ * Look up or create the objects for a test from the od template array
+ * ('size' is the array's size in bytes).
+ *
+ * If any object is missing, or if 'remove' is set, all existing objects
+ * are removed and a fresh set is created; otherwise the existing
+ * objects are used as they are.  zd->zd_od is pointed at the array in
+ * every case.  Returns 0 on success, or -1 if the removal or the
+ * creation did not fully succeed.
+ */
+static int
+ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove)
+{
+	int count = size / sizeof (*od);
+	int rv = 0;
+
+	mutex_enter(&zd->zd_dirobj_lock);
+
+	if (ztest_lookup(zd, od, count) != 0 || remove) {
+		/* Creation is only attempted after a clean removal. */
+		if (ztest_remove(zd, od, count) != 0)
+			rv = -1;
+		else if (ztest_create(zd, od, count) != 0)
+			rv = -1;
+	}
+
+	zd->zd_od = od;
+
+	mutex_exit(&zd->zd_dirobj_lock);
+
+	return (rv);
+}
+
+/* ARGSUSED */
+void
+ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
+{ /*
+ * Commit the dataset's ZIL for a randomly chosen object number and
+ * record the resulting commit sequence number in zd->zd_shared. Runs
+ * with zd_zilog_lock held as reader; id is unused.
+ */
+ zilog_t *zilog = zd->zd_zilog;
+
+ (void) rw_enter(&zd->zd_zilog_lock, RW_READER);
+
+ zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
+
+ /*
+ * Remember the committed values in zd, which is in parent/child
+ * shared memory. If we die, the next iteration of ztest_run()
+ * will verify that the log really does contain this record.
+ */
+ mutex_enter(&zilog->zl_lock);
+ ASSERT(zd->zd_shared != NULL);
+ ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq); /* the recorded sequence number never moves backwards */
+ zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq;
+ mutex_exit(&zilog->zl_lock);
+
+ (void) rw_exit(&zd->zd_zilog_lock);
+}
+
+/*
+ * Simulate what happens to the ZIL across an unmount followed by a
+ * mount: close the log, reopen it and replay it.  The dataset stays
+ * held the whole time, in an attempt to expose any implicit assumptions
+ * about ZIL management.  Both zd_dirobj_lock and zd_zilog_lock (as
+ * writer) are held for the duration.
+ */
+/* ARGSUSED */
+void
+ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
+{
+	objset_t *os = zd->zd_os;
+	zilog_t *zilog;
+
+	mutex_enter(&zd->zd_dirobj_lock);
+	(void) rw_enter(&zd->zd_zilog_lock, RW_WRITER);
+
+	/* The "unmount" half, as in zfs_sb_teardown(). */
+	zilog = zd->zd_zilog;
+	zil_close(zilog);
+
+	/* The "mount" half, as in zfsvfs_setup(). */
+	VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog);
+	zil_replay(os, zd, ztest_replay_vector);
+
+	(void) rw_exit(&zd->zd_zilog_lock);
+	mutex_exit(&zd->zd_dirobj_lock);
+}
+
+/*
+ * Verify that we can't destroy an active pool, create an existing pool,
+ * or create a pool with a bad vdev spec. The expected errors are ENOENT
+ * for the bad vdev specs, EEXIST for the existing pool and EBUSY for
+ * destroying the pool while it is open. Neither argument is used.
+ */
+/* ARGSUSED */
+void
+ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
+{
+ ztest_shared_opts_t *zo = &ztest_opts;
+ spa_t *spa;
+ nvlist_t *nvroot;
+
+ /*
+ * Attempt to create using a bad file.
+ */
+ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
+ VERIFY3U(ENOENT, ==,
+ spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL));
+ nvlist_free(nvroot);
+
+ /*
+ * Attempt to create using a bad mirror.
+ */
+ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1);
+ VERIFY3U(ENOENT, ==,
+ spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL));
+ nvlist_free(nvroot);
+
+ /*
+ * Attempt to create an existing pool. It shouldn't matter
+ * what's in the nvroot; we should fail with EEXIST.
+ */
+ (void) rw_enter(&ztest_name_lock, RW_READER);
+ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
+ VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL));
+ nvlist_free(nvroot);
+ VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); /* keep the pool open ... */
+ VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); /* ... so that destroying it must fail */
+ spa_close(spa, FTAG);
+
+ (void) rw_exit(&ztest_name_lock);
+}
+
+/* ARGSUSED */
+void
+ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
+{ /*
+ * Create a scratch pool named "<pool>_upgrade" at a randomly chosen
+ * pre-feature-flags version, upgrade it to a randomly chosen later
+ * version and verify the version recorded in the pool config. Runs
+ * under ztest_vdev_lock; neither argument is used.
+ */
+ spa_t *spa;
+ uint64_t initial_version = SPA_VERSION_INITIAL;
+ uint64_t version, newversion;
+ nvlist_t *nvroot, *props;
+ char *name;
+
+ mutex_enter(&ztest_vdev_lock);
+ name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool);
+
+ /*
+ * Clean up from previous runs.
+ */
+ (void) spa_destroy(name);
+
+ nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0,
+ 0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1);
+
+ /*
+ * If we're configuring a RAIDZ device then make sure that the
+ * initial version is capable of supporting that feature.
+ */
+ switch (ztest_opts.zo_raidz_parity) { /* any other parity keeps the SPA_VERSION_INITIAL default set above */
+ case 0:
+ case 1:
+ initial_version = SPA_VERSION_INITIAL;
+ break;
+ case 2:
+ initial_version = SPA_VERSION_RAIDZ2;
+ break;
+ case 3:
+ initial_version = SPA_VERSION_RAIDZ3;
+ break;
+ }
+
+ /*
+ * Create a pool with a spa version that can be upgraded. Pick
+ * a value between initial_version and SPA_VERSION_BEFORE_FEATURES.
+ */
+ do {
+ version = ztest_random_spa_version(initial_version);
+ } while (version > SPA_VERSION_BEFORE_FEATURES);
+
+ props = fnvlist_alloc();
+ fnvlist_add_uint64(props,
+ zpool_prop_to_name(ZPOOL_PROP_VERSION), version);
+ VERIFY3S(spa_create(name, nvroot, props, NULL, NULL), ==, 0);
+ fnvlist_free(nvroot);
+ fnvlist_free(props);
+
+ VERIFY3S(spa_open(name, &spa, FTAG), ==, 0);
+ VERIFY3U(spa_version(spa), ==, version);
+ newversion = ztest_random_spa_version(version + 1);
+
+ if (ztest_opts.zo_verbose >= 4) {
+ (void) printf("upgrading spa version from %llu to %llu\n",
+ (u_longlong_t)version, (u_longlong_t)newversion);
+ }
+
+ spa_upgrade(spa, newversion);
+ VERIFY3U(spa_version(spa), >, version);
+ VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config,
+ zpool_prop_to_name(ZPOOL_PROP_VERSION))); /* the config must record the running version */
+ spa_close(spa, FTAG);
+
+ strfree(name);
+ mutex_exit(&ztest_vdev_lock);
+}
+
+/*
+ * Depth-first search of the vdev tree rooted at vd for a vdev whose
+ * vdev_path equals 'path'.  Returns the first match (vd itself is
+ * checked before its children), or NULL if there is none.
+ */
+static vdev_t *
+vdev_lookup_by_path(vdev_t *vd, const char *path)
+{
+	int c;
+
+	if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0)
+		return (vd);
+
+	for (c = 0; c < vd->vdev_children; c++) {
+		vdev_t *found = vdev_lookup_by_path(vd->vdev_child[c], path);
+
+		if (found != NULL)
+			return (found);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Find the first available hole which can be used as a top-level.
+ *
+ * Returns the index of the first child of the root vdev that is marked
+ * as a hole, or the number of children when there is no hole.  The
+ * caller is expected to hold SCL_VDEV as reader.
+ */
+int
+find_vdev_hole(spa_t *spa)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+	int c = 0;
+
+	ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV);
+
+	while (c < rvd->vdev_children) {
+		if (rvd->vdev_child[c]->vdev_ishole)
+			return (c);
+		c++;
+	}
+
+	return (c);
+}
+
+/*
+ * Verify that adding a top-level vdev and removing a log device work as
+ * expected. When the pool has slogs, one time in four the slog at the
+ * head of the log class rotor is removed; otherwise a new top-level vdev
+ * is added (one time in four as a log device). Runs under
+ * ztest_vdev_lock; neither argument is used.
+ */
+/* ARGSUSED */
+void
+ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
+{
+ ztest_shared_t *zs = ztest_shared;
+ spa_t *spa = ztest_spa;
+ uint64_t leaves;
+ uint64_t guid;
+ nvlist_t *nvroot;
+ int error;
+
+ mutex_enter(&ztest_vdev_lock);
+ leaves =
+ MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; /* leaf vdevs per top-level vdev */
+
+ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
+
+ ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves; /* first leaf number of the first free top-level slot */
+
+ /*
+ * If we have slogs then remove them 1/4 of the time.
+ */
+ if (spa_has_slogs(spa) && ztest_random(4) == 0) {
+ /*
+ * Grab the guid from the head of the log class rotor.
+ */
+ guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid;
+
+ spa_config_exit(spa, SCL_VDEV, FTAG);
+
+ /*
+ * We have to grab the ztest_name_lock as writer to
+ * prevent a race between removing a slog (dmu_objset_find)
+ * and destroying a dataset. Removing the slog will
+ * grab a reference on the dataset which may cause
+ * dmu_objset_destroy() to fail with EBUSY thus
+ * leaving the dataset in an inconsistent state.
+ */
+ rw_enter(&ztest_name_lock, RW_WRITER);
+ error = spa_vdev_remove(spa, guid, B_FALSE);
+ rw_exit(&ztest_name_lock);
+
+ if (error && error != EEXIST) /* EEXIST is the only failure tolerated here */
+ fatal(0, "spa_vdev_remove() = %d", error);
+ } else {
+ spa_config_exit(spa, SCL_VDEV, FTAG);
+
+ /*
+ * Make 1/4 of the devices be log devices.
+ */
+ nvroot = make_vdev_root(NULL, NULL, NULL,
+ ztest_opts.zo_vdev_size, 0,
+ ztest_random(4) == 0, ztest_opts.zo_raidz,
+ zs->zs_mirrors, 1);
+
+ error = spa_vdev_add(spa, nvroot);
+ nvlist_free(nvroot);
+
+ if (error == ENOSPC) /* running out of space is recorded, not fatal */
+ ztest_record_enospc("spa_vdev_add");
+ else if (error != 0)
+ fatal(0, "spa_vdev_add() = %d", error);
+ }
+
+ mutex_exit(&ztest_vdev_lock);
+}
+
+/*
+ * Verify that adding/removing aux devices (l2arc, hot spare) works as expected.
+ */
+/* ARGSUSED */