#include <sys/zfs_ioctl.h>
#include <sys/dsl_scan.h>
#include <sys/zfeature.h>
+#include <sys/zvol.h>
#ifdef _KERNEL
#include <sys/bootprops.h>
#include "zfs_comutil.h"
typedef enum zti_modes {
- zti_mode_fixed, /* value is # of threads (min 1) */
- zti_mode_online_percent, /* value is % of online CPUs */
- zti_mode_batch, /* cpu-intensive; value is ignored */
- zti_mode_null, /* don't create a taskq */
- zti_nmodes
+ ZTI_MODE_FIXED, /* value is # of threads (min 1) */
+ ZTI_MODE_ONLINE_PERCENT, /* value is % of online CPUs */
+ ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */
+ ZTI_MODE_NULL, /* don't create a taskq */
+ ZTI_NMODES
} zti_modes_t;
-#define ZTI_FIX(n) { zti_mode_fixed, (n) }
-#define ZTI_PCT(n) { zti_mode_online_percent, (n) }
-#define ZTI_BATCH { zti_mode_batch, 0 }
-#define ZTI_NULL { zti_mode_null, 0 }
+#define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) }
+#define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 }
+#define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 }
+#define ZTI_NULL { ZTI_MODE_NULL, 0, 0 }
-#define ZTI_ONE ZTI_FIX(1)
+#define ZTI_N(n) ZTI_P(n, 1)
+#define ZTI_ONE ZTI_N(1)
typedef struct zio_taskq_info {
- enum zti_modes zti_mode;
+ zti_modes_t zti_mode;
uint_t zti_value;
+ uint_t zti_count;
} zio_taskq_info_t;
static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
};
/*
- * Define the taskq threads for the following I/O types:
- * NULL, READ, WRITE, FREE, CLAIM, and IOCTL
+ * This table defines the taskq settings for each ZFS I/O type. When
+ * initializing a pool, we use this table to create an appropriately sized
+ * taskq. Some operations are low volume and therefore have a small, static
+ * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE
+ * macros. Other operations process a large amount of data; the ZTI_BATCH
+ * macro causes us to create a taskq oriented for throughput. Some operations
+ * are so high frequency and short-lived that the taskq itself can become a a
+ * point of lock contention. The ZTI_P(#, #) macro indicates that we need an
+ * additional degree of parallelism specified by the number of threads per-
+ * taskq and the number of taskqs; when dispatching an event in this case, the
+ * particular taskq is chosen at random.
+ *
+ * The different taskq priorities are to handle the different contexts (issue
+ * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
+ * need to be handled with minimum delay.
*/
const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
/* ISSUE ISSUE_HIGH INTR INTR_HIGH */
- { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
- { ZTI_FIX(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL },
- { ZTI_BATCH, ZTI_FIX(5), ZTI_FIX(16), ZTI_FIX(5) },
- { ZTI_PCT(100), ZTI_NULL, ZTI_ONE, ZTI_NULL },
- { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
- { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
+ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */
+ { ZTI_N(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, /* READ */
+ { ZTI_BATCH, ZTI_N(5), ZTI_N(16), ZTI_N(5) }, /* WRITE */
+ { ZTI_P(4, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
+ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */
+ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */
};
static dsl_syncfunc_t spa_sync_version;
static dsl_syncfunc_t spa_sync_props;
+static dsl_checkfunc_t spa_change_guid_check;
+static dsl_syncfunc_t spa_change_guid_sync;
static boolean_t spa_has_active_shared_spare(spa_t *spa);
static inline int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
}
}
+/*ARGSUSED*/
+static int
+spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+ spa_t *spa = arg1;
+ vdev_t *rvd = spa->spa_root_vdev;
+ uint64_t vdev_state;
+ ASSERTV(uint64_t *newguid = arg2);
+
+ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
+ vdev_state = rvd->vdev_state;
+ spa_config_exit(spa, SCL_STATE, FTAG);
+
+ if (vdev_state != VDEV_STATE_HEALTHY)
+ return (ENXIO);
+
+ ASSERT3U(spa_guid(spa), !=, *newguid);
+
+ return (0);
+}
+
+static void
+spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+ spa_t *spa = arg1;
+ uint64_t *newguid = arg2;
+ uint64_t oldguid;
+ vdev_t *rvd = spa->spa_root_vdev;
+
+ oldguid = spa_guid(spa);
+
+ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
+ rvd->vdev_guid = *newguid;
+ rvd->vdev_guid_sum += (*newguid - oldguid);
+ vdev_config_dirty(rvd);
+ spa_config_exit(spa, SCL_STATE, FTAG);
+
+ spa_history_log_internal(LOG_POOL_GUID_CHANGE, spa, tx,
+ "old=%lld new=%lld", oldguid, *newguid);
+}
+
/*
* Change the GUID for the pool. This is done so that we can later
* re-import a pool built from a clone of our own vdevs. We will modify
int
spa_change_guid(spa_t *spa)
{
- uint64_t oldguid, newguid;
- uint64_t txg;
-
- if (!(spa_mode_global & FWRITE))
- return (EROFS);
-
- txg = spa_vdev_enter(spa);
-
- if (spa->spa_root_vdev->vdev_state != VDEV_STATE_HEALTHY)
- return (spa_vdev_exit(spa, NULL, txg, ENXIO));
+ int error;
+ uint64_t guid;
- oldguid = spa_guid(spa);
- newguid = spa_generate_guid(NULL);
- ASSERT3U(oldguid, !=, newguid);
+ mutex_enter(&spa_namespace_lock);
+ guid = spa_generate_guid(NULL);
- spa->spa_root_vdev->vdev_guid = newguid;
- spa->spa_root_vdev->vdev_guid_sum += (newguid - oldguid);
+ error = dsl_sync_task_do(spa_get_dsl(spa), spa_change_guid_check,
+ spa_change_guid_sync, spa, &guid, 5);
- vdev_config_dirty(spa->spa_root_vdev);
+ if (error == 0) {
+ spa_config_sync(spa, B_FALSE, B_TRUE);
+ spa_event_notify(spa, NULL, FM_EREPORT_ZFS_POOL_REGUID);
+ }
- spa_event_notify(spa, NULL, FM_EREPORT_ZFS_POOL_REGUID);
+ mutex_exit(&spa_namespace_lock);
- return (spa_vdev_exit(spa, NULL, txg, 0));
+ return (error);
}
/*
offsetof(spa_error_entry_t, se_avl));
}
-static taskq_t *
-spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode,
- uint_t value)
+static void
+spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
{
- uint_t flags = TASKQ_PREPOPULATE;
+ const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
+ enum zti_modes mode = ztip->zti_mode;
+ uint_t value = ztip->zti_value;
+ uint_t count = ztip->zti_count;
+ spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
+ char name[32];
+ uint_t i, flags = 0;
boolean_t batch = B_FALSE;
- switch (mode) {
- case zti_mode_null:
- return (NULL); /* no taskq needed */
+ if (mode == ZTI_MODE_NULL) {
+ tqs->stqs_count = 0;
+ tqs->stqs_taskq = NULL;
+ return;
+ }
- case zti_mode_fixed:
- ASSERT3U(value, >=, 1);
- value = MAX(value, 1);
- break;
+ ASSERT3U(count, >, 0);
- case zti_mode_batch:
- batch = B_TRUE;
- flags |= TASKQ_THREADS_CPU_PCT;
- value = zio_taskq_batch_pct;
- break;
+ tqs->stqs_count = count;
+ tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP);
- case zti_mode_online_percent:
- flags |= TASKQ_THREADS_CPU_PCT;
- break;
+ for (i = 0; i < count; i++) {
+ taskq_t *tq;
- default:
- panic("unrecognized mode for %s taskq (%u:%u) in "
- "spa_activate()",
- name, mode, value);
- break;
+ switch (mode) {
+ case ZTI_MODE_FIXED:
+ ASSERT3U(value, >=, 1);
+ value = MAX(value, 1);
+ break;
+
+ case ZTI_MODE_BATCH:
+ batch = B_TRUE;
+ flags |= TASKQ_THREADS_CPU_PCT;
+ value = zio_taskq_batch_pct;
+ break;
+
+ case ZTI_MODE_ONLINE_PERCENT:
+ flags |= TASKQ_THREADS_CPU_PCT;
+ break;
+
+ default:
+ panic("unrecognized mode for %s_%s taskq (%u:%u) in "
+ "spa_activate()",
+ zio_type_name[t], zio_taskq_types[q], mode, value);
+ break;
+ }
+
+ if (count > 1) {
+ (void) snprintf(name, sizeof (name), "%s_%s_%u",
+ zio_type_name[t], zio_taskq_types[q], i);
+ } else {
+ (void) snprintf(name, sizeof (name), "%s_%s",
+ zio_type_name[t], zio_taskq_types[q]);
+ }
+
+ if (zio_taskq_sysdc && spa->spa_proc != &p0) {
+ if (batch)
+ flags |= TASKQ_DC_BATCH;
+
+ tq = taskq_create_sysdc(name, value, 50, INT_MAX,
+ spa->spa_proc, zio_taskq_basedc, flags);
+ } else {
+ tq = taskq_create_proc(name, value, maxclsyspri, 50,
+ INT_MAX, spa->spa_proc, flags);
+ }
+
+ tqs->stqs_taskq[i] = tq;
+ }
+}
+
+static void
+spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
+{
+ spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
+ uint_t i;
+
+ if (tqs->stqs_taskq == NULL) {
+ ASSERT3U(tqs->stqs_count, ==, 0);
+ return;
+ }
+
+ for (i = 0; i < tqs->stqs_count; i++) {
+ ASSERT3P(tqs->stqs_taskq[i], !=, NULL);
+ taskq_destroy(tqs->stqs_taskq[i]);
}
- if (zio_taskq_sysdc && spa->spa_proc != &p0) {
- if (batch)
- flags |= TASKQ_DC_BATCH;
+ kmem_free(tqs->stqs_taskq, tqs->stqs_count * sizeof (taskq_t *));
+ tqs->stqs_taskq = NULL;
+}
+
+/*
+ * Dispatch a task to the appropriate taskq for the ZFS I/O type and priority.
+ * Note that a type may have multiple discrete taskqs to avoid lock contention
+ * on the taskq itself. In that case we choose which taskq at random by using
+ * the low bits of gethrtime().
+ */
+void
+spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
+ task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent)
+{
+ spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
+ taskq_t *tq;
+
+ ASSERT3P(tqs->stqs_taskq, !=, NULL);
+ ASSERT3U(tqs->stqs_count, !=, 0);
+
+ if (tqs->stqs_count == 1) {
+ tq = tqs->stqs_taskq[0];
+ } else {
+ tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count];
+ }
+
+ taskq_dispatch_ent(tq, func, arg, flags, ent);
+}
+
+/*
+ * Same as spa_taskq_dispatch_ent() but block on the task until completion.
+ */
+void
+spa_taskq_dispatch_sync(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
+ task_func_t *func, void *arg, uint_t flags)
+{
+ spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
+ taskq_t *tq;
+ taskqid_t id;
+
+ ASSERT3P(tqs->stqs_taskq, !=, NULL);
+ ASSERT3U(tqs->stqs_count, !=, 0);
- return (taskq_create_sysdc(name, value, 50, INT_MAX,
- spa->spa_proc, zio_taskq_basedc, flags));
+ if (tqs->stqs_count == 1) {
+ tq = tqs->stqs_taskq[0];
+ } else {
+ tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count];
}
- return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX,
- spa->spa_proc, flags));
+
+ id = taskq_dispatch(tq, func, arg, flags);
+ if (id)
+ taskq_wait_id(tq, id);
}
static void
for (t = 0; t < ZIO_TYPES; t++) {
for (q = 0; q < ZIO_TASKQ_TYPES; q++) {
- const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
- enum zti_modes mode = ztip->zti_mode;
- uint_t value = ztip->zti_value;
- char name[32];
-
- (void) snprintf(name, sizeof (name),
- "%s_%s", zio_type_name[t], zio_taskq_types[q]);
-
- spa->spa_zio_taskq[t][q] =
- spa_taskq_create(spa, name, mode, value);
+ spa_taskqs_init(spa, t, q);
}
}
}
list_destroy(&spa->spa_config_dirty_list);
list_destroy(&spa->spa_state_dirty_list);
+ taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
+
for (t = 0; t < ZIO_TYPES; t++) {
for (q = 0; q < ZIO_TASKQ_TYPES; q++) {
- if (spa->spa_zio_taskq[t][q] != NULL)
- taskq_destroy(spa->spa_zio_taskq[t][q]);
- spa->spa_zio_taskq[t][q] = NULL;
+ spa_taskqs_fini(spa, t, q);
}
}
/*ARGSUSED*/
static int
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
- arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
+ const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
if (bp != NULL) {
zio_t *rio = arg;
if (spa_version(spa) >= SPA_VERSION_FEATURES) {
boolean_t missing_feat_read = B_FALSE;
- nvlist_t *unsup_feat;
+ nvlist_t *unsup_feat, *enabled_feat;
if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ,
&spa->spa_feat_for_read_obj) != 0) {
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
}
- VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
- 0);
+ enabled_feat = fnvlist_alloc();
+ unsup_feat = fnvlist_alloc();
if (!feature_is_supported(spa->spa_meta_objset,
spa->spa_feat_for_read_obj, spa->spa_feat_desc_obj,
- unsup_feat))
+ unsup_feat, enabled_feat))
missing_feat_read = B_TRUE;
if (spa_writeable(spa) || state == SPA_LOAD_TRYIMPORT) {
if (!feature_is_supported(spa->spa_meta_objset,
spa->spa_feat_for_write_obj, spa->spa_feat_desc_obj,
- unsup_feat))
+ unsup_feat, enabled_feat)) {
missing_feat_write = B_TRUE;
+ }
}
+ fnvlist_add_nvlist(spa->spa_load_info,
+ ZPOOL_CONFIG_ENABLED_FEAT, enabled_feat);
+
if (!nvlist_empty(unsup_feat)) {
- VERIFY(nvlist_add_nvlist(spa->spa_load_info,
- ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
+ fnvlist_add_nvlist(spa->spa_load_info,
+ ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat);
}
- nvlist_free(unsup_feat);
+ fnvlist_free(enabled_feat);
+ fnvlist_free(unsup_feat);
if (!missing_feat_read) {
fnvlist_add_boolean(spa->spa_load_info,
spa_load_state_t state = SPA_LOAD_OPEN;
int error;
int locked = B_FALSE;
+ int firstopen = B_FALSE;
*spapp = NULL;
if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
zpool_rewind_policy_t policy;
+ firstopen = B_TRUE;
+
zpool_get_rewind_policy(nvpolicy ? nvpolicy : spa->spa_config,
&policy);
if (policy.zrp_request & ZPOOL_DO_REWIND)
mutex_exit(&spa_namespace_lock);
}
+#ifdef _KERNEL
+ if (firstopen)
+ zvol_create_minors(spa->spa_name);
+#endif
+
*spapp = spa;
return (0);
mutex_exit(&spa_namespace_lock);
spa_history_log_version(spa, LOG_POOL_IMPORT);
+#ifdef _KERNEL
+ zvol_create_minors(pool);
+#endif
+
return (0);
}
* The evacuation succeeded. Remove any remaining MOS metadata
* associated with this vdev, and wait for these changes to sync.
*/
- ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0);
+ ASSERT0(vd->vdev_stat.vs_alloc);
txg = spa_vdev_config_enter(spa);
vd->vdev_removing = B_TRUE;
vdev_dirty(vd, 0, NULL, txg);
config = spa_config_generate(spa, spa->spa_root_vdev,
dmu_tx_get_txg(tx), B_FALSE);
+ /*
+ * If we're upgrading the spa version then make sure that
+ * the config object gets updated with the correct version.
+ */
+ if (spa->spa_ubsync.ub_version < spa->spa_uberblock.ub_version)
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
+ spa->spa_uberblock.ub_version);
+
spa_config_exit(spa, SCL_STATE, FTAG);
if (spa->spa_config_syncing)
*/
ASSERT(tx->tx_txg != TXG_INITIAL);
- ASSERT(version <= SPA_VERSION);
+ ASSERT(SPA_VERSION_IS_SUPPORTED(version));
ASSERT(version >= spa_version(spa));
spa->spa_uberblock.ub_version = version;
tx = dmu_tx_create_assigned(dp, txg);
+ spa->spa_sync_starttime = gethrtime();
+ taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
+ spa->spa_deadman_tqid = taskq_dispatch_delay(system_taskq,
+ spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
+ NSEC_TO_TICK(spa->spa_deadman_synctime));
+
/*
* If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
* set spa_deflate if we have no raid-z vdevs.
zio_t *zio = zio_root(spa, NULL, NULL, 0);
VERIFY3U(bpobj_iterate(defer_bpo,
spa_free_sync_cb, zio, tx), ==, 0);
- VERIFY3U(zio_wait(zio), ==, 0);
+ VERIFY0(zio_wait(zio));
}
/*
spa_errlog_sync(spa, txg);
dsl_pool_sync(dp, txg);
- if (pass <= SYNC_PASS_DEFERRED_FREE) {
+ if (pass < zfs_sync_pass_deferred_free) {
zio_t *zio = zio_root(spa, NULL, NULL, 0);
bplist_iterate(free_bpl, spa_free_sync_cb,
zio, tx);
rvd->vdev_children, txg, B_TRUE);
}
+ if (error == 0)
+ spa->spa_last_synced_guid = rvd->vdev_guid;
+
spa_config_exit(spa, SCL_STATE, FTAG);
if (error == 0)
}
dmu_tx_commit(tx);
+ taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
+ spa->spa_deadman_tqid = 0;
+
/*
* Clear the dirty config list.
*/
* future version would result in an unopenable pool, this shouldn't be
* possible.
*/
- ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION);
+ ASSERT(SPA_VERSION_IS_SUPPORTED(spa->spa_uberblock.ub_version));
ASSERT(version >= spa->spa_uberblock.ub_version);
spa->spa_uberblock.ub_version = version;