+
+ kmem_free(vd, gcount * sizeof (vdev_t *));
+}
+/*
+ * Load a pool using the configuration currently held in spa->spa_config.
+ *
+ * The pool GUID, optional comment, version and config txg are read from the
+ * config nvlist, after which the real work is delegated to spa_load_impl().
+ * When the load is an import or try-import and spa_guid_exists() reports the
+ * pool GUID, spa_load_impl() is not called and the result is EEXIST.
+ *
+ * spa:       pool being loaded; its spa_config supplies the configuration.
+ * state:     load state; passed through to spa_load_impl().
+ * type:      import type; passed through to spa_load_impl().
+ * mosconfig: passed through to spa_load_impl(), which re-invokes this
+ *            function with B_TRUE after it has read the config from the pool.
+ *
+ * Returns EINVAL if the config has no pool GUID (in which case no spa state
+ * is touched), EEXIST for the duplicate-GUID case above, and otherwise
+ * whatever spa_load_impl() returns.  Except on the EINVAL path, spa_minref,
+ * spa_load_state and spa_ena are always updated before returning.
+ */
+static int
+spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
+ boolean_t mosconfig)
+{
+ nvlist_t *config = spa->spa_config;
+ char *ereport = FM_EREPORT_ZFS_POOL;
+ char *comment;
+ int error;
+ uint64_t pool_guid;
+ nvlist_t *nvl;
+ /*
+ * A config without a pool GUID cannot be loaded.  Nothing in the spa has
+ * been modified at this point, so return straight away.
+ */
+ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid))
+ return (EINVAL);
+ /*
+ * The pool comment is optional.  When the config carries one,
+ * spa_comment is set from spa_strdup(); it must not already be set.
+ */
+ ASSERT(spa->spa_comment == NULL);
+ if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0)
+ spa->spa_comment = spa_strdup(comment);
+
+ /*
+ * Versioning wasn't explicitly added to the label until later, so if
+ * it's not present treat it as the initial version.
+ */
+ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
+ &spa->spa_ubsync.ub_version) != 0)
+ spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL;
+ /*
+ * The config txg is optional; the lookup result is deliberately
+ * discarded.
+ */
+ (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
+ &spa->spa_config_txg);
+ /*
+ * For an import or try-import, refuse to go any further if
+ * spa_guid_exists() reports this pool GUID; spa_load_impl() is then
+ * skipped entirely and only the common bookkeeping below runs.
+ */
+ if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
+ spa_guid_exists(pool_guid, 0)) {
+ error = EEXIST;
+ } else {
+ spa->spa_config_guid = pool_guid;
+ /*
+ * If the config contains a ZPOOL_CONFIG_SPLIT nvlist, keep a
+ * duplicate of it in spa_config_splitting.
+ */
+ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT,
+ &nvl) == 0) {
+ VERIFY(nvlist_dup(nvl, &spa->spa_config_splitting,
+ KM_PUSHPAGE) == 0);
+ }
+ /*
+ * Discard any previous spa_load_info and start this load
+ * attempt with a freshly allocated nvlist.
+ */
+ nvlist_free(spa->spa_load_info);
+ spa->spa_load_info = fnvlist_alloc();
+ /*
+ * Record when the load started.  The timestamp is zeroed again
+ * below if the load fails with anything other than EEXIST.
+ */
+ gethrestime(&spa->spa_loaded_ts);
+ error = spa_load_impl(spa, pool_guid, config, state, type,
+ mosconfig, &ereport);
+ }
+ /*
+ * Common bookkeeping, run whether or not the load succeeded.
+ * spa_minref is set to the current count of spa_refcount.  On failure
+ * the load timestamp is zeroed unless the error is EEXIST (on the
+ * duplicate-GUID path above it was never set by this call), and an
+ * ereport is posted unless the error is EBADF, which spa_load_impl()
+ * returns for a hostid mismatch after logging its own warning.
+ */
+ spa->spa_minref = refcount_count(&spa->spa_refcount);
+ if (error) {
+ if (error != EEXIST) {
+ spa->spa_loaded_ts.tv_sec = 0;
+ spa->spa_loaded_ts.tv_nsec = 0;
+ }
+ if (error != EBADF) {
+ zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
+ }
+ }
+ spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
+ spa->spa_ena = 0;
+
+ return (error);
+}
+
+/*
+ * Load an existing storage pool, using the pool's builtin spa_config as a
+ * source of configuration information.
+ */
+__attribute__((always_inline))
+static inline int
+spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
+ spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
+ char **ereport)
+{
+ int error = 0;
+ nvlist_t *nvroot = NULL;
+ nvlist_t *label;
+ vdev_t *rvd;
+ uberblock_t *ub = &spa->spa_uberblock;
+ uint64_t children, config_cache_txg = spa->spa_config_txg;
+ int orig_mode = spa->spa_mode;
+ int parse;
+ uint64_t obj;
+ boolean_t missing_feat_write = B_FALSE;
+
+ /*
+ * If this is an untrusted config, access the pool in read-only mode.
+ * This prevents things like resilvering recently removed devices.
+ */
+ if (!mosconfig)
+ spa->spa_mode = FREAD;
+
+ ASSERT(MUTEX_HELD(&spa_namespace_lock));
+
+ spa->spa_load_state = state;
+
+ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot))
+ return (EINVAL);
+
+ parse = (type == SPA_IMPORT_EXISTING ?
+ VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT);
+
+ /*
+ * Create "The Godfather" zio to hold all async IOs
+ */
+ spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);
+
+ /*
+ * Parse the configuration into a vdev tree. We explicitly set the
+ * value that will be returned by spa_version() since parsing the
+ * configuration requires knowing the version number.
+ */
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, parse);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+
+ if (error != 0)
+ return (error);
+
+ ASSERT(spa->spa_root_vdev == rvd);
+
+ if (type != SPA_IMPORT_ASSEMBLE) {
+ ASSERT(spa_guid(spa) == pool_guid);
+ }
+
+ /*
+ * Try to open all vdevs, loading each label in the process.
+ */
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ error = vdev_open(rvd);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+ if (error != 0)
+ return (error);
+
+ /*
+ * We need to validate the vdev labels against the configuration that
+ * we have in hand, which is dependent on the setting of mosconfig. If
+ * mosconfig is true then we're validating the vdev labels based on
+ * that config. Otherwise, we're validating against the cached config
+ * (zpool.cache) that was read when we loaded the zfs module, and then
+ * later we will recursively call spa_load() and validate against
+ * the vdev config.
+ *
+ * If we're assembling a new pool that's been split off from an
+ * existing pool, the labels haven't yet been updated so we skip
+ * validation for now.
+ */
+ if (type != SPA_IMPORT_ASSEMBLE) {
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ error = vdev_validate(rvd, mosconfig);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+
+ if (error != 0)
+ return (error);
+
+ if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
+ return (ENXIO);
+ }
+
+ /*
+ * Find the best uberblock.
+ */
+ vdev_uberblock_load(rvd, ub, &label);
+
+ /*
+ * If we weren't able to find a single valid uberblock, return failure.
+ */
+ if (ub->ub_txg == 0) {
+ nvlist_free(label);
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO));
+ }
+
+ /*
+ * If the pool has an unsupported version we can't open it.
+ */
+ if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) {
+ nvlist_free(label);
+ return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP));
+ }
+
+ if (ub->ub_version >= SPA_VERSION_FEATURES) {
+ nvlist_t *features;
+
+ /*
+ * If we weren't able to find what's necessary for reading the
+ * MOS in the label, return failure.
+ */
+ if (label == NULL || nvlist_lookup_nvlist(label,
+ ZPOOL_CONFIG_FEATURES_FOR_READ, &features) != 0) {
+ nvlist_free(label);
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
+ ENXIO));
+ }
+
+ /*
+ * Update our in-core representation with the definitive values
+ * from the label.
+ */
+ nvlist_free(spa->spa_label_features);
+ VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0);
+ }
+
+ nvlist_free(label);
+
+ /*
+ * Look through entries in the label nvlist's features_for_read. If
+ * there is a feature listed there which we don't understand then we
+ * cannot open a pool.
+ */
+ if (ub->ub_version >= SPA_VERSION_FEATURES) {
+ nvlist_t *unsup_feat;
+ nvpair_t *nvp;
+
+ VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
+ 0);
+
+ for (nvp = nvlist_next_nvpair(spa->spa_label_features, NULL);
+ nvp != NULL;
+ nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) {
+ if (!zfeature_is_supported(nvpair_name(nvp))) {
+ VERIFY(nvlist_add_string(unsup_feat,
+ nvpair_name(nvp), "") == 0);
+ }
+ }
+
+ if (!nvlist_empty(unsup_feat)) {
+ VERIFY(nvlist_add_nvlist(spa->spa_load_info,
+ ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
+ nvlist_free(unsup_feat);
+ return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
+ ENOTSUP));
+ }
+
+ nvlist_free(unsup_feat);
+ }
+
+ /*
+ * If the vdev guid sum doesn't match the uberblock, we have an
+ * incomplete configuration. We first check to see if the pool
+ * is aware of the complete config (i.e ZPOOL_CONFIG_VDEV_CHILDREN).
+ * If it is, defer the vdev_guid_sum check till later so we
+ * can handle missing vdevs.
+ */
+ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
+ &children) != 0 && mosconfig && type != SPA_IMPORT_ASSEMBLE &&
+ rvd->vdev_guid_sum != ub->ub_guid_sum)
+ return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO));
+
+ if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) {
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ spa_try_repair(spa, config);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+ nvlist_free(spa->spa_config_splitting);
+ spa->spa_config_splitting = NULL;
+ }
+
+ /*
+ * Initialize internal SPA structures.
+ */
+ spa->spa_state = POOL_STATE_ACTIVE;
+ spa->spa_ubsync = spa->spa_uberblock;
+ spa->spa_verify_min_txg = spa->spa_extreme_rewind ?
+ TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1;
+ spa->spa_first_txg = spa->spa_last_ubsync_txg ?
+ spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1;
+ spa->spa_claim_max_txg = spa->spa_first_txg;
+ spa->spa_prev_software_version = ub->ub_software_version;
+
+ error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
+ if (error)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
+
+ if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
+ if (spa_version(spa) >= SPA_VERSION_FEATURES) {
+ boolean_t missing_feat_read = B_FALSE;
+ nvlist_t *unsup_feat, *enabled_feat;
+
+ if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ,
+ &spa->spa_feat_for_read_obj) != 0) {
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ }
+
+ if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE,
+ &spa->spa_feat_for_write_obj) != 0) {
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ }
+
+ if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS,
+ &spa->spa_feat_desc_obj) != 0) {
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ }
+
+ enabled_feat = fnvlist_alloc();
+ unsup_feat = fnvlist_alloc();
+
+ if (!feature_is_supported(spa->spa_meta_objset,
+ spa->spa_feat_for_read_obj, spa->spa_feat_desc_obj,
+ unsup_feat, enabled_feat))
+ missing_feat_read = B_TRUE;
+
+ if (spa_writeable(spa) || state == SPA_LOAD_TRYIMPORT) {
+ if (!feature_is_supported(spa->spa_meta_objset,
+ spa->spa_feat_for_write_obj, spa->spa_feat_desc_obj,
+ unsup_feat, enabled_feat)) {
+ missing_feat_write = B_TRUE;
+ }
+ }
+
+ fnvlist_add_nvlist(spa->spa_load_info,
+ ZPOOL_CONFIG_ENABLED_FEAT, enabled_feat);
+
+ if (!nvlist_empty(unsup_feat)) {
+ fnvlist_add_nvlist(spa->spa_load_info,
+ ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat);
+ }
+
+ fnvlist_free(enabled_feat);
+ fnvlist_free(unsup_feat);
+
+ if (!missing_feat_read) {
+ fnvlist_add_boolean(spa->spa_load_info,
+ ZPOOL_CONFIG_CAN_RDONLY);
+ }
+
+ /*
+ * If the state is SPA_LOAD_TRYIMPORT, our objective is
+ * twofold: to determine whether the pool is available for
+ * import in read-write mode and (if it is not) whether the
+ * pool is available for import in read-only mode. If the pool
+ * is available for import in read-write mode, it is displayed
+ * as available in userland; if it is not available for import
+ * in read-only mode, it is displayed as unavailable in
+ * userland. If the pool is available for import in read-only
+ * mode but not read-write mode, it is displayed as unavailable
+ * in userland with a special note that the pool is actually
+ * available for open in read-only mode.
+ *
+ * As a result, if the state is SPA_LOAD_TRYIMPORT and we are
+ * missing a feature for write, we must first determine whether
+ * the pool can be opened read-only before returning to
+ * userland in order to know whether to display the
+ * abovementioned note.
+ */
+ if (missing_feat_read || (missing_feat_write &&
+ spa_writeable(spa))) {
+ return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
+ ENOTSUP));
+ }
+ }
+
+ spa->spa_is_initializing = B_TRUE;
+ error = dsl_pool_open(spa->spa_dsl_pool);
+ spa->spa_is_initializing = B_FALSE;
+ if (error != 0)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
+ if (!mosconfig) {
+ uint64_t hostid;
+ nvlist_t *policy = NULL, *nvconfig;
+
+ if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
+ if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig,
+ ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
+ char *hostname;
+ unsigned long myhostid = 0;
+
+ VERIFY(nvlist_lookup_string(nvconfig,
+ ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);
+
+#ifdef _KERNEL
+ myhostid = zone_get_hostid(NULL);
+#else /* _KERNEL */
+ /*
+ * We're emulating the system's hostid in userland, so
+ * we can't use zone_get_hostid().
+ */
+ (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid);
+#endif /* _KERNEL */
+ if (hostid != 0 && myhostid != 0 &&
+ hostid != myhostid) {
+ nvlist_free(nvconfig);
+ cmn_err(CE_WARN, "pool '%s' could not be "
+ "loaded as it was last accessed by "
+ "another system (host: %s hostid: 0x%lx). "
+ "See: http://zfsonlinux.org/msg/ZFS-8000-EY",
+ spa_name(spa), hostname,
+ (unsigned long)hostid);
+ return (EBADF);
+ }
+ }
+ if (nvlist_lookup_nvlist(spa->spa_config,
+ ZPOOL_REWIND_POLICY, &policy) == 0)
+ VERIFY(nvlist_add_nvlist(nvconfig,
+ ZPOOL_REWIND_POLICY, policy) == 0);
+
+ spa_config_set(spa, nvconfig);
+ spa_unload(spa);
+ spa_deactivate(spa);
+ spa_activate(spa, orig_mode);
+
+ return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE));
+ }
+
+ if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj) != 0)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj);
+ if (error != 0)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
+ /*
+ * Load the bit that tells us to use the new accounting function
+ * (raid-z deflation). If we have an older pool, this will not
+ * be present.
+ */
+ error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate);
+ if (error != 0 && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
+ error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION,
+ &spa->spa_creation_version);
+ if (error != 0 && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
+ /*
+ * Load the persistent error log. If we have an older pool, this will
+ * not be present.
+ */
+ error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last);
+ if (error != 0 && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
+ error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB,
+ &spa->spa_errlog_scrub);
+ if (error != 0 && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
+ /*
+ * Load the history object. If we have an older pool, this
+ * will not be present.
+ */
+ error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history);
+ if (error != 0 && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
+ /*
+ * If we're assembling the pool from the split-off vdevs of
+ * an existing pool, we don't want to attach the spares & cache
+ * devices.
+ */
+
+ /*
+ * Load any hot spares for this pool.
+ */
+ error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object);
+ if (error != 0 && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ if (error == 0 && type != SPA_IMPORT_ASSEMBLE) {