Add script for builtin module building.
[zfs.git] / module / zfs / spa.c
index e037f41..40849bc 100644 (file)
@@ -21,6 +21,8 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 /*
@@ -40,6 +42,7 @@
 #include <sys/zil.h>
 #include <sys/ddt.h>
 #include <sys/vdev_impl.h>
+#include <sys/vdev_disk.h>
 #include <sys/metaslab.h>
 #include <sys/metaslab_impl.h>
 #include <sys/uberblock_impl.h>
@@ -94,7 +97,7 @@ typedef struct zio_taskq_info {
 } zio_taskq_info_t;
 
 static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
-       "issue", "issue_high", "intr", "intr_high"
+       "iss", "iss_h", "int", "int_h"
 };
 
 /*
@@ -105,8 +108,8 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
        /* ISSUE        ISSUE_HIGH      INTR            INTR_HIGH */
        { ZTI_ONE,      ZTI_NULL,       ZTI_ONE,        ZTI_NULL },
        { ZTI_FIX(8),   ZTI_NULL,       ZTI_BATCH,      ZTI_NULL },
-       { ZTI_BATCH,    ZTI_FIX(5),     ZTI_FIX(8),     ZTI_FIX(5) },
-       { ZTI_FIX(100), ZTI_NULL,       ZTI_ONE,        ZTI_NULL },
+       { ZTI_BATCH,    ZTI_FIX(5),     ZTI_FIX(16),    ZTI_FIX(5) },
+       { ZTI_PCT(100), ZTI_NULL,       ZTI_ONE,        ZTI_NULL },
        { ZTI_ONE,      ZTI_NULL,       ZTI_ONE,        ZTI_NULL },
        { ZTI_ONE,      ZTI_NULL,       ZTI_ONE,        ZTI_NULL },
 };
@@ -229,7 +232,9 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
        zap_attribute_t za;
        int err;
 
-       VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+       err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP);
+       if (err)
+               return err;
 
        mutex_enter(&spa->spa_props_lock);
 
@@ -241,7 +246,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
        /* If no pool property object, no more prop to get. */
        if (mos == NULL || spa->spa_pool_props_object == 0) {
                mutex_exit(&spa->spa_props_lock);
-               return (0);
+               goto out;
        }
 
        /*
@@ -566,6 +571,43 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
 }
 
 /*
+ * Change the GUID for the pool.  This is done so that we can later
+ * re-import a pool built from a clone of our own vdevs.  We will modify
+ * the root vdev's guid, our own pool guid, and then mark all of our
+ * vdevs dirty.  Note that we must make sure that all our vdevs are
+ * online when we do this, or else any vdevs that weren't present
+ * would be orphaned from our pool.  We are also going to issue a
+ * sysevent to update any watchers.
+ */
+int
+spa_change_guid(spa_t *spa)
+{
+       uint64_t        oldguid, newguid;
+       uint64_t        txg;
+
+       if (!(spa_mode_global & FWRITE))
+               return (EROFS);         /* global SPA mode lacks FWRITE */
+
+       txg = spa_vdev_enter(spa);      /* each return below calls spa_vdev_exit() */
+
+       if (spa->spa_root_vdev->vdev_state != VDEV_STATE_HEALTHY)
+               return (spa_vdev_exit(spa, NULL, txg, ENXIO)); /* root vdev not HEALTHY */
+
+       oldguid = spa_guid(spa);
+       newguid = spa_generate_guid(NULL);
+       ASSERT3U(oldguid, !=, newguid); /* the fresh guid must differ from the old one */
+
+       spa->spa_root_vdev->vdev_guid = newguid;
+       spa->spa_root_vdev->vdev_guid_sum += (newguid - oldguid); /* shift sum by the guid delta */
+
+       vdev_config_dirty(spa->spa_root_vdev);  /* mark the root vdev's config dirty */
+
+       spa_event_notify(spa, NULL, FM_EREPORT_ZFS_POOL_REGUID); /* REGUID event for watchers */
+
+       return (spa_vdev_exit(spa, NULL, txg, 0));
+}
+
+/*
  * ==========================================================================
  * SPA state manipulation (open/create/destroy/import/export)
  * ==========================================================================
@@ -611,9 +653,8 @@ spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
 
 static taskq_t *
 spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode,
-    uint_t value)
+    uint_t value, uint_t flags)
 {
-       uint_t flags = TASKQ_PREPOPULATE;
        boolean_t batch = B_FALSE;
 
        switch (mode) {
@@ -663,18 +704,22 @@ spa_create_zio_taskqs(spa_t *spa)
                        const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
                        enum zti_modes mode = ztip->zti_mode;
                        uint_t value = ztip->zti_value;
+                       uint_t flags = 0;
                        char name[32];
 
+                       if (t == ZIO_TYPE_WRITE)
+                               flags |= TASKQ_NORECLAIM;
+
                        (void) snprintf(name, sizeof (name),
                            "%s_%s", zio_type_name[t], zio_taskq_types[q]);
 
                        spa->spa_zio_taskq[t][q] =
-                           spa_taskq_create(spa, name, mode, value);
+                           spa_taskq_create(spa, name, mode, value, flags);
                }
        }
 }
 
-#ifdef _KERNEL
+#if defined(_KERNEL) && defined(HAVE_SPA_THREAD)
 static void
 spa_thread(void *arg)
 {
@@ -764,6 +809,7 @@ spa_activate(spa_t *spa, int mode)
        ASSERT(spa->spa_proc == &p0);
        spa->spa_did = 0;
 
+#ifdef HAVE_SPA_THREAD
        /* Only create a process if we're going to be around a while. */
        if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) {
                if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri,
@@ -784,6 +830,7 @@ spa_activate(spa_t *spa, int mode)
 #endif
                }
        }
+#endif /* HAVE_SPA_THREAD */
        mutex_exit(&spa->spa_proc_lock);
 
        /* If we didn't create a process, we need to create our taskqs. */
@@ -996,8 +1043,10 @@ spa_unload(spa_t *spa)
        }
        spa->spa_spares.sav_count = 0;
 
-       for (i = 0; i < spa->spa_l2cache.sav_count; i++)
+       for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
+               vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]);
                vdev_free(spa->spa_l2cache.sav_vdevs[i]);
+       }
        if (spa->spa_l2cache.sav_vdevs) {
                kmem_free(spa->spa_l2cache.sav_vdevs,
                    spa->spa_l2cache.sav_count * sizeof (void *));
@@ -1220,11 +1269,13 @@ spa_load_l2cache(spa_t *spa)
 
                vd = oldvdevs[i];
                if (vd != NULL) {
+                       ASSERT(vd->vdev_isl2cache);
+
                        if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
                            pool != 0ULL && l2arc_vdev_present(vd))
                                l2arc_remove_vdev(vd);
-                       (void) vdev_close(vd);
-                       spa_l2cache_remove(vd);
+                       vdev_clear_stats(vd);
+                       vdev_free(vd);
                }
        }
 
@@ -1270,7 +1321,7 @@ load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
        nvsize = *(uint64_t *)db->db_data;
        dmu_buf_rele(db, FTAG);
 
-       packed = kmem_alloc(nvsize, KM_SLEEP);
+       packed = kmem_alloc(nvsize, KM_SLEEP | KM_NODEBUG);
        error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed,
            DMU_READ_PREFETCH);
        if (error == 0)
@@ -1758,7 +1809,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
            spa_guid_exists(pool_guid, 0)) {
                error = EEXIST;
        } else {
-               spa->spa_load_guid = pool_guid;
+               spa->spa_config_guid = pool_guid;
 
                if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT,
                    &nvl) == 0) {
@@ -1871,7 +1922,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
         */
        if (type != SPA_IMPORT_ASSEMBLE) {
                spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
-               error = vdev_validate(rvd);
+               error = vdev_validate(rvd, mosconfig);
                spa_config_exit(spa, SCL_ALL, FTAG);
 
                if (error != 0)
@@ -1968,7 +2019,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
                                cmn_err(CE_WARN, "pool '%s' could not be "
                                    "loaded as it was last accessed by "
                                    "another system (host: %s hostid: 0x%lx). "
-                                   "See: http://www.sun.com/msg/ZFS-8000-EY",
+                                   "See: http://zfsonlinux.org/msg/ZFS-8000-EY",
                                    spa_name(spa), hostname,
                                    (unsigned long)hostid);
                                return (EBADF);
@@ -2732,6 +2783,7 @@ spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
                if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
                    strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
                        error = ENOTBLK;
+                       vdev_free(vd);
                        goto out;
                }
 #endif
@@ -2841,10 +2893,6 @@ spa_l2cache_drop(spa_t *spa)
                if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
                    pool != 0ULL && l2arc_vdev_present(vd))
                        l2arc_remove_vdev(vd);
-               if (vd->vdev_isl2cache)
-                       spa_l2cache_remove(vd);
-               vdev_clear_stats(vd);
-               (void) vdev_close(vd);
        }
 }
 
@@ -3840,7 +3888,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
        pvd = oldvd->vdev_parent;
 
        if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
-           VDEV_ALLOC_ADD)) != 0)
+           VDEV_ALLOC_ATTACH)) != 0)
                return (spa_vdev_exit(spa, NULL, txg, EINVAL));
 
        if (newrootvd->vdev_children != 1)
@@ -5217,7 +5265,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
         * saves us a pre-read to get data we don't actually care about.
         */
        bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
-       packed = kmem_alloc(bufsize, KM_SLEEP);
+       packed = vmem_alloc(bufsize, KM_SLEEP);
 
        VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
            KM_SLEEP) == 0);
@@ -5225,7 +5273,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
 
        dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);
 
-       kmem_free(packed, bufsize);
+       vmem_free(packed, bufsize);
 
        VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
        dmu_buf_will_dirty(db, tx);
@@ -5858,3 +5906,62 @@ spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
        zfs_ereport_post(name, spa, vd, NULL, 0, 0);
 #endif
 }
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/* state manipulation functions */
+EXPORT_SYMBOL(spa_open);
+EXPORT_SYMBOL(spa_open_rewind);
+EXPORT_SYMBOL(spa_get_stats);
+EXPORT_SYMBOL(spa_create);
+EXPORT_SYMBOL(spa_import_rootpool);
+EXPORT_SYMBOL(spa_import);
+EXPORT_SYMBOL(spa_tryimport);
+EXPORT_SYMBOL(spa_destroy);
+EXPORT_SYMBOL(spa_export);
+EXPORT_SYMBOL(spa_reset);
+EXPORT_SYMBOL(spa_async_request);
+EXPORT_SYMBOL(spa_async_suspend);
+EXPORT_SYMBOL(spa_async_resume);
+EXPORT_SYMBOL(spa_inject_addref);
+EXPORT_SYMBOL(spa_inject_delref);
+EXPORT_SYMBOL(spa_scan_stat_init);
+EXPORT_SYMBOL(spa_scan_get_stats);
+
+/* device manipulation */
+EXPORT_SYMBOL(spa_vdev_add);
+EXPORT_SYMBOL(spa_vdev_attach);
+EXPORT_SYMBOL(spa_vdev_detach);
+EXPORT_SYMBOL(spa_vdev_remove);
+EXPORT_SYMBOL(spa_vdev_setpath);
+EXPORT_SYMBOL(spa_vdev_setfru);
+EXPORT_SYMBOL(spa_vdev_split_mirror);
+
+/* spare state (which is global across all pools) */
+EXPORT_SYMBOL(spa_spare_add);
+EXPORT_SYMBOL(spa_spare_remove);
+EXPORT_SYMBOL(spa_spare_exists);
+EXPORT_SYMBOL(spa_spare_activate);
+
+/* L2ARC state (which is global across all pools) */
+EXPORT_SYMBOL(spa_l2cache_add);
+EXPORT_SYMBOL(spa_l2cache_remove);
+EXPORT_SYMBOL(spa_l2cache_exists);
+EXPORT_SYMBOL(spa_l2cache_activate);
+EXPORT_SYMBOL(spa_l2cache_drop);
+
+/* scanning */
+EXPORT_SYMBOL(spa_scan);
+EXPORT_SYMBOL(spa_scan_stop);
+
+/* spa syncing */
+EXPORT_SYMBOL(spa_sync); /* only for DMU use */
+EXPORT_SYMBOL(spa_sync_allpools);
+
+/* properties */
+EXPORT_SYMBOL(spa_prop_set);
+EXPORT_SYMBOL(spa_prop_get);
+EXPORT_SYMBOL(spa_prop_clear_bootfs);
+
+/* asynchronous event notification */
+EXPORT_SYMBOL(spa_event_notify);
+#endif