Illumos #3639 zpool.cache should skip over readonly pools
[zfs.git] / module / zfs / spa_config.c
index b2063bb..5e5b405 100644 (file)
@@ -20,8 +20,9 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/spa.h>
@@ -34,6 +35,7 @@
 #include <sys/utsname.h>
 #include <sys/systeminfo.h>
 #include <sys/sunddi.h>
+#include <sys/zfeature.h>
 #ifdef _KERNEL
 #include <sys/kobj.h>
 #include <sys/zone.h>
@@ -62,7 +64,8 @@ static uint64_t spa_config_generation = 1;
  * This can be overridden in userland to preserve an alternate namespace for
  * userland pools when doing testing.
  */
-const char *spa_config_path = ZPOOL_CACHE;
+char *spa_config_path = ZPOOL_CACHE;
+int zfs_autoimport_disable = 0;
 
 /*
  * Called when the module is first loaded, this routine loads the configuration
@@ -75,15 +78,17 @@ spa_config_load(void)
        void *buf = NULL;
        nvlist_t *nvlist, *child;
        nvpair_t *nvpair;
-       spa_t *spa;
        char *pathname;
        struct _buf *file;
        uint64_t fsize;
 
+       if (zfs_autoimport_disable)
+               return;
+
        /*
         * Open the configuration file.
         */
-       pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+       pathname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE);
 
        (void) snprintf(pathname, MAXPATHLEN, "%s%s",
            (rootdir != NULL) ? "./" : "", spa_config_path);
@@ -98,7 +103,7 @@ spa_config_load(void)
        if (kobj_get_filesize(file, &fsize) != 0)
                goto out;
 
-       buf = kmem_alloc(fsize, KM_SLEEP);
+       buf = kmem_alloc(fsize, KM_PUSHPAGE | KM_NODEBUG);
 
        /*
         * Read the nvlist from the file.
@@ -109,7 +114,7 @@ spa_config_load(void)
        /*
         * Unpack the nvlist.
         */
-       if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0)
+       if (nvlist_unpack(buf, fsize, &nvlist, KM_PUSHPAGE) != 0)
                goto out;
 
        /*
@@ -119,7 +124,6 @@ spa_config_load(void)
        mutex_enter(&spa_namespace_lock);
        nvpair = NULL;
        while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) {
-
                if (nvpair_type(nvpair) != DATA_TYPE_NVLIST)
                        continue;
 
@@ -127,13 +131,7 @@ spa_config_load(void)
 
                if (spa_lookup(nvpair_name(nvpair)) != NULL)
                        continue;
-               spa = spa_add(nvpair_name(nvpair), NULL);
-
-               /*
-                * We blindly duplicate the configuration here.  If it's
-                * invalid, we will catch it when the pool is first opened.
-                */
-               VERIFY(nvlist_dup(child, &spa->spa_config, 0) == 0);
+               (void) spa_add(nvpair_name(nvpair), child, NULL);
        }
        mutex_exit(&spa_namespace_lock);
 
@@ -168,11 +166,11 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
         */
        VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0);
 
-       buf = kmem_alloc(buflen, KM_SLEEP);
-       temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+       buf = kmem_alloc(buflen, KM_PUSHPAGE | KM_NODEBUG);
+       temp = kmem_zalloc(MAXPATHLEN, KM_PUSHPAGE);
 
        VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR,
-           KM_SLEEP) == 0);
+           KM_PUSHPAGE) == 0);
 
        /*
         * Write the configuration to disk.  We need to do the traditional
@@ -188,7 +186,6 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
                        (void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
                }
                (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
-               VN_RELE(vp);
        }
 
        (void) vn_remove(temp, UIO_SYSSPACE, RMFILE);
@@ -228,7 +225,15 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
                 */
                nvl = NULL;
                while ((spa = spa_next(spa)) != NULL) {
-                       if (spa == target && removing)
+                       /*
+                        * Skip over our own pool if we're about to remove
+                        * ourselves from the spa namespace, and skip any pool
+                        * that is readonly. Since we cannot guarantee that a
+                        * readonly pool would successfully import upon reboot,
+                        * we don't allow such pools in the cache file.
+                        */
+                       if ((spa == target && removing) ||
+                           !spa_writeable(spa))
                                continue;
 
                        mutex_enter(&spa->spa_props_lock);
@@ -242,7 +247,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
 
                        if (nvl == NULL)
                                VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME,
-                                   KM_SLEEP) == 0);
+                                   KM_PUSHPAGE) == 0);
 
                        VERIFY(nvlist_add_nvlist(nvl, spa->spa_name,
                            spa->spa_config) == 0);
@@ -267,7 +272,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
        spa_config_generation++;
 
        if (postsysevent)
-               spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC);
+               spa_event_notify(target, NULL, FM_EREPORT_ZFS_CONFIG_SYNC);
 }
 
 /*
@@ -285,7 +290,7 @@ spa_all_configs(uint64_t *generation)
        if (*generation == spa_config_generation)
                return (NULL);
 
-       VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+       VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0);
 
        mutex_enter(&spa_namespace_lock);
        while ((spa = spa_next(spa)) != NULL) {
@@ -325,6 +330,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
        vdev_t *rvd = spa->spa_root_vdev;
        unsigned long hostid = 0;
        boolean_t locked = B_FALSE;
+       uint64_t split_guid;
 
        if (vd == NULL) {
                vd = rvd;
@@ -341,7 +347,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
        if (txg == -1ULL)
                txg = spa->spa_config_txg;
 
-       VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+       VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0);
 
        VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
            spa_version(spa)) == 0);
@@ -353,6 +359,10 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
            txg) == 0);
        VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
            spa_guid(spa)) == 0);
+       VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
+           ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);
+
+
 #ifdef _KERNEL
        hostid = zone_get_hostid(NULL);
 #else  /* _KERNEL */
@@ -381,12 +391,69 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
                        VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
                            1ULL) == 0);
                vd = vd->vdev_top;              /* label contains top config */
+       } else {
+               /*
+                * Only add the (potentially large) split information
+                * in the MOS config, and not in the vdev labels.
+                */
+               if (spa->spa_config_splitting != NULL)
+                       VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
+                           spa->spa_config_splitting) == 0);
        }
 
-       nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE);
+       /*
+        * Add the top-level config.  We even add this on pools which
+        * don't support holes in the namespace.
+        */
+       vdev_top_config_generate(spa, config);
+
+       /*
+        * If we're splitting, record the original pool's guid.
+        */
+       if (spa->spa_config_splitting != NULL &&
+           nvlist_lookup_uint64(spa->spa_config_splitting,
+           ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) {
+               VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID,
+                   split_guid) == 0);
+       }
+
+       nvroot = vdev_config_generate(spa, vd, getstats, 0);
        VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
        nvlist_free(nvroot);
 
+       /*
+        * Store what's necessary for reading the MOS in the label.
+        */
+       VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
+           spa->spa_label_features) == 0);
+
+       if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
+               ddt_histogram_t *ddh;
+               ddt_stat_t *dds;
+               ddt_object_t *ddo;
+
+               ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_PUSHPAGE);
+               ddt_get_dedup_histogram(spa, ddh);
+               VERIFY(nvlist_add_uint64_array(config,
+                   ZPOOL_CONFIG_DDT_HISTOGRAM,
+                   (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0);
+               kmem_free(ddh, sizeof (ddt_histogram_t));
+
+               ddo = kmem_zalloc(sizeof (ddt_object_t), KM_PUSHPAGE);
+               ddt_get_dedup_object_stats(spa, ddo);
+               VERIFY(nvlist_add_uint64_array(config,
+                   ZPOOL_CONFIG_DDT_OBJ_STATS,
+                   (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0);
+               kmem_free(ddo, sizeof (ddt_object_t));
+
+               dds = kmem_zalloc(sizeof (ddt_stat_t), KM_PUSHPAGE);
+               ddt_get_dedup_stats(spa, dds);
+               VERIFY(nvlist_add_uint64_array(config,
+                   ZPOOL_CONFIG_DDT_STATS,
+                   (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0);
+               kmem_free(dds, sizeof (ddt_stat_t));
+       }
+
        if (locked)
                spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
 
@@ -442,3 +509,19 @@ spa_config_update(spa_t *spa, int what)
        if (what == SPA_CONFIG_UPDATE_POOL)
                spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS);
 }
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(spa_config_sync);
+EXPORT_SYMBOL(spa_config_load);
+EXPORT_SYMBOL(spa_all_configs);
+EXPORT_SYMBOL(spa_config_set);
+EXPORT_SYMBOL(spa_config_generate);
+EXPORT_SYMBOL(spa_config_update);
+
+module_param(spa_config_path, charp, 0444);
+MODULE_PARM_DESC(spa_config_path, "SPA config file (/etc/zfs/zpool.cache)");
+
+module_param(zfs_autoimport_disable, int, 0644);
+MODULE_PARM_DESC(zfs_autoimport_disable, "Disable pool import at module load");
+
+#endif