Illumos #3006
[zfs.git] / module / zfs / space_map.c
index 1cdacc8..ab7cb7a 100644 (file)
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
 
 #include <sys/zfs_context.h>
 #include <sys/spa.h>
 #include <sys/zio.h>
 #include <sys/space_map.h>
 
+static kmem_cache_t *space_seg_cache;
+
+void
+space_map_init(void)	/* creates the file-scope space_seg_cache that space_seg_t nodes are allocated from */
+{
+       ASSERT(space_seg_cache == NULL);	/* cache must not already exist; space_map_fini() resets the pointer to NULL */
+       space_seg_cache = kmem_cache_create("space_seg_cache",	/* objects are sizeof (space_seg_t) bytes */
+           sizeof (space_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);	/* every remaining argument is 0/NULL; NOTE(review): per kmem_cache_create(9F) that should mean default alignment and no constructor/destructor/reclaim callbacks - confirm */
+}
+
+void
+space_map_fini(void)	/* counterpart of space_map_init(): destroys space_seg_cache */
+{
+       kmem_cache_destroy(space_seg_cache);
+       space_seg_cache = NULL;	/* restores the NULL state that space_map_init() asserts on entry */
+}
+
 /*
  * Space map routines.
  * NOTE: caller is responsible for all locking.
@@ -73,7 +93,7 @@ void
 space_map_destroy(space_map_t *sm)
 {
        ASSERT(!sm->sm_loaded && !sm->sm_loading);
-       VERIFY3U(sm->sm_space, ==, 0);
+       VERIFY0(sm->sm_space);	/* sm_space must already be 0 before the AVL tree and condvar are destroyed; replaces the VERIFY3U(..., ==, 0) spelling */
        avl_destroy(&sm->sm_root);
        cv_destroy(&sm->sm_load_cv);
 }
@@ -116,19 +136,33 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
 
        if (merge_before && merge_after) {
                avl_remove(&sm->sm_root, ss_before);
+               if (sm->sm_pp_root) {
+                       avl_remove(sm->sm_pp_root, ss_before);
+                       avl_remove(sm->sm_pp_root, ss_after);
+               }
                ss_after->ss_start = ss_before->ss_start;
-               kmem_free(ss_before, sizeof (*ss_before));
+               kmem_cache_free(space_seg_cache, ss_before);
+               ss = ss_after;
        } else if (merge_before) {
                ss_before->ss_end = end;
+               if (sm->sm_pp_root)
+                       avl_remove(sm->sm_pp_root, ss_before);
+               ss = ss_before;
        } else if (merge_after) {
                ss_after->ss_start = start;
+               if (sm->sm_pp_root)
+                       avl_remove(sm->sm_pp_root, ss_after);
+               ss = ss_after;
        } else {
-               ss = kmem_alloc(sizeof (*ss), KM_SLEEP);
+               ss = kmem_cache_alloc(space_seg_cache, KM_PUSHPAGE);
                ss->ss_start = start;
                ss->ss_end = end;
                avl_insert(&sm->sm_root, ss, where);
        }
 
+       if (sm->sm_pp_root)
+               avl_add(sm->sm_pp_root, ss);
+
        sm->sm_space += size;
 }
 
@@ -163,21 +197,30 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
        left_over = (ss->ss_start != start);
        right_over = (ss->ss_end != end);
 
+       if (sm->sm_pp_root)
+               avl_remove(sm->sm_pp_root, ss);
+
        if (left_over && right_over) {
-               newseg = kmem_alloc(sizeof (*newseg), KM_SLEEP);
+               newseg = kmem_cache_alloc(space_seg_cache, KM_PUSHPAGE);
                newseg->ss_start = end;
                newseg->ss_end = ss->ss_end;
                ss->ss_end = start;
                avl_insert_here(&sm->sm_root, newseg, ss, AVL_AFTER);
+               if (sm->sm_pp_root)
+                       avl_add(sm->sm_pp_root, newseg);
        } else if (left_over) {
                ss->ss_end = start;
        } else if (right_over) {
                ss->ss_start = end;
        } else {
                avl_remove(&sm->sm_root, ss);
-               kmem_free(ss, sizeof (*ss));
+               kmem_cache_free(space_seg_cache, ss);
+               ss = NULL;
        }
 
+       if (sm->sm_pp_root && ss != NULL)
+               avl_add(sm->sm_pp_root, ss);
+
        sm->sm_space -= size;
 }
 
@@ -211,7 +254,7 @@ space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest)
        while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) {
                if (func != NULL)
                        func(mdest, ss->ss_start, ss->ss_end - ss->ss_start);
-               kmem_free(ss, sizeof (*ss));
+               kmem_cache_free(space_seg_cache, ss);
        }
        sm->sm_space = 0;
 }
@@ -235,8 +278,10 @@ space_map_load_wait(space_map_t *sm)
 {
        ASSERT(MUTEX_HELD(sm->sm_lock));
 
-       while (sm->sm_loading)
+       while (sm->sm_loading) {
+               ASSERT(!sm->sm_loaded);
                cv_wait(&sm->sm_load_cv, sm->sm_lock);
+       }
 }
 
 /*
@@ -253,18 +298,15 @@ space_map_load(space_map_t *sm, space_map_ops_t *ops, uint8_t maptype,
        int error = 0;
 
        ASSERT(MUTEX_HELD(sm->sm_lock));
-
-       space_map_load_wait(sm);
-
-       if (sm->sm_loaded)
-               return (0);
+       ASSERT(!sm->sm_loaded);
+       ASSERT(!sm->sm_loading);
 
        sm->sm_loading = B_TRUE;
        end = smo->smo_objsize;
        space = smo->smo_alloc;
 
        ASSERT(sm->sm_ops == NULL);
-       VERIFY3U(sm->sm_space, ==, 0);
+       VERIFY0(sm->sm_space);
 
        if (maptype == SM_FREE) {
                space_map_add(sm, sm->sm_start, sm->sm_size);
@@ -288,7 +330,8 @@ space_map_load(space_map_t *sm, space_map_ops_t *ops, uint8_t maptype,
                    smo->smo_object, offset, size);
 
                mutex_exit(sm->sm_lock);
-               error = dmu_read(os, smo->smo_object, offset, size, entry_map);
+               error = dmu_read(os, smo->smo_object, offset, size, entry_map,
+                   DMU_READ_PREFETCH);
                mutex_enter(sm->sm_lock);
                if (error != 0)
                        break;
@@ -342,6 +385,13 @@ space_map_unload(space_map_t *sm)
 }
 
 uint64_t
+space_map_maxsize(space_map_t *sm)	/* forwards to the smop_max callback of the ops vector attached to sm */
+{
+       ASSERT(sm->sm_ops != NULL);	/* sm_ops is dereferenced on the next line, so it must be set */
+       return (sm->sm_ops->smop_max(sm));	/* NOTE(review): presumably the largest segment the allocator could hand out - confirm against the space_map_ops_t implementations */
+}
+
+uint64_t
 space_map_alloc(space_map_t *sm, uint64_t size)
 {
        uint64_t start;
@@ -376,7 +426,7 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
        spa_t *spa = dmu_objset_spa(os);
        void *cookie = NULL;
        space_seg_t *ss;
-       uint64_t bufsize, start, size, run_len;
+       uint64_t bufsize, start, size, run_len, delta, sm_space;
        uint64_t *entry, *entry_map, *entry_map_end;
 
        ASSERT(MUTEX_HELD(sm->sm_lock));
@@ -405,11 +455,13 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
            SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) |
            SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
 
+       delta = 0;
+       sm_space = sm->sm_space;
        while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) {
                size = ss->ss_end - ss->ss_start;
                start = (ss->ss_start - sm->sm_start) >> sm->sm_shift;
 
-               sm->sm_space -= size;
+               delta += size;
                size >>= sm->sm_shift;
 
                while (size) {
@@ -431,7 +483,7 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
                        start += run_len;
                        size -= run_len;
                }
-               kmem_free(ss, sizeof (*ss));
+               kmem_cache_free(space_seg_cache, ss);
        }
 
        if (entry != entry_map) {
@@ -443,9 +495,16 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
                smo->smo_objsize += size;
        }
 
+       /*
+        * Ensure that the space_map's accounting wasn't changed
+        * while we were in the middle of writing it out.
+        */
+       VERIFY3U(sm->sm_space, ==, sm_space);
+
        zio_buf_free(entry_map, bufsize);
 
-       VERIFY3U(sm->sm_space, ==, 0);
+       sm->sm_space -= delta;
+       VERIFY0(sm->sm_space);
 }
 
 void
@@ -521,7 +580,7 @@ space_map_ref_add_node(avl_tree_t *t, uint64_t offset, int64_t refcnt)
 {
        space_ref_t *sr;
 
-       sr = kmem_alloc(sizeof (*sr), KM_SLEEP);
+       sr = kmem_alloc(sizeof (*sr), KM_PUSHPAGE);
        sr->sr_offset = offset;
        sr->sr_refcnt = refcnt;