Illumos #3006
[zfs.git] / module / zfs / metaslab.c
index d06012f..cc51ea4 100644 (file)
@@ -58,7 +58,7 @@ int zfs_mg_alloc_failures;
 /*
  * Metaslab debugging: when set, keeps all space maps in core to verify frees.
  */
-static int metaslab_debug = 0;
+int metaslab_debug = 0;
 
 /*
  * Minimum size which forces the dynamic allocator to change
@@ -107,6 +107,7 @@ metaslab_class_create(spa_t *spa, space_map_ops_t *ops)
        mc->mc_spa = spa;
        mc->mc_rotor = NULL;
        mc->mc_ops = ops;
+       mutex_init(&mc->mc_fastwrite_lock, NULL, MUTEX_DEFAULT, NULL);
 
        return (mc);
 }
@@ -120,6 +121,7 @@ metaslab_class_destroy(metaslab_class_t *mc)
        ASSERT(mc->mc_space == 0);
        ASSERT(mc->mc_dspace == 0);
 
+       mutex_destroy(&mc->mc_fastwrite_lock);
        kmem_free(mc, sizeof (metaslab_class_t));
 }
 
@@ -790,7 +792,7 @@ metaslab_fini(metaslab_t *msp)
        for (t = 0; t < TXG_DEFER_SIZE; t++)
                space_map_destroy(&msp->ms_defermap[t]);
 
-       ASSERT3S(msp->ms_deferspace, ==, 0);
+       ASSERT0(msp->ms_deferspace);
 
        mutex_exit(&msp->ms_lock);
        mutex_destroy(&msp->ms_lock);
@@ -897,8 +899,9 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
        if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
                space_map_load_wait(sm);
                if (!sm->sm_loaded) {
-                       int error = space_map_load(sm, sm_ops, SM_FREE,
-                           &msp->ms_smo,
+                       space_map_obj_t *smo = &msp->ms_smo;
+
+                       int error = space_map_load(sm, sm_ops, SM_FREE, smo,
                            spa_meta_objset(msp->ms_group->mg_vd->vdev_spa));
                        if (error)  {
                                metaslab_group_sort(msp->ms_group, msp, 0);
@@ -1307,7 +1310,7 @@ static int
 metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
     dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags)
 {
-       metaslab_group_t *mg, *rotor;
+       metaslab_group_t *mg, *fast_mg, *rotor;
        vdev_t *vd;
        int dshift = 3;
        int all_zero;
@@ -1325,6 +1328,9 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
        if (psize >= metaslab_gang_bang && (ddi_get_lbolt() & 3) == 0)
                return (ENOSPC);
 
+       if (flags & METASLAB_FASTWRITE)
+               mutex_enter(&mc->mc_fastwrite_lock);
+
        /*
         * Start at the rotor and loop through all mgs until we find something.
         * Note that there's no locking on mc_rotor or mc_aliquot because
@@ -1367,6 +1373,15 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
        } else if (d != 0) {
                vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
                mg = vd->vdev_mg->mg_next;
+       } else if (flags & METASLAB_FASTWRITE) {
+               mg = fast_mg = mc->mc_rotor;
+
+               do {
+                       if (fast_mg->mg_vd->vdev_pending_fastwrite <
+                           mg->mg_vd->vdev_pending_fastwrite)
+                               mg = fast_mg;
+               } while ((fast_mg = fast_mg->mg_next) != mc->mc_rotor);
+
        } else {
                mg = mc->mc_rotor;
        }
@@ -1453,7 +1468,8 @@ top:
                                    (int64_t)mg->mg_aliquot) / 100;
                        }
 
-                       if (atomic_add_64_nv(&mc->mc_aliquot, asize) >=
+                       if ((flags & METASLAB_FASTWRITE) ||
+                           atomic_add_64_nv(&mc->mc_aliquot, asize) >=
                            mg->mg_aliquot + mg->mg_bias) {
                                mc->mc_rotor = mg->mg_next;
                                mc->mc_aliquot = 0;
@@ -1464,6 +1480,12 @@ top:
                        DVA_SET_GANG(&dva[d], !!(flags & METASLAB_GANG_HEADER));
                        DVA_SET_ASIZE(&dva[d], asize);
 
+                       if (flags & METASLAB_FASTWRITE) {
+                               atomic_add_64(&vd->vdev_pending_fastwrite,
+                                   psize);
+                               mutex_exit(&mc->mc_fastwrite_lock);
+                       }
+
                        return (0);
                }
 next:
@@ -1485,6 +1507,8 @@ next:
 
        bzero(&dva[d], sizeof (dva_t));
 
+       if (flags & METASLAB_FASTWRITE)
+               mutex_exit(&mc->mc_fastwrite_lock);
        return (ENOSPC);
 }
 
@@ -1678,3 +1702,53 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
 
        return (error);
 }
+/* Add bp's psize to vdev_pending_fastwrite on each DVA's top-level vdev. */
+void metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp)
+{
+       const dva_t *dva = bp->blk_dva;
+       int ndvas = BP_GET_NDVAS(bp);
+       uint64_t psize = BP_GET_PSIZE(bp);
+       int d;
+       vdev_t *vd;
+       /* Asserted preconditions: bp is not a hole and its psize is non-zero. */
+       ASSERT(!BP_IS_HOLE(bp));
+       ASSERT(psize > 0);
+       /* Enter SCL_VDEV as reader for the duration of the vdev lookups. */
+       spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
+
+       for (d = 0; d < ndvas; d++) {
+               if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
+                       continue; /* lookup returned no vdev; skip this DVA */
+               atomic_add_64(&vd->vdev_pending_fastwrite, psize);
+       }
+
+       spa_config_exit(spa, SCL_VDEV, FTAG);
+}
+/* Subtract bp's psize from vdev_pending_fastwrite on each DVA's vdev. */
+void metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp)
+{
+       const dva_t *dva = bp->blk_dva;
+       int ndvas = BP_GET_NDVAS(bp);
+       uint64_t psize = BP_GET_PSIZE(bp);
+       int d;
+       vdev_t *vd;
+       /* Asserted preconditions: bp is not a hole and its psize is non-zero. */
+       ASSERT(!BP_IS_HOLE(bp));
+       ASSERT(psize > 0);
+       /* Enter SCL_VDEV as reader for the duration of the vdev lookups. */
+       spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
+       /* The counter is asserted to be >= psize before each subtraction. */
+       for (d = 0; d < ndvas; d++) {
+               if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
+                       continue; /* lookup returned no vdev; skip this DVA */
+               ASSERT3U(vd->vdev_pending_fastwrite, >=, psize);
+               atomic_sub_64(&vd->vdev_pending_fastwrite, psize);
+       }
+
+       spa_config_exit(spa, SCL_VDEV, FTAG);
+}
+/* Register metaslab_debug as an int module parameter with mode 0644. */
+#if defined(_KERNEL) && defined(HAVE_SPL)
+module_param(metaslab_debug, int, 0644);
+MODULE_PARM_DESC(metaslab_debug, "keep space maps in core to verify frees");
+#endif /* _KERNEL && HAVE_SPL */