diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index b089f1e..d199921 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -38,7 +38,7 @@
  * avoid having to load lots of space_maps in a given txg. There are,
  * however, some cases where we want to avoid "fast" ganging and instead
  * we want to do an exhaustive search of all metaslabs on this device.
- * Currently we don't allow any gang or dump device related allocations
+ * Currently we don't allow any gang, zil, or dump device related allocations
  * to "fast" gang.
  */
 #define	CAN_FASTGANG(flags) \
@@ -102,11 +102,12 @@ metaslab_class_create(spa_t *spa, space_map_ops_t *ops)
 {
 	metaslab_class_t *mc;
 
-	mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP);
+	mc = kmem_zalloc(sizeof (metaslab_class_t), KM_PUSHPAGE);
 
 	mc->mc_spa = spa;
 	mc->mc_rotor = NULL;
 	mc->mc_ops = ops;
+	mutex_init(&mc->mc_fastwrite_lock, NULL, MUTEX_DEFAULT, NULL);
 
 	return (mc);
 }
@@ -120,6 +121,7 @@ metaslab_class_destroy(metaslab_class_t *mc)
 	ASSERT(mc->mc_space == 0);
 	ASSERT(mc->mc_dspace == 0);
 
+	mutex_destroy(&mc->mc_fastwrite_lock);
 	kmem_free(mc, sizeof (metaslab_class_t));
 }
 
@@ -217,7 +219,7 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd)
 {
 	metaslab_group_t *mg;
 
-	mg = kmem_zalloc(sizeof (metaslab_group_t), KM_SLEEP);
+	mg = kmem_zalloc(sizeof (metaslab_group_t), KM_PUSHPAGE);
 	mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL);
 	avl_create(&mg->mg_metaslab_tree, metaslab_compare,
 	    sizeof (metaslab_t), offsetof(struct metaslab, ms_group_node));
@@ -422,9 +424,9 @@ metaslab_pp_load(space_map_t *sm)
 	space_seg_t *ss;
 
 	ASSERT(sm->sm_ppd == NULL);
-	sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP);
+	sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_PUSHPAGE);
 
-	sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
+	sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_PUSHPAGE);
 	avl_create(sm->sm_pp_root, metaslab_segsize_compare,
 	    sizeof (space_seg_t), offsetof(struct space_seg, ss_pp_node));
 
@@ -725,7 +727,7 @@ metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
 	vdev_t *vd = mg->mg_vd;
 	metaslab_t *msp;
 
-	msp = kmem_zalloc(sizeof (metaslab_t), KM_SLEEP);
+	msp = kmem_zalloc(sizeof (metaslab_t), KM_PUSHPAGE);
 	mutex_init(&msp->ms_lock, NULL, MUTEX_DEFAULT, NULL);
 
 	msp->ms_smo_syncing = *smo;
@@ -1307,7 +1309,7 @@ static int
 metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
     dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags)
 {
-	metaslab_group_t *mg, *rotor;
+	metaslab_group_t *mg, *fast_mg, *rotor;
 	vdev_t *vd;
 	int dshift = 3;
 	int all_zero;
@@ -1325,6 +1327,9 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
 	if (psize >= metaslab_gang_bang && (ddi_get_lbolt() & 3) == 0)
 		return (ENOSPC);
 
+	if (flags & METASLAB_FASTWRITE)
+		mutex_enter(&mc->mc_fastwrite_lock);
+
 	/*
 	 * Start at the rotor and loop through all mgs until we find something.
 	 * Note that there's no locking on mc_rotor or mc_aliquot because
@@ -1367,6 +1372,15 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
 	} else if (d != 0) {
 		vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
 		mg = vd->vdev_mg->mg_next;
+	} else if (flags & METASLAB_FASTWRITE) {
+		mg = fast_mg = mc->mc_rotor;
+
+		do {
+			if (fast_mg->mg_vd->vdev_pending_fastwrite <
+			    mg->mg_vd->vdev_pending_fastwrite)
+				mg = fast_mg;
+		} while ((fast_mg = fast_mg->mg_next) != mc->mc_rotor);
+
 	} else {
 		mg = mc->mc_rotor;
 	}
@@ -1453,7 +1467,8 @@ top:
 			    (int64_t)mg->mg_aliquot) / 100;
 		}
 
-		if (atomic_add_64_nv(&mc->mc_aliquot, asize) >=
+		if ((flags & METASLAB_FASTWRITE) ||
+		    atomic_add_64_nv(&mc->mc_aliquot, asize) >=
 		    mg->mg_aliquot + mg->mg_bias) {
 			mc->mc_rotor = mg->mg_next;
 			mc->mc_aliquot = 0;
@@ -1464,6 +1479,12 @@ top:
 		DVA_SET_GANG(&dva[d], !!(flags & METASLAB_GANG_HEADER));
 		DVA_SET_ASIZE(&dva[d], asize);
 
+		if (flags & METASLAB_FASTWRITE) {
+			atomic_add_64(&vd->vdev_pending_fastwrite,
+			    psize);
+			mutex_exit(&mc->mc_fastwrite_lock);
+		}
+
 		return (0);
 	}
 next:
@@ -1485,6 +1506,8 @@ next:
 
 	bzero(&dva[d], sizeof (dva_t));
 
+	if (flags & METASLAB_FASTWRITE)
+		mutex_exit(&mc->mc_fastwrite_lock);
 	return (ENOSPC);
 }
 
@@ -1678,3 +1701,48 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
 
 	return (error);
 }
+
+void metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp)
+{
+	const dva_t *dva = bp->blk_dva;
+	int ndvas = BP_GET_NDVAS(bp);
+	uint64_t psize = BP_GET_PSIZE(bp);
+	int d;
+	vdev_t *vd;
+
+	ASSERT(!BP_IS_HOLE(bp));
+	ASSERT(psize > 0);
+
+	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
+
+	for (d = 0; d < ndvas; d++) {
+		if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
+			continue;
+		atomic_add_64(&vd->vdev_pending_fastwrite, psize);
+	}
+
+	spa_config_exit(spa, SCL_VDEV, FTAG);
+}
+
+void metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp)
+{
+	const dva_t *dva = bp->blk_dva;
+	int ndvas = BP_GET_NDVAS(bp);
+	uint64_t psize = BP_GET_PSIZE(bp);
+	int d;
+	vdev_t *vd;
+
+	ASSERT(!BP_IS_HOLE(bp));
+	ASSERT(psize > 0);
+
+	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
+
+	for (d = 0; d < ndvas; d++) {
+		if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
+			continue;
+		ASSERT3U(vd->vdev_pending_fastwrite, >=, psize);
+		atomic_sub_64(&vd->vdev_pending_fastwrite, psize);
+	}
+
+	spa_config_exit(spa, SCL_VDEV, FTAG);
+}
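
Note on the change above: the core of METASLAB_FASTWRITE is that, instead of
taking whatever metaslab group the round-robin rotor points at, the allocator
walks the circular list of top-level vdevs once and picks the one with the
fewest pending fastwrite bytes, then charges that vdev's
vdev_pending_fastwrite counter until metaslab_fastwrite_unmark() releases it.
The following is a minimal user-space sketch of that selection loop, for
illustration only: fake_vdev_t and both function names are simplified
stand-ins, not part of the ZFS source, and the kernel updates the counter
with atomic_add_64() under mc_fastwrite_lock rather than with a plain add.

/* Standalone sketch of the FASTWRITE vdev-selection policy (see caveats above). */
#include <stdint.h>
#include <stdio.h>

typedef struct fake_vdev {
	uint64_t		pending_fastwrite;	/* bytes in flight */
	struct fake_vdev	*next;			/* circular list */
} fake_vdev_t;

/* Walk the whole circle exactly once, keeping the least-loaded vdev. */
static fake_vdev_t *
pick_fastwrite_vdev(fake_vdev_t *rotor)
{
	fake_vdev_t *best = rotor, *cur = rotor;

	do {
		if (cur->pending_fastwrite < best->pending_fastwrite)
			best = cur;
	} while ((cur = cur->next) != rotor);

	return (best);
}

int
main(void)
{
	fake_vdev_t v[3] = {
		{ .pending_fastwrite = 4096 },
		{ .pending_fastwrite = 512 },
		{ .pending_fastwrite = 8192 },
	};
	fake_vdev_t *vd;

	v[0].next = &v[1];
	v[1].next = &v[2];
	v[2].next = &v[0];

	/* Picks v[1], the vdev with the least pending fastwrite data. */
	vd = pick_fastwrite_vdev(&v[0]);

	/*
	 * Charge the chosen vdev, as metaslab_alloc_dva() does; a later
	 * unmark (cf. metaslab_fastwrite_unmark()) would subtract it again.
	 */
	vd->pending_fastwrite += 1024;

	printf("picked vdev %ld, pending is now %llu bytes\n",
	    (long)(vd - v), (unsigned long long)vd->pending_fastwrite);
	return (0);
}

Because every fastwrite allocation takes the same class-wide lock and scans
all top-level vdevs, this trades a little allocation-path overhead for
spreading synchronous (ZIL) writes across devices by outstanding bytes
rather than by strict rotation.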