X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fmetaslab.c;h=d199921b7edf1a17d6bc7ab2c20275941a340d91;hb=1c24b699b0c7590e135f4701b50a4c933ebe0499;hp=17b4b12c4ee4db8f4aa0b5af2e915e9d459294c2;hpb=428870ff734fdaccc342b33fc53cf94724409a46;p=zfs.git diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 17b4b12..d199921 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -30,10 +31,31 @@ #include #include +#define WITH_DF_BLOCK_ALLOCATOR + +/* + * Allow allocations to switch to gang blocks quickly. We do this to + * avoid having to load lots of space_maps in a given txg. There are, + * however, some cases where we want to avoid "fast" ganging and instead + * we want to do an exhaustive search of all metaslabs on this device. + * Currently we don't allow any gang, zil, or dump device related allocations + * to "fast" gang. + */ +#define CAN_FASTGANG(flags) \ + (!((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | \ + METASLAB_GANG_AVOID))) + uint64_t metaslab_aliquot = 512ULL << 10; uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */ /* + * This value defines the number of allowed allocation failures per vdev. + * If a device reaches this threshold in a given txg then we consider skipping + * allocations on that device. + */ +int zfs_mg_alloc_failures; + +/* * Metaslab debugging: when set, keeps all space maps in core to verify frees. */ static int metaslab_debug = 0; @@ -80,11 +102,12 @@ metaslab_class_create(spa_t *spa, space_map_ops_t *ops) { metaslab_class_t *mc; - mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP); + mc = kmem_zalloc(sizeof (metaslab_class_t), KM_PUSHPAGE); mc->mc_spa = spa; mc->mc_rotor = NULL; mc->mc_ops = ops; + mutex_init(&mc->mc_fastwrite_lock, NULL, MUTEX_DEFAULT, NULL); return (mc); } @@ -98,6 +121,7 @@ metaslab_class_destroy(metaslab_class_t *mc) ASSERT(mc->mc_space == 0); ASSERT(mc->mc_dspace == 0); + mutex_destroy(&mc->mc_fastwrite_lock); kmem_free(mc, sizeof (metaslab_class_t)); } @@ -195,7 +219,7 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd) { metaslab_group_t *mg; - mg = kmem_zalloc(sizeof (metaslab_group_t), KM_SLEEP); + mg = kmem_zalloc(sizeof (metaslab_group_t), KM_PUSHPAGE); mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL); avl_create(&mg->mg_metaslab_tree, metaslab_compare, sizeof (metaslab_t), offsetof(struct metaslab, ms_group_node)); @@ -350,6 +374,9 @@ metaslab_segsize_compare(const void *x1, const void *x2) return (0); } +#if defined(WITH_FF_BLOCK_ALLOCATOR) || \ + defined(WITH_DF_BLOCK_ALLOCATOR) || \ + defined(WITH_CDF_BLOCK_ALLOCATOR) /* * This is a helper function that can be used by the allocator to find * a suitable block to allocate. This will search the specified AVL @@ -389,6 +416,7 @@ metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size, *cursor = 0; return (metaslab_block_picker(t, cursor, size, align)); } +#endif /* WITH_FF/DF/CDF_BLOCK_ALLOCATOR */ static void metaslab_pp_load(space_map_t *sm) @@ -396,9 +424,9 @@ metaslab_pp_load(space_map_t *sm) space_seg_t *ss; ASSERT(sm->sm_ppd == NULL); - sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP); + sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_PUSHPAGE); - sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); + sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_PUSHPAGE); avl_create(sm->sm_pp_root, metaslab_segsize_compare, sizeof (space_seg_t), offsetof(struct space_seg, ss_pp_node)); @@ -452,6 +480,7 @@ metaslab_pp_maxsize(space_map_t *sm) return (ss->ss_end - ss->ss_start); } +#if defined(WITH_FF_BLOCK_ALLOCATOR) /* * ========================================================================== * The first-fit block allocator @@ -484,6 +513,10 @@ static space_map_ops_t metaslab_ff_ops = { metaslab_ff_fragmented }; +space_map_ops_t *zfs_metaslab_ops = &metaslab_ff_ops; +#endif /* WITH_FF_BLOCK_ALLOCATOR */ + +#if defined(WITH_DF_BLOCK_ALLOCATOR) /* * ========================================================================== * Dynamic block allocator - @@ -543,11 +576,15 @@ static space_map_ops_t metaslab_df_ops = { metaslab_df_fragmented }; +space_map_ops_t *zfs_metaslab_ops = &metaslab_df_ops; +#endif /* WITH_DF_BLOCK_ALLOCATOR */ + /* * ========================================================================== * Other experimental allocators * ========================================================================== */ +#if defined(WITH_CDF_BLOCK_ALLOCATOR) static uint64_t metaslab_cdf_alloc(space_map_t *sm, uint64_t size) { @@ -607,6 +644,10 @@ static space_map_ops_t metaslab_cdf_ops = { metaslab_cdf_fragmented }; +space_map_ops_t *zfs_metaslab_ops = &metaslab_cdf_ops; +#endif /* WITH_CDF_BLOCK_ALLOCATOR */ + +#if defined(WITH_NDF_BLOCK_ALLOCATOR) uint64_t metaslab_ndf_clump_shift = 4; static uint64_t @@ -672,6 +713,7 @@ static space_map_ops_t metaslab_ndf_ops = { }; space_map_ops_t *zfs_metaslab_ops = &metaslab_ndf_ops; +#endif /* WITH_NDF_BLOCK_ALLOCATOR */ /* * ========================================================================== @@ -685,7 +727,7 @@ metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo, vdev_t *vd = mg->mg_vd; metaslab_t *msp; - msp = kmem_zalloc(sizeof (metaslab_t), KM_SLEEP); + msp = kmem_zalloc(sizeof (metaslab_t), KM_PUSHPAGE); mutex_init(&msp->ms_lock, NULL, MUTEX_DEFAULT, NULL); msp->ms_smo_syncing = *smo; @@ -730,6 +772,7 @@ void metaslab_fini(metaslab_t *msp) { metaslab_group_t *mg = msp->ms_group; + int t; vdev_space_update(mg->mg_vd, -msp->ms_smo.smo_alloc, 0, -msp->ms_map.sm_size); @@ -741,12 +784,12 @@ metaslab_fini(metaslab_t *msp) space_map_unload(&msp->ms_map); space_map_destroy(&msp->ms_map); - for (int t = 0; t < TXG_SIZE; t++) { + for (t = 0; t < TXG_SIZE; t++) { space_map_destroy(&msp->ms_allocmap[t]); space_map_destroy(&msp->ms_freemap[t]); } - for (int t = 0; t < TXG_DEFER_SIZE; t++) + for (t = 0; t < TXG_DEFER_SIZE; t++) space_map_destroy(&msp->ms_defermap[t]); ASSERT3S(msp->ms_deferspace, ==, 0); @@ -844,11 +887,12 @@ metaslab_prefetch(metaslab_group_t *mg) } static int -metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size) +metaslab_activate(metaslab_t *msp, uint64_t activation_weight) { metaslab_group_t *mg = msp->ms_group; space_map_t *sm = &msp->ms_map; space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops; + int t; ASSERT(MUTEX_HELD(&msp->ms_lock)); @@ -862,7 +906,7 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size) metaslab_group_sort(msp->ms_group, msp, 0); return (error); } - for (int t = 0; t < TXG_DEFER_SIZE; t++) + for (t = 0; t < TXG_DEFER_SIZE; t++) space_map_walk(&msp->ms_defermap[t], space_map_claim, sm); @@ -877,13 +921,6 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size) mutex_exit(&mg->mg_lock); } - /* - * If we were able to load the map then make sure - * that this map is still able to satisfy our request. - */ - if (msp->ms_weight < size) - return (ENOSPC); - metaslab_group_sort(msp->ms_group, msp, msp->ms_weight | activation_weight); } @@ -922,6 +959,7 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) space_map_obj_t *smo = &msp->ms_smo_syncing; dmu_buf_t *db; dmu_tx_t *tx; + int t; ASSERT(!vd->vdev_ishole); @@ -977,11 +1015,11 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) space_map_walk(sm, space_map_remove, allocmap); space_map_walk(freed_map, space_map_remove, allocmap); - for (int t = 0; t < TXG_DEFER_SIZE; t++) + for (t = 0; t < TXG_DEFER_SIZE; t++) space_map_walk(&msp->ms_defermap[t], space_map_remove, allocmap); - for (int t = 1; t < TXG_CONCURRENT_STATES; t++) + for (t = 1; t < TXG_CONCURRENT_STATES; t++) space_map_walk(&msp->ms_allocmap[(txg + t) & TXG_MASK], space_map_remove, allocmap); @@ -1019,6 +1057,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) metaslab_group_t *mg = msp->ms_group; vdev_t *vd = mg->mg_vd; int64_t alloc_delta, defer_delta; + int t; ASSERT(!vd->vdev_ishole); @@ -1029,14 +1068,14 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) * allocmaps and freemaps and add its capacity to the vdev. */ if (freed_map->sm_size == 0) { - for (int t = 0; t < TXG_SIZE; t++) { + for (t = 0; t < TXG_SIZE; t++) { space_map_create(&msp->ms_allocmap[t], sm->sm_start, sm->sm_size, sm->sm_shift, sm->sm_lock); space_map_create(&msp->ms_freemap[t], sm->sm_start, sm->sm_size, sm->sm_shift, sm->sm_lock); } - for (int t = 0; t < TXG_DEFER_SIZE; t++) + for (t = 0; t < TXG_DEFER_SIZE; t++) space_map_create(&msp->ms_defermap[t], sm->sm_start, sm->sm_size, sm->sm_shift, sm->sm_lock); @@ -1082,7 +1121,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) if (sm->sm_loaded && (msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) { int evictable = 1; - for (int t = 1; t < TXG_CONCURRENT_STATES; t++) + for (t = 1; t < TXG_CONCURRENT_STATES; t++) if (msp->ms_allocmap[(txg + t) & TXG_MASK].sm_space) evictable = 0; @@ -1099,12 +1138,14 @@ void metaslab_sync_reassess(metaslab_group_t *mg) { vdev_t *vd = mg->mg_vd; + int64_t failures = mg->mg_alloc_failures; + int m; /* * Re-evaluate all metaslabs which have lower offsets than the * bonus area. */ - for (int m = 0; m < vd->vdev_ms_count; m++) { + for (m = 0; m < vd->vdev_ms_count; m++) { metaslab_t *msp = vd->vdev_ms[m]; if (msp->ms_map.sm_start > mg->mg_bonus_area) @@ -1115,6 +1156,8 @@ metaslab_sync_reassess(metaslab_group_t *mg) mutex_exit(&msp->ms_lock); } + atomic_add_64(&mg->mg_alloc_failures, -failures); + /* * Prefetch the next potential metaslabs */ @@ -1139,9 +1182,10 @@ metaslab_distance(metaslab_t *msp, dva_t *dva) } static uint64_t -metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, - uint64_t min_distance, dva_t *dva, int d) +metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, + uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags) { + spa_t *spa = mg->mg_vd->vdev_spa; metaslab_t *msp = NULL; uint64_t offset = -1ULL; avl_tree_t *t = &mg->mg_metaslab_tree; @@ -1162,11 +1206,17 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, mutex_enter(&mg->mg_lock); for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) { - if (msp->ms_weight < size) { + if (msp->ms_weight < asize) { + spa_dbgmsg(spa, "%s: failed to meet weight " + "requirement: vdev %llu, txg %llu, mg %p, " + "msp %p, psize %llu, asize %llu, " + "failures %llu, weight %llu", + spa_name(spa), mg->mg_vd->vdev_id, txg, + mg, msp, psize, asize, + mg->mg_alloc_failures, msp->ms_weight); mutex_exit(&mg->mg_lock); return (-1ULL); } - was_active = msp->ms_weight & METASLAB_ACTIVE_MASK; if (activation_weight == METASLAB_WEIGHT_PRIMARY) break; @@ -1185,6 +1235,25 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, if (msp == NULL) return (-1ULL); + /* + * If we've already reached the allowable number of failed + * allocation attempts on this metaslab group then we + * consider skipping it. We skip it only if we're allowed + * to "fast" gang, the physical size is larger than + * a gang block, and we're attempting to allocate from + * the primary metaslab. + */ + if (mg->mg_alloc_failures > zfs_mg_alloc_failures && + CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE && + activation_weight == METASLAB_WEIGHT_PRIMARY) { + spa_dbgmsg(spa, "%s: skipping metaslab group: " + "vdev %llu, txg %llu, mg %p, psize %llu, " + "asize %llu, failures %llu", spa_name(spa), + mg->mg_vd->vdev_id, txg, mg, psize, asize, + mg->mg_alloc_failures); + return (-1ULL); + } + mutex_enter(&msp->ms_lock); /* @@ -1193,7 +1262,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, * another thread may have changed the weight while we * were blocked on the metaslab lock. */ - if (msp->ms_weight < size || (was_active && + if (msp->ms_weight < asize || (was_active && !(msp->ms_weight & METASLAB_ACTIVE_MASK) && activation_weight == METASLAB_WEIGHT_PRIMARY)) { mutex_exit(&msp->ms_lock); @@ -1208,14 +1277,16 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, continue; } - if (metaslab_activate(msp, activation_weight, size) != 0) { + if (metaslab_activate(msp, activation_weight) != 0) { mutex_exit(&msp->ms_lock); continue; } - if ((offset = space_map_alloc(&msp->ms_map, size)) != -1ULL) + if ((offset = space_map_alloc(&msp->ms_map, asize)) != -1ULL) break; + atomic_inc_64(&mg->mg_alloc_failures); + metaslab_passivate(msp, space_map_maxsize(&msp->ms_map)); mutex_exit(&msp->ms_lock); @@ -1224,7 +1295,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0) vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg); - space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size); + space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, asize); mutex_exit(&msp->ms_lock); @@ -1238,7 +1309,7 @@ static int metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags) { - metaslab_group_t *mg, *rotor; + metaslab_group_t *mg, *fast_mg, *rotor; vdev_t *vd; int dshift = 3; int all_zero; @@ -1256,6 +1327,9 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, if (psize >= metaslab_gang_bang && (ddi_get_lbolt() & 3) == 0) return (ENOSPC); + if (flags & METASLAB_FASTWRITE) + mutex_enter(&mc->mc_fastwrite_lock); + /* * Start at the rotor and loop through all mgs until we find something. * Note that there's no locking on mc_rotor or mc_aliquot because @@ -1298,6 +1372,15 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, } else if (d != 0) { vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1])); mg = vd->vdev_mg->mg_next; + } else if (flags & METASLAB_FASTWRITE) { + mg = fast_mg = mc->mc_rotor; + + do { + if (fast_mg->mg_vd->vdev_pending_fastwrite < + mg->mg_vd->vdev_pending_fastwrite) + mg = fast_mg; + } while ((fast_mg = fast_mg->mg_next) != mc->mc_rotor); + } else { mg = mc->mc_rotor; } @@ -1351,7 +1434,8 @@ top: asize = vdev_psize_to_asize(vd, psize); ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0); - offset = metaslab_group_alloc(mg, asize, txg, distance, dva, d); + offset = metaslab_group_alloc(mg, psize, asize, txg, distance, + dva, d, flags); if (offset != -1ULL) { /* * If we've just selected this metaslab group, @@ -1363,21 +1447,28 @@ top: vdev_stat_t *vs = &vd->vdev_stat; int64_t vu, cu; - /* - * Determine percent used in units of 0..1024. - * (This is just to avoid floating point.) - */ - vu = (vs->vs_alloc << 10) / (vs->vs_space + 1); - cu = (mc->mc_alloc << 10) / (mc->mc_space + 1); + vu = (vs->vs_alloc * 100) / (vs->vs_space + 1); + cu = (mc->mc_alloc * 100) / (mc->mc_space + 1); /* - * Bias by at most +/- 25% of the aliquot. + * Calculate how much more or less we should + * try to allocate from this device during + * this iteration around the rotor. + * For example, if a device is 80% full + * and the pool is 20% full then we should + * reduce allocations by 60% on this device. + * + * mg_bias = (20 - 80) * 512K / 100 = -307K + * + * This reduces allocations by 307K for this + * iteration. */ mg->mg_bias = ((cu - vu) * - (int64_t)mg->mg_aliquot) / (1024 * 4); + (int64_t)mg->mg_aliquot) / 100; } - if (atomic_add_64_nv(&mc->mc_aliquot, asize) >= + if ((flags & METASLAB_FASTWRITE) || + atomic_add_64_nv(&mc->mc_aliquot, asize) >= mg->mg_aliquot + mg->mg_bias) { mc->mc_rotor = mg->mg_next; mc->mc_aliquot = 0; @@ -1388,6 +1479,12 @@ top: DVA_SET_GANG(&dva[d], !!(flags & METASLAB_GANG_HEADER)); DVA_SET_ASIZE(&dva[d], asize); + if (flags & METASLAB_FASTWRITE) { + atomic_add_64(&vd->vdev_pending_fastwrite, + psize); + mutex_exit(&mc->mc_fastwrite_lock); + } + return (0); } next: @@ -1409,6 +1506,8 @@ next: bzero(&dva[d], sizeof (dva_t)); + if (flags & METASLAB_FASTWRITE) + mutex_exit(&mc->mc_fastwrite_lock); return (ENOSPC); } @@ -1488,7 +1587,7 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg) mutex_enter(&msp->ms_lock); if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded) - error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0); + error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY); if (error == 0 && !space_map_contains(&msp->ms_map, offset, size)) error = ENOENT; @@ -1517,7 +1616,7 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, { dva_t *dva = bp->blk_dva; dva_t *hintdva = hintbp->blk_dva; - int error = 0; + int d, error = 0; ASSERT(bp->blk_birth == 0); ASSERT(BP_PHYSICAL_BIRTH(bp) == 0); @@ -1533,7 +1632,7 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, ASSERT(BP_GET_NDVAS(bp) == 0); ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp)); - for (int d = 0; d < ndvas; d++) { + for (d = 0; d < ndvas; d++) { error = metaslab_alloc_dva(spa, mc, psize, dva, d, hintdva, txg, flags); if (error) { @@ -1559,14 +1658,14 @@ void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now) { const dva_t *dva = bp->blk_dva; - int ndvas = BP_GET_NDVAS(bp); + int d, ndvas = BP_GET_NDVAS(bp); ASSERT(!BP_IS_HOLE(bp)); ASSERT(!now || bp->blk_birth >= spa_syncing_txg(spa)); spa_config_enter(spa, SCL_FREE, FTAG, RW_READER); - for (int d = 0; d < ndvas; d++) + for (d = 0; d < ndvas; d++) metaslab_free_dva(spa, &dva[d], txg, now); spa_config_exit(spa, SCL_FREE, FTAG); @@ -1577,7 +1676,7 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg) { const dva_t *dva = bp->blk_dva; int ndvas = BP_GET_NDVAS(bp); - int error = 0; + int d, error = 0; ASSERT(!BP_IS_HOLE(bp)); @@ -1592,7 +1691,7 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg) spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); - for (int d = 0; d < ndvas; d++) + for (d = 0; d < ndvas; d++) if ((error = metaslab_claim_dva(spa, &dva[d], txg)) != 0) break; @@ -1602,3 +1701,48 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg) return (error); } + +void metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp) +{ + const dva_t *dva = bp->blk_dva; + int ndvas = BP_GET_NDVAS(bp); + uint64_t psize = BP_GET_PSIZE(bp); + int d; + vdev_t *vd; + + ASSERT(!BP_IS_HOLE(bp)); + ASSERT(psize > 0); + + spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); + + for (d = 0; d < ndvas; d++) { + if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL) + continue; + atomic_add_64(&vd->vdev_pending_fastwrite, psize); + } + + spa_config_exit(spa, SCL_VDEV, FTAG); +} + +void metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp) +{ + const dva_t *dva = bp->blk_dva; + int ndvas = BP_GET_NDVAS(bp); + uint64_t psize = BP_GET_PSIZE(bp); + int d; + vdev_t *vd; + + ASSERT(!BP_IS_HOLE(bp)); + ASSERT(psize > 0); + + spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); + + for (d = 0; d < ndvas; d++) { + if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL) + continue; + ASSERT3U(vd->vdev_pending_fastwrite, >=, psize); + atomic_sub_64(&vd->vdev_pending_fastwrite, psize); + } + + spa_config_exit(spa, SCL_VDEV, FTAG); +}