X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzio.c;h=943f2d677a96376fc1bce6d29f80ef109332a9c5;hb=cc92e9d0c3e67a7e66c844466f85696a087bf60a;hp=fe2bdc867fea6151d2b0177e8dbd34796804eeca;hpb=86dd0fd9222b6103c6533036c47b908ece944460;p=zfs.git diff --git a/module/zfs/zio.c b/module/zfs/zio.c index fe2bdc8..943f2d6 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -210,6 +210,8 @@ zio_init(void) zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8); zio_inject_init(); + + lz4_init(); } void @@ -238,6 +240,8 @@ zio_fini(void) kmem_cache_destroy(zio_cache); zio_inject_fini(); + + lz4_fini(); } /* @@ -259,7 +263,7 @@ zio_buf_alloc(size_t size) ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); - return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE)); + return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE | KM_NODEBUG)); } /* @@ -275,7 +279,8 @@ zio_data_buf_alloc(size_t size) ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); - return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE)); + return (kmem_cache_alloc(zio_data_buf_cache[c], + KM_PUSHPAGE | KM_NODEBUG)); } void @@ -604,6 +609,9 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, zio->io_vsd_ops = NULL; zio->io_offset = offset; zio->io_deadline = 0; + zio->io_timestamp = 0; + zio->io_delta = 0; + zio->io_delay = 0; zio->io_orig_data = zio->io_data = data; zio->io_orig_size = zio->io_size = size; zio->io_orig_flags = zio->io_flags = flags; @@ -615,7 +623,6 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, zio->io_bp_override = NULL; zio->io_walk_link = NULL; zio->io_transform_stack = NULL; - zio->io_delay = 0; zio->io_error = 0; zio->io_child_count = 0; zio->io_parent_count = 0; @@ -703,7 +710,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS && zp->zp_compress >= ZIO_COMPRESS_OFF && zp->zp_compress < ZIO_COMPRESS_FUNCTIONS && - zp->zp_type < DMU_OT_NUMTYPES && + DMU_OT_IS_VALID(zp->zp_type) && zp->zp_level < 32 && zp->zp_copies > 0 && zp->zp_copies <= spa_max_replication(spa) && @@ -987,7 +994,7 @@ zio_read_bp_init(zio_t *zio) zio_push_transform(zio, cbuf, psize, psize, zio_decompress); } - if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0) + if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0) zio->io_flags |= ZIO_FLAG_DONT_CACHE; if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP) @@ -1247,7 +1254,7 @@ __zio_execute(zio_t *zio) while (zio->io_stage < ZIO_STAGE_DONE) { enum zio_stage pipeline = zio->io_pipeline; enum zio_stage stage = zio->io_stage; - dsl_pool_t *dsl; + dsl_pool_t *dp; boolean_t cut; int rv; @@ -1261,7 +1268,7 @@ __zio_execute(zio_t *zio) ASSERT(stage <= ZIO_STAGE_DONE); - dsl = spa_get_dsl(zio->io_spa); + dp = spa_get_dsl(zio->io_spa); cut = (stage == ZIO_STAGE_VDEV_IO_START) ? zio_requeue_io_start_cut_in_line : B_FALSE; @@ -1271,20 +1278,30 @@ __zio_execute(zio_t *zio) * or may wait for an I/O that needs an interrupt thread * to complete, issue async to avoid deadlock. * - * If we are in the txg_sync_thread or being called - * during pool init issue async to minimize stack depth. - * Both of these call paths may be recursively called. - * * For VDEV_IO_START, we cut in line so that the io will * be sent to disk promptly. 
 		 */
-		if (((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL &&
-		    zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) ||
-		    (dsl != NULL && dsl_pool_sync_context(dsl))) {
+		if ((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL &&
+		    zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) {
 			zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut);
 			return;
 		}
 
+#ifdef _KERNEL
+		/*
+		 * If we are executing in the context of the tx_sync_thread,
+		 * or we are performing pool initialization outside of a
+		 * zio_taskq[ZIO_TASKQ_ISSUE] context, then issue the zio
+		 * async to minimize stack usage for these deep call paths.
+		 */
+		if ((dp && curthread == dp->dp_tx.tx_sync_thread) ||
+		    (dp && spa_is_initializing(dp->dp_spa) &&
+		    !zio_taskq_member(zio, ZIO_TASKQ_ISSUE))) {
+			zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut);
+			return;
+		}
+#endif
+
 		zio->io_stage = stage;
 
 		rv = zio_pipeline[highbit(stage) - 1](zio);
@@ -1315,7 +1332,7 @@ zio_wait(zio_t *zio)
 
 	mutex_enter(&zio->io_lock);
 	while (zio->io_executor != NULL)
-		cv_wait(&zio->io_cv, &zio->io_lock);
+		cv_wait_io(&zio->io_cv, &zio->io_lock);
 	mutex_exit(&zio->io_lock);
 
 	error = zio->io_error;
@@ -1860,6 +1877,11 @@ zio_write_gang_block(zio_t *pio)
 	 */
 	pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
 
+	/*
+	 * We didn't allocate this bp, so make sure it doesn't get unmarked.
+	 */
+	pio->io_flags &= ~ZIO_FLAG_FASTWRITE;
+
 	zio_nowait(zio);
 
 	return (ZIO_PIPELINE_CONTINUE);
@@ -2229,8 +2251,11 @@ zio_ddt_free(zio_t *zio)
 
 	ddt_enter(ddt);
 	freedde = dde = ddt_lookup(ddt, bp, B_TRUE);
-	ddp = ddt_phys_select(dde, bp);
-	ddt_phys_decref(ddp);
+	if (dde) {
+		ddp = ddt_phys_select(dde, bp);
+		if (ddp)
+			ddt_phys_decref(ddp);
+	}
 	ddt_exit(ddt);
 
 	return (ZIO_PIPELINE_CONTINUE);
@@ -2269,6 +2294,7 @@ zio_dva_allocate(zio_t *zio)
 
 	flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0;
 	flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ? METASLAB_GANG_CHILD : 0;
+	flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? METASLAB_FASTWRITE : 0;
 
 	error = metaslab_alloc(spa, mc, zio->io_size, bp,
 	    zio->io_prop.zp_copies, zio->io_txg, NULL, flags);
@@ -2332,8 +2358,8 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp)
  * Try to allocate an intent log block.  Return 0 on success, errno on failure.
  */
 int
-zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp,
-    uint64_t size, boolean_t use_slog)
+zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, uint64_t size,
+    boolean_t use_slog)
 {
 	int error = 1;
 
@@ -2346,14 +2372,14 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp,
 	 */
 	if (use_slog) {
 		error = metaslab_alloc(spa, spa_log_class(spa), size,
-		    new_bp, 1, txg, old_bp,
-		    METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID);
+		    new_bp, 1, txg, NULL,
+		    METASLAB_FASTWRITE | METASLAB_GANG_AVOID);
 	}
 
 	if (error) {
 		error = metaslab_alloc(spa, spa_normal_class(spa), size,
-		    new_bp, 1, txg, old_bp,
-		    METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID);
+		    new_bp, 1, txg, NULL,
+		    METASLAB_FASTWRITE | METASLAB_GANG_AVOID);
 	}
 
 	if (error == 0) {
@@ -2882,11 +2908,11 @@ zio_done(zio_t *zio)
 		vdev_stat_update(zio, zio->io_size);
 
 	/*
-	 * If this I/O is attached to a particular vdev is slow, exeeding
+	 * If this I/O is attached to a particular vdev and is slow, exceeding
	 * 30 seconds to complete, post an error described the I/O delay.
 	 * We ignore these errors if the device is currently unavailable.
*/ - if (zio->io_delay >= zio_delay_max) { + if (zio->io_delay >= MSEC_TO_TICK(zio_delay_max)) { if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd)) zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa, zio->io_vd, zio, 0, 0); @@ -3059,6 +3085,11 @@ zio_done(zio_t *zio) zfs_ereport_free_checksum(zcr); } + if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp && + !BP_IS_HOLE(zio->io_bp)) { + metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp); + } + /* * It is the responsibility of the done callback to ensure that this * particular zio is no longer discoverable for adoption, and as @@ -3119,6 +3150,48 @@ static zio_pipe_stage_t *zio_pipeline[] = { zio_done }; +/* dnp is the dnode for zb1->zb_object */ +boolean_t +zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1, + const zbookmark_t *zb2) +{ + uint64_t zb1nextL0, zb2thisobj; + + ASSERT(zb1->zb_objset == zb2->zb_objset); + ASSERT(zb2->zb_level == 0); + + /* + * A bookmark in the deadlist is considered to be after + * everything else. + */ + if (zb2->zb_object == DMU_DEADLIST_OBJECT) + return (B_TRUE); + + /* The objset_phys_t isn't before anything. */ + if (dnp == NULL) + return (B_FALSE); + + zb1nextL0 = (zb1->zb_blkid + 1) << + ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)); + + zb2thisobj = zb2->zb_object ? zb2->zb_object : + zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT); + + if (zb1->zb_object == DMU_META_DNODE_OBJECT) { + uint64_t nextobj = zb1nextL0 * + (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT; + return (nextobj <= zb2thisobj); + } + + if (zb1->zb_object < zb2thisobj) + return (B_TRUE); + if (zb1->zb_object > zb2thisobj) + return (B_FALSE); + if (zb2->zb_object == DMU_META_DNODE_OBJECT) + return (B_FALSE); + return (zb1nextL0 <= zb2->zb_blkid); +} + #if defined(_KERNEL) && defined(HAVE_SPL) /* Fault injection */ EXPORT_SYMBOL(zio_injection_enabled);
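
A note on the zio_delay_max change above: zio->io_delay is tracked in clock ticks (derived from the new io_timestamp/io_delta fields), while the zio_delay_max tunable is expressed in milliseconds, which is why the threshold is now wrapped in MSEC_TO_TICK() before the comparison. Below is a minimal standalone sketch of that conversion; the 100 Hz tick rate and the simplified MSEC_TO_TICK() definition are illustrative assumptions, not the kernel's actual macro.

/*
 * Standalone sketch (not part of the patch): converting the millisecond
 * zio_delay_max tunable into ticks before comparing it with a tick-based
 * delay.  HZ and MSEC_TO_TICK() below are simplified assumptions.
 */
#include <stdio.h>
#include <stdint.h>

#define HZ			100			/* assumed tick rate */
#define MSEC_TO_TICK(ms)	((ms) * HZ / 1000)	/* simplified form */

int
main(void)
{
	uint64_t zio_delay_max = 30000;	/* default threshold: 30000 ms */
	uint64_t io_delay = 3500;	/* hypothetical I/O delay, in ticks */

	/* 30000 ms -> 3000 ticks at 100 Hz, so a 3500-tick delay is "slow" */
	if (io_delay >= MSEC_TO_TICK(zio_delay_max))
		printf("slow I/O: %llu ticks >= %llu ticks\n",
		    (unsigned long long)io_delay,
		    (unsigned long long)MSEC_TO_TICK(zio_delay_max));
	return (0);
}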
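
The new zbookmark_is_before() works by converting zb1 into zb1nextL0, the first level-0 blkid after everything zb1 covers, and comparing that against zb2 (scaling into object numbers for the meta-dnode case). A minimal standalone sketch of the shift arithmetic, assuming an illustrative geometry of 128K indirect blocks (dn_indblkshift = 17) and 128-byte block pointers (SPA_BLKPTRSHIFT = 7), i.e. 1024 block pointers per indirect level:

/*
 * Standalone sketch (not part of the patch) of the blkid arithmetic used
 * by zbookmark_is_before().  The geometry below is an assumption chosen
 * for illustration only.
 */
#include <stdio.h>
#include <stdint.h>

#define SPA_BLKPTRSHIFT	7	/* log2(sizeof (blkptr_t)) */

int
main(void)
{
	uint64_t indblkshift = 17;			/* assumed 128K indirect blocks */
	uint64_t epbs = indblkshift - SPA_BLKPTRSHIFT;	/* 10 -> 1024 ptrs per level */
	uint64_t zb1_level = 2, zb1_blkid = 3;		/* a level-2 bookmark */

	/*
	 * First level-0 blkid after everything zb1 covers: L2 blkid 3 spans
	 * L0 blkids [3 << 20, (4 << 20) - 1], so zb1nextL0 is 4 << 20.
	 * zb1 is "before" a level-0 bookmark zb2 in the same object iff
	 * zb1nextL0 <= zb2->zb_blkid.
	 */
	uint64_t zb1nextL0 = (zb1_blkid + 1) << (zb1_level * epbs);

	printf("zb1nextL0 = %llu\n", (unsigned long long)zb1nextL0);
	return (0);
}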