diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index ace72a0..77b1764 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -210,6 +210,8 @@ zio_init(void)
 	zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8);
 
 	zio_inject_init();
+
+	lz4_init();
 }
 
 void
@@ -238,6 +240,8 @@ zio_fini(void)
 	kmem_cache_destroy(zio_cache);
 
 	zio_inject_fini();
+
+	lz4_fini();
 }
 
 /*
@@ -704,7 +708,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
 	    zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS &&
 	    zp->zp_compress >= ZIO_COMPRESS_OFF &&
 	    zp->zp_compress < ZIO_COMPRESS_FUNCTIONS &&
-	    zp->zp_type < DMU_OT_NUMTYPES &&
+	    DMU_OT_IS_VALID(zp->zp_type) &&
 	    zp->zp_level < 32 &&
 	    zp->zp_copies > 0 &&
 	    zp->zp_copies <= spa_max_replication(spa) &&
@@ -988,7 +992,7 @@ zio_read_bp_init(zio_t *zio)
 		zio_push_transform(zio, cbuf, psize, psize, zio_decompress);
 	}
 
-	if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0)
+	if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
 		zio->io_flags |= ZIO_FLAG_DONT_CACHE;
 
 	if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP)
@@ -1248,7 +1252,7 @@ __zio_execute(zio_t *zio)
 	while (zio->io_stage < ZIO_STAGE_DONE) {
 		enum zio_stage pipeline = zio->io_pipeline;
 		enum zio_stage stage = zio->io_stage;
-		dsl_pool_t *dsl;
+		dsl_pool_t *dp;
 		boolean_t cut;
 		int rv;
 
@@ -1262,7 +1266,7 @@ __zio_execute(zio_t *zio)
 
 		ASSERT(stage <= ZIO_STAGE_DONE);
 
-		dsl = spa_get_dsl(zio->io_spa);
+		dp = spa_get_dsl(zio->io_spa);
 		cut = (stage == ZIO_STAGE_VDEV_IO_START) ?
 		    zio_requeue_io_start_cut_in_line : B_FALSE;
 
@@ -1272,19 +1276,29 @@ __zio_execute(zio_t *zio)
 		 * or may wait for an I/O that needs an interrupt thread
 		 * to complete, issue async to avoid deadlock.
 		 *
-		 * If we are in the txg_sync_thread or being called
-		 * during pool init issue async to minimize stack depth.
-		 * Both of these call paths may be recursively called.
-		 *
 		 * For VDEV_IO_START, we cut in line so that the io will
 		 * be sent to disk promptly.
 		 */
-		if (((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL &&
-		    zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) ||
-		    (dsl != NULL && dsl_pool_sync_context(dsl))) {
+		if ((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL &&
+		    zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) {
+			zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut);
+			return;
+		}
+
+#ifdef _KERNEL
+		/*
+		 * If we are executing in the context of the tx_sync_thread,
+		 * or we are performing pool initialization outside of a
+		 * zio_taskq[ZIO_TASKQ_ISSUE] context, then issue the zio
+		 * async to minimize stack usage for these deep call paths.
+		 */
+		if ((dp && curthread == dp->dp_tx.tx_sync_thread) ||
+		    (dp && spa_is_initializing(dp->dp_spa) &&
+		    !zio_taskq_member(zio, ZIO_TASKQ_ISSUE))) {
 			zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut);
 			return;
 		}
+#endif
 
 		zio->io_stage = stage;
 		rv = zio_pipeline[highbit(stage) - 1](zio);
@@ -1316,7 +1330,7 @@ zio_wait(zio_t *zio)
 
 	mutex_enter(&zio->io_lock);
 	while (zio->io_executor != NULL)
-		cv_wait(&zio->io_cv, &zio->io_lock);
+		cv_wait_io(&zio->io_cv, &zio->io_lock);
 	mutex_exit(&zio->io_lock);
 
 	error = zio->io_error;
@@ -1861,6 +1875,11 @@ zio_write_gang_block(zio_t *pio)
 	 */
 	pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
 
+	/*
+	 * We didn't allocate this bp, so make sure it doesn't get unmarked.
+	 */
+	pio->io_flags &= ~ZIO_FLAG_FASTWRITE;
+
 	zio_nowait(zio);
 
 	return (ZIO_PIPELINE_CONTINUE);
@@ -2270,6 +2289,7 @@ zio_dva_allocate(zio_t *zio)
 	flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0;
 	flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ?
 	    METASLAB_GANG_CHILD : 0;
+	flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? METASLAB_FASTWRITE : 0;
 
 	error = metaslab_alloc(spa, mc, zio->io_size, bp,
 	    zio->io_prop.zp_copies, zio->io_txg, NULL, flags);
@@ -2333,8 +2353,8 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp)
  * Try to allocate an intent log block.  Return 0 on success, errno on failure.
  */
 int
-zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp,
-    uint64_t size, boolean_t use_slog)
+zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, uint64_t size,
+    boolean_t use_slog)
 {
 	int error = 1;
 
@@ -2347,14 +2367,14 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp,
 	 */
 	if (use_slog) {
 		error = metaslab_alloc(spa, spa_log_class(spa), size,
-		    new_bp, 1, txg, old_bp,
-		    METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID);
+		    new_bp, 1, txg, NULL,
+		    METASLAB_FASTWRITE | METASLAB_GANG_AVOID);
 	}
 
 	if (error) {
 		error = metaslab_alloc(spa, spa_normal_class(spa), size,
-		    new_bp, 1, txg, old_bp,
-		    METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID);
+		    new_bp, 1, txg, NULL,
+		    METASLAB_FASTWRITE | METASLAB_GANG_AVOID);
 	}
 
 	if (error == 0) {
@@ -2432,25 +2452,19 @@ zio_vdev_io_start(zio_t *zio)
 
 	align = 1ULL << vd->vdev_top->vdev_ashift;
 
-	/*
-	 * On Linux, we don't care about read alignment. The backing block
-	 * device driver will take care of that for us.
-	 * The only exception is raidz, which needs a full block for parity.
-	 */
-	if (P2PHASE(zio->io_size, align) != 0 &&
-	    (zio->io_type != ZIO_TYPE_READ ||
-	    vd->vdev_ops == &vdev_raidz_ops)) {
+	if (P2PHASE(zio->io_size, align) != 0) {
 		uint64_t asize = P2ROUNDUP(zio->io_size, align);
 		char *abuf = zio_buf_alloc(asize);
+		ASSERT(vd == vd->vdev_top);
 		if (zio->io_type == ZIO_TYPE_WRITE) {
 			bcopy(zio->io_data, abuf, zio->io_size);
 			bzero(abuf + zio->io_size, asize - zio->io_size);
 		}
 		zio_push_transform(zio, abuf, asize, asize, zio_subblock);
-		ASSERT(P2PHASE(zio->io_size, align) == 0);
 	}
 
 	ASSERT(P2PHASE(zio->io_offset, align) == 0);
+	ASSERT(P2PHASE(zio->io_size, align) == 0);
 	VERIFY(zio->io_type != ZIO_TYPE_WRITE || spa_writeable(spa));
 
 	/*
@@ -3066,6 +3080,11 @@ zio_done(zio_t *zio)
 		zfs_ereport_free_checksum(zcr);
 	}
 
+	if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp &&
+	    !BP_IS_HOLE(zio->io_bp)) {
+		metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp);
+	}
+
 	/*
 	 * It is the responsibility of the done callback to ensure that this
 	 * particular zio is no longer discoverable for adoption, and as
@@ -3126,6 +3145,48 @@ static zio_pipe_stage_t *zio_pipeline[] = {
 	zio_done
 };
 
+/* dnp is the dnode for zb1->zb_object */
+boolean_t
+zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1,
+    const zbookmark_t *zb2)
+{
+	uint64_t zb1nextL0, zb2thisobj;
+
+	ASSERT(zb1->zb_objset == zb2->zb_objset);
+	ASSERT(zb2->zb_level == 0);
+
+	/*
+	 * A bookmark in the deadlist is considered to be after
+	 * everything else.
+	 */
+	if (zb2->zb_object == DMU_DEADLIST_OBJECT)
+		return (B_TRUE);
+
+	/* The objset_phys_t isn't before anything. */
+	if (dnp == NULL)
+		return (B_FALSE);
+
+	zb1nextL0 = (zb1->zb_blkid + 1) <<
+	    ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
+
+	zb2thisobj = zb2->zb_object ? zb2->zb_object :
+	    zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
+
+	if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
+		uint64_t nextobj = zb1nextL0 *
+		    (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
+		return (nextobj <= zb2thisobj);
+	}
+
+	if (zb1->zb_object < zb2thisobj)
+		return (B_TRUE);
+	if (zb1->zb_object > zb2thisobj)
+		return (B_FALSE);
+	if (zb2->zb_object == DMU_META_DNODE_OBJECT)
+		return (B_FALSE);
+	return (zb1nextL0 <= zb2->zb_blkid);
+}
+
 #if defined(_KERNEL) && defined(HAVE_SPL)
 /* Fault injection */
 EXPORT_SYMBOL(zio_injection_enabled);
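
For reference, the dispatch in __zio_execute() works because every value in
enum zio_stage is a one-hot bit: highbit(stage) returns the 1-based index of
that bit, so zio_pipeline[highbit(stage) - 1] maps a stage bit directly to its
handler. Below is a minimal standalone sketch of the same table-dispatch
pattern; the names and the portable highbit() are illustrative stand-ins, not
ZFS code.

#include <stdio.h>

/* One-hot stage bits, in the style of enum zio_stage. */
enum stage {
	STAGE_OPEN	= 1 << 0,
	STAGE_ISSUE	= 1 << 1,
	STAGE_DONE	= 1 << 2
};

/* 1-based index of the highest set bit; highbit(1) == 1, highbit(0) == 0. */
static int
highbit(unsigned long v)
{
	int h = 0;

	while (v != 0) {
		h++;
		v >>= 1;
	}
	return (h);
}

static int stage_open(void)  { printf("open\n");  return (0); }
static int stage_issue(void) { printf("issue\n"); return (0); }
static int stage_done(void)  { printf("done\n");  return (0); }

/* Indexed by highbit(stage) - 1, mirroring zio_pipeline[]. */
static int (*pipeline[])(void) = {
	stage_open,
	stage_issue,
	stage_done
};

int
main(void)
{
	enum stage s = STAGE_ISSUE;

	return (pipeline[highbit(s) - 1]());	/* prints "issue" */
}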
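
The zio_alloc_zil() hunk keeps a two-tier allocation: try the separate log
device class first, and on failure fall back to the normal class. A small
sketch of that fallback shape under assumed names (alloc_from() and the class
enum are hypothetical, not the ZFS metaslab API):

#include <errno.h>
#include <stdio.h>

/* Stand-ins for spa_log_class() / spa_normal_class(). */
enum alloc_class { CLASS_LOG, CLASS_NORMAL };

static int
alloc_from(enum alloc_class c, int *out)
{
	if (c == CLASS_LOG)
		return (ENOSPC);	/* pretend the slog is full */
	*out = 42;			/* pretend allocation succeeded */
	return (0);
}

int
main(void)
{
	int block = 0, error = 1;
	int use_slog = 1;

	/* Mirror zio_alloc_zil(): prefer the log class when use_slog. */
	if (use_slog)
		error = alloc_from(CLASS_LOG, &block);
	if (error)
		error = alloc_from(CLASS_NORMAL, &block);

	printf("error=%d block=%d\n", error, block);
	return (error);
}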
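
The zb1nextL0 computation in zbookmark_is_before() converts a bookmark at
(level, blkid) into the first level-0 block id past the range that block
covers: an indirect block holds 2^(dn_indblkshift - SPA_BLKPTRSHIFT) block
pointers, so each level adds that many bits of shift. A standalone sketch of
just that arithmetic, with assumed-typical on-disk values (128-byte block
pointers, 16K indirect blocks) rather than values taken from this diff:

#include <stdio.h>
#include <stdint.h>

#define SPA_BLKPTRSHIFT	7	/* a blkptr_t is 128 bytes */

int
main(void)
{
	/* Assumed 16K indirect blocks (shift 14): 128 pointers each. */
	uint64_t indblkshift = 14;
	uint64_t epbshift = indblkshift - SPA_BLKPTRSHIFT;
	uint64_t level = 2, blkid = 5;

	/* Range of level-0 block ids covered by this indirect block ... */
	uint64_t firstL0 = blkid << (level * epbshift);
	/* ... and the exclusive upper bound, as computed for zb1nextL0. */
	uint64_t nextL0 = (blkid + 1) << (level * epbshift);

	printf("L%llu blkid %llu covers L0 blkids [%llu, %llu)\n",
	    (unsigned long long)level, (unsigned long long)blkid,
	    (unsigned long long)firstL0, (unsigned long long)nextL0);
	return (0);
}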