X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzio.c;h=ace72a0875be7db5dad1427c0bc6a3e2186e9d93;hb=82f46731fd5a9eef4f87530e94922664b58a6138;hp=149088d939b329154b8766288dd706b016e1dff8;hpb=49be0ccf1fdc2ce852271d4d2f8b7a9c2c4be6db;p=zfs.git diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 149088d..ace72a0 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. */ @@ -73,6 +73,7 @@ char *zio_type_name[ZIO_TYPES] = { */ kmem_cache_t *zio_cache; kmem_cache_t *zio_link_cache; +kmem_cache_t *zio_vdev_cache; kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; int zio_bulk_flags = 0; @@ -141,6 +142,8 @@ zio_init(void) zio_cons, zio_dest, NULL, NULL, NULL, KMC_KMEM); zio_link_cache = kmem_cache_create("zio_link_cache", sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, KMC_KMEM); + zio_vdev_cache = kmem_cache_create("zio_vdev_cache", sizeof(vdev_io_t), + PAGESIZE, NULL, NULL, NULL, NULL, NULL, KMC_VMEM); /* * For small buffers, we want a cache for each multiple of @@ -230,6 +233,7 @@ zio_fini(void) zio_data_buf_cache[c] = NULL; } + kmem_cache_destroy(zio_vdev_cache); kmem_cache_destroy(zio_link_cache); kmem_cache_destroy(zio_cache); @@ -255,7 +259,7 @@ zio_buf_alloc(size_t size) ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); - return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE)); + return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE | KM_NODEBUG)); } /* @@ -271,7 +275,8 @@ zio_data_buf_alloc(size_t size) ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); - return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE)); + return (kmem_cache_alloc(zio_data_buf_cache[c], + KM_PUSHPAGE | KM_NODEBUG)); } void @@ -295,6 +300,24 @@ zio_data_buf_free(void *buf, size_t size) } /* + * Dedicated I/O buffers to ensure that memory fragmentation never prevents + * or significantly delays the issuing of a zio. These buffers are used + * to aggregate I/O and could be used for raidz stripes. + */ +void * +zio_vdev_alloc(void) +{ + return (kmem_cache_alloc(zio_vdev_cache, KM_PUSHPAGE)); +} + +void +zio_vdev_free(void *buf) +{ + kmem_cache_free(zio_vdev_cache, buf); + +} + +/* * ========================================================================== * Push and pop I/O transform buffers * ========================================================================== @@ -2317,13 +2340,22 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, ASSERT(txg > spa_syncing_txg(spa)); - if (use_slog) + /* + * ZIL blocks are always contiguous (i.e. not gang blocks) so we + * set the METASLAB_GANG_AVOID flag so that they don't "fast gang" + * when allocating them. + */ + if (use_slog) { error = metaslab_alloc(spa, spa_log_class(spa), size, - new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); + new_bp, 1, txg, old_bp, + METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID); + } - if (error) + if (error) { error = metaslab_alloc(spa, spa_normal_class(spa), size, - new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); + new_bp, 1, txg, old_bp, + METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID); + } if (error == 0) { BP_SET_LSIZE(new_bp, size); @@ -2400,19 +2432,25 @@ zio_vdev_io_start(zio_t *zio) align = 1ULL << vd->vdev_top->vdev_ashift; - if (P2PHASE(zio->io_size, align) != 0) { + /* + * On Linux, we don't care about read alignment. The backing block + * device driver will take care of that for us. + * The only exception is raidz, which needs a full block for parity. + */ + if (P2PHASE(zio->io_size, align) != 0 && + (zio->io_type != ZIO_TYPE_READ || + vd->vdev_ops == &vdev_raidz_ops)) { uint64_t asize = P2ROUNDUP(zio->io_size, align); char *abuf = zio_buf_alloc(asize); - ASSERT(vd == vd->vdev_top); if (zio->io_type == ZIO_TYPE_WRITE) { bcopy(zio->io_data, abuf, zio->io_size); bzero(abuf + zio->io_size, asize - zio->io_size); } zio_push_transform(zio, abuf, asize, asize, zio_subblock); + ASSERT(P2PHASE(zio->io_size, align) == 0); } ASSERT(P2PHASE(zio->io_offset, align) == 0); - ASSERT(P2PHASE(zio->io_size, align) == 0); VERIFY(zio->io_type != ZIO_TYPE_WRITE || spa_writeable(spa)); /*