*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#ifdef _KERNEL
extern vmem_t *zio_alloc_arena;
#endif
+extern int zfs_mg_alloc_failures;
/*
* An allocating zio is one that either currently has the DVA allocate
data_alloc_arena = zio_alloc_arena;
#endif
zio_cache = kmem_cache_create("zio_cache",
- sizeof (zio_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+ sizeof (zio_t), 0, NULL, NULL, NULL, NULL, NULL, KMC_KMEM);
zio_link_cache = kmem_cache_create("zio_link_cache",
- sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+ sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, KMC_KMEM);
/*
* For small buffers, we want a cache for each multiple of
if (align != 0) {
char name[36];
+ int flags = zio_bulk_flags;
+
+ /*
+ * The smallest buffers (512b) are heavily used and
+ * experience a lot of churn. The slabs allocated
+ * for them are also relatively small (32K). Thus
+ * in order to avoid expensive calls to vmalloc() we
+ * make an exception to the usual slab allocation
+ * policy and force these buffers to be kmem backed.
+ */
+ if (size == (1 << SPA_MINBLOCKSHIFT))
+ flags |= KMC_KMEM;
+
(void) sprintf(name, "zio_buf_%lu", (ulong_t)size);
zio_buf_cache[c] = kmem_cache_create(name, size,
- align, NULL, NULL, NULL, NULL, NULL,
- (size > zio_buf_debug_limit ? KMC_NODEBUG : 0) |
- zio_bulk_flags);
+ align, NULL, NULL, NULL, NULL, NULL, flags);
(void) sprintf(name, "zio_data_buf_%lu", (ulong_t)size);
zio_data_buf_cache[c] = kmem_cache_create(name, size,
- align, NULL, NULL, NULL, NULL, data_alloc_arena,
- (size > zio_buf_debug_limit ? KMC_NODEBUG : 0) |
- zio_bulk_flags);
+ align, NULL, NULL, NULL, NULL,
+ data_alloc_arena, flags);
}
}
zio_data_buf_cache[c - 1] = zio_data_buf_cache[c];
}
+ /*
+ * The zio write taskqs have 1 thread per cpu, allow 1/2 of the taskqs
+ * to fail 3 times per txg or 8 failures, whichever is greater.
+ */
+ zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8);
+
zio_inject_init();
}
metaslab_class_t *mc = spa_normal_class(spa);
blkptr_t *bp = zio->io_bp;
int error;
+ int flags = 0;
if (zio->io_gang_leader == NULL) {
ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
+ /*
+ * The dump device does not support gang blocks so allocation on
+ * behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid
+ * the "fast" gang feature.
+ */
+ flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0;
+ flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ?
+ METASLAB_GANG_CHILD : 0;
error = metaslab_alloc(spa, mc, zio->io_size, bp,
- zio->io_prop.zp_copies, zio->io_txg, NULL, 0);
+ zio->io_prop.zp_copies, zio->io_txg, NULL, flags);
if (error) {
+ spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, "
+ "size %llu, error %d", spa_name(spa), zio, zio->io_size,
+ error);
if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE)
return (zio_write_gang_block(zio));
zio->io_error = error;