X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=zfs%2Flib%2Flibumem%2Fumem.c;h=a3eb0b8e6c93f466c50d3c2914e7d7d889fa025c;hb=b128c09fbee863d15be744a2ce602b514eddbe3a;hp=635c19e1af89f4ea7bcd4520bb5808ad2bea3e21;hpb=34dc7c2f2553220ebc6e29ca195fb6d57155f95f;p=zfs.git

diff --git a/zfs/lib/libumem/umem.c b/zfs/lib/libumem/umem.c
index 635c19e..a3eb0b8 100644
--- a/zfs/lib/libumem/umem.c
+++ b/zfs/lib/libumem/umem.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -19,47 +18,15 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
-/*
- * Portions Copyright 2006 OmniTI, Inc.
- */
 
-/* #pragma ident	"@(#)umem.c	1.11	05/06/08 SMI" */
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
 
-/*!
- * \mainpage Main Page
- *
- * \section README
- *
- * \include README
- *
- * \section Nuances
- *
- * There is a nuance in the behaviour of the umem port compared
- * with umem on Solaris.
- *
- * On Linux umem will not return memory back to the OS until umem fails
- * to allocate a chunk. On failure, umem_reap() will be called automatically,
- * to return memory to the OS. If your code is going to be running
- * for a long time on Linux and mixes calls to different memory allocators
- * (e.g.: malloc()) and umem, your code will need to call
- * umem_reap() periodically.
- *
- * This doesn't happen on Solaris, because malloc is replaced
- * with umem calls, meaning that umem_reap() is called automatically.
- *
- * \section References
- *
- * http://docs.sun.com/app/docs/doc/816-5173/6mbb8advq?a=view
- *
- * http://access1.sun.com/techarticles/libumem.html
- *
- * \section Overview
- *
- * \code
+/*
  * based on usr/src/uts/common/os/kmem.c r1.64 from 2001/12/18
  *
  * The slab allocator, as described in the following two papers:
@@ -88,6 +55,7 @@
 *
 *		* KM_SLEEP v.s. UMEM_NOFAIL
 *
+ *		* lock ordering
 *
 * 2. Initialization
 * -----------------
@@ -362,41 +330,51 @@
  *	If a constructor callback _does_ do a UMEM_NOFAIL allocation, and
  *	the nofail callback does a non-local exit, we will leak the
  *	partially-constructed buffer.
- * \endcode
+ *
+ *
+ * 6. Lock Ordering
+ * ----------------
+ * umem has a few more locks than kmem does, mostly in the update path.  The
+ * overall lock ordering (earlier locks must be acquired first) is:
+ *
+ *	umem_init_lock
+ *
+ *	vmem_list_lock
+ *	vmem_nosleep_lock.vmpl_mutex
+ *	vmem_t's:
+ *		vm_lock
+ *	sbrk_lock
+ *
+ *	umem_cache_lock
+ *	umem_update_lock
+ *	umem_flags_lock
+ *	umem_cache_t's:
+ *		cache_cpu[*].cc_lock
+ *		cache_depot_lock
+ *		cache_lock
+ *	umem_log_header_t's:
+ *		lh_cpu[*].clh_lock
+ *		lh_lock
  */
 
-#include "config.h"
-/* #include "mtlib.h" */
 #include <umem_impl.h>
 #include <sys/vmem_impl_user.h>
 #include "umem_base.h"
 #include "vmem_base.h"
 
-#if HAVE_SYS_PROCESSOR_H
 #include <sys/processor.h>
-#endif
-#if HAVE_SYS_SYSMACROS_H
 #include <sys/sysmacros.h>
-#endif
 
-#if HAVE_ALLOCA_H
 #include <alloca.h>
-#endif
 #include <errno.h>
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#if HAVE_STRINGS_H
 #include <strings.h>
-#endif
 #include <signal.h>
-#if HAVE_UNISTD_H
 #include <unistd.h>
-#endif
-#if HAVE_ATOMIC_H
 #include <atomic.h>
-#endif
 
 #include "misc.h"
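The hierarchy documented above is easiest to check against code. A minimal sketch of an update-path routine that needs both global locks, assuming the Solaris synch.h mutex API this file already uses (walk_caches() itself is hypothetical, not part of the patch):

/*
 * Illustration only: take locks in the documented order
 * (umem_cache_lock before umem_update_lock), release in reverse.
 */
#include <synch.h>

extern mutex_t umem_cache_lock;		/* inter-cache linkage only */
extern mutex_t umem_update_lock;	/* cache_u{next,prev,flags} */

static void
walk_caches(void)
{
	(void) mutex_lock(&umem_cache_lock);
	(void) mutex_lock(&umem_update_lock);

	/* ... walk the cache_unext/cache_uprev chains here ... */

	(void) mutex_unlock(&umem_update_lock);
	(void) mutex_unlock(&umem_cache_lock);
}

Taking the same two locks in the opposite order anywhere else would be a lock-order reversal and a potential deadlock.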
@@ -413,8 +391,12 @@ size_t pagesize;
  * bytes, so that it will be 64-byte aligned.  For all multiples of 64,
  * the next kmem_cache_size greater than or equal to it must be a
  * multiple of 64.
+ *
+ * This table must be in sorted order, from smallest to highest.  The
+ * highest slot must be UMEM_MAXBUF, and every slot afterwards must be
+ * zero.
  */
-static const int umem_alloc_sizes[] = {
+static int umem_alloc_sizes[] = {
 #ifdef _LP64
 	1 * 8,
 	1 * 16,
@@ -433,17 +415,19 @@ static const int umem_alloc_sizes[] = {
 	P2ALIGN(8192 / 7, 64),
 	P2ALIGN(8192 / 6, 64),
 	P2ALIGN(8192 / 5, 64),
-	P2ALIGN(8192 / 4, 64),
+	P2ALIGN(8192 / 4, 64), 2304,
 	P2ALIGN(8192 / 3, 64),
-	P2ALIGN(8192 / 2, 64),
-	P2ALIGN(8192 / 1, 64),
+	P2ALIGN(8192 / 2, 64), 4544,
+	P2ALIGN(8192 / 1, 64), 9216,
 	4096 * 3,
-	8192 * 2,
+	UMEM_MAXBUF,			/* = 8192 * 2 */
+	/* 24 slots for user expansion */
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
 };
 #define	NUM_ALLOC_SIZES (sizeof (umem_alloc_sizes) / sizeof (*umem_alloc_sizes))
 
-#define	UMEM_MAXBUF	16384
-
 static umem_magtype_t umem_magtype[] = {
 	{ 1,	8,	3200,	65536 },
 	{ 3,	16,	256,	32768 },
@@ -480,21 +464,21 @@ size_t umem_minfirewall;	/* hardware-enforced redzone threshold */
 
 uint_t umem_flags = 0;
 
-mutex_t		umem_init_lock = DEFAULTMUTEX;	/* locks initialization */
-cond_t		umem_init_cv = DEFAULTCV;	/* initialization CV */
+mutex_t		umem_init_lock;		/* locks initialization */
+cond_t		umem_init_cv;		/* initialization CV */
 thread_t	umem_init_thr;		/* thread initializing */
 int		umem_init_env_ready;	/* environ pre-initted */
 int		umem_ready = UMEM_READY_STARTUP;
 
 static umem_nofail_callback_t *nofail_callback;
-static mutex_t umem_nofail_exit_lock = DEFAULTMUTEX;
+static mutex_t umem_nofail_exit_lock;
 static thread_t umem_nofail_exit_thr;
 
 static umem_cache_t *umem_slab_cache;
 static umem_cache_t *umem_bufctl_cache;
 static umem_cache_t *umem_bufctl_audit_cache;
 
-mutex_t umem_flags_lock = DEFAULTMUTEX;
+mutex_t umem_flags_lock;
 
 static vmem_t *heap_arena;
 static vmem_alloc_t *heap_alloc;
@@ -517,15 +501,7 @@ umem_log_header_t *umem_content_log;
 umem_log_header_t *umem_failure_log;
 umem_log_header_t *umem_slab_log;
 
-extern thread_t _thr_self(void);
-#if defined(__MACH__) || defined(__FreeBSD__)
-# define CPUHINT() ((int)(_thr_self()))
-#endif
-
-#ifndef CPUHINT
-#define CPUHINT() (_thr_self())
-#endif
-
+#define	CPUHINT()		(thr_self())
 #define	CPUHINT_MAX()		INT_MAX
 
 #define	CPU(mask)		(umem_cpus + (CPUHINT() & (mask)))
@@ -547,12 +523,12 @@ volatile thread_t umem_st_update_thr; /* only used when single-thd */
 	thr_self() == umem_st_update_thr)
 #define	IN_REAP() IN_UPDATE()
 
-mutex_t umem_update_lock = DEFAULTMUTEX;	/* cache_u{next,prev,flags} */
-cond_t umem_update_cv = DEFAULTCV;
+mutex_t umem_update_lock;	/* cache_u{next,prev,flags} */
+cond_t umem_update_cv;
 
 volatile hrtime_t umem_reap_next;	/* min hrtime of next reap */
 
-mutex_t umem_cache_lock = DEFAULTMUTEX;	/* inter-cache linkage only */
+mutex_t	umem_cache_lock;	/* inter-cache linkage only */
 
 #ifdef UMEM_STANDALONE
 umem_cache_t umem_null_cache;
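With the portability shims gone, CPUHINT() is simply thr_self(), and CPU(mask) picks a per-CPU cache slot by masking the thread id with umem_cpu_mask (always a power of two minus one). A standalone sketch of that hashing, with a loop counter standing in for thr_self() (hypothetical demo, not part of the patch):

/* cpuhint_demo.c: thread-id to per-CPU cache slot, as in CPU(mask) */
#include <stdio.h>

int
main(void)
{
	unsigned int ncpus = 4;		/* power of two, as umem requires */
	unsigned int mask = ncpus - 1;	/* stands in for umem_cpu_mask */
	unsigned int tid;

	/* tid stands in for thr_self() */
	for (tid = 1; tid <= 8; tid++)
		(void) printf("thread %u -> cpu cache %u\n", tid, tid & mask);
	return (0);
}

Distinct threads therefore spread across the per-CPU caches even though the library never asks which CPU it is actually running on.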
@@ -625,12 +601,6 @@
 caddr_t	umem_min_stack;
 caddr_t	umem_max_stack;
 
-/*
- * we use the _ versions, since we don't want to be cancelled.
- * Actually, this is automatically taken care of by including "mtlib.h".
- */
-extern int _cond_wait(cond_t *cv, mutex_t *mutex);
-
 #define	UMERR_MODIFIED	0	/* buffer modified while on freelist */
 #define	UMERR_REDZONE	1	/* redzone violation (write past end of buf) */
 #define	UMERR_DUPFREE	2	/* freed a buffer twice */
@@ -757,6 +727,8 @@ umem_remove_updates(umem_cache_t *cp)
 	 * Get it out of the active state
 	 */
 	while (cp->cache_uflags & UMU_ACTIVE) {
+		int cancel_state;
+
 		ASSERT(cp->cache_unext == NULL);
 
 		cp->cache_uflags |= UMU_NOTIFY;
@@ -768,7 +740,10 @@ umem_remove_updates(umem_cache_t *cp)
 		ASSERT(umem_update_thr != thr_self() &&
 		    umem_st_update_thr != thr_self());
 
-		(void) _cond_wait(&umem_update_cv, &umem_update_lock);
+		(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE,
+		    &cancel_state);
+		(void) cond_wait(&umem_update_cv, &umem_update_lock);
+		(void) pthread_setcancelstate(cancel_state, NULL);
 	}
 	/*
 	 * Get it out of the Work Requested state
@@ -1097,7 +1072,7 @@ umem_log_enter(umem_log_header_t *lhp, void *data, size_t size)
 {
 	void *logspace;
 	umem_cpu_log_header_t *clhp =
-	    &(lhp->lh_cpu[CPU(umem_cpu_mask)->cpu_number]);
+	    &lhp->lh_cpu[CPU(umem_cpu_mask)->cpu_number];
 
 	if (lhp == NULL || umem_logging == 0)
 		return (NULL);
@@ -1659,9 +1634,7 @@ umem_cpu_reload(umem_cpu_cache_t *ccp, umem_magazine_t *mp, int rounds)
 /*
  * Allocate a constructed object from cache cp.
  */
-#ifndef NO_WEAK_SYMBOLS
 #pragma weak umem_cache_alloc = _umem_cache_alloc
-#endif
 void *
 _umem_cache_alloc(umem_cache_t *cp, int umflag)
 {
@@ -1779,9 +1752,7 @@ retry:
 /*
  * Free a constructed object to cache cp.
  */
-#ifndef NO_WEAK_SYMBOLS
 #pragma weak umem_cache_free = _umem_cache_free
-#endif
 void
 _umem_cache_free(umem_cache_t *cp, void *buf)
 {
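The hunks above retire the private _cond_wait() in favor of the standard cond_wait(). Since cond_wait() is a cancellation point, each call site now brackets it with pthread_setcancelstate() so a cancelled thread cannot unwind while umem_update_lock is held. The idiom, reduced to a sketch (wait_for_update() is a hypothetical name, not part of the patch):

#include <pthread.h>
#include <synch.h>

/* the caller must already hold mp, exactly as in umem_remove_updates() */
static void
wait_for_update(cond_t *cv, mutex_t *mp)
{
	int cancel_state;

	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
	(void) cond_wait(cv, mp);
	(void) pthread_setcancelstate(cancel_state, NULL);
}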
@@ -1886,9 +1857,7 @@ _umem_cache_free(umem_cache_t *cp, void *buf)
 	umem_slab_free(cp, buf);
 }
 
-#ifndef NO_WEAK_SYMBOLS
 #pragma weak umem_zalloc = _umem_zalloc
-#endif
 void *
 _umem_zalloc(size_t size, int umflag)
 {
@@ -1916,9 +1885,7 @@ retry:
 	return (buf);
 }
 
-#ifndef NO_WEAK_SYMBOLS
 #pragma weak umem_alloc = _umem_alloc
-#endif
 void *
 _umem_alloc(size_t size, int umflag)
 {
@@ -1954,9 +1921,7 @@ umem_alloc_retry:
 	return (buf);
 }
 
-#ifndef NO_WEAK_SYMBOLS
 #pragma weak umem_alloc_align = _umem_alloc_align
-#endif
 void *
 _umem_alloc_align(size_t size, size_t align, int umflag)
 {
@@ -1986,9 +1951,7 @@ umem_alloc_align_retry:
 	return (buf);
 }
 
-#ifndef NO_WEAK_SYMBOLS
 #pragma weak umem_free = _umem_free
-#endif
 void
 _umem_free(void *buf, size_t size)
 {
@@ -2026,9 +1989,7 @@ _umem_free(void *buf, size_t size)
 	}
 }
 
-#ifndef NO_WEAK_SYMBOLS
 #pragma weak umem_free_align = _umem_free_align
-#endif
 void
 _umem_free_align(void *buf, size_t size)
 {
@@ -2382,7 +2343,6 @@ umem_reap(void)
 		(void) mutex_unlock(&umem_update_lock);
 		return;
 	}
-
 	umem_reaping = UMEM_REAP_ADDING;	/* lock out other reaps */
 
 	(void) mutex_unlock(&umem_update_lock);
@@ -2770,6 +2730,88 @@ umem_cache_destroy(umem_cache_t *cp)
 	vmem_free(umem_cache_arena, cp, UMEM_CACHE_SIZE(umem_max_ncpus));
 }
 
+void
+umem_alloc_sizes_clear(void)
+{
+	int i;
+
+	umem_alloc_sizes[0] = UMEM_MAXBUF;
+	for (i = 1; i < NUM_ALLOC_SIZES; i++)
+		umem_alloc_sizes[i] = 0;
+}
+
+void
+umem_alloc_sizes_add(size_t size_arg)
+{
+	int i, j;
+	size_t size = size_arg;
+
+	if (size == 0) {
+		log_message("size_add: cannot add zero-sized cache\n",
+		    size, UMEM_MAXBUF);
+		return;
+	}
+
+	if (size > UMEM_MAXBUF) {
+		log_message("size_add: %ld > %d, cannot add\n", size,
+		    UMEM_MAXBUF);
+		return;
+	}
+
+	if (umem_alloc_sizes[NUM_ALLOC_SIZES - 1] != 0) {
+		log_message("size_add: no space in alloc_table for %d\n",
+		    size);
+		return;
+	}
+
+	if (P2PHASE(size, UMEM_ALIGN) != 0) {
+		size = P2ROUNDUP(size, UMEM_ALIGN);
+		log_message("size_add: rounding %d up to %d\n", size_arg,
+		    size);
+	}
+
+	for (i = 0; i < NUM_ALLOC_SIZES; i++) {
+		int cur = umem_alloc_sizes[i];
+		if (cur == size) {
+			log_message("size_add: %ld already in table\n",
+			    size);
+			return;
+		}
+		if (cur > size)
+			break;
+	}
+
+	for (j = NUM_ALLOC_SIZES - 1; j > i; j--)
+		umem_alloc_sizes[j] = umem_alloc_sizes[j-1];
+	umem_alloc_sizes[i] = size;
+}
+
+void
+umem_alloc_sizes_remove(size_t size)
+{
+	int i;
+
+	if (size == UMEM_MAXBUF) {
+		log_message("size_remove: cannot remove %ld\n", size);
+		return;
+	}
+
+	for (i = 0; i < NUM_ALLOC_SIZES; i++) {
+		int cur = umem_alloc_sizes[i];
+		if (cur == size)
+			break;
+		else if (cur > size || cur == 0) {
+			log_message("size_remove: %ld not found in table\n",
+			    size);
+			return;
+		}
+	}
+
+	for (; i + 1 < NUM_ALLOC_SIZES; i++)
+		umem_alloc_sizes[i] = umem_alloc_sizes[i+1];
+	umem_alloc_sizes[i] = 0;
+}
+
 static int
 umem_cache_init(void)
 {
@@ -2862,6 +2904,10 @@ umem_cache_init(void)
 	for (i = 0; i < NUM_ALLOC_SIZES; i++) {
 		size_t cache_size = umem_alloc_sizes[i];
 		size_t align = 0;
+
+		if (cache_size == 0)
+			break;		/* 0 terminates the list */
+
 		/*
 		 * If they allocate a multiple of the coherency granularity,
 		 * they get a coherency-granularity-aligned address.
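The new umem_alloc_sizes_clear(), umem_alloc_sizes_add(), and umem_alloc_sizes_remove() functions let the cache-size table be tuned before umem_cache_init() runs, while preserving its invariants: sorted ascending, UMEM_MAXBUF present, zero-terminated. A hypothetical tuning sequence, with comments showing the resulting umem_alloc_sizes[] contents (UMEM_ALIGN is 8):

umem_alloc_sizes_clear();	/* { UMEM_MAXBUF, 0, ... } */
umem_alloc_sizes_add(64);	/* { 64, UMEM_MAXBUF, 0, ... } */
umem_alloc_sizes_add(8192);	/* { 64, 8192, UMEM_MAXBUF, 0, ... } */
umem_alloc_sizes_add(100);	/* rounded up to 104:
				   { 64, 104, 8192, UMEM_MAXBUF, 0, ... } */
umem_alloc_sizes_remove(64);	/* { 104, 8192, UMEM_MAXBUF, 0, ... } */

umem_cache_init() below then creates one cache per nonzero slot, stopping at the first zero.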
@@ -2889,6 +2935,9 @@ umem_cache_init(void) for (i = 0; i < NUM_ALLOC_SIZES; i++) { size_t cache_size = umem_alloc_sizes[i]; + if (cache_size == 0) + break; /* 0 terminates the list */ + cp = umem_alloc_caches[i]; while (size <= cache_size) { @@ -2896,6 +2945,7 @@ umem_cache_init(void) size += UMEM_ALIGN; } } + ASSERT(size - UMEM_ALIGN == UMEM_MAXBUF); return (1); } @@ -2903,16 +2953,15 @@ umem_cache_init(void) * umem_startup() is called early on, and must be called explicitly if we're * the standalone version. */ -static void -umem_startup() __attribute__((constructor)); - +#ifdef UMEM_STANDALONE void -umem_startup() +#else +#pragma init(umem_startup) +static void +#endif +umem_startup(caddr_t start, size_t len, size_t pagesize, caddr_t minstack, + caddr_t maxstack) { - caddr_t start = NULL; - size_t len = 0; - size_t pagesize = 0; - #ifdef UMEM_STANDALONE int idx; /* Standalone doesn't fork */ @@ -2995,9 +3044,16 @@ umem_init(void) * someone else beat us to initializing umem. Wait * for them to complete, then return. */ - while (umem_ready == UMEM_READY_INITING) - (void) _cond_wait(&umem_init_cv, + while (umem_ready == UMEM_READY_INITING) { + int cancel_state; + + (void) pthread_setcancelstate( + PTHREAD_CANCEL_DISABLE, &cancel_state); + (void) cond_wait(&umem_init_cv, &umem_init_lock); + (void) pthread_setcancelstate( + cancel_state, NULL); + } ASSERT(umem_ready == UMEM_READY || umem_ready == UMEM_READY_INIT_FAILED); (void) mutex_unlock(&umem_init_lock); @@ -3199,10 +3255,3 @@ fail: (void) mutex_unlock(&umem_init_lock); return (0); } - -size_t -umem_cache_get_bufsize(umem_cache_t *cache) -{ - return cache->cache_bufsize; -} -
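Finally, umem_startup() loses its GCC-specific constructor attribute in favor of the native #pragma init, which makes the runtime linker call it when the library is loaded. The removed idiom, shown in isolation as a compilable GCC demo (hypothetical file, not part of the patch):

/* ctor_demo.c: the __attribute__((constructor)) idiom removed above */
#include <stdio.h>

static void startup(void) __attribute__((constructor));

static void
startup(void)
{
	(void) printf("startup() runs before main()\n");
}

int
main(void)
{
	(void) printf("main()\n");
	return (0);
}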