X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=lib%2Flibzpool%2Fkernel.c;h=f7aeeb440c0696d27f46f3ebf1fc48c0cafe6509;hb=a1d9543a39942be56879ca9338078afc77c25cea;hp=0559347e96b9d9a80c1477eb861c81cae96bbae0;hpb=5ae4e2c2c660269d87dd5bbac6f590f69d0c7d8c;p=zfs.git diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 0559347..f7aeeb4 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -35,6 +35,7 @@ #include #include #include +#include #include /* @@ -140,7 +141,7 @@ zk_thread_helper(void *arg) kthread_t * zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, - size_t len, proc_t *pp, int state, pri_t pri) + size_t len, proc_t *pp, int state, pri_t pri, int detachstate) { kthread_t *kt; pthread_attr_t attr; @@ -158,9 +159,14 @@ zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, * * We reduce the default stack size in userspace, to ensure * we observe stack overruns in user space as well as in - * kernel space. PTHREAD_STACK_MIN is the minimum stack - * required for a NULL procedure in user space and is added - * in to the stack requirements. + * kernel space. In practice we can't set the userspace stack + * size to 8k because differences in stack usage between kernel + * space and userspace could lead to spurious stack overflows + * (especially when debugging is enabled). Nevertheless, we try + * to set it to the lowest value that works (currently 8k*4). + * PTHREAD_STACK_MIN is the minimum stack required for a NULL + * procedure in user space and is added in to the stack + * requirements. * * Some buggy NPTL threading implementations include the * guard area within the stack size allocations. In @@ -169,12 +175,12 @@ zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, * on Linux. */ - stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) + - EXTRA_GUARD_BYTES; + stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) * 4; VERIFY3S(pthread_attr_init(&attr), ==, 0); VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0); VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0); + VERIFY3S(pthread_attr_setdetachstate(&attr, detachstate), ==, 0); VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt), ==, 0); @@ -516,7 +522,7 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) { int fd; vnode_t *vp; - int old_umask; + int old_umask = 0; char *realpath; struct stat64 st; int err; @@ -533,7 +539,11 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) * for its size. So -- gag -- we open the block device to get * its size, and remember it for subsequent VOP_GETATTR(). */ +#if defined(__sun__) || defined(__sun) if (strncmp(path, "/dev/", 5) == 0) { +#else + if (0) { +#endif char *dsk; fd = open64(path, O_RDONLY); if (fd == -1) { @@ -562,6 +572,14 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) } } + if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) { +#ifdef __linux__ + flags |= O_DIRECT; +#endif + /* We shouldn't be writing to block devices in userspace */ + VERIFY(!(flags & FWRITE)); + } + if (flags & FCREAT) old_umask = umask(0); @@ -578,7 +596,7 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) if (fd == -1) return (errno); - if (fstat64(fd, &st) == -1) { + if (fstat64_blk(fd, &st) == -1) { err = errno; close(fd); return (err); @@ -628,7 +646,9 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, * To simulate partial disk writes, we split writes into two * system calls so that the process can be killed in between. */ - split = (len > 0 ? rand() % len : 0); + int sectors = len >> SPA_MINBLOCKSHIFT; + split = (sectors > 0 ? rand() % sectors : 0) << + SPA_MINBLOCKSHIFT; rc = pwrite64(vp->v_fd, addr, split, offset); if (rc != -1) { done = rc; @@ -637,6 +657,16 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, } } +#ifdef __linux__ + if (rc == -1 && errno == EINVAL) { + /* + * Under Linux, this most likely means an alignment issue + * (memory or disk) due to O_DIRECT, so we abort() in order to + * catch the offender. + */ + abort(); + } +#endif if (rc == -1) return (errno); @@ -665,18 +695,18 @@ int fop_getattr(vnode_t *vp, vattr_t *vap) { struct stat64 st; + int err; - if (fstat64(vp->v_fd, &st) == -1) { + if (fstat64_blk(vp->v_fd, &st) == -1) { + err = errno; close(vp->v_fd); - return (errno); + return (err); } vap->va_size = st.st_size; return (0); } -#ifdef ZFS_DEBUG - /* * ========================================================================= * Figure out which debugging statements to print @@ -789,8 +819,6 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...) } } -#endif /* ZFS_DEBUG */ - /* * ========================================================================= * cmn_err() and panic() @@ -1107,25 +1135,27 @@ ksiddomain_rele(ksiddomain_t *ksid) umem_free(ksid, sizeof (ksiddomain_t)); } -/* - * Do not change the length of the returned string; it must be freed - * with strfree(). - */ char * -kmem_asprintf(const char *fmt, ...) +kmem_vasprintf(const char *fmt, va_list adx) { - int size; - va_list adx; - char *buf; + char *buf = NULL; + va_list adx_copy; - va_start(adx, fmt); - size = vsnprintf(NULL, 0, fmt, adx) + 1; - va_end(adx); + va_copy(adx_copy, adx); + VERIFY(vasprintf(&buf, fmt, adx_copy) != -1); + va_end(adx_copy); - buf = kmem_alloc(size, KM_SLEEP); + return (buf); +} + +char * +kmem_asprintf(const char *fmt, ...) +{ + char *buf = NULL; + va_list adx; va_start(adx, fmt); - size = vsnprintf(buf, size, fmt, adx); + VERIFY(vasprintf(&buf, fmt, adx) != -1); va_end(adx); return (buf);