X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=lib%2Flibzpool%2Fkernel.c;h=f7aeeb440c0696d27f46f3ebf1fc48c0cafe6509;hb=bbb75c11908d1009b6749b797b3a763558bbaaaf;hp=494e544ea7f8ba8746f0909f2de0921e1b0a1e4f;hpb=00b46022c676e402e3f33ce93ee2983bbad2c46f;p=zfs.git diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 494e544..f7aeeb4 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -35,6 +35,7 @@ #include #include #include +#include #include /* @@ -140,7 +141,7 @@ zk_thread_helper(void *arg) kthread_t * zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, - size_t len, proc_t *pp, int state, pri_t pri) + size_t len, proc_t *pp, int state, pri_t pri, int detachstate) { kthread_t *kt; pthread_attr_t attr; @@ -158,9 +159,14 @@ zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, * * We reduce the default stack size in userspace, to ensure * we observe stack overruns in user space as well as in - * kernel space. PTHREAD_STACK_MIN is the minimum stack - * required for a NULL procedure in user space and is added - * in to the stack requirements. + * kernel space. In practice we can't set the userspace stack + * size to 8k because differences in stack usage between kernel + * space and userspace could lead to spurious stack overflows + * (especially when debugging is enabled). Nevertheless, we try + * to set it to the lowest value that works (currently 8k*4). + * PTHREAD_STACK_MIN is the minimum stack required for a NULL + * procedure in user space and is added in to the stack + * requirements. * * Some buggy NPTL threading implementations include the * guard area within the stack size allocations. In @@ -169,12 +175,12 @@ zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, * on Linux. */ - stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) + - EXTRA_GUARD_BYTES; + stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) * 4; VERIFY3S(pthread_attr_init(&attr), ==, 0); VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0); VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0); + VERIFY3S(pthread_attr_setdetachstate(&attr, detachstate), ==, 0); VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt), ==, 0); @@ -516,7 +522,7 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) { int fd; vnode_t *vp; - int old_umask; + int old_umask = 0; char *realpath; struct stat64 st; int err; @@ -533,7 +539,11 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) * for its size. So -- gag -- we open the block device to get * its size, and remember it for subsequent VOP_GETATTR(). */ +#if defined(__sun__) || defined(__sun) if (strncmp(path, "/dev/", 5) == 0) { +#else + if (0) { +#endif char *dsk; fd = open64(path, O_RDONLY); if (fd == -1) { @@ -562,6 +572,14 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) } } + if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) { +#ifdef __linux__ + flags |= O_DIRECT; +#endif + /* We shouldn't be writing to block devices in userspace */ + VERIFY(!(flags & FWRITE)); + } + if (flags & FCREAT) old_umask = umask(0); @@ -578,7 +596,7 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) if (fd == -1) return (errno); - if (fstat64(fd, &st) == -1) { + if (fstat64_blk(fd, &st) == -1) { err = errno; close(fd); return (err); @@ -628,7 +646,9 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, * To simulate partial disk writes, we split writes into two * system calls so that the process can be killed in between. */ - split = (len > 0 ? rand() % len : 0); + int sectors = len >> SPA_MINBLOCKSHIFT; + split = (sectors > 0 ? rand() % sectors : 0) << + SPA_MINBLOCKSHIFT; rc = pwrite64(vp->v_fd, addr, split, offset); if (rc != -1) { done = rc; @@ -637,6 +657,16 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, } } +#ifdef __linux__ + if (rc == -1 && errno == EINVAL) { + /* + * Under Linux, this most likely means an alignment issue + * (memory or disk) due to O_DIRECT, so we abort() in order to + * catch the offender. + */ + abort(); + } +#endif if (rc == -1) return (errno); @@ -665,18 +695,18 @@ int fop_getattr(vnode_t *vp, vattr_t *vap) { struct stat64 st; + int err; - if (fstat64(vp->v_fd, &st) == -1) { + if (fstat64_blk(vp->v_fd, &st) == -1) { + err = errno; close(vp->v_fd); - return (errno); + return (err); } vap->va_size = st.st_size; return (0); } -#ifdef ZFS_DEBUG - /* * ========================================================================= * Figure out which debugging statements to print @@ -789,8 +819,6 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...) } } -#endif /* ZFS_DEBUG */ - /* * ========================================================================= * cmn_err() and panic()