X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=lib%2Flibzpool%2Fkernel.c;h=c38efd0aad661b71bdbbd70b63d456cb1b954f69;hb=645fb9cc214c79c77378dd1e1fd2e3ef668bc848;hp=4bd08cdd450e0fe193db076cae19d0c1864b1a3f;hpb=1e33ac1e2677c898a0b5ef6207048c692cb51bf4;p=zfs.git diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 4bd08cd..c38efd0 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -35,6 +35,7 @@ #include #include #include +#include #include /* @@ -140,7 +141,7 @@ zk_thread_helper(void *arg) kthread_t * zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, - size_t len, proc_t *pp, int state, pri_t pri) + size_t len, proc_t *pp, int state, pri_t pri, int detachstate) { kthread_t *kt; pthread_attr_t attr; @@ -158,9 +159,14 @@ zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, * * We reduce the default stack size in userspace, to ensure * we observe stack overruns in user space as well as in - * kernel space. PTHREAD_STACK_MIN is the minimum stack - * required for a NULL procedure in user space and is added - * in to the stack requirements. + * kernel space. In practice we can't set the userspace stack + * size to 8k because differences in stack usage between kernel + * space and userspace could lead to spurious stack overflows + * (especially when debugging is enabled). Nevertheless, we try + * to set it to the lowest value that works (currently 8k*4). + * PTHREAD_STACK_MIN is the minimum stack required for a NULL + * procedure in user space and is added in to the stack + * requirements. * * Some buggy NPTL threading implementations include the * guard area within the stack size allocations. In @@ -169,12 +175,13 @@ zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, * on Linux. */ - stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) + + stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) * 4 + EXTRA_GUARD_BYTES; VERIFY3S(pthread_attr_init(&attr), ==, 0); VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0); VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0); + VERIFY3S(pthread_attr_setdetachstate(&attr, detachstate), ==, 0); VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt), ==, 0); @@ -517,10 +524,12 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) int fd; vnode_t *vp; int old_umask; - char realpath[MAXPATHLEN]; + char *realpath; struct stat64 st; int err; + realpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); + /* * If we're accessing a real disk from userland, we need to use * the character interface to avoid caching. This is particularly @@ -531,14 +540,23 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) * for its size. So -- gag -- we open the block device to get * its size, and remember it for subsequent VOP_GETATTR(). */ +#if defined(__sun__) || defined(__sun) if (strncmp(path, "/dev/", 5) == 0) { +#else + if (0) { +#endif char *dsk; fd = open64(path, O_RDONLY); - if (fd == -1) - return (errno); + if (fd == -1) { + err = errno; + free(realpath); + return (err); + } if (fstat64(fd, &st) == -1) { + err = errno; close(fd); - return (errno); + free(realpath); + return (err); } close(fd); (void) sprintf(realpath, "%s", path); @@ -548,8 +566,19 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) dsk + 1); } else { (void) sprintf(realpath, "%s", path); - if (!(flags & FCREAT) && stat64(realpath, &st) == -1) - return (errno); + if (!(flags & FCREAT) && stat64(realpath, &st) == -1) { + err = errno; + free(realpath); + return (err); + } + } + + if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) { +#ifdef __linux__ + flags |= O_DIRECT; +#endif + /* We shouldn't be writing to block devices in userspace */ + VERIFY(!(flags & FWRITE)); } if (flags & FCREAT) @@ -560,6 +589,7 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. */ fd = open64(realpath, flags - FREAD, mode); + free(realpath); if (flags & FCREAT) (void) umask(old_umask); @@ -567,7 +597,7 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) if (fd == -1) return (errno); - if (fstat64(fd, &st) == -1) { + if (fstat64_blk(fd, &st) == -1) { err = errno; close(fd); return (err); @@ -626,6 +656,16 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, } } +#ifdef __linux__ + if (rc == -1 && errno == EINVAL) { + /* + * Under Linux, this most likely means an alignment issue + * (memory or disk) due to O_DIRECT, so we abort() in order to + * catch the offender. + */ + abort(); + } +#endif if (rc == -1) return (errno); @@ -654,18 +694,18 @@ int fop_getattr(vnode_t *vp, vattr_t *vap) { struct stat64 st; + int err; - if (fstat64(vp->v_fd, &st) == -1) { + if (fstat64_blk(vp->v_fd, &st) == -1) { + err = errno; close(vp->v_fd); - return (errno); + return (err); } vap->va_size = st.st_size; return (0); } -#ifdef ZFS_DEBUG - /* * ========================================================================= * Figure out which debugging statements to print @@ -778,8 +818,6 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...) } } -#endif /* ZFS_DEBUG */ - /* * ========================================================================= * cmn_err() and panic() @@ -1096,25 +1134,27 @@ ksiddomain_rele(ksiddomain_t *ksid) umem_free(ksid, sizeof (ksiddomain_t)); } -/* - * Do not change the length of the returned string; it must be freed - * with strfree(). - */ char * -kmem_asprintf(const char *fmt, ...) +kmem_vasprintf(const char *fmt, va_list adx) { - int size; - va_list adx; - char *buf; + char *buf = NULL; + va_list adx_copy; - va_start(adx, fmt); - size = vsnprintf(NULL, 0, fmt, adx) + 1; - va_end(adx); + va_copy(adx_copy, adx); + VERIFY(vasprintf(&buf, fmt, adx_copy) != -1); + va_end(adx_copy); - buf = kmem_alloc(size, KM_SLEEP); + return (buf); +} + +char * +kmem_asprintf(const char *fmt, ...) +{ + char *buf = NULL; + va_list adx; va_start(adx, fmt); - size = vsnprintf(buf, size, fmt, adx); + VERIFY(vasprintf(&buf, fmt, adx) != -1); va_end(adx); return (buf);