X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzpl_xattr.c;h=d79d35bce75b85ca76adcc5f931ad3d284de9d8f;hb=refs%2Fheads%2Frertzinger%2Ffeature-zpool-get--p;hp=cf52e720d96468050c5ec957919f739c960e3d8c;hpb=2cf7f52bc42f215d4ef27d0fd75fc1b1417cb841;p=zfs.git diff --git a/module/zfs/zpl_xattr.c b/module/zfs/zpl_xattr.c index cf52e72..d79d35b 100644 --- a/module/zfs/zpl_xattr.c +++ b/module/zfs/zpl_xattr.c @@ -29,43 +29,58 @@ * as practically no size limit on the file, and the extended * attributes permissions may differ from those of the parent file. * This interface is really quite clever, but it's also completely - * different than what is supported on Linux. + * different than what is supported on Linux. It also comes with a + * steep performance penalty when accessing small xattrs because they + * are not stored with the parent file. * * Under Linux extended attributes are manipulated by the system * calls getxattr(2), setxattr(2), and listxattr(2). They consider * extended attributes to be name/value pairs where the name is a * NULL terminated string. The name must also include one of the - * following name space prefixes: + * following namespace prefixes: * * user - No restrictions and is available to user applications. * trusted - Restricted to kernel and root (CAP_SYS_ADMIN) use. * system - Used for access control lists (system.nfs4_acl, etc). * security - Used by SELinux to store a files security context. * - * This Linux interface is implemented internally using the more - * flexible Solaris style extended attributes. Every extended - * attribute is store as a file in a hidden directory associated - * with the parent file. This ensures on disk compatibility with - * zfs implementations on other platforms (Solaris, FreeBSD, MacOS). + * The value under Linux is limited to 65536 bytes of binary data. + * In practice, individual xattrs tend to be much smaller than this + * and are typically less than 100 bytes. 
A good example of this + * are the security.selinux xattrs which are less than 100 bytes and + * exist for every file when xattr labeling is enabled. * - * One consequence of this implementation is that when an extended - * attribute is manipulated an inode is created. This inode will - * exist in the Linux inode cache but there will be no associated - * entry in the dentry cache which references it. This is safe - * but it may result in some confusion. + * The Linux xattr implementation has been written to take advantage of + * this typical usage. When the dataset property 'xattr=sa' is set, + * then xattrs will be preferentially stored as System Attributes (SA). + * This allows tiny xattrs (~100 bytes) to be stored with the dnode and + * up to 64k of xattrs to be stored in the spill block. If additional + * xattr space is required, which is unlikely under Linux, they will + * be stored using the traditional directory approach. * - * Longer term I would like to see the 'security.selinux' extended - * attribute moved to a SA. This should significantly improve - * performance on a SELinux enabled system by minimizing the - * number of seeks required to access a file. However, for now - * this xattr is still stored in a file because I'm pretty sure - * adding a new SA will break on-disk compatibility. + * This optimization results in roughly a 3x performance improvement + * when accessing xattrs because it avoids the need to perform a seek + * for every xattr value. When multiple xattrs are stored per-file + * the performance improvements are even greater because all of the + * xattrs stored in the spill block will be cached. + * + * However, by default SA based xattrs are disabled in the Linux port + * to maximize compatibility with other implementations. If you do + * enable SA based xattrs then they will not be visible on platforms + * which do not support this feature. 
+ * + * NOTE: One additional consequence of the xattr directory implementation + * is that when an extended attribute is manipulated an inode is created. + * This inode will exist in the Linux inode cache but there will be no + * associated entry in the dentry cache which references it. This is + * safe but it may result in some confusion. Enabling SA based xattrs + * largely avoids the issue except in the overflow case. */ - #include #include #include +#include #include #include @@ -77,11 +92,8 @@ typedef struct xattr_filldir { } xattr_filldir_t; static int -zpl_xattr_filldir(void *arg, const char *name, int name_len, - loff_t offset, uint64_t objnum, unsigned int d_type) +zpl_xattr_filldir(xattr_filldir_t *xf, const char *name, int name_len) { - xattr_filldir_t *xf = arg; - if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) if (!(ITOZSB(xf->inode)->z_flags & ZSB_XATTR)) return (0); @@ -104,17 +116,47 @@ zpl_xattr_filldir(void *arg, const char *name, int name_len, return (0); } -ssize_t -zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) +/* + * Read as many directory entry names as will fit in to the provided buffer, + * or when no buffer is provided calculate the required buffer size. + */ +int +zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf) { - struct inode *ip = dentry->d_inode; - struct inode *dxip = NULL; - loff_t pos = 3; /* skip '.', '..', and '.zfs' entries. 
*/ - cred_t *cr = CRED(); + zap_cursor_t zc; + zap_attribute_t zap; int error; - xattr_filldir_t xf = { buffer_size, 0, buffer, ip }; - crhold(cr); + zap_cursor_init(&zc, ITOZSB(dxip)->z_os, ITOZ(dxip)->z_id); + + while ((error = -zap_cursor_retrieve(&zc, &zap)) == 0) { + + if (zap.za_integer_length != 8 || zap.za_num_integers != 1) { + error = -ENXIO; + break; + } + + error = zpl_xattr_filldir(xf, zap.za_name, strlen(zap.za_name)); + if (error) + break; + + zap_cursor_advance(&zc); + } + + zap_cursor_fini(&zc); + + if (error == -ENOENT) + error = 0; + + return (error); +} + +static ssize_t +zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr) +{ + struct inode *ip = xf->inode; + struct inode *dxip = NULL; + int error; /* Lookup the xattr directory */ error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL); @@ -122,34 +164,83 @@ zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (error == -ENOENT) error = 0; - goto out; + return (error); } - /* Fill provided buffer via zpl_zattr_filldir helper */ - error = -zfs_readdir(dxip, (void *)&xf, zpl_xattr_filldir, &pos, cr); + error = zpl_xattr_readdir(dxip, xf); + iput(dxip); + + return (error); +} + +static ssize_t +zpl_xattr_list_sa(xattr_filldir_t *xf) +{ + znode_t *zp = ITOZ(xf->inode); + nvpair_t *nvp = NULL; + int error = 0; + + mutex_enter(&zp->z_lock); + if (zp->z_xattr_cached == NULL) + error = -zfs_sa_get_xattr(zp); + mutex_exit(&zp->z_lock); + + if (error) + return (error); + + ASSERT(zp->z_xattr_cached); + + while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) { + ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY); + + error = zpl_xattr_filldir(xf, nvpair_name(nvp), + strlen(nvpair_name(nvp))); + if (error) + return (error); + } + + return (0); +} + +ssize_t +zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) +{ + znode_t *zp = ITOZ(dentry->d_inode); + zfs_sb_t *zsb = ZTOZSB(zp); + xattr_filldir_t xf = { buffer_size, 0, buffer, 
dentry->d_inode }; + cred_t *cr = CRED(); + int error = 0; + + crhold(cr); + rw_enter(&zp->z_xattr_lock, RW_READER); + + if (zsb->z_use_sa && zp->z_is_sa) { + error = zpl_xattr_list_sa(&xf); + if (error) + goto out; + } + + error = zpl_xattr_list_dir(&xf, cr); if (error) goto out; error = xf.offset; out: - if (dxip) - iput(dxip); + rw_exit(&zp->z_xattr_lock); crfree(cr); return (error); } static int -zpl_xattr_get(struct inode *ip, const char *name, void *buf, size_t size) +zpl_xattr_get_dir(struct inode *ip, const char *name, void *value, + size_t size, cred_t *cr) { struct inode *dxip = NULL; struct inode *xip = NULL; - cred_t *cr = CRED(); int error; - crhold(cr); - /* Lookup the xattr directory */ error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL); if (error) @@ -165,7 +256,12 @@ zpl_xattr_get(struct inode *ip, const char *name, void *buf, size_t size) goto out; } - error = zpl_read_common(xip, buf, size, 0, UIO_SYSSPACE, 0, cr); + if (size < i_size_read(xip)) { + error = -ERANGE; + goto out; + } + + error = zpl_read_common(xip, value, size, 0, UIO_SYSSPACE, 0, cr); out: if (xip) iput(xip); @@ -173,8 +269,62 @@ out: if (dxip) iput(dxip); - crfree(cr); + return (error); +} + +static int +zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size) +{ + znode_t *zp = ITOZ(ip); + uchar_t *nv_value; + uint_t nv_size; + int error = 0; + + ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); + + mutex_enter(&zp->z_lock); + if (zp->z_xattr_cached == NULL) + error = -zfs_sa_get_xattr(zp); + mutex_exit(&zp->z_lock); + if (error) + return (error); + + ASSERT(zp->z_xattr_cached); + error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name, + &nv_value, &nv_size); + if (error) + return (error); + + if (!size) + return (nv_size); + + if (size < nv_size) + return (-ERANGE); + + memcpy(value, nv_value, nv_size); + + return (nv_size); +} + +static int +__zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size, + cred_t *cr) +{ + 
znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ZTOZSB(zp); + int error; + + ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); + + if (zsb->z_use_sa && zp->z_is_sa) { + error = zpl_xattr_get_sa(ip, name, value, size); + if (error != -ENOENT) + goto out; + } + + error = zpl_xattr_get_dir(ip, name, value, size, cr); +out: if (error == -ENOENT) error = -ENODATA; @@ -182,42 +332,43 @@ out: } static int -zpl_xattr_set(struct inode *ip, const char *name, const void *value, - size_t size, int flags) +zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size) +{ + znode_t *zp = ITOZ(ip); + cred_t *cr = CRED(); + int error; + + crhold(cr); + rw_enter(&zp->z_xattr_lock, RW_READER); + error = __zpl_xattr_get(ip, name, value, size, cr); + rw_exit(&zp->z_xattr_lock); + crfree(cr); + + return (error); +} + +static int +zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value, + size_t size, int flags, cred_t *cr) { struct inode *dxip = NULL; struct inode *xip = NULL; vattr_t *vap = NULL; - cred_t *cr = CRED(); ssize_t wrote; int error; const int xattr_mode = S_IFREG | 0644; - crhold(cr); - /* Lookup the xattr directory and create it if required. */ error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR | CREATE_XATTR_DIR, cr, NULL, NULL); if (error) goto out; - /* - * Lookup a specific xattr name in the directory, two failure modes: - * XATTR_CREATE: fail if xattr already exists - * XATTR_REMOVE: fail if xattr does not exist - */ + /* Lookup a specific xattr name in the directory */ error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL); - if (error) { - if (error != -ENOENT) - goto out; + if (error && (error != -ENOENT)) + goto out; - if ((error == -ENOENT) && (flags & XATTR_REPLACE)) - goto out; - } else { - error = -EEXIST; - if (flags & XATTR_CREATE) - goto out; - } error = 0; /* Remove a specific name xattr when value is set to NULL. 
*/ @@ -262,7 +413,6 @@ out: if (dxip) iput(dxip); - crfree(cr); if (error == -ENOENT) error = -ENODATA; @@ -272,8 +422,104 @@ out: } static int +zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value, + size_t size, int flags, cred_t *cr) +{ + znode_t *zp = ITOZ(ip); + nvlist_t *nvl; + size_t sa_size; + int error; + + ASSERT(zp->z_xattr_cached); + nvl = zp->z_xattr_cached; + + if (value == NULL) { + error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY); + if (error == -ENOENT) + error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr); + } else { + /* Do not allow SA xattrs in symlinks (issue #1648) */ + if (S_ISLNK(ip->i_mode)) + return (-EMLINK); + + /* Limited to 32k to keep nvpair memory allocations small */ + if (size > DXATTR_MAX_ENTRY_SIZE) + return (-EFBIG); + + /* Prevent the DXATTR SA from consuming the entire SA region */ + error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR); + if (error) + return (error); + + if (sa_size > DXATTR_MAX_SA_SIZE) + return (-EFBIG); + + error = -nvlist_add_byte_array(nvl, name, + (uchar_t *)value, size); + if (error) + return (error); + } + + /* Update the SA for additions, modifications, and removals. */ + if (!error) + error = -zfs_sa_set_xattr(zp); + + ASSERT3S(error, <=, 0); + + return (error); +} + +static int +zpl_xattr_set(struct inode *ip, const char *name, const void *value, + size_t size, int flags) +{ + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ZTOZSB(zp); + cred_t *cr = CRED(); + int error; + + crhold(cr); + rw_enter(&ITOZ(ip)->z_xattr_lock, RW_WRITER); + + /* + * Before setting the xattr check to see if it already exists. + * This is done to ensure the following optional flags are honored. 
+ * + * XATTR_CREATE: fail if xattr already exists + * XATTR_REPLACE: fail if xattr does not exist + */ + error = __zpl_xattr_get(ip, name, NULL, 0, cr); + if (error < 0) { + if (error != -ENODATA) + goto out; + + if ((error == -ENODATA) && (flags & XATTR_REPLACE)) + goto out; + } else { + error = -EEXIST; + if (flags & XATTR_CREATE) + goto out; + } + + /* Preferentially store the xattr as a SA for better performance */ + if (zsb->z_use_sa && zsb->z_xattr_sa && zp->z_is_sa) { + error = zpl_xattr_set_sa(ip, name, value, size, flags, cr); + if (error == 0) + goto out; + } + + error = zpl_xattr_set_dir(ip, name, value, size, flags, cr); +out: + rw_exit(&ITOZ(ip)->z_xattr_lock); + crfree(cr); + ASSERT3S(error, <=, 0); + + return (error); +} + +static int __zpl_xattr_user_get(struct inode *ip, const char *name, - void *buffer, size_t size) + void *value, size_t size) { char *xattr_name; int error; @@ -285,7 +531,7 @@ __zpl_xattr_user_get(struct inode *ip, const char *name, return -EOPNOTSUPP; xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name); - error = zpl_xattr_get(ip, xattr_name, buffer, size); + error = zpl_xattr_get(ip, xattr_name, value, size); strfree(xattr_name); return (error); @@ -321,7 +567,7 @@ xattr_handler_t zpl_xattr_user_handler = { static int __zpl_xattr_trusted_get(struct inode *ip, const char *name, - void *buffer, size_t size) + void *value, size_t size) { char *xattr_name; int error; @@ -333,7 +579,7 @@ __zpl_xattr_trusted_get(struct inode *ip, const char *name, return -EINVAL; xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name); - error = zpl_xattr_get(ip, xattr_name, buffer, size); + error = zpl_xattr_get(ip, xattr_name, value, size); strfree(xattr_name); return (error); @@ -369,7 +615,7 @@ xattr_handler_t zpl_xattr_trusted_handler = { static int __zpl_xattr_security_get(struct inode *ip, const char *name, - void *buffer, size_t size) + void *value, size_t size) { char *xattr_name; int error; @@ -378,7 +624,7 @@ 
__zpl_xattr_security_get(struct inode *ip, const char *name, return -EINVAL; xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name); - error = zpl_xattr_get(ip, xattr_name, buffer, size); + error = zpl_xattr_get(ip, xattr_name, value, size); strfree(xattr_name); return (error); @@ -403,31 +649,59 @@ __zpl_xattr_security_set(struct inode *ip, const char *name, } ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set); +#ifdef HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY +static int +__zpl_xattr_security_init(struct inode *ip, const struct xattr *xattrs, + void *fs_info) +{ + const struct xattr *xattr; + int error = 0; + + for (xattr = xattrs; xattr->name != NULL; xattr++) { + error = __zpl_xattr_security_set(ip, + xattr->name, xattr->value, xattr->value_len, 0); + + if (error < 0) + break; + } + + return (error); +} + int zpl_xattr_security_init(struct inode *ip, struct inode *dip, const struct qstr *qstr) { - int error; - size_t len; - void *value; - char *name; + return security_inode_init_security(ip, dip, qstr, + &__zpl_xattr_security_init, NULL); +} - error = zpl_security_inode_init_security(ip, dip, qstr, - &name, &value, &len); - if (error) { - if (error == -EOPNOTSUPP) - return 0; +#else +int +zpl_xattr_security_init(struct inode *ip, struct inode *dip, + const struct qstr *qstr) +{ + int error; + size_t len; + void *value; + char *name; - return (error); - } + error = zpl_security_inode_init_security(ip, dip, qstr, + &name, &value, &len); + if (error) { + if (error == -EOPNOTSUPP) + return 0; + return (error); + } error = __zpl_xattr_security_set(ip, name, value, len, 0); - kfree(name); - kfree(value); + kfree(name); + kfree(value); - return (error); + return (error); } +#endif /* HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY */ xattr_handler_t zpl_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, @@ -441,6 +715,7 @@ xattr_handler_t *zpl_xattr_handlers[] = { &zpl_xattr_user_handler, #ifdef HAVE_POSIX_ACLS &zpl_xattr_acl_access_handler, - 
&zpl_xattr_acl_default_handler, + &zpl_xattr_acl_default_handler, #endif /* HAVE_POSIX_ACLS */ + NULL };