4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
24 * Extended attributes (xattr) on Solaris are implemented as files
25 * which exist in a hidden xattr directory. These extended attributes
26 * can be accessed using the attropen() system call which opens
27 * the extended attribute. It can then be manipulated just like
28 * a standard file descriptor. This has a couple of advantages such
29 * as practically no size limit on the file, and the extended
30 * attributes permissions may differ from those of the parent file.
31 * This interface is really quite clever, but it's also completely
32 * different than what is supported on Linux. It also comes with a
33 * steep performance penalty when accessing small xattrs because they
34 * are not stored with the parent file.
36 * Under Linux extended attributes are manipulated by the system
37 * calls getxattr(2), setxattr(2), and listxattr(2). They consider
38 * extended attributes to be name/value pairs where the name is a
39 * NULL terminated string. The name must also include one of the
40 * following namespace prefixes:
42 * user - No restrictions and is available to user applications.
43 * trusted - Restricted to kernel and root (CAP_SYS_ADMIN) use.
44 * system - Used for access control lists (system.nfs4_acl, etc).
45 * security - Used by SELinux to store a file's security context.
47 * The value under Linux is limited to 65536 bytes of binary data.
48 * In practice, individual xattrs tend to be much smaller than this
49 * and are typically less than 100 bytes. A good example of this
50 * are the security.selinux xattrs which are less than 100 bytes and
51 * exist for every file when xattr labeling is enabled.
53 * The Linux xattr implementation has been written to take advantage of
54 * this typical usage. When the dataset property 'xattr=sa' is set,
55 * then xattrs will be preferentially stored as System Attributes (SA).
56 * This allows tiny xattrs (~100 bytes) to be stored with the dnode and
57 * up to 64k of xattrs to be stored in the spill block. If additional
58 * xattr space is required, which is unlikely under Linux, they will
59 * be stored using the traditional directory approach.
61 * This optimization results in roughly a 3x performance improvement
62 * when accessing xattrs because it avoids the need to perform a seek
63 * for every xattr value. When multiple xattrs are stored per-file
64 * the performance improvements are even greater because all of the
65 * xattrs stored in the spill block will be cached.
67 * However, by default SA based xattrs are disabled in the Linux port
68 * to maximize compatibility with other implementations. If you do
69 * enable SA based xattrs then they will not be visible on platforms
70 * which do not support this feature.
72 * NOTE: One additional consequence of the xattr directory implementation
73 * is that when an extended attribute is manipulated an inode is created.
74 * This inode will exist in the Linux inode cache but there will be no
75 * associated entry in the dentry cache which references it. This is
76 * safe but it may result in some confusion. Enabling SA based xattrs
77 * largely avoids the issue except in the overflow case.
80 #include <sys/zfs_vfsops.h>
81 #include <sys/zfs_vnops.h>
82 #include <sys/zfs_znode.h>
/*
 * Cursor state handed to the xattr listing helpers and their filldir
 * callback.
 * NOTE(review): the member list is not visible in this view. The code
 * below references size, offset, buf and inode - confirm against the
 * full definition.
 */
86 typedef struct xattr_filldir {
/*
 * Listing callback: appends one xattr name, followed by a terminating NUL,
 * to xf->buf at xf->offset and advances xf->offset by name_len + 1.
 *
 * arg      - the xattr_filldir_t cursor, passed as an opaque pointer.
 * name     - xattr name; name_len bytes are copied.
 * name_len - length of name, excluding any terminator.
 *
 * The offset, objnum and d_type arguments are not referenced in the lines
 * visible here.
 * NOTE(review): the statements taken when a check below fails are not
 * visible in this view - confirm against the full source.
 */
94 zpl_xattr_filldir(void *arg, const char *name, int name_len,
95 loff_t offset, uint64_t objnum, unsigned int d_type)
97 xattr_filldir_t *xf = arg;
/* Names with the user prefix are gated on the ZSB_XATTR bit in z_flags. */
99 if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
100 if (!(ITOZSB(xf->inode)->z_flags & ZSB_XATTR))
/* Names with the trusted prefix are gated on CAP_SYS_ADMIN. */
103 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
104 if (!capable(CAP_SYS_ADMIN))
107 /* When xf->buf is NULL only calculate the required size. */
/* Detect a name (plus its terminator) that would not fit in xf->size. */
109 if (xf->offset + name_len + 1 > xf->size)
112 memcpy(xf->buf + xf->offset, name, name_len);
113 xf->buf[xf->offset + name_len] = '\0';
/* Account for the name and its terminator. */
116 xf->offset += (name_len + 1);
/*
 * Lists the xattrs stored in the hidden xattr directory of xf->inode by
 * reading that directory with zpl_xattr_filldir as the per-entry callback.
 *
 * xf - listing cursor; supplies the inode and receives the names.
 * cr - credentials forwarded to zfs_lookup and zfs_readdir.
 *
 * Results of the zfs_* calls are negated before being stored in error.
 * NOTE(review): the handling of -ENOENT from the directory lookup is not
 * visible in this view - confirm against the full source.
 */
122 zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr)
124 struct inode *ip = xf->inode;
125 struct inode *dxip = NULL;
126 loff_t pos = 3; /* skip '.', '..', and '.zfs' entries. */
129 /* Lookup the xattr directory */
130 error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL);
132 if (error == -ENOENT)
138 /* Fill provided buffer via zpl_xattr_filldir helper */
139 error = -zfs_readdir(dxip, (void *)xf, zpl_xattr_filldir, &pos, cr);
/*
 * Lists the xattrs held in the znode nvlist zp->z_xattr_cached by passing
 * each pair name to zpl_xattr_filldir.
 *
 * xf - listing cursor; supplies the inode and receives the names.
 */
146 zpl_xattr_list_sa(xattr_filldir_t *xf)
148 znode_t *zp = ITOZ(xf->inode);
149 nvpair_t *nvp = NULL;
/* Populate the cached nvlist under z_lock if it has not been loaded yet. */
152 mutex_enter(&zp->z_lock);
153 if (zp->z_xattr_cached == NULL)
154 error = -zfs_sa_get_xattr(zp);
155 mutex_exit(&zp->z_lock);
160 ASSERT(zp->z_xattr_cached);
/* Every pair is expected to be a byte array keyed by the xattr name. */
162 while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
163 ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
/* The offset, objnum and d_type callback arguments are passed as zero. */
165 error = zpl_xattr_filldir((void *)xf, nvpair_name(nvp),
166 strlen(nvpair_name(nvp)), 0, 0, 0);
/*
 * Lists the xattr names of dentry->d_inode into buffer.
 *
 * dentry      - dentry whose inode is listed.
 * buffer      - destination for the NUL-separated names.
 * buffer_size - capacity of buffer in bytes.
 *
 * The listing runs with z_xattr_lock held as a reader. SA-backed xattrs
 * are listed when both zsb->z_use_sa and zp->z_is_sa are set, and
 * directory-backed xattrs are listed through zpl_xattr_list_dir.
 * NOTE(review): the return statement and the control flow between the two
 * listing calls are not visible in this view - confirm against the full
 * source.
 */
175 zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
177 znode_t *zp = ITOZ(dentry->d_inode);
178 zfs_sb_t *zsb = ZTOZSB(zp);
/* Positional initializer. */
179 xattr_filldir_t xf = { buffer_size, 0, buffer, dentry->d_inode };
184 rw_enter(&zp->z_xattr_lock, RW_READER);
186 if (zsb->z_use_sa && zp->z_is_sa) {
187 error = zpl_xattr_list_sa(&xf);
192 error = zpl_xattr_list_dir(&xf, cr);
199 rw_exit(&zp->z_xattr_lock);
/*
 * Reads an xattr that is stored as a file in the hidden xattr directory
 * of ip.
 *
 * ip    - inode owning the xattr.
 * name  - full xattr name, used as the file name inside the directory.
 * value - destination buffer handed to zpl_read_common.
 * size  - number of bytes requested.
 * cr    - credentials forwarded to the lookups and the read.
 *
 * The error variable doubles as the result: it receives either the xattr
 * file size from i_size_read or the result of zpl_read_common.
 * NOTE(review): the condition selecting between those two assignments and
 * the release of dxip and xip are not visible in this view.
 */
206 zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
207 size_t size, cred_t *cr)
209 struct inode *dxip = NULL;
210 struct inode *xip = NULL;
213 /* Lookup the xattr directory */
214 error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL);
218 /* Lookup a specific xattr name in the directory */
219 error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL);
224 error = i_size_read(xip);
228 error = zpl_read_common(xip, value, size, 0, UIO_SYSSPACE, 0, cr);
/*
 * Reads an xattr from the znode nvlist zp->z_xattr_cached.
 *
 * ip    - inode owning the xattr.
 * name  - full xattr name, used as the nvlist key.
 * value - destination buffer.
 * size  - capacity of value in bytes.
 *
 * The caller must hold zp->z_xattr_lock (asserted below). The final
 * statements copy MIN(size, nv_size) bytes and return that count.
 * NOTE(review): when size is smaller than nv_size the value is silently
 * truncated, whereas getxattr(2) documents ERANGE for that case. Lines
 * between the lookup and the copy are not visible in this view, so
 * confirm whether an earlier check handles it.
 */
240 zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size)
242 znode_t *zp = ITOZ(ip);
247 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
/* Populate the cached nvlist under z_lock if it has not been loaded yet. */
249 mutex_enter(&zp->z_lock);
250 if (zp->z_xattr_cached == NULL)
251 error = -zfs_sa_get_xattr(zp);
252 mutex_exit(&zp->z_lock);
257 ASSERT(zp->z_xattr_cached);
258 error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name,
259 &nv_value, &nv_size);
266 memcpy(value, nv_value, MIN(size, nv_size));
268 return (MIN(size, nv_size));
/*
 * Lock-free core of the xattr read path; the caller must already hold
 * zp->z_xattr_lock (asserted below).
 *
 * The SA-backed lookup is used when both zsb->z_use_sa and zp->z_is_sa are
 * set, and the directory-backed lookup is done by zpl_xattr_get_dir.
 * NOTE(review): the fall-through condition between the two lookups and the
 * value substituted for -ENOENT are not visible in this view. The caller
 * zpl_xattr_set compares the result against -ENODATA, so presumably
 * -ENOENT is mapped to -ENODATA - confirm.
 */
272 __zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size,
275 znode_t *zp = ITOZ(ip);
276 zfs_sb_t *zsb = ZTOZSB(zp);
279 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
281 if (zsb->z_use_sa && zp->z_is_sa) {
282 error = zpl_xattr_get_sa(ip, name, value, size);
287 error = zpl_xattr_get_dir(ip, name, value, size, cr);
289 if (error == -ENOENT)
/*
 * Locked wrapper around __zpl_xattr_get: takes zp->z_xattr_lock as a
 * reader for the duration of the lookup.
 *
 * ip    - inode owning the xattr.
 * name  - full xattr name including its namespace prefix.
 * value - destination buffer.
 * size  - capacity of value in bytes.
 *
 * NOTE(review): acquisition of cr and the return statement are not
 * visible in this view.
 */
296 zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)
298 znode_t *zp = ITOZ(ip);
303 rw_enter(&zp->z_xattr_lock, RW_READER);
304 error = __zpl_xattr_get(ip, name, value, size, cr);
305 rw_exit(&zp->z_xattr_lock);
/*
 * Creates, replaces or removes an xattr stored as a file in the hidden
 * xattr directory of ip.
 *
 * ip    - inode owning the xattr.
 * name  - full xattr name, used as the file name inside the directory.
 * value - new contents; per the comment below, NULL requests removal.
 * size  - length of value in bytes.
 * flags - not referenced in the lines visible here.
 * cr    - credentials; also the source of the uid and gid for a new file.
 *
 * The result is asserted to be zero or a negative errno.
 * NOTE(review): several branch conditions, the cleanup of dxip and xip,
 * and the handling of the wrote result are not visible in this view.
 */
312 zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
313 size_t size, int flags, cred_t *cr)
315 struct inode *dxip = NULL;
316 struct inode *xip = NULL;
/* New xattr files are regular files with mode 0644. */
320 const int xattr_mode = S_IFREG | 0644;
322 /* Lookup the xattr directory and create it if required. */
323 error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR | CREATE_XATTR_DIR,
328 /* Lookup a specific xattr name in the directory */
329 error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL);
/* A missing entry is tolerated here; any other failure is not. */
330 if (error && (error != -ENOENT))
335 /* Remove a specific name xattr when value is set to NULL. */
338 error = -zfs_remove(dxip, (char *)name, cr);
343 /* Lookup failed, create a new xattr. */
345 vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
346 vap->va_mode = xattr_mode;
347 vap->va_mask = ATTR_MODE;
348 vap->va_uid = crgetfsuid(cr);
349 vap->va_gid = crgetfsgid(cr);
351 error = -zfs_create(dxip, (char *)name, vap, 0, 0644, &xip,
/*
 * Offset 0 and length 0 are passed to zfs_freesp before the new value is
 * written at offset 0.
 * NOTE(review): xattr_mode is passed in the fourth argument position;
 * confirm this matches the zfs_freesp prototype.
 */
359 error = -zfs_freesp(ITOZ(xip), 0, 0, xattr_mode, TRUE);
363 wrote = zpl_write_common(xip, value, size, 0, UIO_SYSSPACE, 0, cr);
369 kmem_free(vap, sizeof(vattr_t));
377 if (error == -ENOENT)
380 ASSERT3S(error, <=, 0);
/*
 * Adds, replaces or removes an xattr in the znode nvlist
 * zp->z_xattr_cached and then writes the nvlist back with
 * zfs_sa_set_xattr.
 *
 * ip    - inode owning the xattr.
 * name  - full xattr name, used as the nvlist key.
 * value - new contents, stored as a byte array.
 * size  - length of value in bytes.
 * flags - forwarded to zpl_xattr_set_dir on the removal fallback.
 * cr    - credentials forwarded to zpl_xattr_set_dir.
 *
 * The cached nvlist must already be loaded (asserted below). The result is
 * asserted to be zero or a negative errno.
 * NOTE(review): the branch that separates removal from addition and the
 * values returned when a size limit is exceeded are not visible in this
 * view.
 */
386 zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
387 size_t size, int flags, cred_t *cr)
389 znode_t *zp = ITOZ(ip);
394 ASSERT(zp->z_xattr_cached);
395 nvl = zp->z_xattr_cached;
/*
 * Removal: when the name is not in the nvlist, fall back to removing a
 * directory-backed xattr of the same name.
 */
398 error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
399 if (error == -ENOENT)
400 error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr);
402 /* Limited to 32k to keep nvpair memory allocations small */
403 if (size > DXATTR_MAX_ENTRY_SIZE)
406 /* Prevent the DXATTR SA from consuming the entire SA region */
407 error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
411 if (sa_size > DXATTR_MAX_SA_SIZE)
414 error = -nvlist_add_byte_array(nvl, name,
415 (uchar_t *)value, size);
420 /* Update the SA for additions, modifications, and removals. */
422 error = -zfs_sa_set_xattr(zp);
424 ASSERT3S(error, <=, 0);
/*
 * Sets an xattr on ip with z_xattr_lock held as a writer.
 *
 * ip    - inode owning the xattr.
 * name  - full xattr name including its namespace prefix.
 * value - new contents.
 * size  - length of value in bytes.
 * flags - XATTR_CREATE and XATTR_REPLACE are checked against the result of
 *         a zero-length probe read.
 *
 * The SA-backed store is used when zsb->z_use_sa, zsb->z_xattr_sa and
 * zp->z_is_sa are all set; zpl_xattr_set_dir handles the directory-backed
 * store. The result is asserted to be zero or a negative errno.
 * NOTE(review): the statements executed by the flag checks and the
 * fall-through condition between the SA and directory stores are not
 * visible in this view.
 */
430 zpl_xattr_set(struct inode *ip, const char *name, const void *value,
431 size_t size, int flags)
433 znode_t *zp = ITOZ(ip);
434 zfs_sb_t *zsb = ZTOZSB(zp);
439 rw_enter(&ITOZ(ip)->z_xattr_lock, RW_WRITER);
442 * Before setting the xattr check to see if it already exists.
443 * This is done to ensure the following optional flags are honored.
445 * XATTR_CREATE: fail if xattr already exists
446 * XATTR_REPLACE: fail if xattr does not exist
/* Probe with a NULL buffer and zero size to learn whether name exists. */
448 error = __zpl_xattr_get(ip, name, NULL, 0, cr);
450 if (error != -ENODATA)
453 if ((error == -ENODATA) && (flags & XATTR_REPLACE))
457 if (flags & XATTR_CREATE)
461 /* Preferentially store the xattr as a SA for better performance */
462 if (zsb->z_use_sa && zsb->z_xattr_sa && zp->z_is_sa) {
463 error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);
468 error = zpl_xattr_set_dir(ip, name, value, size, flags, cr);
470 rw_exit(&ITOZ(ip)->z_xattr_lock);
472 ASSERT3S(error, <=, 0);
/*
 * Read handler for the user namespace: prepends XATTR_USER_PREFIX to name
 * and forwards the request to zpl_xattr_get.
 *
 * An empty name and a superblock without the ZSB_XATTR bit in z_flags are
 * both checked before the lookup.
 * NOTE(review): the values returned by those checks and the release of
 * xattr_name are not visible in this view.
 */
478 __zpl_xattr_user_get(struct inode *ip, const char *name,
479 void *value, size_t size)
484 if (strcmp(name, "") == 0)
487 if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
490 xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
491 error = zpl_xattr_get(ip, xattr_name, value, size);
/* Generates zpl_xattr_user_get, the entry referenced by the handler table. */
496 ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
/*
 * Write handler for the user namespace: prepends XATTR_USER_PREFIX to name
 * and forwards the request, including flags, to zpl_xattr_set.
 *
 * An empty name and a superblock without the ZSB_XATTR bit in z_flags are
 * both checked before the update.
 * NOTE(review): the values returned by those checks and the release of
 * xattr_name are not visible in this view.
 */
499 __zpl_xattr_user_set(struct inode *ip, const char *name,
500 const void *value, size_t size, int flags)
505 if (strcmp(name, "") == 0)
508 if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
511 xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
512 error = zpl_xattr_set(ip, xattr_name, value, size, flags);
/* Generates zpl_xattr_user_set, the entry referenced by the handler table. */
517 ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);
/* Handler table entry binding XATTR_USER_PREFIX to the user get and set. */
519 xattr_handler_t zpl_xattr_user_handler = {
520 .prefix = XATTR_USER_PREFIX,
521 .get = zpl_xattr_user_get,
522 .set = zpl_xattr_user_set,
/*
 * Read handler for the trusted namespace: prepends XATTR_TRUSTED_PREFIX to
 * name and forwards the request to zpl_xattr_get.
 *
 * CAP_SYS_ADMIN and a non-empty name are both checked before the lookup.
 * NOTE(review): the values returned by those checks and the release of
 * xattr_name are not visible in this view.
 */
526 __zpl_xattr_trusted_get(struct inode *ip, const char *name,
527 void *value, size_t size)
532 if (!capable(CAP_SYS_ADMIN))
535 if (strcmp(name, "") == 0)
538 xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
539 error = zpl_xattr_get(ip, xattr_name, value, size);
/* Generates zpl_xattr_trusted_get, referenced by the handler table. */
544 ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
/*
 * Write handler for the trusted namespace: prepends XATTR_TRUSTED_PREFIX
 * to name and forwards the request, including flags, to zpl_xattr_set.
 *
 * CAP_SYS_ADMIN and a non-empty name are both checked before the update.
 * NOTE(review): the values returned by those checks and the release of
 * xattr_name are not visible in this view.
 */
547 __zpl_xattr_trusted_set(struct inode *ip, const char *name,
548 const void *value, size_t size, int flags)
553 if (!capable(CAP_SYS_ADMIN))
556 if (strcmp(name, "") == 0)
559 xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
560 error = zpl_xattr_set(ip, xattr_name, value, size, flags);
/* Generates zpl_xattr_trusted_set, referenced by the handler table. */
565 ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);
/* Handler table entry binding XATTR_TRUSTED_PREFIX to the trusted get and set. */
567 xattr_handler_t zpl_xattr_trusted_handler = {
568 .prefix = XATTR_TRUSTED_PREFIX,
569 .get = zpl_xattr_trusted_get,
570 .set = zpl_xattr_trusted_set,
/*
 * Read handler for the security namespace: prepends XATTR_SECURITY_PREFIX
 * to name and forwards the request to zpl_xattr_get.
 *
 * An empty name is checked before the lookup. No capability or mount flag
 * check appears in the lines visible here.
 * NOTE(review): the value returned for an empty name and the release of
 * xattr_name are not visible in this view.
 */
574 __zpl_xattr_security_get(struct inode *ip, const char *name,
575 void *value, size_t size)
580 if (strcmp(name, "") == 0)
583 xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
584 error = zpl_xattr_get(ip, xattr_name, value, size);
/* Generates zpl_xattr_security_get, referenced by the handler table. */
589 ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
/*
 * Write handler for the security namespace: prepends XATTR_SECURITY_PREFIX
 * to name and forwards the request, including flags, to zpl_xattr_set.
 * It is also called directly by the security init helpers in this file.
 *
 * An empty name is checked before the update.
 * NOTE(review): the value returned for an empty name and the release of
 * xattr_name are not visible in this view.
 */
592 __zpl_xattr_security_set(struct inode *ip, const char *name,
593 const void *value, size_t size, int flags)
598 if (strcmp(name, "") == 0)
601 xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
602 error = zpl_xattr_set(ip, xattr_name, value, size, flags);
/* Generates zpl_xattr_security_set, referenced by the handler table. */
607 ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);
/*
 * Security label initialisation for an inode, in two build variants
 * selected by HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY.
 */
609 #ifdef HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY
/*
 * Callback variant: stores every entry of the xattrs array, which is
 * terminated by an entry whose name is NULL, through
 * __zpl_xattr_security_set with flags set to 0.
 * NOTE(review): the loop exit on error is not visible in this view.
 */
611 __zpl_xattr_security_init(struct inode *ip, const struct xattr *xattrs,
614 const struct xattr *xattr;
617 for (xattr = xattrs; xattr->name != NULL; xattr++) {
618 error = __zpl_xattr_security_set(ip,
619 xattr->name, xattr->value, xattr->value_len, 0);
/*
 * Hands the callback above to security_inode_init_security with a NULL
 * final argument.
 */
629 zpl_xattr_security_init(struct inode *ip, struct inode *dip,
630 const struct qstr *qstr)
632 return security_inode_init_security(ip, dip, qstr,
633 &__zpl_xattr_security_init, NULL);
/*
 * Second variant of the same entry point: obtains a single name, value
 * and length from zpl_security_inode_init_security and stores it with
 * flags set to 0.
 * NOTE(review): the preprocessor directive separating the two variants,
 * the handling of -EOPNOTSUPP and the release of name and value are not
 * visible in this view.
 */
638 zpl_xattr_security_init(struct inode *ip, struct inode *dip,
639 const struct qstr *qstr)
646 error = zpl_security_inode_init_security(ip, dip, qstr,
647 &name, &value, &len);
649 if (error == -EOPNOTSUPP)
654 error = __zpl_xattr_security_set(ip, name, value, len, 0);
661 #endif /* HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY */
/* Handler table entry binding XATTR_SECURITY_PREFIX to the security get and set. */
663 xattr_handler_t zpl_xattr_security_handler = {
664 .prefix = XATTR_SECURITY_PREFIX,
665 .get = zpl_xattr_security_get,
666 .set = zpl_xattr_security_set,
669 xattr_handler_t *zpl_xattr_handlers[] = {
670 &zpl_xattr_security_handler,
671 &zpl_xattr_trusted_handler,
672 &zpl_xattr_user_handler,
673 #ifdef HAVE_POSIX_ACLS
674 &zpl_xattr_acl_access_handler,
675 &zpl_xattr_acl_default_handler,
676 #endif /* HAVE_POSIX_ACLS */