4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "@(#)zfs_fuid.c 1.5 08/01/31 SMI"
28 #include <sys/zfs_context.h>
29 #include <sys/sunddi.h>
33 #include <sys/refcount.h>
34 #include <sys/nvpair.h>
36 #include <sys/kidmap.h>
38 #include <sys/zfs_vfsops.h>
39 #include <sys/zfs_znode.h>
41 #include <sys/zfs_fuid.h>
44 * FUID Domain table(s).
46 * The FUID table is stored as a packed nvlist of an array
47 * of nvlists, each of which contains an index, a domain string and an offset.
49 * During file system initialization the nvlist(s) are read and
50 * two AVL trees are created. One tree is keyed by the index number
51 * and the other by the domain string. Nodes are never removed from
52 * trees, but new entries may be added. If a new entry is added then the
53 * on-disk packed nvlist will also be updated.
/*
 * Names of the nvpairs used in the packed FUID table.  FUID_NVP_ARRAY
 * names the array of per-domain nvlists; FUID_IDX, FUID_DOMAIN and
 * FUID_OFFSET name the entries stored in each of those nvlists.
 */
56 #define FUID_IDX "fuid_idx"
57 #define FUID_DOMAIN "fuid_domain"
58 #define FUID_OFFSET "fuid_offset"
59 #define FUID_NVP_ARRAY "fuid_nvlist"
/*
 * One entry of the in-memory FUID domain table.
 * NOTE(review): the member declarations are not visible in this listing.
 * The rest of the file references the members f_idx, f_ksid, f_idxnode
 * and f_domnode; the last two are the AVL linkage offsets handed to
 * avl_create() for the index tree and the domain tree respectively.
 */
61 typedef struct fuid_domain {
69 * Compare two indexes.
/*
 * AVL comparison callback installed on the index tree by
 * zfs_fuid_table_load(): orders two fuid_domain_t nodes by f_idx.
 * NOTE(review): the return statement of each branch is not visible in
 * this listing; presumably -1, 1 and 0 for less, greater and equal.
 */
72 idx_compare(const void *arg1, const void *arg2)
74 const fuid_domain_t *node1 = arg1;
75 const fuid_domain_t *node2 = arg2;
77 if (node1->f_idx < node2->f_idx)
79 else if (node1->f_idx > node2->f_idx)
85 * Compare two domain strings.
/*
 * AVL comparison callback installed on the domain tree by
 * zfs_fuid_table_load(): orders two fuid_domain_t nodes by the kd_name
 * string of their f_ksid member.
 */
88 domain_compare(const void *arg1, const void *arg2)
90 const fuid_domain_t *node1 = arg1;
91 const fuid_domain_t *node2 = arg2;
/* Raw strcmp() result; only its sign is used below. */
94 val = strcmp(node1->f_ksid->kd_name, node2->f_ksid->kd_name);
/*
 * Collapse the strcmp() result to 1 or -1.
 * NOTE(review): the equal case is presumably handled on a line that is
 * not visible in this listing; as shown, zero would map to -1.
 */
97 return (val > 0 ? 1 : -1);
101 * Load the initial FUID domain and index trees. This function is used by
102 * both the kernel and zdb.
/*
 * Create the index and domain AVL trees and fill them from the packed
 * nvlist stored in object fuid_obj of objset os.  Every node that is
 * loaded is linked into both trees.
 * NOTE(review): the return statement is not visible in this listing;
 * zfs_fuid_init() stores the result in z_fuid_size, so presumably the
 * value read into fuid_size is returned -- confirm.
 */
105 zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree,
106 avl_tree_t *domain_tree)
/* Both trees hold the same nodes, linked through different members. */
111 avl_create(idx_tree, idx_compare,
112 sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_idxnode));
113 avl_create(domain_tree, domain_compare,
114 sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_domnode));
/* The bonus buffer of the object holds the byte size of the packed table. */
116 VERIFY(0 == dmu_bonus_hold(os, fuid_obj, FTAG, &db));
117 fuid_size = *(uint64_t *)db->db_data;
118 dmu_buf_rele(db, FTAG);
122 nvlist_t *nvp = NULL;
/* Read fuid_size bytes from offset 0 into a scratch buffer and unpack. */
127 packed = kmem_alloc(fuid_size, KM_SLEEP);
128 VERIFY(dmu_read(os, fuid_obj, 0, fuid_size, packed) == 0);
129 VERIFY(nvlist_unpack(packed, fuid_size,
131 VERIFY(nvlist_lookup_nvlist_array(nvp, FUID_NVP_ARRAY,
132 &fuidnvp, &count) == 0);
/* One array element per domain: fetch its domain string and index. */
134 for (i = 0; i != count; i++) {
135 fuid_domain_t *domnode;
139 VERIFY(nvlist_lookup_string(fuidnvp[i], FUID_DOMAIN,
141 VERIFY(nvlist_lookup_uint64(fuidnvp[i], FUID_IDX,
144 domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
146 domnode->f_idx = idx;
/*
 * The result of ksid_lookupdomain() is kept in the node;
 * zfs_fuid_table_destroy() passes it to ksiddomain_rele().
 */
147 domnode->f_ksid = ksid_lookupdomain(domain);
148 avl_add(idx_tree, domnode);
149 avl_add(domain_tree, domnode);
/* The scratch buffer is released with the same size it was allocated with. */
152 kmem_free(packed, fuid_size);
/*
 * Tear down the two trees built by zfs_fuid_table_load().  Each node is
 * linked into both trees, so the work is split: the pass over the domain
 * tree only hands f_ksid to ksiddomain_rele(), and the pass over the
 * index tree frees the node memory.
 * NOTE(review): the declaration and initialization of cookie are not
 * visible in this listing; avl_destroy_nodes() expects the cookie to
 * start out as NULL for each tree -- confirm it is reset before the
 * second loop.
 */
158 zfs_fuid_table_destroy(avl_tree_t *idx_tree, avl_tree_t *domain_tree)
160 fuid_domain_t *domnode;
/* Pass 1: unlink from the domain tree and release the ksid domain. */
164 while (domnode = avl_destroy_nodes(domain_tree, &cookie))
165 ksiddomain_rele(domnode->f_ksid);
167 avl_destroy(domain_tree);
/* Pass 2: unlink from the index tree and free the node itself. */
169 while (domnode = avl_destroy_nodes(idx_tree, &cookie))
170 kmem_free(domnode, sizeof (fuid_domain_t));
171 avl_destroy(idx_tree);
/*
 * Look up idx in idx_tree and return the kd_name string of the f_ksid of
 * the matching node.  The returned pointer refers to data owned by the
 * node, not to a copy.
 */
175 zfs_fuid_idx_domain(avl_tree_t *idx_tree, uint32_t idx)
177 fuid_domain_t searchnode, *findnode;
/* Only f_idx of the search key is filled in; idx_compare() reads only that. */
180 searchnode.f_idx = idx;
182 findnode = avl_find(idx_tree, &searchnode, &loc);
/*
 * NOTE(review): avl_find() returns NULL when no node matches, and no
 * check of findnode is visible in this listing before it is
 * dereferenced.  Confirm that idx is always present in the tree or that
 * a check exists on a line not shown here.
 */
184 return (findnode->f_ksid->kd_name);
189 * Load the fuid table(s) into memory.
/*
 * Load the FUID table of a file system into zfsvfs, at most once.
 * z_fuid_lock is held as writer for the whole operation.  tx may be NULL
 * (zfs_fuid_find_by_idx() passes NULL); a missing table object is only
 * created when tx is non-NULL.
 */
192 zfs_fuid_init(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
196 rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
/* Already loaded: drop the lock (the return is not visible here). */
198 if (zfsvfs->z_fuid_loaded) {
199 rw_exit(&zfsvfs->z_fuid_lock);
203 if (zfsvfs->z_fuid_obj == 0) {
205 /* first make sure we need to allocate object */
/* The object number is kept in the master node under ZFS_FUID_TABLES. */
207 error = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
208 ZFS_FUID_TABLES, 8, 1, &zfsvfs->z_fuid_obj);
/* No entry yet and a transaction is available: create and record one. */
209 if (error == ENOENT && tx != NULL) {
210 zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os,
211 DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
212 sizeof (uint64_t), tx);
213 VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
214 ZFS_FUID_TABLES, sizeof (uint64_t), 1,
215 &zfsvfs->z_fuid_obj, tx) == 0);
/*
 * NOTE(review): when the lookup fails and tx is NULL, z_fuid_obj may
 * still be 0 at this point.  No guard around the load below is visible
 * in this listing -- confirm one exists or that this case cannot occur.
 */
219 zfsvfs->z_fuid_size = zfs_fuid_table_load(zfsvfs->z_os,
220 zfsvfs->z_fuid_obj, &zfsvfs->z_fuid_idx, &zfsvfs->z_fuid_domain);
222 zfsvfs->z_fuid_loaded = B_TRUE;
223 rw_exit(&zfsvfs->z_fuid_lock);
227 * Query domain table for a given domain.
229 * If domain isn't found it is added to AVL trees and
230 * the results are pushed out to disk.
/*
 * Return the FUID table index of domain, adding the domain to the table
 * when it is not present yet.
 *
 * The search key is built from ksid_lookupdomain(domain), and retdomain
 * receives the kd_name of that ksid domain.  When a matching node exists,
 * the reference taken for the search key is released and the f_idx of the
 * node is returned.  Otherwise a new node takes over that reference, gets
 * the next index, and the complete table is repacked and written to the
 * FUID object within tx.
 * NOTE(review): the last parameter line, the local declarations and the
 * branch heads are not visible in this listing.
 */
233 zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain, char **retdomain,
236 fuid_domain_t searchnode, *findnode;
240 * If the dummy "nobody" domain then return an index of 0
241 * to cause the created FUID to be a standard POSIX id
242 * for the user nobody.
244 if (domain[0] == '\0') {
249 searchnode.f_ksid = ksid_lookupdomain(domain);
251 *retdomain = searchnode.f_ksid->kd_name;
/* Load the table on first use. */
253 if (!zfsvfs->z_fuid_loaded)
254 zfs_fuid_init(zfsvfs, tx);
/* Look the domain up under the reader lock. */
256 rw_enter(&zfsvfs->z_fuid_lock, RW_READER);
257 findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc);
258 rw_exit(&zfsvfs->z_fuid_lock);
/* Found: the reference held by the search key is no longer needed. */
261 ksiddomain_rele(searchnode.f_ksid);
262 return (findnode->f_idx);
264 fuid_domain_t *domnode;
/* Not found: the new node takes over the reference from the search key. */
273 domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
274 domnode->f_ksid = searchnode.f_ksid;
/*
 * NOTE(review): the reader lock was dropped above and no second lookup
 * is visible after the writer lock is taken.  Two threads adding the
 * same new domain could both reach the avl_add() calls below and would
 * compute the same index -- confirm how this is prevented.
 */
276 rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
/* New index is the current node count plus one; 0 is never handed out. */
277 retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1;
279 avl_add(&zfsvfs->z_fuid_domain, domnode);
280 avl_add(&zfsvfs->z_fuid_idx, domnode);
282 * Now resync the on-disk nvlist.
284 VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
/*
 * Build one nvlist per node, walking the domain tree in order.  After
 * the insert above the tree holds retidx nodes, which is the array size
 * used here.  The loop head is not visible in this listing.
 */
286 domnode = avl_first(&zfsvfs->z_fuid_domain);
287 fuids = kmem_alloc(retidx * sizeof (void *), KM_SLEEP);
289 VERIFY(nvlist_alloc(&fuids[i],
290 NV_UNIQUE_NAME, KM_SLEEP) == 0);
291 VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
292 domnode->f_idx) == 0);
/* The offset entry is always written as 0. */
293 VERIFY(nvlist_add_uint64(fuids[i],
294 FUID_OFFSET, 0) == 0);
/* The post-increment on this last add advances to the next array slot. */
295 VERIFY(nvlist_add_string(fuids[i++], FUID_DOMAIN,
296 domnode->f_ksid->kd_name) == 0);
297 domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode);
/* The per-domain nvlists and their array are freed once added to nvp. */
299 VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
300 fuids, retidx) == 0);
301 for (i = 0; i != retidx; i++)
302 nvlist_free(fuids[i]);
303 kmem_free(fuids, retidx * sizeof (void *));
/* Size the XDR encoding, then pack into a buffer of exactly that size. */
304 VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
305 packed = kmem_alloc(nvsize, KM_SLEEP);
306 VERIFY(nvlist_pack(nvp, &packed, &nvsize,
307 NV_ENCODE_XDR, KM_SLEEP) == 0);
/* Write the packed table at offset 0 of the FUID object. */
309 zfsvfs->z_fuid_size = nvsize;
310 dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
311 zfsvfs->z_fuid_size, packed, tx);
312 kmem_free(packed, zfsvfs->z_fuid_size);
/*
 * Record the new packed size in the bonus buffer; this is the value
 * zfs_fuid_table_load() reads to learn how many bytes to fetch.
 */
313 VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
315 dmu_buf_will_dirty(db, tx);
316 *(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
317 dmu_buf_rele(db, FTAG);
319 rw_exit(&zfsvfs->z_fuid_lock);
325 * Query the domain table by index, returning the domain string.
327 * Returns a pointer to the domain string held by the matching AVL node.
/*
 * Return the domain string stored for table index idx, loading the FUID
 * table first when that has not happened yet.
 * NOTE(review): the return statements are not visible in this listing,
 * so the value produced for idx 0 or for a file system that does not use
 * FUIDs cannot be stated from here.
 */
331 zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx)
/* Index 0 and file systems without FUID support take the early exit. */
335 if (idx == 0 || !zfsvfs->z_use_fuids)
/* Load on demand; with a NULL tx no table object can be created. */
338 if (!zfsvfs->z_fuid_loaded)
339 zfs_fuid_init(zfsvfs, NULL);
/* The tree is only read here, so the reader lock is sufficient. */
341 rw_enter(&zfsvfs->z_fuid_lock, RW_READER);
342 domain = zfs_fuid_idx_domain(&zfsvfs->z_fuid_idx, idx);
343 rw_exit(&zfsvfs->z_fuid_lock);
/*
 * Map both the zp_uid and the zp_gid FUID of znode zp through
 * zfs_fuid_map_id() and store the results through uidp and gidp.
 * NOTE(review): the trailing arguments of both calls are not visible in
 * this listing.
 */
350 zfs_fuid_map_ids(znode_t *zp, cred_t *cr, uid_t *uidp, uid_t *gidp)
352 *uidp = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_phys->zp_uid,
354 *gidp = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_phys->zp_gid,
/*
 * Translate a FUID into an id by asking the idmap service for the id
 * that belongs to the domain and RID encoded in the FUID.
 * NOTE(review): several lines between the declarations and the domain
 * lookup are not visible in this listing; presumably they return early
 * for a FUID whose index part is 0 -- confirm.
 */
359 zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid,
360 cred_t *cr, zfs_fuid_type_t type)
/* The index part of the FUID selects the domain table entry. */
362 uint32_t index = FUID_INDEX(fuid);
369 domain = zfs_fuid_find_by_idx(zfsvfs, index);
370 ASSERT(domain != NULL);
/*
 * User-like types map through the uid lookup; the gid lookup below is
 * presumably the else branch, whose head is not visible here.  The
 * status of both kidmap calls is deliberately discarded.
 */
372 if (type == ZFS_OWNER || type == ZFS_ACE_USER) {
373 (void) kidmap_getuidbysid(crgetzone(cr), domain,
374 FUID_RID(fuid), &id);
376 (void) kidmap_getgidbysid(crgetzone(cr), domain,
377 FUID_RID(fuid), &id);
383 * Add a FUID node to the list of FUIDs being created for this
386 * If ACL has multiple domains, then keep only one copy of each unique
/*
 * Record a newly created FUID in the zfs_fuid_info_t referenced by
 * fuidpp.
 *
 * domain and idx identify the domain, rid is the RID within it, and type
 * selects where the FUID is recorded: ACE types are appended to the
 * z_fuids list, ZFS_OWNER goes to z_fuid_owner and the remaining case to
 * z_fuid_group.  The recorded value is encoded with the position of the
 * domain in the z_domains list, not with idx.
 * NOTE(review): the guards around the allocation and around the creation
 * of a new domain entry are not visible in this listing.
 */
390 zfs_fuid_node_add(zfs_fuid_info_t **fuidpp, const char *domain, uint32_t rid,
391 uint64_t idx, uint64_t id, zfs_fuid_type_t type)
394 zfs_fuid_domain_t *fuid_domain;
395 zfs_fuid_info_t *fuidp;
397 boolean_t found = B_FALSE;
/* Presumably executed only when *fuidpp is still NULL -- confirm. */
400 *fuidpp = zfs_fuid_info_alloc();
404 * First find fuid domain index in linked list
406 * If one isn't found then create an entry.
/* fuididx tracks the position in the list, starting at 1. */
409 for (fuididx = 1, fuid_domain = list_head(&fuidp->z_domains);
410 fuid_domain; fuid_domain = list_next(&fuidp->z_domains,
411 fuid_domain), fuididx++) {
412 if (idx == fuid_domain->z_domidx) {
/*
 * New domain entry.  The domain pointer is stored as passed in, not
 * copied, and z_domain_str_sz grows by the string length plus its
 * terminating NUL.
 */
419 fuid_domain = kmem_alloc(sizeof (zfs_fuid_domain_t), KM_SLEEP);
420 fuid_domain->z_domain = domain;
421 fuid_domain->z_domidx = idx;
422 list_insert_tail(&fuidp->z_domains, fuid_domain);
423 fuidp->z_domain_str_sz += strlen(domain) + 1;
424 fuidp->z_domain_cnt++;
427 if (type == ZFS_ACE_USER || type == ZFS_ACE_GROUP) {
429 * Now allocate fuid entry and add it on the end of the list
432 fuid = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP);
434 fuid->z_domidx = idx;
435 fuid->z_logfuid = FUID_ENCODE(fuididx, rid);
437 list_insert_tail(&fuidp->z_fuids, fuid);
/* Owner and group FUIDs have dedicated slots instead of list entries. */
440 if (type == ZFS_OWNER)
441 fuidp->z_fuid_owner = FUID_ENCODE(fuididx, rid);
443 fuidp->z_fuid_group = FUID_ENCODE(fuididx, rid);
448 * Create a file system FUID, based on information in the user's cred
/*
 * Build the FUID for the owner or group of the caller described by cr.
 *
 * type must be ZFS_OWNER or ZFS_GROUP.  When the file system does not use
 * FUIDs or the id is not ephemeral, the id itself is returned.  Otherwise
 * the SID in the cred supplies the domain and RID, the domain is mapped to
 * a table index (added to the table if needed, within tx), the result is
 * recorded in *fuidp via zfs_fuid_node_add(), and the encoded FUID is
 * returned.
 * NOTE(review): the statements that fetch id from the cred are not
 * visible in this listing.
 */
451 zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type,
452 dmu_tx_t *tx, cred_t *cr, zfs_fuid_info_t **fuidp)
461 VERIFY(type == ZFS_OWNER || type == ZFS_GROUP);
463 if (type == ZFS_OWNER)
/* Plain POSIX ids are used as the FUID unchanged. */
468 if (!zfsvfs->z_use_fuids || !IS_EPHEMERAL(id))
469 return ((uint64_t)id);
/* An ephemeral id is expected to come with SID information in the cred. */
471 ksid = crgetsid(cr, (type == ZFS_OWNER) ? KSID_OWNER : KSID_GROUP);
473 VERIFY(ksid != NULL);
474 rid = ksid_getrid(ksid);
475 domain = ksid_getdomain(ksid);
/* kdomain receives the domain string held by the ksid domain entry. */
477 idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, tx);
479 zfs_fuid_node_add(fuidp, kdomain, rid, idx, id, type);
481 return (FUID_ENCODE(idx, rid));
485 * Create a file system FUID for an ACL ace
486 * or a chown/chgrp of the file.
487 * This is similar to zfs_fuid_create_cred, except that
488 * we can't find the domain + rid information in the
489 * cred. Instead we have to query Winchester for the
492 * During replay operations the domain+rid information is
493 * found in the zfs_fuid_info_t that the replay code has
494 * attached to the zfsvfs of the file system.
/*
 * Build the FUID for id, where the domain and RID are not taken from the
 * cred.  During replay they come from the zfs_fuid_info_t attached to the
 * zfsvfs as z_fuid_replay; otherwise the idmap service is queried.  The
 * domain is then mapped to a table index (added within tx if needed) and
 * the encoded FUID is returned.
 * NOTE(review): most branch heads of this function are not visible in
 * this listing; the comments below describe only the visible statements.
 */
497 zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr,
498 zfs_fuid_type_t type, dmu_tx_t *tx, zfs_fuid_info_t **fuidpp)
502 uint32_t fuid_idx = FUID_INDEX(id);
/* Replay is detected from the z_assign value of the file system. */
506 boolean_t is_replay = (zfsvfs->z_assign >= TXG_INITIAL);
507 zfs_fuid_t *zfuid = NULL;
508 zfs_fuid_info_t *fuidp;
511 * If POSIX ID, or entry is already a FUID then
514 * We may also be handed an already FUID'ized id via
/* A nonzero index part means id already is a FUID. */
518 if (!zfsvfs->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0)
/* Presumably the replay branch starts here -- confirm. */
522 fuidp = zfsvfs->z_fuid_replay;
525 * If we are passed an ephemeral id, but no
526 * fuid_info was logged then return NOBODY.
527 * This is most likely a result of idmap service
528 * not being available.
/*
 * Three sources for the logged FUID: the head of the z_fuids list, the
 * owner slot and the group slot.  The selecting conditions are not
 * visible in this listing.
 */
536 zfuid = list_head(&fuidp->z_fuids);
537 rid = FUID_RID(zfuid->z_logfuid);
538 idx = FUID_INDEX(zfuid->z_logfuid);
541 rid = FUID_RID(fuidp->z_fuid_owner);
542 idx = FUID_INDEX(fuidp->z_fuid_owner);
545 rid = FUID_RID(fuidp->z_fuid_group);
546 idx = FUID_INDEX(fuidp->z_fuid_group);
/* Logged indexes start at 1, the domain table array at 0. */
549 domain = fuidp->z_domain_table[idx -1];
/* Presumably the non-replay branch: ask idmap for the SID of the id. */
551 if (type == ZFS_OWNER || type == ZFS_ACE_USER)
552 status = kidmap_getsidbyuid(crgetzone(cr), id,
555 status = kidmap_getsidbygid(crgetzone(cr), id,
560 * When returning nobody we will need to
561 * make a dummy fuid table entry for logging
/* Map the domain to its index in the table of this file system. */
569 idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, tx);
/*
 * Either record the new FUID for logging, or, when a logged list entry
 * was used above, unlink and free that entry.  The condition of the
 * first branch is not visible in this listing.
 */
572 zfs_fuid_node_add(fuidpp, kdomain, rid, idx, id, type);
573 else if (zfuid != NULL) {
574 list_remove(&fuidp->z_fuids, zfuid);
575 kmem_free(zfuid, sizeof (zfs_fuid_t));
577 return (FUID_ENCODE(idx, rid));
/*
 * Destroy the in-memory FUID trees of zfsvfs, holding z_fuid_lock as
 * writer.  Nothing is destroyed when the table was never loaded.
 * NOTE(review): z_fuid_loaded is not cleared after the trees are
 * destroyed; confirm that nothing uses the table afterwards.
 */
581 zfs_fuid_destroy(zfsvfs_t *zfsvfs)
583 rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
/* Never loaded: drop the lock (the return is not visible here). */
584 if (!zfsvfs->z_fuid_loaded) {
585 rw_exit(&zfsvfs->z_fuid_lock);
588 zfs_fuid_table_destroy(&zfsvfs->z_fuid_idx, &zfsvfs->z_fuid_domain);
589 rw_exit(&zfsvfs->z_fuid_lock);
593 * Allocate zfs_fuid_info for tracking FUIDs created during
594 * zfs_mknode, VOP_SETATTR() or VOP_SETSECATTR()
/*
 * Allocate a zero-filled zfs_fuid_info_t and set up its two lists:
 * z_domains for zfs_fuid_domain_t entries and z_fuids for zfs_fuid_t
 * entries, both linked through their z_next member.
 * NOTE(review): the return statement is not visible in this listing;
 * zfs_fuid_node_add() uses the result as the new info structure.
 */
597 zfs_fuid_info_alloc(void)
599 zfs_fuid_info_t *fuidp;
/* Zeroed allocation, so counters and pointers start out as 0 and NULL. */
601 fuidp = kmem_zalloc(sizeof (zfs_fuid_info_t), KM_SLEEP);
602 list_create(&fuidp->z_domains, sizeof (zfs_fuid_domain_t),
603 offsetof(zfs_fuid_domain_t, z_next));
604 list_create(&fuidp->z_fuids, sizeof (zfs_fuid_t),
605 offsetof(zfs_fuid_t, z_next));
610 * Release all memory associated with zfs_fuid_info_t
/*
 * Free a zfs_fuid_info_t and everything it owns: every entry of the
 * z_fuids list, the z_domain_table array when one is set, every entry of
 * the z_domains list, and finally the structure itself.
 */
613 zfs_fuid_info_free(zfs_fuid_info_t *fuidp)
616 zfs_fuid_domain_t *zdomain;
/* Drain the FUID list from the head. */
618 while ((zfuid = list_head(&fuidp->z_fuids)) != NULL) {
619 list_remove(&fuidp->z_fuids, zfuid);
620 kmem_free(zfuid, sizeof (zfs_fuid_t));
/* The array is sized as z_domain_cnt pointer-sized elements. */
623 if (fuidp->z_domain_table != NULL)
624 kmem_free(fuidp->z_domain_table,
625 (sizeof (char **)) * fuidp->z_domain_cnt);
/*
 * Drain the domain list.  Only the list entries are freed, not the
 * z_domain strings they point to.
 */
627 while ((zdomain = list_head(&fuidp->z_domains)) != NULL) {
628 list_remove(&fuidp->z_domains, zdomain);
629 kmem_free(zdomain, sizeof (zfs_fuid_domain_t));
632 kmem_free(fuidp, sizeof (zfs_fuid_info_t));
636 * Check to see if id is a groupmember. If cred
637 * has ksid info then sidlist is checked first
638 * and if still not found then POSIX groups are checked
640 * Will use a straight FUID compare when possible.
/*
 * Decide whether the caller described by cr is a member of group id.
 * The SID list of the cred is searched first; when that gives no match,
 * id is mapped to a POSIX gid and groupmember() decides.
 * NOTE(review): the branch heads and the early returns inside the loop
 * are not visible in this listing; the comments below describe only the
 * visible statements.
 */
643 zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
645 ksid_t *ksid = crgetsid(cr, KSID_GROUP);
651 ksidlist_t *ksidlist = crgetsidlist(cr);
/* Split id into its domain index and RID parts for the FUID compare. */
652 uint32_t idx = FUID_INDEX(id);
653 uint32_t rid = FUID_RID(id);
656 ksid_groups = ksidlist->ksl_sids;
658 for (i = 0; i != ksidlist->ksl_nsid; i++) {
/* Direct id match; the creator group well-known gid never matches here. */
660 if (id != IDMAP_WK_CREATOR_GROUP_GID &&
661 id == ksid_groups[i].ks_id) {
/* Otherwise compare by domain string and RID. */
667 domain = zfs_fuid_find_by_idx(zfsvfs, idx);
668 ASSERT(domain != NULL);
671 IDMAP_WK_CREATOR_SID_AUTHORITY) == 0)
675 ksid_groups[i].ks_domain->kd_name) == 0) &&
676 rid == ksid_groups[i].ks_rid)
683 * Not found in ksidlist, check posix groups
685 gid = zfs_fuid_map_id(zfsvfs, id, cr, ZFS_GROUP);
686 return (groupmember(gid, cr));