4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25 /* Portions Copyright 2010 Robert Milkowski */
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/sysmacros.h>
32 #include <sys/pathname.h>
33 #include <sys/vnode.h>
35 #include <sys/vfs_opreg.h>
36 #include <sys/mntent.h>
37 #include <sys/mount.h>
38 #include <sys/cmn_err.h>
39 #include "fs/fs_subr.h"
40 #include <sys/zfs_znode.h>
41 #include <sys/zfs_dir.h>
43 #include <sys/fs/zfs.h>
45 #include <sys/dsl_prop.h>
46 #include <sys/dsl_dataset.h>
47 #include <sys/dsl_deleg.h>
51 #include <sys/varargs.h>
52 #include <sys/policy.h>
53 #include <sys/atomic.h>
54 #include <sys/mkdev.h>
55 #include <sys/modctl.h>
56 #include <sys/refstr.h>
57 #include <sys/zfs_ioctl.h>
58 #include <sys/zfs_ctldir.h>
59 #include <sys/zfs_fuid.h>
60 #include <sys/bootconf.h>
61 #include <sys/sunddi.h>
63 #include <sys/dmu_objset.h>
64 #include <sys/spa_boot.h>
66 #include "zfs_comutil.h"
69 extern int sys_shutdown;
/*
 * zfs_sync() — VFS-level sync entry point.
 *
 * NOTE(review): this span is a partial, line-numbered listing; several of
 * the original source lines (panic check, ZFS_ENTER/EXIT, the all-pools
 * path) are absent.  Code text is kept byte-identical; comments only.
 */
73 zfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
/* Never write to the pool from a compromised (panicking) kernel. */
76 * Data integrity is job one. We don't want a compromised kernel
77 * writing to the storage pool, so we never sync during panic.
/* SYNC_ATTR is a no-op for ZFS: the DMU already syncs txgs regularly. */
83 * SYNC_ATTR is used by fsflush() to force old filesystems like UFS
84 * to sync metadata, which they would otherwise cache indefinitely.
85 * Semantically, the only requirement is that the sync be initiated.
86 * The DMU syncs out txgs frequently, so there's nothing to do.
/* vfsp != NULL: sync only this mounted filesystem. */
93 * Sync a specific filesystem.
95 zfsvfs_t *zfsvfs = vfsp->vfs_data;
99 dp = dmu_objset_pool(zfsvfs->z_os);
102 * If the system is shutting down, then skip any
103 * filesystems which may exist on a suspended pool.
105 if (sys_shutdown && spa_suspended(dp->dp_spa)) {
/* Push any outstanding intent-log records for this filesystem. */
110 if (zfsvfs->z_log != NULL)
111 zil_commit(zfsvfs->z_log, 0);
/* vfsp == NULL: sync(1M) semantics — presumably spa_sync_allpools(); the
 * call itself is on a hidden line, so confirm against the full source. */
116 * Sync all ZFS filesystems. This is what happens when you
117 * run sync(1M). Unlike other filesystems, ZFS honors the
118 * request by waiting for all pools to commit all dirty data.
125 EXPORT_SYMBOL(zfs_sync);
128 atime_changed_cb(void *arg, uint64_t newval)
130 zfsvfs_t *zfsvfs = arg;
132 if (newval == TRUE) {
133 zfsvfs->z_atime = TRUE;
134 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
135 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
137 zfsvfs->z_atime = FALSE;
138 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
139 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
144 xattr_changed_cb(void *arg, uint64_t newval)
146 zfsvfs_t *zfsvfs = arg;
148 if (newval == TRUE) {
149 /* XXX locking on vfs_flag? */
150 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR;
151 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR);
152 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0);
154 /* XXX locking on vfs_flag? */
155 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR;
156 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR);
157 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0);
162 blksz_changed_cb(void *arg, uint64_t newval)
164 zfsvfs_t *zfsvfs = arg;
166 if (newval < SPA_MINBLOCKSIZE ||
167 newval > SPA_MAXBLOCKSIZE || !ISP2(newval))
168 newval = SPA_MAXBLOCKSIZE;
170 zfsvfs->z_max_blksz = newval;
171 zfsvfs->z_vfs->vfs_bsize = newval;
175 readonly_changed_cb(void *arg, uint64_t newval)
177 zfsvfs_t *zfsvfs = arg;
180 /* XXX locking on vfs_flag? */
181 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
182 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
183 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
185 /* XXX locking on vfs_flag? */
186 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
187 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
188 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
193 devices_changed_cb(void *arg, uint64_t newval)
195 zfsvfs_t *zfsvfs = arg;
197 if (newval == FALSE) {
198 zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES;
199 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES);
200 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0);
202 zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES;
203 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES);
204 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0);
209 setuid_changed_cb(void *arg, uint64_t newval)
211 zfsvfs_t *zfsvfs = arg;
213 if (newval == FALSE) {
214 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
215 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
216 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
218 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
219 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
220 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
225 exec_changed_cb(void *arg, uint64_t newval)
227 zfsvfs_t *zfsvfs = arg;
229 if (newval == FALSE) {
230 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
231 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
232 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
234 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
235 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
236 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
241 * The nbmand mount option can be changed at mount time.
242 * We can't allow it to be toggled on live file systems or incorrect
243 * behavior may be seen from cifs clients
245 * This property isn't registered via dsl_prop_register(), but this callback
246 * will be called when a file system is first mounted
249 nbmand_changed_cb(void *arg, uint64_t newval)
251 zfsvfs_t *zfsvfs = arg;
252 if (newval == FALSE) {
253 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
254 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
256 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
257 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
262 snapdir_changed_cb(void *arg, uint64_t newval)
264 zfsvfs_t *zfsvfs = arg;
266 zfsvfs->z_show_ctldir = newval;
270 vscan_changed_cb(void *arg, uint64_t newval)
272 zfsvfs_t *zfsvfs = arg;
274 zfsvfs->z_vscan = newval;
278 acl_inherit_changed_cb(void *arg, uint64_t newval)
280 zfsvfs_t *zfsvfs = arg;
282 zfsvfs->z_acl_inherit = newval;
286 zfs_register_callbacks(vfs_t *vfsp)
288 struct dsl_dataset *ds = NULL;
290 zfsvfs_t *zfsvfs = NULL;
292 int readonly, do_readonly = B_FALSE;
293 int setuid, do_setuid = B_FALSE;
294 int exec, do_exec = B_FALSE;
295 int devices, do_devices = B_FALSE;
296 int xattr, do_xattr = B_FALSE;
297 int atime, do_atime = B_FALSE;
301 zfsvfs = vfsp->vfs_data;
306 * The act of registering our callbacks will destroy any mount
307 * options we may have. In order to enable temporary overrides
308 * of mount options, we stash away the current values and
309 * restore them after we register the callbacks.
311 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
312 !spa_writeable(dmu_objset_spa(os))) {
314 do_readonly = B_TRUE;
315 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
317 do_readonly = B_TRUE;
319 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
325 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) {
328 } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) {
333 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
336 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
341 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
344 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
348 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
351 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
355 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
358 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
364 * nbmand is a special property. It can only be changed at
367 * This is weird, but it is documented to only be changeable
370 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
372 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
375 char osname[MAXNAMELEN];
377 dmu_objset_name(os, osname);
378 if ((error = dsl_prop_get_integer(osname, "nbmand", &nbmand,
385 * Register property callbacks.
387 * It would probably be fine to just check for i/o error from
388 * the first prop_register(), but I guess I like to go
391 ds = dmu_objset_ds(os);
392 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs);
393 error = error ? error : dsl_prop_register(ds,
394 "xattr", xattr_changed_cb, zfsvfs);
395 error = error ? error : dsl_prop_register(ds,
396 "recordsize", blksz_changed_cb, zfsvfs);
397 error = error ? error : dsl_prop_register(ds,
398 "readonly", readonly_changed_cb, zfsvfs);
399 error = error ? error : dsl_prop_register(ds,
400 "devices", devices_changed_cb, zfsvfs);
401 error = error ? error : dsl_prop_register(ds,
402 "setuid", setuid_changed_cb, zfsvfs);
403 error = error ? error : dsl_prop_register(ds,
404 "exec", exec_changed_cb, zfsvfs);
405 error = error ? error : dsl_prop_register(ds,
406 "snapdir", snapdir_changed_cb, zfsvfs);
407 error = error ? error : dsl_prop_register(ds,
408 "aclinherit", acl_inherit_changed_cb, zfsvfs);
409 error = error ? error : dsl_prop_register(ds,
410 "vscan", vscan_changed_cb, zfsvfs);
415 * Invoke our callbacks to restore temporary mount options.
418 readonly_changed_cb(zfsvfs, readonly);
420 setuid_changed_cb(zfsvfs, setuid);
422 exec_changed_cb(zfsvfs, exec);
424 devices_changed_cb(zfsvfs, devices);
426 xattr_changed_cb(zfsvfs, xattr);
428 atime_changed_cb(zfsvfs, atime);
430 nbmand_changed_cb(zfsvfs, nbmand);
436 * We may attempt to unregister some callbacks that are not
437 * registered, but this is OK; it will simply return ENOMSG,
438 * which we will ignore.
440 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs);
441 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs);
442 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs);
443 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs);
444 (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs);
445 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
446 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
447 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
448 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
450 (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
454 EXPORT_SYMBOL(zfs_register_callbacks);
455 #endif /* HAVE_ZPL */
/*
 * zfs_space_delta_cb() — DMU callback that extracts the owning uid/gid
 * from a znode bonus buffer for user/group space accounting.
 *
 * NOTE(review): partial listing; the SA attribute offsets, hdrsize
 * declaration and the return statements are on hidden lines.  Code is
 * kept byte-identical; comments only.
 */
458 zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
459 uint64_t *userp, uint64_t *groupp)
461 znode_phys_t *znp = data;
/* Only znode-bearing bonus types participate in space accounting. */
465 * Is it a valid type of object to track?
467 if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
/* NULL data: ids unchanged — hidden code presumably returns EEXIST here. */
471 * If we have a NULL data pointer
472 * then assume the id's aren't changing and
473 * return EEXIST to the dmu to let it know to
/* Legacy (pre-SA) bonus buffer: uid/gid live at fixed znode_phys_t fields. */
479 if (bonustype == DMU_OT_ZNODE) {
480 *userp = znp->zp_uid;
481 *groupp = znp->zp_gid;
/* SA bonus buffer: uid/gid sit at fixed offsets past the SA header. */
485 ASSERT(bonustype == DMU_OT_SA);
486 hdrsize = sa_hdrsize(data);
489 *userp = *((uint64_t *)((uintptr_t)data + hdrsize +
491 *groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
/* hdrsize == 0 case: freshly created file, znode data not yet filled in. */
495 * This should only happen for newly created
496 * files that haven't had the znode data filled
508 fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
509 char *domainbuf, int buflen, uid_t *ridp)
514 fuid = strtonum(fuidstr, NULL);
516 domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
518 (void) strlcpy(domainbuf, domain, buflen);
521 *ridp = FUID_RID(fuid);
525 zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
528 case ZFS_PROP_USERUSED:
529 return (DMU_USERUSED_OBJECT);
530 case ZFS_PROP_GROUPUSED:
531 return (DMU_GROUPUSED_OBJECT);
532 case ZFS_PROP_USERQUOTA:
533 return (zfsvfs->z_userquota_obj);
534 case ZFS_PROP_GROUPQUOTA:
535 return (zfsvfs->z_groupquota_obj);
543 zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
544 uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
549 zfs_useracct_t *buf = vbuf;
552 if (!dmu_objset_userspace_present(zfsvfs->z_os))
555 obj = zfs_userquota_prop_to_obj(zfsvfs, type);
561 for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
562 (error = zap_cursor_retrieve(&zc, &za)) == 0;
563 zap_cursor_advance(&zc)) {
564 if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
568 fuidstr_to_sid(zfsvfs, za.za_name,
569 buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);
571 buf->zu_space = za.za_first_integer;
577 ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep);
578 *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf;
579 *cookiep = zap_cursor_serialize(&zc);
580 zap_cursor_fini(&zc);
583 EXPORT_SYMBOL(zfs_userspace_many);
586 * buf must be big enough (eg, 32 bytes)
589 id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
590 char *buf, boolean_t addok)
595 if (domain && domain[0]) {
596 domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
600 fuid = FUID_ENCODE(domainid, rid);
601 (void) sprintf(buf, "%llx", (longlong_t)fuid);
606 zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
607 const char *domain, uint64_t rid, uint64_t *valp)
615 if (!dmu_objset_userspace_present(zfsvfs->z_os))
618 obj = zfs_userquota_prop_to_obj(zfsvfs, type);
622 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE);
626 err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp);
631 EXPORT_SYMBOL(zfs_userspace_one);
634 zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
635 const char *domain, uint64_t rid, uint64_t quota)
641 boolean_t fuid_dirtied;
643 if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
646 if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
649 objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj :
650 &zfsvfs->z_groupquota_obj;
652 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE);
655 fuid_dirtied = zfsvfs->z_fuid_dirty;
657 tx = dmu_tx_create(zfsvfs->z_os);
658 dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
660 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
661 zfs_userquota_prop_prefixes[type]);
664 zfs_fuid_txhold(zfsvfs, tx);
665 err = dmu_tx_assign(tx, TXG_WAIT);
671 mutex_enter(&zfsvfs->z_lock);
673 *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
675 VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
676 zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
678 mutex_exit(&zfsvfs->z_lock);
681 err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
685 err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx);
689 zfs_fuid_sync(zfsvfs, tx);
693 EXPORT_SYMBOL(zfs_set_userquota);
696 zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
699 uint64_t used, quota, usedobj, quotaobj;
702 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
703 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
705 if (quotaobj == 0 || zfsvfs->z_replay)
708 (void) sprintf(buf, "%llx", (longlong_t)fuid);
709 err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a);
713 err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
716 return (used >= quota);
718 EXPORT_SYMBOL(zfs_fuid_overquota);
721 zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
726 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
728 fuid = isgroup ? zp->z_gid : zp->z_uid;
730 if (quotaobj == 0 || zfsvfs->z_replay)
733 return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
735 EXPORT_SYMBOL(zfs_owner_overquota);
738 zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
746 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
749 * We claim to always be readonly so we can open snapshots;
750 * other ZPL code will prevent us from writing to snapshots.
752 error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
754 kmem_free(zfsvfs, sizeof (zfsvfs_t));
759 * Initialize the zfs-specific filesystem structure.
760 * Should probably make this a kmem cache, shuffle fields,
761 * and just bzero up to z_hold_mtx[].
763 zfsvfs->z_vfs = NULL;
764 zfsvfs->z_parent = zfsvfs;
765 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
766 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
769 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
772 } else if (zfsvfs->z_version >
773 zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
774 (void) printk("Can't mount a version %lld file system "
775 "on a version %lld pool\n. Pool must be upgraded to mount "
776 "this file system.", (u_longlong_t)zfsvfs->z_version,
777 (u_longlong_t)spa_version(dmu_objset_spa(os)));
781 if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
783 zfsvfs->z_norm = (int)zval;
785 if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
787 zfsvfs->z_utf8 = (zval != 0);
789 if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
791 zfsvfs->z_case = (uint_t)zval;
794 * Fold case on file systems that are always or sometimes case
797 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
798 zfsvfs->z_case == ZFS_CASE_MIXED)
799 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
801 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
802 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
804 if (zfsvfs->z_use_sa) {
805 /* should either have both of these objects or none */
806 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
812 * Pre SA versions file systems should never touch
813 * either the attribute registration or layout objects.
818 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
819 &zfsvfs->z_attr_table);
823 if (zfsvfs->z_version >= ZPL_VERSION_SA)
824 sa_register_update_callback(os, zfs_sa_upgrade);
826 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
830 ASSERT(zfsvfs->z_root != 0);
832 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
833 &zfsvfs->z_unlinkedobj);
837 error = zap_lookup(os, MASTER_NODE_OBJ,
838 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
839 8, 1, &zfsvfs->z_userquota_obj);
840 if (error && error != ENOENT)
843 error = zap_lookup(os, MASTER_NODE_OBJ,
844 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
845 8, 1, &zfsvfs->z_groupquota_obj);
846 if (error && error != ENOENT)
849 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
850 &zfsvfs->z_fuid_obj);
851 if (error && error != ENOENT)
854 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
855 &zfsvfs->z_shares_dir);
856 if (error && error != ENOENT)
859 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
860 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
861 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
862 offsetof(znode_t, z_link_node));
863 rrw_init(&zfsvfs->z_teardown_lock);
864 rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
865 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
866 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
867 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
873 dmu_objset_disown(os, zfsvfs);
875 kmem_free(zfsvfs, sizeof (zfsvfs_t));
880 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
884 error = zfs_register_callbacks(zfsvfs->z_vfs);
889 * Set the objset user_ptr to track its zfsvfs.
891 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
892 dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
893 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
895 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
898 * If we are not mounting (ie: online recv), then we don't
899 * have to worry about replaying the log as we blocked all
900 * operations out since we closed the ZIL.
906 * During replay we remove the read only flag to
907 * allow replays to succeed.
909 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
911 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
913 zfs_unlinked_drain(zfsvfs);
916 * Parse and replay the intent log.
918 * Because of ziltest, this must be done after
919 * zfs_unlinked_drain(). (Further note: ziltest
920 * doesn't use readonly mounts, where
921 * zfs_unlinked_drain() isn't called.) This is because
922 * ziltest causes spa_sync() to think it's committed,
923 * but actually it is not, so the intent log contains
924 * many txg's worth of changes.
926 * In particular, if object N is in the unlinked set in
927 * the last txg to actually sync, then it could be
928 * actually freed in a later txg and then reallocated
929 * in a yet later txg. This would write a "create
930 * object N" record to the intent log. Normally, this
931 * would be fine because the spa_sync() would have
932 * written out the fact that object N is free, before
933 * we could write the "create object N" intent log
936 * But when we are in ziltest mode, we advance the "open
937 * txg" without actually spa_sync()-ing the changes to
938 * disk. So we would see that object N is still
939 * allocated and in the unlinked set, and there is an
940 * intent log record saying to allocate it.
942 if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
943 if (zil_replay_disable) {
944 zil_destroy(zfsvfs->z_log, B_FALSE);
946 zfsvfs->z_replay = B_TRUE;
947 zil_replay(zfsvfs->z_os, zfsvfs,
949 zfsvfs->z_replay = B_FALSE;
952 zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
959 zfsvfs_free(zfsvfs_t *zfsvfs)
962 extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */
965 * This is a barrier to prevent the filesystem from going away in
966 * zfs_znode_move() until we can safely ensure that the filesystem is
967 * not unmounted. We consider the filesystem valid before the barrier
968 * and invalid after the barrier.
970 rw_enter(&zfsvfs_lock, RW_READER);
971 rw_exit(&zfsvfs_lock);
973 zfs_fuid_destroy(zfsvfs);
975 mutex_destroy(&zfsvfs->z_znodes_lock);
976 mutex_destroy(&zfsvfs->z_lock);
977 list_destroy(&zfsvfs->z_all_znodes);
978 rrw_destroy(&zfsvfs->z_teardown_lock);
979 rw_destroy(&zfsvfs->z_teardown_inactive_lock);
980 rw_destroy(&zfsvfs->z_fuid_lock);
981 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
982 mutex_destroy(&zfsvfs->z_hold_mtx[i]);
983 kmem_free(zfsvfs, sizeof (zfsvfs_t));
987 zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
989 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
990 if (zfsvfs->z_use_fuids && zfsvfs->z_vfs) {
991 vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
992 vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
993 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
994 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
995 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
996 vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
998 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
/*
 * zfs_domount() — mount the named objset onto vfsp.
 *
 * NOTE(review): partial listing; local declarations, the error labels and
 * parts of the snapshot branch are on hidden lines.  Code is kept
 * byte-identical; comments only.
 */
1002 zfs_domount(vfs_t *vfsp, char *osname)
1004 uint64_t recordsize, fsid_guid;
1011 error = zfsvfs_create(osname, &zfsvfs);
1014 zfsvfs->z_vfs = vfsp;
1016 /* Initialize the generic filesystem structure. */
1017 vfsp->vfs_bcount = 0;
1018 vfsp->vfs_data = NULL;
/* "recordsize" seeds the generic vfs block size. */
1020 if ((error = dsl_prop_get_integer(osname, "recordsize",
1021 &recordsize, NULL)))
1024 vfsp->vfs_bsize = recordsize;
1025 vfsp->vfs_flag |= VFS_NOTRUNC;
1026 vfsp->vfs_data = zfsvfs;
1029 * The fsid is 64 bits, composed of an 8-bit fs type, which
1030 * separates our fsid from any other filesystem types, and a
1031 * 56-bit objset unique ID. The objset unique ID is unique to
1032 * all objsets open on this system, provided by unique_create().
1033 * The 8-bit fs type must be put in the low bits of fsid[1]
1034 * because that's where other Solaris filesystems put it.
1036 fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
1037 ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
1038 vfsp->vfs_fsid.val[0] = fsid_guid;
1039 vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8);
/* Advertise vfs features based on the dataset's casesensitivity. */
1042 * Set features for file system.
1044 zfs_set_fuid_feature(zfsvfs);
1045 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
1046 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1047 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1048 vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
1049 } else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
1050 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1051 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1053 vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
/* Snapshots mount read-only with atime off and the ZIL/sync disabled. */
1055 if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
1058 atime_changed_cb(zfsvfs, B_FALSE);
1059 readonly_changed_cb(zfsvfs, B_TRUE);
1060 if ((error = dsl_prop_get_integer(osname,"xattr",&pval,NULL)))
1062 xattr_changed_cb(zfsvfs, pval);
1063 zfsvfs->z_issnap = B_TRUE;
1064 zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
1066 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1067 dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1068 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
/* Non-snapshot: full setup (callbacks, ZIL open/replay) via zfsvfs_setup. */
1070 error = zfsvfs_setup(zfsvfs, B_TRUE);
1073 if (!zfsvfs->z_issnap)
1074 zfsctl_create(zfsvfs);
/* Error path (label hidden): undo ownership and free the zfsvfs. */
1077 dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1078 zfsvfs_free(zfsvfs);
1083 EXPORT_SYMBOL(zfs_domount);
1086 zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
1088 objset_t *os = zfsvfs->z_os;
1089 struct dsl_dataset *ds;
1092 * Unregister properties.
1094 if (!dmu_objset_is_snapshot(os)) {
1095 ds = dmu_objset_ds(os);
1096 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb,
1099 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb,
1102 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb,
1105 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb,
1108 VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb,
1111 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb,
1114 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb,
1117 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
1120 VERIFY(dsl_prop_unregister(ds, "aclinherit",
1121 acl_inherit_changed_cb, zfsvfs) == 0);
1123 VERIFY(dsl_prop_unregister(ds, "vscan",
1124 vscan_changed_cb, zfsvfs) == 0);
1127 EXPORT_SYMBOL(zfs_unregister_callbacks);
#ifdef HAVE_MLSLABEL
/*
 * zfs_check_global_label:
 *	Check that the hex label string is appropriate for the dataset
 *	being mounted into the global_zone proper.
 *
 *	Return an error if the hex label string is not default or
 *	admin_low/admin_high.  For admin_low labels, the corresponding
 *	dataset must be readonly.
 */
int
zfs_check_global_label(const char *dsname, const char *hexsl)
{
	if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
		return (0);
	if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
		return (0);
	if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
		/* must be readonly */
		uint64_t rdonly;

		if (dsl_prop_get_integer(dsname,
		    zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
			return (EACCES);
		return (rdonly ? 0 : EACCES);
	}
	return (EACCES);
}
#endif /* HAVE_MLSLABEL */
1160 zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp)
1162 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1164 uint64_t refdbytes, availbytes, usedobjs, availobjs;
1168 dmu_objset_space(zfsvfs->z_os,
1169 &refdbytes, &availbytes, &usedobjs, &availobjs);
1172 * The underlying storage pool actually uses multiple block sizes.
1173 * We report the fragsize as the smallest block size we support,
1174 * and we report our blocksize as the filesystem's maximum blocksize.
1176 statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT;
1177 statp->f_bsize = zfsvfs->z_max_blksz;
1180 * The following report "total" blocks of various kinds in the
1181 * file system, but reported in terms of f_frsize - the
1185 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
1186 statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT;
1187 statp->f_bavail = statp->f_bfree; /* no root reservation */
1190 * statvfs() should really be called statufs(), because it assumes
1191 * static metadata. ZFS doesn't preallocate files, so the best
1192 * we can do is report the max that could possibly fit in f_files,
1193 * and that minus the number actually used in f_ffree.
1194 * For f_ffree, report the smaller of the number of object available
1195 * and the number of blocks (each object will take at least a block).
1197 statp->f_ffree = MIN(availobjs, statp->f_bfree);
1198 statp->f_favail = statp->f_ffree; /* no "root reservation" */
1199 statp->f_files = statp->f_ffree + usedobjs;
1201 (void) cmpldev(&d32, vfsp->vfs_dev);
1202 statp->f_fsid = d32;
1205 * We're a zfs filesystem.
1207 (void) strcpy(statp->f_basetype, MNTTYPE_ZFS);
1209 statp->f_flag = vf_to_stf(vfsp->vfs_flag);
1211 statp->f_namemax = ZFS_MAXNAMELEN;
1214 * We have all of 32 characters to stuff a string here.
1215 * Is there anything useful we could/should provide?
1217 bzero(statp->f_fstr, sizeof (statp->f_fstr));
1222 EXPORT_SYMBOL(zfs_statvfs);
1225 zfs_root(vfs_t *vfsp, vnode_t **vpp)
1227 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1233 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
1235 *vpp = ZTOV(rootzp);
1240 EXPORT_SYMBOL(zfs_root);
1243 * Teardown the zfsvfs::z_os.
1245 * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock'
1246 * and 'z_teardown_inactive_lock' held.
1249 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
1253 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
1257 * We purge the parent filesystem's vfsp as the parent
1258 * filesystem and all of its snapshots have their vnode's
1259 * v_vfsp set to the parent's filesystem's vfsp. Note,
1260 * 'z_parent' is self referential for non-snapshots.
1262 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
1266 * Close the zil. NB: Can't close the zil while zfs_inactive
1267 * threads are blocked as zil_close can call zfs_inactive.
1269 if (zfsvfs->z_log) {
1270 zil_close(zfsvfs->z_log);
1271 zfsvfs->z_log = NULL;
1274 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);
1277 * If we are not unmounting (ie: online recv) and someone already
1278 * unmounted this file system while we were doing the switcheroo,
1279 * or a reopen of z_os failed then just bail out now.
1281 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
1282 rw_exit(&zfsvfs->z_teardown_inactive_lock);
1283 rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
1288 * At this point there are no vops active, and any new vops will
1289 * fail with EIO since we have z_teardown_lock for writer (only
1290 * relavent for forced unmount).
1292 * Release all holds on dbufs.
1294 mutex_enter(&zfsvfs->z_znodes_lock);
1295 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
1296 zp = list_next(&zfsvfs->z_all_znodes, zp))
1298 ASSERT(ZTOV(zp)->v_count > 0);
1299 zfs_znode_dmu_fini(zp);
1301 mutex_exit(&zfsvfs->z_znodes_lock);
1304 * If we are unmounting, set the unmounted flag and let new vops
1305 * unblock. zfs_inactive will have the unmounted behavior, and all
1306 * other vops will fail with EIO.
1309 zfsvfs->z_unmounted = B_TRUE;
1310 rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
1311 rw_exit(&zfsvfs->z_teardown_inactive_lock);
1315 * z_os will be NULL if there was an error in attempting to reopen
1316 * zfsvfs, so just return as the properties had already been
1317 * unregistered and cached data had been evicted before.
1319 if (zfsvfs->z_os == NULL)
1323 * Unregister properties.
1325 zfs_unregister_callbacks(zfsvfs);
1330 if (dmu_objset_is_dirty_anywhere(zfsvfs->z_os))
1331 if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
1332 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
1333 (void) dmu_objset_evict_dbufs(zfsvfs->z_os);
/*
 * NOTE(review): this region is a sparse extract -- interior lines
 * (function prologue, declarations, braces, error checks, returns) are
 * missing between the numbered fragments.  Code text is kept
 * byte-identical; only comments are added.  Do not treat the gaps as
 * dead code.
 *
 * zfs_umount() -- VFS unmount entry point for a ZFS filesystem.
 * Visible flow: unmount permission check, DNLC purge, unmount of
 * snapshots under '.zfs', busy check for non-forced unmounts, zfsvfs
 * teardown, objset release, and '.zfs' control-node destruction.
 * Presumably returns 0 on success or an errno; the return paths are
 * not visible here -- confirm against the full file.
 */
1340 zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
1342 zfsvfs_t *zfsvfs = vfsp->vfs_data;
/*
 * Either the standard fs-unmount policy must pass, or (per the branch
 * whose connective code is not visible between these fragments) the
 * caller must hold the delegated ZFS "mount" permission on the dataset
 * named by vfs_resource.
 */
1346 ret = secpolicy_fs_unmount(cr, vfsp);
1348 if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
1349 ZFS_DELEG_PERM_MOUNT, cr))
1354 * We purge the parent filesystem's vfsp as the parent filesystem
1355 * and all of its snapshots have their vnode's v_vfsp set to the
1356 * parent's filesystem's vfsp. Note, 'z_parent' is self
1357 * referential for non-snapshots.
1359 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
1362 * Unmount any snapshots mounted under .zfs before unmounting the
1365 if (zfsvfs->z_ctldir != NULL &&
1366 (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) {
/*
 * Non-forced unmount: refuse (presumably EBUSY -- the return statement
 * is not visible) while any vnode beyond the implicit holds is active.
 */
1370 if (!(fflag & MS_FORCE)) {
1372 * Check the number of active vnodes in the file system.
1373 * Our count is maintained in the vfs structure, but the
1374 * number is off by 1 to indicate a hold on the vfs
1377 * The '.zfs' directory maintains a reference of its
1378 * own, and any active references underneath are
1379 * reflected in the vnode count.
1381 if (zfsvfs->z_ctldir == NULL) {
1382 if (vfsp->vfs_count > 1)
/* With a '.zfs' ctldir, one extra vfs hold and one vnode hold are expected. */
1385 if (vfsp->vfs_count > 2 ||
1386 zfsvfs->z_ctldir->v_count > 1)
1391 vfsp->vfs_flag |= VFS_UNMOUNTED;
/* Teardown with unmounting == B_TRUE must not fail at this point. */
1393 VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
1397 * z_os will be NULL if there was an error in
1398 * attempting to reopen zfsvfs.
1402 * Unset the objset user_ptr.
/* Detach this zfsvfs from the objset before disowning it. */
1404 mutex_enter(&os->os_user_ptr_lock);
1405 dmu_objset_set_user(os, NULL);
1406 mutex_exit(&os->os_user_ptr_lock);
1409 * Finally release the objset
1411 dmu_objset_disown(os, zfsvfs);
1415 * We can now safely destroy the '.zfs' directory node.
1417 if (zfsvfs->z_ctldir != NULL)
1418 zfsctl_destroy(zfsvfs);
1422 EXPORT_SYMBOL(zfs_umount);
/*
 * NOTE(review): sparse extract -- interior lines are missing between the
 * numbered fragments; code text kept byte-identical, comments only added.
 *
 * zfs_vget() -- reconstruct a vnode from an NFS-style file ID (fid_t).
 * Visible flow: for a long FID, decode the embedded (objsetid, setgen)
 * and re-resolve zfsvfs to the matching snapshot objset; decode the
 * byte-packed object number and generation from the short/long FID;
 * special-case the '.zfs' control-directory objects; otherwise zget the
 * znode and validate its generation against the FID's.
 */
1425 zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
1427 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1429 uint64_t object = 0;
1430 uint64_t fid_gen = 0;
/* Long FIDs additionally carry an objset id + generation (snapshots). */
1439 if (fidp->fid_len == LONG_FID_LEN) {
1440 zfid_long_t *zlfid = (zfid_long_t *)fidp;
1441 uint64_t objsetid = 0;
1442 uint64_t setgen = 0;
/* FID fields are byte arrays, least-significant byte first; reassemble. */
1444 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
1445 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
1447 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
1448 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
/* Re-resolve zfsvfs to the (snapshot) objset named by the long FID. */
1452 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
1458 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
1459 zfid_short_t *zfid = (zfid_short_t *)fidp;
1461 for (i = 0; i < sizeof (zfid->zf_object); i++)
1462 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
1464 for (i = 0; i < sizeof (zfid->zf_gen); i++)
1465 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
1471 /* A zero fid_gen means we are in the .zfs control directories */
1473 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
1474 *vpp = zfsvfs->z_ctldir;
1475 ASSERT(*vpp != NULL);
/* For the snapdir object, descend from the ctldir root to "snapshot". */
1476 if (object == ZFSCTL_INO_SNAPDIR) {
1477 VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
1478 0, NULL, NULL, NULL, NULL, NULL) == 0);
/* Mask keeps only the generation bits the FID's zf_gen bytes can hold. */
1486 gen_mask = -1ULL >> (64 - 8 * i);
1488 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
1489 if ((err = zfs_zget(zfsvfs, object, &zp))) {
1493 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
1495 zp_gen = zp_gen & gen_mask;
/* Stale handle: znode unlinked, or object number was reused (gen mismatch). */
1498 if (zp->z_unlinked || zp_gen != fid_gen) {
1499 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
1507 zfs_inode_update(VTOZ(*vpp));
1512 EXPORT_SYMBOL(zfs_vget);
1515 * Block out VOPs and close zfsvfs_t::z_os
1517 * Note, if successful, then we return with the 'z_teardown_lock' and
1518 * 'z_teardown_inactive_lock' write held.
/*
 * NOTE(review): sparse extract -- declarations, returns and braces are
 * missing between the numbered fragments; comments only added.
 * Visible flow: tear down the zfsvfs (unmounting == B_FALSE) and, on
 * the success path, disown the objset so that zfs_resume_fs() can
 * re-own it later.
 */
1521 zfs_suspend_fs(zfsvfs_t *zfsvfs)
1525 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
1527 dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1531 EXPORT_SYMBOL(zfs_suspend_fs);
1534 * Reopen zfsvfs_t::z_os and release VOPs.
/*
 * NOTE(review): sparse extract -- interior lines missing; comments only.
 *
 * Counterpart of zfs_suspend_fs(): must be entered with both teardown
 * locks write-held (asserted below).  Visible flow: re-own the objset,
 * re-read the SA attribute registry, re-run zfsvfs_setup(), rezget all
 * cached znodes, then drop the teardown locks.  A later fragment shows
 * the failure path: if z_os could not be reopened, the filesystem is
 * force-unmounted.
 */
1537 zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
1541 ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
1542 ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
1544 err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
/* On ownership failure, z_os is left NULL so later code can detect it. */
1547 zfsvfs->z_os = NULL;
1550 uint64_t sa_obj = 0;
/* ZFS_SA_ATTRS may legitimately be absent on pre-SA filesystems. */
1552 err2 = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
1553 ZFS_SA_ATTRS, 8, 1, &sa_obj);
/* On an SA-capable version a failed lookup is treated specially
 * (branch body not visible here -- presumably an error path; verify). */
1555 if ((err || err2) && zfsvfs->z_version >= ZPL_VERSION_SA)
1559 if ((err = sa_setup(zfsvfs->z_os, sa_obj,
1560 zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table)) != 0)
1563 VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
1566 * Attempt to re-establish all the active znodes with
1567 * their dbufs. If a zfs_rezget() fails, then we'll let
1568 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
1569 * when they try to use their znode.
1571 mutex_enter(&zfsvfs->z_znodes_lock);
1572 for (zp = list_head(&zfsvfs->z_all_znodes); zp;
1573 zp = list_next(&zfsvfs->z_all_znodes, zp)) {
/* Failure here is deliberately ignored; see the comment above. */
1574 (void) zfs_rezget(zp);
1576 mutex_exit(&zfsvfs->z_znodes_lock);
1581 /* release the VOPs */
1582 rw_exit(&zfsvfs->z_teardown_inactive_lock);
1583 rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
1587 * Since we couldn't reopen zfsvfs::z_os, force
1588 * unmount this file system.
1590 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
1591 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, CRED());
1595 EXPORT_SYMBOL(zfs_resume_fs);
/*
 * NOTE(review): sparse extract; comments only added.
 * zfs_freevfs() -- VFS "free" entry point: releases the zfsvfs_t hung
 * off vfsp->vfs_data.  (Additional cleanup lines may exist in the gaps
 * between the numbered fragments -- confirm against the full file.)
 */
1598 zfs_freevfs(vfs_t *vfsp)
1600 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1602 zfsvfs_free(zfsvfs);
1604 #endif /* HAVE_ZPL */
/*
 * NOTE(review): isolated fragment of the module-initialization path
 * (its enclosing function is not visible in this extract).  Registers
 * the ZPL space-accounting delta callback for DMU_OST_ZFS objsets.
 */
1612 dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
/*
 * NOTE(review): sparse extract -- interior lines are missing between the
 * numbered fragments; code text kept byte-identical, comments only added.
 *
 * zfs_set_version() -- upgrade the on-disk ZPL version of a mounted
 * filesystem.  Visible flow: validate the requested version (within the
 * supported range, no downgrade, supported by the pool's SPA version),
 * persist it under ZPL_VERSION_STR in the master node inside a DMU
 * transaction, create and register the SA master node when first
 * crossing ZPL_VERSION_SA, log the upgrade to pool history, and update
 * the in-core version.
 */
1624 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
1627 objset_t *os = zfsvfs->z_os;
/* Reject versions outside the supported ZPL range. */
1630 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
/* Downgrades are not allowed. */
1633 if (newvers < zfsvfs->z_version)
/* The pool's SPA version must support the requested ZPL version. */
1636 if (zfs_spa_version_map(newvers) >
1637 spa_version(dmu_objset_spa(zfsvfs->z_os)))
1640 tx = dmu_tx_create(os);
1641 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
/* First upgrade into SA territory also reserves room for the SA node. */
1642 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
1643 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
1645 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
1647 error = dmu_tx_assign(tx, TXG_WAIT);
/* Persist the new version under ZPL_VERSION_STR in the master node. */
1653 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
1654 8, 1, &newvers, tx);
/* Crossing ZPL_VERSION_SA: create the SA master node and hook it up. */
1661 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
1664 ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
1666 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
1667 DMU_OT_NONE, 0, tx);
1669 error = zap_add(os, MASTER_NODE_OBJ,
1670 ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
1671 ASSERT3U(error, ==, 0);
1673 VERIFY(0 == sa_set_sa_object(os, sa_obj));
1674 sa_register_update_callback(os, zfs_sa_upgrade);
/* Record the upgrade in the pool history log. */
1677 spa_history_log_internal(LOG_DS_UPGRADE,
1678 dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
1679 zfsvfs->z_version, newvers, dmu_objset_id(os));
/* Update in-core state; enable FUID behavior once the version allows it. */
1683 zfsvfs->z_version = newvers;
1685 if (zfsvfs->z_version >= ZPL_VERSION_FUID)
1686 zfs_set_fuid_feature(zfsvfs);
1690 EXPORT_SYMBOL(zfs_set_version);
1691 #endif /* HAVE_ZPL */
1694 * Read a property stored within the master node.
1697 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
1703 * Look up the file system's value for the property. For the
1704 * version property, we look up a slightly different string.
1706 if (prop == ZFS_PROP_VERSION)
1707 pname = ZPL_VERSION_STR;
1709 pname = zfs_prop_to_name(prop);
1712 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
1714 if (error == ENOENT) {
1715 /* No value set, use the default value */
1717 case ZFS_PROP_VERSION:
1718 *value = ZPL_VERSION;
1720 case ZFS_PROP_NORMALIZE:
1721 case ZFS_PROP_UTF8ONLY:
1725 *value = ZFS_CASE_SENSITIVE;