* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/* Portions Copyright 2010 Robert Milkowski */
+
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dsl_deleg.h>
#include <sys/spa.h>
#include <sys/zap.h>
+#include <sys/sa.h>
#include <sys/varargs.h>
#include <sys/policy.h>
#include <sys/atomic.h>
#include <sys/dnlc.h>
#include <sys/dmu_objset.h>
#include <sys/spa_boot.h>
+#include <sys/sa.h>
+#include "zfs_comutil.h"
-int zfsfstype;
-vfsops_t *zfs_vfsops = NULL;
-static major_t zfs_major;
-static minor_t zfs_minor;
-static kmutex_t zfs_dev_mtx;
-
+#ifdef HAVE_ZPL
extern int sys_shutdown;
-static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr);
-static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr);
-static int zfs_mountroot(vfs_t *vfsp, enum whymountroot);
-static int zfs_root(vfs_t *vfsp, vnode_t **vpp);
-static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp);
-static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp);
-static void zfs_freevfs(vfs_t *vfsp);
-
-static const fs_operation_def_t zfs_vfsops_template[] = {
- VFSNAME_MOUNT, { .vfs_mount = zfs_mount },
- VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot },
- VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount },
- VFSNAME_ROOT, { .vfs_root = zfs_root },
- VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs },
- VFSNAME_SYNC, { .vfs_sync = zfs_sync },
- VFSNAME_VGET, { .vfs_vget = zfs_vget },
- VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs },
- NULL, NULL
-};
-
-static const fs_operation_def_t zfs_vfsops_eio_template[] = {
- VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs },
- NULL, NULL
-};
-
-/*
- * We need to keep a count of active fs's.
- * This is necessary to prevent our module
- * from being unloaded after a umount -f
- */
-static uint32_t zfs_active_fs_count = 0;
-
-static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
-static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
-static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
-static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
-
-/*
- * MO_DEFAULT is not used since the default value is determined
- * by the equivalent property.
- */
-static mntopt_t mntopts[] = {
- { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL },
- { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL },
- { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL },
- { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL }
-};
-
-static mntopts_t zfs_mntopts = {
- sizeof (mntopts) / sizeof (mntopt_t),
- mntopts
-};
-
/*ARGSUSED*/
int
zfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
if (panicstr)
return (0);
- /*
- * SYNC_ATTR is used by fsflush() to force old filesystems like UFS
- * to sync metadata, which they would otherwise cache indefinitely.
- * Semantically, the only requirement is that the sync be initiated.
- * The DMU syncs out txgs frequently, so there's nothing to do.
- */
- if (flag & SYNC_ATTR)
- return (0);
-
if (vfsp != NULL) {
/*
* Sync a specific filesystem.
}
if (zfsvfs->z_log != NULL)
- zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
- else
- txg_wait_synced(dp, 0);
+ zil_commit(zfsvfs->z_log, 0);
+
ZFS_EXIT(zfsvfs);
} else {
/*
return (0);
}
-
-static int
-zfs_create_unique_device(dev_t *dev)
-{
- major_t new_major;
-
- do {
- ASSERT3U(zfs_minor, <=, MAXMIN32);
- minor_t start = zfs_minor;
- do {
- mutex_enter(&zfs_dev_mtx);
- if (zfs_minor >= MAXMIN32) {
- /*
- * If we're still using the real major
- * keep out of /dev/zfs and /dev/zvol minor
- * number space. If we're using a getudev()'ed
- * major number, we can use all of its minors.
- */
- if (zfs_major == ddi_name_to_major(ZFS_DRIVER))
- zfs_minor = ZFS_MIN_MINOR;
- else
- zfs_minor = 0;
- } else {
- zfs_minor++;
- }
- *dev = makedevice(zfs_major, zfs_minor);
- mutex_exit(&zfs_dev_mtx);
- } while (vfs_devismounted(*dev) && zfs_minor != start);
- if (zfs_minor == start) {
- /*
- * We are using all ~262,000 minor numbers for the
- * current major number. Create a new major number.
- */
- if ((new_major = getudev()) == (major_t)-1) {
- cmn_err(CE_WARN,
- "zfs_mount: Can't get unique major "
- "device number.");
- return (-1);
- }
- mutex_enter(&zfs_dev_mtx);
- zfs_major = new_major;
- zfs_minor = 0;
-
- mutex_exit(&zfs_dev_mtx);
- } else {
- break;
- }
- /* CONSTANTCONDITION */
- } while (1);
-
- return (0);
-}
+EXPORT_SYMBOL(zfs_sync);
static void
atime_changed_cb(void *arg, uint64_t newval)
}
static void
-acl_mode_changed_cb(void *arg, uint64_t newval)
-{
- zfsvfs_t *zfsvfs = arg;
-
- zfsvfs->z_acl_mode = newval;
-}
-
-static void
acl_inherit_changed_cb(void *arg, uint64_t newval)
{
zfsvfs_t *zfsvfs = arg;
zfsvfs->z_acl_inherit = newval;
}
-static int
+int
zfs_register_callbacks(vfs_t *vfsp)
{
struct dsl_dataset *ds = NULL;
* of mount options, we stash away the current values and
* restore them after we register the callbacks.
*/
- if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
+ if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
+ !spa_writeable(dmu_objset_spa(os))) {
readonly = B_TRUE;
do_readonly = B_TRUE;
} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
char osname[MAXNAMELEN];
dmu_objset_name(os, osname);
- if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand,
- NULL)) {
+ if ((error = dsl_prop_get_integer(osname, "nbmand", &nbmand,
+ NULL))) {
return (error);
}
}
error = error ? error : dsl_prop_register(ds,
"snapdir", snapdir_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- "aclmode", acl_mode_changed_cb, zfsvfs);
- error = error ? error : dsl_prop_register(ds,
"aclinherit", acl_inherit_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"vscan", vscan_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
zfsvfs);
(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
return (error);
}
+EXPORT_SYMBOL(zfs_register_callbacks);
+#endif /* HAVE_ZPL */
-static void
-uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid,
- int64_t delta, dmu_tx_t *tx)
-{
- uint64_t used = 0;
- char buf[32];
- int err;
- uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
-
- if (delta == 0)
- return;
-
- (void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid);
- err = zap_lookup(os, obj, buf, 8, 1, &used);
- ASSERT(err == 0 || err == ENOENT);
- /* no underflow/overflow */
- ASSERT(delta > 0 || used >= -delta);
- ASSERT(delta < 0 || used + delta > used);
- used += delta;
- if (used == 0)
- err = zap_remove(os, obj, buf, tx);
- else
- err = zap_update(os, obj, buf, 8, 1, &used, tx);
- ASSERT(err == 0);
-}
-
-static void
-zfs_space_delta_cb(objset_t *os, dmu_object_type_t bonustype,
- void *oldbonus, void *newbonus,
- uint64_t oldused, uint64_t newused, dmu_tx_t *tx)
+static int
+zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
+ uint64_t *userp, uint64_t *groupp)
{
- znode_phys_t *oldznp = oldbonus;
- znode_phys_t *newznp = newbonus;
+ znode_phys_t *znp = data;
+ int error = 0;
- if (bonustype != DMU_OT_ZNODE)
- return;
+ /*
+ * Callback registered for DMU_OST_ZFS objsets in zfs_init(): it
+ * reports the user and group ids found in an object's bonus data
+ * through *userp and *groupp, returning 0 when they were filled in.
+ *
+ * Is it a valid type of object to track?
+ * Only DMU_OT_ZNODE and DMU_OT_SA bonus data are handled; any other
+ * type is answered with ENOENT.
+ */
+ if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
+ return (ENOENT);
- /* We charge 512 for the dnode (if it's allocated). */
- if (oldznp->zp_gen != 0)
- oldused += DNODE_SIZE;
- if (newznp->zp_gen != 0)
- newused += DNODE_SIZE;
+ /*
+ * If we have a NULL data pointer
+ * then assume the id's aren't changing and
+ * return EEXIST to the dmu to let it know to
+ * use the same ids
+ * (*userp and *groupp are not written on this path).
+ */
+ if (data == NULL)
+ return (EEXIST);
- if (oldznp->zp_uid == newznp->zp_uid) {
- uidacct(os, B_FALSE, oldznp->zp_uid, newused-oldused, tx);
+ if (bonustype == DMU_OT_ZNODE) {
+ *userp = znp->zp_uid;
+ *groupp = znp->zp_gid;
} else {
- uidacct(os, B_FALSE, oldznp->zp_uid, -oldused, tx);
- uidacct(os, B_FALSE, newznp->zp_uid, newused, tx);
- }
+ int hdrsize;
- if (oldznp->zp_gid == newznp->zp_gid) {
- uidacct(os, B_TRUE, oldznp->zp_gid, newused-oldused, tx);
- } else {
- uidacct(os, B_TRUE, oldznp->zp_gid, -oldused, tx);
- uidacct(os, B_TRUE, newznp->zp_gid, newused, tx);
+ ASSERT(bonustype == DMU_OT_SA);
+ hdrsize = sa_hdrsize(data);
+
+ if (hdrsize != 0) {
+ *userp = *((uint64_t *)((uintptr_t)data + hdrsize +
+ SA_UID_OFFSET));
+ *groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
+ SA_GID_OFFSET));
+ } else {
+ /*
+ * This should only happen for newly created
+ * files that haven't had the znode data filled
+ * in yet.
+ * Both ids are reported as 0 in that case; otherwise
+ * they are read at SA_UID_OFFSET / SA_GID_OFFSET past
+ * the header size returned by sa_hdrsize().
+ */
+ *userp = 0;
+ *groupp = 0;
+ }
}
+ return (error);
}
+#ifdef HAVE_ZPL
static void
fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
char *domainbuf, int buflen, uid_t *ridp)
{
- extern uint64_t strtonum(const char *str, char **nptr);
uint64_t fuid;
const char *domain;
return (zfsvfs->z_userquota_obj);
case ZFS_PROP_GROUPQUOTA:
return (zfsvfs->z_groupquota_obj);
+ default:
+ return (ENOTSUP);
}
return (0);
}
zap_cursor_fini(&zc);
return (error);
}
+EXPORT_SYMBOL(zfs_userspace_many);
/*
* buf must be big enough (eg, 32 bytes)
err = 0;
return (err);
}
+EXPORT_SYMBOL(zfs_userspace_one);
int
zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
dmu_tx_commit(tx);
return (err);
}
+EXPORT_SYMBOL(zfs_set_userquota);
boolean_t
-zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
+zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
{
char buf[32];
uint64_t used, quota, usedobj, quotaobj;
return (B_FALSE);
return (used >= quota);
}
+EXPORT_SYMBOL(zfs_fuid_overquota);
+
+/*
+ * Check whether the owner of a znode is over quota.
+ *
+ * zfsvfs  - file system the znode belongs to.
+ * zp      - znode whose owning user or group is checked.
+ * isgroup - B_TRUE to check the owning group, B_FALSE for the owning user.
+ *
+ * Returns B_FALSE when the matching quota object is 0 or when z_replay
+ * is set; otherwise returns the result of zfs_fuid_overquota() for the
+ * owner's id.
+ */
+boolean_t
+zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
+{
+	uint64_t owner_fuid, quota_obj;
+
+	/* Select the id and the quota object for the requested id class. */
+	if (isgroup) {
+		quota_obj = zfsvfs->z_groupquota_obj;
+		owner_fuid = zp->z_gid;
+	} else {
+		quota_obj = zfsvfs->z_userquota_obj;
+		owner_fuid = zp->z_uid;
+	}
+
+	/* Nothing to enforce without a quota object or during replay. */
+	if (zfsvfs->z_replay || quota_obj == 0)
+		return (B_FALSE);
+
+	return (zfs_fuid_overquota(zfsvfs, isgroup, owner_fuid));
+}
+EXPORT_SYMBOL(zfs_owner_overquota);
+/*
+ * Allocate a zfsvfs_t for the named dataset and take ownership of its
+ * objset.
+ *
+ * osname - name of the dataset, passed to dmu_objset_own().
+ * zfvp   - out: the new zfsvfs_t on success; set to NULL on every failure
+ *          that leaves through "out" (it is not written when
+ *          dmu_objset_own() itself fails).
+ *
+ * Returns 0 on success or an errno value.  Once the objset is owned,
+ * every failure must leave through "out" so that the objset is disowned
+ * and the zfsvfs_t is freed.
+ */
int
-zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
+zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
{
objset_t *os;
zfsvfs_t *zfsvfs;
uint64_t zval;
int i, error;
+ uint64_t sa_obj;
- if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL))
- return (error);
- if (zval)
- mode |= DS_MODE_READONLY;
+ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
- error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
- if (error == EROFS) {
- mode |= DS_MODE_READONLY;
- error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
- }
- if (error)
+ /*
+ * We claim to always be readonly so we can open snapshots;
+ * other ZPL code will prevent us from writing to snapshots.
+ */
+ error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
+ if (error) {
+ kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error);
+ }
/*
* Initialize the zfs-specific filesystem structure.
* Should probably make this a kmem cache, shuffle fields,
* and just bzero up to z_hold_mtx[].
*/
- zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
zfsvfs->z_vfs = NULL;
zfsvfs->z_parent = zfsvfs;
zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
if (error) {
goto out;
- } else if (zfsvfs->z_version > ZPL_VERSION) {
- (void) printf("Mismatched versions: File system "
- "is version %llu on-disk format, which is "
- "incompatible with this software version %lld!",
- (u_longlong_t)zfsvfs->z_version, ZPL_VERSION);
+ } else if (zfsvfs->z_version >
+ zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
+ (void) printk("Can't mount a version %lld file system "
+ "on a version %lld pool\n. Pool must be upgraded to mount "
+ "this file system.", (u_longlong_t)zfsvfs->z_version,
+ (u_longlong_t)spa_version(dmu_objset_spa(os)));
error = ENOTSUP;
goto out;
}
-
if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
goto out;
zfsvfs->z_norm = (int)zval;
zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
+ zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
+
+ if (zfsvfs->z_use_sa) {
+ /* should either have both of these objects or none */
+ error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
+ &sa_obj);
+ /*
+ * The objset is already owned and zfsvfs allocated, so
+ * leave through "out" instead of returning directly.
+ */
+ if (error)
+ goto out;
+ } else {
+ /*
+ * Pre SA versions file systems should never touch
+ * either the attribute registration or layout objects.
+ */
+ sa_obj = 0;
+ }
+
+ error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
+ &zfsvfs->z_attr_table);
+ if (error)
+ goto out;
+
+ if (zfsvfs->z_version >= ZPL_VERSION_SA)
+ sa_register_update_callback(os, zfs_sa_upgrade);
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
&zfsvfs->z_root);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
- *zvp = zfsvfs;
+ *zfvp = zfsvfs;
return (0);
out:
- dmu_objset_close(os);
- *zvp = NULL;
+ dmu_objset_disown(os, zfsvfs);
+ *zfvp = NULL;
kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error);
}
/*
* Set the objset user_ptr to track its zfsvfs.
*/
- mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
+ mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
- mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
+ mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
- if (zil_disable) {
- zil_destroy(zfsvfs->z_log, 0);
- zfsvfs->z_log = NULL;
- }
/*
* If we are not mounting (ie: online recv), then we don't
else
zfs_unlinked_drain(zfsvfs);
- if (zfsvfs->z_log) {
- /*
- * Parse and replay the intent log.
- *
- * Because of ziltest, this must be done after
- * zfs_unlinked_drain(). (Further note: ziltest
- * doesn't use readonly mounts, where
- * zfs_unlinked_drain() isn't called.) This is because
- * ziltest causes spa_sync() to think it's committed,
- * but actually it is not, so the intent log contains
- * many txg's worth of changes.
- *
- * In particular, if object N is in the unlinked set in
- * the last txg to actually sync, then it could be
- * actually freed in a later txg and then reallocated
- * in a yet later txg. This would write a "create
- * object N" record to the intent log. Normally, this
- * would be fine because the spa_sync() would have
- * written out the fact that object N is free, before
- * we could write the "create object N" intent log
- * record.
- *
- * But when we are in ziltest mode, we advance the "open
- * txg" without actually spa_sync()-ing the changes to
- * disk. So we would see that object N is still
- * allocated and in the unlinked set, and there is an
- * intent log record saying to allocate it.
- */
- zfsvfs->z_replay = B_TRUE;
- zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector);
- zfsvfs->z_replay = B_FALSE;
+ /*
+ * Parse and replay the intent log.
+ *
+ * Because of ziltest, this must be done after
+ * zfs_unlinked_drain(). (Further note: ziltest
+ * doesn't use readonly mounts, where
+ * zfs_unlinked_drain() isn't called.) This is because
+ * ziltest causes spa_sync() to think it's committed,
+ * but actually it is not, so the intent log contains
+ * many txg's worth of changes.
+ *
+ * In particular, if object N is in the unlinked set in
+ * the last txg to actually sync, then it could be
+ * actually freed in a later txg and then reallocated
+ * in a yet later txg. This would write a "create
+ * object N" record to the intent log. Normally, this
+ * would be fine because the spa_sync() would have
+ * written out the fact that object N is free, before
+ * we could write the "create object N" intent log
+ * record.
+ *
+ * But when we are in ziltest mode, we advance the "open
+ * txg" without actually spa_sync()-ing the changes to
+ * disk. So we would see that object N is still
+ * allocated and in the unlinked set, and there is an
+ * intent log record saying to allocate it.
+ */
+ if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
+ if (zil_replay_disable) {
+ zil_destroy(zfsvfs->z_log, B_FALSE);
+ } else {
+ zfsvfs->z_replay = B_TRUE;
+ zil_replay(zfsvfs->z_os, zfsvfs,
+ zfs_replay_vector);
+ zfsvfs->z_replay = B_FALSE;
+ }
}
zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
}
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
+ vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
}
+ zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
}
-static int
+int
zfs_domount(vfs_t *vfsp, char *osname)
{
- dev_t mount_dev;
uint64_t recordsize, fsid_guid;
int error = 0;
zfsvfs_t *zfsvfs;
ASSERT(vfsp);
ASSERT(osname);
- error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs);
+ error = zfsvfs_create(osname, &zfsvfs);
if (error)
return (error);
zfsvfs->z_vfs = vfsp;
vfsp->vfs_bcount = 0;
vfsp->vfs_data = NULL;
- if (zfs_create_unique_device(&mount_dev) == -1) {
- error = ENODEV;
- goto out;
- }
- ASSERT(vfs_devismounted(mount_dev) == 0);
-
- if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize,
- NULL))
+ if ((error = dsl_prop_get_integer(osname, "recordsize",
+ &recordsize, NULL)))
goto out;
- vfsp->vfs_dev = mount_dev;
- vfsp->vfs_fstype = zfsfstype;
vfsp->vfs_bsize = recordsize;
vfsp->vfs_flag |= VFS_NOTRUNC;
vfsp->vfs_data = zfsvfs;
fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
vfsp->vfs_fsid.val[0] = fsid_guid;
- vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
- zfsfstype & 0xFF;
+ vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8);
/*
* Set features for file system.
vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
}
+ vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
uint64_t pval;
atime_changed_cb(zfsvfs, B_FALSE);
readonly_changed_cb(zfsvfs, B_TRUE);
- if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))
+ if ((error = dsl_prop_get_integer(osname,"xattr",&pval,NULL)))
goto out;
xattr_changed_cb(zfsvfs, pval);
zfsvfs->z_issnap = B_TRUE;
+ zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
- mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
+ mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
- mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
+ mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
} else {
error = zfsvfs_setup(zfsvfs, B_TRUE);
}
zfsctl_create(zfsvfs);
out:
if (error) {
- dmu_objset_close(zfsvfs->z_os);
+ dmu_objset_disown(zfsvfs->z_os, zfsvfs);
zfsvfs_free(zfsvfs);
- } else {
- atomic_add_32(&zfs_active_fs_count, 1);
}
return (error);
}
+EXPORT_SYMBOL(zfs_domount);
void
zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
zfsvfs) == 0);
- VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
- zfsvfs) == 0);
-
VERIFY(dsl_prop_unregister(ds, "aclinherit",
acl_inherit_changed_cb, zfsvfs) == 0);
vscan_changed_cb, zfsvfs) == 0);
}
}
+EXPORT_SYMBOL(zfs_unregister_callbacks);
+#ifdef HAVE_MLSLABEL
/*
- * Convert a decimal digit string to a uint64_t integer.
- */
-static int
-str_to_uint64(char *str, uint64_t *objnum)
-{
- uint64_t num = 0;
-
- while (*str) {
- if (*str < '0' || *str > '9')
- return (EINVAL);
-
- num = num*10 + *str++ - '0';
- }
-
- *objnum = num;
- return (0);
-}
-
-/*
- * The boot path passed from the boot loader is in the form of
- * "rootpool-name/root-filesystem-object-number'. Convert this
- * string to a dataset name: "rootpool-name/root-filesystem-name".
+ * zfs_check_global_label:
+ * Check that the hex label string is appropriate for the dataset
+ * being mounted into the global_zone proper.
+ *
+ * Return an error if the hex label string is not default or
+ * admin_low/admin_high. For admin_low labels, the corresponding
+ * dataset must be readonly.
*/
-static int
-zfs_parse_bootfs(char *bpath, char *outpath)
+int
+zfs_check_global_label(const char *dsname, const char *hexsl)
{
- char *slashp;
- uint64_t objnum;
- int error;
-
- if (*bpath == 0 || *bpath == '/')
- return (EINVAL);
-
- (void) strcpy(outpath, bpath);
-
- slashp = strchr(bpath, '/');
-
- /* if no '/', just return the pool name */
- if (slashp == NULL) {
+ if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
return (0);
- }
-
- /* if not a number, just return the root dataset name */
- if (str_to_uint64(slashp+1, &objnum)) {
- return (0);
- }
-
- *slashp = '\0';
- error = dsl_dsobj_to_dsname(bpath, objnum, outpath);
- *slashp = '/';
-
- return (error);
-}
-
-static int
-zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
-{
- int error = 0;
- static int zfsrootdone = 0;
- zfsvfs_t *zfsvfs = NULL;
- znode_t *zp = NULL;
- vnode_t *vp = NULL;
- char *zfs_bootfs;
- char *zfs_devid;
-
- ASSERT(vfsp);
-
- /*
- * The filesystem that we mount as root is defined in the
- * boot property "zfs-bootfs" with a format of
- * "poolname/root-dataset-objnum".
- */
- if (why == ROOT_INIT) {
- if (zfsrootdone++)
- return (EBUSY);
- /*
- * the process of doing a spa_load will require the
- * clock to be set before we could (for example) do
- * something better by looking at the timestamp on
- * an uberblock, so just set it to -1.
- */
- clkset(-1);
-
- if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) {
- cmn_err(CE_NOTE, "spa_get_bootfs: can not get "
- "bootfs name");
- return (EINVAL);
- }
- zfs_devid = spa_get_bootprop("diskdevid");
- error = spa_import_rootpool(rootfs.bo_name, zfs_devid);
- if (zfs_devid)
- spa_free_bootprop(zfs_devid);
- if (error) {
- spa_free_bootprop(zfs_bootfs);
- cmn_err(CE_NOTE, "spa_import_rootpool: error %d",
- error);
- return (error);
- }
- if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) {
- spa_free_bootprop(zfs_bootfs);
- cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d",
- error);
- return (error);
- }
-
- spa_free_bootprop(zfs_bootfs);
-
- if (error = vfs_lock(vfsp))
- return (error);
-
- if (error = zfs_domount(vfsp, rootfs.bo_name)) {
- cmn_err(CE_NOTE, "zfs_domount: error %d", error);
- goto out;
- }
-
- zfsvfs = (zfsvfs_t *)vfsp->vfs_data;
- ASSERT(zfsvfs);
- if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) {
- cmn_err(CE_NOTE, "zfs_zget: error %d", error);
- goto out;
- }
-
- vp = ZTOV(zp);
- mutex_enter(&vp->v_lock);
- vp->v_flag |= VROOT;
- mutex_exit(&vp->v_lock);
- rootvp = vp;
-
- /*
- * Leave rootvp held. The root file system is never unmounted.
- */
-
- vfs_add((struct vnode *)0, vfsp,
- (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
-out:
- vfs_unlock(vfsp);
- return (error);
- } else if (why == ROOT_REMOUNT) {
- readonly_changed_cb(vfsp->vfs_data, B_FALSE);
- vfsp->vfs_flag |= VFS_REMOUNT;
-
- /* refresh mount options */
- zfs_unregister_callbacks(vfsp->vfs_data);
- return (zfs_register_callbacks(vfsp));
-
- } else if (why == ROOT_UNMOUNT) {
- zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data);
- (void) zfs_sync(vfsp, 0, 0);
+ if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
return (0);
+ if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
+ /* must be readonly */
+ uint64_t rdonly;
+
+ if (dsl_prop_get_integer(dsname,
+ zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
+ return (EACCES);
+ return (rdonly ? 0 : EACCES);
}
-
- /*
- * if "why" is equal to anything else other than ROOT_INIT,
- * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it.
- */
- return (ENOTSUP);
+ return (EACCES);
}
+#endif /* HAVE_MLSLABEL */
-/*ARGSUSED*/
-static int
-zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
-{
- char *osname;
- pathname_t spn;
- int error = 0;
- uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ?
- UIO_SYSSPACE : UIO_USERSPACE;
- int canwrite;
-
- if (mvp->v_type != VDIR)
- return (ENOTDIR);
-
- mutex_enter(&mvp->v_lock);
- if ((uap->flags & MS_REMOUNT) == 0 &&
- (uap->flags & MS_OVERLAY) == 0 &&
- (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
- mutex_exit(&mvp->v_lock);
- return (EBUSY);
- }
- mutex_exit(&mvp->v_lock);
-
- /*
- * ZFS does not support passing unparsed data in via MS_DATA.
- * Users should use the MS_OPTIONSTR interface; this means
- * that all option parsing is already done and the options struct
- * can be interrogated.
- */
- if ((uap->flags & MS_DATA) && uap->datalen > 0)
- return (EINVAL);
-
- /*
- * Get the objset name (the "special" mount argument).
- */
- if (error = pn_get(uap->spec, fromspace, &spn))
- return (error);
-
- osname = spn.pn_path;
-
- /*
- * Check for mount privilege?
- *
- * If we don't have privilege then see if
- * we have local permission to allow it
- */
- error = secpolicy_fs_mount(cr, mvp, vfsp);
- if (error) {
- error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr);
- if (error == 0) {
- vattr_t vattr;
-
- /*
- * Make sure user is the owner of the mount point
- * or has sufficient privileges.
- */
-
- vattr.va_mask = AT_UID;
-
- if (error = VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) {
- goto out;
- }
-
- if (secpolicy_vnode_owner(cr, vattr.va_uid) != 0 &&
- VOP_ACCESS(mvp, VWRITE, 0, cr, NULL) != 0) {
- error = EPERM;
- goto out;
- }
-
- secpolicy_fs_mount_clearopts(cr, vfsp);
- } else {
- goto out;
- }
- }
-
- /*
- * Refuse to mount a filesystem if we are in a local zone and the
- * dataset is not visible.
- */
- if (!INGLOBALZONE(curproc) &&
- (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
- error = EPERM;
- goto out;
- }
-
- /*
- * When doing a remount, we simply refresh our temporary properties
- * according to those options set in the current VFS options.
- */
- if (uap->flags & MS_REMOUNT) {
- /* refresh mount options */
- zfs_unregister_callbacks(vfsp->vfs_data);
- error = zfs_register_callbacks(vfsp);
- goto out;
- }
-
- error = zfs_domount(vfsp, osname);
-
- /*
- * Add an extra VFS_HOLD on our parent vfs so that it can't
- * disappear due to a forced unmount.
- */
- if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap)
- VFS_HOLD(mvp->v_vfsp);
-
-out:
- pn_free(&spn);
- return (error);
-}
-
-static int
+int
zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp)
{
zfsvfs_t *zfsvfs = vfsp->vfs_data;
/*
* We're a zfs filesystem.
*/
- (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
+ (void) strcpy(statp->f_basetype, MNTTYPE_ZFS);
statp->f_flag = vf_to_stf(vfsp->vfs_flag);
ZFS_EXIT(zfsvfs);
return (0);
}
+EXPORT_SYMBOL(zfs_statvfs);
-static int
+int
zfs_root(vfs_t *vfsp, vnode_t **vpp)
{
zfsvfs_t *zfsvfs = vfsp->vfs_data;
ZFS_EXIT(zfsvfs);
return (error);
}
+EXPORT_SYMBOL(zfs_root);
/*
* Teardown the zfsvfs::z_os.
mutex_enter(&zfsvfs->z_znodes_lock);
for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
zp = list_next(&zfsvfs->z_all_znodes, zp))
- if (zp->z_dbuf) {
+ if (zp->z_sa_hdl) {
ASSERT(ZTOV(zp)->v_count > 0);
zfs_znode_dmu_fini(zp);
}
/*
* Evict cached data
*/
- if (dmu_objset_evict_dbufs(zfsvfs->z_os)) {
- txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
- (void) dmu_objset_evict_dbufs(zfsvfs->z_os);
- }
+ if (dmu_objset_is_dirty_anywhere(zfsvfs->z_os))
+ if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
+ txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
+ (void) dmu_objset_evict_dbufs(zfsvfs->z_os);
return (0);
}
/*ARGSUSED*/
-static int
+int
zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
{
zfsvfs_t *zfsvfs = vfsp->vfs_data;
ret = secpolicy_fs_unmount(cr, vfsp);
if (ret) {
- ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
- ZFS_DELEG_PERM_MOUNT, cr);
- if (ret)
+ if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
+ ZFS_DELEG_PERM_MOUNT, cr))
return (ret);
}
/*
* Unset the objset user_ptr.
*/
- mutex_enter(&os->os->os_user_ptr_lock);
+ mutex_enter(&os->os_user_ptr_lock);
dmu_objset_set_user(os, NULL);
- mutex_exit(&os->os->os_user_ptr_lock);
+ mutex_exit(&os->os_user_ptr_lock);
/*
* Finally release the objset
*/
- dmu_objset_close(os);
+ dmu_objset_disown(os, zfsvfs);
}
/*
return (0);
}
+EXPORT_SYMBOL(zfs_umount);
-static int
+int
zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
{
zfsvfs_t *zfsvfs = vfsp->vfs_data;
gen_mask = -1ULL >> (64 - 8 * i);
dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
- if (err = zfs_zget(zfsvfs, object, &zp)) {
+ if ((err = zfs_zget(zfsvfs, object, &zp))) {
ZFS_EXIT(zfsvfs);
return (err);
}
- zp_gen = zp->z_phys->zp_gen & gen_mask;
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
+ sizeof (uint64_t));
+ zp_gen = zp_gen & gen_mask;
if (zp_gen == 0)
zp_gen = 1;
if (zp->z_unlinked || zp_gen != fid_gen) {
}
*vpp = ZTOV(zp);
+ if (*vpp)
+ zfs_inode_update(VTOZ(*vpp));
+
ZFS_EXIT(zfsvfs);
return (0);
}
+EXPORT_SYMBOL(zfs_vget);
/*
* Block out VOPs and close zfsvfs_t::z_os
* 'z_teardown_inactive_lock' write held.
*/
int
-zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep)
+zfs_suspend_fs(zfsvfs_t *zfsvfs)
{
int error;
if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
return (error);
-
- *modep = zfsvfs->z_os->os_mode;
- if (name)
- dmu_objset_name(zfsvfs->z_os, name);
- dmu_objset_close(zfsvfs->z_os);
+ dmu_objset_disown(zfsvfs->z_os, zfsvfs);
return (0);
}
+EXPORT_SYMBOL(zfs_suspend_fs);
/*
* Reopen zfsvfs_t::z_os and release VOPs.
+ *
+ * Re-owns the objset named by osname and, when that succeeds, re-runs
+ * sa_setup() and zfsvfs_setup().  Both teardown locks must be write held
+ * on entry and are released before returning.
+ *
+ * Returns 0 on success or an errno value.  A failed lookup of
+ * ZFS_SA_ATTRS on a file system at ZPL_VERSION_SA or later is returned
+ * as an error instead of being silently dropped.
*/
int
-zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
+zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
{
- int err;
+ int err, err2;
ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
- err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
+ err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
+ &zfsvfs->z_os);
if (err) {
zfsvfs->z_os = NULL;
} else {
znode_t *zp;
+ uint64_t sa_obj = 0;
+
+ err2 = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
+ ZFS_SA_ATTRS, 8, 1, &sa_obj);
+
+ /*
+ * err is known to be 0 on this branch, so the lookup failure
+ * has to be copied into it; otherwise we would bail out with
+ * the setup skipped and still report success.
+ */
+ if (err2 != 0 && zfsvfs->z_version >= ZPL_VERSION_SA) {
+ err = err2;
+ goto bail;
+ }
+
+ if ((err = sa_setup(zfsvfs->z_os, sa_obj,
+ zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table)) != 0)
+ goto bail;
VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
}
+bail:
/* release the VOPs */
rw_exit(&zfsvfs->z_teardown_inactive_lock);
rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
}
return (err);
}
+EXPORT_SYMBOL(zfs_resume_fs);
static void
zfs_freevfs(vfs_t *vfsp)
{
zfsvfs_t *zfsvfs = vfsp->vfs_data;
- /*
- * If this is a snapshot, we have an extra VFS_HOLD on our parent
- * from zfs_mount(). Release it here.
- */
- if (zfsvfs->z_issnap)
- VFS_RELE(zfsvfs->z_parent->z_vfs);
-
zfsvfs_free(zfsvfs);
-
- atomic_add_32(&zfs_active_fs_count, -1);
-}
-
-/*
- * VFS_INIT() initialization. Note that there is no VFS_FINI(),
- * so we can't safely do any non-idempotent initialization here.
- * Leave that to zfs_init() and zfs_fini(), which are called
- * from the module's _init() and _fini() entry points.
- */
-/*ARGSUSED*/
-static int
-zfs_vfsinit(int fstype, char *name)
-{
- int error;
-
- zfsfstype = fstype;
-
- /*
- * Setup vfsops and vnodeops tables.
- */
- error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops);
- if (error != 0) {
- cmn_err(CE_WARN, "zfs: bad vfs ops template");
- }
-
- error = zfs_create_op_tables();
- if (error) {
- zfs_remove_op_tables();
- cmn_err(CE_WARN, "zfs: bad vnode ops template");
- (void) vfs_freevfsops_by_type(zfsfstype);
- return (error);
- }
-
- mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL);
-
- /*
- * Unique major number for all zfs mounts.
- * If we run out of 32-bit minors, we'll getudev() another major.
- */
- zfs_major = ddi_name_to_major(ZFS_DRIVER);
- zfs_minor = ZFS_MIN_MINOR;
-
- return (0);
}
+#endif /* HAVE_ZPL */
void
zfs_init(void)
{
- /*
- * Initialize .zfs directory structures
- */
zfsctl_init();
-
- /*
- * Initialize znode cache, vnode ops, etc...
- */
zfs_znode_init();
dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
zfs_znode_fini();
}
-int
-zfs_busy(void)
-{
- return (zfs_active_fs_count != 0);
-}
-
+#ifdef HAVE_ZPL
int
zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
{
if (newvers < zfsvfs->z_version)
return (EINVAL);
+ if (zfs_spa_version_map(newvers) >
+ spa_version(dmu_objset_spa(zfsvfs->z_os)))
+ return (ENOTSUP);
+
tx = dmu_tx_create(os);
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
+ if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
+ dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
+ ZFS_SA_ATTRS);
+ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
+ }
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
return (error);
}
+
error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
8, 1, &newvers, tx);
return (error);
}
- spa_history_internal_log(LOG_DS_UPGRADE,
- dmu_objset_spa(os), tx, CRED(),
- "oldver=%llu newver=%llu dataset = %llu",
+ if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
+ uint64_t sa_obj;
+
+ ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
+ SPA_VERSION_SA);
+ sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
+ DMU_OT_NONE, 0, tx);
+
+ error = zap_add(os, MASTER_NODE_OBJ,
+ ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
+ ASSERT3U(error, ==, 0);
+
+ VERIFY(0 == sa_set_sa_object(os, sa_obj));
+ sa_register_update_callback(os, zfs_sa_upgrade);
+ }
+
+ spa_history_log_internal(LOG_DS_UPGRADE,
+ dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
zfsvfs->z_version, newvers, dmu_objset_id(os));
dmu_tx_commit(tx);
return (0);
}
+EXPORT_SYMBOL(zfs_set_version);
+#endif /* HAVE_ZPL */
/*
* Read a property stored within the master node.
}
return (error);
}
-
-static vfsdef_t vfw = {
- VFSDEF_VERSION,
- MNTTYPE_ZFS,
- zfs_vfsinit,
- VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS|
- VSW_XID,
- &zfs_mntopts
-};
-
-struct modlfs zfs_modlfs = {
- &mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw
-};