*
* CDDL HEADER END
*/
+
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/* Portions Copyright 2010 Robert Milkowski */
+
#ifndef _SYS_FS_ZFS_H
#define _SYS_FS_ZFS_H
+#include <sys/time.h>
+
#ifdef __cplusplus
extern "C" {
#endif
ZFS_PROP_READONLY,
ZFS_PROP_ZONED,
ZFS_PROP_SNAPDIR,
- ZFS_PROP_ACLMODE,
+ ZFS_PROP_PRIVATE, /* not exposed to user, temporary */
ZFS_PROP_ACLINHERIT,
ZFS_PROP_CREATETXG, /* not exposed to the user */
ZFS_PROP_NAME, /* not exposed to the user */
ZFS_PROP_CANMOUNT,
- ZFS_PROP_SHAREISCSI,
ZFS_PROP_ISCSIOPTIONS, /* not exposed to the user */
ZFS_PROP_XATTR,
ZFS_PROP_NUMCLONES, /* not exposed to the user */
ZFS_PROP_USEDREFRESERV,
ZFS_PROP_USERACCOUNTING, /* not exposed to the user */
ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */
+ ZFS_PROP_DEFER_DESTROY,
+ ZFS_PROP_USERREFS,
+ ZFS_PROP_LOGBIAS,
+ ZFS_PROP_UNIQUE, /* not exposed to the user */
+ ZFS_PROP_OBJSETID, /* not exposed to the user */
+ ZFS_PROP_DEDUP,
+ ZFS_PROP_MLSLABEL,
+ ZFS_PROP_SYNC,
ZFS_NUM_PROPS
} zfs_prop_t;
typedef enum {
ZPOOL_PROP_NAME,
ZPOOL_PROP_SIZE,
- ZPOOL_PROP_USED,
- ZPOOL_PROP_AVAILABLE,
ZPOOL_PROP_CAPACITY,
ZPOOL_PROP_ALTROOT,
ZPOOL_PROP_HEALTH,
ZPOOL_PROP_FAILUREMODE,
ZPOOL_PROP_LISTSNAPS,
ZPOOL_PROP_AUTOEXPAND,
+ ZPOOL_PROP_DEDUPDITTO,
+ ZPOOL_PROP_DEDUPRATIO,
+ ZPOOL_PROP_FREE,
+ ZPOOL_PROP_ALLOCATED,
+ ZPOOL_PROP_READONLY,
ZPOOL_NUM_PROPS
} zpool_prop_t;
ZPROP_SRC_DEFAULT = 0x2,
ZPROP_SRC_TEMPORARY = 0x4,
ZPROP_SRC_LOCAL = 0x8,
- ZPROP_SRC_INHERITED = 0x10
+ ZPROP_SRC_INHERITED = 0x10,
+ ZPROP_SRC_RECEIVED = 0x20
} zprop_source_t;
-#define ZPROP_SRC_ALL 0x1f
+#define ZPROP_SRC_ALL 0x3f
+
+#define ZPROP_SOURCE_VAL_RECVD "$recvd"
+#define ZPROP_N_MORE_ERRORS "N_MORE_ERRORS"
+/*
+ * Dataset flag implemented as a special entry in the props zap object
+ * indicating that the dataset has received properties on or after
+ * SPA_VERSION_RECVD_PROPS. The first such receive blows away local properties
+ * just as it did in earlier versions, and thereafter, local properties are
+ * preserved.
+ */
+#define ZPROP_HAS_RECVD "$hasrecvd"
+
+typedef enum {
+ ZPROP_ERR_NOCLEAR = 0x1, /* failure to clear existing props */
+ ZPROP_ERR_NORESTORE = 0x2 /* failure to restore props on error */
+} zprop_errflags_t;
typedef int (*zprop_func)(int, void *);
const char *zfs_prop_to_name(zfs_prop_t);
zfs_prop_t zfs_name_to_prop(const char *);
boolean_t zfs_prop_user(const char *);
-boolean_t zfs_prop_userquota(const char *name);
+boolean_t zfs_prop_userquota(const char *);
int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **);
int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *);
+uint64_t zfs_prop_random_value(zfs_prop_t, uint64_t seed);
boolean_t zfs_prop_valid_for_type(int, zfs_type_t);
/*
boolean_t zpool_prop_readonly(zpool_prop_t);
int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **);
int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *);
+uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed);
/*
* Definitions for the Delegation.
#define ZFS_DELEG_PERM_GID "gid"
#define ZFS_DELEG_PERM_GROUPS "groups"
+#define ZFS_MLSLABEL_DEFAULT "none"
+
#define ZFS_SMB_ACL_SRC "src"
#define ZFS_SMB_ACL_TARGET "target"
ZFS_CANMOUNT_NOAUTO = 2
} zfs_canmount_type_t;
+typedef enum {
+ ZFS_LOGBIAS_LATENCY = 0,
+ ZFS_LOGBIAS_THROUGHPUT = 1
+} zfs_logbias_op_t;
+
typedef enum zfs_share_op {
ZFS_SHARE_NFS = 0,
ZFS_UNSHARE_NFS = 1,
ZFS_CACHE_ALL = 2
} zfs_cache_type_t;
+typedef enum {
+ ZFS_SYNC_STANDARD = 0,
+ ZFS_SYNC_ALWAYS = 1,
+ ZFS_SYNC_DISABLED = 2
+} zfs_sync_type_t;
+
/*
* On-disk version number.
#define SPA_VERSION_14 14ULL
#define SPA_VERSION_15 15ULL
#define SPA_VERSION_16 16ULL
+#define SPA_VERSION_17 17ULL
+#define SPA_VERSION_18 18ULL
+#define SPA_VERSION_19 19ULL
+#define SPA_VERSION_20 20ULL
+#define SPA_VERSION_21 21ULL
+#define SPA_VERSION_22 22ULL
+#define SPA_VERSION_23 23ULL
+#define SPA_VERSION_24 24ULL
+#define SPA_VERSION_25 25ULL
+#define SPA_VERSION_26 26ULL
+#define SPA_VERSION_27 27ULL
+#define SPA_VERSION_28 28ULL
+
/*
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
* format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*},
* and do the appropriate changes. Also bump the version number in
* usr/src/grub/capability.
*/
-#define SPA_VERSION SPA_VERSION_16
-#define SPA_VERSION_STRING "16"
+#define SPA_VERSION SPA_VERSION_28
+#define SPA_VERSION_STRING "28"
/*
* Symbolic names for the changes that caused a SPA_VERSION switch.
#define SPA_VERSION_INITIAL SPA_VERSION_1
#define SPA_VERSION_DITTO_BLOCKS SPA_VERSION_2
#define SPA_VERSION_SPARES SPA_VERSION_3
-#define SPA_VERSION_RAID6 SPA_VERSION_3
-#define SPA_VERSION_BPLIST_ACCOUNT SPA_VERSION_3
+#define SPA_VERSION_RAIDZ2 SPA_VERSION_3
+#define SPA_VERSION_BPOBJ_ACCOUNT SPA_VERSION_3
#define SPA_VERSION_RAIDZ_DEFLATE SPA_VERSION_3
#define SPA_VERSION_DNODE_BYTES SPA_VERSION_3
#define SPA_VERSION_ZPOOL_HISTORY SPA_VERSION_4
#define SPA_VERSION_PASSTHROUGH_X SPA_VERSION_14
#define SPA_VERSION_USERSPACE SPA_VERSION_15
#define SPA_VERSION_STMF_PROP SPA_VERSION_16
+#define SPA_VERSION_RAIDZ3 SPA_VERSION_17
+#define SPA_VERSION_USERREFS SPA_VERSION_18
+#define SPA_VERSION_HOLES SPA_VERSION_19
+#define SPA_VERSION_ZLE_COMPRESSION SPA_VERSION_20
+#define SPA_VERSION_DEDUP SPA_VERSION_21
+#define SPA_VERSION_RECVD_PROPS SPA_VERSION_22
+#define SPA_VERSION_SLIM_ZIL SPA_VERSION_23
+#define SPA_VERSION_SA SPA_VERSION_24
+#define SPA_VERSION_SCAN SPA_VERSION_25
+#define SPA_VERSION_DIR_CLONES SPA_VERSION_26
+#define SPA_VERSION_DEADLISTS SPA_VERSION_26
+#define SPA_VERSION_FAST_SNAP SPA_VERSION_27
+#define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28
/*
* ZPL version - rev'd whenever an incompatible on-disk format change
#define ZPL_VERSION_2 2ULL
#define ZPL_VERSION_3 3ULL
#define ZPL_VERSION_4 4ULL
-#define ZPL_VERSION ZPL_VERSION_4
-#define ZPL_VERSION_STRING "4"
+#define ZPL_VERSION_5 5ULL
+#define ZPL_VERSION ZPL_VERSION_5
+#define ZPL_VERSION_STRING "5"
#define ZPL_VERSION_INITIAL ZPL_VERSION_1
#define ZPL_VERSION_DIRENT_TYPE ZPL_VERSION_2
#define ZPL_VERSION_NORMALIZATION ZPL_VERSION_3
#define ZPL_VERSION_SYSATTR ZPL_VERSION_3
#define ZPL_VERSION_USERSPACE ZPL_VERSION_4
+#define ZPL_VERSION_SA ZPL_VERSION_5
+
+/* Rewind request information */
+#define ZPOOL_NO_REWIND 1 /* No policy - default behavior */
+#define ZPOOL_NEVER_REWIND 2 /* Do not search for best txg or rewind */
+#define ZPOOL_TRY_REWIND 4 /* Search for best txg, but do not rewind */
+#define ZPOOL_DO_REWIND 8 /* Rewind to best txg w/in deferred frees */
+#define ZPOOL_EXTREME_REWIND 16 /* Allow extreme measures to find best txg */
+#define ZPOOL_REWIND_MASK 28 /* All the possible rewind bits */
+#define ZPOOL_REWIND_POLICIES 31 /* All the possible policy bits */
+
+typedef struct zpool_rewind_policy {
+ uint32_t zrp_request; /* rewind behavior requested */
+ uint64_t zrp_maxmeta; /* max acceptable meta-data errors */
+ uint64_t zrp_maxdata; /* max acceptable data errors */
+ uint64_t zrp_txg; /* specific txg to load */
+} zpool_rewind_policy_t;
/*
* The following are configuration names used in the nvlist describing a pool's
#define ZPOOL_CONFIG_ASHIFT "ashift"
#define ZPOOL_CONFIG_ASIZE "asize"
#define ZPOOL_CONFIG_DTL "DTL"
-#define ZPOOL_CONFIG_STATS "stats"
+#define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */
+#define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */
#define ZPOOL_CONFIG_WHOLE_DISK "whole_disk"
#define ZPOOL_CONFIG_ERRCOUNT "error_count"
#define ZPOOL_CONFIG_NOT_PRESENT "not_present"
#define ZPOOL_CONFIG_NPARITY "nparity"
#define ZPOOL_CONFIG_HOSTID "hostid"
#define ZPOOL_CONFIG_HOSTNAME "hostname"
+#define ZPOOL_CONFIG_LOADED_TIME "initial_load_time"
#define ZPOOL_CONFIG_UNSPARE "unspare"
#define ZPOOL_CONFIG_PHYS_PATH "phys_path"
#define ZPOOL_CONFIG_IS_LOG "is_log"
#define ZPOOL_CONFIG_L2CACHE "l2cache"
+#define ZPOOL_CONFIG_HOLE_ARRAY "hole_array"
+#define ZPOOL_CONFIG_VDEV_CHILDREN "vdev_children"
+#define ZPOOL_CONFIG_IS_HOLE "is_hole"
+#define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram"
+#define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats"
+#define ZPOOL_CONFIG_DDT_STATS "ddt_stats"
+#define ZPOOL_CONFIG_SPLIT "splitcfg"
+#define ZPOOL_CONFIG_ORIG_GUID "orig_guid"
+#define ZPOOL_CONFIG_SPLIT_GUID "split_guid"
+#define ZPOOL_CONFIG_SPLIT_LIST "guid_list"
+#define ZPOOL_CONFIG_REMOVING "removing"
+#define ZPOOL_CONFIG_RESILVERING "resilvering"
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
+#define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */
+#define ZPOOL_CONFIG_LOAD_INFO "load_info" /* not stored on disk */
/*
* The persistent vdev state is stored as separate values rather than a single
* 'vdev_state' entry. This is because a device can be in multiple states, such
#define ZPOOL_CONFIG_DEGRADED "degraded"
#define ZPOOL_CONFIG_REMOVED "removed"
#define ZPOOL_CONFIG_FRU "fru"
+#define ZPOOL_CONFIG_AUX_STATE "aux_state"
+
+/* Rewind policy parameters */
+#define ZPOOL_REWIND_POLICY "rewind-policy"
+#define ZPOOL_REWIND_REQUEST "rewind-request"
+#define ZPOOL_REWIND_REQUEST_TXG "rewind-request-txg"
+#define ZPOOL_REWIND_META_THRESH "rewind-meta-thresh"
+#define ZPOOL_REWIND_DATA_THRESH "rewind-data-thresh"
+
+/* Rewind data discovered */
+#define ZPOOL_CONFIG_LOAD_TIME "rewind_txg_ts"
+#define ZPOOL_CONFIG_LOAD_DATA_ERRORS "verify_data_errors"
+#define ZPOOL_CONFIG_REWIND_TIME "seconds_of_rewind"
#define VDEV_TYPE_ROOT "root"
#define VDEV_TYPE_MIRROR "mirror"
#define VDEV_TYPE_DISK "disk"
#define VDEV_TYPE_FILE "file"
#define VDEV_TYPE_MISSING "missing"
+#define VDEV_TYPE_HOLE "hole"
#define VDEV_TYPE_SPARE "spare"
#define VDEV_TYPE_LOG "log"
#define VDEV_TYPE_L2CACHE "l2cache"
VDEV_AUX_SPARED, /* hot spare used in another pool */
VDEV_AUX_ERR_EXCEEDED, /* too many errors */
VDEV_AUX_IO_FAILURE, /* experienced I/O failure */
- VDEV_AUX_BAD_LOG /* cannot read log chain(s) */
+ VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */
+ VDEV_AUX_EXTERNAL, /* external diagnosis */
+ VDEV_AUX_SPLIT_POOL /* vdev was split off into another pool */
} vdev_aux_t;
/*
} pool_state_t;
/*
- * Scrub types.
+ * Scan Functions.
*/
-typedef enum pool_scrub_type {
- POOL_SCRUB_NONE,
- POOL_SCRUB_RESILVER,
- POOL_SCRUB_EVERYTHING,
- POOL_SCRUB_TYPES
-} pool_scrub_type_t;
+typedef enum pool_scan_func {
+ POOL_SCAN_NONE,
+ POOL_SCAN_SCRUB,
+ POOL_SCAN_RESILVER,
+ POOL_SCAN_FUNCS
+} pool_scan_func_t;
/*
* ZIO types. Needed to interpret vdev statistics below.
} zio_type_t;
/*
+ * Pool statistics. Note: all fields should be 64-bit because this
+ * is passed between kernel and userland as an nvlist uint64 array.
+ */
+typedef struct pool_scan_stat {
+ /* values stored on disk */
+ uint64_t pss_func; /* pool_scan_func_t */
+ uint64_t pss_state; /* dsl_scan_state_t */
+ uint64_t pss_start_time; /* scan start time */
+ uint64_t pss_end_time; /* scan end time */
+ uint64_t pss_to_examine; /* total bytes to scan */
+ uint64_t pss_examined; /* total examined bytes */
+ uint64_t pss_to_process; /* total bytes to process */
+ uint64_t pss_processed; /* total processed bytes */
+ uint64_t pss_errors; /* scan errors */
+
+ /* values not stored on disk */
+ uint64_t pss_pass_exam; /* examined bytes per scan pass */
+ uint64_t pss_pass_start; /* start time of a scan pass */
+} pool_scan_stat_t;
+
+typedef enum dsl_scan_state {
+ DSS_NONE,
+ DSS_SCANNING,
+ DSS_FINISHED,
+ DSS_CANCELED,
+ DSS_NUM_STATES
+} dsl_scan_state_t;
+
+
+/*
* Vdev statistics. Note: all fields should be 64-bit because this
* is passed between kernel and userland as an nvlist uint64 array.
*/
uint64_t vs_write_errors; /* write errors */
uint64_t vs_checksum_errors; /* checksum errors */
uint64_t vs_self_healed; /* self-healed bytes */
- uint64_t vs_scrub_type; /* pool_scrub_type_t */
- uint64_t vs_scrub_complete; /* completed? */
- uint64_t vs_scrub_examined; /* bytes examined; top */
- uint64_t vs_scrub_repaired; /* bytes repaired; leaf */
- uint64_t vs_scrub_errors; /* errors during scrub */
- uint64_t vs_scrub_start; /* UTC scrub start time */
- uint64_t vs_scrub_end; /* UTC scrub end time */
+ uint64_t vs_scan_removing; /* removing? */
+ uint64_t vs_scan_processed; /* scan processed bytes */
} vdev_stat_t;
+/*
+ * DDT statistics. Note: all fields should be 64-bit because this
+ * is passed between kernel and userland as an nvlist uint64 array.
+ */
+typedef struct ddt_object {
+ uint64_t ddo_count; /* number of elments in ddt */
+ uint64_t ddo_dspace; /* size of ddt on disk */
+ uint64_t ddo_mspace; /* size of ddt in-core */
+} ddt_object_t;
+
+typedef struct ddt_stat {
+ uint64_t dds_blocks; /* blocks */
+ uint64_t dds_lsize; /* logical size */
+ uint64_t dds_psize; /* physical size */
+ uint64_t dds_dsize; /* deflated allocated size */
+ uint64_t dds_ref_blocks; /* referenced blocks */
+ uint64_t dds_ref_lsize; /* referenced lsize * refcnt */
+ uint64_t dds_ref_psize; /* referenced psize * refcnt */
+ uint64_t dds_ref_dsize; /* referenced dsize * refcnt */
+} ddt_stat_t;
+
+typedef struct ddt_histogram {
+ ddt_stat_t ddh_stat[64]; /* power-of-two histogram buckets */
+} ddt_histogram_t;
+
#define ZVOL_DRIVER "zvol"
#define ZFS_DRIVER "zfs"
#define ZFS_DEV "/dev/zfs"
-/*
- * zvol paths. Irritatingly, the devfsadm interfaces want all these
- * paths without the /dev prefix, but for some things, we want the
- * /dev prefix. Below are the names without /dev.
- */
-#define ZVOL_DEV_DIR "zvol/dsk"
-#define ZVOL_RDEV_DIR "zvol/rdsk"
+/* general zvol path */
+#define ZVOL_DIR "/dev"
-/*
- * And here are the things we need with /dev, etc. in front of them.
- */
-#define ZVOL_PSEUDO_DEV "/devices/pseudo/zfs@0:"
-#define ZVOL_FULL_DEV_DIR "/dev/" ZVOL_DEV_DIR "/"
+#define ZVOL_MAJOR 230
+#define ZVOL_MINOR_BITS 4
+#define ZVOL_MINOR_MASK ((1U << ZVOL_MINOR_BITS) - 1)
+#define ZVOL_MINORS (1 << 4)
#define ZVOL_PROP_NAME "name"
+#define ZVOL_DEFAULT_BLOCKSIZE 8192
/*
* /dev/zfs ioctl numbers.
ZFS_IOC_POOL_CONFIGS,
ZFS_IOC_POOL_STATS,
ZFS_IOC_POOL_TRYIMPORT,
- ZFS_IOC_POOL_SCRUB,
+ ZFS_IOC_POOL_SCAN,
ZFS_IOC_POOL_FREEZE,
ZFS_IOC_POOL_UPGRADE,
ZFS_IOC_POOL_GET_HISTORY,
ZFS_IOC_POOL_GET_PROPS,
ZFS_IOC_SET_FSACL,
ZFS_IOC_GET_FSACL,
- ZFS_IOC_ISCSI_PERM_CHECK,
ZFS_IOC_SHARE,
ZFS_IOC_INHERIT_PROP,
ZFS_IOC_SMB_ACL,
ZFS_IOC_USERSPACE_ONE,
ZFS_IOC_USERSPACE_MANY,
- ZFS_IOC_USERSPACE_UPGRADE
+ ZFS_IOC_USERSPACE_UPGRADE,
+ ZFS_IOC_HOLD,
+ ZFS_IOC_RELEASE,
+ ZFS_IOC_GET_HOLDS,
+ ZFS_IOC_OBJSET_RECVD_PROPS,
+ ZFS_IOC_VDEV_SPLIT,
+ ZFS_IOC_NEXT_OBJ,
+ ZFS_IOC_DIFF,
+ ZFS_IOC_TMP_SNAPSHOT,
+ ZFS_IOC_OBJ_TO_STATS,
+ ZFS_IOC_EVENTS_NEXT,
+ ZFS_IOC_EVENTS_CLEAR,
} zfs_ioc_t;
/*
* Internal SPA load state. Used by FMA diagnosis engine.
*/
typedef enum {
- SPA_LOAD_NONE, /* no load in progress */
- SPA_LOAD_OPEN, /* normal open */
- SPA_LOAD_IMPORT, /* import in progress */
- SPA_LOAD_TRYIMPORT /* tryimport in progress */
+ SPA_LOAD_NONE, /* no load in progress */
+ SPA_LOAD_OPEN, /* normal open */
+ SPA_LOAD_IMPORT, /* import in progress */
+ SPA_LOAD_TRYIMPORT, /* tryimport in progress */
+ SPA_LOAD_RECOVER, /* recovery requested */
+ SPA_LOAD_ERROR /* load failed */
} spa_load_state_t;
/*
#define ZFS_OFFLINE_TEMPORARY 0x1
/*
+ * Flags for ZFS_IOC_POOL_IMPORT
+ */
+#define ZFS_IMPORT_NORMAL 0x0
+#define ZFS_IMPORT_VERBATIM 0x1
+#define ZFS_IMPORT_ANY_HOST 0x2
+#define ZFS_IMPORT_MISSING_LOG 0x4
+#define ZFS_IMPORT_ONLY 0x8
+
+/*
* Sysevent payload members. ZFS will generate the following sysevents with the
* given payloads:
*
/*
* Note: This is encoded on-disk, so new events must be added to the
* end, and unused events can not be removed. Be sure to edit
- * zpool_main.c: hist_event_table[].
+ * libzfs_pool.c: hist_event_table[].
*/
typedef enum history_internal_events {
LOG_NO_EVENT = 0,
LOG_POOL_VDEV_OFFLINE,
LOG_POOL_UPGRADE,
LOG_POOL_CLEAR,
- LOG_POOL_SCRUB,
+ LOG_POOL_SCAN,
LOG_POOL_PROPSET,
LOG_DS_CREATE,
LOG_DS_CLONE,
LOG_DS_UPGRADE,
LOG_DS_REFQUOTA,
LOG_DS_REFRESERV,
- LOG_POOL_SCRUB_DONE,
+ LOG_POOL_SCAN_DONE,
+ LOG_DS_USER_HOLD,
+ LOG_DS_USER_RELEASE,
+ LOG_POOL_SPLIT,
LOG_END
} history_internal_events_t;