* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/spa.h>
#include <sys/zfs_fuid.h>
#include <sys/ddi.h>
+#include <sys/dsl_dataset.h>
/*
- * All the functions in this file are used to construct the log entries
- * to record transactions. They allocate * an intent log transaction
- * structure (itx_t) and save within it all the information necessary to
- * possibly replay the transaction. The itx is then assigned a sequence
- * number and inserted in the in-memory list anchored in the zilog.
+ * These zfs_log_* functions must be called within a dmu tx, in one
+ * of two contexts depending on zilog->z_replay:
+ *
+ * Non-replay mode
+ * ---------------
+ * We need to record the transaction so that if it is committed to
+ * the Intent Log then it can be replayed. An intent log transaction
+ * structure (itx_t) is allocated and all the information necessary to
+ * possibly replay the transaction is saved in it. The itx is then assigned
+ * a sequence number and inserted in the in-memory list anchored in the zilog.
+ *
+ * Replay mode
+ * -----------
+ * We need to mark the intent log record as replayed in the log header.
+ * This is done in the same transaction as the replay so that they
+ * commit atomically.
*/
int
zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap)
{
- int isxvattr = (vap->va_mask & AT_XVATTR);
+ int isxvattr = (vap->va_mask & ATTR_XVATTR);
switch (type) {
case Z_FILE:
if (vsecp == NULL && !isxvattr)
ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime);
if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ);
+ if (XVA_ISSET_REQ(xvap, XAT_REPARSE))
+ *attrs |= (xoap->xoa_reparse == 0) ? 0 :
+ XAT0_REPARSE;
+ if (XVA_ISSET_REQ(xvap, XAT_OFFLINE))
+ *attrs |= (xoap->xoa_offline == 0) ? 0 :
+ XAT0_OFFLINE;
+ if (XVA_ISSET_REQ(xvap, XAT_SPARSE))
+ *attrs |= (xoap->xoa_sparse == 0) ? 0 :
+ XAT0_SPARSE;
}
static void *
zfs_fuid_info_t *fuidp, vattr_t *vap)
{
itx_t *itx;
- uint64_t seq;
lr_create_t *lr;
lr_acl_create_t *lracl;
- size_t aclsize;
+ xvattr_t *xvap = (xvattr_t *)vap;
+ size_t aclsize = 0;
size_t xvatsize = 0;
size_t txsize;
- xvattr_t *xvap = (xvattr_t *)vap;
void *end;
size_t lrsize;
size_t namesize = strlen(name) + 1;
size_t fuidsz = 0;
- if (zilog == NULL)
+ if (zil_replaying(zilog, tx))
return;
/*
fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t);
}
- if (vap->va_mask & AT_XVATTR)
+ if (vap->va_mask & ATTR_XVATTR)
xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize);
if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR ||
lr = (lr_create_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
lr->lr_foid = zp->z_id;
- lr->lr_mode = zp->z_phys->zp_mode;
- if (!IS_EPHEMERAL(zp->z_phys->zp_uid)) {
- lr->lr_uid = (uint64_t)zp->z_phys->zp_uid;
+ lr->lr_mode = zp->z_mode;
+ if (!IS_EPHEMERAL(zp->z_uid)) {
+ lr->lr_uid = (uint64_t)zp->z_uid;
} else {
lr->lr_uid = fuidp->z_fuid_owner;
}
- if (!IS_EPHEMERAL(zp->z_phys->zp_gid)) {
- lr->lr_gid = (uint64_t)zp->z_phys->zp_gid;
+ if (!IS_EPHEMERAL(zp->z_gid)) {
+ lr->lr_gid = (uint64_t)zp->z_gid;
} else {
lr->lr_gid = fuidp->z_fuid_group;
}
- lr->lr_gen = zp->z_phys->zp_gen;
- lr->lr_crtime[0] = zp->z_phys->zp_crtime[0];
- lr->lr_crtime[1] = zp->z_phys->zp_crtime[1];
- lr->lr_rdev = zp->z_phys->zp_rdev;
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen,
+ sizeof (uint64_t));
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
+ lr->lr_crtime, sizeof (uint64_t) * 2);
+
+ if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(zp)), &lr->lr_rdev,
+ sizeof (lr->lr_rdev)) != 0)
+ lr->lr_rdev = 0;
/*
* Fill in xvattr info if any
*/
- if (vap->va_mask & AT_XVATTR) {
+ if (vap->va_mask & ATTR_XVATTR) {
zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap);
end = (caddr_t)lr + lrsize + xvatsize;
} else {
*/
bcopy(name, end, namesize);
- seq = zil_itx_assign(zilog, itx, tx);
- dzp->z_last_itx = seq;
- zp->z_last_itx = seq;
+ zil_itx_assign(zilog, itx, tx);
}
/*
*/
void
zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
- znode_t *dzp, char *name)
+ znode_t *dzp, char *name, uint64_t foid)
{
itx_t *itx;
- uint64_t seq;
lr_remove_t *lr;
size_t namesize = strlen(name) + 1;
- if (zilog == NULL)
+ if (zil_replaying(zilog, tx))
return;
itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
lr->lr_doid = dzp->z_id;
bcopy(name, (char *)(lr + 1), namesize);
- seq = zil_itx_assign(zilog, itx, tx);
- dzp->z_last_itx = seq;
+ itx->itx_oid = foid;
+
+ zil_itx_assign(zilog, itx, tx);
}
/*
znode_t *dzp, znode_t *zp, char *name)
{
itx_t *itx;
- uint64_t seq;
lr_link_t *lr;
size_t namesize = strlen(name) + 1;
- if (zilog == NULL)
+ if (zil_replaying(zilog, tx))
return;
itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
lr->lr_link_obj = zp->z_id;
bcopy(name, (char *)(lr + 1), namesize);
- seq = zil_itx_assign(zilog, itx, tx);
- dzp->z_last_itx = seq;
- zp->z_last_itx = seq;
+ zil_itx_assign(zilog, itx, tx);
}
/*
znode_t *dzp, znode_t *zp, char *name, char *link)
{
itx_t *itx;
- uint64_t seq;
lr_create_t *lr;
size_t namesize = strlen(name) + 1;
size_t linksize = strlen(link) + 1;
- if (zilog == NULL)
+ if (zil_replaying(zilog, tx))
return;
itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize);
lr = (lr_create_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
lr->lr_foid = zp->z_id;
- lr->lr_mode = zp->z_phys->zp_mode;
- lr->lr_uid = zp->z_phys->zp_uid;
- lr->lr_gid = zp->z_phys->zp_gid;
- lr->lr_gen = zp->z_phys->zp_gen;
- lr->lr_crtime[0] = zp->z_phys->zp_crtime[0];
- lr->lr_crtime[1] = zp->z_phys->zp_crtime[1];
+ lr->lr_uid = zp->z_uid;
+ lr->lr_gid = zp->z_gid;
+ lr->lr_mode = zp->z_mode;
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen,
+ sizeof (uint64_t));
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
+ lr->lr_crtime, sizeof (uint64_t) * 2);
bcopy(name, (char *)(lr + 1), namesize);
bcopy(link, (char *)(lr + 1) + namesize, linksize);
- seq = zil_itx_assign(zilog, itx, tx);
- dzp->z_last_itx = seq;
- zp->z_last_itx = seq;
+ zil_itx_assign(zilog, itx, tx);
}
/*
znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
{
itx_t *itx;
- uint64_t seq;
lr_rename_t *lr;
size_t snamesize = strlen(sname) + 1;
size_t dnamesize = strlen(dname) + 1;
- if (zilog == NULL)
+ if (zil_replaying(zilog, tx))
return;
itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
lr->lr_tdoid = tdzp->z_id;
bcopy(sname, (char *)(lr + 1), snamesize);
bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize);
+ itx->itx_oid = szp->z_id;
- seq = zil_itx_assign(zilog, itx, tx);
- sdzp->z_last_itx = seq;
- tdzp->z_last_itx = seq;
- szp->z_last_itx = seq;
+ zil_itx_assign(zilog, itx, tx);
}
/*
*/
ssize_t zfs_immediate_write_sz = 32768;
-#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_trailer_t) - \
- sizeof (lr_write_t))
-
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t resid, int ioflag)
itx_wr_state_t write_state;
boolean_t slogging;
uintptr_t fsync_cnt;
+ ssize_t immediate_write_sz;
- if (zilog == NULL || zp->z_unlinked)
+ if (zil_replaying(zilog, tx) || zp->z_unlinked)
return;
- /*
- * Writes are handled in three different ways:
- *
- * WR_INDIRECT:
- * In this mode, if we need to commit the write later, then the block
- * is immediately written into the file system (using dmu_sync),
- * and a pointer to the block is put into the log record.
- * When the txg commits the block is linked in.
- * This saves additionally writing the data into the log record.
- * There are a few requirements for this to occur:
- * - write is greater than zfs_immediate_write_sz
- * - not using slogs (as slogs are assumed to always be faster
- * than writing into the main pool)
- * - the write occupies only one block
- * WR_COPIED:
- * If we know we'll immediately be committing the
- * transaction (FSYNC or FDSYNC), the we allocate a larger
- * log record here for the data and copy the data in.
- * WR_NEED_COPY:
- * Otherwise we don't allocate a buffer, and *if* we need to
- * flush the write later then a buffer is allocated and
- * we retrieve the data using the dmu.
- */
- slogging = spa_has_slogs(zilog->zl_spa);
- if (resid > zfs_immediate_write_sz && !slogging && resid <= zp->z_blksz)
+ immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ ? 0 : zfs_immediate_write_sz;
+
+ slogging = spa_has_slogs(zilog->zl_spa) &&
+ (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
+ if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz)
write_state = WR_INDIRECT;
else if (ioflag & (FSYNC | FDSYNC))
write_state = WR_COPIED;
itx = zil_itx_create(txtype, sizeof (*lr) +
(write_state == WR_COPIED ? len : 0));
lr = (lr_write_t *)&itx->itx_lr;
- if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
- zp->z_id, off, len, lr + 1) != 0) {
- kmem_free(itx, offsetof(itx_t, itx_lr) +
- itx->itx_lr.lrc_reclen);
+ if (write_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os,
+ zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
+ zil_itx_destroy(itx);
itx = zil_itx_create(txtype, sizeof (*lr));
lr = (lr_write_t *)&itx->itx_lr;
write_state = WR_NEED_COPY;
lr->lr_blkoff = 0;
BP_ZERO(&lr->lr_blkptr);
- itx->itx_private = zp->z_zfsvfs;
+ itx->itx_private = ZTOZSB(zp);
- if ((zp->z_sync_cnt != 0) || (fsync_cnt != 0) ||
- (ioflag & (FSYNC | FDSYNC)))
- itx->itx_sync = B_TRUE;
- else
+ if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) &&
+ (fsync_cnt == 0))
itx->itx_sync = B_FALSE;
- zp->z_last_itx = zil_itx_assign(zilog, itx, tx);
+ zil_itx_assign(zilog, itx, tx);
off += len;
resid -= len;
znode_t *zp, uint64_t off, uint64_t len)
{
itx_t *itx;
- uint64_t seq;
lr_truncate_t *lr;
- if (zilog == NULL || zp->z_unlinked)
+ if (zil_replaying(zilog, tx) || zp->z_unlinked)
return;
itx = zil_itx_create(txtype, sizeof (*lr));
lr->lr_length = len;
itx->itx_sync = (zp->z_sync_cnt != 0);
- seq = zil_itx_assign(zilog, itx, tx);
- zp->z_last_itx = seq;
+ zil_itx_assign(zilog, itx, tx);
}
/*
*/
void
zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp)
+ znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp)
{
itx_t *itx;
- uint64_t seq;
lr_setattr_t *lr;
xvattr_t *xvap = (xvattr_t *)vap;
size_t recsize = sizeof (lr_setattr_t);
void *start;
-
- if (zilog == NULL || zp->z_unlinked)
+ if (zil_replaying(zilog, tx) || zp->z_unlinked)
return;
/*
* for lr_attr_t + xvattr mask, mapsize and create time
* plus actual attribute values
*/
- if (vap->va_mask & AT_XVATTR)
+ if (vap->va_mask & ATTR_XVATTR)
recsize = sizeof (*lr) + ZIL_XVAT_SIZE(xvap->xva_mapsize);
if (fuidp)
lr->lr_foid = zp->z_id;
lr->lr_mask = (uint64_t)mask_applied;
lr->lr_mode = (uint64_t)vap->va_mode;
- if ((mask_applied & AT_UID) && IS_EPHEMERAL(vap->va_uid))
+ if ((mask_applied & ATTR_UID) && IS_EPHEMERAL(vap->va_uid))
lr->lr_uid = fuidp->z_fuid_owner;
else
lr->lr_uid = (uint64_t)vap->va_uid;
- if ((mask_applied & AT_GID) && IS_EPHEMERAL(vap->va_gid))
+ if ((mask_applied & ATTR_GID) && IS_EPHEMERAL(vap->va_gid))
lr->lr_gid = fuidp->z_fuid_group;
else
lr->lr_gid = (uint64_t)vap->va_gid;
ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime);
ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime);
start = (lr_setattr_t *)(lr + 1);
- if (vap->va_mask & AT_XVATTR) {
+ if (vap->va_mask & ATTR_XVATTR) {
zfs_log_xvattr((lr_attr_t *)start, xvap);
start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize);
}
(void) zfs_log_fuid_domains(fuidp, start);
itx->itx_sync = (zp->z_sync_cnt != 0);
- seq = zil_itx_assign(zilog, itx, tx);
- zp->z_last_itx = seq;
+ zil_itx_assign(zilog, itx, tx);
}
/*
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp)
{
itx_t *itx;
- uint64_t seq;
lr_acl_v0_t *lrv0;
lr_acl_t *lr;
int txtype;
size_t txsize;
size_t aclbytes = vsecp->vsa_aclentsz;
- if (zilog == NULL || zp->z_unlinked)
+ if (zil_replaying(zilog, tx) || zp->z_unlinked)
return;
- txtype = (zp->z_zfsvfs->z_version < ZPL_VERSION_FUID) ?
+ txtype = (ZTOZSB(zp)->z_version < ZPL_VERSION_FUID) ?
TX_ACL_V0 : TX_ACL;
if (txtype == TX_ACL)
}
itx->itx_sync = (zp->z_sync_cnt != 0);
- seq = zil_itx_assign(zilog, itx, tx);
- zp->z_last_itx = seq;
+ zil_itx_assign(zilog, itx, tx);
}