X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzfs_log.c;h=cbd6f1cb41a6bb518db1d4ea2d3965f27e66fde1;hb=refs%2Fheads%2Frertzinger%2Ffeature-zpool-get--p;hp=11cd4c264b5734929f24561dc2b4f3d3b199d298;hpb=172bb4bd5e4afef721dd4d2972d8680d983f144b;p=zfs.git diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 11cd4c2..cbd6f1c 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -19,10 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ + #include #include #include @@ -45,19 +45,31 @@ #include #include #include +#include /* - * All the functions in this file are used to construct the log entries - * to record transactions. They allocate * an intent log transaction - * structure (itx_t) and save within it all the information necessary to - * possibly replay the transaction. The itx is then assigned a sequence - * number and inserted in the in-memory list anchored in the zilog. + * These zfs_log_* functions must be called within a dmu tx, in one + * of 2 contexts depending on zilog->z_replay: + * + * Non replay mode + * --------------- + * We need to record the transaction so that if it is committed to + * the Intent Log then it can be replayed. An intent log transaction + * structure (itx_t) is allocated and all the information necessary to + * possibly replay the transaction is saved in it. The itx is then assigned + * a sequence number and inserted in the in-memory list anchored in the zilog. + * + * Replay mode + * ----------- + * We need to mark the intent log record as replayed in the log header. + * This is done in the same transaction as the replay so that they + * commit atomically. */ int zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap) { - int isxvattr = (vap->va_mask & AT_XVATTR); + int isxvattr = (vap->va_mask & ATTR_XVATTR); switch (type) { case Z_FILE: if (vsecp == NULL && !isxvattr) @@ -155,6 +167,15 @@ zfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap) ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime); if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ); + if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) + *attrs |= (xoap->xoa_reparse == 0) ? 0 : + XAT0_REPARSE; + if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) + *attrs |= (xoap->xoa_offline == 0) ? 0 : + XAT0_OFFLINE; + if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) + *attrs |= (xoap->xoa_sparse == 0) ? 0 : + XAT0_SPARSE; } static void * @@ -216,19 +237,18 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, zfs_fuid_info_t *fuidp, vattr_t *vap) { itx_t *itx; - uint64_t seq; lr_create_t *lr; lr_acl_create_t *lracl; - size_t aclsize; + xvattr_t *xvap = (xvattr_t *)vap; + size_t aclsize = 0; size_t xvatsize = 0; size_t txsize; - xvattr_t *xvap = (xvattr_t *)vap; void *end; size_t lrsize; size_t namesize = strlen(name) + 1; size_t fuidsz = 0; - if (zilog == NULL) + if (zil_replaying(zilog, tx)) return; /* @@ -240,7 +260,7 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t); } - if (vap->va_mask & AT_XVATTR) + if (vap->va_mask & ATTR_XVATTR) xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize); if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR || @@ -261,26 +281,30 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, lr = (lr_create_t *)&itx->itx_lr; lr->lr_doid = dzp->z_id; lr->lr_foid = zp->z_id; - lr->lr_mode = zp->z_phys->zp_mode; - if (!IS_EPHEMERAL(zp->z_phys->zp_uid)) { - lr->lr_uid = (uint64_t)zp->z_phys->zp_uid; + lr->lr_mode = zp->z_mode; + if (!IS_EPHEMERAL(zp->z_uid)) { + lr->lr_uid = (uint64_t)zp->z_uid; } else { lr->lr_uid = fuidp->z_fuid_owner; } - if (!IS_EPHEMERAL(zp->z_phys->zp_gid)) { - lr->lr_gid = (uint64_t)zp->z_phys->zp_gid; + if (!IS_EPHEMERAL(zp->z_gid)) { + lr->lr_gid = (uint64_t)zp->z_gid; } else { lr->lr_gid = fuidp->z_fuid_group; } - lr->lr_gen = zp->z_phys->zp_gen; - lr->lr_crtime[0] = zp->z_phys->zp_crtime[0]; - lr->lr_crtime[1] = zp->z_phys->zp_crtime[1]; - lr->lr_rdev = zp->z_phys->zp_rdev; + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen, + sizeof (uint64_t)); + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)), + lr->lr_crtime, sizeof (uint64_t) * 2); + + if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(zp)), &lr->lr_rdev, + sizeof (lr->lr_rdev)) != 0) + lr->lr_rdev = 0; /* * Fill in xvattr info if any */ - if (vap->va_mask & AT_XVATTR) { + if (vap->va_mask & ATTR_XVATTR) { zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap); end = (caddr_t)lr + lrsize + xvatsize; } else { @@ -314,9 +338,7 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, */ bcopy(name, end, namesize); - seq = zil_itx_assign(zilog, itx, tx); - dzp->z_last_itx = seq; - zp->z_last_itx = seq; + zil_itx_assign(zilog, itx, tx); } /* @@ -324,14 +346,13 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, */ void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, - znode_t *dzp, char *name) + znode_t *dzp, char *name, uint64_t foid) { itx_t *itx; - uint64_t seq; lr_remove_t *lr; size_t namesize = strlen(name) + 1; - if (zilog == NULL) + if (zil_replaying(zilog, tx)) return; itx = zil_itx_create(txtype, sizeof (*lr) + namesize); @@ -339,8 +360,9 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, lr->lr_doid = dzp->z_id; bcopy(name, (char *)(lr + 1), namesize); - seq = zil_itx_assign(zilog, itx, tx); - dzp->z_last_itx = seq; + itx->itx_oid = foid; + + zil_itx_assign(zilog, itx, tx); } /* @@ -351,11 +373,10 @@ zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *dzp, znode_t *zp, char *name) { itx_t *itx; - uint64_t seq; lr_link_t *lr; size_t namesize = strlen(name) + 1; - if (zilog == NULL) + if (zil_replaying(zilog, tx)) return; itx = zil_itx_create(txtype, sizeof (*lr) + namesize); @@ -364,9 +385,7 @@ zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, lr->lr_link_obj = zp->z_id; bcopy(name, (char *)(lr + 1), namesize); - seq = zil_itx_assign(zilog, itx, tx); - dzp->z_last_itx = seq; - zp->z_last_itx = seq; + zil_itx_assign(zilog, itx, tx); } /* @@ -377,30 +396,28 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *dzp, znode_t *zp, char *name, char *link) { itx_t *itx; - uint64_t seq; lr_create_t *lr; size_t namesize = strlen(name) + 1; size_t linksize = strlen(link) + 1; - if (zilog == NULL) + if (zil_replaying(zilog, tx)) return; itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize); lr = (lr_create_t *)&itx->itx_lr; lr->lr_doid = dzp->z_id; lr->lr_foid = zp->z_id; - lr->lr_mode = zp->z_phys->zp_mode; - lr->lr_uid = zp->z_phys->zp_uid; - lr->lr_gid = zp->z_phys->zp_gid; - lr->lr_gen = zp->z_phys->zp_gen; - lr->lr_crtime[0] = zp->z_phys->zp_crtime[0]; - lr->lr_crtime[1] = zp->z_phys->zp_crtime[1]; + lr->lr_uid = zp->z_uid; + lr->lr_gid = zp->z_gid; + lr->lr_mode = zp->z_mode; + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen, + sizeof (uint64_t)); + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)), + lr->lr_crtime, sizeof (uint64_t) * 2); bcopy(name, (char *)(lr + 1), namesize); bcopy(link, (char *)(lr + 1) + namesize, linksize); - seq = zil_itx_assign(zilog, itx, tx); - dzp->z_last_itx = seq; - zp->z_last_itx = seq; + zil_itx_assign(zilog, itx, tx); } /* @@ -411,12 +428,11 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp) { itx_t *itx; - uint64_t seq; lr_rename_t *lr; size_t snamesize = strlen(sname) + 1; size_t dnamesize = strlen(dname) + 1; - if (zilog == NULL) + if (zil_replaying(zilog, tx)) return; itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize); @@ -425,20 +441,15 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, lr->lr_tdoid = tdzp->z_id; bcopy(sname, (char *)(lr + 1), snamesize); bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize); + itx->itx_oid = szp->z_id; - seq = zil_itx_assign(zilog, itx, tx); - sdzp->z_last_itx = seq; - tdzp->z_last_itx = seq; - szp->z_last_itx = seq; + zil_itx_assign(zilog, itx, tx); } /* * zfs_log_write() handles TX_WRITE transactions. */ -ssize_t zfs_immediate_write_sz = 32768; - -#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_trailer_t) - \ - sizeof (lr_write_t)) +long zfs_immediate_write_sz = 32768; void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, @@ -447,35 +458,17 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, itx_wr_state_t write_state; boolean_t slogging; uintptr_t fsync_cnt; + ssize_t immediate_write_sz; - if (zilog == NULL || zp->z_unlinked) + if (zil_replaying(zilog, tx) || zp->z_unlinked) return; - /* - * Writes are handled in three different ways: - * - * WR_INDIRECT: - * In this mode, if we need to commit the write later, then the block - * is immediately written into the file system (using dmu_sync), - * and a pointer to the block is put into the log record. - * When the txg commits the block is linked in. - * This saves additionally writing the data into the log record. - * There are a few requirements for this to occur: - * - write is greater than zfs_immediate_write_sz - * - not using slogs (as slogs are assumed to always be faster - * than writing into the main pool) - * - the write occupies only one block - * WR_COPIED: - * If we know we'll immediately be committing the - * transaction (FSYNC or FDSYNC), the we allocate a larger - * log record here for the data and copy the data in. - * WR_NEED_COPY: - * Otherwise we don't allocate a buffer, and *if* we need to - * flush the write later then a buffer is allocated and - * we retrieve the data using the dmu. - */ - slogging = spa_has_slogs(zilog->zl_spa); - if (resid > zfs_immediate_write_sz && !slogging && resid <= zp->z_blksz) + immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) + ? 0 : (ssize_t)zfs_immediate_write_sz; + + slogging = spa_has_slogs(zilog->zl_spa) && + (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); + if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz) write_state = WR_INDIRECT; else if (ioflag & (FSYNC | FDSYNC)) write_state = WR_COPIED; @@ -502,10 +495,9 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, itx = zil_itx_create(txtype, sizeof (*lr) + (write_state == WR_COPIED ? len : 0)); lr = (lr_write_t *)&itx->itx_lr; - if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os, - zp->z_id, off, len, lr + 1) != 0) { - kmem_free(itx, offsetof(itx_t, itx_lr) + - itx->itx_lr.lrc_reclen); + if (write_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os, + zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { + zil_itx_destroy(itx); itx = zil_itx_create(txtype, sizeof (*lr)); lr = (lr_write_t *)&itx->itx_lr; write_state = WR_NEED_COPY; @@ -520,15 +512,13 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, lr->lr_blkoff = 0; BP_ZERO(&lr->lr_blkptr); - itx->itx_private = zp->z_zfsvfs; + itx->itx_private = ZTOZSB(zp); - if ((zp->z_sync_cnt != 0) || (fsync_cnt != 0) || - (ioflag & (FSYNC | FDSYNC))) - itx->itx_sync = B_TRUE; - else + if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) && + (fsync_cnt == 0)) itx->itx_sync = B_FALSE; - zp->z_last_itx = zil_itx_assign(zilog, itx, tx); + zil_itx_assign(zilog, itx, tx); off += len; resid -= len; @@ -543,10 +533,9 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, uint64_t off, uint64_t len) { itx_t *itx; - uint64_t seq; lr_truncate_t *lr; - if (zilog == NULL || zp->z_unlinked) + if (zil_replaying(zilog, tx) || zp->z_unlinked) return; itx = zil_itx_create(txtype, sizeof (*lr)); @@ -556,8 +545,7 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, lr->lr_length = len; itx->itx_sync = (zp->z_sync_cnt != 0); - seq = zil_itx_assign(zilog, itx, tx); - zp->z_last_itx = seq; + zil_itx_assign(zilog, itx, tx); } /* @@ -565,17 +553,15 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, */ void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, - znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp) + znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp) { itx_t *itx; - uint64_t seq; lr_setattr_t *lr; xvattr_t *xvap = (xvattr_t *)vap; size_t recsize = sizeof (lr_setattr_t); void *start; - - if (zilog == NULL || zp->z_unlinked) + if (zil_replaying(zilog, tx) || zp->z_unlinked) return; /* @@ -583,7 +569,7 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, * for lr_attr_t + xvattr mask, mapsize and create time * plus actual attribute values */ - if (vap->va_mask & AT_XVATTR) + if (vap->va_mask & ATTR_XVATTR) recsize = sizeof (*lr) + ZIL_XVAT_SIZE(xvap->xva_mapsize); if (fuidp) @@ -594,12 +580,12 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, lr->lr_foid = zp->z_id; lr->lr_mask = (uint64_t)mask_applied; lr->lr_mode = (uint64_t)vap->va_mode; - if ((mask_applied & AT_UID) && IS_EPHEMERAL(vap->va_uid)) + if ((mask_applied & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) lr->lr_uid = fuidp->z_fuid_owner; else lr->lr_uid = (uint64_t)vap->va_uid; - if ((mask_applied & AT_GID) && IS_EPHEMERAL(vap->va_gid)) + if ((mask_applied & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) lr->lr_gid = fuidp->z_fuid_group; else lr->lr_gid = (uint64_t)vap->va_gid; @@ -608,7 +594,7 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime); ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime); start = (lr_setattr_t *)(lr + 1); - if (vap->va_mask & AT_XVATTR) { + if (vap->va_mask & ATTR_XVATTR) { zfs_log_xvattr((lr_attr_t *)start, xvap); start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize); } @@ -621,8 +607,7 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, (void) zfs_log_fuid_domains(fuidp, start); itx->itx_sync = (zp->z_sync_cnt != 0); - seq = zil_itx_assign(zilog, itx, tx); - zp->z_last_itx = seq; + zil_itx_assign(zilog, itx, tx); } /* @@ -633,7 +618,6 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, vsecattr_t *vsecp, zfs_fuid_info_t *fuidp) { itx_t *itx; - uint64_t seq; lr_acl_v0_t *lrv0; lr_acl_t *lr; int txtype; @@ -641,10 +625,10 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, size_t txsize; size_t aclbytes = vsecp->vsa_aclentsz; - if (zilog == NULL || zp->z_unlinked) + if (zil_replaying(zilog, tx) || zp->z_unlinked) return; - txtype = (zp->z_zfsvfs->z_version < ZPL_VERSION_FUID) ? + txtype = (ZTOZSB(zp)->z_version < ZPL_VERSION_FUID) ? TX_ACL_V0 : TX_ACL; if (txtype == TX_ACL) @@ -689,6 +673,10 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, } itx->itx_sync = (zp->z_sync_cnt != 0); - seq = zil_itx_assign(zilog, itx, tx); - zp->z_last_itx = seq; + zil_itx_assign(zilog, itx, tx); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +module_param(zfs_immediate_write_sz, long, 0644); +MODULE_PARM_DESC(zfs_immediate_write_sz, "Largest data block to write to zil"); +#endif