X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzfs_log.c;h=5cdbb6c8d744a9ee3ee8b9b61de5ecb24ddd86b4;hb=633e8030b3582f3579a457a803c1382fc81f56e5;hp=84d64b4df6b1f5e71226a3a17b3a6c809491c453;hpb=fb5f0bc83330c8a0236c4d34a23723ac1974971a;p=zfs.git

diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c
index 84d64b4..5cdbb6c 100644
--- a/module/zfs/zfs_log.c
+++ b/module/zfs/zfs_log.c
@@ -19,10 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
+
 #include
 #include
 #include
@@ -47,14 +47,6 @@
 #include
 #include
-#define ZFS_HANDLE_REPLAY(zilog, tx) \
-	if (zilog->zl_replay) { \
-		dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); \
-		zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] = \
-		    zilog->zl_replaying_seq; \
-		return; \
-	}
-
 /*
  * These zfs_log_* functions must be called within a dmu tx, in one
  * of 2 contexts depending on zilog->z_replay:
@@ -77,7 +69,11 @@ int
 zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap)
 {
+#ifdef HAVE_XVATTR
 	int isxvattr = (vap->va_mask & AT_XVATTR);
+#else
+	int isxvattr = 0;
+#endif /* HAVE_XVATTR */
 	switch (type) {
 	case Z_FILE:
 		if (vsecp == NULL && !isxvattr)
@@ -105,6 +101,7 @@ zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap)
 	return (TX_MAX_TYPE);
 }
+#ifdef HAVE_XVATTR
 /*
  * build up the log data necessary for logging xvattr_t
  * First lr_attr_t is initialized. following the lr_attr_t
@@ -175,6 +172,15 @@ zfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
 		ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime);
 	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
 		bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ);
+	if (XVA_ISSET_REQ(xvap, XAT_REPARSE))
+		*attrs |= (xoap->xoa_reparse == 0) ? 0 :
+		    XAT0_REPARSE;
+	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE))
+		*attrs |= (xoap->xoa_offline == 0) ? 0 :
+		    XAT0_OFFLINE;
+	if (XVA_ISSET_REQ(xvap, XAT_SPARSE))
+		*attrs |= (xoap->xoa_sparse == 0) ? 0 :
+		    XAT0_SPARSE;
 }
 static void *
@@ -209,6 +215,7 @@ zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start)
 	}
 	return (start);
 }
+#endif /* HAVE_XVATTR */
 /*
  * zfs_log_create() is used to handle TX_CREATE, TX_CREATE_ATTR, TX_MKDIR,
@@ -236,23 +243,22 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
     zfs_fuid_info_t *fuidp, vattr_t *vap)
 {
 	itx_t *itx;
-	uint64_t seq;
 	lr_create_t *lr;
+#ifdef HAVE_XVATTR
 	lr_acl_create_t *lracl;
+	xvattr_t *xvap = (xvattr_t *)vap;
+#endif /* HAVE_XVATTR */
 	size_t aclsize;
 	size_t xvatsize = 0;
 	size_t txsize;
-	xvattr_t *xvap = (xvattr_t *)vap;
 	void *end;
 	size_t lrsize;
 	size_t namesize = strlen(name) + 1;
 	size_t fuidsz = 0;
-	if (zilog == NULL)
+	if (zil_replaying(zilog, tx))
 		return;
-	ZFS_HANDLE_REPLAY(zilog, tx);	/* exits if replay */
-
 	/*
 	 * If we have FUIDs present then add in space for
 	 * domains and ACE fuid's if any.
@@ -262,8 +268,10 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
 		fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t);
 	}
+#ifdef HAVE_XVATTR
 	if (vap->va_mask & AT_XVATTR)
 		xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize);
+#endif /* HAVE_XVATTR */
 	if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR ||
 	    (int)txtype == TX_CREATE || (int)txtype == TX_MKDIR ||
@@ -283,25 +291,30 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
 	lr = (lr_create_t *)&itx->itx_lr;
 	lr->lr_doid = dzp->z_id;
 	lr->lr_foid = zp->z_id;
-	lr->lr_mode = zp->z_phys->zp_mode;
-	if (!IS_EPHEMERAL(zp->z_phys->zp_uid)) {
-		lr->lr_uid = (uint64_t)zp->z_phys->zp_uid;
+	lr->lr_mode = zp->z_mode;
+	if (!IS_EPHEMERAL(zp->z_uid)) {
+		lr->lr_uid = (uint64_t)zp->z_uid;
 	} else {
 		lr->lr_uid = fuidp->z_fuid_owner;
 	}
-	if (!IS_EPHEMERAL(zp->z_phys->zp_gid)) {
-		lr->lr_gid = (uint64_t)zp->z_phys->zp_gid;
+	if (!IS_EPHEMERAL(zp->z_gid)) {
+		lr->lr_gid = (uint64_t)zp->z_gid;
 	} else {
 		lr->lr_gid = fuidp->z_fuid_group;
 	}
-	lr->lr_gen = zp->z_phys->zp_gen;
-	lr->lr_crtime[0] = zp->z_phys->zp_crtime[0];
-	lr->lr_crtime[1] = zp->z_phys->zp_crtime[1];
-	lr->lr_rdev = zp->z_phys->zp_rdev;
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen,
+	    sizeof (uint64_t));
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
+	    lr->lr_crtime, sizeof (uint64_t) * 2);
+
+	if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(zp)), &lr->lr_rdev,
+	    sizeof (lr->lr_rdev)) != 0)
+		lr->lr_rdev = 0;
 	/*
 	 * Fill in xvattr info if any
 	 */
+#ifdef HAVE_XVATTR
 	if (vap->va_mask & AT_XVATTR) {
 		zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap);
 		end = (caddr_t)lr + lrsize + xvatsize;
@@ -331,14 +344,15 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
 		end = zfs_log_fuid_ids(fuidp, end);
 		end = zfs_log_fuid_domains(fuidp, end);
 	}
+#else
+	end = (caddr_t)lr + lrsize;
+#endif /* HAVE_XVATTR */
 	/*
 	 * Now place file name in log record
 	 */
 	bcopy(name, end, namesize);
-	seq = zil_itx_assign(zilog, itx, tx);
-	dzp->z_last_itx = seq;
-	zp->z_last_itx = seq;
+	zil_itx_assign(zilog, itx, tx);
 }
 /*
@@ -346,25 +360,23 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
  */
 void
 zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
-    znode_t *dzp, char *name)
+    znode_t *dzp, char *name, uint64_t foid)
 {
 	itx_t *itx;
-	uint64_t seq;
 	lr_remove_t *lr;
 	size_t namesize = strlen(name) + 1;
-	if (zilog == NULL)
+	if (zil_replaying(zilog, tx))
 		return;
-	ZFS_HANDLE_REPLAY(zilog, tx);	/* exits if replay */
-
 	itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
 	lr = (lr_remove_t *)&itx->itx_lr;
 	lr->lr_doid = dzp->z_id;
 	bcopy(name, (char *)(lr + 1), namesize);
-	seq = zil_itx_assign(zilog, itx, tx);
-	dzp->z_last_itx = seq;
+	itx->itx_oid = foid;
+
+	zil_itx_assign(zilog, itx, tx);
 }
 /*
@@ -375,24 +387,19 @@ zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
     znode_t *dzp, znode_t *zp, char *name)
 {
 	itx_t *itx;
-	uint64_t seq;
 	lr_link_t *lr;
 	size_t namesize = strlen(name) + 1;
-	if (zilog == NULL)
+	if (zil_replaying(zilog, tx))
 		return;
-	ZFS_HANDLE_REPLAY(zilog, tx);	/* exits if replay */
-
 	itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
 	lr = (lr_link_t *)&itx->itx_lr;
 	lr->lr_doid = dzp->z_id;
 	lr->lr_link_obj = zp->z_id;
 	bcopy(name, (char *)(lr + 1), namesize);
-	seq = zil_itx_assign(zilog, itx, tx);
-	dzp->z_last_itx = seq;
-	zp->z_last_itx = seq;
+	zil_itx_assign(zilog, itx, tx);
 }
 /*
@@ -403,32 +410,28 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
     znode_t *dzp, znode_t *zp, char *name, char *link)
 {
 	itx_t *itx;
-	uint64_t seq;
 	lr_create_t *lr;
 	size_t namesize = strlen(name) + 1;
 	size_t linksize = strlen(link) + 1;
-	if (zilog == NULL)
+	if (zil_replaying(zilog, tx))
 		return;
-	ZFS_HANDLE_REPLAY(zilog, tx);	/* exits if replay */
-
 	itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize);
 	lr = (lr_create_t *)&itx->itx_lr;
 	lr->lr_doid = dzp->z_id;
 	lr->lr_foid = zp->z_id;
-	lr->lr_mode = zp->z_phys->zp_mode;
-	lr->lr_uid = zp->z_phys->zp_uid;
-	lr->lr_gid = zp->z_phys->zp_gid;
-	lr->lr_gen = zp->z_phys->zp_gen;
-	lr->lr_crtime[0] = zp->z_phys->zp_crtime[0];
-	lr->lr_crtime[1] = zp->z_phys->zp_crtime[1];
+	lr->lr_uid = zp->z_uid;
+	lr->lr_gid = zp->z_gid;
+	lr->lr_mode = zp->z_mode;
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen,
+	    sizeof (uint64_t));
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
+	    lr->lr_crtime, sizeof (uint64_t) * 2);
 	bcopy(name, (char *)(lr + 1), namesize);
 	bcopy(link, (char *)(lr + 1) + namesize, linksize);
-	seq = zil_itx_assign(zilog, itx, tx);
-	dzp->z_last_itx = seq;
-	zp->z_last_itx = seq;
+	zil_itx_assign(zilog, itx, tx);
 }
 /*
@@ -439,27 +442,22 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
     znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
 {
 	itx_t *itx;
-	uint64_t seq;
 	lr_rename_t *lr;
 	size_t snamesize = strlen(sname) + 1;
 	size_t dnamesize = strlen(dname) + 1;
-	if (zilog == NULL)
+	if (zil_replaying(zilog, tx))
 		return;
-	ZFS_HANDLE_REPLAY(zilog, tx);	/* exits if replay */
-
 	itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
 	lr = (lr_rename_t *)&itx->itx_lr;
 	lr->lr_sdoid = sdzp->z_id;
 	lr->lr_tdoid = tdzp->z_id;
 	bcopy(sname, (char *)(lr + 1), snamesize);
 	bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize);
+	itx->itx_oid = szp->z_id;
-	seq = zil_itx_assign(zilog, itx, tx);
-	sdzp->z_last_itx = seq;
-	tdzp->z_last_itx = seq;
-	szp->z_last_itx = seq;
+	zil_itx_assign(zilog, itx, tx);
 }
 /*
@@ -467,9 +465,6 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
  */
 ssize_t zfs_immediate_write_sz = 32768;
-#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_trailer_t) - \
-    sizeof (lr_write_t))
-
 void
 zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
     znode_t *zp, offset_t off, ssize_t resid, int ioflag)
@@ -477,37 +472,17 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
 	itx_wr_state_t write_state;
 	boolean_t slogging;
 	uintptr_t fsync_cnt;
+	ssize_t immediate_write_sz;
-	if (zilog == NULL || zp->z_unlinked)
+	if (zil_replaying(zilog, tx) || zp->z_unlinked)
 		return;
-	ZFS_HANDLE_REPLAY(zilog, tx);	/* exits if replay */
+	immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+	    ? 0 : zfs_immediate_write_sz;
-	/*
-	 * Writes are handled in three different ways:
-	 *
-	 * WR_INDIRECT:
-	 *    In this mode, if we need to commit the write later, then the block
-	 *    is immediately written into the file system (using dmu_sync),
-	 *    and a pointer to the block is put into the log record.
-	 *    When the txg commits the block is linked in.
-	 *    This saves additionally writing the data into the log record.
-	 *    There are a few requirements for this to occur:
-	 *	- write is greater than zfs_immediate_write_sz
-	 *	- not using slogs (as slogs are assumed to always be faster
-	 *	  than writing into the main pool)
-	 *	- the write occupies only one block
-	 * WR_COPIED:
-	 *    If we know we'll immediately be committing the
-	 *    transaction (FSYNC or FDSYNC), the we allocate a larger
-	 *    log record here for the data and copy the data in.
-	 * WR_NEED_COPY:
-	 *    Otherwise we don't allocate a buffer, and *if* we need to
-	 *    flush the write later then a buffer is allocated and
-	 *    we retrieve the data using the dmu.
-	 */
-	slogging = spa_has_slogs(zilog->zl_spa);
-	if (resid > zfs_immediate_write_sz && !slogging && resid <= zp->z_blksz)
+	slogging = spa_has_slogs(zilog->zl_spa) &&
+	    (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
+	if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz)
 		write_state = WR_INDIRECT;
 	else if (ioflag & (FSYNC | FDSYNC))
 		write_state = WR_COPIED;
@@ -534,10 +509,9 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
 		itx = zil_itx_create(txtype, sizeof (*lr) +
 		    (write_state == WR_COPIED ? len : 0));
 		lr = (lr_write_t *)&itx->itx_lr;
-		if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
-		    zp->z_id, off, len, lr + 1) != 0) {
-			kmem_free(itx, offsetof(itx_t, itx_lr) +
-			    itx->itx_lr.lrc_reclen);
+		if (write_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os,
+		    zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
+			zil_itx_destroy(itx);
 			itx = zil_itx_create(txtype, sizeof (*lr));
 			lr = (lr_write_t *)&itx->itx_lr;
 			write_state = WR_NEED_COPY;
@@ -552,15 +526,13 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
 		lr->lr_blkoff = 0;
 		BP_ZERO(&lr->lr_blkptr);
-		itx->itx_private = zp->z_zfsvfs;
+		itx->itx_private = ZTOZSB(zp);
-		if ((zp->z_sync_cnt != 0) || (fsync_cnt != 0) ||
-		    (ioflag & (FSYNC | FDSYNC)))
-			itx->itx_sync = B_TRUE;
-		else
+		if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) &&
+		    (fsync_cnt == 0))
 			itx->itx_sync = B_FALSE;
-		zp->z_last_itx = zil_itx_assign(zilog, itx, tx);
+		zil_itx_assign(zilog, itx, tx);
 		off += len;
 		resid -= len;
@@ -575,14 +547,11 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
     znode_t *zp, uint64_t off, uint64_t len)
 {
 	itx_t *itx;
-	uint64_t seq;
 	lr_truncate_t *lr;
-	if (zilog == NULL || zp->z_unlinked)
+	if (zil_replaying(zilog, tx) || zp->z_unlinked)
 		return;
-	ZFS_HANDLE_REPLAY(zilog, tx);	/* exits if replay */
-
 	itx = zil_itx_create(txtype, sizeof (*lr));
 	lr = (lr_truncate_t *)&itx->itx_lr;
 	lr->lr_foid = zp->z_id;
@@ -590,61 +559,61 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
 	lr->lr_length = len;
 	itx->itx_sync = (zp->z_sync_cnt != 0);
-	seq = zil_itx_assign(zilog, itx, tx);
-	zp->z_last_itx = seq;
+	zil_itx_assign(zilog, itx, tx);
 }
 /*
  * zfs_log_setattr() handles TX_SETATTR transactions.
  */
 void
-zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
-    znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp)
+zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp,
+    struct iattr *attr, uint_t mask_applied, zfs_fuid_info_t *fuidp)
 {
 	itx_t *itx;
-	uint64_t seq;
 	lr_setattr_t *lr;
+#ifdef HAVE_XVATTR
 	xvattr_t *xvap = (xvattr_t *)vap;
+#endif /* HAVEXVATTR */
 	size_t recsize = sizeof (lr_setattr_t);
 	void *start;
-
-	if (zilog == NULL || zp->z_unlinked)
+	if (zil_replaying(zilog, tx) || zp->z_unlinked)
 		return;
-	ZFS_HANDLE_REPLAY(zilog, tx);	/* exits if replay */
-
 	/*
 	 * If XVATTR set, then log record size needs to allow
 	 * for lr_attr_t + xvattr mask, mapsize and create time
 	 * plus actual attribute values
 	 */
-	if (vap->va_mask & AT_XVATTR)
+#ifdef HAVE_XVATTR
+	if (attr->ia_valid & AT_XVATTR)
 		recsize = sizeof (*lr) + ZIL_XVAT_SIZE(xvap->xva_mapsize);
 	if (fuidp)
 		recsize += fuidp->z_domain_str_sz;
+#endif /* HAVE_XVATTR */
 	itx = zil_itx_create(txtype, recsize);
 	lr = (lr_setattr_t *)&itx->itx_lr;
 	lr->lr_foid = zp->z_id;
 	lr->lr_mask = (uint64_t)mask_applied;
-	lr->lr_mode = (uint64_t)vap->va_mode;
-	if ((mask_applied & AT_UID) && IS_EPHEMERAL(vap->va_uid))
+	lr->lr_mode = (uint64_t)attr->ia_mode;
+	if ((mask_applied & ATTR_UID) && IS_EPHEMERAL(attr->ia_uid))
 		lr->lr_uid = fuidp->z_fuid_owner;
 	else
-		lr->lr_uid = (uint64_t)vap->va_uid;
+		lr->lr_uid = (uint64_t)attr->ia_uid;
-	if ((mask_applied & AT_GID) && IS_EPHEMERAL(vap->va_gid))
+	if ((mask_applied & ATTR_GID) && IS_EPHEMERAL(attr->ia_gid))
 		lr->lr_gid = fuidp->z_fuid_group;
 	else
-		lr->lr_gid = (uint64_t)vap->va_gid;
+		lr->lr_gid = (uint64_t)attr->ia_gid;
-	lr->lr_size = (uint64_t)vap->va_size;
-	ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime);
-	ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime);
+	lr->lr_size = (uint64_t)attr->ia_size;
+	ZFS_TIME_ENCODE(&attr->ia_atime, lr->lr_atime);
+	ZFS_TIME_ENCODE(&attr->ia_mtime, lr->lr_mtime);
 	start = (lr_setattr_t *)(lr + 1);
-	if (vap->va_mask & AT_XVATTR) {
+#ifdef HAVE_XVATTR
+	if (attr->ia_valid & ATTR_XVATTR) {
 		zfs_log_xvattr((lr_attr_t *)start, xvap);
 		start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize);
 	}
@@ -655,10 +624,10 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
 	if (fuidp)
 		(void) zfs_log_fuid_domains(fuidp, start);
+#endif /* HAVE_XVATTR */
 	itx->itx_sync = (zp->z_sync_cnt != 0);
-	seq = zil_itx_assign(zilog, itx, tx);
-	zp->z_last_itx = seq;
+	zil_itx_assign(zilog, itx, tx);
 }
 /*
@@ -669,7 +638,6 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
     vsecattr_t *vsecp, zfs_fuid_info_t *fuidp)
 {
 	itx_t *itx;
-	uint64_t seq;
 	lr_acl_v0_t *lrv0;
 	lr_acl_t *lr;
 	int txtype;
@@ -677,12 +645,10 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
 	size_t txsize;
 	size_t aclbytes = vsecp->vsa_aclentsz;
-	if (zilog == NULL || zp->z_unlinked)
+	if (zil_replaying(zilog, tx) || zp->z_unlinked)
 		return;
-	ZFS_HANDLE_REPLAY(zilog, tx);	/* exits if replay */
-
-	txtype = (zp->z_zfsvfs->z_version < ZPL_VERSION_FUID) ?
+	txtype = (ZTOZSB(zp)->z_version < ZPL_VERSION_FUID) ?
 	    TX_ACL_V0 : TX_ACL;
 	if (txtype == TX_ACL)
@@ -720,13 +686,14 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
 		start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes);
+#ifdef HAVE_XVATTR
 		if (fuidp) {
 			start = zfs_log_fuid_ids(fuidp, start);
 			(void) zfs_log_fuid_domains(fuidp, start);
 		}
+#endif /* HAVE_XVATTR */
 	}
 	itx->itx_sync = (zp->z_sync_cnt != 0);
-	seq = zil_itx_assign(zilog, itx, tx);
-	zp->z_last_itx = seq;
+	zil_itx_assign(zilog, itx, tx);
 }
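
The hunks above apply the same conversion to every zfs_log_* function: the old "zilog == NULL" check and the ZFS_HANDLE_REPLAY macro become a single zil_replaying() guard, the per-znode z_last_itx sequence bookkeeping is dropped, and the finished record is handed straight to zil_itx_assign(). The sketch below is not part of the patch; it is a minimal illustration of that resulting shape, modeled on the patched zfs_log_remove(), with a hypothetical function name and assuming the usual in-kernel declarations (itx_t, lr_remove_t, zil_itx_create, zil_itx_assign from sys/zil.h and sys/zil_impl.h, znode_t from sys/zfs_znode.h).

    #include <sys/zil.h>
    #include <sys/zil_impl.h>
    #include <sys/zfs_znode.h>

    /* Illustrative sketch only, not part of the patch above. */
    void
    zfs_log_example_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
        znode_t *dzp, char *name, uint64_t foid)
    {
        itx_t *itx;
        lr_remove_t *lr;
        size_t namesize = strlen(name) + 1;

        /*
         * Nothing is logged while the ZIL is replaying; zil_replaying()
         * also covers the dataset-dirty bookkeeping the old
         * ZFS_HANDLE_REPLAY macro performed.
         */
        if (zil_replaying(zilog, tx))
            return;

        /* Allocate an in-memory intent-log record with room for the name. */
        itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
        lr = (lr_remove_t *)&itx->itx_lr;
        lr->lr_doid = dzp->z_id;
        bcopy(name, (char *)(lr + 1), namesize);
        itx->itx_oid = foid;

        /* Hand the record to the ZIL; no z_last_itx tracking any more. */
        zil_itx_assign(zilog, itx, tx);
    }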