X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzfs_replay.c;h=2c258948b3fdf7ec00d8c9368f53cdd558266f8e;hb=a405c8a665bf7d96880c97bcb783cf84d04e3e76;hp=85b79703a78074a52963d2771a6f16ea5320d52c;hpb=172bb4bd5e4afef721dd4d2972d8680d983f144b;p=zfs.git diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c index 85b7970..2c25894 100644 --- a/module/zfs/zfs_replay.c +++ b/module/zfs/zfs_replay.c @@ -19,11 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" +#ifdef HAVE_ZPL #include #include @@ -42,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +129,12 @@ zfs_replay_xvattr(lr_attr_t *lrattr, xvattr_t *xvap) ZFS_TIME_DECODE(&xoap->xoa_createtime, crtime); if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) bcopy(scanstamp, xoap->xoa_av_scanstamp, AV_SCANSTAMP_SZ); + if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) + xoap->xoa_reparse = ((*attrs & XAT0_REPARSE) != 0); + if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) + xoap->xoa_offline = ((*attrs & XAT0_OFFLINE) != 0); + if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) + xoap->xoa_sparse = ((*attrs & XAT0_SPARSE) != 0); } static int @@ -275,9 +281,9 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs, uint64_t txtype; int error; + txtype = (lr->lr_common.lrc_txtype & ~TX_CI); if (byteswap) { byteswap_uint64_array(lracl, sizeof (*lracl)); - txtype = (int)lr->lr_common.lrc_txtype; if (txtype == TX_CREATE_ACL_ATTR || txtype == TX_MKDIR_ACL_ATTR) { lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); @@ -305,7 +311,7 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs, /* * All forms of zfs create (create, mkdir, mkxattrdir, symlink) * eventually end up in zfs_mknode(), which assigns the object's - * creation time and generation number. The generic VOP_CREATE() + * creation time and generation number. The generic zfs_create() * doesn't have either concept, so we smuggle the values inside * the vattr's otherwise unused va_ctime and va_nblocks fields. */ @@ -318,7 +324,7 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs, if (lr->lr_common.lrc_txtype & TX_CI) vflg |= FIGNORECASE; - switch ((int)lr->lr_common.lrc_txtype) { + switch (txtype) { case TX_CREATE_ACL: aclstart = (caddr_t)(lracl + 1); fuidstart = (caddr_t)aclstart + @@ -348,7 +354,7 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs, lr->lr_uid, lr->lr_gid); } - error = VOP_CREATE(ZTOV(dzp), name, &xva.xva_vattr, + error = zfs_create(ZTOV(dzp), name, &xva.xva_vattr, 0, 0, &vp, kcred, vflg, NULL, &vsec); break; case TX_MKDIR_ACL: @@ -378,7 +384,7 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs, (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, lr->lr_uid, lr->lr_gid); } - error = VOP_MKDIR(ZTOV(dzp), name, &xva.xva_vattr, + error = zfs_mkdir(ZTOV(dzp), name, &xva.xva_vattr, &vp, kcred, NULL, vflg, &vsec); break; default: @@ -391,7 +397,8 @@ bail: VN_RELE(ZTOV(dzp)); - zfs_fuid_info_free(zfsvfs->z_fuid_replay); + if (zfsvfs->z_fuid_replay) + zfs_fuid_info_free(zfsvfs->z_fuid_replay); zfsvfs->z_fuid_replay = NULL; return (error); @@ -413,9 +420,9 @@ zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap) uint64_t txtype; int error; + txtype = (lr->lr_common.lrc_txtype & ~TX_CI); if (byteswap) { byteswap_uint64_array(lr, sizeof (*lr)); - txtype = (int)lr->lr_common.lrc_txtype; if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR) zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); } @@ -431,7 +438,7 @@ zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap) /* * All forms of zfs create (create, mkdir, mkxattrdir, symlink) * eventually end up in zfs_mknode(), which assigns the object's - * creation time and generation number. The generic VOP_CREATE() + * creation time and generation number. The generic zfs_create() * doesn't have either concept, so we smuggle the values inside * the vattr's otherwise unused va_ctime and va_nblocks fields. */ @@ -460,7 +467,7 @@ zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap) lr->lr_uid, lr->lr_gid); } - switch ((int)lr->lr_common.lrc_txtype) { + switch (txtype) { case TX_CREATE_ATTR: lrattr = (lr_attr_t *)(caddr_t)(lr + 1); xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); @@ -476,7 +483,7 @@ zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap) if (name == NULL) name = (char *)start; - error = VOP_CREATE(ZTOV(dzp), name, &xva.xva_vattr, + error = zfs_create(ZTOV(dzp), name, &xva.xva_vattr, 0, 0, &vp, kcred, vflg, NULL, NULL); break; case TX_MKDIR_ATTR: @@ -494,17 +501,16 @@ zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap) if (name == NULL) name = (char *)(lr + 1); - error = VOP_MKDIR(ZTOV(dzp), name, &xva.xva_vattr, + error = zfs_mkdir(ZTOV(dzp), name, &xva.xva_vattr, &vp, kcred, NULL, vflg, NULL); break; case TX_MKXATTR: - name = (char *)(lr + 1); error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &vp, kcred); break; case TX_SYMLINK: name = (char *)(lr + 1); link = name + strlen(name) + 1; - error = VOP_SYMLINK(ZTOV(dzp), name, &xva.xva_vattr, + error = zfs_symlink(ZTOV(dzp), name, &xva.xva_vattr, link, kcred, NULL, vflg); break; default: @@ -542,10 +548,10 @@ zfs_replay_remove(zfsvfs_t *zfsvfs, lr_remove_t *lr, boolean_t byteswap) switch ((int)lr->lr_common.lrc_txtype) { case TX_REMOVE: - error = VOP_REMOVE(ZTOV(dzp), name, kcred, NULL, vflg); + error = zfs_remove(ZTOV(dzp), name, kcred, NULL, vflg); break; case TX_RMDIR: - error = VOP_RMDIR(ZTOV(dzp), name, NULL, kcred, NULL, vflg); + error = zfs_rmdir(ZTOV(dzp), name, NULL, kcred, NULL, vflg); break; default: error = ENOTSUP; @@ -578,7 +584,7 @@ zfs_replay_link(zfsvfs_t *zfsvfs, lr_link_t *lr, boolean_t byteswap) if (lr->lr_common.lrc_txtype & TX_CI) vflg |= FIGNORECASE; - error = VOP_LINK(ZTOV(dzp), ZTOV(zp), name, kcred, NULL, vflg); + error = zfs_link(ZTOV(dzp), ZTOV(zp), name, kcred, NULL, vflg); VN_RELE(ZTOV(zp)); VN_RELE(ZTOV(dzp)); @@ -609,7 +615,7 @@ zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, boolean_t byteswap) if (lr->lr_common.lrc_txtype & TX_CI) vflg |= FIGNORECASE; - error = VOP_RENAME(ZTOV(sdzp), sname, ZTOV(tdzp), tname, kcred, + error = zfs_rename(ZTOV(sdzp), sname, ZTOV(tdzp), tname, kcred, NULL, vflg); VN_RELE(ZTOV(tdzp)); @@ -625,6 +631,7 @@ zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap) znode_t *zp; int error; ssize_t resid; + uint64_t eod, offset, length; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -640,8 +647,88 @@ zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap) return (error); } - error = vn_rdwr(UIO_WRITE, ZTOV(zp), data, lr->lr_length, - lr->lr_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); + offset = lr->lr_offset; + length = lr->lr_length; + eod = offset + length; /* end of data for this write */ + + /* + * This may be a write from a dmu_sync() for a whole block, + * and may extend beyond the current end of the file. + * We can't just replay what was written for this TX_WRITE as + * a future TX_WRITE2 may extend the eof and the data for that + * write needs to be there. So we write the whole block and + * reduce the eof. This needs to be done within the single dmu + * transaction created within vn_rdwr -> zfs_write. So a possible + * new end of file is passed through in zfsvfs->z_replay_eof + */ + + zfsvfs->z_replay_eof = 0; /* 0 means don't change end of file */ + + /* If it's a dmu_sync() block, write the whole block */ + if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { + uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); + if (length < blocksize) { + offset -= offset % blocksize; + length = blocksize; + } + if (zp->z_size < eod) + zfsvfs->z_replay_eof = eod; + } + + error = vn_rdwr(UIO_WRITE, ZTOV(zp), data, length, offset, + UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); + + VN_RELE(ZTOV(zp)); + zfsvfs->z_replay_eof = 0; /* safety */ + + return (error); +} + +/* + * TX_WRITE2 are only generated when dmu_sync() returns EALREADY + * meaning the pool block is already being synced. So now that we always write + * out full blocks, all we have to do is expand the eof if + * the file is grown. + */ +static int +zfs_replay_write2(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap) +{ + znode_t *zp; + int error; + uint64_t end; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) + return (error); + +top: + end = lr->lr_offset + lr->lr_length; + if (end > zp->z_size) { + dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); + + zp->z_size = end; + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + VN_RELE(ZTOV(zp)); + if (error == ERESTART) { + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + dmu_tx_abort(tx); + return (error); + } + (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), + (void *)&zp->z_size, sizeof (uint64_t), tx); + + /* Ensure the replayed seq is updated */ + (void) zil_replaying(zfsvfs->z_log, tx); + + dmu_tx_commit(tx); + } VN_RELE(ZTOV(zp)); @@ -658,16 +745,8 @@ zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap) if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); - if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) { - /* - * As we can log truncates out of order, it's possible the - * file has been removed. In this case just drop the truncate - * and return success. - */ - if (error == ENOENT) - error = 0; + if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) return (error); - } bzero(&fl, sizeof (fl)); fl.l_type = F_WRLCK; @@ -675,7 +754,7 @@ zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap) fl.l_start = lr->lr_offset; fl.l_len = lr->lr_length; - error = VOP_SPACE(ZTOV(zp), F_FREESP, &fl, FWRITE | FOFFMAX, + error = zfs_space(ZTOV(zp), F_FREESP, &fl, FWRITE | FOFFMAX, lr->lr_offset, kcred, NULL); VN_RELE(ZTOV(zp)); @@ -701,16 +780,8 @@ zfs_replay_setattr(zfsvfs_t *zfsvfs, lr_setattr_t *lr, boolean_t byteswap) zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); } - if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) { - /* - * As we can log setattrs out of order, it's possible the - * file has been removed. In this case just drop the setattr - * and return success. - */ - if (error == ENOENT) - error = 0; + if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) return (error); - } zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode, lr->lr_uid, lr->lr_gid, 0, lr->lr_foid); @@ -734,7 +805,7 @@ zfs_replay_setattr(zfsvfs_t *zfsvfs, lr_setattr_t *lr, boolean_t byteswap) zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start, lr->lr_uid, lr->lr_gid); - error = VOP_SETATTR(ZTOV(zp), vap, 0, kcred, NULL); + error = zfs_setattr(ZTOV(zp), vap, 0, kcred, NULL); zfs_fuid_info_free(zfsvfs->z_fuid_replay); zfsvfs->z_fuid_replay = NULL; @@ -756,16 +827,8 @@ zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap) zfs_oldace_byteswap(ace, lr->lr_aclcnt); } - if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) { - /* - * As we can log acls out of order, it's possible the - * file has been removed. In this case just drop the acl - * and return success. - */ - if (error == ENOENT) - error = 0; + if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) return (error); - } bzero(&vsa, sizeof (vsa)); vsa.vsa_mask = VSA_ACE | VSA_ACECNT; @@ -774,7 +837,7 @@ zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap) vsa.vsa_aclflags = 0; vsa.vsa_aclentp = ace; - error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL); + error = zfs_setsecattr(ZTOV(zp), &vsa, 0, kcred, NULL); VN_RELE(ZTOV(zp)); @@ -813,16 +876,8 @@ zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap) } } - if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) { - /* - * As we can log acls out of order, it's possible the - * file has been removed. In this case just drop the acl - * and return success. - */ - if (error == ENOENT) - error = 0; + if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) return (error); - } bzero(&vsa, sizeof (vsa)); vsa.vsa_mask = VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS; @@ -840,7 +895,7 @@ zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap) lr->lr_fuidcnt, lr->lr_domcnt, 0, 0); } - error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL); + error = zfs_setsecattr(ZTOV(zp), &vsa, 0, kcred, NULL); if (zfsvfs->z_fuid_replay) zfs_fuid_info_free(zfsvfs->z_fuid_replay); @@ -875,4 +930,6 @@ zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = { zfs_replay_create_acl, /* TX_MKDIR_ACL */ zfs_replay_create, /* TX_MKDIR_ATTR */ zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */ + zfs_replay_write2, /* TX_WRITE2 */ }; +#endif /* HAVE_ZPL */