X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fdmu_send.c;h=2f06132116e8da7f5c9b5f44021f77ceafc7037b;hb=bafc4e9e2ac3b4783a598afb1d2f048ab30c903c;hp=6b00b73b43be124143e7582da4ba1edcefb04f1b;hpb=428870ff734fdaccc342b33fc53cf94724409a46;p=zfs.git diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 6b00b73..2f06132 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -20,6 +20,9 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -42,6 +45,10 @@ #include #include #include +#include + +/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */ +int zfs_send_corrupt_data = B_FALSE; static char *dmu_recv_tag = "dmu_recv_tag"; @@ -364,8 +371,20 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, if (dsl_read(NULL, spa, bp, pbuf, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, - ZIO_FLAG_CANFAIL, &aflags, zb) != 0) - return (EIO); + ZIO_FLAG_CANFAIL, &aflags, zb) != 0) { + if (zfs_send_corrupt_data) { + uint64_t *ptr; + /* Send a block filled with 0x"zfs badd bloc" */ + abuf = arc_buf_alloc(spa, blksz, &abuf, + ARC_BUFC_DATA); + for (ptr = abuf->b_data; + (char *)ptr < (char *)abuf->b_data + blksz; + ptr++) + *ptr = 0x2f5baddb10c; + } else { + return (EIO); + } + } err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz, blksz, bp, abuf->b_data); @@ -494,6 +513,85 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, return (0); } +int +dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, + uint64_t *sizep) +{ + dsl_dataset_t *ds = tosnap->os_dsl_dataset; + dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + int err; + uint64_t size, recordsize; + + /* tosnap must be a snapshot */ + if (ds->ds_phys->ds_next_snap_obj == 0) + return (EINVAL); + + /* fromsnap must be an earlier snapshot from the same fs as tosnap */ + if (fromds && (ds->ds_dir != fromds->ds_dir || + fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) + return (EXDEV); + + if (fromorigin) { + if (fromsnap) + return (EINVAL); + + if (dsl_dir_is_clone(ds->ds_dir)) { + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); + rw_exit(&dp->dp_config_rwlock); + if (err) + return (err); + } else { + fromorigin = B_FALSE; + } + } + + /* Get uncompressed size estimate of changed data. */ + if (fromds == NULL) { + size = ds->ds_phys->ds_uncompressed_bytes; + } else { + uint64_t used, comp; + err = dsl_dataset_space_written(fromds, ds, + &used, &comp, &size); + if (fromorigin) + dsl_dataset_rele(fromds, FTAG); + if (err) + return (err); + } + + /* + * Assume that space (both on-disk and in-stream) is dominated by + * data. We will adjust for indirect blocks and the copies property, + * but ignore per-object space used (eg, dnodes and DRR_OBJECT records). + */ + + /* + * Subtract out approximate space used by indirect blocks. + * Assume most space is used by data blocks (non-indirect, non-dnode). + * Assume all blocks are recordsize. Assume ditto blocks and + * internal fragmentation counter out compression. + * + * Therefore, space used by indirect blocks is sizeof(blkptr_t) per + * block, which we observe in practice. + */ + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_prop_get_ds(ds, "recordsize", + sizeof (recordsize), 1, &recordsize, NULL); + rw_exit(&dp->dp_config_rwlock); + if (err) + return (err); + size -= size / recordsize * sizeof (blkptr_t); + + /* Add in the space for the record associated with each block. */ + size += size / recordsize * sizeof (dmu_replay_record_t); + + *sizep = size; + + return (0); +} + struct recvbeginsyncarg { const char *tofs; const char *tosnap; @@ -573,6 +671,14 @@ recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds)) return (ETXTBSY); + /* new snapshot name must not exist */ + err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val); + if (err == 0) + return (EEXIST); + if (err != ENOENT) + return (err); + if (rbsa->fromguid) { /* if incremental, most recent snapshot must match fromguid */ if (ds->ds_prev == NULL) @@ -620,13 +726,6 @@ recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) if (err != ENOENT) return (err); - /* new snapshot name must not exist */ - err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, - ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val); - if (err == 0) - return (EEXIST); - if (err != ENOENT) - return (err); return (0); } @@ -661,7 +760,6 @@ recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx) dp->dp_spa, tx, "dataset = %lld", dsobj); } - static boolean_t dmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb) { @@ -786,7 +884,7 @@ dmu_recv_begin(char *tofs, char *tosnap, char *top_ds, struct drr_begin *drrb, return (err); if (dmu_recv_verify_features(ds, drrb)) { - dsl_dataset_rele(ds, dmu_recv_tag); + dsl_dataset_rele(ds, FTAG); return (ENOTSUP); } @@ -810,7 +908,7 @@ struct restorearg { uint64_t voff; int bufsize; /* amount of memory allocated for buf */ zio_cksum_t cksum; - avl_tree_t guid_to_ds_map; + avl_tree_t *guid_to_ds_map; }; typedef struct guid_map_entry { @@ -832,59 +930,19 @@ guid_compare(const void *arg1, const void *arg2) return (0); } -/* - * This function is a callback used by dmu_objset_find() (which - * enumerates the object sets) to build an avl tree that maps guids - * to datasets. The resulting table is used when processing DRR_WRITE_BYREF - * send stream records. These records, which are used in dedup'ed - * streams, do not contain data themselves, but refer to a copy - * of the data block that has already been written because it was - * earlier in the stream. That previous copy is identified by the - * guid of the dataset with the referenced data. - */ -int -find_ds_by_guid(const char *name, void *arg) +static void +free_guid_map_onexit(void *arg) { - avl_tree_t *guid_map = arg; - dsl_dataset_t *ds, *snapds; + avl_tree_t *ca = arg; + void *cookie = NULL; guid_map_entry_t *gmep; - dsl_pool_t *dp; - int err; - uint64_t lastobj, firstobj; - - if (dsl_dataset_hold(name, FTAG, &ds) != 0) - return (0); - - dp = ds->ds_dir->dd_pool; - rw_enter(&dp->dp_config_rwlock, RW_READER); - firstobj = ds->ds_dir->dd_phys->dd_origin_obj; - lastobj = ds->ds_phys->ds_prev_snap_obj; - while (lastobj != firstobj) { - err = dsl_dataset_hold_obj(dp, lastobj, guid_map, &snapds); - if (err) { - /* - * Skip this snapshot and move on. It's not - * clear why this would ever happen, but the - * remainder of the snapshot streadm can be - * processed. - */ - rw_exit(&dp->dp_config_rwlock); - dsl_dataset_rele(ds, FTAG); - return (0); - } - - gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP); - gmep->guid = snapds->ds_phys->ds_guid; - gmep->gme_ds = snapds; - avl_add(guid_map, gmep); - lastobj = snapds->ds_phys->ds_prev_snap_obj; + while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { + dsl_dataset_rele(gmep->gme_ds, ca); + kmem_free(gmep, sizeof (guid_map_entry_t)); } - - rw_exit(&dp->dp_config_rwlock); - dsl_dataset_rele(ds, FTAG); - - return (0); + avl_destroy(ca); + kmem_free(ca, sizeof (avl_tree_t)); } static void * @@ -921,7 +979,7 @@ restore_read(struct restorearg *ra, int len) return (rv); } -static void +noinline static void backup_byteswap(dmu_replay_record_t *drr) { #define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) @@ -996,12 +1054,14 @@ backup_byteswap(dmu_replay_record_t *drr) DO64(drr_end.drr_checksum.zc_word[3]); DO64(drr_end.drr_toguid); break; + default: + break; } #undef DO64 #undef DO32 } -static int +noinline static int restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) { int err; @@ -1085,7 +1145,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) } /* ARGSUSED */ -static int +noinline static int restore_freeobjects(struct restorearg *ra, objset_t *os, struct drr_freeobjects *drrfo) { @@ -1109,7 +1169,7 @@ restore_freeobjects(struct restorearg *ra, objset_t *os, return (0); } -static int +noinline static int restore_write(struct restorearg *ra, objset_t *os, struct drr_write *drrw) { @@ -1173,7 +1233,7 @@ restore_write_byref(struct restorearg *ra, objset_t *os, */ if (drrwbr->drr_toguid != drrwbr->drr_refguid) { gmesrch.guid = drrwbr->drr_refguid; - if ((gmep = avl_find(&ra->guid_to_ds_map, &gmesrch, + if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch, &where)) == NULL) { return (EINVAL); } @@ -1183,8 +1243,9 @@ restore_write_byref(struct restorearg *ra, objset_t *os, ref_os = os; } - if (err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, - drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH)) + err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, + drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH); + if (err) return (err); tx = dmu_tx_create(os); @@ -1254,7 +1315,7 @@ restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) } /* ARGSUSED */ -static int +noinline static int restore_free(struct restorearg *ra, objset_t *os, struct drr_free *drrf) { @@ -1276,13 +1337,13 @@ restore_free(struct restorearg *ra, objset_t *os, * NB: callers *must* call dmu_recv_end() if this succeeds. */ int -dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp) +dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, + int cleanup_fd, uint64_t *action_handlep) { struct restorearg ra = { 0 }; dmu_replay_record_t *drr; objset_t *os; zio_cksum_t pcksum; - guid_map_entry_t *gmep; int featureflags; if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) @@ -1318,7 +1379,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp) ra.vp = vp; ra.voff = *voffp; ra.bufsize = 1<<20; - ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); + ra.buf = vmem_alloc(ra.bufsize, KM_SLEEP); /* these were verified in dmu_recv_begin */ ASSERT(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo) == @@ -1336,12 +1397,37 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp) /* if this stream is dedup'ed, set up the avl tree for guid mapping */ if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { - avl_create(&ra.guid_to_ds_map, guid_compare, - sizeof (guid_map_entry_t), - offsetof(guid_map_entry_t, avlnode)); - (void) dmu_objset_find(drc->drc_top_ds, find_ds_by_guid, - (void *)&ra.guid_to_ds_map, - DS_FIND_CHILDREN); + minor_t minor; + + if (cleanup_fd == -1) { + ra.err = EBADF; + goto out; + } + ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); + if (ra.err) { + cleanup_fd = -1; + goto out; + } + + if (*action_handlep == 0) { + ra.guid_to_ds_map = + kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); + avl_create(ra.guid_to_ds_map, guid_compare, + sizeof (guid_map_entry_t), + offsetof(guid_map_entry_t, avlnode)); + ra.err = zfs_onexit_add_cb(minor, + free_guid_map_onexit, ra.guid_to_ds_map, + action_handlep); + if (ra.err) + goto out; + } else { + ra.err = zfs_onexit_cb_data(minor, *action_handlep, + (void **)&ra.guid_to_ds_map); + if (ra.err) + goto out; + } + + drc->drc_guid_to_ds_map = ra.guid_to_ds_map; } /* @@ -1423,6 +1509,9 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp) ASSERT(ra.err != 0); out: + if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) + zfs_onexit_fd_rele(cleanup_fd); + if (ra.err != 0) { /* * destroy what we created, so we don't leave it in the @@ -1438,17 +1527,7 @@ out: } } - if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { - void *cookie = NULL; - - while (gmep = avl_destroy_nodes(&ra.guid_to_ds_map, &cookie)) { - dsl_dataset_rele(gmep->gme_ds, &ra.guid_to_ds_map); - kmem_free(gmep, sizeof (guid_map_entry_t)); - } - avl_destroy(&ra.guid_to_ds_map); - } - - kmem_free(ra.buf, ra.bufsize); + vmem_free(ra.buf, ra.bufsize); *voffp = ra.voff; return (ra.err); } @@ -1487,11 +1566,35 @@ recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) } static int +add_ds_to_guidmap(avl_tree_t *guid_map, dsl_dataset_t *ds) +{ + dsl_pool_t *dp = ds->ds_dir->dd_pool; + uint64_t snapobj = ds->ds_phys->ds_prev_snap_obj; + dsl_dataset_t *snapds; + guid_map_entry_t *gmep; + int err; + + ASSERT(guid_map != NULL); + + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_dataset_hold_obj(dp, snapobj, guid_map, &snapds); + if (err == 0) { + gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP); + gmep->guid = snapds->ds_phys->ds_guid; + gmep->gme_ds = snapds; + avl_add(guid_map, gmep); + } + + rw_exit(&dp->dp_config_rwlock); + return (err); +} + +static int dmu_recv_existing_end(dmu_recv_cookie_t *drc) { struct recvendsyncarg resa; dsl_dataset_t *ds = drc->drc_logical_ds; - int err; + int err, myerr; /* * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() @@ -1526,8 +1629,11 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc) out: mutex_exit(&ds->ds_recvlock); + if (err == 0 && drc->drc_guid_to_ds_map != NULL) + (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); dsl_dataset_disown(ds, dmu_recv_tag); - (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); + myerr = dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); + ASSERT3U(myerr, ==, 0); return (err); } @@ -1555,6 +1661,8 @@ dmu_recv_new_end(dmu_recv_cookie_t *drc) /* clean up the fs we just recv'd into */ (void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE); } else { + if (drc->drc_guid_to_ds_map != NULL) + (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); /* release the hold from dmu_recv_begin */ dsl_dataset_disown(ds, dmu_recv_tag); }