94e64e63f9d9a14332545ba313930a24ed12ea5e
[zfs.git] / lib / libzfs / libzfs_sendrecv.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25
26 #include <assert.h>
27 #include <ctype.h>
28 #include <errno.h>
29 #include <libintl.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <strings.h>
33 #include <unistd.h>
34 #include <stddef.h>
35 #include <fcntl.h>
36 #include <sys/mount.h>
37 #include <sys/mntent.h>
38 #include <sys/mnttab.h>
39 #include <sys/avl.h>
40 #include <sys/debug.h>
41 #include <stddef.h>
42 #include <pthread.h>
43 #include <umem.h>
44
45 #include <libzfs.h>
46
47 #include "zfs_namecheck.h"
48 #include "zfs_prop.h"
49 #include "zfs_fletcher.h"
50 #include "libzfs_impl.h"
51 #include <sys/zio_checksum.h>
52 #include <sys/ddt.h>
53
54 /* in libzfs_dataset.c */
55 extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
56
57 static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t,
58     int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);
59
60 static const zio_cksum_t zero_cksum = { { 0 } };
61
62 typedef struct dedup_arg {
63         int     inputfd;
64         int     outputfd;
65         libzfs_handle_t  *dedup_hdl;
66 } dedup_arg_t;
67
/*
 * Location of a data block previously emitted in the stream; cksummer()
 * fills it from the drr_toguid, drr_object and drr_offset of a DRR_WRITE.
 */
typedef struct dataref {
	uint64_t ref_guid;	/* drr_toguid of the referenced write */
	uint64_t ref_object;	/* drr_object of the referenced write */
	uint64_t ref_offset;	/* drr_offset of the referenced write */
} dataref_t;
73
74 typedef struct dedup_entry {
75         struct dedup_entry      *dde_next;
76         zio_cksum_t dde_chksum;
77         uint64_t dde_prop;
78         dataref_t dde_ref;
79 } dedup_entry_t;
80
81 #define MAX_DDT_PHYSMEM_PERCENT         20
82 #define SMALLEST_POSSIBLE_MAX_DDT_MB            128
83
84 typedef struct dedup_table {
85         dedup_entry_t   **dedup_hash_array;
86         umem_cache_t    *ddecache;
87         uint64_t        max_ddt_size;  /* max dedup table size in bytes */
88         uint64_t        cur_ddt_size;  /* current dedup table size in bytes */
89         uint64_t        ddt_count;
90         int             numhashbits;
91         boolean_t       ddt_full;
92 } dedup_table_t;
93
/*
 * Return the 1-based position of the most significant set bit of n, that
 * is, the number of bits needed to represent n.  Returns 0 when n is 0.
 */
static int
high_order_bit(uint64_t n)
{
	int nbits = 0;

	while (n != 0) {
		nbits++;
		n >>= 1;
	}
	return (nbits);
}
103
/*
 * Read a single object of len bytes from stream into buf.
 *
 * Returns 1 when the whole object was read and 0 otherwise (end of file,
 * read error, short read, or len == 0), exactly as fread() reports for a
 * one-item request.
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
	return (fread(buf, len, 1, stream));
}
114
115 static void
116 ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
117     zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
118 {
119         dedup_entry_t   *dde;
120
121         if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
122                 if (ddt->ddt_full == B_FALSE) {
123                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
124                             "Dedup table full.  Deduplication will continue "
125                             "with existing table entries"));
126                         ddt->ddt_full = B_TRUE;
127                 }
128                 return;
129         }
130
131         if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
132             != NULL) {
133                 assert(*ddepp == NULL);
134                 dde->dde_next = NULL;
135                 dde->dde_chksum = *cs;
136                 dde->dde_prop = prop;
137                 dde->dde_ref = *dr;
138                 *ddepp = dde;
139                 ddt->cur_ddt_size += sizeof (dedup_entry_t);
140                 ddt->ddt_count++;
141         }
142 }
143
144 /*
145  * Using the specified dedup table, do a lookup for an entry with
146  * the checksum cs.  If found, return the block's reference info
147  * in *dr. Otherwise, insert a new entry in the dedup table, using
148  * the reference information specified by *dr.
149  *
150  * return value:  true - entry was found
151  *                false - entry was not found
152  */
153 static boolean_t
154 ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
155     uint64_t prop, dataref_t *dr)
156 {
157         uint32_t hashcode;
158         dedup_entry_t **ddepp;
159
160         hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
161
162         for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
163             ddepp = &((*ddepp)->dde_next)) {
164                 if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
165                     (*ddepp)->dde_prop == prop) {
166                         *dr = (*ddepp)->dde_ref;
167                         return (B_TRUE);
168                 }
169         }
170         ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
171         return (B_FALSE);
172 }
173
174 static int
175 cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
176 {
177         fletcher_4_incremental_native(buf, len, zc);
178         return (write(outfd, buf, len));
179 }
180
181 /*
182  * This function is started in a separate thread when the dedup option
183  * has been requested.  The main send thread determines the list of
184  * snapshots to be included in the send stream and makes the ioctl calls
185  * for each one.  But instead of having the ioctl send the output to the
186  * the output fd specified by the caller of zfs_send()), the
187  * ioctl is told to direct the output to a pipe, which is read by the
188  * alternate thread running THIS function.  This function does the
189  * dedup'ing by:
190  *  1. building a dedup table (the DDT)
191  *  2. doing checksums on each data block and inserting a record in the DDT
192  *  3. looking for matching checksums, and
193  *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
194  *      a duplicate block is found.
195  * The output of this function then goes to the output fd requested
196  * by the caller of zfs_send().
197  */
198 static void *
199 cksummer(void *arg)
200 {
201         dedup_arg_t *dda = arg;
202         char *buf = malloc(1<<20);
203         dmu_replay_record_t thedrr;
204         dmu_replay_record_t *drr = &thedrr;
205         struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
206         struct drr_end *drre = &thedrr.drr_u.drr_end;
207         struct drr_object *drro = &thedrr.drr_u.drr_object;
208         struct drr_write *drrw = &thedrr.drr_u.drr_write;
209         struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
210         FILE *ofp;
211         int outfd;
212         dmu_replay_record_t wbr_drr = {0};
213         struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
214         dedup_table_t ddt;
215         zio_cksum_t stream_cksum;
216         uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
217         uint64_t numbuckets;
218
219         ddt.max_ddt_size =
220             MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
221             SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
222
223         numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
224
225         /*
226          * numbuckets must be a power of 2.  Increase number to
227          * a power of 2 if necessary.
228          */
229         if (!ISP2(numbuckets))
230                 numbuckets = 1 << high_order_bit(numbuckets);
231
232         ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
233         ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
234             NULL, NULL, NULL, NULL, NULL, 0);
235         ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
236         ddt.numhashbits = high_order_bit(numbuckets) - 1;
237         ddt.ddt_full = B_FALSE;
238
239         /* Initialize the write-by-reference block. */
240         wbr_drr.drr_type = DRR_WRITE_BYREF;
241         wbr_drr.drr_payloadlen = 0;
242
243         outfd = dda->outputfd;
244         ofp = fdopen(dda->inputfd, "r");
245         while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
246
247                 switch (drr->drr_type) {
248                 case DRR_BEGIN:
249                 {
250                         int     fflags;
251                         ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
252
253                         /* set the DEDUP feature flag for this stream */
254                         fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
255                         fflags |= (DMU_BACKUP_FEATURE_DEDUP |
256                             DMU_BACKUP_FEATURE_DEDUPPROPS);
257                         DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
258
259                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
260                             &stream_cksum, outfd) == -1)
261                                 goto out;
262                         if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
263                             DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
264                                 int sz = drr->drr_payloadlen;
265
266                                 if (sz > 1<<20) {
267                                         free(buf);
268                                         buf = malloc(sz);
269                                 }
270                                 (void) ssread(buf, sz, ofp);
271                                 if (ferror(stdin))
272                                         perror("fread");
273                                 if (cksum_and_write(buf, sz, &stream_cksum,
274                                     outfd) == -1)
275                                         goto out;
276                         }
277                         break;
278                 }
279
280                 case DRR_END:
281                 {
282                         /* use the recalculated checksum */
283                         ZIO_SET_CHECKSUM(&drre->drr_checksum,
284                             stream_cksum.zc_word[0], stream_cksum.zc_word[1],
285                             stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
286                         if ((write(outfd, drr,
287                             sizeof (dmu_replay_record_t))) == -1)
288                                 goto out;
289                         break;
290                 }
291
292                 case DRR_OBJECT:
293                 {
294                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
295                             &stream_cksum, outfd) == -1)
296                                 goto out;
297                         if (drro->drr_bonuslen > 0) {
298                                 (void) ssread(buf,
299                                     P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
300                                     ofp);
301                                 if (cksum_and_write(buf,
302                                     P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
303                                     &stream_cksum, outfd) == -1)
304                                         goto out;
305                         }
306                         break;
307                 }
308
309                 case DRR_SPILL:
310                 {
311                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
312                             &stream_cksum, outfd) == -1)
313                                 goto out;
314                         (void) ssread(buf, drrs->drr_length, ofp);
315                         if (cksum_and_write(buf, drrs->drr_length,
316                             &stream_cksum, outfd) == -1)
317                                 goto out;
318                         break;
319                 }
320
321                 case DRR_FREEOBJECTS:
322                 {
323                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
324                             &stream_cksum, outfd) == -1)
325                                 goto out;
326                         break;
327                 }
328
329                 case DRR_WRITE:
330                 {
331                         dataref_t       dataref;
332
333                         (void) ssread(buf, drrw->drr_length, ofp);
334
335                         /*
336                          * Use the existing checksum if it's dedup-capable,
337                          * else calculate a SHA256 checksum for it.
338                          */
339
340                         if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
341                             zero_cksum) ||
342                             !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
343                                 zio_cksum_t tmpsha256;
344
345                                 zio_checksum_SHA256(buf,
346                                     drrw->drr_length, &tmpsha256);
347
348                                 drrw->drr_key.ddk_cksum.zc_word[0] =
349                                     BE_64(tmpsha256.zc_word[0]);
350                                 drrw->drr_key.ddk_cksum.zc_word[1] =
351                                     BE_64(tmpsha256.zc_word[1]);
352                                 drrw->drr_key.ddk_cksum.zc_word[2] =
353                                     BE_64(tmpsha256.zc_word[2]);
354                                 drrw->drr_key.ddk_cksum.zc_word[3] =
355                                     BE_64(tmpsha256.zc_word[3]);
356                                 drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
357                                 drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
358                         }
359
360                         dataref.ref_guid = drrw->drr_toguid;
361                         dataref.ref_object = drrw->drr_object;
362                         dataref.ref_offset = drrw->drr_offset;
363
364                         if (ddt_update(dda->dedup_hdl, &ddt,
365                             &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
366                             &dataref)) {
367                                 /* block already present in stream */
368                                 wbr_drrr->drr_object = drrw->drr_object;
369                                 wbr_drrr->drr_offset = drrw->drr_offset;
370                                 wbr_drrr->drr_length = drrw->drr_length;
371                                 wbr_drrr->drr_toguid = drrw->drr_toguid;
372                                 wbr_drrr->drr_refguid = dataref.ref_guid;
373                                 wbr_drrr->drr_refobject =
374                                     dataref.ref_object;
375                                 wbr_drrr->drr_refoffset =
376                                     dataref.ref_offset;
377
378                                 wbr_drrr->drr_checksumtype =
379                                     drrw->drr_checksumtype;
380                                 wbr_drrr->drr_checksumflags =
381                                     drrw->drr_checksumtype;
382                                 wbr_drrr->drr_key.ddk_cksum =
383                                     drrw->drr_key.ddk_cksum;
384                                 wbr_drrr->drr_key.ddk_prop =
385                                     drrw->drr_key.ddk_prop;
386
387                                 if (cksum_and_write(&wbr_drr,
388                                     sizeof (dmu_replay_record_t), &stream_cksum,
389                                     outfd) == -1)
390                                         goto out;
391                         } else {
392                                 /* block not previously seen */
393                                 if (cksum_and_write(drr,
394                                     sizeof (dmu_replay_record_t), &stream_cksum,
395                                     outfd) == -1)
396                                         goto out;
397                                 if (cksum_and_write(buf,
398                                     drrw->drr_length,
399                                     &stream_cksum, outfd) == -1)
400                                         goto out;
401                         }
402                         break;
403                 }
404
405                 case DRR_FREE:
406                 {
407                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
408                             &stream_cksum, outfd) == -1)
409                                 goto out;
410                         break;
411                 }
412
413                 default:
414                         (void) printf("INVALID record type 0x%x\n",
415                             drr->drr_type);
416                         /* should never happen, so assert */
417                         assert(B_FALSE);
418                 }
419         }
420 out:
421         umem_cache_destroy(ddt.ddecache);
422         free(ddt.dedup_hash_array);
423         free(buf);
424         (void) fclose(ofp);
425
426         return (NULL);
427 }
428
429 /*
430  * Routines for dealing with the AVL tree of fs-nvlists
431  */
432 typedef struct fsavl_node {
433         avl_node_t fn_node;
434         nvlist_t *fn_nvfs;
435         char *fn_snapname;
436         uint64_t fn_guid;
437 } fsavl_node_t;
438
439 static int
440 fsavl_compare(const void *arg1, const void *arg2)
441 {
442         const fsavl_node_t *fn1 = arg1;
443         const fsavl_node_t *fn2 = arg2;
444
445         if (fn1->fn_guid > fn2->fn_guid)
446                 return (+1);
447         else if (fn1->fn_guid < fn2->fn_guid)
448                 return (-1);
449         else
450                 return (0);
451 }
452
453 /*
454  * Given the GUID of a snapshot, find its containing filesystem and
455  * (optionally) name.
456  */
457 static nvlist_t *
458 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
459 {
460         fsavl_node_t fn_find;
461         fsavl_node_t *fn;
462
463         fn_find.fn_guid = snapguid;
464
465         fn = avl_find(avl, &fn_find, NULL);
466         if (fn) {
467                 if (snapname)
468                         *snapname = fn->fn_snapname;
469                 return (fn->fn_nvfs);
470         }
471         return (NULL);
472 }
473
474 static void
475 fsavl_destroy(avl_tree_t *avl)
476 {
477         fsavl_node_t *fn;
478         void *cookie;
479
480         if (avl == NULL)
481                 return;
482
483         cookie = NULL;
484         while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
485                 free(fn);
486         avl_destroy(avl);
487         free(avl);
488 }
489
490 /*
491  * Given an nvlist, produce an avl tree of snapshots, ordered by guid
492  */
493 static avl_tree_t *
494 fsavl_create(nvlist_t *fss)
495 {
496         avl_tree_t *fsavl;
497         nvpair_t *fselem = NULL;
498
499         if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
500                 return (NULL);
501
502         avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
503             offsetof(fsavl_node_t, fn_node));
504
505         while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
506                 nvlist_t *nvfs, *snaps;
507                 nvpair_t *snapelem = NULL;
508
509                 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
510                 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
511
512                 while ((snapelem =
513                     nvlist_next_nvpair(snaps, snapelem)) != NULL) {
514                         fsavl_node_t *fn;
515                         uint64_t guid;
516
517                         VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
518                         if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
519                                 fsavl_destroy(fsavl);
520                                 return (NULL);
521                         }
522                         fn->fn_nvfs = nvfs;
523                         fn->fn_snapname = nvpair_name(snapelem);
524                         fn->fn_guid = guid;
525
526                         /*
527                          * Note: if there are multiple snaps with the
528                          * same GUID, we ignore all but one.
529                          */
530                         if (avl_find(fsavl, fn, NULL) == NULL)
531                                 avl_add(fsavl, fn);
532                         else
533                                 free(fn);
534                 }
535         }
536
537         return (fsavl);
538 }
539
540 /*
541  * Routines for dealing with the giant nvlist of fs-nvlists, etc.
542  */
543 typedef struct send_data {
544         uint64_t parent_fromsnap_guid;
545         nvlist_t *parent_snaps;
546         nvlist_t *fss;
547         nvlist_t *snapprops;
548         const char *fromsnap;
549         const char *tosnap;
550         boolean_t recursive;
551
552         /*
553          * The header nvlist is of the following format:
554          * {
555          *   "tosnap" -> string
556          *   "fromsnap" -> string (if incremental)
557          *   "fss" -> {
558          *      id -> {
559          *
560          *       "name" -> string (full name; for debugging)
561          *       "parentfromsnap" -> number (guid of fromsnap in parent)
562          *
563          *       "props" -> { name -> value (only if set here) }
564          *       "snaps" -> { name (lastname) -> number (guid) }
565          *       "snapprops" -> { name (lastname) -> { name -> value } }
566          *
567          *       "origin" -> number (guid) (if clone)
568          *       "sent" -> boolean (not on-disk)
569          *      }
570          *   }
571          * }
572          *
573          */
574 } send_data_t;
575
576 static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
577
578 static int
579 send_iterate_snap(zfs_handle_t *zhp, void *arg)
580 {
581         send_data_t *sd = arg;
582         uint64_t guid = zhp->zfs_dmustats.dds_guid;
583         char *snapname;
584         nvlist_t *nv;
585
586         snapname = strrchr(zhp->zfs_name, '@')+1;
587
588         VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
589         /*
590          * NB: if there is no fromsnap here (it's a newly created fs in
591          * an incremental replication), we will substitute the tosnap.
592          */
593         if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
594             (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
595             strcmp(snapname, sd->tosnap) == 0)) {
596                 sd->parent_fromsnap_guid = guid;
597         }
598
599         VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
600         send_iterate_prop(zhp, nv);
601         VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
602         nvlist_free(nv);
603
604         zfs_close(zhp);
605         return (0);
606 }
607
608 static void
609 send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
610 {
611         nvpair_t *elem = NULL;
612
613         while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
614                 char *propname = nvpair_name(elem);
615                 zfs_prop_t prop = zfs_name_to_prop(propname);
616                 nvlist_t *propnv;
617
618                 if (!zfs_prop_user(propname)) {
619                         /*
620                          * Realistically, this should never happen.  However,
621                          * we want the ability to add DSL properties without
622                          * needing to make incompatible version changes.  We
623                          * need to ignore unknown properties to allow older
624                          * software to still send datasets containing these
625                          * properties, with the unknown properties elided.
626                          */
627                         if (prop == ZPROP_INVAL)
628                                 continue;
629
630                         if (zfs_prop_readonly(prop))
631                                 continue;
632                 }
633
634                 verify(nvpair_value_nvlist(elem, &propnv) == 0);
635                 if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
636                     prop == ZFS_PROP_REFQUOTA ||
637                     prop == ZFS_PROP_REFRESERVATION) {
638                         char *source;
639                         uint64_t value;
640                         verify(nvlist_lookup_uint64(propnv,
641                             ZPROP_VALUE, &value) == 0);
642                         if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
643                                 continue;
644                         /*
645                          * May have no source before SPA_VERSION_RECVD_PROPS,
646                          * but is still modifiable.
647                          */
648                         if (nvlist_lookup_string(propnv,
649                             ZPROP_SOURCE, &source) == 0) {
650                                 if ((strcmp(source, zhp->zfs_name) != 0) &&
651                                     (strcmp(source,
652                                     ZPROP_SOURCE_VAL_RECVD) != 0))
653                                         continue;
654                         }
655                 } else {
656                         char *source;
657                         if (nvlist_lookup_string(propnv,
658                             ZPROP_SOURCE, &source) != 0)
659                                 continue;
660                         if ((strcmp(source, zhp->zfs_name) != 0) &&
661                             (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
662                                 continue;
663                 }
664
665                 if (zfs_prop_user(propname) ||
666                     zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
667                         char *value;
668                         verify(nvlist_lookup_string(propnv,
669                             ZPROP_VALUE, &value) == 0);
670                         VERIFY(0 == nvlist_add_string(nv, propname, value));
671                 } else {
672                         uint64_t value;
673                         verify(nvlist_lookup_uint64(propnv,
674                             ZPROP_VALUE, &value) == 0);
675                         VERIFY(0 == nvlist_add_uint64(nv, propname, value));
676                 }
677         }
678 }
679
680 /*
681  * recursively generate nvlists describing datasets.  See comment
682  * for the data structure send_data_t above for description of contents
683  * of the nvlist.
684  */
685 static int
686 send_iterate_fs(zfs_handle_t *zhp, void *arg)
687 {
688         send_data_t *sd = arg;
689         nvlist_t *nvfs, *nv;
690         int rv = 0;
691         uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
692         uint64_t guid = zhp->zfs_dmustats.dds_guid;
693         char guidstring[64];
694
695         VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
696         VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
697         VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
698             sd->parent_fromsnap_guid));
699
700         if (zhp->zfs_dmustats.dds_origin[0]) {
701                 zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
702                     zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
703                 if (origin == NULL)
704                         return (-1);
705                 VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
706                     origin->zfs_dmustats.dds_guid));
707         }
708
709         /* iterate over props */
710         VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
711         send_iterate_prop(zhp, nv);
712         VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
713         nvlist_free(nv);
714
715         /* iterate over snaps, and set sd->parent_fromsnap_guid */
716         sd->parent_fromsnap_guid = 0;
717         VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
718         VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
719         (void) zfs_iter_snapshots(zhp, send_iterate_snap, sd);
720         VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
721         VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
722         nvlist_free(sd->parent_snaps);
723         nvlist_free(sd->snapprops);
724
725         /* add this fs to nvlist */
726         (void) snprintf(guidstring, sizeof (guidstring),
727             "0x%llx", (longlong_t)guid);
728         VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
729         nvlist_free(nvfs);
730
731         /* iterate over children */
732         if (sd->recursive)
733                 rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
734
735         sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
736
737         zfs_close(zhp);
738         return (rv);
739 }
740
741 static int
742 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
743     const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp)
744 {
745         zfs_handle_t *zhp;
746         send_data_t sd = { 0 };
747         int error;
748
749         zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
750         if (zhp == NULL)
751                 return (EZFS_BADTYPE);
752
753         VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
754         sd.fromsnap = fromsnap;
755         sd.tosnap = tosnap;
756         sd.recursive = recursive;
757
758         if ((error = send_iterate_fs(zhp, &sd)) != 0) {
759                 nvlist_free(sd.fss);
760                 if (avlp != NULL)
761                         *avlp = NULL;
762                 *nvlp = NULL;
763                 return (error);
764         }
765
766         if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
767                 nvlist_free(sd.fss);
768                 *nvlp = NULL;
769                 return (EZFS_NOMEM);
770         }
771
772         *nvlp = sd.fss;
773         return (0);
774 }
775
776 /*
777  * Routines for dealing with the sorted snapshot functionality
778  */
779 typedef struct zfs_node {
780         zfs_handle_t    *zn_handle;
781         avl_node_t      zn_avlnode;
782 } zfs_node_t;
783
784 static int
785 zfs_sort_snaps(zfs_handle_t *zhp, void *data)
786 {
787         avl_tree_t *avl = data;
788         zfs_node_t *node;
789         zfs_node_t search;
790
791         search.zn_handle = zhp;
792         node = avl_find(avl, &search, NULL);
793         if (node) {
794                 /*
795                  * If this snapshot was renamed while we were creating the
796                  * AVL tree, it's possible that we already inserted it under
797                  * its old name. Remove the old handle before adding the new
798                  * one.
799                  */
800                 zfs_close(node->zn_handle);
801                 avl_remove(avl, node);
802                 free(node);
803         }
804
805         node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t));
806         node->zn_handle = zhp;
807         avl_add(avl, node);
808
809         return (0);
810 }
811
812 static int
813 zfs_snapshot_compare(const void *larg, const void *rarg)
814 {
815         zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle;
816         zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle;
817         uint64_t lcreate, rcreate;
818
819         /*
820          * Sort them according to creation time.  We use the hidden
821          * CREATETXG property to get an absolute ordering of snapshots.
822          */
823         lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG);
824         rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG);
825
826         if (lcreate < rcreate)
827                 return (-1);
828         else if (lcreate > rcreate)
829                 return (+1);
830         else
831                 return (0);
832 }
833
834 int
835 zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data)
836 {
837         int ret = 0;
838         zfs_node_t *node;
839         avl_tree_t avl;
840         void *cookie = NULL;
841
842         avl_create(&avl, zfs_snapshot_compare,
843             sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode));
844
845         ret = zfs_iter_snapshots(zhp, zfs_sort_snaps, &avl);
846
847         for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node))
848                 ret |= callback(node->zn_handle, data);
849
850         while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL)
851                 free(node);
852
853         avl_destroy(&avl);
854
855         return (ret);
856 }
857
858 /*
859  * Routines specific to "zfs send"
860  */
861 typedef struct send_dump_data {
862         /* these are all just the short snapname (the part after the @) */
863         const char *fromsnap;
864         const char *tosnap;
865         char prevsnap[ZFS_MAXNAMELEN];
866         uint64_t prevsnap_obj;
867         boolean_t seenfrom, seento, replicate, doall, fromorigin;
868         boolean_t verbose;
869         int outfd;
870         boolean_t err;
871         nvlist_t *fss;
872         avl_tree_t *fsavl;
873         snapfilter_cb_t *filter_cb;
874         void *filter_cb_arg;
875         nvlist_t *debugnv;
876         char holdtag[ZFS_MAXNAMELEN];
877         int cleanup_fd;
878 } send_dump_data_t;
879
880 /*
881  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
882  * NULL) to the file descriptor specified by outfd.
883  */
884 static int
885 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
886     boolean_t fromorigin, int outfd, nvlist_t *debugnv)
887 {
888         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
889         libzfs_handle_t *hdl = zhp->zfs_hdl;
890         nvlist_t *thisdbg;
891
892         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
893         assert(fromsnap_obj == 0 || !fromorigin);
894
895         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
896         zc.zc_cookie = outfd;
897         zc.zc_obj = fromorigin;
898         zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
899         zc.zc_fromobj = fromsnap_obj;
900
901         VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
902         if (fromsnap && fromsnap[0] != '\0') {
903                 VERIFY(0 == nvlist_add_string(thisdbg,
904                     "fromsnap", fromsnap));
905         }
906
907         if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) {
908                 char errbuf[1024];
909                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
910                     "warning: cannot send '%s'"), zhp->zfs_name);
911
912                 VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
913                 if (debugnv) {
914                         VERIFY(0 == nvlist_add_nvlist(debugnv,
915                             zhp->zfs_name, thisdbg));
916                 }
917                 nvlist_free(thisdbg);
918
919                 switch (errno) {
920
921                 case EXDEV:
922                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
923                             "not an earlier snapshot from the same fs"));
924                         return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
925
926                 case ENOENT:
927                         if (zfs_dataset_exists(hdl, zc.zc_name,
928                             ZFS_TYPE_SNAPSHOT)) {
929                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
930                                     "incremental source (@%s) does not exist"),
931                                     zc.zc_value);
932                         }
933                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
934
935                 case EDQUOT:
936                 case EFBIG:
937                 case EIO:
938                 case ENOLINK:
939                 case ENOSPC:
940                 case ENOSTR:
941                 case ENXIO:
942                 case EPIPE:
943                 case ERANGE:
944                 case EFAULT:
945                 case EROFS:
946                         zfs_error_aux(hdl, strerror(errno));
947                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
948
949                 default:
950                         return (zfs_standard_error(hdl, errno, errbuf));
951                 }
952         }
953
954         if (debugnv)
955                 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
956         nvlist_free(thisdbg);
957
958         return (0);
959 }
960
961 static int
962 hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
963 {
964         zfs_handle_t *pzhp;
965         int error = 0;
966         char *thissnap;
967
968         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
969
970         /*
971          * zfs_send() only opens a cleanup_fd for sends that need it,
972          * e.g. replication and doall.
973          */
974         if (sdd->cleanup_fd == -1)
975                 return (0);
976
977         thissnap = strchr(zhp->zfs_name, '@') + 1;
978         *(thissnap - 1) = '\0';
979         pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
980         *(thissnap - 1) = '@';
981
982         /*
983          * It's OK if the parent no longer exists.  The send code will
984          * handle that error.
985          */
986         if (pzhp) {
987                 error = zfs_hold(pzhp, thissnap, sdd->holdtag,
988                     B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd,
989                     zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID),
990                     zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG));
991                 zfs_close(pzhp);
992         }
993
994         return (error);
995 }
996
997 static int
998 dump_snapshot(zfs_handle_t *zhp, void *arg)
999 {
1000         send_dump_data_t *sdd = arg;
1001         char *thissnap;
1002         int err;
1003         boolean_t isfromsnap, istosnap;
1004         boolean_t exclude = B_FALSE;
1005
1006         thissnap = strchr(zhp->zfs_name, '@') + 1;
1007         isfromsnap = (sdd->fromsnap != NULL &&
1008             strcmp(sdd->fromsnap, thissnap) == 0);
1009
1010         if (!sdd->seenfrom && isfromsnap) {
1011                 err = hold_for_send(zhp, sdd);
1012                 if (err == 0) {
1013                         sdd->seenfrom = B_TRUE;
1014                         (void) strcpy(sdd->prevsnap, thissnap);
1015                         sdd->prevsnap_obj = zfs_prop_get_int(zhp,
1016                             ZFS_PROP_OBJSETID);
1017                 } else if (err == ENOENT) {
1018                         err = 0;
1019                 }
1020                 zfs_close(zhp);
1021                 return (err);
1022         }
1023
1024         if (sdd->seento || !sdd->seenfrom) {
1025                 zfs_close(zhp);
1026                 return (0);
1027         }
1028
1029         istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1030         if (istosnap)
1031                 sdd->seento = B_TRUE;
1032
1033         if (!sdd->doall && !isfromsnap && !istosnap) {
1034                 if (sdd->replicate) {
1035                         char *snapname;
1036                         nvlist_t *snapprops;
1037                         /*
1038                          * Filter out all intermediate snapshots except origin
1039                          * snapshots needed to replicate clones.
1040                          */
1041                         nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1042                             zhp->zfs_dmustats.dds_guid, &snapname);
1043
1044                         VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1045                             "snapprops", &snapprops));
1046                         VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1047                             thissnap, &snapprops));
1048                         exclude = !nvlist_exists(snapprops, "is_clone_origin");
1049                 } else {
1050                         exclude = B_TRUE;
1051                 }
1052         }
1053
1054         /*
1055          * If a filter function exists, call it to determine whether
1056          * this snapshot will be sent.
1057          */
1058         if (exclude || (sdd->filter_cb != NULL &&
1059             sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1060                 /*
1061                  * This snapshot is filtered out.  Don't send it, and don't
1062                  * set prevsnap_obj, so it will be as if this snapshot didn't
1063                  * exist, and the next accepted snapshot will be sent as
1064                  * an incremental from the last accepted one, or as the
1065                  * first (and full) snapshot in the case of a replication,
1066                  * non-incremental send.
1067                  */
1068                 zfs_close(zhp);
1069                 return (0);
1070         }
1071
1072         err = hold_for_send(zhp, sdd);
1073         if (err) {
1074                 if (err == ENOENT)
1075                         err = 0;
1076                 zfs_close(zhp);
1077                 return (err);
1078         }
1079
1080         /* send it */
1081         if (sdd->verbose) {
1082                 (void) fprintf(stderr, "sending from @%s to %s\n",
1083                     sdd->prevsnap, zhp->zfs_name);
1084         }
1085
1086         err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1087             sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
1088             sdd->outfd, sdd->debugnv);
1089
1090         (void) strcpy(sdd->prevsnap, thissnap);
1091         sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1092         zfs_close(zhp);
1093         return (err);
1094 }
1095
1096 static int
1097 dump_filesystem(zfs_handle_t *zhp, void *arg)
1098 {
1099         int rv = 0;
1100         send_dump_data_t *sdd = arg;
1101         boolean_t missingfrom = B_FALSE;
1102         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1103
1104         (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1105             zhp->zfs_name, sdd->tosnap);
1106         if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1107                 (void) fprintf(stderr, "WARNING: "
1108                     "could not send %s@%s: does not exist\n",
1109                     zhp->zfs_name, sdd->tosnap);
1110                 sdd->err = B_TRUE;
1111                 return (0);
1112         }
1113
1114         if (sdd->replicate && sdd->fromsnap) {
1115                 /*
1116                  * If this fs does not have fromsnap, and we're doing
1117                  * recursive, we need to send a full stream from the
1118                  * beginning (or an incremental from the origin if this
1119                  * is a clone).  If we're doing non-recursive, then let
1120                  * them get the error.
1121                  */
1122                 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1123                     zhp->zfs_name, sdd->fromsnap);
1124                 if (ioctl(zhp->zfs_hdl->libzfs_fd,
1125                     ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1126                         missingfrom = B_TRUE;
1127                 }
1128         }
1129
1130         sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1131         sdd->prevsnap_obj = 0;
1132         if (sdd->fromsnap == NULL || missingfrom)
1133                 sdd->seenfrom = B_TRUE;
1134
1135         rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
1136         if (!sdd->seenfrom) {
1137                 (void) fprintf(stderr,
1138                     "WARNING: could not send %s@%s:\n"
1139                     "incremental source (%s@%s) does not exist\n",
1140                     zhp->zfs_name, sdd->tosnap,
1141                     zhp->zfs_name, sdd->fromsnap);
1142                 sdd->err = B_TRUE;
1143         } else if (!sdd->seento) {
1144                 if (sdd->fromsnap) {
1145                         (void) fprintf(stderr,
1146                             "WARNING: could not send %s@%s:\n"
1147                             "incremental source (%s@%s) "
1148                             "is not earlier than it\n",
1149                             zhp->zfs_name, sdd->tosnap,
1150                             zhp->zfs_name, sdd->fromsnap);
1151                 } else {
1152                         (void) fprintf(stderr, "WARNING: "
1153                             "could not send %s@%s: does not exist\n",
1154                             zhp->zfs_name, sdd->tosnap);
1155                 }
1156                 sdd->err = B_TRUE;
1157         }
1158
1159         return (rv);
1160 }
1161
1162 static int
1163 dump_filesystems(zfs_handle_t *rzhp, void *arg)
1164 {
1165         send_dump_data_t *sdd = arg;
1166         nvpair_t *fspair;
1167         boolean_t needagain, progress;
1168
1169         if (!sdd->replicate)
1170                 return (dump_filesystem(rzhp, sdd));
1171
1172         /* Mark the clone origin snapshots. */
1173         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1174             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1175                 nvlist_t *nvfs;
1176                 uint64_t origin_guid = 0;
1177
1178                 VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1179                 (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1180                 if (origin_guid != 0) {
1181                         char *snapname;
1182                         nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1183                             origin_guid, &snapname);
1184                         if (origin_nv != NULL) {
1185                                 nvlist_t *snapprops;
1186                                 VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1187                                     "snapprops", &snapprops));
1188                                 VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1189                                     snapname, &snapprops));
1190                                 VERIFY(0 == nvlist_add_boolean(
1191                                     snapprops, "is_clone_origin"));
1192                         }
1193                 }
1194         }
1195 again:
1196         needagain = progress = B_FALSE;
1197         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1198             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1199                 nvlist_t *fslist;
1200                 char *fsname;
1201                 zfs_handle_t *zhp;
1202                 int err;
1203                 uint64_t origin_guid = 0;
1204
1205                 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1206                 if (nvlist_lookup_boolean(fslist, "sent") == 0)
1207                         continue;
1208
1209                 VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1210                 (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1211
1212                 if (origin_guid != 0) {
1213                         nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1214                             origin_guid, NULL);
1215                         if (origin_nv != NULL &&
1216                             nvlist_lookup_boolean(origin_nv,
1217                             "sent") == ENOENT) {
1218                                 /*
1219                                  * origin has not been sent yet;
1220                                  * skip this clone.
1221                                  */
1222                                 needagain = B_TRUE;
1223                                 continue;
1224                         }
1225                 }
1226
1227                 zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1228                 if (zhp == NULL)
1229                         return (-1);
1230                 err = dump_filesystem(zhp, sdd);
1231                 VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1232                 progress = B_TRUE;
1233                 zfs_close(zhp);
1234                 if (err)
1235                         return (err);
1236         }
1237         if (needagain) {
1238                 assert(progress);
1239                 goto again;
1240         }
1241         return (0);
1242 }
1243
1244 /*
1245  * Generate a send stream for the dataset identified by the argument zhp.
1246  *
1247  * The content of the send stream is the snapshot identified by
1248  * 'tosnap'.  Incremental streams are requested in two ways:
1249  *     - from the snapshot identified by "fromsnap" (if non-null) or
1250  *     - from the origin of the dataset identified by zhp, which must
1251  *       be a clone.  In this case, "fromsnap" is null and "fromorigin"
1252  *       is TRUE.
1253  *
1254  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1255  * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1256  * if "replicate" is set.  If "doall" is set, dump all the intermediate
1257  * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1258  * case too. If "props" is set, send properties.
1259  */
1260 int
1261 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1262     sendflags_t flags, int outfd, snapfilter_cb_t filter_func,
1263     void *cb_arg, nvlist_t **debugnvp)
1264 {
1265         char errbuf[1024];
1266         send_dump_data_t sdd = { 0 };
1267         int err;
1268         nvlist_t *fss = NULL;
1269         avl_tree_t *fsavl = NULL;
1270         static uint64_t holdseq;
1271         int spa_version;
1272         boolean_t holdsnaps = B_FALSE;
1273         pthread_t tid;
1274         int pipefd[2];
1275         dedup_arg_t dda = { 0 };
1276         int featureflags = 0;
1277
1278         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1279             "cannot send '%s'"), zhp->zfs_name);
1280
1281         if (fromsnap && fromsnap[0] == '\0') {
1282                 zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1283                     "zero-length incremental source"));
1284                 return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1285         }
1286
1287         if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1288                 uint64_t version;
1289                 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1290                 if (version >= ZPL_VERSION_SA) {
1291                         featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1292                 }
1293         }
1294
1295         if (zfs_spa_version(zhp, &spa_version) == 0 &&
1296             spa_version >= SPA_VERSION_USERREFS &&
1297             (flags.doall || flags.replicate))
1298                 holdsnaps = B_TRUE;
1299
1300         if (flags.dedup) {
1301                 featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1302                     DMU_BACKUP_FEATURE_DEDUPPROPS);
1303                 if ((err = pipe(pipefd))) {
1304                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1305                         return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1306                             errbuf));
1307                 }
1308                 dda.outputfd = outfd;
1309                 dda.inputfd = pipefd[1];
1310                 dda.dedup_hdl = zhp->zfs_hdl;
1311                 if ((err = pthread_create(&tid, NULL, cksummer, &dda))) {
1312                         (void) close(pipefd[0]);
1313                         (void) close(pipefd[1]);
1314                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1315                         return (zfs_error(zhp->zfs_hdl,
1316                             EZFS_THREADCREATEFAILED, errbuf));
1317                 }
1318         }
1319
1320         if (flags.replicate || flags.doall || flags.props) {
1321                 dmu_replay_record_t drr = { 0 };
1322                 char *packbuf = NULL;
1323                 size_t buflen = 0;
1324                 zio_cksum_t zc = { { 0 } };
1325
1326                 if (flags.replicate || flags.props) {
1327                         nvlist_t *hdrnv;
1328
1329                         VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1330                         if (fromsnap) {
1331                                 VERIFY(0 == nvlist_add_string(hdrnv,
1332                                     "fromsnap", fromsnap));
1333                         }
1334                         VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1335                         if (!flags.replicate) {
1336                                 VERIFY(0 == nvlist_add_boolean(hdrnv,
1337                                     "not_recursive"));
1338                         }
1339
1340                         err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1341                             fromsnap, tosnap, flags.replicate, &fss, &fsavl);
1342                         if (err)
1343                                 goto err_out;
1344                         VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1345                         err = nvlist_pack(hdrnv, &packbuf, &buflen,
1346                             NV_ENCODE_XDR, 0);
1347                         if (debugnvp)
1348                                 *debugnvp = hdrnv;
1349                         else
1350                                 nvlist_free(hdrnv);
1351                         if (err) {
1352                                 fsavl_destroy(fsavl);
1353                                 nvlist_free(fss);
1354                                 goto stderr_out;
1355                         }
1356                 }
1357
1358                 /* write first begin record */
1359                 drr.drr_type = DRR_BEGIN;
1360                 drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1361                 DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.drr_versioninfo,
1362                     DMU_COMPOUNDSTREAM);
1363                 DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.drr_versioninfo,
1364                     featureflags);
1365                 (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1366                     sizeof (drr.drr_u.drr_begin.drr_toname),
1367                     "%s@%s", zhp->zfs_name, tosnap);
1368                 drr.drr_payloadlen = buflen;
1369                 err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1370
1371                 /* write header nvlist */
1372                 if (err != -1 && packbuf != NULL) {
1373                         err = cksum_and_write(packbuf, buflen, &zc, outfd);
1374                 }
1375                 free(packbuf);
1376                 if (err == -1) {
1377                         fsavl_destroy(fsavl);
1378                         nvlist_free(fss);
1379                         err = errno;
1380                         goto stderr_out;
1381                 }
1382
1383                 /* write end record */
1384                 if (err != -1) {
1385                         bzero(&drr, sizeof (drr));
1386                         drr.drr_type = DRR_END;
1387                         drr.drr_u.drr_end.drr_checksum = zc;
1388                         err = write(outfd, &drr, sizeof (drr));
1389                         if (err == -1) {
1390                                 fsavl_destroy(fsavl);
1391                                 nvlist_free(fss);
1392                                 err = errno;
1393                                 goto stderr_out;
1394                         }
1395                 }
1396         }
1397
1398         /* dump each stream */
1399         sdd.fromsnap = fromsnap;
1400         sdd.tosnap = tosnap;
1401         if (flags.dedup)
1402                 sdd.outfd = pipefd[0];
1403         else
1404                 sdd.outfd = outfd;
1405         sdd.replicate = flags.replicate;
1406         sdd.doall = flags.doall;
1407         sdd.fromorigin = flags.fromorigin;
1408         sdd.fss = fss;
1409         sdd.fsavl = fsavl;
1410         sdd.verbose = flags.verbose;
1411         sdd.filter_cb = filter_func;
1412         sdd.filter_cb_arg = cb_arg;
1413         if (debugnvp)
1414                 sdd.debugnv = *debugnvp;
1415         if (holdsnaps) {
1416                 ++holdseq;
1417                 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1418                     ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1419                 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR);
1420                 if (sdd.cleanup_fd < 0) {
1421                         err = errno;
1422                         goto stderr_out;
1423                 }
1424         } else {
1425                 sdd.cleanup_fd = -1;
1426         }
1427         err = dump_filesystems(zhp, &sdd);
1428         fsavl_destroy(fsavl);
1429         nvlist_free(fss);
1430
1431         if (flags.dedup) {
1432                 (void) close(pipefd[0]);
1433                 (void) pthread_join(tid, NULL);
1434         }
1435
1436         if (sdd.cleanup_fd != -1) {
1437                 VERIFY(0 == close(sdd.cleanup_fd));
1438                 sdd.cleanup_fd = -1;
1439         }
1440
1441         if (flags.replicate || flags.doall || flags.props) {
1442                 /*
1443                  * write final end record.  NB: want to do this even if
1444                  * there was some error, because it might not be totally
1445                  * failed.
1446                  */
1447                 dmu_replay_record_t drr = { 0 };
1448                 drr.drr_type = DRR_END;
1449                 if (write(outfd, &drr, sizeof (drr)) == -1) {
1450                         return (zfs_standard_error(zhp->zfs_hdl,
1451                             errno, errbuf));
1452                 }
1453         }
1454
1455         return (err || sdd.err);
1456
1457 stderr_out:
1458         err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1459 err_out:
1460         if (sdd.cleanup_fd != -1)
1461                 VERIFY(0 == close(sdd.cleanup_fd));
1462         if (flags.dedup) {
1463                 (void) pthread_cancel(tid);
1464                 (void) pthread_join(tid, NULL);
1465                 (void) close(pipefd[0]);
1466         }
1467         return (err);
1468 }
1469
1470 /*
1471  * Routines specific to "zfs recv"
1472  */
1473
1474 static int
1475 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
1476     boolean_t byteswap, zio_cksum_t *zc)
1477 {
1478         char *cp = buf;
1479         int rv;
1480         int len = ilen;
1481
1482         do {
1483                 rv = read(fd, cp, len);
1484                 cp += rv;
1485                 len -= rv;
1486         } while (rv > 0);
1487
1488         if (rv < 0 || len != 0) {
1489                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1490                     "failed to read from stream"));
1491                 return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
1492                     "cannot receive")));
1493         }
1494
1495         if (zc) {
1496                 if (byteswap)
1497                         fletcher_4_incremental_byteswap(buf, ilen, zc);
1498                 else
1499                         fletcher_4_incremental_native(buf, ilen, zc);
1500         }
1501         return (0);
1502 }
1503
1504 static int
1505 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
1506     boolean_t byteswap, zio_cksum_t *zc)
1507 {
1508         char *buf;
1509         int err;
1510
1511         buf = zfs_alloc(hdl, len);
1512         if (buf == NULL)
1513                 return (ENOMEM);
1514
1515         err = recv_read(hdl, fd, buf, len, byteswap, zc);
1516         if (err != 0) {
1517                 free(buf);
1518                 return (err);
1519         }
1520
1521         err = nvlist_unpack(buf, len, nvp, 0);
1522         free(buf);
1523         if (err != 0) {
1524                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
1525                     "stream (malformed nvlist)"));
1526                 return (EINVAL);
1527         }
1528         return (0);
1529 }
1530
1531 static int
1532 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
1533     int baselen, char *newname, recvflags_t flags)
1534 {
1535         static int seq;
1536         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1537         int err;
1538         prop_changelist_t *clp;
1539         zfs_handle_t *zhp;
1540
1541         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1542         if (zhp == NULL)
1543                 return (-1);
1544         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1545             flags.force ? MS_FORCE : 0);
1546         zfs_close(zhp);
1547         if (clp == NULL)
1548                 return (-1);
1549         err = changelist_prefix(clp);
1550         if (err)
1551                 return (err);
1552
1553         zc.zc_objset_type = DMU_OST_ZFS;
1554         (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1555
1556         if (tryname) {
1557                 (void) strcpy(newname, tryname);
1558
1559                 (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
1560
1561                 if (flags.verbose) {
1562                         (void) printf("attempting rename %s to %s\n",
1563                             zc.zc_name, zc.zc_value);
1564                 }
1565                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1566                 if (err == 0)
1567                         changelist_rename(clp, name, tryname);
1568         } else {
1569                 err = ENOENT;
1570         }
1571
1572         if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
1573                 seq++;
1574
1575                 (void) strncpy(newname, name, baselen);
1576                 (void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
1577                     "recv-%ld-%u", (long) getpid(), seq);
1578                 (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
1579
1580                 if (flags.verbose) {
1581                         (void) printf("failed - trying rename %s to %s\n",
1582                             zc.zc_name, zc.zc_value);
1583                 }
1584                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1585                 if (err == 0)
1586                         changelist_rename(clp, name, newname);
1587                 if (err && flags.verbose) {
1588                         (void) printf("failed (%u) - "
1589                             "will try again on next pass\n", errno);
1590                 }
1591                 err = EAGAIN;
1592         } else if (flags.verbose) {
1593                 if (err == 0)
1594                         (void) printf("success\n");
1595                 else
1596                         (void) printf("failed (%u)\n", errno);
1597         }
1598
1599         (void) changelist_postfix(clp);
1600         changelist_free(clp);
1601
1602         return (err);
1603 }
1604
1605 static int
1606 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
1607     char *newname, recvflags_t flags)
1608 {
1609         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1610         int err = 0;
1611         prop_changelist_t *clp;
1612         zfs_handle_t *zhp;
1613         boolean_t defer = B_FALSE;
1614         int spa_version;
1615
1616         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1617         if (zhp == NULL)
1618                 return (-1);
1619         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1620             flags.force ? MS_FORCE : 0);
1621         if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
1622             zfs_spa_version(zhp, &spa_version) == 0 &&
1623             spa_version >= SPA_VERSION_USERREFS)
1624                 defer = B_TRUE;
1625         zfs_close(zhp);
1626         if (clp == NULL)
1627                 return (-1);
1628         err = changelist_prefix(clp);
1629         if (err)
1630                 return (err);
1631
1632         zc.zc_objset_type = DMU_OST_ZFS;
1633         zc.zc_defer_destroy = defer;
1634         (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1635
1636         if (flags.verbose)
1637                 (void) printf("attempting destroy %s\n", zc.zc_name);
1638         err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
1639         if (err == 0) {
1640                 if (flags.verbose)
1641                         (void) printf("success\n");
1642                 changelist_remove(clp, zc.zc_name);
1643         }
1644
1645         (void) changelist_postfix(clp);
1646         changelist_free(clp);
1647
1648         /*
1649          * Deferred destroy might destroy the snapshot or only mark it to be
1650          * destroyed later, and it returns success in either case.
1651          */
1652         if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
1653             ZFS_TYPE_SNAPSHOT))) {
1654                 err = recv_rename(hdl, name, NULL, baselen, newname, flags);
1655         }
1656
1657         return (err);
1658 }
1659
/*
 * Search state handed to guid_to_name_cb() through zfs_iter_children().
 */
struct guid_to_name_data {
	uint64_t guid;	/* guid being searched for */
	char *name;	/* caller's buffer; receives the matching dataset name */
};

typedef struct guid_to_name_data guid_to_name_data_t;
1664
1665 static int
1666 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
1667 {
1668         guid_to_name_data_t *gtnd = arg;
1669         int err;
1670
1671         if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
1672                 (void) strcpy(gtnd->name, zhp->zfs_name);
1673                 zfs_close(zhp);
1674                 return (EEXIST);
1675         }
1676         err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
1677         zfs_close(zhp);
1678         return (err);
1679 }
1680
1681 static int
1682 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
1683     char *name)
1684 {
1685         /* exhaustive search all local snapshots */
1686         guid_to_name_data_t gtnd;
1687         int err = 0;
1688         zfs_handle_t *zhp;
1689         char *cp;
1690
1691         gtnd.guid = guid;
1692         gtnd.name = name;
1693
1694         if (strchr(parent, '@') == NULL) {
1695                 zhp = make_dataset_handle(hdl, parent);
1696                 if (zhp != NULL) {
1697                         err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1698                         zfs_close(zhp);
1699                         if (err == EEXIST)
1700                                 return (0);
1701                 }
1702         }
1703
1704         cp = strchr(parent, '/');
1705         if (cp)
1706                 *cp = '\0';
1707         zhp = make_dataset_handle(hdl, parent);
1708         if (cp)
1709                 *cp = '/';
1710
1711         if (zhp) {
1712                 err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1713                 zfs_close(zhp);
1714         }
1715
1716         return (err == EEXIST ? 0 : ENOENT);
1717
1718 }
1719
1720 /*
1721  * Return true if dataset guid1 is created before guid2.
1722  */
1723 static int
1724 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
1725     uint64_t guid1, uint64_t guid2)
1726 {
1727         nvlist_t *nvfs;
1728         char *fsname, *snapname;
1729         char buf[ZFS_MAXNAMELEN];
1730         int rv;
1731         zfs_node_t zn1, zn2;
1732
1733         if (guid2 == 0)
1734                 return (0);
1735         if (guid1 == 0)
1736                 return (1);
1737
1738         nvfs = fsavl_find(avl, guid1, &snapname);
1739         VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1740         (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1741         zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1742         if (zn1.zn_handle == NULL)
1743                 return (-1);
1744
1745         nvfs = fsavl_find(avl, guid2, &snapname);
1746         VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1747         (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1748         zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1749         if (zn2.zn_handle == NULL) {
1750                 zfs_close(zn2.zn_handle);
1751                 return (-1);
1752         }
1753
1754         rv = (zfs_snapshot_compare(&zn1, &zn2) == -1);
1755
1756         zfs_close(zn1.zn_handle);
1757         zfs_close(zn2.zn_handle);
1758
1759         return (rv);
1760 }
1761
1762 static int
1763 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
1764     recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
1765     nvlist_t *renamed)
1766 {
1767         nvlist_t *local_nv;
1768         avl_tree_t *local_avl;
1769         nvpair_t *fselem, *nextfselem;
1770         char *fromsnap;
1771         char newname[ZFS_MAXNAMELEN];
1772         int error;
1773         boolean_t needagain, progress, recursive;
1774         char *s1, *s2;
1775
1776         VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
1777
1778         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
1779             ENOENT);
1780
1781         if (flags.dryrun)
1782                 return (0);
1783
1784 again:
1785         needagain = progress = B_FALSE;
1786
1787         if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
1788             recursive, &local_nv, &local_avl)) != 0)
1789                 return (error);
1790
1791         /*
1792          * Process deletes and renames
1793          */
1794         for (fselem = nvlist_next_nvpair(local_nv, NULL);
1795             fselem; fselem = nextfselem) {
1796                 nvlist_t *nvfs, *snaps;
1797                 nvlist_t *stream_nvfs = NULL;
1798                 nvpair_t *snapelem, *nextsnapelem;
1799                 uint64_t fromguid = 0;
1800                 uint64_t originguid = 0;
1801                 uint64_t stream_originguid = 0;
1802                 uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
1803                 char *fsname, *stream_fsname;
1804
1805                 nextfselem = nvlist_next_nvpair(local_nv, fselem);
1806
1807                 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
1808                 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
1809                 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1810                 VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
1811                     &parent_fromsnap_guid));
1812                 (void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
1813
1814                 /*
1815                  * First find the stream's fs, so we can check for
1816                  * a different origin (due to "zfs promote")
1817                  */
1818                 for (snapelem = nvlist_next_nvpair(snaps, NULL);
1819                     snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
1820                         uint64_t thisguid;
1821
1822                         VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1823                         stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
1824
1825                         if (stream_nvfs != NULL)
1826                                 break;
1827                 }
1828
1829                 /* check for promote */
1830                 (void) nvlist_lookup_uint64(stream_nvfs, "origin",
1831                     &stream_originguid);
1832                 if (stream_nvfs && originguid != stream_originguid) {
1833                         switch (created_before(hdl, local_avl,
1834                             stream_originguid, originguid)) {
1835                         case 1: {
1836                                 /* promote it! */
1837                                 zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1838                                 nvlist_t *origin_nvfs;
1839                                 char *origin_fsname;
1840
1841                                 if (flags.verbose)
1842                                         (void) printf("promoting %s\n", fsname);
1843
1844                                 origin_nvfs = fsavl_find(local_avl, originguid,
1845                                     NULL);
1846                                 VERIFY(0 == nvlist_lookup_string(origin_nvfs,
1847                                     "name", &origin_fsname));
1848                                 (void) strlcpy(zc.zc_value, origin_fsname,
1849                                     sizeof (zc.zc_value));
1850                                 (void) strlcpy(zc.zc_name, fsname,
1851                                     sizeof (zc.zc_name));
1852                                 error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
1853                                 if (error == 0)
1854                                         progress = B_TRUE;
1855                                 break;
1856                         }
1857                         default:
1858                                 break;
1859                         case -1:
1860                                 fsavl_destroy(local_avl);
1861                                 nvlist_free(local_nv);
1862                                 return (-1);
1863                         }
1864                         /*
1865                          * We had/have the wrong origin, therefore our
1866                          * list of snapshots is wrong.  Need to handle
1867                          * them on the next pass.
1868                          */
1869                         needagain = B_TRUE;
1870                         continue;
1871                 }
1872
1873                 for (snapelem = nvlist_next_nvpair(snaps, NULL);
1874                     snapelem; snapelem = nextsnapelem) {
1875                         uint64_t thisguid;
1876                         char *stream_snapname;
1877                         nvlist_t *found, *props;
1878
1879                         nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
1880
1881                         VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1882                         found = fsavl_find(stream_avl, thisguid,
1883                             &stream_snapname);
1884
1885                         /* check for delete */
1886                         if (found == NULL) {
1887                                 char name[ZFS_MAXNAMELEN];
1888
1889                                 if (!flags.force)
1890                                         continue;
1891
1892                                 (void) snprintf(name, sizeof (name), "%s@%s",
1893                                     fsname, nvpair_name(snapelem));
1894
1895                                 error = recv_destroy(hdl, name,
1896                                     strlen(fsname)+1, newname, flags);
1897                                 if (error)
1898                                         needagain = B_TRUE;
1899                                 else
1900                                         progress = B_TRUE;
1901                                 continue;
1902                         }
1903
1904                         stream_nvfs = found;
1905
1906                         if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
1907                             &props) && 0 == nvlist_lookup_nvlist(props,
1908                             stream_snapname, &props)) {
1909                                 zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1910
1911                                 zc.zc_cookie = B_TRUE; /* received */
1912                                 (void) snprintf(zc.zc_name, sizeof (zc.zc_name),
1913                                     "%s@%s", fsname, nvpair_name(snapelem));
1914                                 if (zcmd_write_src_nvlist(hdl, &zc,
1915                                     props) == 0) {
1916                                         (void) zfs_ioctl(hdl,
1917                                             ZFS_IOC_SET_PROP, &zc);
1918                                         zcmd_free_nvlists(&zc);
1919                                 }
1920                         }
1921
1922                         /* check for different snapname */
1923                         if (strcmp(nvpair_name(snapelem),
1924                             stream_snapname) != 0) {
1925                                 char name[ZFS_MAXNAMELEN];
1926                                 char tryname[ZFS_MAXNAMELEN];
1927
1928                                 (void) snprintf(name, sizeof (name), "%s@%s",
1929                                     fsname, nvpair_name(snapelem));
1930                                 (void) snprintf(tryname, sizeof (name), "%s@%s",
1931                                     fsname, stream_snapname);
1932
1933                                 error = recv_rename(hdl, name, tryname,
1934                                     strlen(fsname)+1, newname, flags);
1935                                 if (error)
1936                                         needagain = B_TRUE;
1937                                 else
1938                                         progress = B_TRUE;
1939                         }
1940
1941                         if (strcmp(stream_snapname, fromsnap) == 0)
1942                                 fromguid = thisguid;
1943                 }
1944
1945                 /* check for delete */
1946                 if (stream_nvfs == NULL) {
1947                         if (!flags.force)
1948                                 continue;
1949
1950                         error = recv_destroy(hdl, fsname, strlen(tofs)+1,
1951                             newname, flags);
1952                         if (error)
1953                                 needagain = B_TRUE;
1954                         else
1955                                 progress = B_TRUE;
1956                         continue;
1957                 }
1958
1959                 if (fromguid == 0) {
1960                         if (flags.verbose) {
1961                                 (void) printf("local fs %s does not have "
1962                                     "fromsnap (%s in stream); must have "
1963                                     "been deleted locally; ignoring\n",
1964                                     fsname, fromsnap);
1965                         }
1966                         continue;
1967                 }
1968
1969                 VERIFY(0 == nvlist_lookup_string(stream_nvfs,
1970                     "name", &stream_fsname));
1971                 VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
1972                     "parentfromsnap", &stream_parent_fromsnap_guid));
1973
1974                 s1 = strrchr(fsname, '/');
1975                 s2 = strrchr(stream_fsname, '/');
1976
1977                 /*
1978                  * Check for rename. If the exact receive path is specified, it
1979                  * does not count as a rename, but we still need to check the
1980                  * datasets beneath it.
1981                  */
1982                 if ((stream_parent_fromsnap_guid != 0 &&
1983                     parent_fromsnap_guid != 0 &&
1984                     stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
1985                     ((flags.isprefix || strcmp(tofs, fsname) != 0) &&
1986                     (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
1987                         nvlist_t *parent;
1988                         char tryname[ZFS_MAXNAMELEN];
1989
1990                         parent = fsavl_find(local_avl,
1991                             stream_parent_fromsnap_guid, NULL);
1992                         /*
1993                          * NB: parent might not be found if we used the
1994                          * tosnap for stream_parent_fromsnap_guid,
1995                          * because the parent is a newly-created fs;
1996                          * we'll be able to rename it after we recv the
1997                          * new fs.
1998                          */
1999                         if (parent != NULL) {
2000                                 char *pname;
2001
2002                                 VERIFY(0 == nvlist_lookup_string(parent, "name",
2003                                     &pname));
2004                                 (void) snprintf(tryname, sizeof (tryname),
2005                                     "%s%s", pname, strrchr(stream_fsname, '/'));
2006                         } else {
2007                                 tryname[0] = '\0';
2008                                 if (flags.verbose) {
2009                                         (void) printf("local fs %s new parent "
2010                                             "not found\n", fsname);
2011                                 }
2012                         }
2013
2014                         newname[0] = '\0';
2015
2016                         error = recv_rename(hdl, fsname, tryname,
2017                             strlen(tofs)+1, newname, flags);
2018
2019                         if (renamed != NULL && newname[0] != '\0') {
2020                                 VERIFY(0 == nvlist_add_boolean(renamed,
2021                                     newname));
2022                         }
2023
2024                         if (error)
2025                                 needagain = B_TRUE;
2026                         else
2027                                 progress = B_TRUE;
2028                 }
2029         }
2030
2031         fsavl_destroy(local_avl);
2032         nvlist_free(local_nv);
2033
2034         if (needagain && progress) {
2035                 /* do another pass to fix up temporary names */
2036                 if (flags.verbose)
2037                         (void) printf("another pass:\n");
2038                 goto again;
2039         }
2040
2041         return (needagain);
2042 }
2043
2044 static int
2045 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
2046     recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
2047     char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2048 {
2049         nvlist_t *stream_nv = NULL;
2050         avl_tree_t *stream_avl = NULL;
2051         char *fromsnap = NULL;
2052         char *cp;
2053         char tofs[ZFS_MAXNAMELEN];
2054         char sendfs[ZFS_MAXNAMELEN];
2055         char errbuf[1024];
2056         dmu_replay_record_t drre;
2057         int error;
2058         boolean_t anyerr = B_FALSE;
2059         boolean_t softerr = B_FALSE;
2060         boolean_t recursive;
2061
2062         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2063             "cannot receive"));
2064
2065         assert(drr->drr_type == DRR_BEGIN);
2066         assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
2067         assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
2068             DMU_COMPOUNDSTREAM);
2069
2070         /*
2071          * Read in the nvlist from the stream.
2072          */
2073         if (drr->drr_payloadlen != 0) {
2074                 error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
2075                     &stream_nv, flags.byteswap, zc);
2076                 if (error) {
2077                         error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2078                         goto out;
2079                 }
2080         }
2081
2082         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2083             ENOENT);
2084
2085         if (recursive && strchr(destname, '@')) {
2086                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2087                     "cannot specify snapshot name for multi-snapshot stream"));
2088                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2089                 goto out;
2090         }
2091
2092         /*
2093          * Read in the end record and verify checksum.
2094          */
2095         if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
2096             flags.byteswap, NULL)))
2097                 goto out;
2098         if (flags.byteswap) {
2099                 drre.drr_type = BSWAP_32(drre.drr_type);
2100                 drre.drr_u.drr_end.drr_checksum.zc_word[0] =
2101                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
2102                 drre.drr_u.drr_end.drr_checksum.zc_word[1] =
2103                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
2104                 drre.drr_u.drr_end.drr_checksum.zc_word[2] =
2105                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
2106                 drre.drr_u.drr_end.drr_checksum.zc_word[3] =
2107                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
2108         }
2109         if (drre.drr_type != DRR_END) {
2110                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2111                 goto out;
2112         }
2113         if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
2114                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2115                     "incorrect header checksum"));
2116                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2117                 goto out;
2118         }
2119
2120         (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
2121
2122         if (drr->drr_payloadlen != 0) {
2123                 nvlist_t *stream_fss;
2124
2125                 VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
2126                     &stream_fss));
2127                 if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
2128                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2129                             "couldn't allocate avl tree"));
2130                         error = zfs_error(hdl, EZFS_NOMEM, errbuf);
2131                         goto out;
2132                 }
2133
2134                 if (fromsnap != NULL) {
2135                         nvlist_t *renamed = NULL;
2136                         nvpair_t *pair = NULL;
2137
2138                         (void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
2139                         if (flags.isprefix) {
2140                                 struct drr_begin *drrb = &drr->drr_u.drr_begin;
2141                                 int i;
2142
2143                                 if (flags.istail) {
2144                                         cp = strrchr(drrb->drr_toname, '/');
2145                                         if (cp == NULL) {
2146                                                 (void) strlcat(tofs, "/",
2147                                                     ZFS_MAXNAMELEN);
2148                                                 i = 0;
2149                                         } else {
2150                                                 i = (cp - drrb->drr_toname);
2151                                         }
2152                                 } else {
2153                                         i = strcspn(drrb->drr_toname, "/@");
2154                                 }
2155                                 /* zfs_receive_one() will create_parents() */
2156                                 (void) strlcat(tofs, &drrb->drr_toname[i],
2157                                     ZFS_MAXNAMELEN);
2158                                 *strchr(tofs, '@') = '\0';
2159                         }
2160
2161                         if (recursive && !flags.dryrun && !flags.nomount) {
2162                                 VERIFY(0 == nvlist_alloc(&renamed,
2163                                     NV_UNIQUE_NAME, 0));
2164                         }
2165
2166                         softerr = recv_incremental_replication(hdl, tofs, flags,
2167                             stream_nv, stream_avl, renamed);
2168
2169                         /* Unmount renamed filesystems before receiving. */
2170                         while ((pair = nvlist_next_nvpair(renamed,
2171                             pair)) != NULL) {
2172                                 zfs_handle_t *zhp;
2173                                 prop_changelist_t *clp = NULL;
2174
2175                                 zhp = zfs_open(hdl, nvpair_name(pair),
2176                                     ZFS_TYPE_FILESYSTEM);
2177                                 if (zhp != NULL) {
2178                                         clp = changelist_gather(zhp,
2179                                             ZFS_PROP_MOUNTPOINT, 0, 0);
2180                                         zfs_close(zhp);
2181                                         if (clp != NULL) {
2182                                                 softerr |=
2183                                                     changelist_prefix(clp);
2184                                                 changelist_free(clp);
2185                                         }
2186                                 }
2187                         }
2188
2189                         nvlist_free(renamed);
2190                 }
2191         }
2192
2193         /*
2194          * Get the fs specified by the first path in the stream (the top level
2195          * specified by 'zfs send') and pass it to each invocation of
2196          * zfs_receive_one().
2197          */
2198         (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
2199             ZFS_MAXNAMELEN);
2200         if ((cp = strchr(sendfs, '@')) != NULL)
2201                 *cp = '\0';
2202
2203         /* Finally, receive each contained stream */
2204         do {
2205                 /*
2206                  * we should figure out if it has a recoverable
2207                  * error, in which case do a recv_skip() and drive on.
2208                  * Note, if we fail due to already having this guid,
2209                  * zfs_receive_one() will take care of it (ie,
2210                  * recv_skip() and return 0).
2211                  */
2212                 error = zfs_receive_impl(hdl, destname, flags, fd,
2213                     sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
2214                     action_handlep);
2215                 if (error == ENODATA) {
2216                         error = 0;
2217                         break;
2218                 }
2219                 anyerr |= error;
2220         } while (error == 0);
2221
2222         if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
2223                 /*
2224                  * Now that we have the fs's they sent us, try the
2225                  * renames again.
2226                  */
2227                 softerr = recv_incremental_replication(hdl, tofs, flags,
2228                     stream_nv, stream_avl, NULL);
2229         }
2230
2231 out:
2232         fsavl_destroy(stream_avl);
2233         if (stream_nv)
2234                 nvlist_free(stream_nv);
2235         if (softerr)
2236                 error = -2;
2237         if (anyerr)
2238                 error = -1;
2239         return (error);
2240 }
2241
2242 static void
2243 trunc_prop_errs(int truncated)
2244 {
2245         ASSERT(truncated != 0);
2246
2247         if (truncated == 1)
2248                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2249                     "1 more property could not be set\n"));
2250         else
2251                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2252                     "%d more properties could not be set\n"), truncated);
2253 }
2254
2255 static int
2256 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
2257 {
2258         dmu_replay_record_t *drr;
2259         void *buf = malloc(1<<20);
2260         char errbuf[1024];
2261
2262         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2263             "cannot receive:"));
2264
2265         /* XXX would be great to use lseek if possible... */
2266         drr = buf;
2267
2268         while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
2269             byteswap, NULL) == 0) {
2270                 if (byteswap)
2271                         drr->drr_type = BSWAP_32(drr->drr_type);
2272
2273                 switch (drr->drr_type) {
2274                 case DRR_BEGIN:
2275                         /* NB: not to be used on v2 stream packages */
2276                         if (drr->drr_payloadlen != 0) {
2277                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2278                                     "invalid substream header"));
2279                                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2280                         }
2281                         break;
2282
2283                 case DRR_END:
2284                         free(buf);
2285                         return (0);
2286
2287                 case DRR_OBJECT:
2288                         if (byteswap) {
2289                                 drr->drr_u.drr_object.drr_bonuslen =
2290                                     BSWAP_32(drr->drr_u.drr_object.
2291                                     drr_bonuslen);
2292                         }
2293                         (void) recv_read(hdl, fd, buf,
2294                             P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
2295                             B_FALSE, NULL);
2296                         break;
2297
2298                 case DRR_WRITE:
2299                         if (byteswap) {
2300                                 drr->drr_u.drr_write.drr_length =
2301                                     BSWAP_64(drr->drr_u.drr_write.drr_length);
2302                         }
2303                         (void) recv_read(hdl, fd, buf,
2304                             drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
2305                         break;
2306                 case DRR_SPILL:
2307                         if (byteswap) {
2308                                 drr->drr_u.drr_write.drr_length =
2309                                     BSWAP_64(drr->drr_u.drr_spill.drr_length);
2310                         }
2311                         (void) recv_read(hdl, fd, buf,
2312                             drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
2313                         break;
2314                 case DRR_WRITE_BYREF:
2315                 case DRR_FREEOBJECTS:
2316                 case DRR_FREE:
2317                         break;
2318
2319                 default:
2320                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2321                             "invalid record type"));
2322                         return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2323                 }
2324         }
2325
2326         free(buf);
2327         return (-1);
2328 }
2329
2330 /*
2331  * Restores a backup of tosnap from the file descriptor specified by infd.
2332  */
2333 static int
2334 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
2335     recvflags_t flags, dmu_replay_record_t *drr,
2336     dmu_replay_record_t *drr_noswap, const char *sendfs,
2337     nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
2338     uint64_t *action_handlep)
2339 {
2340         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2341         time_t begin_time;
2342         int ioctl_err, ioctl_errno, err;
2343         char *cp;
2344         struct drr_begin *drrb = &drr->drr_u.drr_begin;
2345         char errbuf[1024];
2346         char prop_errbuf[1024];
2347         const char *chopprefix;
2348         boolean_t newfs = B_FALSE;
2349         boolean_t stream_wantsnewfs;
2350         uint64_t parent_snapguid = 0;
2351         prop_changelist_t *clp = NULL;
2352         nvlist_t *snapprops_nvlist = NULL;
2353         zprop_errflags_t prop_errflags;
2354         boolean_t recursive;
2355
2356         begin_time = time(NULL);
2357
2358         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2359             "cannot receive"));
2360
2361         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2362             ENOENT);
2363
2364         if (stream_avl != NULL) {
2365                 char *snapname;
2366                 nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
2367                     &snapname);
2368                 nvlist_t *props;
2369                 int ret;
2370
2371                 (void) nvlist_lookup_uint64(fs, "parentfromsnap",
2372                     &parent_snapguid);
2373                 err = nvlist_lookup_nvlist(fs, "props", &props);
2374                 if (err)
2375                         VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
2376
2377                 if (flags.canmountoff) {
2378                         VERIFY(0 == nvlist_add_uint64(props,
2379                             zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
2380                 }
2381                 ret = zcmd_write_src_nvlist(hdl, &zc, props);
2382                 if (err)
2383                         nvlist_free(props);
2384
2385                 if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
2386                         VERIFY(0 == nvlist_lookup_nvlist(props,
2387                             snapname, &snapprops_nvlist));
2388                 }
2389
2390                 if (ret != 0)
2391                         return (-1);
2392         }
2393
2394         cp = NULL;
2395
2396         /*
2397          * Determine how much of the snapshot name stored in the stream
2398          * we are going to tack on to the name they specified on the
2399          * command line, and how much we are going to chop off.
2400          *
2401          * If they specified a snapshot, chop the entire name stored in
2402          * the stream.
2403          */
2404         if (flags.istail) {
2405                 /*
2406                  * A filesystem was specified with -e. We want to tack on only
2407                  * the tail of the sent snapshot path.
2408                  */
2409                 if (strchr(tosnap, '@')) {
2410                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2411                             "argument - snapshot not allowed with -e"));
2412                         return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2413                 }
2414
2415                 chopprefix = strrchr(sendfs, '/');
2416
2417                 if (chopprefix == NULL) {
2418                         /*
2419                          * The tail is the poolname, so we need to
2420                          * prepend a path separator.
2421                          */
2422                         int len = strlen(drrb->drr_toname);
2423                         cp = malloc(len + 2);
2424                         cp[0] = '/';
2425                         (void) strcpy(&cp[1], drrb->drr_toname);
2426                         chopprefix = cp;
2427                 } else {
2428                         chopprefix = drrb->drr_toname + (chopprefix - sendfs);
2429                 }
2430         } else if (flags.isprefix) {
2431                 /*
2432                  * A filesystem was specified with -d. We want to tack on
2433                  * everything but the first element of the sent snapshot path
2434                  * (all but the pool name).
2435                  */
2436                 if (strchr(tosnap, '@')) {
2437                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2438                             "argument - snapshot not allowed with -d"));
2439                         return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2440                 }
2441
2442                 chopprefix = strchr(drrb->drr_toname, '/');
2443                 if (chopprefix == NULL)
2444                         chopprefix = strchr(drrb->drr_toname, '@');
2445         } else if (strchr(tosnap, '@') == NULL) {
2446                 /*
2447                  * If a filesystem was specified without -d or -e, we want to
2448                  * tack on everything after the fs specified by 'zfs send'.
2449                  */
2450                 chopprefix = drrb->drr_toname + strlen(sendfs);
2451         } else {
2452                 /* A snapshot was specified as an exact path (no -d or -e). */
2453                 if (recursive) {
2454                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2455                             "cannot specify snapshot name for multi-snapshot "
2456                             "stream"));
2457                         return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2458                 }
2459                 chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
2460         }
2461
2462         ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
2463         ASSERT(chopprefix > drrb->drr_toname);
2464         ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
2465         ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
2466             chopprefix[0] == '\0');
2467
2468         /*
2469          * Determine name of destination snapshot, store in zc_value.
2470          */
2471         (void) strcpy(zc.zc_top_ds, tosnap);
2472         (void) strcpy(zc.zc_value, tosnap);
2473         (void) strlcat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
2474         free(cp);
2475         if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
2476                 zcmd_free_nvlists(&zc);
2477                 return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2478         }
2479
2480         /*
2481          * Determine the name of the origin snapshot, store in zc_string.
2482          */
2483         if (drrb->drr_flags & DRR_FLAG_CLONE) {
2484                 if (guid_to_name(hdl, tosnap,
2485                     drrb->drr_fromguid, zc.zc_string) != 0) {
2486                         zcmd_free_nvlists(&zc);
2487                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2488                             "local origin for clone %s does not exist"),
2489                             zc.zc_value);
2490                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
2491                 }
2492                 if (flags.verbose)
2493                         (void) printf("found clone origin %s\n", zc.zc_string);
2494         }
2495
2496         stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
2497             (drrb->drr_flags & DRR_FLAG_CLONE));
2498
2499         if (stream_wantsnewfs) {
2500                 /*
2501                  * if the parent fs does not exist, look for it based on
2502                  * the parent snap GUID
2503                  */
2504                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2505                     "cannot receive new filesystem stream"));
2506
2507                 (void) strcpy(zc.zc_name, zc.zc_value);
2508                 cp = strrchr(zc.zc_name, '/');
2509                 if (cp)
2510                         *cp = '\0';
2511                 if (cp &&
2512                     !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2513                         char suffix[ZFS_MAXNAMELEN];
2514                         (void) strcpy(suffix, strrchr(zc.zc_value, '/'));
2515                         if (guid_to_name(hdl, tosnap, parent_snapguid,
2516                             zc.zc_value) == 0) {
2517                                 *strchr(zc.zc_value, '@') = '\0';
2518                                 (void) strcat(zc.zc_value, suffix);
2519                         }
2520                 }
2521         } else {
2522                 /*
2523                  * if the fs does not exist, look for it based on the
2524                  * fromsnap GUID
2525                  */
2526                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2527                     "cannot receive incremental stream"));
2528
2529                 (void) strcpy(zc.zc_name, zc.zc_value);
2530                 *strchr(zc.zc_name, '@') = '\0';
2531
2532                 /*
2533                  * If the exact receive path was specified and this is the
2534                  * topmost path in the stream, then if the fs does not exist we
2535                  * should look no further.
2536                  */
2537                 if ((flags.isprefix || (*(chopprefix = drrb->drr_toname +
2538                     strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
2539                     !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2540                         char snap[ZFS_MAXNAMELEN];
2541                         (void) strcpy(snap, strchr(zc.zc_value, '@'));
2542                         if (guid_to_name(hdl, tosnap, drrb->drr_fromguid,
2543                             zc.zc_value) == 0) {
2544                                 *strchr(zc.zc_value, '@') = '\0';
2545                                 (void) strcat(zc.zc_value, snap);
2546                         }
2547                 }
2548         }
2549
2550         (void) strcpy(zc.zc_name, zc.zc_value);
2551         *strchr(zc.zc_name, '@') = '\0';
2552
2553         if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2554                 zfs_handle_t *zhp;
2555
2556                 /*
2557                  * Destination fs exists.  Therefore this should either
2558                  * be an incremental, or the stream specifies a new fs
2559                  * (full stream or clone) and they want us to blow it
2560                  * away (and have therefore specified -F and removed any
2561                  * snapshots).
2562                  */
2563                 if (stream_wantsnewfs) {
2564                         if (!flags.force) {
2565                                 zcmd_free_nvlists(&zc);
2566                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2567                                     "destination '%s' exists\n"
2568                                     "must specify -F to overwrite it"),
2569                                     zc.zc_name);
2570                                 return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2571                         }
2572                         if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
2573                             &zc) == 0) {
2574                                 zcmd_free_nvlists(&zc);
2575                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2576                                     "destination has snapshots (eg. %s)\n"
2577                                     "must destroy them to overwrite it"),
2578                                     zc.zc_name);
2579                                 return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2580                         }
2581                 }
2582
2583                 if ((zhp = zfs_open(hdl, zc.zc_name,
2584                     ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
2585                         zcmd_free_nvlists(&zc);
2586                         return (-1);
2587                 }
2588
2589                 if (stream_wantsnewfs &&
2590                     zhp->zfs_dmustats.dds_origin[0]) {
2591                         zcmd_free_nvlists(&zc);
2592                         zfs_close(zhp);
2593                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2594                             "destination '%s' is a clone\n"
2595                             "must destroy it to overwrite it"),
2596                             zc.zc_name);
2597                         return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2598                 }
2599
2600                 if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
2601                     stream_wantsnewfs) {
2602                         /* We can't do online recv in this case */
2603                         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
2604                         if (clp == NULL) {
2605                                 zfs_close(zhp);
2606                                 zcmd_free_nvlists(&zc);
2607                                 return (-1);
2608                         }
2609                         if (changelist_prefix(clp) != 0) {
2610                                 changelist_free(clp);
2611                                 zfs_close(zhp);
2612                                 zcmd_free_nvlists(&zc);
2613                                 return (-1);
2614                         }
2615                 }
2616                 if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME &&
2617                     zvol_remove_link(hdl, zhp->zfs_name) != 0) {
2618                         zfs_close(zhp);
2619                         zcmd_free_nvlists(&zc);
2620                         return (-1);
2621                 }
2622                 zfs_close(zhp);
2623         } else {
2624                 /*
2625                  * Destination filesystem does not exist.  Therefore we better
2626                  * be creating a new filesystem (either from a full backup, or
2627                  * a clone).  It would therefore be invalid if the user
2628                  * specified only the pool name (i.e. if the destination name
2629                  * contained no slash character).
2630                  */
2631                 if (!stream_wantsnewfs ||
2632                     (cp = strrchr(zc.zc_name, '/')) == NULL) {
2633                         zcmd_free_nvlists(&zc);
2634                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2635                             "destination '%s' does not exist"), zc.zc_name);
2636                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
2637                 }
2638
2639                 /*
2640                  * Trim off the final dataset component so we perform the
2641                  * recvbackup ioctl to the filesystems's parent.
2642                  */
2643                 *cp = '\0';
2644
2645                 if (flags.isprefix && !flags.istail && !flags.dryrun &&
2646                     create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
2647                         zcmd_free_nvlists(&zc);
2648                         return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
2649                 }
2650
2651                 newfs = B_TRUE;
2652         }
2653
2654         zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
2655         zc.zc_cookie = infd;
2656         zc.zc_guid = flags.force;
2657         if (flags.verbose) {
2658                 (void) printf("%s %s stream of %s into %s\n",
2659                     flags.dryrun ? "would receive" : "receiving",
2660                     drrb->drr_fromguid ? "incremental" : "full",
2661                     drrb->drr_toname, zc.zc_value);
2662                 (void) fflush(stdout);
2663         }
2664
2665         if (flags.dryrun) {
2666                 zcmd_free_nvlists(&zc);
2667                 return (recv_skip(hdl, infd, flags.byteswap));
2668         }
2669
2670         zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
2671         zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
2672         zc.zc_cleanup_fd = cleanup_fd;
2673         zc.zc_action_handle = *action_handlep;
2674
2675         err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
2676         ioctl_errno = errno;
2677         prop_errflags = (zprop_errflags_t)zc.zc_obj;
2678
2679         if (err == 0) {
2680                 nvlist_t *prop_errors;
2681                 VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
2682                     zc.zc_nvlist_dst_size, &prop_errors, 0));
2683
2684                 nvpair_t *prop_err = NULL;
2685
2686                 while ((prop_err = nvlist_next_nvpair(prop_errors,
2687                     prop_err)) != NULL) {
2688                         char tbuf[1024];
2689                         zfs_prop_t prop;
2690                         int intval;
2691
2692                         prop = zfs_name_to_prop(nvpair_name(prop_err));
2693                         (void) nvpair_value_int32(prop_err, &intval);
2694                         if (strcmp(nvpair_name(prop_err),
2695                             ZPROP_N_MORE_ERRORS) == 0) {
2696                                 trunc_prop_errs(intval);
2697                                 break;
2698                         } else {
2699                                 (void) snprintf(tbuf, sizeof (tbuf),
2700                                     dgettext(TEXT_DOMAIN,
2701                                     "cannot receive %s property on %s"),
2702                                     nvpair_name(prop_err), zc.zc_name);
2703                                 zfs_setprop_error(hdl, prop, intval, tbuf);
2704                         }
2705                 }
2706                 nvlist_free(prop_errors);
2707         }
2708
2709         zc.zc_nvlist_dst = 0;
2710         zc.zc_nvlist_dst_size = 0;
2711         zcmd_free_nvlists(&zc);
2712
2713         if (err == 0 && snapprops_nvlist) {
2714                 zfs_cmd_t zc2 = { "\0", "\0", "\0", "\0", 0 };
2715
2716                 (void) strcpy(zc2.zc_name, zc.zc_value);
2717                 zc2.zc_cookie = B_TRUE; /* received */
2718                 if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
2719                         (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
2720                         zcmd_free_nvlists(&zc2);
2721                 }
2722         }
2723
2724         if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
2725                 /*
2726                  * It may be that this snapshot already exists,
2727                  * in which case we want to consume & ignore it
2728                  * rather than failing.
2729                  */
2730                 avl_tree_t *local_avl;
2731                 nvlist_t *local_nv, *fs;
2732                 cp = strchr(zc.zc_value, '@');
2733
2734                 /*
2735                  * XXX Do this faster by just iterating over snaps in
2736                  * this fs.  Also if zc_value does not exist, we will
2737                  * get a strange "does not exist" error message.
2738                  */
2739                 *cp = '\0';
2740                 if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE,
2741                     &local_nv, &local_avl) == 0) {
2742                         *cp = '@';
2743                         fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
2744                         fsavl_destroy(local_avl);
2745                         nvlist_free(local_nv);
2746
2747                         if (fs != NULL) {
2748                                 if (flags.verbose) {
2749                                         (void) printf("snap %s already exists; "
2750                                             "ignoring\n", zc.zc_value);
2751                                 }
2752                                 err = ioctl_err = recv_skip(hdl, infd,
2753                                     flags.byteswap);
2754                         }
2755                 }
2756                 *cp = '@';
2757         }
2758
2759         if (ioctl_err != 0) {
2760                 switch (ioctl_errno) {
2761                 case ENODEV:
2762                         cp = strchr(zc.zc_value, '@');
2763                         *cp = '\0';
2764                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2765                             "most recent snapshot of %s does not\n"
2766                             "match incremental source"), zc.zc_value);
2767                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
2768                         *cp = '@';
2769                         break;
2770                 case ETXTBSY:
2771                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2772                             "destination %s has been modified\n"
2773                             "since most recent snapshot"), zc.zc_name);
2774                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
2775                         break;
2776                 case EEXIST:
2777                         cp = strchr(zc.zc_value, '@');
2778                         if (newfs) {
2779                                 /* it's the containing fs that exists */
2780                                 *cp = '\0';
2781                         }
2782                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2783                             "destination already exists"));
2784                         (void) zfs_error_fmt(hdl, EZFS_EXISTS,
2785                             dgettext(TEXT_DOMAIN, "cannot restore to %s"),
2786                             zc.zc_value);
2787                         *cp = '@';
2788                         break;
2789                 case EINVAL:
2790                         (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2791                         break;
2792                 case ECKSUM:
2793                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2794                             "invalid stream (checksum mismatch)"));
2795                         (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2796                         break;
2797                 case ENOTSUP:
2798                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2799                             "pool must be upgraded to receive this stream."));
2800                         (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
2801                         break;
2802                 case EDQUOT:
2803                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2804                             "destination %s space quota exceeded"), zc.zc_name);
2805                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
2806                         break;
2807                 default:
2808                         (void) zfs_standard_error(hdl, ioctl_errno, errbuf);
2809                 }
2810         }
2811
2812         /*
2813          * Mount the target filesystem (if created).  Also mount any
2814          * children of the target filesystem if we did a replication
2815          * receive (indicated by stream_avl being non-NULL).
2816          */
2817         cp = strchr(zc.zc_value, '@');
2818         if (cp && (ioctl_err == 0 || !newfs)) {
2819                 zfs_handle_t *h;
2820
2821                 *cp = '\0';
2822                 h = zfs_open(hdl, zc.zc_value,
2823                     ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
2824                 if (h != NULL) {
2825                         if (h->zfs_type == ZFS_TYPE_VOLUME) {
2826                                 *cp = '@';
2827                                 err = zvol_create_link(hdl, h->zfs_name);
2828                                 if (err == 0 && ioctl_err == 0)
2829                                         err = zvol_create_link(hdl,
2830                                             zc.zc_value);
2831                         } else if (newfs || stream_avl) {
2832                                 /*
2833                                  * Track the first/top of hierarchy fs,
2834                                  * for mounting and sharing later.
2835                                  */
2836                                 if (top_zfs && *top_zfs == NULL)
2837                                         *top_zfs = zfs_strdup(hdl, zc.zc_value);
2838                         }
2839                         zfs_close(h);
2840                 }
2841                 *cp = '@';
2842         }
2843
2844         if (clp) {
2845                 err |= changelist_postfix(clp);
2846                 changelist_free(clp);
2847         }
2848
2849         if (prop_errflags & ZPROP_ERR_NOCLEAR) {
2850                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
2851                     "failed to clear unreceived properties on %s"),
2852                     zc.zc_name);
2853                 (void) fprintf(stderr, "\n");
2854         }
2855         if (prop_errflags & ZPROP_ERR_NORESTORE) {
2856                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
2857                     "failed to restore original properties on %s"),
2858                     zc.zc_name);
2859                 (void) fprintf(stderr, "\n");
2860         }
2861
2862         if (err || ioctl_err)
2863                 return (-1);
2864
2865         *action_handlep = zc.zc_action_handle;
2866
2867         if (flags.verbose) {
2868                 char buf1[64];
2869                 char buf2[64];
2870                 uint64_t bytes = zc.zc_cookie;
2871                 time_t delta = time(NULL) - begin_time;
2872                 if (delta == 0)
2873                         delta = 1;
2874                 zfs_nicenum(bytes, buf1, sizeof (buf1));
2875                 zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
2876
2877                 (void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
2878                     buf1, delta, buf2);
2879         }
2880
2881         return (0);
2882 }
2883
2884 static int
2885 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
2886     int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl,
2887     char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2888 {
2889         int err;
2890         dmu_replay_record_t drr, drr_noswap;
2891         struct drr_begin *drrb = &drr.drr_u.drr_begin;
2892         char errbuf[1024];
2893         zio_cksum_t zcksum = { { 0 } };
2894         uint64_t featureflags;
2895         int hdrtype;
2896
2897         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2898             "cannot receive"));
2899
2900         if (flags.isprefix &&
2901             !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
2902                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
2903                     "(%s) does not exist"), tosnap);
2904                 return (zfs_error(hdl, EZFS_NOENT, errbuf));
2905         }
2906
2907         /* read in the BEGIN record */
2908         if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
2909             &zcksum)))
2910                 return (err);
2911
2912         if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
2913                 /* It's the double end record at the end of a package */
2914                 return (ENODATA);
2915         }
2916
2917         /* the kernel needs the non-byteswapped begin record */
2918         drr_noswap = drr;
2919
2920         flags.byteswap = B_FALSE;
2921         if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
2922                 /*
2923                  * We computed the checksum in the wrong byteorder in
2924                  * recv_read() above; do it again correctly.
2925                  */
2926                 bzero(&zcksum, sizeof (zio_cksum_t));
2927                 fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
2928                 flags.byteswap = B_TRUE;
2929
2930                 drr.drr_type = BSWAP_32(drr.drr_type);
2931                 drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
2932                 drrb->drr_magic = BSWAP_64(drrb->drr_magic);
2933                 drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
2934                 drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
2935                 drrb->drr_type = BSWAP_32(drrb->drr_type);
2936                 drrb->drr_flags = BSWAP_32(drrb->drr_flags);
2937                 drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
2938                 drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
2939         }
2940
2941         if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
2942                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2943                     "stream (bad magic number)"));
2944                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2945         }
2946
2947         featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
2948         hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
2949
2950         if (!DMU_STREAM_SUPPORTED(featureflags) ||
2951             (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
2952                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2953                     "stream has unsupported feature, feature flags = %lx"),
2954                     featureflags);
2955                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2956         }
2957
2958         if (strchr(drrb->drr_toname, '@') == NULL) {
2959                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2960                     "stream (bad snapshot name)"));
2961                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2962         }
2963
2964         if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
2965                 char nonpackage_sendfs[ZFS_MAXNAMELEN];
2966                 if (sendfs == NULL) {
2967                         /*
2968                          * We were not called from zfs_receive_package(). Get
2969                          * the fs specified by 'zfs send'.
2970                          */
2971                         char *cp;
2972                         (void) strlcpy(nonpackage_sendfs,
2973                             drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN);
2974                         if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
2975                                 *cp = '\0';
2976                         sendfs = nonpackage_sendfs;
2977                 }
2978                 return (zfs_receive_one(hdl, infd, tosnap, flags,
2979                     &drr, &drr_noswap, sendfs, stream_nv, stream_avl,
2980                     top_zfs, cleanup_fd, action_handlep));
2981         } else {
2982                 assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
2983                     DMU_COMPOUNDSTREAM);
2984                 return (zfs_receive_package(hdl, infd, tosnap, flags,
2985                     &drr, &zcksum, top_zfs, cleanup_fd, action_handlep));
2986         }
2987 }
2988
2989 /*
2990  * Restores a backup of tosnap from the file descriptor specified by infd.
2991  * Return 0 on total success, -2 if some things couldn't be
2992  * destroyed/renamed/promoted, -1 if some things couldn't be received.
2993  * (-1 will override -2).
2994  */
2995 int
2996 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
2997     int infd, avl_tree_t *stream_avl)
2998 {
2999         char *top_zfs = NULL;
3000         int err;
3001         int cleanup_fd;
3002         uint64_t action_handle = 0;
3003
3004         cleanup_fd = open(ZFS_DEV, O_RDWR);
3005         VERIFY(cleanup_fd >= 0);
3006
3007         err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL,
3008             stream_avl, &top_zfs, cleanup_fd, &action_handle);
3009
3010         VERIFY(0 == close(cleanup_fd));
3011
3012         if (err == 0 && !flags.nomount && top_zfs) {
3013                 zfs_handle_t *zhp;
3014                 prop_changelist_t *clp;
3015
3016                 zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
3017                 if (zhp != NULL) {
3018                         clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
3019                             CL_GATHER_MOUNT_ALWAYS, 0);
3020                         zfs_close(zhp);
3021                         if (clp != NULL) {
3022                                 /* mount and share received datasets */
3023                                 err = changelist_postfix(clp);
3024                                 changelist_free(clp);
3025                         }
3026                 }
3027                 if (zhp == NULL || clp == NULL || err)
3028                         err = -1;
3029         }
3030         if (top_zfs)
3031                 free(top_zfs);
3032
3033         return (err);
3034 }