Add linux sha2 support
[zfs.git] / lib / libzfs / libzfs_sendrecv.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25
26 #include <assert.h>
27 #include <ctype.h>
28 #include <errno.h>
29 #include <libintl.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <strings.h>
33 #include <unistd.h>
34 #include <stddef.h>
35 #include <fcntl.h>
36 #include <sys/mount.h>
37 #include <pthread.h>
38 #include <umem.h>
39
40 #include <libzfs.h>
41
42 #include "zfs_namecheck.h"
43 #include "zfs_prop.h"
44 #include "zfs_fletcher.h"
45 #include "libzfs_impl.h"
46 #include <sys/zio_checksum.h>
47 #include <sys/ddt.h>
48
/*
 * Defined in libzfs_dataset.c; declared here by hand rather than through a
 * header.
 */
extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);

/* Forward declaration; the definition appears later in this file. */
static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t,
    int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);

/*
 * All-zero checksum.  cksummer() compares the checksum carried by a
 * DRR_WRITE record against this value to decide whether it has to compute
 * one itself.
 */
static const zio_cksum_t zero_cksum = { { 0 } };
56
/*
 * Argument block handed to cksummer(): where to read the raw send stream
 * from, where to write the deduplicated stream to, and the libzfs handle
 * that is forwarded to ddt_update() for error reporting.
 */
typedef struct dedup_arg {
	int	inputfd;	/* wrapped with fdopen() and read by cksummer() */
	int	outputfd;	/* destination of the rewritten stream */
	libzfs_handle_t  *dedup_hdl;	/* handle passed on to ddt_update() */
} dedup_arg_t;
62
/*
 * Location of a data block that has already been emitted into the stream.
 * cksummer() fills these fields from a DRR_WRITE record's drr_toguid,
 * drr_object and drr_offset, and copies them into the drr_ref* fields of a
 * DRR_WRITE_BYREF record when a duplicate is found.
 */
typedef struct dataref {
	uint64_t ref_guid;
	uint64_t ref_object;
	uint64_t ref_offset;
} dataref_t;
68
/*
 * One entry of the dedup table.  Entries that land in the same hash bucket
 * are chained through dde_next.  ddt_update() treats the pair
 * (dde_chksum, dde_prop) as the lookup key and hands back dde_ref on a match.
 */
typedef struct dedup_entry {
	struct dedup_entry	*dde_next;	/* next entry in this bucket */
	zio_cksum_t dde_chksum;			/* key: block checksum */
	uint64_t dde_prop;			/* key: ddk_prop of the block */
	dataref_t dde_ref;			/* where the block was first sent */
} dedup_entry_t;
75
/*
 * Sizing of the in-memory dedup table.  cksummer() limits the table to the
 * larger of MAX_DDT_PHYSMEM_PERCENT percent of physical memory and
 * SMALLEST_POSSIBLE_MAX_DDT_MB megabytes.
 */
#define	MAX_DDT_PHYSMEM_PERCENT		20
#define	SMALLEST_POSSIBLE_MAX_DDT_MB		128
78
/*
 * State of the dedup table built by cksummer().
 */
typedef struct dedup_table {
	dedup_entry_t	**dedup_hash_array;	/* bucket heads, indexed by hash */
	umem_cache_t	*ddecache;	/* allocator for dedup_entry_t */
	uint64_t	max_ddt_size;  /* max dedup table size in bytes */
	uint64_t	cur_ddt_size;  /* current dedup table size in bytes */
	uint64_t	ddt_count;	/* bumped for every entry appended */
	int		numhashbits;	/* bit width given to BF64_GET() */
	boolean_t	ddt_full;	/* set once "table full" was reported */
} dedup_table_t;
88
/*
 * Return the number of bits needed to represent n, i.e. the 1-based
 * position of its most significant set bit.  Returns 0 when n is 0.
 */
static int
high_order_bit(uint64_t n)
{
	int nbits = 0;

	while (n != 0) {
		n >>= 1;
		nbits++;
	}
	return (nbits);
}
98
/*
 * Read one item of exactly len bytes from stream into buf.
 *
 * Returns 1 when the whole item was read and 0 otherwise (end of file,
 * read error, short read, or len == 0).
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
	/* fread() is asked for a single item, so its result is already 0 or 1 */
	return (fread(buf, len, 1, stream));
}
109
110 static void
111 ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
112     zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
113 {
114         dedup_entry_t   *dde;
115
116         if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
117                 if (ddt->ddt_full == B_FALSE) {
118                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
119                             "Dedup table full.  Deduplication will continue "
120                             "with existing table entries"));
121                         ddt->ddt_full = B_TRUE;
122                 }
123                 return;
124         }
125
126         if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
127             != NULL) {
128                 assert(*ddepp == NULL);
129                 dde->dde_next = NULL;
130                 dde->dde_chksum = *cs;
131                 dde->dde_prop = prop;
132                 dde->dde_ref = *dr;
133                 *ddepp = dde;
134                 ddt->cur_ddt_size += sizeof (dedup_entry_t);
135                 ddt->ddt_count++;
136         }
137 }
138
139 /*
140  * Using the specified dedup table, do a lookup for an entry with
141  * the checksum cs.  If found, return the block's reference info
142  * in *dr. Otherwise, insert a new entry in the dedup table, using
143  * the reference information specified by *dr.
144  *
145  * return value:  true - entry was found
146  *                false - entry was not found
147  */
148 static boolean_t
149 ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
150     uint64_t prop, dataref_t *dr)
151 {
152         uint32_t hashcode;
153         dedup_entry_t **ddepp;
154
155         hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
156
157         for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
158             ddepp = &((*ddepp)->dde_next)) {
159                 if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
160                     (*ddepp)->dde_prop == prop) {
161                         *dr = (*ddepp)->dde_ref;
162                         return (B_TRUE);
163                 }
164         }
165         ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
166         return (B_FALSE);
167 }
168
169 static int
170 cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
171 {
172         fletcher_4_incremental_native(buf, len, zc);
173         return (write(outfd, buf, len));
174 }
175
176 /*
177  * This function is started in a separate thread when the dedup option
178  * has been requested.  The main send thread determines the list of
179  * snapshots to be included in the send stream and makes the ioctl calls
180  * for each one.  But instead of having the ioctl send the output to the
181  * the output fd specified by the caller of zfs_send()), the
182  * ioctl is told to direct the output to a pipe, which is read by the
183  * alternate thread running THIS function.  This function does the
184  * dedup'ing by:
185  *  1. building a dedup table (the DDT)
186  *  2. doing checksums on each data block and inserting a record in the DDT
187  *  3. looking for matching checksums, and
188  *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
189  *      a duplicate block is found.
190  * The output of this function then goes to the output fd requested
191  * by the caller of zfs_send().
192  */
193 static void *
194 cksummer(void *arg)
195 {
196         dedup_arg_t *dda = arg;
197         char *buf = malloc(1<<20);
198         dmu_replay_record_t thedrr;
199         dmu_replay_record_t *drr = &thedrr;
200         struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
201         struct drr_end *drre = &thedrr.drr_u.drr_end;
202         struct drr_object *drro = &thedrr.drr_u.drr_object;
203         struct drr_write *drrw = &thedrr.drr_u.drr_write;
204         struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
205         FILE *ofp;
206         int outfd;
207         dmu_replay_record_t wbr_drr = {0};
208         struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
209         dedup_table_t ddt;
210         zio_cksum_t stream_cksum;
211         uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
212         uint64_t numbuckets;
213
214         ddt.max_ddt_size =
215             MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
216             SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
217
218         numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
219
220         /*
221          * numbuckets must be a power of 2.  Increase number to
222          * a power of 2 if necessary.
223          */
224         if (!ISP2(numbuckets))
225                 numbuckets = 1 << high_order_bit(numbuckets);
226
227         ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
228         ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
229             NULL, NULL, NULL, NULL, NULL, 0);
230         ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
231         ddt.numhashbits = high_order_bit(numbuckets) - 1;
232         ddt.ddt_full = B_FALSE;
233
234         /* Initialize the write-by-reference block. */
235         wbr_drr.drr_type = DRR_WRITE_BYREF;
236         wbr_drr.drr_payloadlen = 0;
237
238         outfd = dda->outputfd;
239         ofp = fdopen(dda->inputfd, "r");
240         while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
241
242                 switch (drr->drr_type) {
243                 case DRR_BEGIN:
244                 {
245                         int     fflags;
246                         ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
247
248                         /* set the DEDUP feature flag for this stream */
249                         fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
250                         fflags |= (DMU_BACKUP_FEATURE_DEDUP |
251                             DMU_BACKUP_FEATURE_DEDUPPROPS);
252                         DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
253
254                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
255                             &stream_cksum, outfd) == -1)
256                                 goto out;
257                         if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
258                             DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
259                                 int sz = drr->drr_payloadlen;
260
261                                 if (sz > 1<<20) {
262                                         free(buf);
263                                         buf = malloc(sz);
264                                 }
265                                 (void) ssread(buf, sz, ofp);
266                                 if (ferror(stdin))
267                                         perror("fread");
268                                 if (cksum_and_write(buf, sz, &stream_cksum,
269                                     outfd) == -1)
270                                         goto out;
271                         }
272                         break;
273                 }
274
275                 case DRR_END:
276                 {
277                         /* use the recalculated checksum */
278                         ZIO_SET_CHECKSUM(&drre->drr_checksum,
279                             stream_cksum.zc_word[0], stream_cksum.zc_word[1],
280                             stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
281                         if ((write(outfd, drr,
282                             sizeof (dmu_replay_record_t))) == -1)
283                                 goto out;
284                         break;
285                 }
286
287                 case DRR_OBJECT:
288                 {
289                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
290                             &stream_cksum, outfd) == -1)
291                                 goto out;
292                         if (drro->drr_bonuslen > 0) {
293                                 (void) ssread(buf,
294                                     P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
295                                     ofp);
296                                 if (cksum_and_write(buf,
297                                     P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
298                                     &stream_cksum, outfd) == -1)
299                                         goto out;
300                         }
301                         break;
302                 }
303
304                 case DRR_SPILL:
305                 {
306                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
307                             &stream_cksum, outfd) == -1)
308                                 goto out;
309                         (void) ssread(buf, drrs->drr_length, ofp);
310                         if (cksum_and_write(buf, drrs->drr_length,
311                             &stream_cksum, outfd) == -1)
312                                 goto out;
313                         break;
314                 }
315
316                 case DRR_FREEOBJECTS:
317                 {
318                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
319                             &stream_cksum, outfd) == -1)
320                                 goto out;
321                         break;
322                 }
323
324                 case DRR_WRITE:
325                 {
326                         dataref_t       dataref;
327
328                         (void) ssread(buf, drrw->drr_length, ofp);
329
330                         /*
331                          * Use the existing checksum if it's dedup-capable,
332                          * else calculate a SHA256 checksum for it.
333                          */
334
335                         if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
336                             zero_cksum) ||
337                             !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
338                                 zio_cksum_t tmpsha256;
339
340                                 zio_checksum_SHA256(buf,
341                                     drrw->drr_length, &tmpsha256);
342
343                                 drrw->drr_key.ddk_cksum.zc_word[0] =
344                                     BE_64(tmpsha256.zc_word[0]);
345                                 drrw->drr_key.ddk_cksum.zc_word[1] =
346                                     BE_64(tmpsha256.zc_word[1]);
347                                 drrw->drr_key.ddk_cksum.zc_word[2] =
348                                     BE_64(tmpsha256.zc_word[2]);
349                                 drrw->drr_key.ddk_cksum.zc_word[3] =
350                                     BE_64(tmpsha256.zc_word[3]);
351                                 drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
352                                 drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
353                         }
354
355                         dataref.ref_guid = drrw->drr_toguid;
356                         dataref.ref_object = drrw->drr_object;
357                         dataref.ref_offset = drrw->drr_offset;
358
359                         if (ddt_update(dda->dedup_hdl, &ddt,
360                             &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
361                             &dataref)) {
362                                 /* block already present in stream */
363                                 wbr_drrr->drr_object = drrw->drr_object;
364                                 wbr_drrr->drr_offset = drrw->drr_offset;
365                                 wbr_drrr->drr_length = drrw->drr_length;
366                                 wbr_drrr->drr_toguid = drrw->drr_toguid;
367                                 wbr_drrr->drr_refguid = dataref.ref_guid;
368                                 wbr_drrr->drr_refobject =
369                                     dataref.ref_object;
370                                 wbr_drrr->drr_refoffset =
371                                     dataref.ref_offset;
372
373                                 wbr_drrr->drr_checksumtype =
374                                     drrw->drr_checksumtype;
375                                 wbr_drrr->drr_checksumflags =
376                                     drrw->drr_checksumtype;
377                                 wbr_drrr->drr_key.ddk_cksum =
378                                     drrw->drr_key.ddk_cksum;
379                                 wbr_drrr->drr_key.ddk_prop =
380                                     drrw->drr_key.ddk_prop;
381
382                                 if (cksum_and_write(&wbr_drr,
383                                     sizeof (dmu_replay_record_t), &stream_cksum,
384                                     outfd) == -1)
385                                         goto out;
386                         } else {
387                                 /* block not previously seen */
388                                 if (cksum_and_write(drr,
389                                     sizeof (dmu_replay_record_t), &stream_cksum,
390                                     outfd) == -1)
391                                         goto out;
392                                 if (cksum_and_write(buf,
393                                     drrw->drr_length,
394                                     &stream_cksum, outfd) == -1)
395                                         goto out;
396                         }
397                         break;
398                 }
399
400                 case DRR_FREE:
401                 {
402                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
403                             &stream_cksum, outfd) == -1)
404                                 goto out;
405                         break;
406                 }
407
408                 default:
409                         (void) printf("INVALID record type 0x%x\n",
410                             drr->drr_type);
411                         /* should never happen, so assert */
412                         assert(B_FALSE);
413                 }
414         }
415 out:
416         umem_cache_destroy(ddt.ddecache);
417         free(ddt.dedup_hash_array);
418         free(buf);
419         (void) fclose(ofp);
420
421         return (NULL);
422 }
423
/*
 * Routines for dealing with the AVL tree of fs-nvlists
 */

/*
 * One snapshot in the guid-ordered AVL tree built by fsavl_create().
 * fn_nvfs and fn_snapname point into the nvlist the tree was built from;
 * fsavl_destroy() frees only the nodes themselves.
 */
typedef struct fsavl_node {
	avl_node_t fn_node;	/* AVL linkage */
	nvlist_t *fn_nvfs;	/* nvlist of the fs that lists this snapshot */
	char *fn_snapname;	/* name of the snapshot's nvpair */
	uint64_t fn_guid;	/* snapshot guid; the sort key */
} fsavl_node_t;
433
434 static int
435 fsavl_compare(const void *arg1, const void *arg2)
436 {
437         const fsavl_node_t *fn1 = arg1;
438         const fsavl_node_t *fn2 = arg2;
439
440         if (fn1->fn_guid > fn2->fn_guid)
441                 return (+1);
442         else if (fn1->fn_guid < fn2->fn_guid)
443                 return (-1);
444         else
445                 return (0);
446 }
447
448 /*
449  * Given the GUID of a snapshot, find its containing filesystem and
450  * (optionally) name.
451  */
452 static nvlist_t *
453 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
454 {
455         fsavl_node_t fn_find;
456         fsavl_node_t *fn;
457
458         fn_find.fn_guid = snapguid;
459
460         fn = avl_find(avl, &fn_find, NULL);
461         if (fn) {
462                 if (snapname)
463                         *snapname = fn->fn_snapname;
464                 return (fn->fn_nvfs);
465         }
466         return (NULL);
467 }
468
469 static void
470 fsavl_destroy(avl_tree_t *avl)
471 {
472         fsavl_node_t *fn;
473         void *cookie;
474
475         if (avl == NULL)
476                 return;
477
478         cookie = NULL;
479         while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
480                 free(fn);
481         avl_destroy(avl);
482         free(avl);
483 }
484
485 /*
486  * Given an nvlist, produce an avl tree of snapshots, ordered by guid
487  */
488 static avl_tree_t *
489 fsavl_create(nvlist_t *fss)
490 {
491         avl_tree_t *fsavl;
492         nvpair_t *fselem = NULL;
493
494         if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
495                 return (NULL);
496
497         avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
498             offsetof(fsavl_node_t, fn_node));
499
500         while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
501                 nvlist_t *nvfs, *snaps;
502                 nvpair_t *snapelem = NULL;
503
504                 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
505                 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
506
507                 while ((snapelem =
508                     nvlist_next_nvpair(snaps, snapelem)) != NULL) {
509                         fsavl_node_t *fn;
510                         uint64_t guid;
511
512                         VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
513                         if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
514                                 fsavl_destroy(fsavl);
515                                 return (NULL);
516                         }
517                         fn->fn_nvfs = nvfs;
518                         fn->fn_snapname = nvpair_name(snapelem);
519                         fn->fn_guid = guid;
520
521                         /*
522                          * Note: if there are multiple snaps with the
523                          * same GUID, we ignore all but one.
524                          */
525                         if (avl_find(fsavl, fn, NULL) == NULL)
526                                 avl_add(fsavl, fn);
527                         else
528                                 free(fn);
529                 }
530         }
531
532         return (fsavl);
533 }
534
/*
 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
 */

/*
 * Working state shared by send_iterate_fs() and send_iterate_snap() while
 * gather_nvlist() builds the "fss" nvlist.
 */
typedef struct send_data {
	/* guid found by send_iterate_snap(); saved/restored per fs level */
	uint64_t parent_fromsnap_guid;
	/* name -> guid of the current fs's snapshots; per-fs scratch list */
	nvlist_t *parent_snaps;
	/* result being accumulated: one entry per filesystem visited */
	nvlist_t *fss;
	/* name -> props of the current fs's snapshots; per-fs scratch list */
	nvlist_t *snapprops;
	const char *fromsnap;
	const char *tosnap;
	/* when set, send_iterate_fs() descends into child filesystems */
	boolean_t recursive;

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;
570
571 static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
572
573 static int
574 send_iterate_snap(zfs_handle_t *zhp, void *arg)
575 {
576         send_data_t *sd = arg;
577         uint64_t guid = zhp->zfs_dmustats.dds_guid;
578         char *snapname;
579         nvlist_t *nv;
580
581         snapname = strrchr(zhp->zfs_name, '@')+1;
582
583         VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
584         /*
585          * NB: if there is no fromsnap here (it's a newly created fs in
586          * an incremental replication), we will substitute the tosnap.
587          */
588         if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
589             (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
590             strcmp(snapname, sd->tosnap) == 0)) {
591                 sd->parent_fromsnap_guid = guid;
592         }
593
594         VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
595         send_iterate_prop(zhp, nv);
596         VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
597         nvlist_free(nv);
598
599         zfs_close(zhp);
600         return (0);
601 }
602
603 static void
604 send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
605 {
606         nvpair_t *elem = NULL;
607
608         while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
609                 char *propname = nvpair_name(elem);
610                 zfs_prop_t prop = zfs_name_to_prop(propname);
611                 nvlist_t *propnv;
612
613                 if (!zfs_prop_user(propname)) {
614                         /*
615                          * Realistically, this should never happen.  However,
616                          * we want the ability to add DSL properties without
617                          * needing to make incompatible version changes.  We
618                          * need to ignore unknown properties to allow older
619                          * software to still send datasets containing these
620                          * properties, with the unknown properties elided.
621                          */
622                         if (prop == ZPROP_INVAL)
623                                 continue;
624
625                         if (zfs_prop_readonly(prop))
626                                 continue;
627                 }
628
629                 verify(nvpair_value_nvlist(elem, &propnv) == 0);
630                 if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
631                     prop == ZFS_PROP_REFQUOTA ||
632                     prop == ZFS_PROP_REFRESERVATION) {
633                         char *source;
634                         uint64_t value;
635                         verify(nvlist_lookup_uint64(propnv,
636                             ZPROP_VALUE, &value) == 0);
637                         if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
638                                 continue;
639                         /*
640                          * May have no source before SPA_VERSION_RECVD_PROPS,
641                          * but is still modifiable.
642                          */
643                         if (nvlist_lookup_string(propnv,
644                             ZPROP_SOURCE, &source) == 0) {
645                                 if ((strcmp(source, zhp->zfs_name) != 0) &&
646                                     (strcmp(source,
647                                     ZPROP_SOURCE_VAL_RECVD) != 0))
648                                         continue;
649                         }
650                 } else {
651                         char *source;
652                         if (nvlist_lookup_string(propnv,
653                             ZPROP_SOURCE, &source) != 0)
654                                 continue;
655                         if ((strcmp(source, zhp->zfs_name) != 0) &&
656                             (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
657                                 continue;
658                 }
659
660                 if (zfs_prop_user(propname) ||
661                     zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
662                         char *value;
663                         verify(nvlist_lookup_string(propnv,
664                             ZPROP_VALUE, &value) == 0);
665                         VERIFY(0 == nvlist_add_string(nv, propname, value));
666                 } else {
667                         uint64_t value;
668                         verify(nvlist_lookup_uint64(propnv,
669                             ZPROP_VALUE, &value) == 0);
670                         VERIFY(0 == nvlist_add_uint64(nv, propname, value));
671                 }
672         }
673 }
674
675 /*
676  * recursively generate nvlists describing datasets.  See comment
677  * for the data structure send_data_t above for description of contents
678  * of the nvlist.
679  */
680 static int
681 send_iterate_fs(zfs_handle_t *zhp, void *arg)
682 {
683         send_data_t *sd = arg;
684         nvlist_t *nvfs, *nv;
685         int rv = 0;
686         uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
687         uint64_t guid = zhp->zfs_dmustats.dds_guid;
688         char guidstring[64];
689
690         VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
691         VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
692         VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
693             sd->parent_fromsnap_guid));
694
695         if (zhp->zfs_dmustats.dds_origin[0]) {
696                 zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
697                     zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
698                 if (origin == NULL)
699                         return (-1);
700                 VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
701                     origin->zfs_dmustats.dds_guid));
702         }
703
704         /* iterate over props */
705         VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
706         send_iterate_prop(zhp, nv);
707         VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
708         nvlist_free(nv);
709
710         /* iterate over snaps, and set sd->parent_fromsnap_guid */
711         sd->parent_fromsnap_guid = 0;
712         VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
713         VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
714         (void) zfs_iter_snapshots(zhp, send_iterate_snap, sd);
715         VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
716         VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
717         nvlist_free(sd->parent_snaps);
718         nvlist_free(sd->snapprops);
719
720         /* add this fs to nvlist */
721         (void) snprintf(guidstring, sizeof (guidstring),
722             "0x%llx", (longlong_t)guid);
723         VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
724         nvlist_free(nvfs);
725
726         /* iterate over children */
727         if (sd->recursive)
728                 rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
729
730         sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
731
732         zfs_close(zhp);
733         return (rv);
734 }
735
736 static int
737 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
738     const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp)
739 {
740         zfs_handle_t *zhp;
741         send_data_t sd = { 0 };
742         int error;
743
744         zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
745         if (zhp == NULL)
746                 return (EZFS_BADTYPE);
747
748         VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
749         sd.fromsnap = fromsnap;
750         sd.tosnap = tosnap;
751         sd.recursive = recursive;
752
753         if ((error = send_iterate_fs(zhp, &sd)) != 0) {
754                 nvlist_free(sd.fss);
755                 if (avlp != NULL)
756                         *avlp = NULL;
757                 *nvlp = NULL;
758                 return (error);
759         }
760
761         if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
762                 nvlist_free(sd.fss);
763                 *nvlp = NULL;
764                 return (EZFS_NOMEM);
765         }
766
767         *nvlp = sd.fss;
768         return (0);
769 }
770
/*
 * Routines for dealing with the sorted snapshot functionality
 */

/*
 * Node of the temporary AVL tree that zfs_iter_snapshots_sorted() uses to
 * order snapshot handles.
 */
typedef struct zfs_node {
	zfs_handle_t	*zn_handle;	/* snapshot handle held by this node */
	avl_node_t	zn_avlnode;	/* AVL linkage */
} zfs_node_t;
778
779 static int
780 zfs_sort_snaps(zfs_handle_t *zhp, void *data)
781 {
782         avl_tree_t *avl = data;
783         zfs_node_t *node;
784         zfs_node_t search;
785
786         search.zn_handle = zhp;
787         node = avl_find(avl, &search, NULL);
788         if (node) {
789                 /*
790                  * If this snapshot was renamed while we were creating the
791                  * AVL tree, it's possible that we already inserted it under
792                  * its old name. Remove the old handle before adding the new
793                  * one.
794                  */
795                 zfs_close(node->zn_handle);
796                 avl_remove(avl, node);
797                 free(node);
798         }
799
800         node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t));
801         node->zn_handle = zhp;
802         avl_add(avl, node);
803
804         return (0);
805 }
806
807 static int
808 zfs_snapshot_compare(const void *larg, const void *rarg)
809 {
810         zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle;
811         zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle;
812         uint64_t lcreate, rcreate;
813
814         /*
815          * Sort them according to creation time.  We use the hidden
816          * CREATETXG property to get an absolute ordering of snapshots.
817          */
818         lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG);
819         rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG);
820
821         if (lcreate < rcreate)
822                 return (-1);
823         else if (lcreate > rcreate)
824                 return (+1);
825         else
826                 return (0);
827 }
828
829 int
830 zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data)
831 {
832         int ret = 0;
833         zfs_node_t *node;
834         avl_tree_t avl;
835         void *cookie = NULL;
836
837         avl_create(&avl, zfs_snapshot_compare,
838             sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode));
839
840         ret = zfs_iter_snapshots(zhp, zfs_sort_snaps, &avl);
841
842         for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node))
843                 ret |= callback(node->zn_handle, data);
844
845         while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL)
846                 free(node);
847
848         avl_destroy(&avl);
849
850         return (ret);
851 }
852
853 /*
854  * Routines specific to "zfs send"
855  */
856 typedef struct send_dump_data {
857         /* these are all just the short snapname (the part after the @) */
858         const char *fromsnap;
859         const char *tosnap;
860         char prevsnap[ZFS_MAXNAMELEN];
861         uint64_t prevsnap_obj;
862         boolean_t seenfrom, seento, replicate, doall, fromorigin;
863         boolean_t verbose;
864         int outfd;
865         boolean_t err;
866         nvlist_t *fss;
867         avl_tree_t *fsavl;
868         snapfilter_cb_t *filter_cb;
869         void *filter_cb_arg;
870         nvlist_t *debugnv;
871         char holdtag[ZFS_MAXNAMELEN];
872         int cleanup_fd;
873 } send_dump_data_t;
874
875 /*
876  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
877  * NULL) to the file descriptor specified by outfd.
878  */
879 static int
880 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
881     boolean_t fromorigin, int outfd, nvlist_t *debugnv)
882 {
883         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
884         libzfs_handle_t *hdl = zhp->zfs_hdl;
885         nvlist_t *thisdbg;
886
887         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
888         assert(fromsnap_obj == 0 || !fromorigin);
889
890         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
891         zc.zc_cookie = outfd;
892         zc.zc_obj = fromorigin;
893         zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
894         zc.zc_fromobj = fromsnap_obj;
895
896         VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
897         if (fromsnap && fromsnap[0] != '\0') {
898                 VERIFY(0 == nvlist_add_string(thisdbg,
899                     "fromsnap", fromsnap));
900         }
901
902         if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) {
903                 char errbuf[1024];
904                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
905                     "warning: cannot send '%s'"), zhp->zfs_name);
906
907                 VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
908                 if (debugnv) {
909                         VERIFY(0 == nvlist_add_nvlist(debugnv,
910                             zhp->zfs_name, thisdbg));
911                 }
912                 nvlist_free(thisdbg);
913
914                 switch (errno) {
915
916                 case EXDEV:
917                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
918                             "not an earlier snapshot from the same fs"));
919                         return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
920
921                 case ENOENT:
922                         if (zfs_dataset_exists(hdl, zc.zc_name,
923                             ZFS_TYPE_SNAPSHOT)) {
924                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
925                                     "incremental source (@%s) does not exist"),
926                                     zc.zc_value);
927                         }
928                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
929
930                 case EDQUOT:
931                 case EFBIG:
932                 case EIO:
933                 case ENOLINK:
934                 case ENOSPC:
935                 case ENOSTR:
936                 case ENXIO:
937                 case EPIPE:
938                 case ERANGE:
939                 case EFAULT:
940                 case EROFS:
941                         zfs_error_aux(hdl, strerror(errno));
942                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
943
944                 default:
945                         return (zfs_standard_error(hdl, errno, errbuf));
946                 }
947         }
948
949         if (debugnv)
950                 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
951         nvlist_free(thisdbg);
952
953         return (0);
954 }
955
956 static int
957 hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
958 {
959         zfs_handle_t *pzhp;
960         int error = 0;
961         char *thissnap;
962
963         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
964
965         /*
966          * zfs_send() only opens a cleanup_fd for sends that need it,
967          * e.g. replication and doall.
968          */
969         if (sdd->cleanup_fd == -1)
970                 return (0);
971
972         thissnap = strchr(zhp->zfs_name, '@') + 1;
973         *(thissnap - 1) = '\0';
974         pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
975         *(thissnap - 1) = '@';
976
977         /*
978          * It's OK if the parent no longer exists.  The send code will
979          * handle that error.
980          */
981         if (pzhp) {
982                 error = zfs_hold(pzhp, thissnap, sdd->holdtag,
983                     B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd,
984                     zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID),
985                     zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG));
986                 zfs_close(pzhp);
987         }
988
989         return (error);
990 }
991
992 static int
993 dump_snapshot(zfs_handle_t *zhp, void *arg)
994 {
995         send_dump_data_t *sdd = arg;
996         char *thissnap;
997         int err;
998         boolean_t isfromsnap, istosnap;
999         boolean_t exclude = B_FALSE;
1000
1001         thissnap = strchr(zhp->zfs_name, '@') + 1;
1002         isfromsnap = (sdd->fromsnap != NULL &&
1003             strcmp(sdd->fromsnap, thissnap) == 0);
1004
1005         if (!sdd->seenfrom && isfromsnap) {
1006                 err = hold_for_send(zhp, sdd);
1007                 if (err == 0) {
1008                         sdd->seenfrom = B_TRUE;
1009                         (void) strcpy(sdd->prevsnap, thissnap);
1010                         sdd->prevsnap_obj = zfs_prop_get_int(zhp,
1011                             ZFS_PROP_OBJSETID);
1012                 } else if (err == ENOENT) {
1013                         err = 0;
1014                 }
1015                 zfs_close(zhp);
1016                 return (err);
1017         }
1018
1019         if (sdd->seento || !sdd->seenfrom) {
1020                 zfs_close(zhp);
1021                 return (0);
1022         }
1023
1024         istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1025         if (istosnap)
1026                 sdd->seento = B_TRUE;
1027
1028         if (!sdd->doall && !isfromsnap && !istosnap) {
1029                 if (sdd->replicate) {
1030                         char *snapname;
1031                         nvlist_t *snapprops;
1032                         /*
1033                          * Filter out all intermediate snapshots except origin
1034                          * snapshots needed to replicate clones.
1035                          */
1036                         nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1037                             zhp->zfs_dmustats.dds_guid, &snapname);
1038
1039                         VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1040                             "snapprops", &snapprops));
1041                         VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1042                             thissnap, &snapprops));
1043                         exclude = !nvlist_exists(snapprops, "is_clone_origin");
1044                 } else {
1045                         exclude = B_TRUE;
1046                 }
1047         }
1048
1049         /*
1050          * If a filter function exists, call it to determine whether
1051          * this snapshot will be sent.
1052          */
1053         if (exclude || (sdd->filter_cb != NULL &&
1054             sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1055                 /*
1056                  * This snapshot is filtered out.  Don't send it, and don't
1057                  * set prevsnap_obj, so it will be as if this snapshot didn't
1058                  * exist, and the next accepted snapshot will be sent as
1059                  * an incremental from the last accepted one, or as the
1060                  * first (and full) snapshot in the case of a replication,
1061                  * non-incremental send.
1062                  */
1063                 zfs_close(zhp);
1064                 return (0);
1065         }
1066
1067         err = hold_for_send(zhp, sdd);
1068         if (err) {
1069                 if (err == ENOENT)
1070                         err = 0;
1071                 zfs_close(zhp);
1072                 return (err);
1073         }
1074
1075         /* send it */
1076         if (sdd->verbose) {
1077                 (void) fprintf(stderr, "sending from @%s to %s\n",
1078                     sdd->prevsnap, zhp->zfs_name);
1079         }
1080
1081         err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1082             sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
1083             sdd->outfd, sdd->debugnv);
1084
1085         (void) strcpy(sdd->prevsnap, thissnap);
1086         sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1087         zfs_close(zhp);
1088         return (err);
1089 }
1090
1091 static int
1092 dump_filesystem(zfs_handle_t *zhp, void *arg)
1093 {
1094         int rv = 0;
1095         send_dump_data_t *sdd = arg;
1096         boolean_t missingfrom = B_FALSE;
1097         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1098
1099         (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1100             zhp->zfs_name, sdd->tosnap);
1101         if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1102                 (void) fprintf(stderr, "WARNING: "
1103                     "could not send %s@%s: does not exist\n",
1104                     zhp->zfs_name, sdd->tosnap);
1105                 sdd->err = B_TRUE;
1106                 return (0);
1107         }
1108
1109         if (sdd->replicate && sdd->fromsnap) {
1110                 /*
1111                  * If this fs does not have fromsnap, and we're doing
1112                  * recursive, we need to send a full stream from the
1113                  * beginning (or an incremental from the origin if this
1114                  * is a clone).  If we're doing non-recursive, then let
1115                  * them get the error.
1116                  */
1117                 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1118                     zhp->zfs_name, sdd->fromsnap);
1119                 if (ioctl(zhp->zfs_hdl->libzfs_fd,
1120                     ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1121                         missingfrom = B_TRUE;
1122                 }
1123         }
1124
1125         sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1126         sdd->prevsnap_obj = 0;
1127         if (sdd->fromsnap == NULL || missingfrom)
1128                 sdd->seenfrom = B_TRUE;
1129
1130         rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
1131         if (!sdd->seenfrom) {
1132                 (void) fprintf(stderr,
1133                     "WARNING: could not send %s@%s:\n"
1134                     "incremental source (%s@%s) does not exist\n",
1135                     zhp->zfs_name, sdd->tosnap,
1136                     zhp->zfs_name, sdd->fromsnap);
1137                 sdd->err = B_TRUE;
1138         } else if (!sdd->seento) {
1139                 if (sdd->fromsnap) {
1140                         (void) fprintf(stderr,
1141                             "WARNING: could not send %s@%s:\n"
1142                             "incremental source (%s@%s) "
1143                             "is not earlier than it\n",
1144                             zhp->zfs_name, sdd->tosnap,
1145                             zhp->zfs_name, sdd->fromsnap);
1146                 } else {
1147                         (void) fprintf(stderr, "WARNING: "
1148                             "could not send %s@%s: does not exist\n",
1149                             zhp->zfs_name, sdd->tosnap);
1150                 }
1151                 sdd->err = B_TRUE;
1152         }
1153
1154         return (rv);
1155 }
1156
1157 static int
1158 dump_filesystems(zfs_handle_t *rzhp, void *arg)
1159 {
1160         send_dump_data_t *sdd = arg;
1161         nvpair_t *fspair;
1162         boolean_t needagain, progress;
1163
1164         if (!sdd->replicate)
1165                 return (dump_filesystem(rzhp, sdd));
1166
1167         /* Mark the clone origin snapshots. */
1168         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1169             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1170                 nvlist_t *nvfs;
1171                 uint64_t origin_guid = 0;
1172
1173                 VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1174                 (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1175                 if (origin_guid != 0) {
1176                         char *snapname;
1177                         nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1178                             origin_guid, &snapname);
1179                         if (origin_nv != NULL) {
1180                                 nvlist_t *snapprops;
1181                                 VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1182                                     "snapprops", &snapprops));
1183                                 VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1184                                     snapname, &snapprops));
1185                                 VERIFY(0 == nvlist_add_boolean(
1186                                     snapprops, "is_clone_origin"));
1187                         }
1188                 }
1189         }
1190 again:
1191         needagain = progress = B_FALSE;
1192         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1193             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1194                 nvlist_t *fslist;
1195                 char *fsname;
1196                 zfs_handle_t *zhp;
1197                 int err;
1198                 uint64_t origin_guid = 0;
1199
1200                 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1201                 if (nvlist_lookup_boolean(fslist, "sent") == 0)
1202                         continue;
1203
1204                 VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1205                 (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1206
1207                 if (origin_guid != 0) {
1208                         nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1209                             origin_guid, NULL);
1210                         if (origin_nv != NULL &&
1211                             nvlist_lookup_boolean(origin_nv,
1212                             "sent") == ENOENT) {
1213                                 /*
1214                                  * origin has not been sent yet;
1215                                  * skip this clone.
1216                                  */
1217                                 needagain = B_TRUE;
1218                                 continue;
1219                         }
1220                 }
1221
1222                 zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1223                 if (zhp == NULL)
1224                         return (-1);
1225                 err = dump_filesystem(zhp, sdd);
1226                 VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1227                 progress = B_TRUE;
1228                 zfs_close(zhp);
1229                 if (err)
1230                         return (err);
1231         }
1232         if (needagain) {
1233                 assert(progress);
1234                 goto again;
1235         }
1236         return (0);
1237 }
1238
1239 /*
1240  * Generate a send stream for the dataset identified by the argument zhp.
1241  *
1242  * The content of the send stream is the snapshot identified by
1243  * 'tosnap'.  Incremental streams are requested in two ways:
1244  *     - from the snapshot identified by "fromsnap" (if non-null) or
1245  *     - from the origin of the dataset identified by zhp, which must
1246  *       be a clone.  In this case, "fromsnap" is null and "fromorigin"
1247  *       is TRUE.
1248  *
1249  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1250  * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1251  * if "replicate" is set.  If "doall" is set, dump all the intermediate
1252  * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1253  * case too. If "props" is set, send properties.
1254  */
1255 int
1256 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1257     sendflags_t flags, int outfd, snapfilter_cb_t filter_func,
1258     void *cb_arg, nvlist_t **debugnvp)
1259 {
1260         char errbuf[1024];
1261         send_dump_data_t sdd = { 0 };
1262         int err;
1263         nvlist_t *fss = NULL;
1264         avl_tree_t *fsavl = NULL;
1265         static uint64_t holdseq;
1266         int spa_version;
1267         boolean_t holdsnaps = B_FALSE;
1268         pthread_t tid;
1269         int pipefd[2];
1270         dedup_arg_t dda = { 0 };
1271         int featureflags = 0;
1272
1273         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1274             "cannot send '%s'"), zhp->zfs_name);
1275
1276         if (fromsnap && fromsnap[0] == '\0') {
1277                 zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1278                     "zero-length incremental source"));
1279                 return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1280         }
1281
1282         if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1283                 uint64_t version;
1284                 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1285                 if (version >= ZPL_VERSION_SA) {
1286                         featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1287                 }
1288         }
1289
1290         if (zfs_spa_version(zhp, &spa_version) == 0 &&
1291             spa_version >= SPA_VERSION_USERREFS &&
1292             (flags.doall || flags.replicate))
1293                 holdsnaps = B_TRUE;
1294
1295         if (flags.dedup) {
1296                 featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1297                     DMU_BACKUP_FEATURE_DEDUPPROPS);
1298                 if ((err = pipe(pipefd))) {
1299                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1300                         return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1301                             errbuf));
1302                 }
1303                 dda.outputfd = outfd;
1304                 dda.inputfd = pipefd[1];
1305                 dda.dedup_hdl = zhp->zfs_hdl;
1306                 if ((err = pthread_create(&tid, NULL, cksummer, &dda))) {
1307                         (void) close(pipefd[0]);
1308                         (void) close(pipefd[1]);
1309                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1310                         return (zfs_error(zhp->zfs_hdl,
1311                             EZFS_THREADCREATEFAILED, errbuf));
1312                 }
1313         }
1314
1315         if (flags.replicate || flags.doall || flags.props) {
1316                 dmu_replay_record_t drr = { 0 };
1317                 char *packbuf = NULL;
1318                 size_t buflen = 0;
1319                 zio_cksum_t zc = { { 0 } };
1320
1321                 if (flags.replicate || flags.props) {
1322                         nvlist_t *hdrnv;
1323
1324                         VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1325                         if (fromsnap) {
1326                                 VERIFY(0 == nvlist_add_string(hdrnv,
1327                                     "fromsnap", fromsnap));
1328                         }
1329                         VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1330                         if (!flags.replicate) {
1331                                 VERIFY(0 == nvlist_add_boolean(hdrnv,
1332                                     "not_recursive"));
1333                         }
1334
1335                         err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1336                             fromsnap, tosnap, flags.replicate, &fss, &fsavl);
1337                         if (err)
1338                                 goto err_out;
1339                         VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1340                         err = nvlist_pack(hdrnv, &packbuf, &buflen,
1341                             NV_ENCODE_XDR, 0);
1342                         if (debugnvp)
1343                                 *debugnvp = hdrnv;
1344                         else
1345                                 nvlist_free(hdrnv);
1346                         if (err) {
1347                                 fsavl_destroy(fsavl);
1348                                 nvlist_free(fss);
1349                                 goto stderr_out;
1350                         }
1351                 }
1352
1353                 /* write first begin record */
1354                 drr.drr_type = DRR_BEGIN;
1355                 drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1356                 DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.drr_versioninfo,
1357                     DMU_COMPOUNDSTREAM);
1358                 DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.drr_versioninfo,
1359                     featureflags);
1360                 (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1361                     sizeof (drr.drr_u.drr_begin.drr_toname),
1362                     "%s@%s", zhp->zfs_name, tosnap);
1363                 drr.drr_payloadlen = buflen;
1364                 err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1365
1366                 /* write header nvlist */
1367                 if (err != -1 && packbuf != NULL) {
1368                         err = cksum_and_write(packbuf, buflen, &zc, outfd);
1369                 }
1370                 free(packbuf);
1371                 if (err == -1) {
1372                         fsavl_destroy(fsavl);
1373                         nvlist_free(fss);
1374                         err = errno;
1375                         goto stderr_out;
1376                 }
1377
1378                 /* write end record */
1379                 if (err != -1) {
1380                         bzero(&drr, sizeof (drr));
1381                         drr.drr_type = DRR_END;
1382                         drr.drr_u.drr_end.drr_checksum = zc;
1383                         err = write(outfd, &drr, sizeof (drr));
1384                         if (err == -1) {
1385                                 fsavl_destroy(fsavl);
1386                                 nvlist_free(fss);
1387                                 err = errno;
1388                                 goto stderr_out;
1389                         }
1390                 }
1391         }
1392
1393         /* dump each stream */
1394         sdd.fromsnap = fromsnap;
1395         sdd.tosnap = tosnap;
1396         if (flags.dedup)
1397                 sdd.outfd = pipefd[0];
1398         else
1399                 sdd.outfd = outfd;
1400         sdd.replicate = flags.replicate;
1401         sdd.doall = flags.doall;
1402         sdd.fromorigin = flags.fromorigin;
1403         sdd.fss = fss;
1404         sdd.fsavl = fsavl;
1405         sdd.verbose = flags.verbose;
1406         sdd.filter_cb = filter_func;
1407         sdd.filter_cb_arg = cb_arg;
1408         if (debugnvp)
1409                 sdd.debugnv = *debugnvp;
1410         if (holdsnaps) {
1411                 ++holdseq;
1412                 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1413                     ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1414                 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR);
1415                 if (sdd.cleanup_fd < 0) {
1416                         err = errno;
1417                         goto stderr_out;
1418                 }
1419         } else {
1420                 sdd.cleanup_fd = -1;
1421         }
1422         err = dump_filesystems(zhp, &sdd);
1423         fsavl_destroy(fsavl);
1424         nvlist_free(fss);
1425
1426         if (flags.dedup) {
1427                 (void) close(pipefd[0]);
1428                 (void) pthread_join(tid, NULL);
1429         }
1430
1431         if (sdd.cleanup_fd != -1) {
1432                 VERIFY(0 == close(sdd.cleanup_fd));
1433                 sdd.cleanup_fd = -1;
1434         }
1435
1436         if (flags.replicate || flags.doall || flags.props) {
1437                 /*
1438                  * write final end record.  NB: want to do this even if
1439                  * there was some error, because it might not be totally
1440                  * failed.
1441                  */
1442                 dmu_replay_record_t drr = { 0 };
1443                 drr.drr_type = DRR_END;
1444                 if (write(outfd, &drr, sizeof (drr)) == -1) {
1445                         return (zfs_standard_error(zhp->zfs_hdl,
1446                             errno, errbuf));
1447                 }
1448         }
1449
1450         return (err || sdd.err);
1451
1452 stderr_out:
1453         err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1454 err_out:
1455         if (sdd.cleanup_fd != -1)
1456                 VERIFY(0 == close(sdd.cleanup_fd));
1457         if (flags.dedup) {
1458                 (void) pthread_cancel(tid);
1459                 (void) pthread_join(tid, NULL);
1460                 (void) close(pipefd[0]);
1461         }
1462         return (err);
1463 }
1464
1465 /*
1466  * Routines specific to "zfs recv"
1467  */
1468
1469 static int
1470 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
1471     boolean_t byteswap, zio_cksum_t *zc)
1472 {
1473         char *cp = buf;
1474         int rv;
1475         int len = ilen;
1476
1477         do {
1478                 rv = read(fd, cp, len);
1479                 cp += rv;
1480                 len -= rv;
1481         } while (rv > 0);
1482
1483         if (rv < 0 || len != 0) {
1484                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1485                     "failed to read from stream"));
1486                 return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
1487                     "cannot receive")));
1488         }
1489
1490         if (zc) {
1491                 if (byteswap)
1492                         fletcher_4_incremental_byteswap(buf, ilen, zc);
1493                 else
1494                         fletcher_4_incremental_native(buf, ilen, zc);
1495         }
1496         return (0);
1497 }
1498
1499 static int
1500 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
1501     boolean_t byteswap, zio_cksum_t *zc)
1502 {
1503         char *buf;
1504         int err;
1505
1506         buf = zfs_alloc(hdl, len);
1507         if (buf == NULL)
1508                 return (ENOMEM);
1509
1510         err = recv_read(hdl, fd, buf, len, byteswap, zc);
1511         if (err != 0) {
1512                 free(buf);
1513                 return (err);
1514         }
1515
1516         err = nvlist_unpack(buf, len, nvp, 0);
1517         free(buf);
1518         if (err != 0) {
1519                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
1520                     "stream (malformed nvlist)"));
1521                 return (EINVAL);
1522         }
1523         return (0);
1524 }
1525
1526 static int
1527 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
1528     int baselen, char *newname, recvflags_t flags)
1529 {
1530         static int seq;
1531         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1532         int err;
1533         prop_changelist_t *clp;
1534         zfs_handle_t *zhp;
1535
1536         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1537         if (zhp == NULL)
1538                 return (-1);
1539         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1540             flags.force ? MS_FORCE : 0);
1541         zfs_close(zhp);
1542         if (clp == NULL)
1543                 return (-1);
1544         err = changelist_prefix(clp);
1545         if (err)
1546                 return (err);
1547
1548         zc.zc_objset_type = DMU_OST_ZFS;
1549         (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1550
1551         if (tryname) {
1552                 (void) strcpy(newname, tryname);
1553
1554                 (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
1555
1556                 if (flags.verbose) {
1557                         (void) printf("attempting rename %s to %s\n",
1558                             zc.zc_name, zc.zc_value);
1559                 }
1560                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1561                 if (err == 0)
1562                         changelist_rename(clp, name, tryname);
1563         } else {
1564                 err = ENOENT;
1565         }
1566
1567         if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
1568                 seq++;
1569
1570                 (void) strncpy(newname, name, baselen);
1571                 (void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
1572                     "recv-%ld-%u", (long) getpid(), seq);
1573                 (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
1574
1575                 if (flags.verbose) {
1576                         (void) printf("failed - trying rename %s to %s\n",
1577                             zc.zc_name, zc.zc_value);
1578                 }
1579                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1580                 if (err == 0)
1581                         changelist_rename(clp, name, newname);
1582                 if (err && flags.verbose) {
1583                         (void) printf("failed (%u) - "
1584                             "will try again on next pass\n", errno);
1585                 }
1586                 err = EAGAIN;
1587         } else if (flags.verbose) {
1588                 if (err == 0)
1589                         (void) printf("success\n");
1590                 else
1591                         (void) printf("failed (%u)\n", errno);
1592         }
1593
1594         (void) changelist_postfix(clp);
1595         changelist_free(clp);
1596
1597         return (err);
1598 }
1599
1600 static int
1601 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
1602     char *newname, recvflags_t flags)
1603 {
1604         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1605         int err = 0;
1606         prop_changelist_t *clp;
1607         zfs_handle_t *zhp;
1608         boolean_t defer = B_FALSE;
1609         int spa_version;
1610
1611         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1612         if (zhp == NULL)
1613                 return (-1);
1614         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1615             flags.force ? MS_FORCE : 0);
1616         if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
1617             zfs_spa_version(zhp, &spa_version) == 0 &&
1618             spa_version >= SPA_VERSION_USERREFS)
1619                 defer = B_TRUE;
1620         zfs_close(zhp);
1621         if (clp == NULL)
1622                 return (-1);
1623         err = changelist_prefix(clp);
1624         if (err)
1625                 return (err);
1626
1627         zc.zc_objset_type = DMU_OST_ZFS;
1628         zc.zc_defer_destroy = defer;
1629         (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1630
1631         if (flags.verbose)
1632                 (void) printf("attempting destroy %s\n", zc.zc_name);
1633         err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
1634         if (err == 0) {
1635                 if (flags.verbose)
1636                         (void) printf("success\n");
1637                 changelist_remove(clp, zc.zc_name);
1638         }
1639
1640         (void) changelist_postfix(clp);
1641         changelist_free(clp);
1642
1643         /*
1644          * Deferred destroy might destroy the snapshot or only mark it to be
1645          * destroyed later, and it returns success in either case.
1646          */
1647         if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
1648             ZFS_TYPE_SNAPSHOT))) {
1649                 err = recv_rename(hdl, name, NULL, baselen, newname, flags);
1650         }
1651
1652         return (err);
1653 }
1654
/*
 * Search state shared between guid_to_name() and guid_to_name_cb().
 */
typedef struct guid_to_name_data {
	uint64_t guid;	/* guid being searched for */
	char *name;	/* caller's buffer; receives the matching name */
} guid_to_name_data_t;
1659
1660 static int
1661 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
1662 {
1663         guid_to_name_data_t *gtnd = arg;
1664         int err;
1665
1666         if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
1667                 (void) strcpy(gtnd->name, zhp->zfs_name);
1668                 zfs_close(zhp);
1669                 return (EEXIST);
1670         }
1671         err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
1672         zfs_close(zhp);
1673         return (err);
1674 }
1675
1676 static int
1677 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
1678     char *name)
1679 {
1680         /* exhaustive search all local snapshots */
1681         guid_to_name_data_t gtnd;
1682         int err = 0;
1683         zfs_handle_t *zhp;
1684         char *cp;
1685
1686         gtnd.guid = guid;
1687         gtnd.name = name;
1688
1689         if (strchr(parent, '@') == NULL) {
1690                 zhp = make_dataset_handle(hdl, parent);
1691                 if (zhp != NULL) {
1692                         err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1693                         zfs_close(zhp);
1694                         if (err == EEXIST)
1695                                 return (0);
1696                 }
1697         }
1698
1699         cp = strchr(parent, '/');
1700         if (cp)
1701                 *cp = '\0';
1702         zhp = make_dataset_handle(hdl, parent);
1703         if (cp)
1704                 *cp = '/';
1705
1706         if (zhp) {
1707                 err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1708                 zfs_close(zhp);
1709         }
1710
1711         return (err == EEXIST ? 0 : ENOENT);
1712
1713 }
1714
1715 /*
1716  * Return true if dataset guid1 is created before guid2.
1717  */
1718 static int
1719 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
1720     uint64_t guid1, uint64_t guid2)
1721 {
1722         nvlist_t *nvfs;
1723         char *fsname, *snapname;
1724         char buf[ZFS_MAXNAMELEN];
1725         int rv;
1726         zfs_node_t zn1, zn2;
1727
1728         if (guid2 == 0)
1729                 return (0);
1730         if (guid1 == 0)
1731                 return (1);
1732
1733         nvfs = fsavl_find(avl, guid1, &snapname);
1734         VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1735         (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1736         zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1737         if (zn1.zn_handle == NULL)
1738                 return (-1);
1739
1740         nvfs = fsavl_find(avl, guid2, &snapname);
1741         VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1742         (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1743         zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1744         if (zn2.zn_handle == NULL) {
1745                 zfs_close(zn2.zn_handle);
1746                 return (-1);
1747         }
1748
1749         rv = (zfs_snapshot_compare(&zn1, &zn2) == -1);
1750
1751         zfs_close(zn1.zn_handle);
1752         zfs_close(zn2.zn_handle);
1753
1754         return (rv);
1755 }
1756
1757 static int
1758 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
1759     recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
1760     nvlist_t *renamed)
1761 {
1762         nvlist_t *local_nv;
1763         avl_tree_t *local_avl;
1764         nvpair_t *fselem, *nextfselem;
1765         char *fromsnap;
1766         char newname[ZFS_MAXNAMELEN];
1767         int error;
1768         boolean_t needagain, progress, recursive;
1769         char *s1, *s2;
1770
1771         VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
1772
1773         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
1774             ENOENT);
1775
1776         if (flags.dryrun)
1777                 return (0);
1778
1779 again:
1780         needagain = progress = B_FALSE;
1781
1782         if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
1783             recursive, &local_nv, &local_avl)) != 0)
1784                 return (error);
1785
1786         /*
1787          * Process deletes and renames
1788          */
1789         for (fselem = nvlist_next_nvpair(local_nv, NULL);
1790             fselem; fselem = nextfselem) {
1791                 nvlist_t *nvfs, *snaps;
1792                 nvlist_t *stream_nvfs = NULL;
1793                 nvpair_t *snapelem, *nextsnapelem;
1794                 uint64_t fromguid = 0;
1795                 uint64_t originguid = 0;
1796                 uint64_t stream_originguid = 0;
1797                 uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
1798                 char *fsname, *stream_fsname;
1799
1800                 nextfselem = nvlist_next_nvpair(local_nv, fselem);
1801
1802                 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
1803                 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
1804                 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1805                 VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
1806                     &parent_fromsnap_guid));
1807                 (void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
1808
1809                 /*
1810                  * First find the stream's fs, so we can check for
1811                  * a different origin (due to "zfs promote")
1812                  */
1813                 for (snapelem = nvlist_next_nvpair(snaps, NULL);
1814                     snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
1815                         uint64_t thisguid;
1816
1817                         VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1818                         stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
1819
1820                         if (stream_nvfs != NULL)
1821                                 break;
1822                 }
1823
1824                 /* check for promote */
1825                 (void) nvlist_lookup_uint64(stream_nvfs, "origin",
1826                     &stream_originguid);
1827                 if (stream_nvfs && originguid != stream_originguid) {
1828                         switch (created_before(hdl, local_avl,
1829                             stream_originguid, originguid)) {
1830                         case 1: {
1831                                 /* promote it! */
1832                                 zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1833                                 nvlist_t *origin_nvfs;
1834                                 char *origin_fsname;
1835
1836                                 if (flags.verbose)
1837                                         (void) printf("promoting %s\n", fsname);
1838
1839                                 origin_nvfs = fsavl_find(local_avl, originguid,
1840                                     NULL);
1841                                 VERIFY(0 == nvlist_lookup_string(origin_nvfs,
1842                                     "name", &origin_fsname));
1843                                 (void) strlcpy(zc.zc_value, origin_fsname,
1844                                     sizeof (zc.zc_value));
1845                                 (void) strlcpy(zc.zc_name, fsname,
1846                                     sizeof (zc.zc_name));
1847                                 error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
1848                                 if (error == 0)
1849                                         progress = B_TRUE;
1850                                 break;
1851                         }
1852                         default:
1853                                 break;
1854                         case -1:
1855                                 fsavl_destroy(local_avl);
1856                                 nvlist_free(local_nv);
1857                                 return (-1);
1858                         }
1859                         /*
1860                          * We had/have the wrong origin, therefore our
1861                          * list of snapshots is wrong.  Need to handle
1862                          * them on the next pass.
1863                          */
1864                         needagain = B_TRUE;
1865                         continue;
1866                 }
1867
1868                 for (snapelem = nvlist_next_nvpair(snaps, NULL);
1869                     snapelem; snapelem = nextsnapelem) {
1870                         uint64_t thisguid;
1871                         char *stream_snapname;
1872                         nvlist_t *found, *props;
1873
1874                         nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
1875
1876                         VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1877                         found = fsavl_find(stream_avl, thisguid,
1878                             &stream_snapname);
1879
1880                         /* check for delete */
1881                         if (found == NULL) {
1882                                 char name[ZFS_MAXNAMELEN];
1883
1884                                 if (!flags.force)
1885                                         continue;
1886
1887                                 (void) snprintf(name, sizeof (name), "%s@%s",
1888                                     fsname, nvpair_name(snapelem));
1889
1890                                 error = recv_destroy(hdl, name,
1891                                     strlen(fsname)+1, newname, flags);
1892                                 if (error)
1893                                         needagain = B_TRUE;
1894                                 else
1895                                         progress = B_TRUE;
1896                                 continue;
1897                         }
1898
1899                         stream_nvfs = found;
1900
1901                         if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
1902                             &props) && 0 == nvlist_lookup_nvlist(props,
1903                             stream_snapname, &props)) {
1904                                 zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
1905
1906                                 zc.zc_cookie = B_TRUE; /* received */
1907                                 (void) snprintf(zc.zc_name, sizeof (zc.zc_name),
1908                                     "%s@%s", fsname, nvpair_name(snapelem));
1909                                 if (zcmd_write_src_nvlist(hdl, &zc,
1910                                     props) == 0) {
1911                                         (void) zfs_ioctl(hdl,
1912                                             ZFS_IOC_SET_PROP, &zc);
1913                                         zcmd_free_nvlists(&zc);
1914                                 }
1915                         }
1916
1917                         /* check for different snapname */
1918                         if (strcmp(nvpair_name(snapelem),
1919                             stream_snapname) != 0) {
1920                                 char name[ZFS_MAXNAMELEN];
1921                                 char tryname[ZFS_MAXNAMELEN];
1922
1923                                 (void) snprintf(name, sizeof (name), "%s@%s",
1924                                     fsname, nvpair_name(snapelem));
1925                                 (void) snprintf(tryname, sizeof (name), "%s@%s",
1926                                     fsname, stream_snapname);
1927
1928                                 error = recv_rename(hdl, name, tryname,
1929                                     strlen(fsname)+1, newname, flags);
1930                                 if (error)
1931                                         needagain = B_TRUE;
1932                                 else
1933                                         progress = B_TRUE;
1934                         }
1935
1936                         if (strcmp(stream_snapname, fromsnap) == 0)
1937                                 fromguid = thisguid;
1938                 }
1939
1940                 /* check for delete */
1941                 if (stream_nvfs == NULL) {
1942                         if (!flags.force)
1943                                 continue;
1944
1945                         error = recv_destroy(hdl, fsname, strlen(tofs)+1,
1946                             newname, flags);
1947                         if (error)
1948                                 needagain = B_TRUE;
1949                         else
1950                                 progress = B_TRUE;
1951                         continue;
1952                 }
1953
1954                 if (fromguid == 0) {
1955                         if (flags.verbose) {
1956                                 (void) printf("local fs %s does not have "
1957                                     "fromsnap (%s in stream); must have "
1958                                     "been deleted locally; ignoring\n",
1959                                     fsname, fromsnap);
1960                         }
1961                         continue;
1962                 }
1963
1964                 VERIFY(0 == nvlist_lookup_string(stream_nvfs,
1965                     "name", &stream_fsname));
1966                 VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
1967                     "parentfromsnap", &stream_parent_fromsnap_guid));
1968
1969                 s1 = strrchr(fsname, '/');
1970                 s2 = strrchr(stream_fsname, '/');
1971
1972                 /*
1973                  * Check for rename. If the exact receive path is specified, it
1974                  * does not count as a rename, but we still need to check the
1975                  * datasets beneath it.
1976                  */
1977                 if ((stream_parent_fromsnap_guid != 0 &&
1978                     parent_fromsnap_guid != 0 &&
1979                     stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
1980                     ((flags.isprefix || strcmp(tofs, fsname) != 0) &&
1981                     (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
1982                         nvlist_t *parent;
1983                         char tryname[ZFS_MAXNAMELEN];
1984
1985                         parent = fsavl_find(local_avl,
1986                             stream_parent_fromsnap_guid, NULL);
1987                         /*
1988                          * NB: parent might not be found if we used the
1989                          * tosnap for stream_parent_fromsnap_guid,
1990                          * because the parent is a newly-created fs;
1991                          * we'll be able to rename it after we recv the
1992                          * new fs.
1993                          */
1994                         if (parent != NULL) {
1995                                 char *pname;
1996
1997                                 VERIFY(0 == nvlist_lookup_string(parent, "name",
1998                                     &pname));
1999                                 (void) snprintf(tryname, sizeof (tryname),
2000                                     "%s%s", pname, strrchr(stream_fsname, '/'));
2001                         } else {
2002                                 tryname[0] = '\0';
2003                                 if (flags.verbose) {
2004                                         (void) printf("local fs %s new parent "
2005                                             "not found\n", fsname);
2006                                 }
2007                         }
2008
2009                         newname[0] = '\0';
2010
2011                         error = recv_rename(hdl, fsname, tryname,
2012                             strlen(tofs)+1, newname, flags);
2013
2014                         if (renamed != NULL && newname[0] != '\0') {
2015                                 VERIFY(0 == nvlist_add_boolean(renamed,
2016                                     newname));
2017                         }
2018
2019                         if (error)
2020                                 needagain = B_TRUE;
2021                         else
2022                                 progress = B_TRUE;
2023                 }
2024         }
2025
2026         fsavl_destroy(local_avl);
2027         nvlist_free(local_nv);
2028
2029         if (needagain && progress) {
2030                 /* do another pass to fix up temporary names */
2031                 if (flags.verbose)
2032                         (void) printf("another pass:\n");
2033                 goto again;
2034         }
2035
2036         return (needagain);
2037 }
2038
2039 static int
2040 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
2041     recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
2042     char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2043 {
2044         nvlist_t *stream_nv = NULL;
2045         avl_tree_t *stream_avl = NULL;
2046         char *fromsnap = NULL;
2047         char *cp;
2048         char tofs[ZFS_MAXNAMELEN];
2049         char sendfs[ZFS_MAXNAMELEN];
2050         char errbuf[1024];
2051         dmu_replay_record_t drre;
2052         int error;
2053         boolean_t anyerr = B_FALSE;
2054         boolean_t softerr = B_FALSE;
2055         boolean_t recursive;
2056
2057         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2058             "cannot receive"));
2059
2060         assert(drr->drr_type == DRR_BEGIN);
2061         assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
2062         assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
2063             DMU_COMPOUNDSTREAM);
2064
2065         /*
2066          * Read in the nvlist from the stream.
2067          */
2068         if (drr->drr_payloadlen != 0) {
2069                 error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
2070                     &stream_nv, flags.byteswap, zc);
2071                 if (error) {
2072                         error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2073                         goto out;
2074                 }
2075         }
2076
2077         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2078             ENOENT);
2079
2080         if (recursive && strchr(destname, '@')) {
2081                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2082                     "cannot specify snapshot name for multi-snapshot stream"));
2083                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2084                 goto out;
2085         }
2086
2087         /*
2088          * Read in the end record and verify checksum.
2089          */
2090         if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
2091             flags.byteswap, NULL)))
2092                 goto out;
2093         if (flags.byteswap) {
2094                 drre.drr_type = BSWAP_32(drre.drr_type);
2095                 drre.drr_u.drr_end.drr_checksum.zc_word[0] =
2096                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
2097                 drre.drr_u.drr_end.drr_checksum.zc_word[1] =
2098                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
2099                 drre.drr_u.drr_end.drr_checksum.zc_word[2] =
2100                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
2101                 drre.drr_u.drr_end.drr_checksum.zc_word[3] =
2102                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
2103         }
2104         if (drre.drr_type != DRR_END) {
2105                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2106                 goto out;
2107         }
2108         if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
2109                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2110                     "incorrect header checksum"));
2111                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2112                 goto out;
2113         }
2114
2115         (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
2116
2117         if (drr->drr_payloadlen != 0) {
2118                 nvlist_t *stream_fss;
2119
2120                 VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
2121                     &stream_fss));
2122                 if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
2123                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2124                             "couldn't allocate avl tree"));
2125                         error = zfs_error(hdl, EZFS_NOMEM, errbuf);
2126                         goto out;
2127                 }
2128
2129                 if (fromsnap != NULL) {
2130                         nvlist_t *renamed = NULL;
2131                         nvpair_t *pair = NULL;
2132
2133                         (void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
2134                         if (flags.isprefix) {
2135                                 struct drr_begin *drrb = &drr->drr_u.drr_begin;
2136                                 int i;
2137
2138                                 if (flags.istail) {
2139                                         cp = strrchr(drrb->drr_toname, '/');
2140                                         if (cp == NULL) {
2141                                                 (void) strlcat(tofs, "/",
2142                                                     ZFS_MAXNAMELEN);
2143                                                 i = 0;
2144                                         } else {
2145                                                 i = (cp - drrb->drr_toname);
2146                                         }
2147                                 } else {
2148                                         i = strcspn(drrb->drr_toname, "/@");
2149                                 }
2150                                 /* zfs_receive_one() will create_parents() */
2151                                 (void) strlcat(tofs, &drrb->drr_toname[i],
2152                                     ZFS_MAXNAMELEN);
2153                                 *strchr(tofs, '@') = '\0';
2154                         }
2155
2156                         if (recursive && !flags.dryrun && !flags.nomount) {
2157                                 VERIFY(0 == nvlist_alloc(&renamed,
2158                                     NV_UNIQUE_NAME, 0));
2159                         }
2160
2161                         softerr = recv_incremental_replication(hdl, tofs, flags,
2162                             stream_nv, stream_avl, renamed);
2163
2164                         /* Unmount renamed filesystems before receiving. */
2165                         while ((pair = nvlist_next_nvpair(renamed,
2166                             pair)) != NULL) {
2167                                 zfs_handle_t *zhp;
2168                                 prop_changelist_t *clp = NULL;
2169
2170                                 zhp = zfs_open(hdl, nvpair_name(pair),
2171                                     ZFS_TYPE_FILESYSTEM);
2172                                 if (zhp != NULL) {
2173                                         clp = changelist_gather(zhp,
2174                                             ZFS_PROP_MOUNTPOINT, 0, 0);
2175                                         zfs_close(zhp);
2176                                         if (clp != NULL) {
2177                                                 softerr |=
2178                                                     changelist_prefix(clp);
2179                                                 changelist_free(clp);
2180                                         }
2181                                 }
2182                         }
2183
2184                         nvlist_free(renamed);
2185                 }
2186         }
2187
2188         /*
2189          * Get the fs specified by the first path in the stream (the top level
2190          * specified by 'zfs send') and pass it to each invocation of
2191          * zfs_receive_one().
2192          */
2193         (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
2194             ZFS_MAXNAMELEN);
2195         if ((cp = strchr(sendfs, '@')) != NULL)
2196                 *cp = '\0';
2197
2198         /* Finally, receive each contained stream */
2199         do {
2200                 /*
2201                  * we should figure out if it has a recoverable
2202                  * error, in which case do a recv_skip() and drive on.
2203                  * Note, if we fail due to already having this guid,
2204                  * zfs_receive_one() will take care of it (ie,
2205                  * recv_skip() and return 0).
2206                  */
2207                 error = zfs_receive_impl(hdl, destname, flags, fd,
2208                     sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
2209                     action_handlep);
2210                 if (error == ENODATA) {
2211                         error = 0;
2212                         break;
2213                 }
2214                 anyerr |= error;
2215         } while (error == 0);
2216
2217         if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
2218                 /*
2219                  * Now that we have the fs's they sent us, try the
2220                  * renames again.
2221                  */
2222                 softerr = recv_incremental_replication(hdl, tofs, flags,
2223                     stream_nv, stream_avl, NULL);
2224         }
2225
2226 out:
2227         fsavl_destroy(stream_avl);
2228         if (stream_nv)
2229                 nvlist_free(stream_nv);
2230         if (softerr)
2231                 error = -2;
2232         if (anyerr)
2233                 error = -1;
2234         return (error);
2235 }
2236
2237 static void
2238 trunc_prop_errs(int truncated)
2239 {
2240         ASSERT(truncated != 0);
2241
2242         if (truncated == 1)
2243                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2244                     "1 more property could not be set\n"));
2245         else
2246                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2247                     "%d more properties could not be set\n"), truncated);
2248 }
2249
2250 static int
2251 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
2252 {
2253         dmu_replay_record_t *drr;
2254         void *buf = malloc(1<<20);
2255         char errbuf[1024];
2256
2257         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2258             "cannot receive:"));
2259
2260         /* XXX would be great to use lseek if possible... */
2261         drr = buf;
2262
2263         while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
2264             byteswap, NULL) == 0) {
2265                 if (byteswap)
2266                         drr->drr_type = BSWAP_32(drr->drr_type);
2267
2268                 switch (drr->drr_type) {
2269                 case DRR_BEGIN:
2270                         /* NB: not to be used on v2 stream packages */
2271                         if (drr->drr_payloadlen != 0) {
2272                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2273                                     "invalid substream header"));
2274                                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2275                         }
2276                         break;
2277
2278                 case DRR_END:
2279                         free(buf);
2280                         return (0);
2281
2282                 case DRR_OBJECT:
2283                         if (byteswap) {
2284                                 drr->drr_u.drr_object.drr_bonuslen =
2285                                     BSWAP_32(drr->drr_u.drr_object.
2286                                     drr_bonuslen);
2287                         }
2288                         (void) recv_read(hdl, fd, buf,
2289                             P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
2290                             B_FALSE, NULL);
2291                         break;
2292
2293                 case DRR_WRITE:
2294                         if (byteswap) {
2295                                 drr->drr_u.drr_write.drr_length =
2296                                     BSWAP_64(drr->drr_u.drr_write.drr_length);
2297                         }
2298                         (void) recv_read(hdl, fd, buf,
2299                             drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
2300                         break;
2301                 case DRR_SPILL:
2302                         if (byteswap) {
2303                                 drr->drr_u.drr_write.drr_length =
2304                                     BSWAP_64(drr->drr_u.drr_spill.drr_length);
2305                         }
2306                         (void) recv_read(hdl, fd, buf,
2307                             drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
2308                         break;
2309                 case DRR_WRITE_BYREF:
2310                 case DRR_FREEOBJECTS:
2311                 case DRR_FREE:
2312                         break;
2313
2314                 default:
2315                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2316                             "invalid record type"));
2317                         return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2318                 }
2319         }
2320
2321         free(buf);
2322         return (-1);
2323 }
2324
2325 /*
2326  * Restores a backup of tosnap from the file descriptor specified by infd.
2327  */
2328 static int
2329 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
2330     recvflags_t flags, dmu_replay_record_t *drr,
2331     dmu_replay_record_t *drr_noswap, const char *sendfs,
2332     nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
2333     uint64_t *action_handlep)
2334 {
2335         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
2336         time_t begin_time;
2337         int ioctl_err, ioctl_errno, err;
2338         char *cp;
2339         struct drr_begin *drrb = &drr->drr_u.drr_begin;
2340         char errbuf[1024];
2341         char prop_errbuf[1024];
2342         const char *chopprefix;
2343         boolean_t newfs = B_FALSE;
2344         boolean_t stream_wantsnewfs;
2345         uint64_t parent_snapguid = 0;
2346         prop_changelist_t *clp = NULL;
2347         nvlist_t *snapprops_nvlist = NULL;
2348         zprop_errflags_t prop_errflags;
2349         boolean_t recursive;
2350
2351         begin_time = time(NULL);
2352
2353         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2354             "cannot receive"));
2355
2356         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2357             ENOENT);
2358
2359         if (stream_avl != NULL) {
2360                 char *snapname;
2361                 nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
2362                     &snapname);
2363                 nvlist_t *props;
2364                 int ret;
2365
2366                 (void) nvlist_lookup_uint64(fs, "parentfromsnap",
2367                     &parent_snapguid);
2368                 err = nvlist_lookup_nvlist(fs, "props", &props);
2369                 if (err)
2370                         VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
2371
2372                 if (flags.canmountoff) {
2373                         VERIFY(0 == nvlist_add_uint64(props,
2374                             zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
2375                 }
2376                 ret = zcmd_write_src_nvlist(hdl, &zc, props);
2377                 if (err)
2378                         nvlist_free(props);
2379
2380                 if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
2381                         VERIFY(0 == nvlist_lookup_nvlist(props,
2382                             snapname, &snapprops_nvlist));
2383                 }
2384
2385                 if (ret != 0)
2386                         return (-1);
2387         }
2388
2389         cp = NULL;
2390
2391         /*
2392          * Determine how much of the snapshot name stored in the stream
2393          * we are going to tack on to the name they specified on the
2394          * command line, and how much we are going to chop off.
2395          *
2396          * If they specified a snapshot, chop the entire name stored in
2397          * the stream.
2398          */
2399         if (flags.istail) {
2400                 /*
2401                  * A filesystem was specified with -e. We want to tack on only
2402                  * the tail of the sent snapshot path.
2403                  */
2404                 if (strchr(tosnap, '@')) {
2405                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2406                             "argument - snapshot not allowed with -e"));
2407                         return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2408                 }
2409
2410                 chopprefix = strrchr(sendfs, '/');
2411
2412                 if (chopprefix == NULL) {
2413                         /*
2414                          * The tail is the poolname, so we need to
2415                          * prepend a path separator.
2416                          */
2417                         int len = strlen(drrb->drr_toname);
2418                         cp = malloc(len + 2);
2419                         cp[0] = '/';
2420                         (void) strcpy(&cp[1], drrb->drr_toname);
2421                         chopprefix = cp;
2422                 } else {
2423                         chopprefix = drrb->drr_toname + (chopprefix - sendfs);
2424                 }
2425         } else if (flags.isprefix) {
2426                 /*
2427                  * A filesystem was specified with -d. We want to tack on
2428                  * everything but the first element of the sent snapshot path
2429                  * (all but the pool name).
2430                  */
2431                 if (strchr(tosnap, '@')) {
2432                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2433                             "argument - snapshot not allowed with -d"));
2434                         return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2435                 }
2436
2437                 chopprefix = strchr(drrb->drr_toname, '/');
2438                 if (chopprefix == NULL)
2439                         chopprefix = strchr(drrb->drr_toname, '@');
2440         } else if (strchr(tosnap, '@') == NULL) {
2441                 /*
2442                  * If a filesystem was specified without -d or -e, we want to
2443                  * tack on everything after the fs specified by 'zfs send'.
2444                  */
2445                 chopprefix = drrb->drr_toname + strlen(sendfs);
2446         } else {
2447                 /* A snapshot was specified as an exact path (no -d or -e). */
2448                 if (recursive) {
2449                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2450                             "cannot specify snapshot name for multi-snapshot "
2451                             "stream"));
2452                         return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2453                 }
2454                 chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
2455         }
2456
2457         ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
2458         ASSERT(chopprefix > drrb->drr_toname);
2459         ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
2460         ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
2461             chopprefix[0] == '\0');
2462
2463         /*
2464          * Determine name of destination snapshot, store in zc_value.
2465          */
2466         (void) strcpy(zc.zc_top_ds, tosnap);
2467         (void) strcpy(zc.zc_value, tosnap);
2468         (void) strlcat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
2469         free(cp);
2470         if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
2471                 zcmd_free_nvlists(&zc);
2472                 return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2473         }
2474
2475         /*
2476          * Determine the name of the origin snapshot, store in zc_string.
2477          */
2478         if (drrb->drr_flags & DRR_FLAG_CLONE) {
2479                 if (guid_to_name(hdl, tosnap,
2480                     drrb->drr_fromguid, zc.zc_string) != 0) {
2481                         zcmd_free_nvlists(&zc);
2482                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2483                             "local origin for clone %s does not exist"),
2484                             zc.zc_value);
2485                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
2486                 }
2487                 if (flags.verbose)
2488                         (void) printf("found clone origin %s\n", zc.zc_string);
2489         }
2490
2491         stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
2492             (drrb->drr_flags & DRR_FLAG_CLONE));
2493
2494         if (stream_wantsnewfs) {
2495                 /*
2496                  * if the parent fs does not exist, look for it based on
2497                  * the parent snap GUID
2498                  */
2499                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2500                     "cannot receive new filesystem stream"));
2501
2502                 (void) strcpy(zc.zc_name, zc.zc_value);
2503                 cp = strrchr(zc.zc_name, '/');
2504                 if (cp)
2505                         *cp = '\0';
2506                 if (cp &&
2507                     !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2508                         char suffix[ZFS_MAXNAMELEN];
2509                         (void) strcpy(suffix, strrchr(zc.zc_value, '/'));
2510                         if (guid_to_name(hdl, tosnap, parent_snapguid,
2511                             zc.zc_value) == 0) {
2512                                 *strchr(zc.zc_value, '@') = '\0';
2513                                 (void) strcat(zc.zc_value, suffix);
2514                         }
2515                 }
2516         } else {
2517                 /*
2518                  * if the fs does not exist, look for it based on the
2519                  * fromsnap GUID
2520                  */
2521                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2522                     "cannot receive incremental stream"));
2523
2524                 (void) strcpy(zc.zc_name, zc.zc_value);
2525                 *strchr(zc.zc_name, '@') = '\0';
2526
2527                 /*
2528                  * If the exact receive path was specified and this is the
2529                  * topmost path in the stream, then if the fs does not exist we
2530                  * should look no further.
2531                  */
2532                 if ((flags.isprefix || (*(chopprefix = drrb->drr_toname +
2533                     strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
2534                     !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2535                         char snap[ZFS_MAXNAMELEN];
2536                         (void) strcpy(snap, strchr(zc.zc_value, '@'));
2537                         if (guid_to_name(hdl, tosnap, drrb->drr_fromguid,
2538                             zc.zc_value) == 0) {
2539                                 *strchr(zc.zc_value, '@') = '\0';
2540                                 (void) strcat(zc.zc_value, snap);
2541                         }
2542                 }
2543         }
2544
2545         (void) strcpy(zc.zc_name, zc.zc_value);
2546         *strchr(zc.zc_name, '@') = '\0';
2547
2548         if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2549                 zfs_handle_t *zhp;
2550
2551                 /*
2552                  * Destination fs exists.  Therefore this should either
2553                  * be an incremental, or the stream specifies a new fs
2554                  * (full stream or clone) and they want us to blow it
2555                  * away (and have therefore specified -F and removed any
2556                  * snapshots).
2557                  */
2558                 if (stream_wantsnewfs) {
2559                         if (!flags.force) {
2560                                 zcmd_free_nvlists(&zc);
2561                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2562                                     "destination '%s' exists\n"
2563                                     "must specify -F to overwrite it"),
2564                                     zc.zc_name);
2565                                 return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2566                         }
2567                         if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
2568                             &zc) == 0) {
2569                                 zcmd_free_nvlists(&zc);
2570                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2571                                     "destination has snapshots (eg. %s)\n"
2572                                     "must destroy them to overwrite it"),
2573                                     zc.zc_name);
2574                                 return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2575                         }
2576                 }
2577
2578                 if ((zhp = zfs_open(hdl, zc.zc_name,
2579                     ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
2580                         zcmd_free_nvlists(&zc);
2581                         return (-1);
2582                 }
2583
2584                 if (stream_wantsnewfs &&
2585                     zhp->zfs_dmustats.dds_origin[0]) {
2586                         zcmd_free_nvlists(&zc);
2587                         zfs_close(zhp);
2588                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2589                             "destination '%s' is a clone\n"
2590                             "must destroy it to overwrite it"),
2591                             zc.zc_name);
2592                         return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2593                 }
2594
2595                 if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
2596                     stream_wantsnewfs) {
2597                         /* We can't do online recv in this case */
2598                         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
2599                         if (clp == NULL) {
2600                                 zfs_close(zhp);
2601                                 zcmd_free_nvlists(&zc);
2602                                 return (-1);
2603                         }
2604                         if (changelist_prefix(clp) != 0) {
2605                                 changelist_free(clp);
2606                                 zfs_close(zhp);
2607                                 zcmd_free_nvlists(&zc);
2608                                 return (-1);
2609                         }
2610                 }
2611                 zfs_close(zhp);
2612         } else {
2613                 /*
2614                  * Destination filesystem does not exist.  Therefore we better
2615                  * be creating a new filesystem (either from a full backup, or
2616                  * a clone).  It would therefore be invalid if the user
2617                  * specified only the pool name (i.e. if the destination name
2618                  * contained no slash character).
2619                  */
2620                 if (!stream_wantsnewfs ||
2621                     (cp = strrchr(zc.zc_name, '/')) == NULL) {
2622                         zcmd_free_nvlists(&zc);
2623                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2624                             "destination '%s' does not exist"), zc.zc_name);
2625                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
2626                 }
2627
2628                 /*
2629                  * Trim off the final dataset component so we perform the
2630                  * recvbackup ioctl to the filesystems's parent.
2631                  */
2632                 *cp = '\0';
2633
2634                 if (flags.isprefix && !flags.istail && !flags.dryrun &&
2635                     create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
2636                         zcmd_free_nvlists(&zc);
2637                         return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
2638                 }
2639
2640                 newfs = B_TRUE;
2641         }
2642
2643         zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
2644         zc.zc_cookie = infd;
2645         zc.zc_guid = flags.force;
2646         if (flags.verbose) {
2647                 (void) printf("%s %s stream of %s into %s\n",
2648                     flags.dryrun ? "would receive" : "receiving",
2649                     drrb->drr_fromguid ? "incremental" : "full",
2650                     drrb->drr_toname, zc.zc_value);
2651                 (void) fflush(stdout);
2652         }
2653
2654         if (flags.dryrun) {
2655                 zcmd_free_nvlists(&zc);
2656                 return (recv_skip(hdl, infd, flags.byteswap));
2657         }
2658
2659         zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
2660         zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
2661         zc.zc_cleanup_fd = cleanup_fd;
2662         zc.zc_action_handle = *action_handlep;
2663
2664         err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
2665         ioctl_errno = errno;
2666         prop_errflags = (zprop_errflags_t)zc.zc_obj;
2667
2668         if (err == 0) {
2669                 nvlist_t *prop_errors;
2670                 VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
2671                     zc.zc_nvlist_dst_size, &prop_errors, 0));
2672
2673                 nvpair_t *prop_err = NULL;
2674
2675                 while ((prop_err = nvlist_next_nvpair(prop_errors,
2676                     prop_err)) != NULL) {
2677                         char tbuf[1024];
2678                         zfs_prop_t prop;
2679                         int intval;
2680
2681                         prop = zfs_name_to_prop(nvpair_name(prop_err));
2682                         (void) nvpair_value_int32(prop_err, &intval);
2683                         if (strcmp(nvpair_name(prop_err),
2684                             ZPROP_N_MORE_ERRORS) == 0) {
2685                                 trunc_prop_errs(intval);
2686                                 break;
2687                         } else {
2688                                 (void) snprintf(tbuf, sizeof (tbuf),
2689                                     dgettext(TEXT_DOMAIN,
2690                                     "cannot receive %s property on %s"),
2691                                     nvpair_name(prop_err), zc.zc_name);
2692                                 zfs_setprop_error(hdl, prop, intval, tbuf);
2693                         }
2694                 }
2695                 nvlist_free(prop_errors);
2696         }
2697
2698         zc.zc_nvlist_dst = 0;
2699         zc.zc_nvlist_dst_size = 0;
2700         zcmd_free_nvlists(&zc);
2701
2702         if (err == 0 && snapprops_nvlist) {
2703                 zfs_cmd_t zc2 = { "\0", "\0", "\0", "\0", 0 };
2704
2705                 (void) strcpy(zc2.zc_name, zc.zc_value);
2706                 zc2.zc_cookie = B_TRUE; /* received */
2707                 if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
2708                         (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
2709                         zcmd_free_nvlists(&zc2);
2710                 }
2711         }
2712
2713         if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
2714                 /*
2715                  * It may be that this snapshot already exists,
2716                  * in which case we want to consume & ignore it
2717                  * rather than failing.
2718                  */
2719                 avl_tree_t *local_avl;
2720                 nvlist_t *local_nv, *fs;
2721                 cp = strchr(zc.zc_value, '@');
2722
2723                 /*
2724                  * XXX Do this faster by just iterating over snaps in
2725                  * this fs.  Also if zc_value does not exist, we will
2726                  * get a strange "does not exist" error message.
2727                  */
2728                 *cp = '\0';
2729                 if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE,
2730                     &local_nv, &local_avl) == 0) {
2731                         *cp = '@';
2732                         fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
2733                         fsavl_destroy(local_avl);
2734                         nvlist_free(local_nv);
2735
2736                         if (fs != NULL) {
2737                                 if (flags.verbose) {
2738                                         (void) printf("snap %s already exists; "
2739                                             "ignoring\n", zc.zc_value);
2740                                 }
2741                                 err = ioctl_err = recv_skip(hdl, infd,
2742                                     flags.byteswap);
2743                         }
2744                 }
2745                 *cp = '@';
2746         }
2747
2748         if (ioctl_err != 0) {
2749                 switch (ioctl_errno) {
2750                 case ENODEV:
2751                         cp = strchr(zc.zc_value, '@');
2752                         *cp = '\0';
2753                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2754                             "most recent snapshot of %s does not\n"
2755                             "match incremental source"), zc.zc_value);
2756                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
2757                         *cp = '@';
2758                         break;
2759                 case ETXTBSY:
2760                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2761                             "destination %s has been modified\n"
2762                             "since most recent snapshot"), zc.zc_name);
2763                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
2764                         break;
2765                 case EEXIST:
2766                         cp = strchr(zc.zc_value, '@');
2767                         if (newfs) {
2768                                 /* it's the containing fs that exists */
2769                                 *cp = '\0';
2770                         }
2771                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2772                             "destination already exists"));
2773                         (void) zfs_error_fmt(hdl, EZFS_EXISTS,
2774                             dgettext(TEXT_DOMAIN, "cannot restore to %s"),
2775                             zc.zc_value);
2776                         *cp = '@';
2777                         break;
2778                 case EINVAL:
2779                         (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2780                         break;
2781                 case ECKSUM:
2782                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2783                             "invalid stream (checksum mismatch)"));
2784                         (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2785                         break;
2786                 case ENOTSUP:
2787                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2788                             "pool must be upgraded to receive this stream."));
2789                         (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
2790                         break;
2791                 case EDQUOT:
2792                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2793                             "destination %s space quota exceeded"), zc.zc_name);
2794                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
2795                         break;
2796                 default:
2797                         (void) zfs_standard_error(hdl, ioctl_errno, errbuf);
2798                 }
2799         }
2800
2801         /*
2802          * Mount the target filesystem (if created).  Also mount any
2803          * children of the target filesystem if we did a replication
2804          * receive (indicated by stream_avl being non-NULL).
2805          */
2806         cp = strchr(zc.zc_value, '@');
2807         if (cp && (ioctl_err == 0 || !newfs)) {
2808                 zfs_handle_t *h;
2809
2810                 *cp = '\0';
2811                 h = zfs_open(hdl, zc.zc_value,
2812                     ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
2813                 if (h != NULL) {
2814                         if (h->zfs_type == ZFS_TYPE_VOLUME) {
2815                                 *cp = '@';
2816                         } else if (newfs || stream_avl) {
2817                                 /*
2818                                  * Track the first/top of hierarchy fs,
2819                                  * for mounting and sharing later.
2820                                  */
2821                                 if (top_zfs && *top_zfs == NULL)
2822                                         *top_zfs = zfs_strdup(hdl, zc.zc_value);
2823                         }
2824                         zfs_close(h);
2825                 }
2826                 *cp = '@';
2827         }
2828
2829         if (clp) {
2830                 err |= changelist_postfix(clp);
2831                 changelist_free(clp);
2832         }
2833
2834         if (prop_errflags & ZPROP_ERR_NOCLEAR) {
2835                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
2836                     "failed to clear unreceived properties on %s"),
2837                     zc.zc_name);
2838                 (void) fprintf(stderr, "\n");
2839         }
2840         if (prop_errflags & ZPROP_ERR_NORESTORE) {
2841                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
2842                     "failed to restore original properties on %s"),
2843                     zc.zc_name);
2844                 (void) fprintf(stderr, "\n");
2845         }
2846
2847         if (err || ioctl_err)
2848                 return (-1);
2849
2850         *action_handlep = zc.zc_action_handle;
2851
2852         if (flags.verbose) {
2853                 char buf1[64];
2854                 char buf2[64];
2855                 uint64_t bytes = zc.zc_cookie;
2856                 time_t delta = time(NULL) - begin_time;
2857                 if (delta == 0)
2858                         delta = 1;
2859                 zfs_nicenum(bytes, buf1, sizeof (buf1));
2860                 zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
2861
2862                 (void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
2863                     buf1, delta, buf2);
2864         }
2865
2866         return (0);
2867 }
2868
2869 static int
2870 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
2871     int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl,
2872     char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2873 {
2874         int err;
2875         dmu_replay_record_t drr, drr_noswap;
2876         struct drr_begin *drrb = &drr.drr_u.drr_begin;
2877         char errbuf[1024];
2878         zio_cksum_t zcksum = { { 0 } };
2879         uint64_t featureflags;
2880         int hdrtype;
2881
2882         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2883             "cannot receive"));
2884
2885         if (flags.isprefix &&
2886             !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
2887                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
2888                     "(%s) does not exist"), tosnap);
2889                 return (zfs_error(hdl, EZFS_NOENT, errbuf));
2890         }
2891
2892         /* read in the BEGIN record */
2893         if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
2894             &zcksum)))
2895                 return (err);
2896
2897         if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
2898                 /* It's the double end record at the end of a package */
2899                 return (ENODATA);
2900         }
2901
2902         /* the kernel needs the non-byteswapped begin record */
2903         drr_noswap = drr;
2904
2905         flags.byteswap = B_FALSE;
2906         if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
2907                 /*
2908                  * We computed the checksum in the wrong byteorder in
2909                  * recv_read() above; do it again correctly.
2910                  */
2911                 bzero(&zcksum, sizeof (zio_cksum_t));
2912                 fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
2913                 flags.byteswap = B_TRUE;
2914
2915                 drr.drr_type = BSWAP_32(drr.drr_type);
2916                 drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
2917                 drrb->drr_magic = BSWAP_64(drrb->drr_magic);
2918                 drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
2919                 drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
2920                 drrb->drr_type = BSWAP_32(drrb->drr_type);
2921                 drrb->drr_flags = BSWAP_32(drrb->drr_flags);
2922                 drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
2923                 drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
2924         }
2925
2926         if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
2927                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2928                     "stream (bad magic number)"));
2929                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2930         }
2931
2932         featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
2933         hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
2934
2935         if (!DMU_STREAM_SUPPORTED(featureflags) ||
2936             (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
2937                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2938                     "stream has unsupported feature, feature flags = %lx"),
2939                     featureflags);
2940                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2941         }
2942
2943         if (strchr(drrb->drr_toname, '@') == NULL) {
2944                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2945                     "stream (bad snapshot name)"));
2946                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2947         }
2948
2949         if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
2950                 char nonpackage_sendfs[ZFS_MAXNAMELEN];
2951                 if (sendfs == NULL) {
2952                         /*
2953                          * We were not called from zfs_receive_package(). Get
2954                          * the fs specified by 'zfs send'.
2955                          */
2956                         char *cp;
2957                         (void) strlcpy(nonpackage_sendfs,
2958                             drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN);
2959                         if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
2960                                 *cp = '\0';
2961                         sendfs = nonpackage_sendfs;
2962                 }
2963                 return (zfs_receive_one(hdl, infd, tosnap, flags,
2964                     &drr, &drr_noswap, sendfs, stream_nv, stream_avl,
2965                     top_zfs, cleanup_fd, action_handlep));
2966         } else {
2967                 assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
2968                     DMU_COMPOUNDSTREAM);
2969                 return (zfs_receive_package(hdl, infd, tosnap, flags,
2970                     &drr, &zcksum, top_zfs, cleanup_fd, action_handlep));
2971         }
2972 }
2973
2974 /*
2975  * Restores a backup of tosnap from the file descriptor specified by infd.
2976  * Return 0 on total success, -2 if some things couldn't be
2977  * destroyed/renamed/promoted, -1 if some things couldn't be received.
2978  * (-1 will override -2).
2979  */
2980 int
2981 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
2982     int infd, avl_tree_t *stream_avl)
2983 {
2984         char *top_zfs = NULL;
2985         int err;
2986         int cleanup_fd;
2987         uint64_t action_handle = 0;
2988
2989         cleanup_fd = open(ZFS_DEV, O_RDWR);
2990         VERIFY(cleanup_fd >= 0);
2991
2992         err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL,
2993             stream_avl, &top_zfs, cleanup_fd, &action_handle);
2994
2995         VERIFY(0 == close(cleanup_fd));
2996
2997         if (err == 0 && !flags.nomount && top_zfs) {
2998                 zfs_handle_t *zhp;
2999                 prop_changelist_t *clp;
3000
3001                 zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
3002                 if (zhp != NULL) {
3003                         clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
3004                             CL_GATHER_MOUNT_ALWAYS, 0);
3005                         zfs_close(zhp);
3006                         if (clp != NULL) {
3007                                 /* mount and share received datasets */
3008                                 err = changelist_postfix(clp);
3009                                 changelist_free(clp);
3010                         }
3011                 }
3012                 if (zhp == NULL || clp == NULL || err)
3013                         err = -1;
3014         }
3015         if (top_zfs)
3016                 free(top_zfs);
3017
3018         return (err);
3019 }