Move the world out of /zfs/ and separate out module build tree
[zfs.git] / cmd / zdb / zdb.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25
26 #include <stdio.h>
27 #include <stdio_ext.h>
28 #include <stdlib.h>
29 #include <ctype.h>
30 #include <sys/zfs_context.h>
31 #include <sys/spa.h>
32 #include <sys/spa_impl.h>
33 #include <sys/dmu.h>
34 #include <sys/zap.h>
35 #include <sys/fs/zfs.h>
36 #include <sys/zfs_znode.h>
37 #include <sys/vdev.h>
38 #include <sys/vdev_impl.h>
39 #include <sys/metaslab_impl.h>
40 #include <sys/dmu_objset.h>
41 #include <sys/dsl_dir.h>
42 #include <sys/dsl_dataset.h>
43 #include <sys/dsl_pool.h>
44 #include <sys/dbuf.h>
45 #include <sys/zil.h>
46 #include <sys/zil_impl.h>
47 #include <sys/stat.h>
48 #include <sys/resource.h>
49 #include <sys/dmu_traverse.h>
50 #include <sys/zio_checksum.h>
51 #include <sys/zio_compress.h>
52 #include <sys/zfs_fuid.h>
53 #include <sys/arc.h>
54 #undef ZFS_MAXNAMELEN
55 #undef verify
56 #include <libzfs.h>
57
/* Program name; used as the prefix in usage() and fatal() output. */
const char cmdname[] = "zdb";
/*
 * Per-option state, indexed by the option character (e.g. dump_opt['d']).
 * The dump routines compare entries against numeric thresholds.
 * NOTE(review): presumably a count of how often each flag was given --
 * confirm against the option parser.
 */
uint8_t dump_opt[256];

/* Signature shared by the per-object-type dump_*() viewer functions. */
typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);

extern void dump_intent_log(zilog_t *);
/* NOTE(review): looks like the list/count of objects named on the command */
/* line ("dataset [object...]") -- confirm where these are populated. */
uint64_t *zopt_object = NULL;
int zopt_objects = 0;
libzfs_handle_t *g_zfs;
/* NOTE(review): presumably the defaults for the -S user:cksumalg option. */
boolean_t zdb_sig_user_data = B_TRUE;
int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
69
/*
 * These libumem hooks provide a reasonable set of defaults for the allocator's
 * debugging facilities.
 */

/*
 * Returns the string to use as the $UMEM_DEBUG setting.  Declared with an
 * explicit (void) parameter list so it is a real prototype, matching
 * _umem_logging_init().
 */
const char *
_umem_debug_init(void)
{
        return ("default,verbose"); /* $UMEM_DEBUG setting */
}
79
/*
 * Returns the string to use as the $UMEM_LOGGING setting.
 */
const char *
_umem_logging_init(void)
{
        const char *setting = "fail,contents"; /* $UMEM_LOGGING setting */

        return (setting);
}
85
86 static void
87 usage(void)
88 {
89         (void) fprintf(stderr,
90             "Usage: %s [-udibcsv] [-U cachefile_path] "
91             "[-S user:cksumalg] "
92             "dataset [object...]\n"
93             "       %s -C [pool]\n"
94             "       %s -l dev\n"
95             "       %s -R pool:vdev:offset:size:flags\n"
96             "       %s [-p path_to_vdev_dir]\n"
97             "       %s -e pool | GUID | devid ...\n",
98             cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
99
100         (void) fprintf(stderr, "        -u uberblock\n");
101         (void) fprintf(stderr, "        -d datasets\n");
102         (void) fprintf(stderr, "        -C cached pool configuration\n");
103         (void) fprintf(stderr, "        -i intent logs\n");
104         (void) fprintf(stderr, "        -b block statistics\n");
105         (void) fprintf(stderr, "        -c checksum all data blocks\n");
106         (void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
107         (void) fprintf(stderr, "        -S <user|all>:<cksum_alg|all> -- "
108             "dump blkptr signatures\n");
109         (void) fprintf(stderr, "        -v verbose (applies to all others)\n");
110         (void) fprintf(stderr, "        -l dump label contents\n");
111         (void) fprintf(stderr, "        -U cachefile_path -- use alternate "
112             "cachefile\n");
113         (void) fprintf(stderr, "        -R read and display block from a "
114             "device\n");
115         (void) fprintf(stderr, "        -e Pool is exported/destroyed/"
116             "has altroot\n");
117         (void) fprintf(stderr, "        -p <Path to vdev dir> (use with -e)\n");
118         (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
119             "to make only that option verbose\n");
120         (void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
121         exit(1);
122 }
123
124 static void
125 fatal(const char *fmt, ...)
126 {
127         va_list ap;
128
129         va_start(ap, fmt);
130         (void) fprintf(stderr, "%s: ", cmdname);
131         (void) vfprintf(stderr, fmt, ap);
132         va_end(ap);
133         (void) fprintf(stderr, "\n");
134
135         abort();
136 }
137
138 static void
139 dump_nvlist(nvlist_t *list, int indent)
140 {
141         nvpair_t *elem = NULL;
142
143         while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
144                 switch (nvpair_type(elem)) {
145                 case DATA_TYPE_STRING:
146                         {
147                                 char *value;
148
149                                 VERIFY(nvpair_value_string(elem, &value) == 0);
150                                 (void) printf("%*s%s='%s'\n", indent, "",
151                                     nvpair_name(elem), value);
152                         }
153                         break;
154
155                 case DATA_TYPE_UINT64:
156                         {
157                                 uint64_t value;
158
159                                 VERIFY(nvpair_value_uint64(elem, &value) == 0);
160                                 (void) printf("%*s%s=%llu\n", indent, "",
161                                     nvpair_name(elem), (u_longlong_t)value);
162                         }
163                         break;
164
165                 case DATA_TYPE_NVLIST:
166                         {
167                                 nvlist_t *value;
168
169                                 VERIFY(nvpair_value_nvlist(elem, &value) == 0);
170                                 (void) printf("%*s%s\n", indent, "",
171                                     nvpair_name(elem));
172                                 dump_nvlist(value, indent + 4);
173                         }
174                         break;
175
176                 case DATA_TYPE_NVLIST_ARRAY:
177                         {
178                                 nvlist_t **value;
179                                 uint_t c, count;
180
181                                 VERIFY(nvpair_value_nvlist_array(elem, &value,
182                                     &count) == 0);
183
184                                 for (c = 0; c < count; c++) {
185                                         (void) printf("%*s%s[%u]\n", indent, "",
186                                             nvpair_name(elem), c);
187                                         dump_nvlist(value[c], indent + 8);
188                                 }
189                         }
190                         break;
191
192                 default:
193
194                         (void) printf("bad config type %d for %s\n",
195                             nvpair_type(elem), nvpair_name(elem));
196                 }
197         }
198 }
199
200 /* ARGSUSED */
201 static void
202 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
203 {
204         nvlist_t *nv;
205         size_t nvsize = *(uint64_t *)data;
206         char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
207
208         VERIFY(0 == dmu_read(os, object, 0, nvsize, packed));
209
210         VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
211
212         umem_free(packed, nvsize);
213
214         dump_nvlist(nv, 8);
215
216         nvlist_free(nv);
217 }
218
219 const char dump_zap_stars[] = "****************************************";
220 const int dump_zap_width = sizeof (dump_zap_stars) - 1;
221
222 static void
223 dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE])
224 {
225         int i;
226         int minidx = ZAP_HISTOGRAM_SIZE - 1;
227         int maxidx = 0;
228         uint64_t max = 0;
229
230         for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) {
231                 if (histo[i] > max)
232                         max = histo[i];
233                 if (histo[i] > 0 && i > maxidx)
234                         maxidx = i;
235                 if (histo[i] > 0 && i < minidx)
236                         minidx = i;
237         }
238
239         if (max < dump_zap_width)
240                 max = dump_zap_width;
241
242         for (i = minidx; i <= maxidx; i++)
243                 (void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i],
244                     &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]);
245 }
246
247 static void
248 dump_zap_stats(objset_t *os, uint64_t object)
249 {
250         int error;
251         zap_stats_t zs;
252
253         error = zap_get_stats(os, object, &zs);
254         if (error)
255                 return;
256
257         if (zs.zs_ptrtbl_len == 0) {
258                 ASSERT(zs.zs_num_blocks == 1);
259                 (void) printf("\tmicrozap: %llu bytes, %llu entries\n",
260                     (u_longlong_t)zs.zs_blocksize,
261                     (u_longlong_t)zs.zs_num_entries);
262                 return;
263         }
264
265         (void) printf("\tFat ZAP stats:\n");
266
267         (void) printf("\t\tPointer table:\n");
268         (void) printf("\t\t\t%llu elements\n",
269             (u_longlong_t)zs.zs_ptrtbl_len);
270         (void) printf("\t\t\tzt_blk: %llu\n",
271             (u_longlong_t)zs.zs_ptrtbl_zt_blk);
272         (void) printf("\t\t\tzt_numblks: %llu\n",
273             (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
274         (void) printf("\t\t\tzt_shift: %llu\n",
275             (u_longlong_t)zs.zs_ptrtbl_zt_shift);
276         (void) printf("\t\t\tzt_blks_copied: %llu\n",
277             (u_longlong_t)zs.zs_ptrtbl_blks_copied);
278         (void) printf("\t\t\tzt_nextblk: %llu\n",
279             (u_longlong_t)zs.zs_ptrtbl_nextblk);
280
281         (void) printf("\t\tZAP entries: %llu\n",
282             (u_longlong_t)zs.zs_num_entries);
283         (void) printf("\t\tLeaf blocks: %llu\n",
284             (u_longlong_t)zs.zs_num_leafs);
285         (void) printf("\t\tTotal blocks: %llu\n",
286             (u_longlong_t)zs.zs_num_blocks);
287         (void) printf("\t\tzap_block_type: 0x%llx\n",
288             (u_longlong_t)zs.zs_block_type);
289         (void) printf("\t\tzap_magic: 0x%llx\n",
290             (u_longlong_t)zs.zs_magic);
291         (void) printf("\t\tzap_salt: 0x%llx\n",
292             (u_longlong_t)zs.zs_salt);
293
294         (void) printf("\t\tLeafs with 2^n pointers:\n");
295         dump_zap_histogram(zs.zs_leafs_with_2n_pointers);
296
297         (void) printf("\t\tBlocks with n*5 entries:\n");
298         dump_zap_histogram(zs.zs_blocks_with_n5_entries);
299
300         (void) printf("\t\tBlocks n/10 full:\n");
301         dump_zap_histogram(zs.zs_blocks_n_tenths_full);
302
303         (void) printf("\t\tEntries with n chunks:\n");
304         dump_zap_histogram(zs.zs_entries_using_n_chunks);
305
306         (void) printf("\t\tBuckets with n entries:\n");
307         dump_zap_histogram(zs.zs_buckets_with_n_entries);
308 }
309
/*
 * Object viewer that prints nothing; all arguments are ignored.
 */
/*ARGSUSED*/
static void
dump_none(objset_t *os, uint64_t object, void *data, size_t size)
{
}
315
/*
 * Object viewer with an empty body: prints nothing, ignores its arguments.
 * NOTE(review): unlike the sibling viewers this one is not static --
 * confirm whether external linkage is intentional.
 */
/*ARGSUSED*/
void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
}
321
/*
 * Object viewer with an empty body: prints nothing, ignores its arguments.
 */
/*ARGSUSED*/
static void
dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
{
}
327
328 /*ARGSUSED*/
329 static void
330 dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
331 {
332         zap_cursor_t zc;
333         zap_attribute_t attr;
334         void *prop;
335         int i;
336
337         dump_zap_stats(os, object);
338         (void) printf("\n");
339
340         for (zap_cursor_init(&zc, os, object);
341             zap_cursor_retrieve(&zc, &attr) == 0;
342             zap_cursor_advance(&zc)) {
343                 (void) printf("\t\t%s = ", attr.za_name);
344                 if (attr.za_num_integers == 0) {
345                         (void) printf("\n");
346                         continue;
347                 }
348                 prop = umem_zalloc(attr.za_num_integers *
349                     attr.za_integer_length, UMEM_NOFAIL);
350                 (void) zap_lookup(os, object, attr.za_name,
351                     attr.za_integer_length, attr.za_num_integers, prop);
352                 if (attr.za_integer_length == 1) {
353                         (void) printf("%s", (char *)prop);
354                 } else {
355                         for (i = 0; i < attr.za_num_integers; i++) {
356                                 switch (attr.za_integer_length) {
357                                 case 2:
358                                         (void) printf("%u ",
359                                             ((uint16_t *)prop)[i]);
360                                         break;
361                                 case 4:
362                                         (void) printf("%u ",
363                                             ((uint32_t *)prop)[i]);
364                                         break;
365                                 case 8:
366                                         (void) printf("%lld ",
367                                             (u_longlong_t)((int64_t *)prop)[i]);
368                                         break;
369                                 }
370                         }
371                 }
372                 (void) printf("\n");
373                 umem_free(prop, attr.za_num_integers * attr.za_integer_length);
374         }
375         zap_cursor_fini(&zc);
376 }
377
378 /*ARGSUSED*/
379 static void
380 dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
381 {
382         zap_cursor_t zc;
383         zap_attribute_t attr;
384         const char *typenames[] = {
385                 /* 0 */ "not specified",
386                 /* 1 */ "FIFO",
387                 /* 2 */ "Character Device",
388                 /* 3 */ "3 (invalid)",
389                 /* 4 */ "Directory",
390                 /* 5 */ "5 (invalid)",
391                 /* 6 */ "Block Device",
392                 /* 7 */ "7 (invalid)",
393                 /* 8 */ "Regular File",
394                 /* 9 */ "9 (invalid)",
395                 /* 10 */ "Symbolic Link",
396                 /* 11 */ "11 (invalid)",
397                 /* 12 */ "Socket",
398                 /* 13 */ "Door",
399                 /* 14 */ "Event Port",
400                 /* 15 */ "15 (invalid)",
401         };
402
403         dump_zap_stats(os, object);
404         (void) printf("\n");
405
406         for (zap_cursor_init(&zc, os, object);
407             zap_cursor_retrieve(&zc, &attr) == 0;
408             zap_cursor_advance(&zc)) {
409                 (void) printf("\t\t%s = %lld (type: %s)\n",
410                     attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
411                     typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
412         }
413         zap_cursor_fini(&zc);
414 }
415
416 static void
417 dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
418 {
419         uint64_t alloc, offset, entry;
420         uint8_t mapshift = sm->sm_shift;
421         uint64_t mapstart = sm->sm_start;
422         char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
423                             "INVALID", "INVALID", "INVALID", "INVALID" };
424
425         if (smo->smo_object == 0)
426                 return;
427
428         /*
429          * Print out the freelist entries in both encoded and decoded form.
430          */
431         alloc = 0;
432         for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
433                 VERIFY(0 == dmu_read(os, smo->smo_object, offset,
434                     sizeof (entry), &entry));
435                 if (SM_DEBUG_DECODE(entry)) {
436                         (void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n",
437                             (u_longlong_t)(offset / sizeof (entry)),
438                             ddata[SM_DEBUG_ACTION_DECODE(entry)],
439                             (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
440                             (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
441                 } else {
442                         (void) printf("\t\t[%4llu]    %c  range:"
443                             " %08llx-%08llx  size: %06llx\n",
444                             (u_longlong_t)(offset / sizeof (entry)),
445                             SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
446                             (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
447                             mapshift) + mapstart),
448                             (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
449                             mapshift) + mapstart + (SM_RUN_DECODE(entry) <<
450                             mapshift)),
451                             (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
452                         if (SM_TYPE_DECODE(entry) == SM_ALLOC)
453                                 alloc += SM_RUN_DECODE(entry) << mapshift;
454                         else
455                                 alloc -= SM_RUN_DECODE(entry) << mapshift;
456                 }
457         }
458         if (alloc != smo->smo_alloc) {
459                 (void) printf("space_map_object alloc (%llu) INCONSISTENT "
460                     "with space map summary (%llu)\n",
461                     (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc);
462         }
463 }
464
465 static void
466 dump_metaslab(metaslab_t *msp)
467 {
468         char freebuf[5];
469         space_map_obj_t *smo = &msp->ms_smo;
470         vdev_t *vd = msp->ms_group->mg_vd;
471         spa_t *spa = vd->vdev_spa;
472
473         nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf);
474
475         if (dump_opt['d'] <= 5) {
476                 (void) printf("\t%10llx   %10llu   %5s\n",
477                     (u_longlong_t)msp->ms_map.sm_start,
478                     (u_longlong_t)smo->smo_object,
479                     freebuf);
480                 return;
481         }
482
483         (void) printf(
484             "\tvdev %llu   offset %08llx   spacemap %4llu   free %5s\n",
485             (u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start,
486             (u_longlong_t)smo->smo_object, freebuf);
487
488         ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift));
489
490         dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map);
491 }
492
493 static void
494 dump_metaslabs(spa_t *spa)
495 {
496         vdev_t *rvd = spa->spa_root_vdev;
497         vdev_t *vd;
498         int c, m;
499
500         (void) printf("\nMetaslabs:\n");
501
502         for (c = 0; c < rvd->vdev_children; c++) {
503                 vd = rvd->vdev_child[c];
504
505                 (void) printf("\n    vdev %llu\n\n", (u_longlong_t)vd->vdev_id);
506
507                 if (dump_opt['d'] <= 5) {
508                         (void) printf("\t%10s   %10s   %5s\n",
509                             "offset", "spacemap", "free");
510                         (void) printf("\t%10s   %10s   %5s\n",
511                             "------", "--------", "----");
512                 }
513                 for (m = 0; m < vd->vdev_ms_count; m++)
514                         dump_metaslab(vd->vdev_ms[m]);
515                 (void) printf("\n");
516         }
517 }
518
519 static void
520 dump_dtl(vdev_t *vd, int indent)
521 {
522         avl_tree_t *t = &vd->vdev_dtl_map.sm_root;
523         space_seg_t *ss;
524         vdev_t *pvd;
525         int c;
526
527         if (indent == 0)
528                 (void) printf("\nDirty time logs:\n\n");
529
530         (void) printf("\t%*s%s\n", indent, "",
531             vd->vdev_path ? vd->vdev_path :
532             vd->vdev_parent ? vd->vdev_ops->vdev_op_type :
533             spa_name(vd->vdev_spa));
534
535         for (ss = avl_first(t); ss; ss = AVL_NEXT(t, ss)) {
536                 /*
537                  * Everything in this DTL must appear in all parent DTL unions.
538                  */
539                 for (pvd = vd; pvd; pvd = pvd->vdev_parent)
540                         ASSERT(vdev_dtl_contains(&pvd->vdev_dtl_map,
541                             ss->ss_start, ss->ss_end - ss->ss_start));
542                 (void) printf("\t%*soutage [%llu,%llu] length %llu\n",
543                     indent, "",
544                     (u_longlong_t)ss->ss_start,
545                     (u_longlong_t)ss->ss_end - 1,
546                     (u_longlong_t)(ss->ss_end - ss->ss_start));
547         }
548
549         (void) printf("\n");
550
551         if (dump_opt['d'] > 5 && vd->vdev_children == 0) {
552                 dump_spacemap(vd->vdev_spa->spa_meta_objset, &vd->vdev_dtl,
553                     &vd->vdev_dtl_map);
554                 (void) printf("\n");
555         }
556
557         for (c = 0; c < vd->vdev_children; c++)
558                 dump_dtl(vd->vdev_child[c], indent + 4);
559 }
560
/*
 * Object viewer with an empty body: prints nothing, ignores its arguments.
 */
/*ARGSUSED*/
static void
dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
{
}
566
567 static uint64_t
568 blkid2offset(const dnode_phys_t *dnp, int level, uint64_t blkid)
569 {
570         if (level < 0)
571                 return (blkid);
572
573         return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
574             dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
575 }
576
577 static void
578 sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp, int alldvas)
579 {
580         dva_t *dva = bp->blk_dva;
581         int ndvas = alldvas ? BP_GET_NDVAS(bp) : 1;
582         int i;
583
584         blkbuf[0] = '\0';
585
586         for (i = 0; i < ndvas; i++)
587                 (void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
588                     (u_longlong_t)DVA_GET_VDEV(&dva[i]),
589                     (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
590                     (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
591
592         (void) sprintf(blkbuf + strlen(blkbuf), "%llxL/%llxP F=%llu B=%llu",
593             (u_longlong_t)BP_GET_LSIZE(bp),
594             (u_longlong_t)BP_GET_PSIZE(bp),
595             (u_longlong_t)bp->blk_fill,
596             (u_longlong_t)bp->blk_birth);
597 }
598
599 static void
600 print_indirect(blkptr_t *bp, const zbookmark_t *zb,
601     const dnode_phys_t *dnp)
602 {
603         char blkbuf[BP_SPRINTF_LEN];
604         int l;
605
606         ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
607         ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
608
609         (void) printf("%16llx ",
610             (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
611
612         ASSERT(zb->zb_level >= 0);
613
614         for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
615                 if (l == zb->zb_level) {
616                         (void) printf("L%llx", (u_longlong_t)zb->zb_level);
617                 } else {
618                         (void) printf(" ");
619                 }
620         }
621
622         sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
623         (void) printf("%s\n", blkbuf);
624 }
625
/*
 * Fill in all four fields of the bookmark that 'zb' points to.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe as the body of an unbraced if/else.  All arguments
 * are parenthesized.  'zb' is evaluated four times, so it must be free of
 * side effects.
 */
#define SET_BOOKMARK(zb, objset, object, level, blkid)  \
do {                                                    \
        (zb)->zb_objset = (objset);                     \
        (zb)->zb_object = (object);                     \
        (zb)->zb_level = (level);                       \
        (zb)->zb_blkid = (blkid);                       \
} while (0)
633
634 static int
635 visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
636     blkptr_t *bp, const zbookmark_t *zb)
637 {
638         int err;
639
640         if (bp->blk_birth == 0)
641                 return (0);
642
643         print_indirect(bp, zb, dnp);
644
645         if (BP_GET_LEVEL(bp) > 0) {
646                 uint32_t flags = ARC_WAIT;
647                 int i;
648                 blkptr_t *cbp;
649                 int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
650                 arc_buf_t *buf;
651                 uint64_t fill = 0;
652
653                 err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf,
654                     ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
655                 if (err)
656                         return (err);
657
658                 /* recursively visit blocks below this */
659                 cbp = buf->b_data;
660                 for (i = 0; i < epb; i++, cbp++) {
661                         zbookmark_t czb;
662
663                         SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
664                             zb->zb_level - 1,
665                             zb->zb_blkid * epb + i);
666                         err = visit_indirect(spa, dnp, cbp, &czb);
667                         if (err)
668                                 break;
669                         fill += cbp->blk_fill;
670                 }
671                 ASSERT3U(fill, ==, bp->blk_fill);
672                 (void) arc_buf_remove_ref(buf, &buf);
673         }
674
675         return (err);
676 }
677
678 /*ARGSUSED*/
679 static void
680 dump_indirect(dnode_t *dn)
681 {
682         dnode_phys_t *dnp = dn->dn_phys;
683         int j;
684         zbookmark_t czb;
685
686         (void) printf("Indirect blocks:\n");
687
688         SET_BOOKMARK(&czb, dmu_objset_id(&dn->dn_objset->os),
689             dn->dn_object, dnp->dn_nlevels - 1, 0);
690         for (j = 0; j < dnp->dn_nblkptr; j++) {
691                 czb.zb_blkid = j;
692                 (void) visit_indirect(dmu_objset_spa(&dn->dn_objset->os), dnp,
693                     &dnp->dn_blkptr[j], &czb);
694         }
695
696         (void) printf("\n");
697 }
698
699 /*ARGSUSED*/
700 static void
701 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
702 {
703         dsl_dir_phys_t *dd = data;
704         time_t crtime;
705         char nice[6];
706
707         if (dd == NULL)
708                 return;
709
710         ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
711
712         crtime = dd->dd_creation_time;
713         (void) printf("\t\tcreation_time = %s", ctime(&crtime));
714         (void) printf("\t\thead_dataset_obj = %llu\n",
715             (u_longlong_t)dd->dd_head_dataset_obj);
716         (void) printf("\t\tparent_dir_obj = %llu\n",
717             (u_longlong_t)dd->dd_parent_obj);
718         (void) printf("\t\torigin_obj = %llu\n",
719             (u_longlong_t)dd->dd_origin_obj);
720         (void) printf("\t\tchild_dir_zapobj = %llu\n",
721             (u_longlong_t)dd->dd_child_dir_zapobj);
722         nicenum(dd->dd_used_bytes, nice);
723         (void) printf("\t\tused_bytes = %s\n", nice);
724         nicenum(dd->dd_compressed_bytes, nice);
725         (void) printf("\t\tcompressed_bytes = %s\n", nice);
726         nicenum(dd->dd_uncompressed_bytes, nice);
727         (void) printf("\t\tuncompressed_bytes = %s\n", nice);
728         nicenum(dd->dd_quota, nice);
729         (void) printf("\t\tquota = %s\n", nice);
730         nicenum(dd->dd_reserved, nice);
731         (void) printf("\t\treserved = %s\n", nice);
732         (void) printf("\t\tprops_zapobj = %llu\n",
733             (u_longlong_t)dd->dd_props_zapobj);
734         (void) printf("\t\tdeleg_zapobj = %llu\n",
735             (u_longlong_t)dd->dd_deleg_zapobj);
736         (void) printf("\t\tflags = %llx\n",
737             (u_longlong_t)dd->dd_flags);
738
739 #define DO(which) \
740         nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
741         (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
742         DO(HEAD);
743         DO(SNAP);
744         DO(CHILD);
745         DO(CHILD_RSRV);
746         DO(REFRSRV);
747 #undef DO
748 }
749
750 /*ARGSUSED*/
751 static void
752 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
753 {
754         dsl_dataset_phys_t *ds = data;
755         time_t crtime;
756         char used[6], compressed[6], uncompressed[6], unique[6];
757         char blkbuf[BP_SPRINTF_LEN];
758
759         if (ds == NULL)
760                 return;
761
762         ASSERT(size == sizeof (*ds));
763         crtime = ds->ds_creation_time;
764         nicenum(ds->ds_used_bytes, used);
765         nicenum(ds->ds_compressed_bytes, compressed);
766         nicenum(ds->ds_uncompressed_bytes, uncompressed);
767         nicenum(ds->ds_unique_bytes, unique);
768         sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp);
769
770         (void) printf("\t\tdir_obj = %llu\n",
771             (u_longlong_t)ds->ds_dir_obj);
772         (void) printf("\t\tprev_snap_obj = %llu\n",
773             (u_longlong_t)ds->ds_prev_snap_obj);
774         (void) printf("\t\tprev_snap_txg = %llu\n",
775             (u_longlong_t)ds->ds_prev_snap_txg);
776         (void) printf("\t\tnext_snap_obj = %llu\n",
777             (u_longlong_t)ds->ds_next_snap_obj);
778         (void) printf("\t\tsnapnames_zapobj = %llu\n",
779             (u_longlong_t)ds->ds_snapnames_zapobj);
780         (void) printf("\t\tnum_children = %llu\n",
781             (u_longlong_t)ds->ds_num_children);
782         (void) printf("\t\tcreation_time = %s", ctime(&crtime));
783         (void) printf("\t\tcreation_txg = %llu\n",
784             (u_longlong_t)ds->ds_creation_txg);
785         (void) printf("\t\tdeadlist_obj = %llu\n",
786             (u_longlong_t)ds->ds_deadlist_obj);
787         (void) printf("\t\tused_bytes = %s\n", used);
788         (void) printf("\t\tcompressed_bytes = %s\n", compressed);
789         (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
790         (void) printf("\t\tunique = %s\n", unique);
791         (void) printf("\t\tfsid_guid = %llu\n",
792             (u_longlong_t)ds->ds_fsid_guid);
793         (void) printf("\t\tguid = %llu\n",
794             (u_longlong_t)ds->ds_guid);
795         (void) printf("\t\tflags = %llx\n",
796             (u_longlong_t)ds->ds_flags);
797         (void) printf("\t\tnext_clones_obj = %llu\n",
798             (u_longlong_t)ds->ds_next_clones_obj);
799         (void) printf("\t\tprops_obj = %llu\n",
800             (u_longlong_t)ds->ds_props_obj);
801         (void) printf("\t\tbp = %s\n", blkbuf);
802 }
803
804 static void
805 dump_bplist(objset_t *mos, uint64_t object, char *name)
806 {
807         bplist_t bpl = { 0 };
808         blkptr_t blk, *bp = &blk;
809         uint64_t itor = 0;
810         char bytes[6];
811         char comp[6];
812         char uncomp[6];
813
814         if (dump_opt['d'] < 3)
815                 return;
816
817         mutex_init(&bpl.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
818         VERIFY(0 == bplist_open(&bpl, mos, object));
819         if (bplist_empty(&bpl)) {
820                 bplist_close(&bpl);
821                 mutex_destroy(&bpl.bpl_lock);
822                 return;
823         }
824
825         nicenum(bpl.bpl_phys->bpl_bytes, bytes);
826         if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
827                 nicenum(bpl.bpl_phys->bpl_comp, comp);
828                 nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
829                 (void) printf("\n    %s: %llu entries, %s (%s/%s comp)\n",
830                     name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
831                     bytes, comp, uncomp);
832         } else {
833                 (void) printf("\n    %s: %llu entries, %s\n",
834                     name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes);
835         }
836
837         if (dump_opt['d'] < 5) {
838                 bplist_close(&bpl);
839                 mutex_destroy(&bpl.bpl_lock);
840                 return;
841         }
842
843         (void) printf("\n");
844
845         while (bplist_iterate(&bpl, &itor, bp) == 0) {
846                 char blkbuf[BP_SPRINTF_LEN];
847
848                 ASSERT(bp->blk_birth != 0);
849                 sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
850                 (void) printf("\tItem %3llu: %s\n",
851                     (u_longlong_t)itor - 1, blkbuf);
852         }
853
854         bplist_close(&bpl);
855         mutex_destroy(&bpl.bpl_lock);
856 }
857
/*
 * FUID lookup state shared by dump_uidgid(), print_idstr() and
 * fuid_table_destroy().  The two AVL trees are handed to
 * zfs_fuid_table_load() by dump_uidgid(); fuid_table_loaded records that
 * this has been done so the load is not repeated until the table is
 * destroyed again.
 */
static avl_tree_t idx_tree;
static avl_tree_t domain_tree;
static boolean_t fuid_table_loaded;
861
862 static void
863 fuid_table_destroy()
864 {
865         if (fuid_table_loaded) {
866                 zfs_fuid_table_destroy(&idx_tree, &domain_tree);
867                 fuid_table_loaded = B_FALSE;
868         }
869 }
870
871 /*
872  * print uid or gid information.
873  * For normal POSIX id just the id is printed in decimal format.
874  * For CIFS files with FUID the fuid is printed in hex followed by
875  * the doman-rid string.
876  */
877 static void
878 print_idstr(uint64_t id, const char *id_type)
879 {
880         if (FUID_INDEX(id)) {
881                 char *domain;
882
883                 domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
884                 (void) printf("\t%s     %llx [%s-%d]\n", id_type,
885                     (u_longlong_t)id, domain, (int)FUID_RID(id));
886         } else {
887                 (void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
888         }
889
890 }
891
892 static void
893 dump_uidgid(objset_t *os, znode_phys_t *zp)
894 {
895         uint32_t uid_idx, gid_idx;
896
897         uid_idx = FUID_INDEX(zp->zp_uid);
898         gid_idx = FUID_INDEX(zp->zp_gid);
899
900         /* Load domain table, if not already loaded */
901         if (!fuid_table_loaded && (uid_idx || gid_idx)) {
902                 uint64_t fuid_obj;
903
904                 /* first find the fuid object.  It lives in the master node */
905                 VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
906                     8, 1, &fuid_obj) == 0);
907                 (void) zfs_fuid_table_load(os, fuid_obj,
908                     &idx_tree, &domain_tree);
909                 fuid_table_loaded = B_TRUE;
910         }
911
912         print_idstr(zp->zp_uid, "uid");
913         print_idstr(zp->zp_gid, "gid");
914 }
915
916 /*ARGSUSED*/
917 static void
918 dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
919 {
920         znode_phys_t *zp = data;
921         time_t z_crtime, z_atime, z_mtime, z_ctime;
922         char path[MAXPATHLEN * 2];      /* allow for xattr and failure prefix */
923         int error;
924
925         ASSERT(size >= sizeof (znode_phys_t));
926
927         error = zfs_obj_to_path(os, object, path, sizeof (path));
928         if (error != 0) {
929                 (void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
930                     (u_longlong_t)object);
931         }
932
933         if (dump_opt['d'] < 3) {
934                 (void) printf("\t%s\n", path);
935                 return;
936         }
937
938         z_crtime = (time_t)zp->zp_crtime[0];
939         z_atime = (time_t)zp->zp_atime[0];
940         z_mtime = (time_t)zp->zp_mtime[0];
941         z_ctime = (time_t)zp->zp_ctime[0];
942
943         (void) printf("\tpath   %s\n", path);
944         dump_uidgid(os, zp);
945         (void) printf("\tatime  %s", ctime(&z_atime));
946         (void) printf("\tmtime  %s", ctime(&z_mtime));
947         (void) printf("\tctime  %s", ctime(&z_ctime));
948         (void) printf("\tcrtime %s", ctime(&z_crtime));
949         (void) printf("\tgen    %llu\n", (u_longlong_t)zp->zp_gen);
950         (void) printf("\tmode   %llo\n", (u_longlong_t)zp->zp_mode);
951         (void) printf("\tsize   %llu\n", (u_longlong_t)zp->zp_size);
952         (void) printf("\tparent %llu\n", (u_longlong_t)zp->zp_parent);
953         (void) printf("\tlinks  %llu\n", (u_longlong_t)zp->zp_links);
954         (void) printf("\txattr  %llu\n", (u_longlong_t)zp->zp_xattr);
955         (void) printf("\trdev   0x%016llx\n", (u_longlong_t)zp->zp_rdev);
956 }
957
/*
 * Object viewer registered in object_viewer[] for the ACL object types.
 * The body is empty: nothing is printed for these objects.
 */
/*ARGSUSED*/
static void
dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
{
}
963
/*
 * Object viewer registered in object_viewer[] for DMU objset objects.
 * The body is empty: nothing is printed for these objects.
 */
/*ARGSUSED*/
static void
dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
{
}
969
/*
 * Viewer functions, one slot per DMU object type (DMU_OT_NUMTYPES slots).
 * dump_object() indexes this table with both an object's bonus type and
 * its own type.  The comment on each entry names the object type that
 * the slot is meant for, so entries must stay in object-type order.
 */
static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
        dump_none,              /* unallocated                  */
        dump_zap,               /* object directory             */
        dump_uint64,            /* object array                 */
        dump_none,              /* packed nvlist                */
        dump_packed_nvlist,     /* packed nvlist size           */
        dump_none,              /* bplist                       */
        dump_none,              /* bplist header                */
        dump_none,              /* SPA space map header         */
        dump_none,              /* SPA space map                */
        dump_none,              /* ZIL intent log               */
        dump_dnode,             /* DMU dnode                    */
        dump_dmu_objset,        /* DMU objset                   */
        dump_dsl_dir,           /* DSL directory                */
        dump_zap,               /* DSL directory child map      */
        dump_zap,               /* DSL dataset snap map         */
        dump_zap,               /* DSL props                    */
        dump_dsl_dataset,       /* DSL dataset                  */
        dump_znode,             /* ZFS znode                    */
        dump_acl,               /* ZFS V0 ACL                   */
        dump_uint8,             /* ZFS plain file               */
        dump_zpldir,            /* ZFS directory                */
        dump_zap,               /* ZFS master node              */
        dump_zap,               /* ZFS delete queue             */
        dump_uint8,             /* zvol object                  */
        dump_zap,               /* zvol prop                    */
        dump_uint8,             /* other uint8[]                */
        dump_uint64,            /* other uint64[]               */
        dump_zap,               /* other ZAP                    */
        dump_zap,               /* persistent error log         */
        dump_uint8,             /* SPA history                  */
        dump_uint64,            /* SPA history offsets          */
        dump_zap,               /* Pool properties              */
        dump_zap,               /* DSL permissions              */
        dump_acl,               /* ZFS ACL                      */
        dump_uint8,             /* ZFS SYSACL                   */
        dump_none,              /* FUID nvlist                  */
        dump_packed_nvlist,     /* FUID nvlist size             */
        dump_zap,               /* DSL dataset next clones      */
        dump_zap,               /* DSL scrub queue              */
};
1011
1012 static void
1013 dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
1014 {
1015         dmu_buf_t *db = NULL;
1016         dmu_object_info_t doi;
1017         dnode_t *dn;
1018         void *bonus = NULL;
1019         size_t bsize = 0;
1020         char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], segsize[6];
1021         char aux[50];
1022         int error;
1023
1024         if (*print_header) {
1025                 (void) printf("\n    Object  lvl   iblk   dblk  lsize"
1026                     "  asize  type\n");
1027                 *print_header = 0;
1028         }
1029
1030         if (object == 0) {
1031                 dn = os->os->os_meta_dnode;
1032         } else {
1033                 error = dmu_bonus_hold(os, object, FTAG, &db);
1034                 if (error)
1035                         fatal("dmu_bonus_hold(%llu) failed, errno %u",
1036                             object, error);
1037                 bonus = db->db_data;
1038                 bsize = db->db_size;
1039                 dn = ((dmu_buf_impl_t *)db)->db_dnode;
1040         }
1041         dmu_object_info_from_dnode(dn, &doi);
1042
1043         nicenum(doi.doi_metadata_block_size, iblk);
1044         nicenum(doi.doi_data_block_size, dblk);
1045         nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1),
1046             lsize);
1047         nicenum(doi.doi_physical_blks << 9, asize);
1048         nicenum(doi.doi_bonus_size, bonus_size);
1049
1050         aux[0] = '\0';
1051
1052         if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
1053                 (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
1054                     zio_checksum_table[doi.doi_checksum].ci_name);
1055         }
1056
1057         if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
1058                 (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
1059                     zio_compress_table[doi.doi_compress].ci_name);
1060         }
1061
1062         (void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %s%s\n",
1063             (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize,
1064             asize, dmu_ot[doi.doi_type].ot_name, aux);
1065
1066         if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
1067                 (void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %s\n",
1068                     "", "", "", "", bonus_size, "bonus",
1069                     dmu_ot[doi.doi_bonus_type].ot_name);
1070         }
1071
1072         if (verbosity >= 4) {
1073                 object_viewer[doi.doi_bonus_type](os, object, bonus, bsize);
1074                 object_viewer[doi.doi_type](os, object, NULL, 0);
1075                 *print_header = 1;
1076         }
1077
1078         if (verbosity >= 5)
1079                 dump_indirect(dn);
1080
1081         if (verbosity >= 5) {
1082                 /*
1083                  * Report the list of segments that comprise the object.
1084                  */
1085                 uint64_t start = 0;
1086                 uint64_t end;
1087                 uint64_t blkfill = 1;
1088                 int minlvl = 1;
1089
1090                 if (dn->dn_type == DMU_OT_DNODE) {
1091                         minlvl = 0;
1092                         blkfill = DNODES_PER_BLOCK;
1093                 }
1094
1095                 for (;;) {
1096                         error = dnode_next_offset(dn,
1097                             0, &start, minlvl, blkfill, 0);
1098                         if (error)
1099                                 break;
1100                         end = start;
1101                         error = dnode_next_offset(dn,
1102                             DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
1103                         nicenum(end - start, segsize);
1104                         (void) printf("\t\tsegment [%016llx, %016llx)"
1105                             " size %5s\n", (u_longlong_t)start,
1106                             (u_longlong_t)end, segsize);
1107                         if (error)
1108                                 break;
1109                         start = end;
1110                 }
1111         }
1112
1113         if (db != NULL)
1114                 dmu_buf_rele(db, FTAG);
1115 }
1116
/*
 * Printable names for the objset types.  dump_dir() indexes this table
 * with the dds_type reported by dmu_objset_fast_stat(), after checking
 * that the value is below DMU_OST_NUMTYPES.
 */
static char *objset_types[DMU_OST_NUMTYPES] = {
        "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
1119
1120 static void
1121 dump_dir(objset_t *os)
1122 {
1123         dmu_objset_stats_t dds;
1124         uint64_t object, object_count;
1125         uint64_t refdbytes, usedobjs, scratch;
1126         char numbuf[8];
1127         char blkbuf[BP_SPRINTF_LEN];
1128         char osname[MAXNAMELEN];
1129         char *type = "UNKNOWN";
1130         int verbosity = dump_opt['d'];
1131         int print_header = 1;
1132         int i, error;
1133
1134         dmu_objset_fast_stat(os, &dds);
1135
1136         if (dds.dds_type < DMU_OST_NUMTYPES)
1137                 type = objset_types[dds.dds_type];
1138
1139         if (dds.dds_type == DMU_OST_META) {
1140                 dds.dds_creation_txg = TXG_INITIAL;
1141                 usedobjs = os->os->os_rootbp->blk_fill;
1142                 refdbytes = os->os->os_spa->spa_dsl_pool->
1143                     dp_mos_dir->dd_phys->dd_used_bytes;
1144         } else {
1145                 dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
1146         }
1147
1148         ASSERT3U(usedobjs, ==, os->os->os_rootbp->blk_fill);
1149
1150         nicenum(refdbytes, numbuf);
1151
1152         if (verbosity >= 4) {
1153                 (void) strcpy(blkbuf, ", rootbp ");
1154                 sprintf_blkptr(blkbuf + strlen(blkbuf),
1155                     BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp);
1156         } else {
1157                 blkbuf[0] = '\0';
1158         }
1159
1160         dmu_objset_name(os, osname);
1161
1162         (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
1163             "%s, %llu objects%s\n",
1164             osname, type, (u_longlong_t)dmu_objset_id(os),
1165             (u_longlong_t)dds.dds_creation_txg,
1166             numbuf, (u_longlong_t)usedobjs, blkbuf);
1167
1168         dump_intent_log(dmu_objset_zil(os));
1169
1170         if (dmu_objset_ds(os) != NULL)
1171                 dump_bplist(dmu_objset_pool(os)->dp_meta_objset,
1172                     dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist");
1173
1174         if (verbosity < 2)
1175                 return;
1176
1177         if (os->os->os_rootbp->blk_birth == 0)
1178                 return;
1179
1180         if (zopt_objects != 0) {
1181                 for (i = 0; i < zopt_objects; i++)
1182                         dump_object(os, zopt_object[i], verbosity,
1183                             &print_header);
1184                 (void) printf("\n");
1185                 return;
1186         }
1187
1188         dump_object(os, 0, verbosity, &print_header);
1189         object_count = 1;
1190
1191         object = 0;
1192         while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
1193                 dump_object(os, object, verbosity, &print_header);
1194                 object_count++;
1195         }
1196
1197         ASSERT3U(object_count, ==, usedobjs);
1198
1199         (void) printf("\n");
1200
1201         if (error != ESRCH)
1202                 fatal("dmu_object_next() = %d", error);
1203 }
1204
1205 static void
1206 dump_uberblock(uberblock_t *ub)
1207 {
1208         time_t timestamp = ub->ub_timestamp;
1209
1210         (void) printf("Uberblock\n\n");
1211         (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
1212         (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
1213         (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
1214         (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
1215         (void) printf("\ttimestamp = %llu UTC = %s",
1216             (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
1217         if (dump_opt['u'] >= 3) {
1218                 char blkbuf[BP_SPRINTF_LEN];
1219                 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ub->ub_rootbp);
1220                 (void) printf("\trootbp = %s\n", blkbuf);
1221         }
1222         (void) printf("\n");
1223 }
1224
1225 static void
1226 dump_config(const char *pool)
1227 {
1228         spa_t *spa = NULL;
1229
1230         mutex_enter(&spa_namespace_lock);
1231         while ((spa = spa_next(spa)) != NULL) {
1232                 if (pool == NULL)
1233                         (void) printf("%s\n", spa_name(spa));
1234                 if (pool == NULL || strcmp(pool, spa_name(spa)) == 0)
1235                         dump_nvlist(spa->spa_config, 4);
1236         }
1237         mutex_exit(&spa_namespace_lock);
1238 }
1239
1240 static void
1241 dump_cachefile(const char *cachefile)
1242 {
1243         int fd;
1244         struct stat64 statbuf;
1245         char *buf;
1246         nvlist_t *config;
1247
1248         if ((fd = open64(cachefile, O_RDONLY)) < 0) {
1249                 (void) printf("cannot open '%s': %s\n", cachefile,
1250                     strerror(errno));
1251                 exit(1);
1252         }
1253
1254         if (fstat64(fd, &statbuf) != 0) {
1255                 (void) printf("failed to stat '%s': %s\n", cachefile,
1256                     strerror(errno));
1257                 exit(1);
1258         }
1259
1260         if ((buf = malloc(statbuf.st_size)) == NULL) {
1261                 (void) fprintf(stderr, "failed to allocate %llu bytes\n",
1262                     (u_longlong_t)statbuf.st_size);
1263                 exit(1);
1264         }
1265
1266         if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1267                 (void) fprintf(stderr, "failed to read %llu bytes\n",
1268                     (u_longlong_t)statbuf.st_size);
1269                 exit(1);
1270         }
1271
1272         (void) close(fd);
1273
1274         if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
1275                 (void) fprintf(stderr, "failed to unpack nvlist\n");
1276                 exit(1);
1277         }
1278
1279         free(buf);
1280
1281         dump_nvlist(config, 0);
1282
1283         nvlist_free(config);
1284 }
1285
1286 static void
1287 dump_label(const char *dev)
1288 {
1289         int fd;
1290         vdev_label_t label;
1291         char *buf = label.vl_vdev_phys.vp_nvlist;
1292         size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
1293         struct stat64 statbuf;
1294         uint64_t psize;
1295         int l;
1296
1297         if ((fd = open64(dev, O_RDONLY)) < 0) {
1298                 (void) printf("cannot open '%s': %s\n", dev, strerror(errno));
1299                 exit(1);
1300         }
1301
1302         if (fstat64(fd, &statbuf) != 0) {
1303                 (void) printf("failed to stat '%s': %s\n", dev,
1304                     strerror(errno));
1305                 exit(1);
1306         }
1307
1308         psize = statbuf.st_size;
1309         psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
1310
1311         for (l = 0; l < VDEV_LABELS; l++) {
1312
1313                 nvlist_t *config = NULL;
1314
1315                 (void) printf("--------------------------------------------\n");
1316                 (void) printf("LABEL %d\n", l);
1317                 (void) printf("--------------------------------------------\n");
1318
1319                 if (pread64(fd, &label, sizeof (label),
1320                     vdev_label_offset(psize, l, 0)) != sizeof (label)) {
1321                         (void) printf("failed to read label %d\n", l);
1322                         continue;
1323                 }
1324
1325                 if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
1326                         (void) printf("failed to unpack label %d\n", l);
1327                         continue;
1328                 }
1329                 dump_nvlist(config, 4);
1330                 nvlist_free(config);
1331         }
1332 }
1333
1334 /*ARGSUSED*/
1335 static int
1336 dump_one_dir(char *dsname, void *arg)
1337 {
1338         int error;
1339         objset_t *os;
1340
1341         error = dmu_objset_open(dsname, DMU_OST_ANY,
1342             DS_MODE_USER | DS_MODE_READONLY, &os);
1343         if (error) {
1344                 (void) printf("Could not open %s\n", dsname);
1345                 return (0);
1346         }
1347         dump_dir(os);
1348         dmu_objset_close(os);
1349         fuid_table_destroy();
1350         return (0);
1351 }
1352
1353 static void
1354 zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
1355 {
1356         vdev_t *vd = sm->sm_ppd;
1357
1358         (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
1359             (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
1360 }
1361
/*
 * Load callback in zdb_space_map_ops.  The body is empty: zdb does
 * nothing extra when a space map is loaded.
 */
/* ARGSUSED */
static void
zdb_space_map_load(space_map_t *sm)
{
}
1367
/*
 * Unload callback in zdb_space_map_ops.  Vacates the map, passing
 * zdb_leak() as the callback and the map itself as its argument.
 * NOTE(review): presumably zdb_leak() is invoked once per segment still
 * in the map, i.e. for space no traversed block claimed -- confirm
 * against space_map_vacate().
 */
static void
zdb_space_map_unload(space_map_t *sm)
{
        space_map_vacate(sm, zdb_leak, sm);
}
1373
/*
 * Claim callback in zdb_space_map_ops.  The body is empty: zdb does
 * nothing extra when a segment is claimed.
 */
/* ARGSUSED */
static void
zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
{
}
1379
/*
 * Space map callbacks that zdb_leak_init() hands to space_map_load().
 * Only the load, unload and claim slots are populated; the alloc and
 * free slots are NULL.
 */
static space_map_ops_t zdb_space_map_ops = {
        zdb_space_map_load,
        zdb_space_map_unload,
        NULL,   /* alloc */
        zdb_space_map_claim,
        NULL    /* free */
};
1387
1388 static void
1389 zdb_leak_init(spa_t *spa)
1390 {
1391         vdev_t *rvd = spa->spa_root_vdev;
1392
1393         for (int c = 0; c < rvd->vdev_children; c++) {
1394                 vdev_t *vd = rvd->vdev_child[c];
1395                 for (int m = 0; m < vd->vdev_ms_count; m++) {
1396                         metaslab_t *msp = vd->vdev_ms[m];
1397                         mutex_enter(&msp->ms_lock);
1398                         VERIFY(space_map_load(&msp->ms_map, &zdb_space_map_ops,
1399                             SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset) == 0);
1400                         msp->ms_map.sm_ppd = vd;
1401                         mutex_exit(&msp->ms_lock);
1402                 }
1403         }
1404 }
1405
1406 static void
1407 zdb_leak_fini(spa_t *spa)
1408 {
1409         vdev_t *rvd = spa->spa_root_vdev;
1410
1411         for (int c = 0; c < rvd->vdev_children; c++) {
1412                 vdev_t *vd = rvd->vdev_child[c];
1413                 for (int m = 0; m < vd->vdev_ms_count; m++) {
1414                         metaslab_t *msp = vd->vdev_ms[m];
1415                         mutex_enter(&msp->ms_lock);
1416                         space_map_unload(&msp->ms_map);
1417                         mutex_exit(&msp->ms_lock);
1418                 }
1419         }
1420 }
1421
/*
 * Verify that the sum of the sizes of all blocks in the pool adds up
 * to the SPA's sa_alloc total.
 *
 * One zdb_blkstats_t accumulates the totals for one (level, type) cell;
 * zdb_count_block() adds each block pointer's sizes to it.
 */
typedef struct zdb_blkstats {
        uint64_t        zb_asize;       /* sum of BP_GET_ASIZE() */
        uint64_t        zb_lsize;       /* sum of BP_GET_LSIZE() */
        uint64_t        zb_psize;       /* sum of BP_GET_PSIZE() */
        uint64_t        zb_count;       /* number of blocks counted */
} zdb_blkstats_t;
1432
/*
 * Extra indices into zdb_cb_t's zcb_type[][] table.
 *
 * DMU_OT_DEFERRED reuses the DMU_OT_NONE slot for blocks found on the
 * deferred-free bplist.  DMU_OT_TOTAL is the column one past the real
 * object types and ZB_TOTAL the row one past the real levels; both hold
 * the sums across their dimension.
 */
#define DMU_OT_DEFERRED DMU_OT_NONE
#define DMU_OT_TOTAL    DMU_OT_NUMTYPES

#define ZB_TOTAL        DN_MAX_LEVELS
1437
/*
 * State carried through the block traversal in dump_block_stats().
 */
typedef struct zdb_cb {
        /* per-[level][type] totals, including the ZB_TOTAL/DMU_OT_TOTAL sums */
        zdb_blkstats_t  zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1];
        /* read-error counts, indexed by the error zio_wait() returned */
        uint64_t        zcb_errors[256];
        /* reset to 0 by zdb_blkptr_cb() on every block */
        int             zcb_readfails;
        /* set to 1 once any unexpected read error has been seen */
        int             zcb_haderrors;
} zdb_cb_t;
1444
1445 static void
1446 zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
1447 {
1448         for (int i = 0; i < 4; i++) {
1449                 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
1450                 int t = (i & 1) ? type : DMU_OT_TOTAL;
1451                 zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
1452
1453                 zb->zb_asize += BP_GET_ASIZE(bp);
1454                 zb->zb_lsize += BP_GET_LSIZE(bp);
1455                 zb->zb_psize += BP_GET_PSIZE(bp);
1456                 zb->zb_count++;
1457         }
1458
1459         if (dump_opt['S']) {
1460                 boolean_t print_sig;
1461
1462                 print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
1463                     BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS);
1464
1465                 if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg)
1466                         print_sig = B_FALSE;
1467
1468                 if (print_sig) {
1469                         (void) printf("%llu\t%lld\t%lld\t%s\t%s\t%s\t"
1470                             "%llx:%llx:%llx:%llx\n",
1471                             (u_longlong_t)BP_GET_LEVEL(bp),
1472                             (longlong_t)BP_GET_PSIZE(bp),
1473                             (longlong_t)BP_GET_NDVAS(bp),
1474                             dmu_ot[BP_GET_TYPE(bp)].ot_name,
1475                             zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
1476                             zio_compress_table[BP_GET_COMPRESS(bp)].ci_name,
1477                             (u_longlong_t)bp->blk_cksum.zc_word[0],
1478                             (u_longlong_t)bp->blk_cksum.zc_word[1],
1479                             (u_longlong_t)bp->blk_cksum.zc_word[2],
1480                             (u_longlong_t)bp->blk_cksum.zc_word[3]);
1481                 }
1482         }
1483
1484         VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
1485             NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
1486 }
1487
1488 static int
1489 zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
1490     const dnode_phys_t *dnp, void *arg)
1491 {
1492         zdb_cb_t *zcb = arg;
1493         char blkbuf[BP_SPRINTF_LEN];
1494
1495         if (bp == NULL)
1496                 return (0);
1497
1498         zdb_count_block(spa, zcb, bp, BP_GET_TYPE(bp));
1499
1500         if (dump_opt['c'] || dump_opt['S']) {
1501                 int ioerr, size;
1502                 void *data;
1503
1504                 size = BP_GET_LSIZE(bp);
1505                 data = malloc(size);
1506                 ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
1507                     NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
1508                     ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB, zb));
1509                 free(data);
1510
1511                 /* We expect io errors on intent log */
1512                 if (ioerr && BP_GET_TYPE(bp) != DMU_OT_INTENT_LOG) {
1513                         zcb->zcb_haderrors = 1;
1514                         zcb->zcb_errors[ioerr]++;
1515
1516                         if (dump_opt['b'] >= 2)
1517                                 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
1518                         else
1519                                 blkbuf[0] = '\0';
1520
1521                         if (!dump_opt['S']) {
1522                                 (void) printf("zdb_blkptr_cb: "
1523                                     "Got error %d reading "
1524                                     "<%llu, %llu, %lld, %llx> %s -- skipping\n",
1525                                     ioerr,
1526                                     (u_longlong_t)zb->zb_objset,
1527                                     (u_longlong_t)zb->zb_object,
1528                                     (u_longlong_t)zb->zb_level,
1529                                     (u_longlong_t)zb->zb_blkid,
1530                                     blkbuf);
1531                         }
1532                 }
1533         }
1534
1535         zcb->zcb_readfails = 0;
1536
1537         if (dump_opt['b'] >= 4) {
1538                 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
1539                 (void) printf("objset %llu object %llu offset 0x%llx %s\n",
1540                     (u_longlong_t)zb->zb_objset,
1541                     (u_longlong_t)zb->zb_object,
1542                     (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid),
1543                     blkbuf);
1544         }
1545
1546         return (0);
1547 }
1548
1549 static int
1550 dump_block_stats(spa_t *spa)
1551 {
1552         zdb_cb_t zcb = { 0 };
1553         zdb_blkstats_t *zb, *tzb;
1554         uint64_t alloc, space, logalloc;
1555         vdev_t *rvd = spa->spa_root_vdev;
1556         int leaks = 0;
1557         int c, e;
1558
1559         if (!dump_opt['S']) {
1560                 (void) printf("\nTraversing all blocks to %sverify"
1561                     " nothing leaked ...\n",
1562                     dump_opt['c'] ? "verify checksums and " : "");
1563         }
1564
1565         /*
1566          * Load all space maps as SM_ALLOC maps, then traverse the pool
1567          * claiming each block we discover.  If the pool is perfectly
1568          * consistent, the space maps will be empty when we're done.
1569          * Anything left over is a leak; any block we can't claim (because
1570          * it's not part of any space map) is a double allocation,
1571          * reference to a freed block, or an unclaimed log block.
1572          */
1573         zdb_leak_init(spa);
1574
1575         /*
1576          * If there's a deferred-free bplist, process that first.
1577          */
1578         if (spa->spa_sync_bplist_obj != 0) {
1579                 bplist_t *bpl = &spa->spa_sync_bplist;
1580                 blkptr_t blk;
1581                 uint64_t itor = 0;
1582
1583                 VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
1584                     spa->spa_sync_bplist_obj));
1585
1586                 while (bplist_iterate(bpl, &itor, &blk) == 0) {
1587                         if (dump_opt['b'] >= 4) {
1588                                 char blkbuf[BP_SPRINTF_LEN];
1589                                 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &blk);
1590                                 (void) printf("[%s] %s\n",
1591                                     "deferred free", blkbuf);
1592                         }
1593                         zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED);
1594                 }
1595
1596                 bplist_close(bpl);
1597         }
1598
1599         zcb.zcb_haderrors |= traverse_pool(spa, zdb_blkptr_cb, &zcb);
1600
1601         if (zcb.zcb_haderrors && !dump_opt['S']) {
1602                 (void) printf("\nError counts:\n\n");
1603                 (void) printf("\t%5s  %s\n", "errno", "count");
1604                 for (e = 0; e < 256; e++) {
1605                         if (zcb.zcb_errors[e] != 0) {
1606                                 (void) printf("\t%5d  %llu\n",
1607                                     e, (u_longlong_t)zcb.zcb_errors[e]);
1608                         }
1609                 }
1610         }
1611
1612         /*
1613          * Report any leaked segments.
1614          */
1615         zdb_leak_fini(spa);
1616
1617         /*
1618          * If we're interested in printing out the blkptr signatures,
1619          * return now as we don't print out anything else (including
1620          * errors and leaks).
1621          */
1622         if (dump_opt['S'])
1623                 return (zcb.zcb_haderrors ? 3 : 0);
1624
1625         alloc = spa_get_alloc(spa);
1626         space = spa_get_space(spa);
1627
1628         /*
1629          * Log blocks allocated from a separate log device don't count
1630          * as part of the normal pool space; factor them in here.
1631          */
1632         logalloc = 0;
1633
1634         for (c = 0; c < rvd->vdev_children; c++)
1635                 if (rvd->vdev_child[c]->vdev_islog)
1636                         logalloc += rvd->vdev_child[c]->vdev_stat.vs_alloc;
1637
1638         tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL];
1639
1640         if (tzb->zb_asize == alloc + logalloc) {
1641                 (void) printf("\n\tNo leaks (block sum matches space"
1642                     " maps exactly)\n");
1643         } else {
1644                 (void) printf("block traversal size %llu != alloc %llu "
1645                     "(leaked %lld)\n",
1646                     (u_longlong_t)tzb->zb_asize,
1647                     (u_longlong_t)alloc + logalloc,
1648                     (u_longlong_t)(alloc + logalloc - tzb->zb_asize));
1649                 leaks = 1;
1650         }
1651
1652         if (tzb->zb_count == 0)
1653                 return (2);
1654
1655         (void) printf("\n");
1656         (void) printf("\tbp count:      %10llu\n",
1657             (u_longlong_t)tzb->zb_count);
1658         (void) printf("\tbp logical:    %10llu\t avg: %6llu\n",
1659             (u_longlong_t)tzb->zb_lsize,
1660             (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
1661         (void) printf("\tbp physical:   %10llu\t avg:"
1662             " %6llu\tcompression: %6.2f\n",
1663             (u_longlong_t)tzb->zb_psize,
1664             (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
1665             (double)tzb->zb_lsize / tzb->zb_psize);
1666         (void) printf("\tbp allocated:  %10llu\t avg:"
1667             " %6llu\tcompression: %6.2f\n",
1668             (u_longlong_t)tzb->zb_asize,
1669             (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
1670             (double)tzb->zb_lsize / tzb->zb_asize);
1671         (void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n",
1672             (u_longlong_t)alloc, 100.0 * alloc / space);
1673
1674         if (dump_opt['b'] >= 2) {
1675                 int l, t, level;
1676                 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
1677                     "\t  avg\t comp\t%%Total\tType\n");
1678
1679                 for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
1680                         char csize[6], lsize[6], psize[6], asize[6], avg[6];
1681                         char *typename;
1682
1683                         typename = t == DMU_OT_DEFERRED ? "deferred free" :
1684                             t == DMU_OT_TOTAL ? "Total" : dmu_ot[t].ot_name;
1685
1686                         if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
1687                                 (void) printf("%6s\t%5s\t%5s\t%5s"
1688                                     "\t%5s\t%5s\t%6s\t%s\n",
1689                                     "-",
1690                                     "-",
1691                                     "-",
1692                                     "-",
1693                                     "-",
1694                                     "-",
1695                                     "-",
1696                                     typename);
1697                                 continue;
1698                         }
1699
1700                         for (l = ZB_TOTAL - 1; l >= -1; l--) {
1701                                 level = (l == -1 ? ZB_TOTAL : l);
1702                                 zb = &zcb.zcb_type[level][t];
1703
1704                                 if (zb->zb_asize == 0)
1705                                         continue;
1706
1707                                 if (dump_opt['b'] < 3 && level != ZB_TOTAL)
1708                                         continue;
1709
1710                                 if (level == 0 && zb->zb_asize ==
1711                                     zcb.zcb_type[ZB_TOTAL][t].zb_asize)
1712                                         continue;
1713
1714                                 nicenum(zb->zb_count, csize);
1715                                 nicenum(zb->zb_lsize, lsize);
1716                                 nicenum(zb->zb_psize, psize);
1717                                 nicenum(zb->zb_asize, asize);
1718                                 nicenum(zb->zb_asize / zb->zb_count, avg);
1719
1720                                 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
1721                                     "\t%5.2f\t%6.2f\t",
1722                                     csize, lsize, psize, asize, avg,
1723                                     (double)zb->zb_lsize / zb->zb_psize,
1724                                     100.0 * zb->zb_asize / tzb->zb_asize);
1725
1726                                 if (level == ZB_TOTAL)
1727                                         (void) printf("%s\n", typename);
1728                                 else
1729                                         (void) printf("    L%d %s\n",
1730                                             level, typename);
1731                         }
1732                 }
1733         }
1734
1735         (void) printf("\n");
1736
1737         if (leaks)
1738                 return (2);
1739
1740         if (zcb.zcb_haderrors)
1741                 return (3);
1742
1743         return (0);
1744 }
1745
1746 static void
1747 dump_zpool(spa_t *spa)
1748 {
1749         dsl_pool_t *dp = spa_get_dsl(spa);
1750         int rc = 0;
1751
1752         if (dump_opt['u'])
1753                 dump_uberblock(&spa->spa_uberblock);
1754
1755         if (dump_opt['d'] || dump_opt['i']) {
1756                 dump_dir(dp->dp_meta_objset);
1757                 if (dump_opt['d'] >= 3) {
1758                         dump_bplist(dp->dp_meta_objset,
1759                             spa->spa_sync_bplist_obj, "Deferred frees");
1760                         dump_dtl(spa->spa_root_vdev, 0);
1761                         dump_metaslabs(spa);
1762                 }
1763                 (void) dmu_objset_find(spa_name(spa), dump_one_dir, NULL,
1764                     DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
1765         }
1766
1767         if (dump_opt['b'] || dump_opt['c'] || dump_opt['S'])
1768                 rc = dump_block_stats(spa);
1769
1770         if (dump_opt['s'])
1771                 show_pool_stats(spa);
1772
1773         if (rc != 0)
1774                 exit(rc);
1775 }
1776
1777 #define ZDB_FLAG_CHECKSUM       0x0001  /* 'c': calculate and display checksums */
1778 #define ZDB_FLAG_DECOMPRESS     0x0002  /* 'd': decompress data before dumping */
1779 #define ZDB_FLAG_BSWAP          0x0004  /* 'e': byteswap data before dumping */
1780 #define ZDB_FLAG_GBH            0x0008  /* 'g': display as a gang block header */
1781 #define ZDB_FLAG_INDIRECT       0x0010  /* 'i': display as an indirect block */
1782 #define ZDB_FLAG_PHYS           0x0020  /* 'p': do I/O to physical offset */
1783 #define ZDB_FLAG_RAW            0x0040  /* 'r': dump raw data */
1784 #define ZDB_FLAG_PRINT_BLKPTR   0x0080  /* 'b': decode a blkptr within the block */
1785
1786 int flagbits[256];  /* flag character -> ZDB_FLAG_* bit; 0 means not a valid flag */
1787
1788 static void
1789 zdb_print_blkptr(blkptr_t *bp, int flags)
1790 {
1791         dva_t *dva = bp->blk_dva;
1792         int d;
1793
1794         if (flags & ZDB_FLAG_BSWAP)
1795                 byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
1796         /*
1797          * Super-ick warning:  This code is also duplicated in
1798          * cmd/mdb/common/modules/zfs/zfs.c .  Yeah, I hate code
1799          * replication, too.
1800          */
1801         for (d = 0; d < BP_GET_NDVAS(bp); d++) {
1802                 (void) printf("\tDVA[%d]: vdev_id %lld / %llx\n", d,
1803                     (longlong_t)DVA_GET_VDEV(&dva[d]),
1804                     (longlong_t)DVA_GET_OFFSET(&dva[d]));
1805                 (void) printf("\tDVA[%d]:       GANG: %-5s  GRID:  %04llx\t"
1806                     "ASIZE: %llx\n", d,
1807                     DVA_GET_GANG(&dva[d]) ? "TRUE" : "FALSE",
1808                     (longlong_t)DVA_GET_GRID(&dva[d]),
1809                     (longlong_t)DVA_GET_ASIZE(&dva[d]));
1810                 (void) printf("\tDVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", d,
1811                     (u_longlong_t)DVA_GET_VDEV(&dva[d]),
1812                     (longlong_t)DVA_GET_OFFSET(&dva[d]),
1813                     (longlong_t)BP_GET_PSIZE(bp),
1814                     BP_SHOULD_BYTESWAP(bp) ? "e" : "",
1815                     !DVA_GET_GANG(&dva[d]) && BP_GET_LEVEL(bp) != 0 ?
1816                     "d" : "",
1817                     DVA_GET_GANG(&dva[d]) ? "g" : "",
1818                     BP_GET_COMPRESS(bp) != 0 ? "d" : "");
1819         }
1820         (void) printf("\tLSIZE:  %-16llx\t\tPSIZE: %llx\n",
1821             (longlong_t)BP_GET_LSIZE(bp), (longlong_t)BP_GET_PSIZE(bp));
1822         (void) printf("\tENDIAN: %6s\t\t\t\t\tTYPE:  %s\n",
1823             BP_GET_BYTEORDER(bp) ? "LITTLE" : "BIG",
1824             dmu_ot[BP_GET_TYPE(bp)].ot_name);
1825         (void) printf("\tBIRTH:  %-16llx   LEVEL: %-2llu\tFILL:  %llx\n",
1826             (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_GET_LEVEL(bp),
1827             (u_longlong_t)bp->blk_fill);
1828         (void) printf("\tCKFUNC: %-16s\t\tCOMP:  %s\n",
1829             zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
1830             zio_compress_table[BP_GET_COMPRESS(bp)].ci_name);
1831         (void) printf("\tCKSUM:  %llx:%llx:%llx:%llx\n",
1832             (u_longlong_t)bp->blk_cksum.zc_word[0],
1833             (u_longlong_t)bp->blk_cksum.zc_word[1],
1834             (u_longlong_t)bp->blk_cksum.zc_word[2],
1835             (u_longlong_t)bp->blk_cksum.zc_word[3]);
1836 }
1837
1838 static void
1839 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
1840 {
1841         int i;
1842
1843         for (i = 0; i < nbps; i++)
1844                 zdb_print_blkptr(&bp[i], flags);
1845 }
1846
1847 static void
1848 zdb_dump_gbh(void *buf, int flags)
1849 {
1850         zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
1851 }
1852
1853 static void
1854 zdb_dump_block_raw(void *buf, uint64_t size, int flags)
1855 {
1856         if (flags & ZDB_FLAG_BSWAP)
1857                 byteswap_uint64_array(buf, size);
1858         (void) write(2, buf, size);
1859 }
1860
1861 static void
1862 zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
1863 {
1864         uint64_t *d = (uint64_t *)buf;
1865         int nwords = size / sizeof (uint64_t);
1866         int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
1867         int i, j;
1868         char *hdr, *c;
1869
1870
1871         if (do_bswap)
1872                 hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
1873         else
1874                 hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
1875
1876         (void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
1877
1878         for (i = 0; i < nwords; i += 2) {
1879                 (void) printf("%06llx:  %016llx  %016llx  ",
1880                     (u_longlong_t)(i * sizeof (uint64_t)),
1881                     (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
1882                     (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
1883
1884                 c = (char *)&d[i];
1885                 for (j = 0; j < 2 * sizeof (uint64_t); j++)
1886                         (void) printf("%c", isprint(c[j]) ? c[j] : '.');
1887                 (void) printf("\n");
1888         }
1889 }
1890
1891 /*
1892  * There are two acceptable formats:
1893  *      leaf_name         - For example: c1t0d0 or /tmp/ztest.0a
1894  *      child[.child]*    - For example: 0.1.1
1895  *
1896  * The second form can be used to specify arbitrary vdevs anywhere
1897  * in the heirarchy.  For example, in a pool with a mirror of
1898  * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
1899  */
1900 static vdev_t *
1901 zdb_vdev_lookup(vdev_t *vdev, char *path)
1902 {
1903         char *s, *p, *q;
1904         int i;
1905
1906         if (vdev == NULL)
1907                 return (NULL);
1908
1909         /* First, assume the x.x.x.x format */
1910         i = (int)strtoul(path, &s, 10);
1911         if (s == path || (s && *s != '.' && *s != '\0'))
1912                 goto name;
1913         if (i < 0 || i >= vdev->vdev_children)
1914                 return (NULL);
1915
1916         vdev = vdev->vdev_child[i];
1917         if (*s == '\0')
1918                 return (vdev);
1919         return (zdb_vdev_lookup(vdev, s+1));
1920
1921 name:
1922         for (i = 0; i < vdev->vdev_children; i++) {
1923                 vdev_t *vc = vdev->vdev_child[i];
1924
1925                 if (vc->vdev_path == NULL) {
1926                         vc = zdb_vdev_lookup(vc, path);
1927                         if (vc == NULL)
1928                                 continue;
1929                         else
1930                                 return (vc);
1931                 }
1932
1933                 p = strrchr(vc->vdev_path, '/');
1934                 p = p ? p + 1 : vc->vdev_path;
1935                 q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
1936
1937                 if (strcmp(vc->vdev_path, path) == 0)
1938                         return (vc);
1939                 if (strcmp(p, path) == 0)
1940                         return (vc);
1941                 if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
1942                         return (vc);
1943         }
1944
1945         return (NULL);
1946 }
1947
1948 /*
1949  * Read a block from a pool and print it out.  The syntax of the
1950  * block descriptor is:
1951  *
1952  *      pool:vdev_specifier:offset:size[:flags]
1953  *
1954  *      pool           - The name of the pool you wish to read from
1955  *      vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
1956  *      offset         - offset, in hex, in bytes
1957  *      size           - Amount of data to read, in hex, in bytes
1958  *      flags          - A string of characters specifying options
1959  *               b: Decode a blkptr at given offset within block
1960  *              *c: Calculate and display checksums
1961  *              *d: Decompress data before dumping
1962  *               e: Byteswap data before dumping
1963  *              *g: Display data as a gang block header
1964  *              *i: Display as an indirect block
1965  *               p: Do I/O to physical offset
1966  *               r: Dump raw data to stdout
1967  *
1968  *              * = not yet implemented
1969  */
1970 static void
1971 zdb_read_block(char *thing, spa_t **spap)
1972 {
1973         spa_t *spa = *spap;
1974         int flags = 0;
1975         uint64_t offset = 0, size = 0, blkptr_offset = 0;
1976         zio_t *zio;
1977         vdev_t *vd;
1978         void *buf;
1979         char *s, *p, *dup, *pool, *vdev, *flagstr;
1980         int i, error, zio_flags;
1981
1982         dup = strdup(thing);
1983         s = strtok(dup, ":");
1984         pool = s ? s : "";
1985         s = strtok(NULL, ":");
1986         vdev = s ? s : "";
1987         s = strtok(NULL, ":");
1988         offset = strtoull(s ? s : "", NULL, 16);
1989         s = strtok(NULL, ":");
1990         size = strtoull(s ? s : "", NULL, 16);
1991         s = strtok(NULL, ":");
1992         flagstr = s ? s : "";
1993
1994         s = NULL;
1995         if (size == 0)
1996                 s = "size must not be zero";
1997         if (!IS_P2ALIGNED(size, DEV_BSIZE))
1998                 s = "size must be a multiple of sector size";
1999         if (!IS_P2ALIGNED(offset, DEV_BSIZE))
2000                 s = "offset must be a multiple of sector size";
2001         if (s) {
2002                 (void) printf("Invalid block specifier: %s  - %s\n", thing, s);
2003                 free(dup);
2004                 return;
2005         }
2006
2007         for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
2008                 for (i = 0; flagstr[i]; i++) {
2009                         int bit = flagbits[(uchar_t)flagstr[i]];
2010
2011                         if (bit == 0) {
2012                                 (void) printf("***Invalid flag: %c\n",
2013                                     flagstr[i]);
2014                                 continue;
2015                         }
2016                         flags |= bit;
2017
2018                         /* If it's not something with an argument, keep going */
2019                         if ((bit & (ZDB_FLAG_CHECKSUM | ZDB_FLAG_DECOMPRESS |
2020                             ZDB_FLAG_PRINT_BLKPTR)) == 0)
2021                                 continue;
2022
2023                         p = &flagstr[i + 1];
2024                         if (bit == ZDB_FLAG_PRINT_BLKPTR)
2025                                 blkptr_offset = strtoull(p, &p, 16);
2026                         if (*p != ':' && *p != '\0') {
2027                                 (void) printf("***Invalid flag arg: '%s'\n", s);
2028                                 free(dup);
2029                                 return;
2030                         }
2031                 }
2032         }
2033
2034         if (spa == NULL || strcmp(spa_name(spa), pool) != 0) {
2035                 if (spa)
2036                         spa_close(spa, (void *)zdb_read_block);
2037                 error = spa_open(pool, spap, (void *)zdb_read_block);
2038                 if (error)
2039                         fatal("Failed to open pool '%s': %s",
2040                             pool, strerror(error));
2041                 spa = *spap;
2042         }
2043
2044         vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
2045         if (vd == NULL) {
2046                 (void) printf("***Invalid vdev: %s\n", vdev);
2047                 free(dup);
2048                 return;
2049         } else {
2050                 if (vd->vdev_path)
2051                         (void) printf("Found vdev: %s\n", vd->vdev_path);
2052                 else
2053                         (void) printf("Found vdev type: %s\n",
2054                             vd->vdev_ops->vdev_op_type);
2055         }
2056
2057         buf = umem_alloc(size, UMEM_NOFAIL);
2058
2059         zio_flags = ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
2060             ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY;
2061
2062         spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
2063         zio = zio_root(spa, NULL, NULL, 0);
2064         /* XXX todo - cons up a BP so RAID-Z will be happy */
2065         zio_nowait(zio_vdev_child_io(zio, NULL, vd, offset, buf, size,
2066             ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, zio_flags, NULL, NULL));
2067         error = zio_wait(zio);
2068         spa_config_exit(spa, SCL_STATE, FTAG);
2069
2070         if (error) {
2071                 (void) printf("Read of %s failed, error: %d\n", thing, error);
2072                 goto out;
2073         }
2074
2075         if (flags & ZDB_FLAG_PRINT_BLKPTR)
2076                 zdb_print_blkptr((blkptr_t *)(void *)
2077                     ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
2078         else if (flags & ZDB_FLAG_RAW)
2079                 zdb_dump_block_raw(buf, size, flags);
2080         else if (flags & ZDB_FLAG_INDIRECT)
2081                 zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
2082                     flags);
2083         else if (flags & ZDB_FLAG_GBH)
2084                 zdb_dump_gbh(buf, flags);
2085         else
2086                 zdb_dump_block(thing, buf, size, flags);
2087
2088 out:
2089         umem_free(buf, size);
2090         free(dup);
2091 }
2092
2093 static boolean_t
2094 nvlist_string_match(nvlist_t *config, char *name, char *tgt)
2095 {
2096         char *s;
2097
2098         if (nvlist_lookup_string(config, name, &s) != 0)
2099                 return (B_FALSE);
2100
2101         return (strcmp(s, tgt) == 0);
2102 }
2103
2104 static boolean_t
2105 nvlist_uint64_match(nvlist_t *config, char *name, uint64_t tgt)
2106 {
2107         uint64_t val;
2108
2109         if (nvlist_lookup_uint64(config, name, &val) != 0)
2110                 return (B_FALSE);
2111
2112         return (val == tgt);
2113 }
2114
2115 static boolean_t
2116 vdev_child_guid_match(nvlist_t *vdev, uint64_t guid)
2117 {
2118         nvlist_t **child;
2119         uint_t c, children;
2120
2121         verify(nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN,
2122             &child, &children) == 0);
2123         for (c = 0; c < children; ++c)
2124                 if (nvlist_uint64_match(child[c], ZPOOL_CONFIG_GUID, guid))
2125                         return (B_TRUE);
2126         return (B_FALSE);
2127 }
2128
2129 static boolean_t
2130 vdev_child_string_match(nvlist_t *vdev, char *tgt)
2131 {
2132         nvlist_t **child;
2133         uint_t c, children;
2134
2135         verify(nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN,
2136             &child, &children) == 0);
2137         for (c = 0; c < children; ++c) {
2138                 if (nvlist_string_match(child[c], ZPOOL_CONFIG_PATH, tgt) ||
2139                     nvlist_string_match(child[c], ZPOOL_CONFIG_DEVID, tgt))
2140                         return (B_TRUE);
2141         }
2142         return (B_FALSE);
2143 }
2144
2145 static boolean_t
2146 vdev_guid_match(nvlist_t *config, uint64_t guid)
2147 {
2148         nvlist_t *nvroot;
2149
2150         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2151             &nvroot) == 0);
2152
2153         return (nvlist_uint64_match(nvroot, ZPOOL_CONFIG_GUID, guid) ||
2154             vdev_child_guid_match(nvroot, guid));
2155 }
2156
2157 static boolean_t
2158 vdev_string_match(nvlist_t *config, char *tgt)
2159 {
2160         nvlist_t *nvroot;
2161
2162         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2163             &nvroot) == 0);
2164
2165         return (vdev_child_string_match(nvroot, tgt));
2166 }
2167
2168 static boolean_t
2169 pool_match(nvlist_t *config, char *tgt)
2170 {
2171         uint64_t guid = strtoull(tgt, NULL, 0);
2172
2173         if (guid != 0) {
2174                 return (
2175                     nvlist_uint64_match(config, ZPOOL_CONFIG_POOL_GUID, guid) ||
2176                     vdev_guid_match(config, guid));
2177         } else {
2178                 return (
2179                     nvlist_string_match(config, ZPOOL_CONFIG_POOL_NAME, tgt) ||
2180                     vdev_string_match(config, tgt));
2181         }
2182 }
2183
2184 static int
2185 find_exported_zpool(char *pool_id, nvlist_t **configp, char *vdev_dir)
2186 {
2187         nvlist_t *pools;
2188         int error = ENOENT;
2189         nvlist_t *match = NULL;
2190
2191         if (vdev_dir != NULL)
2192                 pools = zpool_find_import_activeok(g_zfs, 1, &vdev_dir);
2193         else
2194                 pools = zpool_find_import_activeok(g_zfs, 0, NULL);
2195
2196         if (pools != NULL) {
2197                 nvpair_t *elem = NULL;
2198
2199                 while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
2200                         verify(nvpair_value_nvlist(elem, configp) == 0);
2201                         if (pool_match(*configp, pool_id)) {
2202                                 if (match != NULL) {
2203                                         (void) fatal(
2204                                             "More than one matching pool - "
2205                                             "specify guid/devid/device path.");
2206                                 } else {
2207                                         match = *configp;
2208                                         error = 0;
2209                                 }
2210                         }
2211                 }
2212         }
2213
2214         *configp = error ? NULL : match;
2215
2216         return (error);
2217 }
2218
2219 int
2220 main(int argc, char **argv)
2221 {
2222         int i, c;
2223         struct rlimit rl = { 1024, 1024 };
2224         spa_t *spa;
2225         objset_t *os = NULL;
2226         char *endstr;
2227         int dump_all = 1;
2228         int verbose = 0;
2229         int error;
2230         int exported = 0;
2231         char *vdev_dir = NULL;
2232
2233         (void) setrlimit(RLIMIT_NOFILE, &rl);
2234         (void) enable_extended_FILE_stdio(-1, -1);
2235
2236         dprintf_setup(&argc, argv);
2237
2238         while ((c = getopt(argc, argv, "udibcsvCS:U:lRep:")) != -1) {
2239                 switch (c) {
2240                 case 'u':
2241                 case 'd':
2242                 case 'i':
2243                 case 'b':
2244                 case 'c':
2245                 case 's':
2246                 case 'C':
2247                 case 'l':
2248                 case 'R':
2249                         dump_opt[c]++;
2250                         dump_all = 0;
2251                         break;
2252                 case 'v':
2253                         verbose++;
2254                         break;
2255                 case 'U':
2256                         spa_config_path = optarg;
2257                         break;
2258                 case 'e':
2259                         exported = 1;
2260                         break;
2261                 case 'p':
2262                         vdev_dir = optarg;
2263                         break;
2264                 case 'S':
2265                         dump_opt[c]++;
2266                         dump_all = 0;
2267                         zdb_sig_user_data = (strncmp(optarg, "user:", 5) == 0);
2268                         if (!zdb_sig_user_data && strncmp(optarg, "all:", 4))
2269                                 usage();
2270                         endstr = strchr(optarg, ':') + 1;
2271                         if (strcmp(endstr, "fletcher2") == 0)
2272                                 zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
2273                         else if (strcmp(endstr, "fletcher4") == 0)
2274                                 zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_4;
2275                         else if (strcmp(endstr, "sha256") == 0)
2276                                 zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
2277                         else if (strcmp(endstr, "all") == 0)
2278                                 zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
2279                         else
2280                                 usage();
2281                         break;
2282                 default:
2283                         usage();
2284                         break;
2285                 }
2286         }
2287
2288         if (vdev_dir != NULL && exported == 0) {
2289                 (void) fprintf(stderr, "-p option requires use of -e\n");
2290                 usage();
2291         }
2292
2293         kernel_init(FREAD);
2294         g_zfs = libzfs_init();
2295         ASSERT(g_zfs != NULL);
2296
2297         for (c = 0; c < 256; c++) {
2298                 if (dump_all && c != 'l' && c != 'R')
2299                         dump_opt[c] = 1;
2300                 if (dump_opt[c])
2301                         dump_opt[c] += verbose;
2302         }
2303
2304         argc -= optind;
2305         argv += optind;
2306
2307         if (argc < 1) {
2308                 if (dump_opt['C']) {
2309                         dump_cachefile(spa_config_path);
2310                         return (0);
2311                 }
2312                 usage();
2313         }
2314
2315         if (dump_opt['l']) {
2316                 dump_label(argv[0]);
2317                 return (0);
2318         }
2319
2320         if (dump_opt['R']) {
2321                 flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
2322                 flagbits['c'] = ZDB_FLAG_CHECKSUM;
2323                 flagbits['d'] = ZDB_FLAG_DECOMPRESS;
2324                 flagbits['e'] = ZDB_FLAG_BSWAP;
2325                 flagbits['g'] = ZDB_FLAG_GBH;
2326                 flagbits['i'] = ZDB_FLAG_INDIRECT;
2327                 flagbits['p'] = ZDB_FLAG_PHYS;
2328                 flagbits['r'] = ZDB_FLAG_RAW;
2329
2330                 spa = NULL;
2331                 while (argv[0]) {
2332                         zdb_read_block(argv[0], &spa);
2333                         argv++;
2334                         argc--;
2335                 }
2336                 if (spa)
2337                         spa_close(spa, (void *)zdb_read_block);
2338                 return (0);
2339         }
2340
2341         if (dump_opt['C'])
2342                 dump_config(argv[0]);
2343
2344         error = 0;
2345         if (exported) {
2346                 /*
2347                  * Check to see if the name refers to an exported zpool
2348                  */
2349                 char *slash;
2350                 nvlist_t *exported_conf = NULL;
2351
2352                 if ((slash = strchr(argv[0], '/')) != NULL)
2353                         *slash = '\0';
2354
2355                 error = find_exported_zpool(argv[0], &exported_conf, vdev_dir);
2356                 if (error == 0) {
2357                         nvlist_t *nvl = NULL;
2358
2359                         if (vdev_dir != NULL) {
2360                                 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
2361                                         error = ENOMEM;
2362                                 else if (nvlist_add_string(nvl,
2363                                     zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
2364                                     vdev_dir) != 0)
2365                                         error = ENOMEM;
2366                         }
2367
2368                         if (error == 0)
2369                                 error = spa_import_faulted(argv[0],
2370                                     exported_conf, nvl);
2371
2372                         nvlist_free(nvl);
2373                 }
2374
2375                 if (slash != NULL)
2376                         *slash = '/';
2377         }
2378
2379         if (error == 0) {
2380                 if (strchr(argv[0], '/') != NULL) {
2381                         error = dmu_objset_open(argv[0], DMU_OST_ANY,
2382                             DS_MODE_USER | DS_MODE_READONLY, &os);
2383                 } else {
2384                         error = spa_open(argv[0], &spa, FTAG);
2385                 }
2386         }
2387
2388         if (error)
2389                 fatal("can't open %s: %s", argv[0], strerror(error));
2390
2391         argv++;
2392         if (--argc > 0) {
2393                 zopt_objects = argc;
2394                 zopt_object = calloc(zopt_objects, sizeof (uint64_t));
2395                 for (i = 0; i < zopt_objects; i++) {
2396                         errno = 0;
2397                         zopt_object[i] = strtoull(argv[i], NULL, 0);
2398                         if (zopt_object[i] == 0 && errno != 0)
2399                                 fatal("bad object number %s: %s",
2400                                     argv[i], strerror(errno));
2401                 }
2402         }
2403
2404         if (os != NULL) {
2405                 dump_dir(os);
2406                 dmu_objset_close(os);
2407         } else {
2408                 dump_zpool(spa);
2409                 spa_close(spa, FTAG);
2410         }
2411
2412         fuid_table_destroy();
2413
2414         libzfs_fini(g_zfs);
2415         kernel_fini();
2416
2417         return (0);
2418 }