X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=cmd%2Fzdb%2Fzdb.c;h=82491ad5f2f34b39e51b1fff14afbd2e9d543742;hb=refs%2Fheads%2Frertzinger%2Ffeature-zpool-get--p;hp=ff73072f8a64f5622c23cb5b2b3c625af3b9e9b4;hpb=428870ff734fdaccc342b33fc53cf94724409a46;p=zfs.git diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index ff73072..82491ad 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -18,8 +18,10 @@ * * CDDL HEADER END */ + /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -54,17 +56,20 @@ #include #include #include +#include #undef ZFS_MAXNAMELEN -#undef verify #include -#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \ - zio_compress_table[(idx)].ci_name : "UNKNOWN") -#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \ - zio_checksum_table[(idx)].ci_name : "UNKNOWN") -#define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \ - dmu_ot[(idx)].ot_name : "UNKNOWN") -#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : DMU_OT_NUMTYPES) +#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \ + zio_compress_table[(idx)].ci_name : "UNKNOWN") +#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \ + zio_checksum_table[(idx)].ci_name : "UNKNOWN") +#define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \ + dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ? \ + dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN") +#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : \ + (((idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA) ? \ + DMU_OT_ZAP_OTHER : DMU_OT_NUMTYPES)) #ifndef lint extern int zfs_recover; @@ -81,13 +86,14 @@ extern void dump_intent_log(zilog_t *); uint64_t *zopt_object = NULL; int zopt_objects = 0; libzfs_handle_t *g_zfs; +uint64_t max_inflight = 200; /* * These libumem hooks provide a reasonable set of defaults for the allocator's * debugging facilities. */ const char * -_umem_debug_init() +_umem_debug_init(void) { return ("default,verbose"); /* $UMEM_DEBUG setting */ } @@ -102,13 +108,17 @@ static void usage(void) { (void) fprintf(stderr, - "Usage: %s [-CumdibcsDvhL] poolname [object...]\n" - " %s [-div] dataset [object...]\n" - " %s -m [-L] poolname [vdev [metaslab...]]\n" - " %s -R poolname vdev:offset:size[:flags]\n" - " %s -S poolname\n" - " %s -l [-u] device\n" - " %s -C\n\n", + "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] " + "[-U config] [-M inflight I/Os] poolname [object...]\n" + " %s [-divPA] [-e -p path...] [-U config] dataset " + "[object...]\n" + " %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " + "poolname [vdev [metaslab...]]\n" + " %s -R [-A] [-e [-p path...]] poolname " + "vdev:offset:size[:flags]\n" + " %s -S [-PA] [-e [-p path...]] [-U config] poolname\n" + " %s -l [-uA] device\n" + " %s -C [-A] [-U config]\n\n", cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname); (void) fprintf(stderr, " Dataset name must include at least one " @@ -150,9 +160,11 @@ usage(void) "has altroot/not in a cachefile\n"); (void) fprintf(stderr, " -p -- use one or more with " "-e to specify path to vdev dir\n"); - (void) fprintf(stderr, " -P print numbers parsable\n"); + (void) fprintf(stderr, " -P print numbers in parseable form\n"); (void) fprintf(stderr, " -t -- highest txg to use when " "searching for uberblocks\n"); + (void) fprintf(stderr, " -M -- " + "specify the maximum number of checksumming I/Os [default is 200]\n"); (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " "to make only that option verbose\n"); (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); @@ -536,7 +548,7 @@ static void dump_metaslab_stats(metaslab_t *msp) { char maxbuf[32]; - space_map_t *sm = &msp->ms_map; + space_map_t *sm = msp->ms_map; avl_tree_t *t = sm->sm_pp_root; int free_pct = sm->sm_space * 100 / sm->sm_size; @@ -552,7 +564,7 @@ dump_metaslab(metaslab_t *msp) { vdev_t *vd = msp->ms_group->mg_vd; spa_t *spa = vd->vdev_spa; - space_map_t *sm = &msp->ms_map; + space_map_t *sm = msp->ms_map; space_map_obj_t *smo = &msp->ms_smo; char freebuf[32]; @@ -645,8 +657,9 @@ dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index) char *types[4] = { "ditto", "single", "double", "triple" }; char blkbuf[BP_SPRINTF_LEN]; blkptr_t blk; + int p; - for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { if (ddp->ddp_phys_birth == 0) continue; ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); @@ -695,12 +708,14 @@ dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class) return; ASSERT(error == 0); - count = ddt_object_count(ddt, type, class); + error = ddt_object_count(ddt, type, class, &count); + ASSERT(error == 0); + if (count == 0) + return; + dspace = doi.doi_physical_blocks_512 << 9; mspace = doi.doi_fill_count * doi.doi_data_block_size; - ASSERT(count != 0); /* we should have destroyed it */ - ddt_object_name(ddt, type, class, name); (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n", @@ -733,13 +748,19 @@ dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class) static void dump_all_ddts(spa_t *spa) { - ddt_histogram_t ddh_total = { 0 }; - ddt_stat_t dds_total = { 0 }; + ddt_histogram_t ddh_total; + ddt_stat_t dds_total; + enum zio_checksum c; + enum ddt_type type; + enum ddt_class class; + + bzero(&ddh_total, sizeof (ddt_histogram_t)); + bzero(&dds_total, sizeof (ddt_stat_t)); - for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { + for (c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { ddt_t *ddt = spa->spa_ddt[c]; - for (enum ddt_type type = 0; type < DDT_TYPES; type++) { - for (enum ddt_class class = 0; class < DDT_CLASSES; + for (type = 0; type < DDT_TYPES; type++) { + for (class = 0; class < DDT_CLASSES; class++) { dump_ddt(ddt, type, class); } @@ -783,6 +804,7 @@ dump_dtl(vdev_t *vd, int indent) boolean_t required; char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" }; char prefix[256]; + int c, t; spa_vdev_state_enter(spa, SCL_NONE); required = vdev_dtl_required(vd); @@ -796,7 +818,7 @@ dump_dtl(vdev_t *vd, int indent) vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa), required ? "DTL-required" : "DTL-expendable"); - for (int t = 0; t < DTL_TYPES; t++) { + for (t = 0; t < DTL_TYPES; t++) { space_map_t *sm = &vd->vdev_dtl[t]; if (sm->sm_space == 0) continue; @@ -810,7 +832,7 @@ dump_dtl(vdev_t *vd, int indent) &vd->vdev_dtl_smo, sm); } - for (int c = 0; c < vd->vdev_children; c++) + for (c = 0; c < vd->vdev_children; c++) dump_dtl(vd->vdev_child[c], indent + 4); } @@ -826,6 +848,7 @@ dump_history(spa_t *spa) struct tm t; char tbuf[30]; char internalstr[MAXPATHLEN]; + int i; do { len = sizeof (buf); @@ -843,7 +866,7 @@ dump_history(spa_t *spa) } while (len != 0); (void) printf("\nHistory:\n"); - for (int i = 0; i < num; i++) { + for (i = 0; i < num; i++) { uint64_t time, txg, ievent; char *cmd, *intstr; @@ -865,8 +888,8 @@ dump_history(spa_t *spa) (void) snprintf(internalstr, sizeof (internalstr), "[internal %s txg:%lld] %s", - zfs_history_event_names[ievent], txg, - intstr); + zfs_history_event_names[ievent], + (longlong_t)txg, intstr); cmd = internalstr; } tsec = time; @@ -904,6 +927,7 @@ sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp) { const dva_t *dva = bp->blk_dva; int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1; + int i; if (dump_opt['b'] >= 5) { sprintf_blkptr(blkbuf, bp); @@ -912,7 +936,7 @@ sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp) blkbuf[0] = '\0'; - for (int i = 0; i < ndvas; i++) + for (i = 0; i < ndvas; i++) (void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ", (u_longlong_t)DVA_GET_VDEV(&dva[i]), (u_longlong_t)DVA_GET_OFFSET(&dva[i]), @@ -972,7 +996,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp, arc_buf_t *buf; uint64_t fill = 0; - err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf, + err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); if (err) return (err); @@ -1085,7 +1109,7 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) ASSERT(size == sizeof (*ds)); crtime = ds->ds_creation_time; - zdb_nicenum(ds->ds_used_bytes, used); + zdb_nicenum(ds->ds_referenced_bytes, used); zdb_nicenum(ds->ds_compressed_bytes, compressed); zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed); zdb_nicenum(ds->ds_unique_bytes, unique); @@ -1129,6 +1153,44 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) /* ARGSUSED */ static int +dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + char blkbuf[BP_SPRINTF_LEN]; + + if (bp->blk_birth != 0) { + sprintf_blkptr(blkbuf, bp); + (void) printf("\t%s\n", blkbuf); + } + return (0); +} + +static void +dump_bptree(objset_t *os, uint64_t obj, char *name) +{ + char bytes[32]; + bptree_phys_t *bt; + dmu_buf_t *db; + + if (dump_opt['d'] < 3) + return; + + VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); + bt = db->db_data; + zdb_nicenum(bt->bt_bytes, bytes); + (void) printf("\n %s: %llu datasets, %s\n", + name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes); + dmu_buf_rele(db, FTAG); + + if (dump_opt['d'] < 5) + return; + + (void) printf("\n"); + + (void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL); +} + +/* ARGSUSED */ +static int dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { char blkbuf[BP_SPRINTF_LEN]; @@ -1211,7 +1273,7 @@ static boolean_t sa_loaded; sa_attr_type_t *sa_attr_table; static void -fuid_table_destroy() +fuid_table_destroy(void) { if (fuid_table_loaded) { zfs_fuid_table_destroy(&idx_tree, &domain_tree); @@ -1265,6 +1327,61 @@ dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid) print_idstr(gid, "gid"); } +static void +dump_znode_sa_xattr(sa_handle_t *hdl) +{ + nvlist_t *sa_xattr; + nvpair_t *elem = NULL; + int sa_xattr_size = 0; + int sa_xattr_entries = 0; + int error; + char *sa_xattr_packed; + + error = sa_size(hdl, sa_attr_table[ZPL_DXATTR], &sa_xattr_size); + if (error || sa_xattr_size == 0) + return; + + sa_xattr_packed = malloc(sa_xattr_size); + if (sa_xattr_packed == NULL) + return; + + error = sa_lookup(hdl, sa_attr_table[ZPL_DXATTR], + sa_xattr_packed, sa_xattr_size); + if (error) { + free(sa_xattr_packed); + return; + } + + error = nvlist_unpack(sa_xattr_packed, sa_xattr_size, &sa_xattr, 0); + if (error) { + free(sa_xattr_packed); + return; + } + + while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) + sa_xattr_entries++; + + (void) printf("\tSA xattrs: %d bytes, %d entries\n\n", + sa_xattr_size, sa_xattr_entries); + while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) { + uchar_t *value; + uint_t cnt, idx; + + (void) printf("\t\t%s = ", nvpair_name(elem)); + nvpair_value_byte_array(elem, &value, &cnt); + for (idx = 0 ; idx < cnt ; ++idx) { + if (isprint(value[idx])) + (void) putchar(value[idx]); + else + (void) printf("\\%3.3o", value[idx]); + } + (void) putchar('\n'); + } + + nvlist_free(sa_xattr); + free(sa_xattr_packed); +} + /*ARGSUSED*/ static void dump_znode(objset_t *os, uint64_t object, void *data, size_t size) @@ -1290,8 +1407,12 @@ dump_znode(objset_t *os, uint64_t object, void *data, size_t size) VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_attrs) == 0); } - sa_attr_table = sa_setup(os, sa_attrs, - zfs_attr_table, ZPL_END); + if ((error = sa_setup(os, sa_attrs, zfs_attr_table, + ZPL_END, &sa_attr_table)) != 0) { + (void) printf("sa_setup failed errno %d, can't " + "display znode contents\n", error); + return; + } sa_loaded = B_TRUE; } @@ -1361,6 +1482,7 @@ dump_znode(objset_t *os, uint64_t object, void *data, size_t size) if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev, sizeof (uint64_t)) == 0) (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev); + dump_znode_sa_xattr(hdl); sa_handle_destroy(hdl); } @@ -1455,7 +1577,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) } if (object == 0) { - dn = os->os_meta_dnode; + dn = DMU_META_DNODE(os); } else { error = dmu_bonus_hold(os, object, FTAG, &db); if (error) @@ -1463,7 +1585,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) object, error); bonus = db->db_data; bsize = db->db_size; - dn = ((dmu_buf_impl_t *)db)->db_dnode; + dn = DB_DNODE((dmu_buf_impl_t *)db); } dmu_object_info_from_dnode(dn, &doi); @@ -1627,8 +1749,8 @@ dump_dir(objset_t *os) dump_object(os, 0, verbosity, &print_header); object_count = 0; - if (os->os_userused_dnode && - os->os_userused_dnode->dn_type != 0) { + if (DMU_USERUSED_DNODE(os) != NULL && + DMU_USERUSED_DNODE(os)->dn_type != 0) { dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header); dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header); } @@ -1654,7 +1776,7 @@ dump_uberblock(uberblock_t *ub, const char *header, const char *footer) { time_t timestamp = ub->ub_timestamp; - (void) printf(header ? header : ""); + (void) printf("%s", header ? header : ""); (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic); (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version); (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg); @@ -1666,7 +1788,7 @@ dump_uberblock(uberblock_t *ub, const char *header, const char *footer) sprintf_blkptr(blkbuf, &ub->ub_rootbp); (void) printf("\trootbp = %s\n", blkbuf); } - (void) printf(footer ? footer : ""); + (void) printf("%s", footer ? footer : ""); } static void @@ -1747,11 +1869,12 @@ dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift) vdev_t vd; vdev_t *vdp = &vd; char header[ZDB_MAX_UB_HEADER_SIZE]; + int i; vd.vdev_ashift = ashift; vdp->vdev_top = vdp; - for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) { + for (i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) { uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i); uberblock_t *ub = (void *)((char *)lbl + uoff); @@ -1773,6 +1896,7 @@ dump_label(const char *dev) struct stat64 statbuf; uint64_t psize, ashift; int len = strlen(dev) + 1; + int l; if (strncmp(dev, "/dev/dsk/", 9) == 0) { len++; @@ -1788,7 +1912,7 @@ dump_label(const char *dev) exit(1); } - if (fstat64(fd, &statbuf) != 0) { + if (fstat64_blk(fd, &statbuf) != 0) { (void) printf("failed to stat '%s': %s\n", path, strerror(errno)); free(path); @@ -1796,18 +1920,10 @@ dump_label(const char *dev) exit(1); } - if (S_ISBLK(statbuf.st_mode)) { - (void) printf("cannot use '%s': character device required\n", - path); - free(path); - (void) close(fd); - exit(1); - } - psize = statbuf.st_size; psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t)); - for (int l = 0; l < VDEV_LABELS; l++) { + for (l = 0; l < VDEV_LABELS; l++) { nvlist_t *config = NULL; (void) printf("--------------------------------------------\n"); @@ -1876,11 +1992,13 @@ typedef struct zdb_blkstats { */ #define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0) #define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1) -#define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 2) +#define ZDB_OT_OTHER (DMU_OT_NUMTYPES + 2) +#define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 3) static char *zdb_ot_extname[] = { "deferred free", "dedup ditto", + "other", "Total", }; @@ -1901,13 +2019,14 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, dmu_object_type_t type) { uint64_t refcnt = 0; + int i; ASSERT(type < ZDB_OT_TOTAL); if (zilog && zil_bp_tree_add(zilog, bp) != 0) return; - for (int i = 0; i < 4; i++) { + for (i = 0; i < 4; i++) { int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; int t = (i & 1) ? type : ZDB_OT_TOTAL; zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; @@ -1946,9 +2065,47 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0); } -/* ARGSUSED */ +static void +zdb_blkptr_done(zio_t *zio) +{ + spa_t *spa = zio->io_spa; + blkptr_t *bp = zio->io_bp; + int ioerr = zio->io_error; + zdb_cb_t *zcb = zio->io_private; + zbookmark_t *zb = &zio->io_bookmark; + + zio_data_buf_free(zio->io_data, zio->io_size); + + mutex_enter(&spa->spa_scrub_lock); + spa->spa_scrub_inflight--; + cv_broadcast(&spa->spa_scrub_io_cv); + + if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { + char blkbuf[BP_SPRINTF_LEN]; + + zcb->zcb_haderrors = 1; + zcb->zcb_errors[ioerr]++; + + if (dump_opt['b'] >= 2) + sprintf_blkptr(blkbuf, bp); + else + blkbuf[0] = '\0'; + + (void) printf("zdb_blkptr_cb: " + "Got error %d reading " + "<%llu, %llu, %lld, %llx> %s -- skipping\n", + ioerr, + (u_longlong_t)zb->zb_objset, + (u_longlong_t)zb->zb_object, + (u_longlong_t)zb->zb_level, + (u_longlong_t)zb->zb_blkid, + blkbuf); + } + mutex_exit(&spa->spa_scrub_lock); +} + static int -zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, +zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) { zdb_cb_t *zcb = arg; @@ -1961,44 +2118,29 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, type = BP_GET_TYPE(bp); - zdb_count_block(zcb, zilog, bp, type); + zdb_count_block(zcb, zilog, bp, + (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type); - is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata); + is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)); if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) { - int ioerr; size_t size = BP_GET_PSIZE(bp); - void *data = malloc(size); + void *data = zio_data_buf_alloc(size); int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; /* If it's an intent log block, failure is expected. */ if (zb->zb_level == ZB_ZIL_LEVEL) flags |= ZIO_FLAG_SPECULATIVE; - ioerr = zio_wait(zio_read(NULL, spa, bp, data, size, - NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb)); - - free(data); + mutex_enter(&spa->spa_scrub_lock); + while (spa->spa_scrub_inflight > max_inflight) + cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); + spa->spa_scrub_inflight++; + mutex_exit(&spa->spa_scrub_lock); - if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) { - zcb->zcb_haderrors = 1; - zcb->zcb_errors[ioerr]++; + zio_nowait(zio_read(NULL, spa, bp, data, size, + zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb)); - if (dump_opt['b'] >= 2) - sprintf_blkptr(blkbuf, bp); - else - blkbuf[0] = '\0'; - - (void) printf("zdb_blkptr_cb: " - "Got error %d reading " - "<%llu, %llu, %lld, %llx> %s -- skipping\n", - ioerr, - (u_longlong_t)zb->zb_objset, - (u_longlong_t)zb->zb_object, - (u_longlong_t)zb->zb_level, - (u_longlong_t)zb->zb_blkid, - blkbuf); - } } zcb->zcb_readfails = 0; @@ -2059,6 +2201,7 @@ zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb) ddt_bookmark_t ddb = { 0 }; ddt_entry_t dde; int error; + int p; while ((error = ddt_walk(spa, &ddb, &dde)) == 0) { blkptr_t blk; @@ -2069,7 +2212,7 @@ zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb) ASSERT(ddt_phys_total_refcnt(&dde) > 1); - for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { if (ddp->ddp_phys_birth == 0) continue; ddt_bp_create(ddb.ddb_checksum, @@ -2097,19 +2240,20 @@ static void zdb_leak_init(spa_t *spa, zdb_cb_t *zcb) { zcb->zcb_spa = spa; + int c, m; if (!dump_opt['L']) { vdev_t *rvd = spa->spa_root_vdev; - for (int c = 0; c < rvd->vdev_children; c++) { + for (c = 0; c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; - for (int m = 0; m < vd->vdev_ms_count; m++) { + for (m = 0; m < vd->vdev_ms_count; m++) { metaslab_t *msp = vd->vdev_ms[m]; mutex_enter(&msp->ms_lock); - space_map_unload(&msp->ms_map); - VERIFY(space_map_load(&msp->ms_map, + space_map_unload(msp->ms_map); + VERIFY(space_map_load(msp->ms_map, &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset) == 0); - msp->ms_map.sm_ppd = vd; + msp->ms_map->sm_ppd = vd; mutex_exit(&msp->ms_lock); } } @@ -2125,14 +2269,16 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb) static void zdb_leak_fini(spa_t *spa) { + int c, m; + if (!dump_opt['L']) { vdev_t *rvd = spa->spa_root_vdev; - for (int c = 0; c < rvd->vdev_children; c++) { + for (c = 0; c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; - for (int m = 0; m < vd->vdev_ms_count; m++) { + for (m = 0; m < vd->vdev_ms_count; m++) { metaslab_t *msp = vd->vdev_ms[m]; mutex_enter(&msp->ms_lock); - space_map_unload(&msp->ms_map); + space_map_unload(msp->ms_map); mutex_exit(&msp->ms_lock); } } @@ -2158,11 +2304,12 @@ count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) static int dump_block_stats(spa_t *spa) { - zdb_cb_t zcb = { 0 }; + zdb_cb_t zcb; zdb_blkstats_t *zb, *tzb; uint64_t norm_alloc, norm_space, total_alloc, total_found; int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD; int leaks = 0; + int e; (void) printf("\nTraversing all blocks %s%s%s%s%s...\n", (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", @@ -2179,6 +2326,7 @@ dump_block_stats(spa_t *spa) * it's not part of any space map) is a double allocation, * reference to a freed block, or an unclaimed log block. */ + bzero(&zcb, sizeof(zdb_cb_t)); zdb_leak_init(spa, &zcb); /* @@ -2188,16 +2336,34 @@ dump_block_stats(spa_t *spa) count_block_cb, &zcb, NULL); (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj, count_block_cb, &zcb, NULL); + if (spa_feature_is_active(spa, + &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { + VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset, + spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb, + &zcb, NULL)); + } if (dump_opt['c'] > 1) flags |= TRAVERSE_PREFETCH_DATA; zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb); + /* + * If we've traversed the data blocks then we need to wait for those + * I/Os to complete. We leverage "The Godfather" zio to wait on + * all async I/Os to complete. + */ + if (dump_opt['c']) { + (void) zio_wait(spa->spa_async_zio_root); + spa->spa_async_zio_root = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_GODFATHER); + } + if (zcb.zcb_haderrors) { (void) printf("\nError counts:\n\n"); (void) printf("\t%5s %s\n", "errno", "count"); - for (int e = 0; e < 256; e++) { + for (e = 0; e < 256; e++) { if (zcb.zcb_errors[e] != 0) { (void) printf("\t%5d %llu\n", e, (u_longlong_t)zcb.zcb_errors[e]); @@ -2346,7 +2512,7 @@ typedef struct zdb_ddt_entry { /* ARGSUSED */ static int zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, - arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) + const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) { avl_tree_t *t = arg; avl_index_t where; @@ -2364,7 +2530,7 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, } if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF || - BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) + BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) return (0); ddt_key_fill(&zdde_search.zdde_key, bp); @@ -2391,8 +2557,11 @@ dump_simulated_ddt(spa_t *spa) avl_tree_t t; void *cookie = NULL; zdb_ddt_entry_t *zdde; - ddt_histogram_t ddh_total = { 0 }; - ddt_stat_t dds_total = { 0 }; + ddt_histogram_t ddh_total; + ddt_stat_t dds_total; + + bzero(&ddh_total, sizeof (ddt_histogram_t)); + bzero(&dds_total, sizeof (ddt_stat_t)); avl_create(&t, ddt_entry_compare, sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node)); @@ -2469,7 +2638,14 @@ dump_zpool(spa_t *spa) dump_bpobj(&spa->spa_deferred_bpobj, "Deferred frees"); if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { dump_bpobj(&spa->spa_dsl_pool->dp_free_bpobj, - "Pool frees"); + "Pool snapshot frees"); + } + + if (spa_feature_is_active(spa, + &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { + dump_bptree(spa->spa_meta_objset, + spa->spa_dsl_pool->dp_bptree_obj, + "Pool dataset frees"); } dump_dtl(spa->spa_root_vdev, 0); } @@ -2532,7 +2708,7 @@ zdb_dump_block_raw(void *buf, uint64_t size, int flags) { if (flags & ZDB_FLAG_BSWAP) byteswap_uint64_array(buf, size); - (void) write(1, buf, size); + VERIFY(write(fileno(stdout), buf, size) == size); } static void @@ -2724,7 +2900,7 @@ zdb_read_block(char *thing, spa_t *spa) psize = size; lsize = size; - pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); + pbuf = umem_alloc_aligned(SPA_MAXBLOCKSIZE, 512, UMEM_NOFAIL); lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); BP_ZERO(bp); @@ -2862,7 +3038,7 @@ find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv) nvlist_t *match = NULL; char *name = NULL; char *sepp = NULL; - char sep; + char sep = 0; int count = 0; importargs_t args = { 0 }; @@ -2938,13 +3114,23 @@ main(int argc, char **argv) nvlist_t *policy = NULL; uint64_t max_txg = UINT64_MAX; int rewind = ZPOOL_NEVER_REWIND; + char *spa_config_path_env; (void) setrlimit(RLIMIT_NOFILE, &rl); (void) enable_extended_FILE_stdio(-1, -1); dprintf_setup(&argc, argv); - while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) { + /* + * If there is an environment variable SPA_CONFIG_PATH it overrides + * default spa_config_path setting. If -U flag is specified it will + * override this environment variable settings once again. + */ + spa_config_path_env = getenv("SPA_CONFIG_PATH"); + if (spa_config_path_env != NULL) + spa_config_path = spa_config_path_env; + + while ((c = getopt(argc, argv, "bcdhilmM:suCDRSAFLXevp:t:U:P")) != -1) { switch (c) { case 'b': case 'c': @@ -2973,6 +3159,15 @@ main(int argc, char **argv) case 'v': verbose++; break; + case 'M': + max_inflight = strtoull(optarg, NULL, 0); + if (max_inflight == 0) { + (void) fprintf(stderr, "maximum number " + "of inflight I/Os must be greater " + "than 0\n"); + usage(); + } + break; case 'p': if (searchdirs == NULL) { searchdirs = umem_alloc(sizeof (char *), @@ -3011,8 +3206,8 @@ main(int argc, char **argv) } kernel_init(FREAD); - g_zfs = libzfs_init(); - ASSERT(g_zfs != NULL); + if ((g_zfs = libzfs_init()) == NULL) + return (1); if (dump_all) verbose = MAX(verbose, 1); @@ -3072,8 +3267,11 @@ main(int argc, char **argv) fatal("can't open '%s': %s", target, strerror(ENOMEM)); } - if ((error = spa_import(name, cfg, NULL)) != 0) - error = spa_import_verbatim(name, cfg, NULL); + if ((error = spa_import(name, cfg, NULL, + ZFS_IMPORT_MISSING_LOG)) != 0) { + error = spa_import(name, cfg, NULL, + ZFS_IMPORT_VERBATIM); + } } } @@ -3124,7 +3322,13 @@ main(int argc, char **argv) argv[i], strerror(errno)); } } - (os != NULL) ? dump_dir(os) : dump_zpool(spa); + if (os != NULL) { + dump_dir(os); + } else if (zopt_objects > 0 && !dump_opt['m']) { + dump_dir(spa->spa_meta_objset); + } else { + dump_zpool(spa); + } } else { flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR; flagbits['c'] = ZDB_FLAG_CHECKSUM;