X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=zfs%2Fzcmd%2Fzdb%2Fzdb.c;h=253a1346a498681454e9215b95223d9f3065f462;hb=b128c09fbee863d15be744a2ce602b514eddbe3a;hp=a3b0f1238e6aa355afd014d2a3ff330b0d85f879;hpb=34dc7c2f2553220ebc6e29ca195fb6d57155f95f;p=zfs.git diff --git a/zfs/zcmd/zdb/zdb.c b/zfs/zcmd/zdb/zdb.c index a3b0f12..253a134 100644 --- a/zfs/zcmd/zdb/zdb.c +++ b/zfs/zcmd/zdb/zdb.c @@ -23,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "@(#)zdb.c 1.40 08/04/01 SMI" - #include #include #include @@ -52,6 +50,7 @@ #include #include #include +#include #undef ZFS_MAXNAMELEN #undef verify #include @@ -64,8 +63,6 @@ typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); extern void dump_intent_log(zilog_t *); uint64_t *zopt_object = NULL; int zopt_objects = 0; -int zdb_advance = ADVANCE_PRE; -zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 }; libzfs_handle_t *g_zfs; boolean_t zdb_sig_user_data = B_TRUE; int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256; @@ -90,8 +87,8 @@ static void usage(void) { (void) fprintf(stderr, - "Usage: %s [-udibcsvL] [-U cachefile_path] [-O order] " - "[-B os:obj:level:blkid] [-S user:cksumalg] " + "Usage: %s [-udibcsv] [-U cachefile_path] " + "[-S user:cksumalg] " "dataset [object...]\n" " %s -C [pool]\n" " %s -l dev\n" @@ -111,13 +108,8 @@ usage(void) "dump blkptr signatures\n"); (void) fprintf(stderr, " -v verbose (applies to all others)\n"); (void) fprintf(stderr, " -l dump label contents\n"); - (void) fprintf(stderr, " -L live pool (allows some errors)\n"); - (void) fprintf(stderr, " -O [!] " - "visitation order\n"); (void) fprintf(stderr, " -U cachefile_path -- use alternate " "cachefile\n"); - (void) fprintf(stderr, " -B objset:object:level:blkid -- " - "simulate bad block\n"); (void) fprintf(stderr, " -R read and display block from a " "device\n"); (void) fprintf(stderr, " -e Pool is exported/destroyed/" @@ -140,7 +132,7 @@ fatal(const char *fmt, ...) va_end(ap); (void) fprintf(stderr, "\n"); - exit(1); + abort(); } static void @@ -510,8 +502,7 @@ dump_metaslabs(spa_t *spa) for (c = 0; c < rvd->vdev_children; c++) { vd = rvd->vdev_child[c]; - (void) printf("\n vdev %llu = %s\n\n", - (u_longlong_t)vd->vdev_id, vdev_description(vd)); + (void) printf("\n vdev %llu\n\n", (u_longlong_t)vd->vdev_id); if (dump_opt['d'] <= 5) { (void) printf("\t%10s %10s %5s\n", @@ -536,7 +527,10 @@ dump_dtl(vdev_t *vd, int indent) if (indent == 0) (void) printf("\nDirty time logs:\n\n"); - (void) printf("\t%*s%s\n", indent, "", vdev_description(vd)); + (void) printf("\t%*s%s\n", indent, "", + vd->vdev_path ? vd->vdev_path : + vd->vdev_parent ? vd->vdev_ops->vdev_op_type : + spa_name(vd->vdev_spa)); for (ss = avl_first(t); ss; ss = AVL_NEXT(t, ss)) { /* @@ -571,7 +565,7 @@ dump_dnode(objset_t *os, uint64_t object, void *data, size_t size) } static uint64_t -blkid2offset(dnode_phys_t *dnp, int level, uint64_t blkid) +blkid2offset(const dnode_phys_t *dnp, int level, uint64_t blkid) { if (level < 0) return (blkid); @@ -602,115 +596,104 @@ sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp, int alldvas) (u_longlong_t)bp->blk_birth); } -/* ARGSUSED */ -static int -zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a) +static void +print_indirect(blkptr_t *bp, const zbookmark_t *zb, + const dnode_phys_t *dnp) { - zbookmark_t *zb = &bc->bc_bookmark; - blkptr_t *bp = &bc->bc_blkptr; - void *data = bc->bc_data; - dnode_phys_t *dnp = bc->bc_dnode; - char blkbuf[BP_SPRINTF_LEN + 80]; + char blkbuf[BP_SPRINTF_LEN]; int l; - if (bc->bc_errno) { - (void) sprintf(blkbuf, - "Error %d reading <%llu, %llu, %lld, %llu>: ", - bc->bc_errno, - (u_longlong_t)zb->zb_objset, - (u_longlong_t)zb->zb_object, - (u_longlong_t)zb->zb_level, - (u_longlong_t)zb->zb_blkid); - goto out; - } - - if (zb->zb_level == -1) { - ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); - ASSERT3U(BP_GET_LEVEL(bp), ==, 0); - } else { - ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); - ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); - } + ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); + ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); - if (zb->zb_level > 0) { - uint64_t fill = 0; - blkptr_t *bpx, *bpend; - - for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx); - bpx < bpend; bpx++) { - if (bpx->blk_birth != 0) { - fill += bpx->blk_fill; - } else { - ASSERT(bpx->blk_fill == 0); - } - } - ASSERT3U(fill, ==, bp->blk_fill); - } - - if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) { - uint64_t fill = 0; - dnode_phys_t *dnx, *dnend; - - for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT); - dnx < dnend; dnx++) { - if (dnx->dn_type != DMU_OT_NONE) - fill++; - } - ASSERT3U(fill, ==, bp->blk_fill); - } - - (void) sprintf(blkbuf, "%16llx ", + (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid)); ASSERT(zb->zb_level >= 0); for (l = dnp->dn_nlevels - 1; l >= -1; l--) { if (l == zb->zb_level) { - (void) sprintf(blkbuf + strlen(blkbuf), "L%llx", - (u_longlong_t)zb->zb_level); + (void) printf("L%llx", (u_longlong_t)zb->zb_level); } else { - (void) sprintf(blkbuf + strlen(blkbuf), " "); + (void) printf(" "); } } -out: - if (bp->blk_birth == 0) { - (void) sprintf(blkbuf + strlen(blkbuf), ""); - (void) printf("%s\n", blkbuf); - } else { - sprintf_blkptr_compact(blkbuf + strlen(blkbuf), bp, - dump_opt['d'] > 5 ? 1 : 0); - (void) printf("%s\n", blkbuf); + sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0); + (void) printf("%s\n", blkbuf); +} + +#define SET_BOOKMARK(zb, objset, object, level, blkid) \ +{ \ + (zb)->zb_objset = objset; \ + (zb)->zb_object = object; \ + (zb)->zb_level = level; \ + (zb)->zb_blkid = blkid; \ +} + +static int +visit_indirect(spa_t *spa, const dnode_phys_t *dnp, + blkptr_t *bp, const zbookmark_t *zb) +{ + int err; + + if (bp->blk_birth == 0) + return (0); + + print_indirect(bp, zb, dnp); + + if (BP_GET_LEVEL(bp) > 0) { + uint32_t flags = ARC_WAIT; + int i; + blkptr_t *cbp; + int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; + arc_buf_t *buf; + uint64_t fill = 0; + + err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf, + ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); + if (err) + return (err); + + /* recursively visit blocks below this */ + cbp = buf->b_data; + for (i = 0; i < epb; i++, cbp++) { + zbookmark_t czb; + + SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, + zb->zb_level - 1, + zb->zb_blkid * epb + i); + err = visit_indirect(spa, dnp, cbp, &czb); + if (err) + break; + fill += cbp->blk_fill; + } + ASSERT3U(fill, ==, bp->blk_fill); + (void) arc_buf_remove_ref(buf, &buf); } - return (bc->bc_errno ? ERESTART : 0); + return (err); } /*ARGSUSED*/ static void -dump_indirect(objset_t *os, uint64_t object, void *data, size_t size) +dump_indirect(dnode_t *dn) { - traverse_handle_t *th; - uint64_t objset = dmu_objset_id(os); - int advance = zdb_advance; + dnode_phys_t *dnp = dn->dn_phys; + int j; + zbookmark_t czb; (void) printf("Indirect blocks:\n"); - if (object == 0) - advance |= ADVANCE_DATA; - - th = traverse_init(dmu_objset_spa(os), zdb_indirect_cb, NULL, advance, - ZIO_FLAG_CANFAIL); - th->th_noread = zdb_noread; - - traverse_add_dnode(th, 0, -1ULL, objset, object); - - while (traverse_more(th) == EAGAIN) - continue; + SET_BOOKMARK(&czb, dmu_objset_id(&dn->dn_objset->os), + dn->dn_object, dnp->dn_nlevels - 1, 0); + for (j = 0; j < dnp->dn_nblkptr; j++) { + czb.zb_blkid = j; + (void) visit_indirect(dmu_objset_spa(&dn->dn_objset->os), dnp, + &dnp->dn_blkptr[j], &czb); + } (void) printf("\n"); - - traverse_fini(th); } /*ARGSUSED*/ @@ -719,7 +702,7 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size) { dsl_dir_phys_t *dd = data; time_t crtime; - char used[6], compressed[6], uncompressed[6], quota[6], resv[6]; + char nice[6]; if (dd == NULL) return; @@ -727,12 +710,6 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size) ASSERT3U(size, >=, sizeof (dsl_dir_phys_t)); crtime = dd->dd_creation_time; - nicenum(dd->dd_used_bytes, used); - nicenum(dd->dd_compressed_bytes, compressed); - nicenum(dd->dd_uncompressed_bytes, uncompressed); - nicenum(dd->dd_quota, quota); - nicenum(dd->dd_reserved, resv); - (void) printf("\t\tcreation_time = %s", ctime(&crtime)); (void) printf("\t\thead_dataset_obj = %llu\n", (u_longlong_t)dd->dd_head_dataset_obj); @@ -742,15 +719,32 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size) (u_longlong_t)dd->dd_origin_obj); (void) printf("\t\tchild_dir_zapobj = %llu\n", (u_longlong_t)dd->dd_child_dir_zapobj); - (void) printf("\t\tused_bytes = %s\n", used); - (void) printf("\t\tcompressed_bytes = %s\n", compressed); - (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); - (void) printf("\t\tquota = %s\n", quota); - (void) printf("\t\treserved = %s\n", resv); + nicenum(dd->dd_used_bytes, nice); + (void) printf("\t\tused_bytes = %s\n", nice); + nicenum(dd->dd_compressed_bytes, nice); + (void) printf("\t\tcompressed_bytes = %s\n", nice); + nicenum(dd->dd_uncompressed_bytes, nice); + (void) printf("\t\tuncompressed_bytes = %s\n", nice); + nicenum(dd->dd_quota, nice); + (void) printf("\t\tquota = %s\n", nice); + nicenum(dd->dd_reserved, nice); + (void) printf("\t\treserved = %s\n", nice); (void) printf("\t\tprops_zapobj = %llu\n", (u_longlong_t)dd->dd_props_zapobj); (void) printf("\t\tdeleg_zapobj = %llu\n", (u_longlong_t)dd->dd_deleg_zapobj); + (void) printf("\t\tflags = %llx\n", + (u_longlong_t)dd->dd_flags); + +#define DO(which) \ + nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \ + (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice) + DO(HEAD); + DO(SNAP); + DO(CHILD); + DO(CHILD_RSRV); + DO(REFRSRV); +#undef DO } /*ARGSUSED*/ @@ -773,7 +767,7 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) nicenum(ds->ds_unique_bytes, unique); sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp); - (void) printf("\t\tdataset_obj = %llu\n", + (void) printf("\t\tdir_obj = %llu\n", (u_longlong_t)ds->ds_dir_obj); (void) printf("\t\tprev_snap_obj = %llu\n", (u_longlong_t)ds->ds_prev_snap_obj); @@ -800,6 +794,10 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) (u_longlong_t)ds->ds_guid); (void) printf("\t\tflags = %llx\n", (u_longlong_t)ds->ds_flags); + (void) printf("\t\tnext_clones_obj = %llu\n", + (u_longlong_t)ds->ds_next_clones_obj); + (void) printf("\t\tprops_obj = %llu\n", + (u_longlong_t)ds->ds_props_obj); (void) printf("\t\tbp = %s\n", blkbuf); } @@ -1007,6 +1005,8 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = { dump_uint8, /* ZFS SYSACL */ dump_none, /* FUID nvlist */ dump_packed_nvlist, /* FUID nvlist size */ + dump_zap, /* DSL dataset next clones */ + dump_zap, /* DSL scrub queue */ }; static void @@ -1076,7 +1076,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) } if (verbosity >= 5) - dump_indirect(os, object, NULL, 0); + dump_indirect(dn); if (verbosity >= 5) { /* @@ -1093,13 +1093,13 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) } for (;;) { - error = dnode_next_offset(dn, B_FALSE, &start, minlvl, - blkfill, 0); + error = dnode_next_offset(dn, + 0, &start, minlvl, blkfill, 0); if (error) break; end = start; - error = dnode_next_offset(dn, B_TRUE, &end, minlvl, - blkfill, 0); + error = dnode_next_offset(dn, + DNODE_FIND_HOLE, &end, minlvl, blkfill, 0); nicenum(end - start, segsize); (void) printf("\t\tsegment [%016llx, %016llx)" " size %5s\n", (u_longlong_t)start, @@ -1139,8 +1139,8 @@ dump_dir(objset_t *os) if (dds.dds_type == DMU_OST_META) { dds.dds_creation_txg = TXG_INITIAL; usedobjs = os->os->os_rootbp->blk_fill; - refdbytes = - os->os->os_spa->spa_dsl_pool->dp_mos_dir->dd_used_bytes; + refdbytes = os->os->os_spa->spa_dsl_pool-> + dp_mos_dir->dd_phys->dd_used_bytes; } else { dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch); } @@ -1174,6 +1174,9 @@ dump_dir(objset_t *os) if (verbosity < 2) return; + if (os->os->os_rootbp->blk_birth == 0) + return; + if (zopt_objects != 0) { for (i = 0; i < zopt_objects; i++) dump_object(os, zopt_object[i], verbosity, @@ -1235,6 +1238,52 @@ dump_config(const char *pool) } static void +dump_cachefile(const char *cachefile) +{ + int fd; + struct stat64 statbuf; + char *buf; + nvlist_t *config; + + if ((fd = open64(cachefile, O_RDONLY)) < 0) { + (void) printf("cannot open '%s': %s\n", cachefile, + strerror(errno)); + exit(1); + } + + if (fstat64(fd, &statbuf) != 0) { + (void) printf("failed to stat '%s': %s\n", cachefile, + strerror(errno)); + exit(1); + } + + if ((buf = malloc(statbuf.st_size)) == NULL) { + (void) fprintf(stderr, "failed to allocate %llu bytes\n", + (u_longlong_t)statbuf.st_size); + exit(1); + } + + if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { + (void) fprintf(stderr, "failed to read %llu bytes\n", + (u_longlong_t)statbuf.st_size); + exit(1); + } + + (void) close(fd); + + if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) { + (void) fprintf(stderr, "failed to unpack nvlist\n"); + exit(1); + } + + free(buf); + + dump_nvlist(config, 0); + + nvlist_free(config); +} + +static void dump_label(const char *dev) { int fd; @@ -1290,7 +1339,7 @@ dump_one_dir(char *dsname, void *arg) objset_t *os; error = dmu_objset_open(dsname, DMU_OST_ANY, - DS_MODE_STANDARD | DS_MODE_READONLY, &os); + DS_MODE_USER | DS_MODE_READONLY, &os); if (error) { (void) printf("Could not open %s\n", dsname); return (0); @@ -1302,159 +1351,72 @@ dump_one_dir(char *dsname, void *arg) } static void -zdb_space_map_load(spa_t *spa) +zdb_leak(space_map_t *sm, uint64_t start, uint64_t size) { - vdev_t *rvd = spa->spa_root_vdev; - vdev_t *vd; - int c, m, error; + vdev_t *vd = sm->sm_ppd; - for (c = 0; c < rvd->vdev_children; c++) { - vd = rvd->vdev_child[c]; - for (m = 0; m < vd->vdev_ms_count; m++) { - metaslab_t *msp = vd->vdev_ms[m]; - mutex_enter(&msp->ms_lock); - error = space_map_load(&msp->ms_allocmap[0], NULL, - SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset); - mutex_exit(&msp->ms_lock); - if (error) - fatal("%s bad space map #%d, error %d", - spa->spa_name, c, error); - } - } + (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", + (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size); } -static int -zdb_space_map_claim(spa_t *spa, blkptr_t *bp, zbookmark_t *zb) +/* ARGSUSED */ +static void +zdb_space_map_load(space_map_t *sm) { - dva_t *dva = bp->blk_dva; - vdev_t *vd; - metaslab_t *msp; - space_map_t *allocmap, *freemap; - int error; - int d; - blkptr_t blk = *bp; - - for (d = 0; d < BP_GET_NDVAS(bp); d++) { - uint64_t vdev = DVA_GET_VDEV(&dva[d]); - uint64_t offset = DVA_GET_OFFSET(&dva[d]); - uint64_t size = DVA_GET_ASIZE(&dva[d]); - - if ((vd = vdev_lookup_top(spa, vdev)) == NULL) - return (ENXIO); - - if ((offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) - return (ENXIO); - - msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; - allocmap = &msp->ms_allocmap[0]; - freemap = &msp->ms_freemap[0]; - - /* Prepare our copy of the bp in case we need to read GBHs */ - if (DVA_GET_GANG(&dva[d])) { - size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); - DVA_SET_ASIZE(&blk.blk_dva[d], size); - DVA_SET_GANG(&blk.blk_dva[d], 0); - } - - mutex_enter(&msp->ms_lock); - if (space_map_contains(freemap, offset, size)) { - mutex_exit(&msp->ms_lock); - return (EAGAIN); /* allocated more than once */ - } - - if (!space_map_contains(allocmap, offset, size)) { - mutex_exit(&msp->ms_lock); - return (ESTALE); /* not allocated at all */ - } - - space_map_remove(allocmap, offset, size); - space_map_add(freemap, offset, size); - - mutex_exit(&msp->ms_lock); - } - - if (BP_IS_GANG(bp)) { - zio_gbh_phys_t gbh; - int g; - - /* LINTED - compile time assert */ - ASSERT(sizeof (zio_gbh_phys_t) == SPA_GANGBLOCKSIZE); - - BP_SET_CHECKSUM(&blk, ZIO_CHECKSUM_GANG_HEADER); - BP_SET_PSIZE(&blk, SPA_GANGBLOCKSIZE); - BP_SET_LSIZE(&blk, SPA_GANGBLOCKSIZE); - BP_SET_COMPRESS(&blk, ZIO_COMPRESS_OFF); - error = zio_wait(zio_read(NULL, spa, &blk, &gbh, - SPA_GANGBLOCKSIZE, NULL, NULL, ZIO_PRIORITY_SYNC_READ, - ZIO_FLAG_CANFAIL | ZIO_FLAG_CONFIG_HELD, zb)); - if (error) - return (error); - if (BP_SHOULD_BYTESWAP(&blk)) - byteswap_uint64_array(&gbh, SPA_GANGBLOCKSIZE); - for (g = 0; g < SPA_GBH_NBLKPTRS; g++) { - if (BP_IS_HOLE(&gbh.zg_blkptr[g])) - break; - error = zdb_space_map_claim(spa, &gbh.zg_blkptr[g], zb); - if (error) - return (error); - } - } - - return (0); } static void -zdb_leak(space_map_t *sm, uint64_t start, uint64_t size) +zdb_space_map_unload(space_map_t *sm) { - metaslab_t *msp; - - /* LINTED */ - msp = (metaslab_t *)((char *)sm - offsetof(metaslab_t, ms_allocmap[0])); + space_map_vacate(sm, zdb_leak, sm); +} - (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", - (u_longlong_t)msp->ms_group->mg_vd->vdev_id, - (u_longlong_t)start, - (u_longlong_t)size); +/* ARGSUSED */ +static void +zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size) +{ } +static space_map_ops_t zdb_space_map_ops = { + zdb_space_map_load, + zdb_space_map_unload, + NULL, /* alloc */ + zdb_space_map_claim, + NULL /* free */ +}; + static void -zdb_space_map_unload(spa_t *spa) +zdb_leak_init(spa_t *spa) { vdev_t *rvd = spa->spa_root_vdev; - vdev_t *vd; - int c, m; - for (c = 0; c < rvd->vdev_children; c++) { - vd = rvd->vdev_child[c]; - for (m = 0; m < vd->vdev_ms_count; m++) { + for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *vd = rvd->vdev_child[c]; + for (int m = 0; m < vd->vdev_ms_count; m++) { metaslab_t *msp = vd->vdev_ms[m]; mutex_enter(&msp->ms_lock); - space_map_vacate(&msp->ms_allocmap[0], zdb_leak, - &msp->ms_allocmap[0]); - space_map_unload(&msp->ms_allocmap[0]); - space_map_vacate(&msp->ms_freemap[0], NULL, NULL); + VERIFY(space_map_load(&msp->ms_map, &zdb_space_map_ops, + SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset) == 0); + msp->ms_map.sm_ppd = vd; mutex_exit(&msp->ms_lock); } } } static void -zdb_refresh_ubsync(spa_t *spa) +zdb_leak_fini(spa_t *spa) { - uberblock_t ub = { 0 }; vdev_t *rvd = spa->spa_root_vdev; - zio_t *zio; - - /* - * Reload the uberblock. - */ - zio = zio_root(spa, NULL, NULL, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); - vdev_uberblock_load(zio, rvd, &ub); - (void) zio_wait(zio); - if (ub.ub_txg != 0) - spa->spa_ubsync = ub; + for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *vd = rvd->vdev_child[c]; + for (int m = 0; m < vd->vdev_ms_count; m++) { + metaslab_t *msp = vd->vdev_ms[m]; + mutex_enter(&msp->ms_lock); + space_map_unload(&msp->ms_map); + mutex_exit(&msp->ms_lock); + } + } } /* @@ -1471,22 +1433,19 @@ typedef struct zdb_blkstats { #define DMU_OT_DEFERRED DMU_OT_NONE #define DMU_OT_TOTAL DMU_OT_NUMTYPES -#define ZB_TOTAL ZB_MAXLEVEL +#define ZB_TOTAL DN_MAX_LEVELS typedef struct zdb_cb { zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1]; uint64_t zcb_errors[256]; - traverse_blk_cache_t *zcb_cache; int zcb_readfails; int zcb_haderrors; } zdb_cb_t; static void -zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type) +zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type) { - int i, error; - - for (i = 0; i < 4; i++) { + for (int i = 0; i < 4; i++) { int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; int t = (i & 1) ? type : DMU_OT_TOTAL; zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; @@ -1500,7 +1459,7 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type) if (dump_opt['S']) { boolean_t print_sig; - print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 && + print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS); if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg) @@ -1522,100 +1481,80 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type) } } - if (dump_opt['L']) - return; - - error = zdb_space_map_claim(spa, bp, &zcb->zcb_cache->bc_bookmark); - - if (error == 0) - return; - - if (error == EAGAIN) - (void) fatal("double-allocation, bp=%p", bp); - - if (error == ESTALE) - (void) fatal("reference to freed block, bp=%p", bp); - - (void) fatal("fatal error %d in bp %p", error, bp); + VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp, + NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0); } static int -zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) +zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb, + const dnode_phys_t *dnp, void *arg) { - zbookmark_t *zb = &bc->bc_bookmark; zdb_cb_t *zcb = arg; - blkptr_t *bp = &bc->bc_blkptr; - dmu_object_type_t type = BP_GET_TYPE(bp); char blkbuf[BP_SPRINTF_LEN]; - int error = 0; - if (bc->bc_errno) { - if (zcb->zcb_readfails++ < 10 && dump_opt['L']) { - zdb_refresh_ubsync(spa); - error = EAGAIN; - } else { + if (bp == NULL) + return (0); + + zdb_count_block(spa, zcb, bp, BP_GET_TYPE(bp)); + + if (dump_opt['c'] || dump_opt['S']) { + int ioerr, size; + void *data; + + size = BP_GET_LSIZE(bp); + data = malloc(size); + ioerr = zio_wait(zio_read(NULL, spa, bp, data, size, + NULL, NULL, ZIO_PRIORITY_ASYNC_READ, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB, zb)); + free(data); + + /* We expect io errors on intent log */ + if (ioerr && BP_GET_TYPE(bp) != DMU_OT_INTENT_LOG) { zcb->zcb_haderrors = 1; - zcb->zcb_errors[bc->bc_errno]++; - error = ERESTART; - } + zcb->zcb_errors[ioerr]++; - if (dump_opt['b'] >= 3 || (dump_opt['b'] >= 2 && bc->bc_errno)) - sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp); - else - blkbuf[0] = '\0'; - - if (!dump_opt['S']) { - (void) printf("zdb_blkptr_cb: Got error %d reading " - "<%llu, %llu, %lld, %llx> %s -- %s\n", - bc->bc_errno, - (u_longlong_t)zb->zb_objset, - (u_longlong_t)zb->zb_object, - (u_longlong_t)zb->zb_level, - (u_longlong_t)zb->zb_blkid, - blkbuf, - error == EAGAIN ? "retrying" : "skipping"); + if (dump_opt['b'] >= 2) + sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp); + else + blkbuf[0] = '\0'; + + if (!dump_opt['S']) { + (void) printf("zdb_blkptr_cb: " + "Got error %d reading " + "<%llu, %llu, %lld, %llx> %s -- skipping\n", + ioerr, + (u_longlong_t)zb->zb_objset, + (u_longlong_t)zb->zb_object, + (u_longlong_t)zb->zb_level, + (u_longlong_t)zb->zb_blkid, + blkbuf); + } } - - return (error); } zcb->zcb_readfails = 0; - ASSERT(!BP_IS_HOLE(bp)); - if (dump_opt['b'] >= 4) { sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp); (void) printf("objset %llu object %llu offset 0x%llx %s\n", (u_longlong_t)zb->zb_objset, (u_longlong_t)zb->zb_object, - (u_longlong_t)blkid2offset(bc->bc_dnode, - zb->zb_level, zb->zb_blkid), blkbuf); + (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid), + blkbuf); } - zdb_count_block(spa, zcb, bp, type); - return (0); } static int dump_block_stats(spa_t *spa) { - traverse_handle_t *th; zdb_cb_t zcb = { 0 }; - traverse_blk_cache_t dummy_cache = { 0 }; zdb_blkstats_t *zb, *tzb; uint64_t alloc, space, logalloc; vdev_t *rvd = spa->spa_root_vdev; int leaks = 0; - int advance = zdb_advance; - int c, e, flags; - - zcb.zcb_cache = &dummy_cache; - - if (dump_opt['c'] || dump_opt['S']) - advance |= ADVANCE_DATA; - - advance |= ADVANCE_PRUNE | ADVANCE_ZIL; + int c, e; if (!dump_opt['S']) { (void) printf("\nTraversing all blocks to %sverify" @@ -1624,14 +1563,14 @@ dump_block_stats(spa_t *spa) } /* - * Load all space maps. As we traverse the pool, if we find a block - * that's not in its space map, that indicates a double-allocation, - * reference to a freed block, or an unclaimed block. Otherwise we - * remove the block from the space map. If the space maps are not - * empty when we're done, that indicates leaked blocks. + * Load all space maps as SM_ALLOC maps, then traverse the pool + * claiming each block we discover. If the pool is perfectly + * consistent, the space maps will be empty when we're done. + * Anything left over is a leak; any block we can't claim (because + * it's not part of any space map) is a double allocation, + * reference to a freed block, or an unclaimed log block. */ - if (!dump_opt['L']) - zdb_space_map_load(spa); + zdb_leak_init(spa); /* * If there's a deferred-free bplist, process that first. @@ -1657,22 +1596,7 @@ dump_block_stats(spa_t *spa) bplist_close(bpl); } - /* - * Now traverse the pool. If we're reading all data to verify - * checksums, do a scrubbing read so that we validate all copies. - */ - flags = ZIO_FLAG_CANFAIL; - if (advance & ADVANCE_DATA) - flags |= ZIO_FLAG_SCRUB; - th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags); - th->th_noread = zdb_noread; - - traverse_add_pool(th, 0, spa_first_txg(spa) + TXG_CONCURRENT_STATES); - - while (traverse_more(th) == EAGAIN) - continue; - - traverse_fini(th); + zcb.zcb_haderrors |= traverse_pool(spa, zdb_blkptr_cb, &zcb); if (zcb.zcb_haderrors && !dump_opt['S']) { (void) printf("\nError counts:\n\n"); @@ -1688,8 +1612,7 @@ dump_block_stats(spa_t *spa) /* * Report any leaked segments. */ - if (!dump_opt['L']) - zdb_space_map_unload(spa); + zdb_leak_fini(spa); /* * If we're interested in printing out the blkptr signatures, @@ -1699,10 +1622,6 @@ dump_block_stats(spa_t *spa) if (dump_opt['S']) return (zcb.zcb_haderrors ? 3 : 0); - if (dump_opt['L']) - (void) printf("\n\n *** Live pool traversal; " - "block counts are only approximate ***\n\n"); - alloc = spa_get_alloc(spa); space = spa_get_space(spa); @@ -1830,8 +1749,6 @@ dump_zpool(spa_t *spa) dsl_pool_t *dp = spa_get_dsl(spa); int rc = 0; - spa_config_enter(spa, RW_READER, FTAG); - if (dump_opt['u']) dump_uberblock(&spa->spa_uberblock); @@ -1843,7 +1760,7 @@ dump_zpool(spa_t *spa) dump_dtl(spa->spa_root_vdev, 0); dump_metaslabs(spa); } - (void) dmu_objset_find(spa->spa_name, dump_one_dir, NULL, + (void) dmu_objset_find(spa_name(spa), dump_one_dir, NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); } @@ -1853,8 +1770,6 @@ dump_zpool(spa_t *spa) if (dump_opt['s']) show_pool_stats(spa); - spa_config_exit(spa, FTAG); - if (rc != 0) exit(rc); } @@ -2061,12 +1976,12 @@ zdb_read_block(char *thing, spa_t **spap) zio_t *zio; vdev_t *vd; void *buf; - char *s, *p, *dup, *spa_name, *vdev, *flagstr; + char *s, *p, *dup, *pool, *vdev, *flagstr; int i, error, zio_flags; dup = strdup(thing); s = strtok(dup, ":"); - spa_name = s ? s : ""; + pool = s ? s : ""; s = strtok(NULL, ":"); vdev = s ? s : ""; s = strtok(NULL, ":"); @@ -2116,14 +2031,13 @@ zdb_read_block(char *thing, spa_t **spap) } } - if (spa == NULL || spa->spa_name == NULL || - strcmp(spa->spa_name, spa_name)) { - if (spa && spa->spa_name) + if (spa == NULL || strcmp(spa_name(spa), pool) != 0) { + if (spa) spa_close(spa, (void *)zdb_read_block); - error = spa_open(spa_name, spap, (void *)zdb_read_block); + error = spa_open(pool, spap, (void *)zdb_read_block); if (error) fatal("Failed to open pool '%s': %s", - spa_name, strerror(error)); + pool, strerror(error)); spa = *spap; } @@ -2143,16 +2057,15 @@ zdb_read_block(char *thing, spa_t **spap) buf = umem_alloc(size, UMEM_NOFAIL); zio_flags = ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE | - ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_NOBOOKMARK; - - if (flags & ZDB_FLAG_PHYS) - zio_flags |= ZIO_FLAG_PHYSICAL; + ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY; + spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); zio = zio_root(spa, NULL, NULL, 0); /* XXX todo - cons up a BP so RAID-Z will be happy */ zio_nowait(zio_vdev_child_io(zio, NULL, vd, offset, buf, size, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, zio_flags, NULL, NULL)); error = zio_wait(zio); + spa_config_exit(spa, SCL_STATE, FTAG); if (error) { (void) printf("Read of %s failed, error: %d\n", thing, error); @@ -2182,7 +2095,9 @@ nvlist_string_match(nvlist_t *config, char *name, char *tgt) { char *s; - verify(nvlist_lookup_string(config, name, &s) == 0); + if (nvlist_lookup_string(config, name, &s) != 0) + return (B_FALSE); + return (strcmp(s, tgt) == 0); } @@ -2191,7 +2106,9 @@ nvlist_uint64_match(nvlist_t *config, char *name, uint64_t tgt) { uint64_t val; - verify(nvlist_lookup_uint64(config, name, &val) == 0); + if (nvlist_lookup_uint64(config, name, &val) != 0) + return (B_FALSE); + return (val == tgt); } @@ -2265,19 +2182,16 @@ pool_match(nvlist_t *config, char *tgt) } static int -find_exported_zpool(char *pool_id, nvlist_t **configp, char *vdev_dir, - char *cachefile) +find_exported_zpool(char *pool_id, nvlist_t **configp, char *vdev_dir) { nvlist_t *pools; int error = ENOENT; nvlist_t *match = NULL; if (vdev_dir != NULL) - pools = zpool_find_import(g_zfs, 1, &vdev_dir, B_TRUE); - else if (cachefile != NULL) - pools = zpool_find_import_cached(g_zfs, cachefile, B_TRUE); + pools = zpool_find_import_activeok(g_zfs, 1, &vdev_dir); else - pools = zpool_find_import(g_zfs, 0, NULL, B_TRUE); + pools = zpool_find_import_activeok(g_zfs, 0, NULL); if (pools != NULL) { nvpair_t *elem = NULL; @@ -2313,17 +2227,15 @@ main(int argc, char **argv) int dump_all = 1; int verbose = 0; int error; - int flag, set; int exported = 0; char *vdev_dir = NULL; - char *cachefile = NULL; (void) setrlimit(RLIMIT_NOFILE, &rl); (void) enable_extended_FILE_stdio(-1, -1); dprintf_setup(&argc, argv); - while ((c = getopt(argc, argv, "udibcsvCLO:B:S:U:lRep:")) != -1) { + while ((c = getopt(argc, argv, "udibcsvCS:U:lRep:")) != -1) { switch (c) { case 'u': case 'd': @@ -2337,54 +2249,11 @@ main(int argc, char **argv) dump_opt[c]++; dump_all = 0; break; - case 'L': - dump_opt[c]++; - break; - case 'O': - endstr = optarg; - if (endstr[0] == '!') { - endstr++; - set = 0; - } else { - set = 1; - } - if (strcmp(endstr, "post") == 0) { - flag = ADVANCE_PRE; - set = !set; - } else if (strcmp(endstr, "pre") == 0) { - flag = ADVANCE_PRE; - } else if (strcmp(endstr, "prune") == 0) { - flag = ADVANCE_PRUNE; - } else if (strcmp(endstr, "data") == 0) { - flag = ADVANCE_DATA; - } else if (strcmp(endstr, "holes") == 0) { - flag = ADVANCE_HOLES; - } else { - usage(); - } - if (set) - zdb_advance |= flag; - else - zdb_advance &= ~flag; - break; - case 'B': - endstr = optarg - 1; - zdb_noread.zb_objset = strtoull(endstr + 1, &endstr, 0); - zdb_noread.zb_object = strtoull(endstr + 1, &endstr, 0); - zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0); - zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16); - (void) printf("simulating bad block " - "<%llu, %llu, %lld, %llx>\n", - (u_longlong_t)zdb_noread.zb_objset, - (u_longlong_t)zdb_noread.zb_object, - (u_longlong_t)zdb_noread.zb_level, - (u_longlong_t)zdb_noread.zb_blkid); - break; case 'v': verbose++; break; case 'U': - cachefile = optarg; + spa_config_path = optarg; break; case 'e': exported = 1; @@ -2416,21 +2285,17 @@ main(int argc, char **argv) } } - if (vdev_dir != NULL && exported == 0) - (void) fatal("-p option requires use of -e\n"); + if (vdev_dir != NULL && exported == 0) { + (void) fprintf(stderr, "-p option requires use of -e\n"); + usage(); + } kernel_init(FREAD); g_zfs = libzfs_init(); ASSERT(g_zfs != NULL); - /* - * Disable vdev caching. If we don't do this, live pool traversal - * won't make progress because it will never see disk updates. - */ - zfs_vdev_cache_size = 0; - for (c = 0; c < 256; c++) { - if (dump_all && c != 'L' && c != 'l' && c != 'R') + if (dump_all && c != 'l' && c != 'R') dump_opt[c] = 1; if (dump_opt[c]) dump_opt[c] += verbose; @@ -2441,7 +2306,7 @@ main(int argc, char **argv) if (argc < 1) { if (dump_opt['C']) { - dump_config(NULL); + dump_cachefile(spa_config_path); return (0); } usage(); @@ -2476,21 +2341,18 @@ main(int argc, char **argv) if (dump_opt['C']) dump_config(argv[0]); - if (exported == 0 && cachefile == NULL) { - if (strchr(argv[0], '/') != NULL) { - error = dmu_objset_open(argv[0], DMU_OST_ANY, - DS_MODE_STANDARD | DS_MODE_READONLY, &os); - } else { - error = spa_open(argv[0], &spa, FTAG); - } - } else { + error = 0; + if (exported) { /* * Check to see if the name refers to an exported zpool */ + char *slash; nvlist_t *exported_conf = NULL; - error = find_exported_zpool(argv[0], &exported_conf, vdev_dir, - cachefile); + if ((slash = strchr(argv[0], '/')) != NULL) + *slash = '\0'; + + error = find_exported_zpool(argv[0], &exported_conf, vdev_dir); if (error == 0) { nvlist_t *nvl = NULL; @@ -2504,12 +2366,23 @@ main(int argc, char **argv) } if (error == 0) - error = spa_import(argv[0], exported_conf, nvl); - if (error == 0) - error = spa_open(argv[0], &spa, FTAG); + error = spa_import_faulted(argv[0], + exported_conf, nvl); nvlist_free(nvl); } + + if (slash != NULL) + *slash = '/'; + } + + if (error == 0) { + if (strchr(argv[0], '/') != NULL) { + error = dmu_objset_open(argv[0], DMU_OST_ANY, + DS_MODE_USER | DS_MODE_READONLY, &os); + } else { + error = spa_open(argv[0], &spa, FTAG); + } } if (error)