Add AUTHORS to master branch
[zfs.git] / module / zfs / dsl_scrub.c
index dbdfe8c..03ebb90 100644 (file)
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -45,6 +45,8 @@ typedef int (scrub_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
 
 static scrub_cb_t dsl_pool_scrub_clean_cb;
 static dsl_syncfunc_t dsl_pool_scrub_cancel_sync;
+static void scrub_visitdnode(dsl_pool_t *dp, dnode_phys_t *dnp, arc_buf_t *buf,
+    uint64_t objset, uint64_t object);
 
 int zfs_scrub_min_time = 1; /* scrub for at least 1 sec each txg */
 int zfs_resilver_min_time = 3; /* resilver for at least 3 sec each txg */
@@ -95,6 +97,9 @@ dsl_pool_scrub_setup_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
                            ESC_ZFS_RESILVER_START);
                        dp->dp_scrub_max_txg = MIN(dp->dp_scrub_max_txg,
                            tx->tx_txg);
+               } else {
+                       spa_event_notify(dp->dp_spa, NULL,
+                           ESC_ZFS_SCRUB_START);
                }
 
                /* zero out the scrub stats in all vdev_stat_t's */
@@ -212,8 +217,9 @@ dsl_pool_scrub_cancel_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
         */
        vdev_dtl_reassess(dp->dp_spa->spa_root_vdev, tx->tx_txg,
            *completep ? dp->dp_scrub_max_txg : 0, B_TRUE);
-       if (dp->dp_scrub_min_txg && *completep)
-               spa_event_notify(dp->dp_spa, NULL, ESC_ZFS_RESILVER_FINISH);
+       if (*completep)
+               spa_event_notify(dp->dp_spa, NULL, dp->dp_scrub_min_txg ?
+                   ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH);
        spa_errlog_rotate(dp->dp_spa);
 
        /*
@@ -344,6 +350,12 @@ traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
        if (bp->blk_birth <= dp->dp_scrub_min_txg)
                return;
 
+       /*
+        * One block ("stubby") may have been allocated long ago; we
+        * want to visit that one because it has been allocated
+        * (on-disk) even if it hasn't been claimed (even though for
+        * a plain scrub there is nothing to do to it).
+        */
        if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(dp->dp_spa))
                return;
 
@@ -369,6 +381,11 @@ traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
                if (bp->blk_birth <= dp->dp_scrub_min_txg)
                        return;
 
+               /*
+                * birth can be < claim_txg if this record's txg has
+                * already been synced (but this log block contains
+                * other records that are not synced)
+                */
                if (claim_txg == 0 || bp->blk_birth < claim_txg)
                        return;
 
@@ -468,7 +485,7 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
        } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
                uint32_t flags = ARC_WAIT;
                dnode_phys_t *child_dnp;
-               int i, j;
+               int i;
                int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
 
                err = arc_read(NULL, dp->dp_spa, bp, pbuf,
@@ -483,20 +500,12 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
                child_dnp = buf->b_data;
 
                for (i = 0; i < epb; i++, child_dnp++) {
-                       for (j = 0; j < child_dnp->dn_nblkptr; j++) {
-                               zbookmark_t czb;
-
-                               SET_BOOKMARK(&czb, zb->zb_objset,
-                                   zb->zb_blkid * epb + i,
-                                   child_dnp->dn_nlevels - 1, j);
-                               scrub_visitbp(dp, child_dnp, buf,
-                                   &child_dnp->dn_blkptr[j], &czb);
-                       }
+                       scrub_visitdnode(dp, child_dnp, buf, zb->zb_objset,
+                           zb->zb_blkid * epb + i);
                }
        } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
                uint32_t flags = ARC_WAIT;
                objset_phys_t *osp;
-               int j;
 
                err = arc_read_nolock(NULL, dp->dp_spa, bp,
                    arc_getbuf_func, &buf,
@@ -512,13 +521,13 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
 
                traverse_zil(dp, &osp->os_zil_header);
 
-               for (j = 0; j < osp->os_meta_dnode.dn_nblkptr; j++) {
-                       zbookmark_t czb;
-
-                       SET_BOOKMARK(&czb, zb->zb_objset, 0,
-                           osp->os_meta_dnode.dn_nlevels - 1, j);
-                       scrub_visitbp(dp, &osp->os_meta_dnode, buf,
-                           &osp->os_meta_dnode.dn_blkptr[j], &czb);
+               scrub_visitdnode(dp, &osp->os_meta_dnode,
+                   buf, zb->zb_objset, 0);
+               if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
+                       scrub_visitdnode(dp, &osp->os_userused_dnode,
+                           buf, zb->zb_objset, 0);
+                       scrub_visitdnode(dp, &osp->os_groupused_dnode,
+                           buf, zb->zb_objset, 0);
                }
        }
 
@@ -528,6 +537,21 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
 }
 
 static void
+scrub_visitdnode(dsl_pool_t *dp, dnode_phys_t *dnp, arc_buf_t *buf,
+    uint64_t objset, uint64_t object)
+{
+       int j;
+
+       for (j = 0; j < dnp->dn_nblkptr; j++) {
+               zbookmark_t czb;
+
+               SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
+               scrub_visitbp(dp, dnp, buf, &dnp->dn_blkptr[j], &czb);
+       }
+
+}
+
+static void
 scrub_visit_rootbp(dsl_pool_t *dp, dsl_dataset_t *ds, blkptr_t *bp)
 {
        zbookmark_t zb;
@@ -1000,6 +1024,8 @@ dsl_pool_scrub_clean_cb(dsl_pool_t *dp,
 int
 dsl_pool_scrub_clean(dsl_pool_t *dp)
 {
+       spa_t *spa = dp->dp_spa;
+
        /*
         * Purge all vdev caches.  We do this here rather than in sync
         * context because this requires a writer lock on the spa_config
@@ -1007,11 +1033,11 @@ dsl_pool_scrub_clean(dsl_pool_t *dp)
         * spa_scrub_reopen flag indicates that vdev_open() should not
         * attempt to start another scrub.
         */
-       spa_config_enter(dp->dp_spa, SCL_ALL, FTAG, RW_WRITER);
-       dp->dp_spa->spa_scrub_reopen = B_TRUE;
-       vdev_reopen(dp->dp_spa->spa_root_vdev);
-       dp->dp_spa->spa_scrub_reopen = B_FALSE;
-       spa_config_exit(dp->dp_spa, SCL_ALL, FTAG);
+       spa_vdev_state_enter(spa);
+       spa->spa_scrub_reopen = B_TRUE;
+       vdev_reopen(spa->spa_root_vdev);
+       spa->spa_scrub_reopen = B_FALSE;
+       (void) spa_vdev_state_exit(spa, NULL, 0);
 
        return (dsl_pool_scrub_setup(dp, SCRUB_FUNC_CLEAN));
 }