Add Solaris FMA style support
[zfs.git] / module / zfs / dbuf.c
index 113fa1f..1b6f242 100644 (file)
@@ -329,7 +329,7 @@ dbuf_verify(dmu_buf_impl_t *db)
                if (db->db_parent == dn->dn_dbuf) {
                        /* db is pointed to by the dnode */
                        /* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */
-                       if (db->db.db_object == DMU_META_DNODE_OBJECT)
+                       if (DMU_OBJECT_IS_SPECIAL(db->db.db_object))
                                ASSERT(db->db_parent == NULL);
                        else
                                ASSERT(db->db_parent != NULL);
@@ -465,15 +465,15 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
        ASSERT(db->db_buf == NULL);
 
        if (db->db_blkid == DB_BONUS_BLKID) {
-               int bonuslen = dn->dn_bonuslen;
+               int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
 
                ASSERT3U(bonuslen, <=, db->db.db_size);
                db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
                arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
                if (bonuslen < DN_MAX_BONUSLEN)
                        bzero(db->db.db_data, DN_MAX_BONUSLEN);
-               bcopy(DN_BONUS(dn->dn_phys), db->db.db_data,
-                   bonuslen);
+               if (bonuslen)
+                       bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen);
                dbuf_update_data(db);
                db->db_state = DB_CACHED;
                mutex_exit(&db->db_mtx);
@@ -908,15 +908,11 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
         * Shouldn't dirty a regular buffer in syncing context.  Private
         * objects may be dirtied in syncing context, but only if they
         * were already pre-dirtied in open context.
-        * XXX We may want to prohibit dirtying in syncing context even
-        * if they did pre-dirty.
         */
        ASSERT(!dmu_tx_is_syncing(tx) ||
            BP_IS_HOLE(dn->dn_objset->os_rootbp) ||
-           dn->dn_object == DMU_META_DNODE_OBJECT ||
-           dn->dn_objset->os_dsl_dataset == NULL ||
-           dsl_dir_is_private(dn->dn_objset->os_dsl_dataset->ds_dir));
-
+           DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
+           dn->dn_objset->os_dsl_dataset == NULL);
        /*
         * We make this assert for private objects as well, but after we
         * check if we're already dirty.  They are allowed to re-dirty
@@ -975,7 +971,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
        /*
         * Only valid if not already dirty.
         */
-       ASSERT(dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
+       ASSERT(dn->dn_object == 0 ||
+           dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
            (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
 
        ASSERT3U(dn->dn_nlevels, >, db->db_level);
@@ -987,15 +984,13 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 
        /*
         * We should only be dirtying in syncing context if it's the
-        * mos, a spa os, or we're initializing the os.  However, we are
-        * allowed to dirty in syncing context provided we already
-        * dirtied it in open context.  Hence we must make this
-        * assertion only if we're not already dirty.
+        * mos or we're initializing the os or it's a special object.
+        * However, we are allowed to dirty in syncing context provided
+        * we already dirtied it in open context.  Hence we must make
+        * this assertion only if we're not already dirty.
         */
-       ASSERT(!dmu_tx_is_syncing(tx) ||
-           os->os_dsl_dataset == NULL ||
-           !dsl_dir_is_private(os->os_dsl_dataset->ds_dir) ||
-           !BP_IS_HOLE(os->os_rootbp));
+       ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
+           os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp));
        ASSERT(db->db.db_size != 0);
 
        dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
@@ -1312,6 +1307,68 @@ dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx)
 }
 
 /*
+ * Directly assign a provided arc buf to a given dbuf if it's not referenced
+ * by anybody except our caller. Otherwise copy arcbuf's contents to dbuf.
+ */
+void
+dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
+{
+       ASSERT(!refcount_is_zero(&db->db_holds));
+       ASSERT(db->db_dnode->dn_object != DMU_META_DNODE_OBJECT);
+       ASSERT(db->db_blkid != DB_BONUS_BLKID);
+       ASSERT(db->db_level == 0);
+       ASSERT(DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA);
+       ASSERT(buf != NULL);
+       ASSERT(arc_buf_size(buf) == db->db.db_size);
+       ASSERT(tx->tx_txg != 0);
+
+       arc_return_buf(buf, db);
+       ASSERT(arc_released(buf));
+
+       mutex_enter(&db->db_mtx);
+
+       while (db->db_state == DB_READ || db->db_state == DB_FILL)
+               cv_wait(&db->db_changed, &db->db_mtx);
+
+       ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED);
+
+       if (db->db_state == DB_CACHED &&
+           refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) {
+               mutex_exit(&db->db_mtx);
+               (void) dbuf_dirty(db, tx);
+               bcopy(buf->b_data, db->db.db_data, db->db.db_size);
+               VERIFY(arc_buf_remove_ref(buf, db) == 1);
+               return;
+       }
+
+       if (db->db_state == DB_CACHED) {
+               dbuf_dirty_record_t *dr = db->db_last_dirty;
+
+               ASSERT(db->db_buf != NULL);
+               if (dr != NULL && dr->dr_txg == tx->tx_txg) {
+                       ASSERT(dr->dt.dl.dr_data == db->db_buf);
+                       if (!arc_released(db->db_buf)) {
+                               ASSERT(dr->dt.dl.dr_override_state ==
+                                   DR_OVERRIDDEN);
+                               arc_release(db->db_buf, db);
+                       }
+                       dr->dt.dl.dr_data = buf;
+                       VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
+               } else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) {
+                       arc_release(db->db_buf, db);
+                       VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
+               }
+               db->db_buf = NULL;
+       }
+       ASSERT(db->db_buf == NULL);
+       dbuf_set_data(db, buf);
+       db->db_state = DB_FILL;
+       mutex_exit(&db->db_mtx);
+       (void) dbuf_dirty(db, tx);
+       dbuf_fill_done(db, tx);
+}
+
+/*
  * "Clear" the contents of this dbuf.  This will mark the dbuf
  * EVICTING and clear *most* of its references.  Unfortunetely,
  * when we are not holding the dn_dbufs_mtx, we can't clear the
@@ -1855,6 +1912,19 @@ dmu_buf_get_user(dmu_buf_t *db_fake)
        return (db->db_user_ptr);
 }
 
+boolean_t
+dmu_buf_freeable(dmu_buf_t *dbuf)
+{
+       boolean_t res = B_FALSE;
+       dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
+
+       if (db->db_blkptr)
+               res = dsl_dataset_block_freeable(db->db_objset->os_dsl_dataset,
+                   db->db_blkptr->blk_birth);
+
+       return (res);
+}
+
 static void
 dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
 {
@@ -1943,7 +2013,6 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
        dnode_t *dn = db->db_dnode;
        objset_impl_t *os = dn->dn_objset;
        uint64_t txg = tx->tx_txg;
-       int blksz;
 
        ASSERT(dmu_tx_is_syncing(tx));
 
@@ -2049,32 +2118,25 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
                return;
        }
 
-       if (db->db_state != DB_NOFILL) {
-               blksz = arc_buf_size(*datap);
-
-               if (dn->dn_object != DMU_META_DNODE_OBJECT) {
-                       /*
-                        * If this buffer is currently "in use" (i.e., there
-                        * are active holds and db_data still references it),
-                        * then make a copy before we start the write so that
-                        * any modifications from the open txg will not leak
-                        * into this write.
-                        *
-                        * NOTE: this copy does not need to be made for
-                        * objects only modified in the syncing context (e.g.
-                        * DNONE_DNODE blocks).
-                        */
-                       if (refcount_count(&db->db_holds) > 1 &&
-                           *datap == db->db_buf) {
-                               arc_buf_contents_t type =
-                                   DBUF_GET_BUFC_TYPE(db);
-                               *datap =
-                                   arc_buf_alloc(os->os_spa, blksz, db, type);
-                               bcopy(db->db.db_data, (*datap)->b_data, blksz);
-                       }
-               }
-
-               ASSERT(*datap != NULL);
+       if (db->db_state != DB_NOFILL &&
+           dn->dn_object != DMU_META_DNODE_OBJECT &&
+           refcount_count(&db->db_holds) > 1 &&
+           *datap == db->db_buf) {
+               /*
+                * If this buffer is currently "in use" (i.e., there
+                * are active holds and db_data still references it),
+                * then make a copy before we start the write so that
+                * any modifications from the open txg will not leak
+                * into this write.
+                *
+                * NOTE: this copy does not need to be made for
+                * objects only modified in the syncing context (e.g.
+                * DNONE_DNODE blocks).
+                */
+               int blksz = arc_buf_size(*datap);
+               arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
+               *datap = arc_buf_alloc(os->os_spa, blksz, db, type);
+               bcopy(db->db.db_data, (*datap)->b_data, blksz);
        }
        db->db_data_pending = dr;