+boolean_t
+dmu_objset_is_dirty(objset_t *os, uint64_t txg)
+{
+ return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) ||
+ !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
+}
+
+boolean_t
+dmu_objset_is_dirty_anywhere(objset_t *os)
+{
+ int t;
+
+ for (t = 0; t < TXG_SIZE; t++)
+ if (dmu_objset_is_dirty(os, t))
+ return (B_TRUE);
+ return (B_FALSE);
+}
+
+static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
+
+void
+dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
+{
+ used_cbs[ost] = cb;
+}
+
+boolean_t
+dmu_objset_userused_enabled(objset_t *os)
+{
+ return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
+ used_cbs[os->os_phys->os_type] != NULL &&
+ DMU_USERUSED_DNODE(os) != NULL);
+}
+
+static void
+do_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
+ uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx)
+{
+ if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
+ int64_t delta = DNODE_SIZE + used;
+ if (subtract)
+ delta = -delta;
+ VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT,
+ user, delta, tx));
+ VERIFY3U(0, ==, zap_increment_int(os, DMU_GROUPUSED_OBJECT,
+ group, delta, tx));
+ }
+}
+
+void
+dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
+{
+ dnode_t *dn;
+ list_t *list = &os->os_synced_dnodes;
+
+ ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));
+
+ while ((dn = list_head(list)) != NULL) {
+ int flags;
+ ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
+ ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
+ dn->dn_phys->dn_flags &
+ DNODE_FLAG_USERUSED_ACCOUNTED);
+
+ /* Allocate the user/groupused objects if necessary. */
+ if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
+ VERIFY(0 == zap_create_claim(os,
+ DMU_USERUSED_OBJECT,
+ DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+ VERIFY(0 == zap_create_claim(os,
+ DMU_GROUPUSED_OBJECT,
+ DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+ }
+
+ /*
+ * We intentionally modify the zap object even if the
+ * net delta is zero. Otherwise
+ * the block of the zap obj could be shared between
+ * datasets but need to be different between them after
+ * a bprewrite.
+ */
+
+ flags = dn->dn_id_flags;
+ ASSERT(flags);
+ if (flags & DN_ID_OLD_EXIST) {
+ do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags,
+ dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx);
+ }
+ if (flags & DN_ID_NEW_EXIST) {
+ do_userquota_update(os, DN_USED_BYTES(dn->dn_phys),
+ dn->dn_phys->dn_flags, dn->dn_newuid,
+ dn->dn_newgid, B_FALSE, tx);
+ }
+
+ mutex_enter(&dn->dn_mtx);
+ dn->dn_oldused = 0;
+ dn->dn_oldflags = 0;
+ if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
+ dn->dn_olduid = dn->dn_newuid;
+ dn->dn_oldgid = dn->dn_newgid;
+ dn->dn_id_flags |= DN_ID_OLD_EXIST;
+ if (dn->dn_bonuslen == 0)
+ dn->dn_id_flags |= DN_ID_CHKED_SPILL;
+ else
+ dn->dn_id_flags |= DN_ID_CHKED_BONUS;
+ }
+ dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
+ mutex_exit(&dn->dn_mtx);
+
+ list_remove(list, dn);
+ dnode_rele(dn, list);
+ }
+}
+
+/*
+ * Returns a pointer to data to find uid/gid from
+ *
+ * If a dirty record for transaction group that is syncing can't
+ * be found then NULL is returned. In the NULL case it is assumed
+ * the uid/gid aren't changing.
+ */
+static void *
+dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx)
+{
+ dbuf_dirty_record_t *dr, **drp;
+ void *data;
+
+ if (db->db_dirtycnt == 0)
+ return (db->db.db_data); /* Nothing is changing */
+
+ for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
+ if (dr->dr_txg == tx->tx_txg)
+ break;
+
+ if (dr == NULL) {
+ data = NULL;
+ } else {
+ dnode_t *dn;
+
+ DB_DNODE_ENTER(dr->dr_dbuf);
+ dn = DB_DNODE(dr->dr_dbuf);
+
+ if (dn->dn_bonuslen == 0 &&
+ dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID)
+ data = dr->dt.dl.dr_data->b_data;
+ else
+ data = dr->dt.dl.dr_data;
+
+ DB_DNODE_EXIT(dr->dr_dbuf);
+ }
+
+ return (data);
+}
+
+void
+dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
+{
+ objset_t *os = dn->dn_objset;
+ void *data = NULL;
+ dmu_buf_impl_t *db = NULL;
+ uint64_t *user = NULL, *group = NULL;
+ int flags = dn->dn_id_flags;
+ int error;
+ boolean_t have_spill = B_FALSE;
+
+ if (!dmu_objset_userused_enabled(dn->dn_objset))
+ return;
+
+ if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
+ DN_ID_CHKED_SPILL)))
+ return;
+
+ if (before && dn->dn_bonuslen != 0)
+ data = DN_BONUS(dn->dn_phys);
+ else if (!before && dn->dn_bonuslen != 0) {
+ if (dn->dn_bonus) {
+ db = dn->dn_bonus;
+ mutex_enter(&db->db_mtx);
+ data = dmu_objset_userquota_find_data(db, tx);
+ } else {
+ data = DN_BONUS(dn->dn_phys);
+ }
+ } else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) {
+ int rf = 0;
+
+ if (RW_WRITE_HELD(&dn->dn_struct_rwlock))
+ rf |= DB_RF_HAVESTRUCT;
+ error = dmu_spill_hold_by_dnode(dn,
+ rf | DB_RF_MUST_SUCCEED,
+ FTAG, (dmu_buf_t **)&db);
+ ASSERT(error == 0);
+ mutex_enter(&db->db_mtx);
+ data = (before) ? db->db.db_data :
+ dmu_objset_userquota_find_data(db, tx);
+ have_spill = B_TRUE;
+ } else {
+ mutex_enter(&dn->dn_mtx);
+ dn->dn_id_flags |= DN_ID_CHKED_BONUS;
+ mutex_exit(&dn->dn_mtx);
+ return;
+ }
+
+ if (before) {
+ ASSERT(data);
+ user = &dn->dn_olduid;
+ group = &dn->dn_oldgid;
+ } else if (data) {
+ user = &dn->dn_newuid;
+ group = &dn->dn_newgid;
+ }
+
+ /*
+ * Must always call the callback in case the object
+ * type has changed and that type isn't an object type to track
+ */
+ error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
+ user, group);
+
+ /*
+ * Preserve existing uid/gid when the callback can't determine
+ * what the new uid/gid are and the callback returned EEXIST.
+ * The EEXIST error tells us to just use the existing uid/gid.
+ * If we don't know what the old values are then just assign
+ * them to 0, since that is a new file being created.
+ */
+ if (!before && data == NULL && error == EEXIST) {
+ if (flags & DN_ID_OLD_EXIST) {
+ dn->dn_newuid = dn->dn_olduid;
+ dn->dn_newgid = dn->dn_oldgid;
+ } else {
+ dn->dn_newuid = 0;
+ dn->dn_newgid = 0;
+ }
+ error = 0;
+ }
+
+ if (db)
+ mutex_exit(&db->db_mtx);
+
+ mutex_enter(&dn->dn_mtx);
+ if (error == 0 && before)
+ dn->dn_id_flags |= DN_ID_OLD_EXIST;
+ if (error == 0 && !before)
+ dn->dn_id_flags |= DN_ID_NEW_EXIST;
+
+ if (have_spill) {
+ dn->dn_id_flags |= DN_ID_CHKED_SPILL;
+ } else {
+ dn->dn_id_flags |= DN_ID_CHKED_BONUS;
+ }
+ mutex_exit(&dn->dn_mtx);
+ if (have_spill)
+ dmu_buf_rele((dmu_buf_t *)db, FTAG);
+}
+
+boolean_t
+dmu_objset_userspace_present(objset_t *os)
+{
+ return (os->os_phys->os_flags &
+ OBJSET_FLAG_USERACCOUNTING_COMPLETE);
+}
+
+int
+dmu_objset_userspace_upgrade(objset_t *os)
+{
+ uint64_t obj;
+ int err = 0;
+
+ if (dmu_objset_userspace_present(os))
+ return (0);
+ if (!dmu_objset_userused_enabled(os))
+ return (ENOTSUP);
+ if (dmu_objset_is_snapshot(os))
+ return (EINVAL);
+
+ /*
+ * We simply need to mark every object dirty, so that it will be
+ * synced out and now accounted. If this is called
+ * concurrently, or if we already did some work before crashing,
+ * that's fine, since we track each object's accounted state
+ * independently.
+ */
+
+ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
+ dmu_tx_t *tx;
+ dmu_buf_t *db;
+ int objerr;
+
+ if (issig(JUSTLOOKING) && issig(FORREAL))
+ return (EINTR);
+
+ objerr = dmu_bonus_hold(os, obj, FTAG, &db);
+ if (objerr)
+ continue;
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_bonus(tx, obj);
+ objerr = dmu_tx_assign(tx, TXG_WAIT);
+ if (objerr) {
+ dmu_tx_abort(tx);
+ continue;
+ }
+ dmu_buf_will_dirty(db, tx);
+ dmu_buf_rele(db, FTAG);
+ dmu_tx_commit(tx);
+ }
+
+ os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
+ txg_wait_synced(dmu_objset_pool(os), 0);
+ return (0);
+}
+