Update core ZFS code from build 121 to build 141.
module/zfs/dnode_sync.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/dbuf.h>
#include <sys/dnode.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/spa.h>

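/*
 * Grow this dnode's indirection to the level recorded in
 * dn_next_nlevels for the syncing txg: hold a new top-level
 * indirect block, copy the dnode's block pointers into it, and
 * reparent any cached child dbufs onto the new block.
 */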
static void
dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
{
        dmu_buf_impl_t *db;
        int txgoff = tx->tx_txg & TXG_MASK;
        int nblkptr = dn->dn_phys->dn_nblkptr;
        int old_toplvl = dn->dn_phys->dn_nlevels - 1;
        int new_level = dn->dn_next_nlevels[txgoff];
        int i;

        rw_enter(&dn->dn_struct_rwlock, RW_WRITER);

        /* this dnode can't be paged out because it's dirty */
        ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
        ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
        ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0);

        db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG);
        ASSERT(db != NULL);

        dn->dn_phys->dn_nlevels = new_level;
        dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset,
            dn->dn_object, dn->dn_phys->dn_nlevels);

        /* check for existing blkptrs in the dnode */
        for (i = 0; i < nblkptr; i++)
                if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i]))
                        break;
        if (i != nblkptr) {
                /* transfer dnode's block pointers to new indirect block */
                (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
                ASSERT(db->db.db_data);
                ASSERT(arc_released(db->db_buf));
                ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
                bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
                    sizeof (blkptr_t) * nblkptr);
                arc_buf_freeze(db->db_buf);
        }

        /* set dbuf's parent pointers to new indirect buf */
        for (i = 0; i < nblkptr; i++) {
                dmu_buf_impl_t *child = dbuf_find(dn, old_toplvl, i);

                if (child == NULL)
                        continue;
                ASSERT3P(child->db_dnode, ==, dn);
                if (child->db_parent && child->db_parent != dn->dn_dbuf) {
                        ASSERT(child->db_parent->db_level == db->db_level);
                        ASSERT(child->db_blkptr !=
                            &dn->dn_phys->dn_blkptr[child->db_blkid]);
                        mutex_exit(&child->db_mtx);
                        continue;
                }
                ASSERT(child->db_parent == NULL ||
                    child->db_parent == dn->dn_dbuf);

                child->db_parent = db;
                dbuf_add_ref(db, child);
                if (db->db.db_data)
                        child->db_blkptr = (blkptr_t *)db->db.db_data + i;
                else
                        child->db_blkptr = NULL;
                dprintf_dbuf_bp(child, child->db_blkptr,
                    "changed db_blkptr to new indirect %s", "");

                mutex_exit(&child->db_mtx);
        }

        bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr);

        dbuf_rele(db, FTAG);

        rw_exit(&dn->dn_struct_rwlock);
}

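/*
 * Free "num" block pointers starting at "bp", charging the freed
 * bytes back against the dnode's space accounting.  Returns the
 * number of non-hole pointers actually freed.
 */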
static int
free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
{
        dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
        uint64_t bytesfreed = 0;
        int i, blocks_freed = 0;

        dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num);

        for (i = 0; i < num; i++, bp++) {
                if (BP_IS_HOLE(bp))
                        continue;

                bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE);
                ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));
                bzero(bp, sizeof (blkptr_t));
                blocks_freed += 1;
        }
        dnode_diduse_space(dn, -bytesfreed);
        return (blocks_freed);
}

#ifdef ZFS_DEBUG
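/*
 * Debug-only: verify that every cached level-0 child in the range
 * [start, end] under this level-1 indirect block holds zeroed data,
 * both in its dirty record for this txg and, when the buffer is not
 * dirty at all, in db_data itself.
 */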
static void
free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
{
        int off, num;
        int i, err, epbs;
        uint64_t txg = tx->tx_txg;

        epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
        off = start - (db->db_blkid << epbs);
        num = end - start + 1;

        ASSERT3U(off, >=, 0);
        ASSERT3U(num, >=, 0);
        ASSERT3U(db->db_level, >, 0);
        ASSERT3U(db->db.db_size, ==, 1<<db->db_dnode->dn_phys->dn_indblkshift);
        ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT);
        ASSERT(db->db_blkptr != NULL);

        for (i = off; i < off+num; i++) {
                uint64_t *buf;
                dmu_buf_impl_t *child;
                dbuf_dirty_record_t *dr;
                int j;

                ASSERT(db->db_level == 1);

                rw_enter(&db->db_dnode->dn_struct_rwlock, RW_READER);
                err = dbuf_hold_impl(db->db_dnode, db->db_level-1,
                    (db->db_blkid << epbs) + i, TRUE, FTAG, &child);
                rw_exit(&db->db_dnode->dn_struct_rwlock);
                if (err == ENOENT)
                        continue;
                ASSERT(err == 0);
                ASSERT(child->db_level == 0);
                dr = child->db_last_dirty;
                while (dr && dr->dr_txg > txg)
                        dr = dr->dr_next;
                ASSERT(dr == NULL || dr->dr_txg == txg);

                /* data_old better be zeroed */
                if (dr) {
                        buf = dr->dt.dl.dr_data->b_data;
                        for (j = 0; j < child->db.db_size >> 3; j++) {
                                if (buf[j] != 0) {
                                        panic("freed data not zero: "
                                            "child=%p i=%d off=%d num=%d\n",
                                            (void *)child, i, off, num);
                                }
                        }
                }

                /*
                 * db_data better be zeroed unless it's dirty in a
                 * future txg.
                 */
                mutex_enter(&child->db_mtx);
                buf = child->db.db_data;
                if (buf != NULL && child->db_state != DB_FILL &&
                    child->db_last_dirty == NULL) {
                        for (j = 0; j < child->db.db_size >> 3; j++) {
                                if (buf[j] != 0) {
                                        panic("freed data not zero: "
                                            "child=%p i=%d off=%d num=%d\n",
                                            (void *)child, i, off, num);
                                }
                        }
                }
                mutex_exit(&child->db_mtx);

                dbuf_rele(child, FTAG);
        }
}
#endif

#define ALL -1

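/*
 * Free the blocks referenced by this indirect dbuf that fall within
 * [blkid, blkid + nblks).  Returns ALL if every pointer covered by
 * this block was freed (so the caller may free the indirect block
 * itself), otherwise the number of block pointers freed directly
 * from this dbuf.
 */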
static int
free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
    dmu_tx_t *tx)
{
        dnode_t *dn = db->db_dnode;
        blkptr_t *bp;
        dmu_buf_impl_t *subdb;
        uint64_t start, end, dbstart, dbend, i;
        int epbs, shift, err;
        int all = TRUE;
        int blocks_freed = 0;

        /*
         * There is a small possibility that this block will not be cached:
         *   1 - if level > 1 and there are no children with level <= 1
         *   2 - if we didn't get a dirty hold (because this block had just
         *       finished being written -- and so had no holds), and then this
         *       block got evicted before we got here.
         */
        if (db->db_state != DB_CACHED)
                (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);

        dbuf_release_bp(db);
        bp = (blkptr_t *)db->db.db_data;

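        /*
         * Clamp [blkid, blkid + nblks) to the range of child block ids
         * this indirect block actually covers; "shift" converts a
         * level-0 blkid into a blkid at this block's child level.
         */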
        epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
        shift = (db->db_level - 1) * epbs;
        dbstart = db->db_blkid << epbs;
        start = blkid >> shift;
        if (dbstart < start) {
                bp += start - dbstart;
                all = FALSE;
        } else {
                start = dbstart;
        }
        dbend = ((db->db_blkid + 1) << epbs) - 1;
        end = (blkid + nblks - 1) >> shift;
        if (dbend <= end)
                end = dbend;
        else if (all)
                all = trunc;
        ASSERT3U(start, <=, end);

        if (db->db_level == 1) {
                FREE_VERIFY(db, start, end, tx);
                blocks_freed = free_blocks(dn, bp, end-start+1, tx);
                arc_buf_freeze(db->db_buf);
                ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
                return (all ? ALL : blocks_freed);
        }

        for (i = start; i <= end; i++, bp++) {
                if (BP_IS_HOLE(bp))
                        continue;
                rw_enter(&dn->dn_struct_rwlock, RW_READER);
                err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb);
                ASSERT3U(err, ==, 0);
                rw_exit(&dn->dn_struct_rwlock);

                if (free_children(subdb, blkid, nblks, trunc, tx) == ALL) {
                        ASSERT3P(subdb->db_blkptr, ==, bp);
                        blocks_freed += free_blocks(dn, bp, 1, tx);
                } else {
                        all = FALSE;
                }
                dbuf_rele(subdb, FTAG);
        }
        arc_buf_freeze(db->db_buf);
#ifdef ZFS_DEBUG
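        /*
         * Every pointer fully inside the freed range must now be a hole;
         * the first and last may have been only partially freed.
         */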
        bp -= (end-start)+1;
        for (i = start; i <= end; i++, bp++) {
                if (i == start && blkid != 0)
                        continue;
                else if (i == end && !trunc)
                        continue;
                ASSERT3U(bp->blk_birth, ==, 0);
        }
#endif
        ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
        return (all ? ALL : blocks_freed);
}

/*
 * dnode_sync_free_range: Traverse the indicated range of the provided
 * file and "free" all the blocks contained there.
 */
static void
dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
{
        blkptr_t *bp = dn->dn_phys->dn_blkptr;
        dmu_buf_impl_t *db;
        int trunc, start, end, shift, i, err;
        int dnlevel = dn->dn_phys->dn_nlevels;

        if (blkid > dn->dn_phys->dn_maxblkid)
                return;

        ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX);
        trunc = blkid + nblks > dn->dn_phys->dn_maxblkid;
        if (trunc)
                nblks = dn->dn_phys->dn_maxblkid - blkid + 1;

        /* There are no indirect blocks in the object */
        if (dnlevel == 1) {
                if (blkid >= dn->dn_phys->dn_nblkptr) {
                        /* this range was never made persistent */
                        return;
                }
                ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr);
                (void) free_blocks(dn, bp + blkid, nblks, tx);
                if (trunc) {
                        uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
                            (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT);
                        dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
                        ASSERT(off < dn->dn_phys->dn_maxblkid ||
                            dn->dn_phys->dn_maxblkid == 0 ||
                            dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
                }
                return;
        }

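        /*
         * Convert the level-0 range into a range of top-level block
         * pointers and recurse into each non-hole subtree.
         */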
        shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT);
        start = blkid >> shift;
        ASSERT(start < dn->dn_phys->dn_nblkptr);
        end = (blkid + nblks - 1) >> shift;
        bp += start;
        for (i = start; i <= end; i++, bp++) {
                if (BP_IS_HOLE(bp))
                        continue;
                rw_enter(&dn->dn_struct_rwlock, RW_READER);
                err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db);
                ASSERT3U(err, ==, 0);
                rw_exit(&dn->dn_struct_rwlock);

                if (free_children(db, blkid, nblks, trunc, tx) == ALL) {
                        ASSERT3P(db->db_blkptr, ==, bp);
                        (void) free_blocks(dn, bp, 1, tx);
                }
                dbuf_rele(db, FTAG);
        }
        if (trunc) {
                uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
                    (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT);
                dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
                ASSERT(off < dn->dn_phys->dn_maxblkid ||
                    dn->dn_phys->dn_maxblkid == 0 ||
                    dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
        }
}

/*
 * Try to kick all the dnode's dbufs out of the cache...
 */
void
dnode_evict_dbufs(dnode_t *dn)
{
        int progress;
        int pass = 0;

        do {
                dmu_buf_impl_t *db, marker;
                int evicting = FALSE;

                progress = FALSE;
                mutex_enter(&dn->dn_dbufs_mtx);
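                /*
                 * Walk dn_dbufs with an on-stack marker: each dbuf we visit
                 * is moved to the tail, behind the marker, so one pass
                 * terminates even as the list is reshuffled under us.
                 */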
                list_insert_tail(&dn->dn_dbufs, &marker);
                db = list_head(&dn->dn_dbufs);
                for (; db != &marker; db = list_head(&dn->dn_dbufs)) {
                        list_remove(&dn->dn_dbufs, db);
                        list_insert_tail(&dn->dn_dbufs, db);
                        ASSERT3P(db->db_dnode, ==, dn);

                        mutex_enter(&db->db_mtx);
                        if (db->db_state == DB_EVICTING) {
                                progress = TRUE;
                                evicting = TRUE;
                                mutex_exit(&db->db_mtx);
                        } else if (refcount_is_zero(&db->db_holds)) {
                                progress = TRUE;
                                dbuf_clear(db); /* exits db_mtx for us */
                        } else {
                                mutex_exit(&db->db_mtx);
                        }
                }
                list_remove(&dn->dn_dbufs, &marker);
                /*
                 * NB: we need to drop dn_dbufs_mtx between passes so
                 * that any DB_EVICTING dbufs can make progress.
                 * Ideally, we would have some cv we could wait on, but
                 * since we don't, just wait a bit to give the other
                 * thread a chance to run.
                 */
                mutex_exit(&dn->dn_dbufs_mtx);
                if (evicting)
                        delay(1);
                pass++;
                ASSERT(pass < 100); /* sanity check */
        } while (progress);

        rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
        if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
                mutex_enter(&dn->dn_bonus->db_mtx);
                dbuf_evict(dn->dn_bonus);
                dn->dn_bonus = NULL;
        }
        rw_exit(&dn->dn_struct_rwlock);
}

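/*
 * Discard the dirty records on the given list without writing them
 * out; used when the dnode is being freed, so its pending changes
 * never need to reach disk.
 */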
static void
dnode_undirty_dbufs(list_t *list)
{
        dbuf_dirty_record_t *dr;

        while ((dr = list_head(list)) != NULL) {
                dmu_buf_impl_t *db = dr->dr_dbuf;
                uint64_t txg = dr->dr_txg;

                if (db->db_level != 0)
                        dnode_undirty_dbufs(&dr->dt.di.dr_children);

                mutex_enter(&db->db_mtx);
                /* XXX - use dbuf_undirty()? */
                list_remove(list, dr);
                ASSERT(db->db_last_dirty == dr);
                db->db_last_dirty = NULL;
                db->db_dirtycnt -= 1;
                if (db->db_level == 0) {
                        ASSERT(db->db_blkid == DMU_BONUS_BLKID ||
                            dr->dt.dl.dr_data == db->db_buf);
                        dbuf_unoverride(dr);
                }
                kmem_free(dr, sizeof (dbuf_dirty_record_t));
                dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
        }
}

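/*
 * Final sync-context teardown of a freed dnode: discard its pending
 * dirty state, evict its dbufs, and zero its on-disk dnode_phys_t.
 */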
static void
dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
{
        int txgoff = tx->tx_txg & TXG_MASK;

        ASSERT(dmu_tx_is_syncing(tx));

        /*
         * Our contents should have been freed in dnode_sync() by the
         * free range record inserted by the caller of dnode_free().
         */
        ASSERT3U(DN_USED_BYTES(dn->dn_phys), ==, 0);
        ASSERT(BP_IS_HOLE(dn->dn_phys->dn_blkptr));

        dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
        dnode_evict_dbufs(dn);
        ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);

        /*
         * XXX - It would be nice to assert this, but we may still
         * have residual holds from async evictions from the arc...
         *
         * zfs_obj_to_path() also depends on this being
         * commented out.
         *
         * ASSERT3U(refcount_count(&dn->dn_holds), ==, 1);
         */

        /* Undirty next bits */
        dn->dn_next_nlevels[txgoff] = 0;
        dn->dn_next_indblkshift[txgoff] = 0;
        dn->dn_next_blksz[txgoff] = 0;

        /* ASSERT(blkptrs are zero); */
        ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
        ASSERT(dn->dn_type != DMU_OT_NONE);

        ASSERT(dn->dn_free_txg > 0);
        if (dn->dn_allocated_txg != dn->dn_free_txg)
                dbuf_will_dirty(dn->dn_dbuf, tx);
        bzero(dn->dn_phys, sizeof (dnode_phys_t));

        mutex_enter(&dn->dn_mtx);
        dn->dn_type = DMU_OT_NONE;
        dn->dn_maxblkid = 0;
        dn->dn_allocated_txg = 0;
        dn->dn_free_txg = 0;
        dn->dn_have_spill = B_FALSE;
        mutex_exit(&dn->dn_mtx);

        ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);

        dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
        /*
         * Now that we've released our hold, the dnode may
         * be evicted, so we mustn't access it.
         */
}

/*
 * Write out the dnode's dirty buffers.
 */
void
dnode_sync(dnode_t *dn, dmu_tx_t *tx)
{
        free_range_t *rp;
        dnode_phys_t *dnp = dn->dn_phys;
        int txgoff = tx->tx_txg & TXG_MASK;
        list_t *list = &dn->dn_dirty_records[txgoff];
        static const dnode_phys_t zerodn = { 0 };
        boolean_t kill_spill = B_FALSE;

        ASSERT(dmu_tx_is_syncing(tx));
        ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg);
        ASSERT(dnp->dn_type != DMU_OT_NONE ||
            bcmp(dnp, &zerodn, DNODE_SIZE) == 0);
        DNODE_VERIFY(dn);

        ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf));

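        /*
         * Record the pre-sync space usage and flags for user/group
         * accounting, and mark this dnode's usage as accounted for.
         */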
        if (dmu_objset_userused_enabled(dn->dn_objset) &&
            !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
                mutex_enter(&dn->dn_mtx);
                dn->dn_oldused = DN_USED_BYTES(dn->dn_phys);
                dn->dn_oldflags = dn->dn_phys->dn_flags;
                dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED;
                mutex_exit(&dn->dn_mtx);
                dmu_objset_userquota_get_ids(dn, B_FALSE, tx);
        } else {
                /* Once we account for it, we should always account for it. */
                ASSERT(!(dn->dn_phys->dn_flags &
                    DNODE_FLAG_USERUSED_ACCOUNTED));
        }

        mutex_enter(&dn->dn_mtx);
        if (dn->dn_allocated_txg == tx->tx_txg) {
                /* The dnode is newly allocated or reallocated */
                if (dnp->dn_type == DMU_OT_NONE) {
                        /* this is a first alloc, not a realloc */
                        dnp->dn_nlevels = 1;
                        dnp->dn_nblkptr = dn->dn_nblkptr;
                }

                dnp->dn_type = dn->dn_type;
                dnp->dn_bonustype = dn->dn_bonustype;
                dnp->dn_bonuslen = dn->dn_bonuslen;
        }

        ASSERT(dnp->dn_nlevels > 1 ||
            BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
            BP_GET_LSIZE(&dnp->dn_blkptr[0]) ==
            dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);

        if (dn->dn_next_blksz[txgoff]) {
                ASSERT(P2PHASE(dn->dn_next_blksz[txgoff],
                    SPA_MINBLOCKSIZE) == 0);
                ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
                    dn->dn_maxblkid == 0 || list_head(list) != NULL ||
                    avl_last(&dn->dn_ranges[txgoff]) ||
                    dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT ==
                    dnp->dn_datablkszsec);
                dnp->dn_datablkszsec =
                    dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT;
                dn->dn_next_blksz[txgoff] = 0;
        }

        if (dn->dn_next_bonuslen[txgoff]) {
                if (dn->dn_next_bonuslen[txgoff] == DN_ZERO_BONUSLEN)
                        dnp->dn_bonuslen = 0;
                else
                        dnp->dn_bonuslen = dn->dn_next_bonuslen[txgoff];
                ASSERT(dnp->dn_bonuslen <= DN_MAX_BONUSLEN);
                dn->dn_next_bonuslen[txgoff] = 0;
        }

        if (dn->dn_next_bonustype[txgoff]) {
                ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES);
                dnp->dn_bonustype = dn->dn_next_bonustype[txgoff];
                dn->dn_next_bonustype[txgoff] = 0;
        }

        /*
         * Remove the spill block if the file is being removed or if we
         * have been explicitly asked to remove it.
         */
        if (dn->dn_rm_spillblk[txgoff] ||
            ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
            dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg)) {
                if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR))
                        kill_spill = B_TRUE;
                dn->dn_rm_spillblk[txgoff] = 0;
        }

        if (dn->dn_next_indblkshift[txgoff]) {
                ASSERT(dnp->dn_nlevels == 1);
                dnp->dn_indblkshift = dn->dn_next_indblkshift[txgoff];
                dn->dn_next_indblkshift[txgoff] = 0;
        }

        /*
         * Just take the live (open-context) values for checksum and compress.
         * Strictly speaking it's a future leak, but nothing bad happens if we
         * start using the new checksum or compress algorithm a little early.
         */
        dnp->dn_checksum = dn->dn_checksum;
        dnp->dn_compress = dn->dn_compress;

        mutex_exit(&dn->dn_mtx);

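        /*
         * Free the spill block now that dn_mtx has been dropped; clearing
         * the flag retakes the mutex briefly.
         */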
        if (kill_spill) {
                (void) free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
                mutex_enter(&dn->dn_mtx);
                dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR;
                mutex_exit(&dn->dn_mtx);
        }

        /* process all the "freed" ranges in the file */
        while ((rp = avl_last(&dn->dn_ranges[txgoff])) != NULL) {
                dnode_sync_free_range(dn, rp->fr_blkid, rp->fr_nblks, tx);
                /* grab the mutex so we don't race with dnode_block_freed() */
                mutex_enter(&dn->dn_mtx);
                avl_remove(&dn->dn_ranges[txgoff], rp);
                mutex_exit(&dn->dn_mtx);
                kmem_free(rp, sizeof (free_range_t));
        }

        if (dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg) {
                dnode_sync_free(dn, tx);
                return;
        }

        if (dn->dn_next_nblkptr[txgoff]) {
                /* this should only happen on a realloc */
                ASSERT(dn->dn_allocated_txg == tx->tx_txg);
                if (dn->dn_next_nblkptr[txgoff] > dnp->dn_nblkptr) {
                        /* zero the new blkptrs we are gaining */
                        bzero(dnp->dn_blkptr + dnp->dn_nblkptr,
                            sizeof (blkptr_t) *
                            (dn->dn_next_nblkptr[txgoff] - dnp->dn_nblkptr));
#ifdef ZFS_DEBUG
                } else {
                        int i;
                        ASSERT(dn->dn_next_nblkptr[txgoff] < dnp->dn_nblkptr);
                        /* the blkptrs we are losing better be unallocated */
                        for (i = dn->dn_next_nblkptr[txgoff];
                            i < dnp->dn_nblkptr; i++)
                                ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[i]));
#endif
                }
                mutex_enter(&dn->dn_mtx);
                dnp->dn_nblkptr = dn->dn_next_nblkptr[txgoff];
                dn->dn_next_nblkptr[txgoff] = 0;
                mutex_exit(&dn->dn_mtx);
        }

        if (dn->dn_next_nlevels[txgoff]) {
                dnode_increase_indirection(dn, tx);
                dn->dn_next_nlevels[txgoff] = 0;
        }

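        /* write out (or recurse into) everything still dirty in this txg */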
        dbuf_sync_list(list, tx);

        if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
                ASSERT3P(list_head(list), ==, NULL);
                dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
        }

        /*
         * Although we have dropped our reference to the dnode, it
         * can't be evicted until it's written, and we haven't yet
         * initiated the IO for the dnode's dbuf.
         */
}