X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fdmu.c;h=d864682024026ca44deaf837949f6430daf2ad73;hb=774f7c02b181f6e18b91ebdd3092df57151da3e3;hp=b6205bd500a870bb90c24341f24901a0e8da60e8;hpb=172bb4bd5e4afef721dd4d2972d8680d983f144b;p=zfs.git diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index b6205bd..d864682 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -85,6 +85,9 @@ const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { { byteswap_uint64_array, TRUE, "FUID table size" }, { zap_byteswap, TRUE, "DSL dataset next clones"}, { zap_byteswap, TRUE, "scrub work queue" }, + { zap_byteswap, TRUE, "ZFS user/group used" }, + { zap_byteswap, TRUE, "ZFS user/group quota" }, + { zap_byteswap, TRUE, "snapshot refcount tags"}, }; int @@ -180,22 +183,22 @@ dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) * whose dnodes are in the same block. */ static int -dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, - uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) +dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, + int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags) { dsl_pool_t *dp = NULL; dmu_buf_t **dbp; uint64_t blkid, nblks, i; - uint32_t flags; + uint32_t dbuf_flags; int err; zio_t *zio; hrtime_t start; ASSERT(length <= DMU_MAX_ACCESS); - flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT; - if (length > zfetch_array_rd_sz) - flags |= DB_RF_NOPREFETCH; + dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT; + if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz) + dbuf_flags |= DB_RF_NOPREFETCH; rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_datablkshift) { @@ -210,6 +213,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, os_dsl_dataset->ds_object, (longlong_t)dn->dn_object, dn->dn_datablksz, (longlong_t)offset, (longlong_t)length); + rw_exit(&dn->dn_struct_rwlock); return (EIO); } nblks = 1; @@ -232,9 +236,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, } /* initiate async i/o */ if (read) { - rw_exit(&dn->dn_struct_rwlock); - (void) dbuf_read(db, zio, flags); - rw_enter(&dn->dn_struct_rwlock, RW_READER); + (void) dbuf_read(db, zio, dbuf_flags); } dbp[i] = &db->db; } @@ -285,7 +287,7 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, return (err); err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, - numbufsp, dbpp); + numbufsp, dbpp, DMU_READ_PREFETCH); dnode_rele(dn, FTAG); @@ -300,7 +302,7 @@ dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset, int err; err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, - numbufsp, dbpp); + numbufsp, dbpp, DMU_READ_PREFETCH); return (err); } @@ -374,56 +376,51 @@ dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) dnode_rele(dn, FTAG); } +/* + * Get the next "chunk" of file data to free. We traverse the file from + * the end so that the file gets shorter over time (if we crashes in the + * middle, this will leave us in a better state). We find allocated file + * data by simply searching the allocated level 1 indirects. + */ static int -get_next_chunk(dnode_t *dn, uint64_t *offset, uint64_t limit) +get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t limit) { - uint64_t len = *offset - limit; - uint64_t chunk_len = dn->dn_datablksz * DMU_MAX_DELETEBLKCNT; - uint64_t subchunk = + uint64_t len = *start - limit; + uint64_t blkcnt = 0; + uint64_t maxblks = DMU_MAX_ACCESS / (1ULL << (dn->dn_indblkshift + 1)); + uint64_t iblkrange = dn->dn_datablksz * EPB(dn->dn_indblkshift, SPA_BLKPTRSHIFT); - ASSERT(limit <= *offset); + ASSERT(limit <= *start); - if (len <= chunk_len) { - *offset = limit; + if (len <= iblkrange * maxblks) { + *start = limit; return (0); } + ASSERT(ISP2(iblkrange)); - ASSERT(ISP2(subchunk)); - - while (*offset > limit) { - uint64_t initial_offset = P2ROUNDUP(*offset, subchunk); - uint64_t delta; + while (*start > limit && blkcnt < maxblks) { int err; - /* skip over allocated data */ + /* find next allocated L1 indirect */ err = dnode_next_offset(dn, - DNODE_FIND_HOLE|DNODE_FIND_BACKWARDS, offset, 1, 1, 0); - if (err == ESRCH) - *offset = limit; - else if (err) - return (err); + DNODE_FIND_BACKWARDS, start, 2, 1, 0); - ASSERT3U(*offset, <=, initial_offset); - *offset = P2ALIGN(*offset, subchunk); - delta = initial_offset - *offset; - if (delta >= chunk_len) { - *offset += delta - chunk_len; + /* if there are no more, then we are done */ + if (err == ESRCH) { + *start = limit; return (0); - } - chunk_len -= delta; - - /* skip over unallocated data */ - err = dnode_next_offset(dn, - DNODE_FIND_BACKWARDS, offset, 1, 1, 0); - if (err == ESRCH) - *offset = limit; - else if (err) + } else if (err) { return (err); + } + blkcnt += 1; - if (*offset < limit) - *offset = limit; - ASSERT3U(*offset, <, initial_offset); + /* reset offset to end of "next" block back */ + *start = P2ALIGN(*start, iblkrange); + if (*start <= limit) + *start = limit; + else + *start -= 1; } return (0); } @@ -442,7 +439,8 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset, object_size = align == 1 ? dn->dn_datablksz : (dn->dn_maxblkid + 1) << dn->dn_datablkshift; - if (trunc || (end = offset + length) > object_size) + end = offset + length; + if (trunc || end > object_size) end = object_size; if (end <= offset) return (0); @@ -450,6 +448,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset, while (length) { start = end; + /* assert(offset <= start) */ err = get_next_chunk(dn, &start, offset); if (err) return (err); @@ -540,11 +539,11 @@ dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, - void *buf) + void *buf, uint32_t flags) { dnode_t *dn; dmu_buf_t **dbp; - int numbufs, i, err; + int numbufs, err; err = dnode_hold(os->os, object, FTAG, &dn); if (err) @@ -555,7 +554,7 @@ dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, * block. If we ever do the tail block optimization, we will need to * handle that here as well. */ - if (dn->dn_datablkshift == 0) { + if (dn->dn_maxblkid == 0) { int newsz = offset > dn->dn_datablksz ? 0 : MIN(size, dn->dn_datablksz - offset); bzero((char *)buf + newsz, size - newsz); @@ -564,13 +563,14 @@ dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, while (size > 0) { uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2); + int i; /* * NB: we could do this block-at-a-time, but it's nice * to be reading in parallel. */ err = dmu_buf_hold_array_by_dnode(dn, offset, mylen, - TRUE, FTAG, &numbufs, &dbp); + TRUE, FTAG, &numbufs, &dbp, flags); if (err) break; @@ -799,9 +799,6 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, if (tocpy == db->db_size) dmu_buf_fill_done(db, tx); - if (err) - break; - offset += tocpy; size -= tocpy; } @@ -810,6 +807,58 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, } #endif +/* + * Allocate a loaned anonymous arc buffer. + */ +arc_buf_t * +dmu_request_arcbuf(dmu_buf_t *handle, int size) +{ + dnode_t *dn = ((dmu_buf_impl_t *)handle)->db_dnode; + + return (arc_loan_buf(dn->dn_objset->os_spa, size)); +} + +/* + * Free a loaned arc buffer. + */ +void +dmu_return_arcbuf(arc_buf_t *buf) +{ + arc_return_buf(buf, FTAG); + VERIFY(arc_buf_remove_ref(buf, FTAG) == 1); +} + +/* + * When possible directly assign passed loaned arc buffer to a dbuf. + * If this is not possible copy the contents of passed arc buf via + * dmu_write(). + */ +void +dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, + dmu_tx_t *tx) +{ + dnode_t *dn = ((dmu_buf_impl_t *)handle)->db_dnode; + dmu_buf_impl_t *db; + uint32_t blksz = (uint32_t)arc_buf_size(buf); + uint64_t blkid; + + rw_enter(&dn->dn_struct_rwlock, RW_READER); + blkid = dbuf_whichblock(dn, offset); + VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL); + rw_exit(&dn->dn_struct_rwlock); + + if (offset == db->db.db_offset && blksz == db->db.db_size) { + dbuf_assign_arcbuf(db, buf, tx); + dbuf_rele(db, FTAG); + } else { + dbuf_rele(db, FTAG); + ASSERT(dn->dn_objset->os.os == dn->dn_objset); + dmu_write(&dn->dn_objset->os, dn->dn_object, offset, blksz, + buf->b_data, tx); + dmu_return_arcbuf(buf); + } +} + typedef struct { dbuf_dirty_record_t *dr; dmu_sync_cb_t *done;