* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
{ byteswap_uint64_array, TRUE, "FUID table size" },
{ zap_byteswap, TRUE, "DSL dataset next clones"},
{ zap_byteswap, TRUE, "scrub work queue" },
+ { zap_byteswap, TRUE, "ZFS user/group used" },
+ { zap_byteswap, TRUE, "ZFS user/group quota" },
+ { zap_byteswap, TRUE, "snapshot refcount tags"},
};
int
* whose dnodes are in the same block.
*/
static int
-dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
- uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
+dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
+ int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
{
dsl_pool_t *dp = NULL;
dmu_buf_t **dbp;
uint64_t blkid, nblks, i;
- uint32_t flags;
+ uint32_t dbuf_flags;
int err;
zio_t *zio;
hrtime_t start;
ASSERT(length <= DMU_MAX_ACCESS);
- flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT;
- if (length > zfetch_array_rd_sz)
- flags |= DB_RF_NOPREFETCH;
+ dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT;
+ if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz)
+ dbuf_flags |= DB_RF_NOPREFETCH;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
if (dn->dn_datablkshift) {
os_dsl_dataset->ds_object,
(longlong_t)dn->dn_object, dn->dn_datablksz,
(longlong_t)offset, (longlong_t)length);
+ rw_exit(&dn->dn_struct_rwlock);
return (EIO);
}
nblks = 1;
}
/* initiate async i/o */
if (read) {
- rw_exit(&dn->dn_struct_rwlock);
- (void) dbuf_read(db, zio, flags);
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ (void) dbuf_read(db, zio, dbuf_flags);
}
dbp[i] = &db->db;
}
return (err);
err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
- numbufsp, dbpp);
+ numbufsp, dbpp, DMU_READ_PREFETCH);
dnode_rele(dn, FTAG);
int err;
err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
- numbufsp, dbpp);
+ numbufsp, dbpp, DMU_READ_PREFETCH);
return (err);
}
dnode_rele(dn, FTAG);
}
+/*
+ * Get the next "chunk" of file data to free. We traverse the file from
+ * the end so that the file gets shorter over time (if we crashes in the
+ * middle, this will leave us in a better state). We find allocated file
+ * data by simply searching the allocated level 1 indirects.
+ */
static int
-get_next_chunk(dnode_t *dn, uint64_t *offset, uint64_t limit)
+get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t limit)
{
- uint64_t len = *offset - limit;
- uint64_t chunk_len = dn->dn_datablksz * DMU_MAX_DELETEBLKCNT;
- uint64_t subchunk =
+ uint64_t len = *start - limit;
+ uint64_t blkcnt = 0;
+ uint64_t maxblks = DMU_MAX_ACCESS / (1ULL << (dn->dn_indblkshift + 1));
+ uint64_t iblkrange =
dn->dn_datablksz * EPB(dn->dn_indblkshift, SPA_BLKPTRSHIFT);
- ASSERT(limit <= *offset);
+ ASSERT(limit <= *start);
- if (len <= chunk_len) {
- *offset = limit;
+ if (len <= iblkrange * maxblks) {
+ *start = limit;
return (0);
}
+ ASSERT(ISP2(iblkrange));
- ASSERT(ISP2(subchunk));
-
- while (*offset > limit) {
- uint64_t initial_offset = P2ROUNDUP(*offset, subchunk);
- uint64_t delta;
+ while (*start > limit && blkcnt < maxblks) {
int err;
- /* skip over allocated data */
+ /* find next allocated L1 indirect */
err = dnode_next_offset(dn,
- DNODE_FIND_HOLE|DNODE_FIND_BACKWARDS, offset, 1, 1, 0);
- if (err == ESRCH)
- *offset = limit;
- else if (err)
- return (err);
+ DNODE_FIND_BACKWARDS, start, 2, 1, 0);
- ASSERT3U(*offset, <=, initial_offset);
- *offset = P2ALIGN(*offset, subchunk);
- delta = initial_offset - *offset;
- if (delta >= chunk_len) {
- *offset += delta - chunk_len;
+ /* if there are no more, then we are done */
+ if (err == ESRCH) {
+ *start = limit;
return (0);
- }
- chunk_len -= delta;
-
- /* skip over unallocated data */
- err = dnode_next_offset(dn,
- DNODE_FIND_BACKWARDS, offset, 1, 1, 0);
- if (err == ESRCH)
- *offset = limit;
- else if (err)
+ } else if (err) {
return (err);
+ }
+ blkcnt += 1;
- if (*offset < limit)
- *offset = limit;
- ASSERT3U(*offset, <, initial_offset);
+ /* reset offset to end of "next" block back */
+ *start = P2ALIGN(*start, iblkrange);
+ if (*start <= limit)
+ *start = limit;
+ else
+ *start -= 1;
}
return (0);
}
object_size = align == 1 ? dn->dn_datablksz :
(dn->dn_maxblkid + 1) << dn->dn_datablkshift;
- if (trunc || (end = offset + length) > object_size)
+ end = offset + length;
+ if (trunc || end > object_size)
end = object_size;
if (end <= offset)
return (0);
while (length) {
start = end;
+ /* assert(offset <= start) */
err = get_next_chunk(dn, &start, offset);
if (err)
return (err);
int
dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
- void *buf)
+ void *buf, uint32_t flags)
{
dnode_t *dn;
dmu_buf_t **dbp;
- int numbufs, i, err;
+ int numbufs, err;
err = dnode_hold(os->os, object, FTAG, &dn);
if (err)
* block. If we ever do the tail block optimization, we will need to
* handle that here as well.
*/
- if (dn->dn_datablkshift == 0) {
+ if (dn->dn_maxblkid == 0) {
int newsz = offset > dn->dn_datablksz ? 0 :
MIN(size, dn->dn_datablksz - offset);
bzero((char *)buf + newsz, size - newsz);
while (size > 0) {
uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2);
+ int i;
/*
* NB: we could do this block-at-a-time, but it's nice
* to be reading in parallel.
*/
err = dmu_buf_hold_array_by_dnode(dn, offset, mylen,
- TRUE, FTAG, &numbufs, &dbp);
+ TRUE, FTAG, &numbufs, &dbp, flags);
if (err)
break;
if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
- if (err)
- break;
-
offset += tocpy;
size -= tocpy;
}
}
#endif
+/*
+ * Allocate a loaned anonymous arc buffer.
+ */
+arc_buf_t *
+dmu_request_arcbuf(dmu_buf_t *handle, int size)
+{
+ dnode_t *dn = ((dmu_buf_impl_t *)handle)->db_dnode;
+
+ return (arc_loan_buf(dn->dn_objset->os_spa, size));
+}
+
+/*
+ * Free a loaned arc buffer.
+ */
+void
+dmu_return_arcbuf(arc_buf_t *buf)
+{
+ arc_return_buf(buf, FTAG);
+ VERIFY(arc_buf_remove_ref(buf, FTAG) == 1);
+}
+
+/*
+ * When possible directly assign passed loaned arc buffer to a dbuf.
+ * If this is not possible copy the contents of passed arc buf via
+ * dmu_write().
+ */
+void
+dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
+ dmu_tx_t *tx)
+{
+ dnode_t *dn = ((dmu_buf_impl_t *)handle)->db_dnode;
+ dmu_buf_impl_t *db;
+ uint32_t blksz = (uint32_t)arc_buf_size(buf);
+ uint64_t blkid;
+
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ blkid = dbuf_whichblock(dn, offset);
+ VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL);
+ rw_exit(&dn->dn_struct_rwlock);
+
+ if (offset == db->db.db_offset && blksz == db->db.db_size) {
+ dbuf_assign_arcbuf(db, buf, tx);
+ dbuf_rele(db, FTAG);
+ } else {
+ dbuf_rele(db, FTAG);
+ ASSERT(dn->dn_objset->os.os == dn->dn_objset);
+ dmu_write(&dn->dn_objset->os, dn->dn_object, offset, blksz,
+ buf->b_data, tx);
+ dmu_return_arcbuf(buf);
+ }
+}
+
typedef struct {
dbuf_dirty_record_t *dr;
dmu_sync_cb_t *done;