X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fdmu.c;h=aaeec418d1681b06ce5c7220dc724c9718116a97;hb=baa40d45cbb336765b2f89d934cd9ea690e4f7c9;hp=39234eba53b2e4730d07d4259b680038dcd541cd;hpb=572e285762521df27fe5b026f409ba1a21abb7ac;p=zfs.git

diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 39234eb..aaeec41 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -355,7 +355,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
 	uint32_t dbuf_flags;
 	int err;
 	zio_t *zio;
-	hrtime_t start;
+	hrtime_t start = 0;
 
 	ASSERT(length <= DMU_MAX_ACCESS);
 
@@ -793,7 +793,7 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
 		else
 			dmu_buf_will_dirty(db, tx);
 
-		bcopy(buf, (char *)db->db_data + bufoff, tocpy);
+		(void) memcpy((char *)db->db_data + bufoff, buf, tocpy);
 
 		if (tocpy == db->db_size)
 			dmu_buf_fill_done(db, tx);
@@ -831,6 +831,31 @@ dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
  */
 kstat_t *xuio_ksp = NULL;
 
+typedef struct xuio_stats {
+	/* loaned yet not returned arc_buf */
+	kstat_named_t xuiostat_onloan_rbuf;
+	kstat_named_t xuiostat_onloan_wbuf;
+	/* whether a copy is made when loaning out a read buffer */
+	kstat_named_t xuiostat_rbuf_copied;
+	kstat_named_t xuiostat_rbuf_nocopy;
+	/* whether a copy is made when assigning a write buffer */
+	kstat_named_t xuiostat_wbuf_copied;
+	kstat_named_t xuiostat_wbuf_nocopy;
+} xuio_stats_t;
+
+static xuio_stats_t xuio_stats = {
+	{ "onloan_read_buf", KSTAT_DATA_UINT64 },
+	{ "onloan_write_buf", KSTAT_DATA_UINT64 },
+	{ "read_buf_copied", KSTAT_DATA_UINT64 },
+	{ "read_buf_nocopy", KSTAT_DATA_UINT64 },
+	{ "write_buf_copied", KSTAT_DATA_UINT64 },
+	{ "write_buf_nocopy", KSTAT_DATA_UINT64 }
+};
+
+#define XUIOSTAT_INCR(stat, val) \
+	atomic_add_64(&xuio_stats.stat.value.ui64, (val))
+#define XUIOSTAT_BUMP(stat) XUIOSTAT_INCR(stat, 1)
+
 int
 dmu_xuio_init(xuio_t *xuio, int nblk)
 {
@@ -950,85 +975,126 @@ xuio_stat_wbuf_nocopy()
 }
 
 #ifdef _KERNEL
+
+/*
+ * Copy up to size bytes between arg_buf and req based on the data direction
+ * described by the req. If an entire req's data cannot be transferred, the
+ * req is updated such that its current index and bv offsets correctly
+ * reference any residual data which could not be copied. The return value
+ * is the number of bytes successfully copied to arg_buf.
+ */
+static int
+dmu_req_copy(void *arg_buf, int size, int *offset, struct request *req)
+{
+	struct bio_vec *bv;
+	struct req_iterator iter;
+	char *bv_buf;
+	int tocpy;
+
+	*offset = 0;
+	rq_for_each_segment(bv, req, iter) {
+
+		/* Fully consumed the passed arg_buf */
+		ASSERT3S(*offset, <=, size);
+		if (size == *offset)
+			break;
+
+		/* Skip fully consumed bv's */
+		if (bv->bv_len == 0)
+			continue;
+
+		tocpy = MIN(bv->bv_len, size - *offset);
+		ASSERT3S(tocpy, >=, 0);
+
+		bv_buf = page_address(bv->bv_page) + bv->bv_offset;
+		ASSERT3P(bv_buf, !=, NULL);
+
+		if (rq_data_dir(req) == WRITE)
+			memcpy(arg_buf + *offset, bv_buf, tocpy);
+		else
+			memcpy(bv_buf, arg_buf + *offset, tocpy);
+
+		*offset += tocpy;
+		bv->bv_offset += tocpy;
+		bv->bv_len -= tocpy;
+	}
+
+	return 0;
+}
+
 int
-dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
+dmu_read_req(objset_t *os, uint64_t object, struct request *req)
 {
+	uint64_t size = blk_rq_bytes(req);
+	uint64_t offset = blk_rq_pos(req) << 9;
 	dmu_buf_t **dbp;
 	int numbufs, i, err;
-	xuio_t *xuio = NULL;
 
 	/*
 	 * NB: we could do this block-at-a-time, but it's nice
 	 * to be reading in parallel.
 	 */
-	err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG,
-	    &numbufs, &dbp);
+	err = dmu_buf_hold_array(os, object, offset, size, TRUE, FTAG,
+	    &numbufs, &dbp);
 	if (err)
 		return (err);
 
-	if (uio->uio_extflg == UIO_XUIO)
-		xuio = (xuio_t *)uio;
-
 	for (i = 0; i < numbufs; i++) {
-		int tocpy;
-		int bufoff;
+		int tocpy, didcpy, bufoff;
 		dmu_buf_t *db = dbp[i];
 
-		ASSERT(size > 0);
+		bufoff = offset - db->db_offset;
+		ASSERT3S(bufoff, >=, 0);
 
-		bufoff = uio->uio_loffset - db->db_offset;
 		tocpy = (int)MIN(db->db_size - bufoff, size);
+		if (tocpy == 0)
+			break;
 
-		if (xuio) {
-			dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
-			arc_buf_t *dbuf_abuf = dbi->db_buf;
-			arc_buf_t *abuf = dbuf_loan_arcbuf(dbi);
-			err = dmu_xuio_add(xuio, abuf, bufoff, tocpy);
-			if (!err) {
-				uio->uio_resid -= tocpy;
-				uio->uio_loffset += tocpy;
-			}
+		err = dmu_req_copy(db->db_data + bufoff, tocpy, &didcpy, req);
+
+		if (didcpy < tocpy)
+			err = EIO;
 
-			if (abuf == dbuf_abuf)
-				XUIOSTAT_BUMP(xuiostat_rbuf_nocopy);
-			else
-				XUIOSTAT_BUMP(xuiostat_rbuf_copied);
-		} else {
-			err = uiomove((char *)db->db_data + bufoff, tocpy,
-			    UIO_READ, uio);
-		}
 		if (err)
 			break;
 
 		size -= tocpy;
+		offset += didcpy;
+		err = 0;
 	}
 	dmu_buf_rele_array(dbp, numbufs, FTAG);
 
 	return (err);
 }
 
-static int
-dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
+int
+dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
 {
+	uint64_t size = blk_rq_bytes(req);
+	uint64_t offset = blk_rq_pos(req) << 9;
 	dmu_buf_t **dbp;
 	int numbufs;
 	int err = 0;
 	int i;
 
-	err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size,
-	    FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH);
+	if (size == 0)
+		return (0);
+
+	err = dmu_buf_hold_array(os, object, offset, size, FALSE, FTAG,
+	    &numbufs, &dbp);
 	if (err)
 		return (err);
 
 	for (i = 0; i < numbufs; i++) {
-		int tocpy;
-		int bufoff;
+		int tocpy, didcpy, bufoff;
 		dmu_buf_t *db = dbp[i];
 
-		ASSERT(size > 0);
+		bufoff = offset - db->db_offset;
+		ASSERT3S(bufoff, >=, 0);
 
-		bufoff = uio->uio_loffset - db->db_offset;
 		tocpy = (int)MIN(db->db_size - bufoff, size);
+		if (tocpy == 0)
+			break;
 
 		ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
 
@@ -1037,28 +1103,28 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
 		else
 			dmu_buf_will_dirty(db, tx);
 
-		/*
-		 * XXX uiomove could block forever (eg. nfs-backed
-		 * pages). There needs to be a uiolockdown() function
-		 * to lock the pages in memory, so that uiomove won't
-		 * block.
-		 */
-		err = uiomove((char *)db->db_data + bufoff, tocpy,
-		    UIO_WRITE, uio);
+		err = dmu_req_copy(db->db_data + bufoff, tocpy, &didcpy, req);
 
 		if (tocpy == db->db_size)
 			dmu_buf_fill_done(db, tx);
 
+		if (didcpy < tocpy)
+			err = EIO;
+
 		if (err)
 			break;
 
 		size -= tocpy;
+		offset += didcpy;
+		err = 0;
 	}
 
 	dmu_buf_rele_array(dbp, numbufs, FTAG);
 	return (err);
 }
+#endif
 
+#ifdef HAVE_ZPL
 int
 dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size, dmu_tx_t *tx)
 
@@ -1611,6 +1677,7 @@ void
 dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
 {
 	dnode_phys_t *dnp;
+	int i;
 
 	rw_enter(&dn->dn_struct_rwlock, RW_READER);
 	mutex_enter(&dn->dn_mtx);
@@ -1629,7 +1696,7 @@ dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
 	doi->doi_physical_blocks_512 = (DN_USED_BYTES(dnp) + 256) >> 9;
 	doi->doi_max_offset = (dnp->dn_maxblkid + 1) * dn->dn_datablksz;
 	doi->doi_fill_count = 0;
-	for (int i = 0; i < dnp->dn_nblkptr; i++)
+	for (i = 0; i < dnp->dn_nblkptr; i++)
 		doi->doi_fill_count += dnp->dn_blkptr[i].blk_fill;
 	mutex_exit(&dn->dn_mtx);
 
@@ -1762,3 +1829,22 @@ dmu_fini(void)
 	sa_cache_fini();
 	zfs_dbgmsg_fini();
 }
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dmu_bonus_hold);
+EXPORT_SYMBOL(dmu_free_range);
+EXPORT_SYMBOL(dmu_read);
+EXPORT_SYMBOL(dmu_write);
+
+/* Get information on a DMU object. */
+EXPORT_SYMBOL(dmu_object_info);
+EXPORT_SYMBOL(dmu_object_info_from_dnode);
+EXPORT_SYMBOL(dmu_object_info_from_db);
+EXPORT_SYMBOL(dmu_object_size_from_db);
+
+EXPORT_SYMBOL(dmu_object_set_blocksize);
+EXPORT_SYMBOL(dmu_object_set_checksum);
+EXPORT_SYMBOL(dmu_object_set_compress);
+
+EXPORT_SYMBOL(dmu_ot);
+#endif
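
Editor's note: the dmu_req_copy() helper added above moves data between a linear DMU buffer and the scattered bio_vec segments of a Linux block request, walking the segments with rq_for_each_segment() and advancing each bv's offset and length so that a partial copy leaves the request describing only the residual data. The userspace sketch below illustrates the same scatter/gather bookkeeping; it is an analogy under simplified assumptions, not ZFS or kernel code, and segment_t and seg_copy() are hypothetical stand-ins for struct bio_vec and dmu_req_copy(). Only the gather direction (the rq_data_dir(req) == WRITE arm) is shown.

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

typedef struct segment {	/* stand-in for struct bio_vec */
	char *seg_buf;		/* segment base, cf. page_address() + bv_offset */
	int seg_len;		/* unconsumed bytes, cf. bv_len */
} segment_t;

/*
 * Gather up to size bytes from the segment list into arg_buf, mirroring
 * the residual bookkeeping of dmu_req_copy(): *offset reports how many
 * bytes were copied, and consumed segments are advanced in place so a
 * later call resumes exactly where this one stopped.
 */
static int
seg_copy(void *arg_buf, int size, int *offset, segment_t *segs, int nsegs)
{
	int i, tocpy;

	*offset = 0;
	for (i = 0; i < nsegs; i++) {
		segment_t *seg = &segs[i];

		/* Stop once the linear buffer is fully consumed */
		assert(*offset <= size);
		if (size == *offset)
			break;

		/* Skip segments drained by an earlier call */
		if (seg->seg_len == 0)
			continue;

		tocpy = MIN(seg->seg_len, size - *offset);
		memcpy((char *)arg_buf + *offset, seg->seg_buf, tocpy);

		/* Advance both sides so residual data stays addressable */
		*offset += tocpy;
		seg->seg_buf += tocpy;
		seg->seg_len -= tocpy;
	}

	return 0;
}

int
main(void)
{
	char a[] = "zfs ", b[] = "dmu", out[8] = { 0 };
	segment_t segs[] = { { a, 4 }, { b, 3 } };
	int done;

	/* First pass drains segment a and one byte of b ... */
	(void) seg_copy(out, 5, &done, segs, 2);
	printf("copied %d bytes: \"%s\"\n", done, out);

	/* ... the second pass picks up the two residual bytes of b. */
	(void) seg_copy(out + 5, 2, &done, segs, 2);
	printf("copied %d more: \"%s\"\n", done, out);
	return 0;
}

The same residual-tracking convention explains the callers in the diff: dmu_read_req() and dmu_write_req() compare the bytes actually copied (didcpy) against the bytes requested (tocpy) and turn any shortfall into EIO, and they derive the request's byte offset as blk_rq_pos(req) << 9 because blk_rq_pos() counts 512-byte sectors.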
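
Editor's note: the xuio_stats hunk near the top of the diff pairs a struct of kstat_named_t counters with XUIOSTAT_INCR()/XUIOSTAT_BUMP() macros that bump a named counter lock-free via the SPL's atomic_add_64(). A minimal userspace analogue of that pattern using C11 atomics is sketched below; stat_named_t, STAT_INCR(), and STAT_BUMP() are illustrative names, not part of ZFS.

#include <inttypes.h>
#include <stdatomic.h>
#include <stdio.h>

typedef struct stat_named {	/* stand-in for kstat_named_t */
	const char *name;
	atomic_uint_fast64_t value;
} stat_named_t;

static struct {
	stat_named_t rbuf_copied;	/* loans that required a copy */
	stat_named_t rbuf_nocopy;	/* loans satisfied zero-copy */
} copy_stats = {
	{ "read_buf_copied", 0 },
	{ "read_buf_nocopy", 0 }
};

/* As in XUIOSTAT_INCR()/XUIOSTAT_BUMP(), the member name picks the counter */
#define STAT_INCR(stat, val) \
	atomic_fetch_add(&copy_stats.stat.value, (val))
#define STAT_BUMP(stat) STAT_INCR(stat, 1)

int
main(void)
{
	STAT_BUMP(rbuf_copied);
	STAT_INCR(rbuf_nocopy, 2);
	printf("%s = %" PRIuFAST64 "\n", copy_stats.rbuf_copied.name,
	    atomic_load(&copy_stats.rbuf_copied.value));
	printf("%s = %" PRIuFAST64 "\n", copy_stats.rbuf_nocopy.name,
	    atomic_load(&copy_stats.rbuf_nocopy.value));
	return 0;
}

The atomic add keeps each counter coherent without a lock, which matters because these statistics are bumped from hot I/O paths where taking a mutex per event would be costly.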