Add FAILFAST support
[zfs.git] / module / zfs / dmu.c
index 732742f..aaeec41 100644 (file)
@@ -793,7 +793,7 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
                else
                        dmu_buf_will_dirty(db, tx);
 
-               bcopy(buf, (char *)db->db_data + bufoff, tocpy);
+               (void) memcpy((char *)db->db_data + bufoff, buf, tocpy);
 
                if (tocpy == db->db_size)
                        dmu_buf_fill_done(db, tx);
@@ -831,6 +831,31 @@ dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
  */
 kstat_t *xuio_ksp = NULL;
 
+typedef struct xuio_stats {
+       /* loaned yet not returned arc_buf */
+       kstat_named_t xuiostat_onloan_rbuf;
+       kstat_named_t xuiostat_onloan_wbuf;
+       /* whether a copy is made when loaning out a read buffer */
+       kstat_named_t xuiostat_rbuf_copied;
+       kstat_named_t xuiostat_rbuf_nocopy;
+       /* whether a copy is made when assigning a write buffer */
+       kstat_named_t xuiostat_wbuf_copied;
+       kstat_named_t xuiostat_wbuf_nocopy;
+} xuio_stats_t;
+
+static xuio_stats_t xuio_stats = {
+       { "onloan_read_buf",    KSTAT_DATA_UINT64 },
+       { "onloan_write_buf",   KSTAT_DATA_UINT64 },
+       { "read_buf_copied",    KSTAT_DATA_UINT64 },
+       { "read_buf_nocopy",    KSTAT_DATA_UINT64 },
+       { "write_buf_copied",   KSTAT_DATA_UINT64 },
+       { "write_buf_nocopy",   KSTAT_DATA_UINT64 }
+};
+
+#define XUIOSTAT_INCR(stat, val)        \
+        atomic_add_64(&xuio_stats.stat.value.ui64, (val))
+#define XUIOSTAT_BUMP(stat)     XUIOSTAT_INCR(stat, 1)
+
 int
 dmu_xuio_init(xuio_t *xuio, int nblk)
 {
@@ -950,85 +975,126 @@ xuio_stat_wbuf_nocopy()
 }
 
 #ifdef _KERNEL
+
+/*
+ * Copy up to size bytes between arg_buf and req based on the data direction
+ * described by the req.  If an entire req's data cannot be transferred the
+ * req is updated such that its current index and bv offsets correctly
+ * reference any residual data which could not be copied.  The number of
+ * bytes successfully copied is returned via the offset argument.
+ */
+static int
+dmu_req_copy(void *arg_buf, int size, int *offset, struct request *req)
+{
+       struct bio_vec *bv;
+       struct req_iterator iter;
+       char *bv_buf;
+       int tocpy;
+
+       *offset = 0;
+       rq_for_each_segment(bv, req, iter) {
+
+               /* Fully consumed the passed arg_buf */
+               ASSERT3S(*offset, <=, size);
+               if (size == *offset)
+                       break;
+
+               /* Skip fully consumed bv's */
+               if (bv->bv_len == 0)
+                       continue;
+
+               tocpy = MIN(bv->bv_len, size - *offset);
+               ASSERT3S(tocpy, >=, 0);
+
+               bv_buf = page_address(bv->bv_page) + bv->bv_offset;
+               ASSERT3P(bv_buf, !=, NULL);
+
+               if (rq_data_dir(req) == WRITE)
+                       memcpy(arg_buf + *offset, bv_buf, tocpy);
+               else
+                       memcpy(bv_buf, arg_buf + *offset, tocpy);
+
+               *offset += tocpy;
+               bv->bv_offset += tocpy;
+               bv->bv_len -= tocpy;
+       }
+
+       return 0;
+}
+
 int
-dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
+dmu_read_req(objset_t *os, uint64_t object, struct request *req)
 {
+       uint64_t size = blk_rq_bytes(req);
+       uint64_t offset = blk_rq_pos(req) << 9;
        dmu_buf_t **dbp;
        int numbufs, i, err;
-       xuio_t *xuio = NULL;
 
        /*
         * NB: we could do this block-at-a-time, but it's nice
         * to be reading in parallel.
         */
-       err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG,
-           &numbufs, &dbp);
+       err = dmu_buf_hold_array(os, object, offset, size, TRUE, FTAG,
+                                &numbufs, &dbp);
        if (err)
                return (err);
 
-       if (uio->uio_extflg == UIO_XUIO)
-               xuio = (xuio_t *)uio;
-
        for (i = 0; i < numbufs; i++) {
-               int tocpy;
-               int bufoff;
+               int tocpy, didcpy, bufoff;
                dmu_buf_t *db = dbp[i];
 
-               ASSERT(size > 0);
+               bufoff = offset - db->db_offset;
+               ASSERT3S(bufoff, >=, 0);
 
-               bufoff = uio->uio_loffset - db->db_offset;
                tocpy = (int)MIN(db->db_size - bufoff, size);
+               if (tocpy == 0)
+                       break;
 
-               if (xuio) {
-                       dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
-                       arc_buf_t *dbuf_abuf = dbi->db_buf;
-                       arc_buf_t *abuf = dbuf_loan_arcbuf(dbi);
-                       err = dmu_xuio_add(xuio, abuf, bufoff, tocpy);
-                       if (!err) {
-                               uio->uio_resid -= tocpy;
-                               uio->uio_loffset += tocpy;
-                       }
+               err = dmu_req_copy(db->db_data + bufoff, tocpy, &didcpy, req);
+
+               if (didcpy < tocpy)
+                       err = EIO;
 
-                       if (abuf == dbuf_abuf)
-                               XUIOSTAT_BUMP(xuiostat_rbuf_nocopy);
-                       else
-                               XUIOSTAT_BUMP(xuiostat_rbuf_copied);
-               } else {
-                       err = uiomove((char *)db->db_data + bufoff, tocpy,
-                           UIO_READ, uio);
-               }
                if (err)
                        break;
 
                size -= tocpy;
+               offset += didcpy;
+               err = 0;
        }
        dmu_buf_rele_array(dbp, numbufs, FTAG);
 
        return (err);
 }
 
-static int
-dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
+int
+dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
 {
+       uint64_t size = blk_rq_bytes(req);
+       uint64_t offset = blk_rq_pos(req) << 9;
        dmu_buf_t **dbp;
        int numbufs;
        int err = 0;
        int i;
 
-       err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size,
-           FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH);
+       if (size == 0)
+               return (0);
+
+       err = dmu_buf_hold_array(os, object, offset, size, FALSE, FTAG,
+                                &numbufs, &dbp);
        if (err)
                return (err);
 
        for (i = 0; i < numbufs; i++) {
-               int tocpy;
-               int bufoff;
+               int tocpy, didcpy, bufoff;
                dmu_buf_t *db = dbp[i];
 
-               ASSERT(size > 0);
+               bufoff = offset - db->db_offset;
+               ASSERT3S(bufoff, >=, 0);
 
-               bufoff = uio->uio_loffset - db->db_offset;
                tocpy = (int)MIN(db->db_size - bufoff, size);
+               if (tocpy == 0)
+                       break;
 
                ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
 
@@ -1037,28 +1103,28 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
                else
                        dmu_buf_will_dirty(db, tx);
 
-               /*
-                * XXX uiomove could block forever (eg. nfs-backed
-                * pages).  There needs to be a uiolockdown() function
-                * to lock the pages in memory, so that uiomove won't
-                * block.
-                */
-               err = uiomove((char *)db->db_data + bufoff, tocpy,
-                   UIO_WRITE, uio);
+               err = dmu_req_copy(db->db_data + bufoff, tocpy, &didcpy, req);
 
                if (tocpy == db->db_size)
                        dmu_buf_fill_done(db, tx);
 
+               if (didcpy < tocpy)
+                       err = EIO;
+
                if (err)
                        break;
 
                size -= tocpy;
+               offset += didcpy;
+               err = 0;
        }
 
        dmu_buf_rele_array(dbp, numbufs, FTAG);
        return (err);
 }
+#endif
 
+#ifdef HAVE_ZPL
 int
 dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size,
     dmu_tx_t *tx)
@@ -1763,3 +1829,22 @@ dmu_fini(void)
        sa_cache_fini();
        zfs_dbgmsg_fini();
 }
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dmu_bonus_hold);
+EXPORT_SYMBOL(dmu_free_range);
+EXPORT_SYMBOL(dmu_read);
+EXPORT_SYMBOL(dmu_write);
+
+/* Get information on a DMU object. */
+EXPORT_SYMBOL(dmu_object_info);
+EXPORT_SYMBOL(dmu_object_info_from_dnode);
+EXPORT_SYMBOL(dmu_object_info_from_db);
+EXPORT_SYMBOL(dmu_object_size_from_db);
+
+EXPORT_SYMBOL(dmu_object_set_blocksize);
+EXPORT_SYMBOL(dmu_object_set_checksum);
+EXPORT_SYMBOL(dmu_object_set_compress);
+
+EXPORT_SYMBOL(dmu_ot);
+#endif