Use KM_PUSHPAGE instead of KM_SLEEP
[zfs.git] / module / zfs / dmu.c
index aaeec41..04b02c7 100644 (file)
@@ -381,7 +381,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
                }
                nblks = 1;
        }
-       dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP);
+       dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP | KM_NODEBUG);
 
        if (dn->dn_objset->os_dsl_dataset)
                dp = dn->dn_objset->os_dsl_dataset->ds_dir->dd_pool;
@@ -1122,75 +1122,82 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
        dmu_buf_rele_array(dbp, numbufs, FTAG);
        return (err);
 }
-#endif
 
-#ifdef HAVE_ZPL
 int
-dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size,
-    dmu_tx_t *tx)
+dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
 {
-       dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb;
-       dnode_t *dn;
-       int err;
-
-       if (size == 0)
-               return (0);
+       dmu_buf_t **dbp;
+       int numbufs, i, err;
+       xuio_t *xuio = NULL;
 
-       DB_DNODE_ENTER(db);
-       dn = DB_DNODE(db);
-       err = dmu_write_uio_dnode(dn, uio, size, tx);
-       DB_DNODE_EXIT(db);
+       /*
+        * NB: we could do this block-at-a-time, but it's nice
+        * to be reading in parallel.
+        */
+       err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG,
+           &numbufs, &dbp);
+       if (err)
+               return (err);
 
-       return (err);
-}
+       for (i = 0; i < numbufs; i++) {
+               int tocpy;
+               int bufoff;
+               dmu_buf_t *db = dbp[i];
 
-int
-dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size,
-    dmu_tx_t *tx)
-{
-       dnode_t *dn;
-       int err;
+               ASSERT(size > 0);
 
-       if (size == 0)
-               return (0);
+               bufoff = uio->uio_loffset - db->db_offset;
+               tocpy = (int)MIN(db->db_size - bufoff, size);
 
-       err = dnode_hold(os, object, FTAG, &dn);
-       if (err)
-               return (err);
+               if (xuio) {
+                       dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
+                       arc_buf_t *dbuf_abuf = dbi->db_buf;
+                       arc_buf_t *abuf = dbuf_loan_arcbuf(dbi);
+                       err = dmu_xuio_add(xuio, abuf, bufoff, tocpy);
+                       if (!err) {
+                               uio->uio_resid -= tocpy;
+                               uio->uio_loffset += tocpy;
+                       }
 
-       err = dmu_write_uio_dnode(dn, uio, size, tx);
+                       if (abuf == dbuf_abuf)
+                               XUIOSTAT_BUMP(xuiostat_rbuf_nocopy);
+                       else
+                               XUIOSTAT_BUMP(xuiostat_rbuf_copied);
+               } else {
+                       err = uiomove((char *)db->db_data + bufoff, tocpy,
+                           UIO_READ, uio);
+               }
+               if (err)
+                       break;
 
-       dnode_rele(dn, FTAG);
+               size -= tocpy;
+       }
+       dmu_buf_rele_array(dbp, numbufs, FTAG);
 
        return (err);
 }
 
-int
-dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
-    page_t *pp, dmu_tx_t *tx)
+static int
+dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
 {
        dmu_buf_t **dbp;
-       int numbufs, i;
-       int err;
-
-       if (size == 0)
-               return (0);
+       int numbufs;
+       int err = 0;
+       int i;
 
-       err = dmu_buf_hold_array(os, object, offset, size,
-           FALSE, FTAG, &numbufs, &dbp);
+       err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size,
+           FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH);
        if (err)
                return (err);
 
        for (i = 0; i < numbufs; i++) {
-               int tocpy, copied, thiscpy;
+               int tocpy;
                int bufoff;
                dmu_buf_t *db = dbp[i];
-               caddr_t va;
 
                ASSERT(size > 0);
-               ASSERT3U(db->db_size, >=, PAGESIZE);
 
-               bufoff = offset - db->db_offset;
+               bufoff = uio->uio_loffset - db->db_offset;
                tocpy = (int)MIN(db->db_size - bufoff, size);
 
                ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
@@ -1200,26 +1207,68 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
                else
                        dmu_buf_will_dirty(db, tx);
 
-               for (copied = 0; copied < tocpy; copied += PAGESIZE) {
-                       ASSERT3U(pp->p_offset, ==, db->db_offset + bufoff);
-                       thiscpy = MIN(PAGESIZE, tocpy - copied);
-                       va = zfs_map_page(pp, S_READ);
-                       bcopy(va, (char *)db->db_data + bufoff, thiscpy);
-                       zfs_unmap_page(pp, va);
-                       pp = pp->p_next;
-                       bufoff += PAGESIZE;
-               }
+               /*
+                * XXX uiomove could block forever (e.g. NFS-backed
+                * pages).  There needs to be a uiolockdown() function
+                * to lock the pages in memory, so that uiomove won't
+                * block.
+                */
+               err = uiomove((char *)db->db_data + bufoff, tocpy,
+                   UIO_WRITE, uio);
 
                if (tocpy == db->db_size)
                        dmu_buf_fill_done(db, tx);
 
-               offset += tocpy;
+               if (err)
+                       break;
+
                size -= tocpy;
        }
+
        dmu_buf_rele_array(dbp, numbufs, FTAG);
        return (err);
 }
-#endif
+
+int
+dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size,
+    dmu_tx_t *tx)
+{
+       dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb;
+       dnode_t *dn;
+       int err;
+
+       if (size == 0)
+               return (0);
+
+       DB_DNODE_ENTER(db);
+       dn = DB_DNODE(db);
+       err = dmu_write_uio_dnode(dn, uio, size, tx);
+       DB_DNODE_EXIT(db);
+
+       return (err);
+}
+
+int
+dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size,
+    dmu_tx_t *tx)
+{
+       dnode_t *dn;
+       int err;
+
+       if (size == 0)
+               return (0);
+
+       err = dnode_hold(os, object, FTAG, &dn);
+       if (err)
+               return (err);
+
+       err = dmu_write_uio_dnode(dn, uio, size, tx);
+
+       dnode_rele(dn, FTAG);
+
+       return (err);
+}
+#endif /* _KERNEL */
 
 /*
  * Allocate a loaned anonymous arc buffer.