Fix zvol+btrfs hang
[zfs.git] / module / zfs / dmu.c
index 79024e1..00a7a07 100644 (file)
@@ -381,7 +381,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
                }
                nblks = 1;
        }
-       dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP);
+       dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_PUSHPAGE | KM_NODEBUG);
 
        if (dn->dn_objset->os_dsl_dataset)
                dp = dn->dn_objset->os_dsl_dataset->ds_dir->dd_pool;
@@ -863,11 +863,11 @@ dmu_xuio_init(xuio_t *xuio, int nblk)
        uio_t *uio = &xuio->xu_uio;
 
        uio->uio_iovcnt = nblk;
-       uio->uio_iov = kmem_zalloc(nblk * sizeof (iovec_t), KM_SLEEP);
+       uio->uio_iov = kmem_zalloc(nblk * sizeof (iovec_t), KM_PUSHPAGE);
 
-       priv = kmem_zalloc(sizeof (dmu_xuio_t), KM_SLEEP);
+       priv = kmem_zalloc(sizeof (dmu_xuio_t), KM_PUSHPAGE);
        priv->cnt = nblk;
-       priv->bufs = kmem_zalloc(nblk * sizeof (arc_buf_t *), KM_SLEEP);
+       priv->bufs = kmem_zalloc(nblk * sizeof (arc_buf_t *), KM_PUSHPAGE);
        priv->iovp = uio->uio_iov;
        XUIO_XUZC_PRIV(xuio) = priv;
 
@@ -1022,11 +1022,57 @@ dmu_req_copy(void *arg_buf, int size, int *offset, struct request *req)
        return 0;
 }
 
+static void
+dmu_bio_put(struct bio *bio)
+{
+       struct bio *bio_next;
+
+       while (bio) {
+               bio_next = bio->bi_next;
+               bio_put(bio);
+               bio = bio_next;
+       }
+}
+
+static int
+dmu_bio_clone(struct bio *bio, struct bio **bio_copy)
+{
+       struct bio *bio_root = NULL;
+       struct bio *bio_last = NULL;
+       struct bio *bio_new;
+
+       if (bio == NULL)
+               return EINVAL;
+
+       while (bio) {
+               bio_new = bio_clone(bio, GFP_NOIO);
+               if (bio_new == NULL) {
+                       dmu_bio_put(bio_root);
+                       return ENOMEM;
+               }
+
+               if (bio_last) {
+                       bio_last->bi_next = bio_new;
+                       bio_last = bio_new;
+               } else {
+                       bio_root = bio_new;
+                       bio_last = bio_new;
+               }
+
+               bio = bio->bi_next;
+       }
+
+       *bio_copy = bio_root;
+
+       return 0;
+}
+
 int
 dmu_read_req(objset_t *os, uint64_t object, struct request *req)
 {
        uint64_t size = blk_rq_bytes(req);
        uint64_t offset = blk_rq_pos(req) << 9;
+       struct bio *bio_saved = req->bio;
        dmu_buf_t **dbp;
        int numbufs, i, err;
 
@@ -1039,6 +1085,17 @@ dmu_read_req(objset_t *os, uint64_t object, struct request *req)
        if (err)
                return (err);
 
+       /*
+        * Clone the bio list so the bv->bv_offset and bv->bv_len members
+        * can be safely modified.  The original bio list is relinked into
+        * the request when the function exits.  This is required because
+        * some file systems blindly assume that these values will remain
+        * constant between submit_bio() and the IO completion callback.
+        */
+       err = dmu_bio_clone(bio_saved, &req->bio);
+       if (err)
+               goto error;
+
        for (i = 0; i < numbufs; i++) {
                int tocpy, didcpy, bufoff;
                dmu_buf_t *db = dbp[i];
@@ -1062,6 +1119,10 @@ dmu_read_req(objset_t *os, uint64_t object, struct request *req)
                offset += didcpy;
                err = 0;
        }
+
+       dmu_bio_put(req->bio);
+       req->bio = bio_saved;
+error:
        dmu_buf_rele_array(dbp, numbufs, FTAG);
 
        return (err);
@@ -1072,6 +1133,7 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
 {
        uint64_t size = blk_rq_bytes(req);
        uint64_t offset = blk_rq_pos(req) << 9;
+       struct bio *bio_saved = req->bio;
        dmu_buf_t **dbp;
        int numbufs;
        int err = 0;
@@ -1085,6 +1147,17 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
        if (err)
                return (err);
 
+       /*
+        * Clone the bio list so the bv->bv_offset and bv->bv_len members
+        * can be safely modified.  The original bio list is relinked into
+        * the request when the function exits.  This is required because
+        * some file systems blindly assume that these values will remain
+        * constant between submit_bio() and the IO completion callback.
+        */
+       err = dmu_bio_clone(bio_saved, &req->bio);
+       if (err)
+               goto error;
+
        for (i = 0; i < numbufs; i++) {
                int tocpy, didcpy, bufoff;
                dmu_buf_t *db = dbp[i];
@@ -1119,7 +1192,11 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
                err = 0;
        }
 
+       dmu_bio_put(req->bio);
+       req->bio = bio_saved;
+error:
        dmu_buf_rele_array(dbp, numbufs, FTAG);
+
        return (err);
 }
 
@@ -1268,63 +1345,6 @@ dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size,
 
        return (err);
 }
-
-#ifdef HAVE_ZPL
-int
-dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
-    page_t *pp, dmu_tx_t *tx)
-{
-       dmu_buf_t **dbp;
-       int numbufs, i;
-       int err;
-
-       if (size == 0)
-               return (0);
-
-       err = dmu_buf_hold_array(os, object, offset, size,
-           FALSE, FTAG, &numbufs, &dbp);
-       if (err)
-               return (err);
-
-       for (i = 0; i < numbufs; i++) {
-               int tocpy, copied, thiscpy;
-               int bufoff;
-               dmu_buf_t *db = dbp[i];
-               caddr_t va;
-
-               ASSERT(size > 0);
-               ASSERT3U(db->db_size, >=, PAGESIZE);
-
-               bufoff = offset - db->db_offset;
-               tocpy = (int)MIN(db->db_size - bufoff, size);
-
-               ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
-
-               if (tocpy == db->db_size)
-                       dmu_buf_will_fill(db, tx);
-               else
-                       dmu_buf_will_dirty(db, tx);
-
-               for (copied = 0; copied < tocpy; copied += PAGESIZE) {
-                       ASSERT3U(pp->p_offset, ==, db->db_offset + bufoff);
-                       thiscpy = MIN(PAGESIZE, tocpy - copied);
-                       va = zfs_map_page(pp, S_READ);
-                       bcopy(va, (char *)db->db_data + bufoff, thiscpy);
-                       zfs_unmap_page(pp, va);
-                       pp = pp->p_next;
-                       bufoff += PAGESIZE;
-               }
-
-               if (tocpy == db->db_size)
-                       dmu_buf_fill_done(db, tx);
-
-               offset += tocpy;
-               size -= tocpy;
-       }
-       dmu_buf_rele_array(dbp, numbufs, FTAG);
-       return (err);
-}
-#endif /* HAVE_ZPL */
 #endif /* _KERNEL */
 
 /*
@@ -1488,7 +1508,7 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
                return (EIO);   /* Make zl_get_data do txg_waited_synced() */
        }
 
-       dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_SLEEP);
+       dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_PUSHPAGE);
        dsa->dsa_dr = NULL;
        dsa->dsa_done = done;
        dsa->dsa_zgd = zgd;
@@ -1497,7 +1517,7 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
        zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
            zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp,
            dmu_sync_late_arrival_ready, dmu_sync_late_arrival_done, dsa,
-           ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb));
+           ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, zb));
 
        return (0);
 }
@@ -1612,7 +1632,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
        dr->dt.dl.dr_override_state = DR_IN_DMU_SYNC;
        mutex_exit(&db->db_mtx);
 
-       dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_SLEEP);
+       dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_PUSHPAGE);
        dsa->dsa_dr = dr;
        dsa->dsa_done = done;
        dsa->dsa_zgd = zgd;
@@ -1621,7 +1641,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
        zio_nowait(arc_write(pio, os->os_spa, txg,
            bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db), &zp,
            dmu_sync_ready, dmu_sync_done, dsa,
-           ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
+           ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb));
 
        return (0);
 }
@@ -1918,6 +1938,7 @@ dmu_init(void)
        dnode_init();
        dbuf_init();
        zfetch_init();
+       dmu_tx_init();
        arc_init();
        l2arc_init();
 }
@@ -1927,6 +1948,7 @@ dmu_fini(void)
 {
        l2arc_fini();
        arc_fini();
+       dmu_tx_fini();
        zfetch_fini();
        dbuf_fini();
        dnode_fini();
@@ -1938,19 +1960,24 @@ dmu_fini(void)
 
 #if defined(_KERNEL) && defined(HAVE_SPL)
 EXPORT_SYMBOL(dmu_bonus_hold);
+EXPORT_SYMBOL(dmu_buf_hold_array_by_bonus);
+EXPORT_SYMBOL(dmu_buf_rele_array);
 EXPORT_SYMBOL(dmu_free_range);
 EXPORT_SYMBOL(dmu_read);
 EXPORT_SYMBOL(dmu_write);
-
-/* Get information on a DMU object. */
 EXPORT_SYMBOL(dmu_object_info);
 EXPORT_SYMBOL(dmu_object_info_from_dnode);
 EXPORT_SYMBOL(dmu_object_info_from_db);
 EXPORT_SYMBOL(dmu_object_size_from_db);
-
 EXPORT_SYMBOL(dmu_object_set_blocksize);
 EXPORT_SYMBOL(dmu_object_set_checksum);
 EXPORT_SYMBOL(dmu_object_set_compress);
-
+EXPORT_SYMBOL(dmu_request_arcbuf);
+EXPORT_SYMBOL(dmu_return_arcbuf);
+EXPORT_SYMBOL(dmu_assign_arcbuf);
+EXPORT_SYMBOL(dmu_buf_hold);
 EXPORT_SYMBOL(dmu_ot);
+
+module_param(zfs_mdcomp_disable, int, 0644);
+MODULE_PARM_DESC(zfs_mdcomp_disable, "Disable meta data compression");
 #endif