* [Qemu-devel] [PATCH 1/4] Add a scatter-gather list type and accessors
From: Avi Kivity @ 2009-02-05 9:33 UTC
To: qemu-devel, Anthony Liguori
Scatter-gather lists are used extensively by DMA-capable devices; a
single shared data structure allows more code reuse later on.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
Makefile.target | 2 +-
dma-helpers.c | 38 ++++++++++++++++++++++++++++++++++++++
dma.h | 33 +++++++++++++++++++++++++++++++++
3 files changed, 72 insertions(+), 1 deletions(-)
create mode 100644 dma-helpers.c
create mode 100644 dma.h
diff --git a/Makefile.target b/Makefile.target
index 372d185..28ba17f 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -500,7 +500,7 @@ endif #CONFIG_BSD_USER
# System emulator target
ifndef CONFIG_USER_ONLY
-OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o
+OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o dma-helpers.o
# virtio has to be here due to weird dependency between PCI and virtio-net.
# need to fix this properly
OBJS+=virtio.o virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
diff --git a/dma-helpers.c b/dma-helpers.c
new file mode 100644
index 0000000..11ad3a4
--- /dev/null
+++ b/dma-helpers.c
@@ -0,0 +1,38 @@
+/*
+ * DMA helper functions
+ *
+ * Copyright (c) 2009 Red Hat
+ *
+ * This work is licensed under the terms of the GNU General Public License
+ * (GNU GPL), version 2 or later.
+ */
+
+#include "dma.h"
+
+
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint)
+{
+ qsg->sg = qemu_malloc(alloc_hint * sizeof(ScatterGatherEntry));
+ qsg->nsg = 0;
+ qsg->nalloc = alloc_hint;
+ qsg->size = 0;
+}
+
+void qemu_sglist_add(QEMUSGList *qsg, target_phys_addr_t base,
+ target_phys_addr_t len)
+{
+ if (qsg->nsg == qsg->nalloc) {
+ qsg->nalloc = 2 * qsg->nalloc + 1;
+ qsg->sg = qemu_realloc(qsg->sg, qsg->nalloc * sizeof(ScatterGatherEntry));
+ }
+ qsg->sg[qsg->nsg].base = base;
+ qsg->sg[qsg->nsg].len = len;
+ qsg->size += len;
+ ++qsg->nsg;
+}
+
+void qemu_sglist_destroy(QEMUSGList *qsg)
+{
+ qemu_free(qsg->sg);
+}
+
diff --git a/dma.h b/dma.h
new file mode 100644
index 0000000..16af50f
--- /dev/null
+++ b/dma.h
@@ -0,0 +1,33 @@
+/*
+ * DMA helper functions
+ *
+ * Copyright (c) 2009 Red Hat
+ *
+ * This work is licensed under the terms of the GNU General Public License
+ * (GNU GPL), version 2 or later.
+ */
+
+#ifndef DMA_H
+#define DMA_H
+
+#include <stdio.h>
+#include "cpu.h"
+
+typedef struct {
+ target_phys_addr_t base;
+ target_phys_addr_t len;
+} ScatterGatherEntry;
+
+typedef struct {
+ ScatterGatherEntry *sg;
+ int nsg;
+ int nalloc;
+ target_phys_addr_t size;
+} QEMUSGList;
+
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint);
+void qemu_sglist_add(QEMUSGList *qsg, target_phys_addr_t base,
+ target_phys_addr_t len);
+void qemu_sglist_destroy(QEMUSGList *qsg);
+
+#endif
--
1.6.1.1
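
As a usage sketch of the API above (illustrative only, not part of the patch;
the device fields are hypothetical), a device model that already knows its
guest-physical DMA segments would build and tear down a list like this:

    QEMUSGList sg;
    int i;

    /* alloc_hint is only a starting size; qemu_sglist_add() grows the array */
    qemu_sglist_init(&sg, dev->num_descs);
    for (i = 0; i < dev->num_descs; i++) {
        /* base and len are guest-physical; nothing is mapped here */
        qemu_sglist_add(&sg, dev->desc[i].addr, dev->desc[i].len);
    }
    /* ... hand &sg to a consumer of QEMUSGList ... */
    qemu_sglist_destroy(&sg);   /* frees only the entry array */
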
* [Qemu-devel] [PATCH 2/4] Add qemu_iovec_reset()
From: Avi Kivity @ 2009-02-05 9:33 UTC
To: qemu-devel, Anthony Liguori
Add a helper that resets an existing iovec to the empty state. This removes
the need to deallocate and reallocate it.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
cutils.c | 6 ++++++
qemu-common.h | 1 +
2 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/cutils.c b/cutils.c
index 1090aa4..658746b 100644
--- a/cutils.c
+++ b/cutils.c
@@ -129,6 +129,12 @@ void qemu_iovec_destroy(QEMUIOVector *qiov)
qemu_free(qiov->iov);
}
+void qemu_iovec_reset(QEMUIOVector *qiov)
+{
+ qiov->niov = 0;
+ qiov->size = 0;
+}
+
void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf)
{
uint8_t *p = (uint8_t *)buf;
diff --git a/qemu-common.h b/qemu-common.h
index 8aef558..db33493 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -201,6 +201,7 @@ typedef struct QEMUIOVector {
void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
void qemu_iovec_destroy(QEMUIOVector *qiov);
+void qemu_iovec_reset(QEMUIOVector *qiov);
void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf);
void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count);
--
1.6.1.1
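
To illustrate what the new helper buys (a sketch only; the buffer and length
are placeholders), a long-lived QEMUIOVector can now be reused across requests
instead of being destroyed and re-initialized each time:

    QEMUIOVector qiov;

    qemu_iovec_init(&qiov, 8);          /* allocate once, with a size hint */

    /* per request */
    qemu_iovec_add(&qiov, buf, len);
    /* ... submit qiov ... */
    qemu_iovec_reset(&qiov);            /* empty it; keeps the allocation */

    qemu_iovec_destroy(&qiov);          /* only when the state is torn down */
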
* [Qemu-devel] [PATCH 3/4] Introduce block dma helpers
From: Avi Kivity @ 2009-02-05 9:33 UTC
To: qemu-devel, Anthony Liguori
These helpers perform read/write requests on entire scatter/gather lists,
relieving the device emulation code from mapping and unmapping physical
memory, and from looping when map resources are exhausted.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
dma-helpers.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
dma.h | 8 ++++
2 files changed, 126 insertions(+), 1 deletions(-)
diff --git a/dma-helpers.c b/dma-helpers.c
index 11ad3a4..b2ade19 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -8,7 +8,7 @@
*/
#include "dma.h"
-
+#include "block_int.h"
void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint)
{
@@ -36,3 +36,120 @@ void qemu_sglist_destroy(QEMUSGList *qsg)
qemu_free(qsg->sg);
}
+typedef struct {
+ BlockDriverState *bs;
+ BlockDriverAIOCB *acb;
+ QEMUSGList *sg;
+ uint64_t sector_num;
+ int is_write;
+ int sg_cur_index;
+ target_phys_addr_t sg_cur_byte;
+ QEMUIOVector iov;
+ QEMUBH *bh;
+} DMABlockState;
+
+static void dma_bdrv_cb(void *opaque, int ret);
+
+static void reschedule_dma(void *opaque)
+{
+ DMABlockState *dbs = (DMABlockState *)opaque;
+
+ qemu_bh_delete(dbs->bh);
+ dbs->bh = NULL;
+ dma_bdrv_cb(opaque, 0);
+}
+
+static void continue_after_map_failure(void *opaque)
+{
+ DMABlockState *dbs = (DMABlockState *)opaque;
+
+ dbs->bh = qemu_bh_new(reschedule_dma, dbs);
+ qemu_bh_schedule(dbs->bh);
+}
+
+static void dma_bdrv_cb(void *opaque, int ret)
+{
+ DMABlockState *dbs = (DMABlockState *)opaque;
+ target_phys_addr_t cur_addr, cur_len;
+ void *mem;
+ int i;
+
+ dbs->sector_num += dbs->iov.size / 512;
+ for (i = 0; i < dbs->iov.niov; ++i) {
+ cpu_physical_memory_unmap(dbs->iov.iov[i].iov_base,
+ dbs->iov.iov[i].iov_len, !dbs->is_write,
+ dbs->iov.iov[i].iov_len);
+ }
+ qemu_iovec_reset(&dbs->iov);
+
+ if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
+ dbs->acb->cb(dbs->acb->opaque, ret);
+ qemu_iovec_destroy(&dbs->iov);
+ qemu_aio_release(dbs->acb);
+ qemu_free(dbs);
+ return;
+ }
+
+ while (dbs->sg_cur_index < dbs->sg->nsg) {
+ cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
+ cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
+ mem = cpu_physical_memory_map(cur_addr, &cur_len, !dbs->is_write);
+ if (!mem)
+ break;
+ qemu_iovec_add(&dbs->iov, mem, cur_len);
+ dbs->sg_cur_byte += cur_len;
+ if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
+ dbs->sg_cur_byte = 0;
+ ++dbs->sg_cur_index;
+ }
+ }
+
+ if (dbs->iov.size == 0) {
+ cpu_register_map_client(dbs, continue_after_map_failure);
+ return;
+ }
+
+ if (dbs->is_write) {
+ bdrv_aio_writev(dbs->bs, dbs->sector_num, &dbs->iov,
+ dbs->iov.size / 512, dma_bdrv_cb, dbs);
+ } else {
+ bdrv_aio_readv(dbs->bs, dbs->sector_num, &dbs->iov,
+ dbs->iov.size / 512, dma_bdrv_cb, dbs);
+ }
+}
+
+static BlockDriverAIOCB *dma_bdrv_io(
+ BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num,
+ BlockDriverCompletionFunc *cb, void *opaque,
+ int is_write)
+{
+ DMABlockState *dbs = qemu_malloc(sizeof(*dbs));
+
+ dbs->bs = bs;
+ dbs->acb = qemu_aio_get(bs, cb, opaque);
+ dbs->sg = sg;
+ dbs->sector_num = sector_num;
+ dbs->sg_cur_index = 0;
+ dbs->sg_cur_byte = 0;
+ dbs->is_write = is_write;
+ dbs->bh = NULL;
+ qemu_iovec_init(&dbs->iov, sg->nsg);
+ dma_bdrv_cb(dbs, 0);
+ return dbs->acb;
+}
+
+
+BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
+ QEMUSGList *sg, uint64_t sector,
+ void (*cb)(void *opaque, int ret), void *opaque)
+{
+ return dma_bdrv_io(bs, sg, sector, cb, opaque, 0);
+}
+
+BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
+ QEMUSGList *sg, uint64_t sector,
+ void (*cb)(void *opaque, int ret), void *opaque)
+{
+ return dma_bdrv_io(bs, sg, sector, cb, opaque, 1);
+}
+
diff --git a/dma.h b/dma.h
index 16af50f..d596717 100644
--- a/dma.h
+++ b/dma.h
@@ -12,6 +12,7 @@
#include <stdio.h>
#include "cpu.h"
+#include "block.h"
typedef struct {
target_phys_addr_t base;
@@ -30,4 +31,11 @@ void qemu_sglist_add(QEMUSGList *qsg, target_phys_addr_t base,
target_phys_addr_t len);
void qemu_sglist_destroy(QEMUSGList *qsg);
+BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
+ QEMUSGList *sg, uint64_t sector,
+ BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
+ QEMUSGList *sg, uint64_t sector,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
#endif
--
1.6.1.1
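
A sketch of how a device model is expected to drive these helpers (it mirrors
what patch 4/4 does for IDE; the device state, descriptor loop and callback
here are illustrative):

    static void my_dma_complete(void *opaque, int ret)
    {
        MyDeviceState *s = opaque;      /* hypothetical device state */

        qemu_sglist_destroy(&s->sg);
        /* raise the completion interrupt, treat ret < 0 as an error, ... */
    }

    /* submission path: describe the transfer, then hand it off; mapping,
     * bounce-buffer retries and the read/write loop all happen inside */
    qemu_sglist_init(&s->sg, nb_descs);
    /* ... one qemu_sglist_add() per guest descriptor ... */
    s->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num, my_dma_complete, s);
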
* [Qemu-devel] [PATCH 4/4] Convert IDE to use new dma helpers
From: Avi Kivity @ 2009-02-05 9:33 UTC
To: qemu-devel, Anthony Liguori
Use the new block DMA helpers to perform DMA disk I/O.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
hw/ide.c | 76 ++++++++-----------------------------------------------------
1 files changed, 10 insertions(+), 66 deletions(-)
diff --git a/hw/ide.c b/hw/ide.c
index bcaee46..1db0bcd 100644
--- a/hw/ide.c
+++ b/hw/ide.c
@@ -33,6 +33,7 @@
#include "ppc_mac.h"
#include "mac_dbdma.h"
#include "sh.h"
+#include "dma.h"
/* debug IDE devices */
//#define DEBUG_IDE
@@ -423,7 +424,7 @@ typedef struct IDEState {
int atapi_dma; /* true if dma is requested for the packet cmd */
/* ATA DMA state */
int io_buffer_size;
- QEMUIOVector iovec;
+ QEMUSGList sg;
/* PIO transfer handling */
int req_nb_sectors; /* number of sectors per interrupt */
EndTransferFunc *end_transfer_func;
@@ -876,10 +877,8 @@ static int dma_buf_prepare(BMDMAState *bm, int is_write)
uint32_t size;
} prd;
int l, len;
- void *mem;
- target_phys_addr_t l1;
- qemu_iovec_init(&s->iovec, s->nsector / (TARGET_PAGE_SIZE/512) + 1);
+ qemu_sglist_init(&s->sg, s->nsector / (TARGET_PAGE_SIZE/512) + 1);
s->io_buffer_size = 0;
for(;;) {
if (bm->cur_prd_len == 0) {
@@ -900,15 +899,10 @@ static int dma_buf_prepare(BMDMAState *bm, int is_write)
}
l = bm->cur_prd_len;
if (l > 0) {
- l1 = l;
- mem = cpu_physical_memory_map(bm->cur_prd_addr, &l1, is_write);
- if (!mem) {
- break;
- }
- qemu_iovec_add(&s->iovec, mem, l1);
- bm->cur_prd_addr += l1;
- bm->cur_prd_len -= l1;
- s->io_buffer_size += l1;
+ qemu_sglist_add(&s->sg, bm->cur_prd_addr, l);
+ bm->cur_prd_addr += l;
+ bm->cur_prd_len -= l;
+ s->io_buffer_size += l;
}
}
return 1;
@@ -916,14 +910,7 @@ static int dma_buf_prepare(BMDMAState *bm, int is_write)
static void dma_buf_commit(IDEState *s, int is_write)
{
- int i;
-
- for (i = 0; i < s->iovec.niov; ++i) {
- cpu_physical_memory_unmap(s->iovec.iov[i].iov_base,
- s->iovec.iov[i].iov_len, is_write,
- s->iovec.iov[i].iov_len);
- }
- qemu_iovec_destroy(&s->iovec);
+ qemu_sglist_destroy(&s->sg);
}
static void ide_dma_error(IDEState *s)
@@ -1006,39 +993,6 @@ static int dma_buf_rw(BMDMAState *bm, int is_write)
return 1;
}
-typedef struct {
- BMDMAState *bm;
- void (*cb)(void *opaque, int ret);
- QEMUBH *bh;
-} MapFailureContinuation;
-
-static void reschedule_dma(void *opaque)
-{
- MapFailureContinuation *cont = opaque;
-
- cont->cb(cont->bm, 0);
- qemu_bh_delete(cont->bh);
- qemu_free(cont);
-}
-
-static void continue_after_map_failure(void *opaque)
-{
- MapFailureContinuation *cont = opaque;
-
- cont->bh = qemu_bh_new(reschedule_dma, opaque);
- qemu_bh_schedule(cont->bh);
-}
-
-static void wait_for_bounce_buffer(BMDMAState *bmdma,
- void (*cb)(void *opaque, int ret))
-{
- MapFailureContinuation *cont = qemu_malloc(sizeof(*cont));
-
- cont->bm = bmdma;
- cont->cb = cb;
- cpu_register_map_client(cont, continue_after_map_failure);
-}
-
static void ide_read_dma_cb(void *opaque, int ret)
{
BMDMAState *bm = opaque;
@@ -1080,15 +1034,10 @@ static void ide_read_dma_cb(void *opaque, int ret)
s->io_buffer_size = n * 512;
if (dma_buf_prepare(bm, 1) == 0)
goto eot;
- if (!s->iovec.niov) {
- wait_for_bounce_buffer(bm, ide_read_dma_cb);
- return;
- }
#ifdef DEBUG_AIO
printf("aio_read: sector_num=%" PRId64 " n=%d\n", sector_num, n);
#endif
- bm->aiocb = bdrv_aio_readv(s->bs, sector_num, &s->iovec, n,
- ide_read_dma_cb, bm);
+ bm->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num, ide_read_dma_cb, bm);
ide_dma_submit_check(s, ide_read_dma_cb, bm);
}
@@ -1209,15 +1158,10 @@ static void ide_write_dma_cb(void *opaque, int ret)
/* launch next transfer */
if (dma_buf_prepare(bm, 0) == 0)
goto eot;
- if (!s->iovec.niov) {
- wait_for_bounce_buffer(bm, ide_write_dma_cb);
- return;
- }
#ifdef DEBUG_AIO
printf("aio_write: sector_num=%" PRId64 " n=%d\n", sector_num, n);
#endif
- bm->aiocb = bdrv_aio_writev(s->bs, sector_num, &s->iovec, n,
- ide_write_dma_cb, bm);
+ bm->aiocb = dma_bdrv_write(s->bs, &s->sg, sector_num, ide_write_dma_cb, bm);
ide_dma_submit_check(s, ide_write_dma_cb, bm);
}
--
1.6.1.1
* [Qemu-devel] Re: [PATCH 0/4] Block DMA helpers (v2)
From: Anthony Liguori @ 2009-02-05 21:25 UTC
To: Avi Kivity; +Cc: qemu-devel
Avi Kivity wrote:
> The current dma mapping API is fairly complex to use: mapping can fail
> when bounce resources are exhausted, requiring the caller to reschedule,
> and the interface is fairly low level.
>
> This patchset introduces block device oriented helpers to centralize this
> logic. Devices need only submit a scatter/gather list, and the helpers
> will do all the dirty work of mapping, rescheduling, and unmapping. IDE
> has been converted to the new API.
>
Applied all. Thanks.
BTW, what are your plans for virtio? I'd reckon that just trying to map
the full SG list and failing gracefully if unable would be sufficient.
You could also convert the element to contain a QEMUSGList and then make
use of the IO helpers in virtio-blk and virtio-net. It just gets a
little funky for simpler things like virtio-console and virtio-balloon.
Regards,
Anthony Liguori
* [Qemu-devel] Re: [PATCH 0/4] Block DMA helpers (v2)
From: Avi Kivity @ 2009-02-06 8:52 UTC
To: Anthony Liguori; +Cc: qemu-devel
Anthony Liguori wrote:
> BTW, what are your plans for virtio?
To hope someone else does it.
> I'd reckon that just trying to map the full SG list and failing
> gracefully if unable would be sufficient. You could also convert the
> element to contain a QEMUSGList and then make use of the IO helpers in
> virtio-blk and virtio-net. It just gets a little funky for simpler
> things like virtio-console and virtio-balloon.
One way is to convert the generic transport layer to use two QEMUSGLists
(one for incoming, one for outgoing) and leave it at that. virtio-blk
could use the block dma helpers, virtio-net could do something else, and
we'd add a qemu_sglist_to_buffer(sg, offset, len, buf) and
qemu_sglist_from_buffer(sg, offset, len, buf) to allow low-bandwidth
easy access to virtio data.
This requires that the generic layer be able to tell where a request
ends; don't know if that's the case now.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
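
For illustration, the qemu_sglist_to_buffer()/qemu_sglist_from_buffer()
helpers suggested above might reduce to something like this on top of the
existing accessors (a sketch only; nothing like it is part of this series):

    void qemu_sglist_to_buffer(QEMUSGList *sg, target_phys_addr_t offset,
                               target_phys_addr_t len, void *buf)
    {
        uint8_t *p = buf;
        int i;

        for (i = 0; i < sg->nsg && len; ++i) {
            target_phys_addr_t elen = sg->sg[i].len;

            if (offset >= elen) {       /* skip entries before the offset */
                offset -= elen;
                continue;
            }
            elen -= offset;
            if (elen > len)
                elen = len;
            cpu_physical_memory_read(sg->sg[i].base + offset, p, elen);
            p += elen;
            len -= elen;
            offset = 0;
        }
    }

qemu_sglist_from_buffer() would be the same loop using
cpu_physical_memory_write().
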
* [Qemu-devel] Re: [PATCH 0/4] Block DMA helpers (v2)
From: Anthony Liguori @ 2009-02-06 15:12 UTC
To: Avi Kivity; +Cc: qemu-devel
Avi Kivity wrote:
> Anthony Liguori wrote:
>> BTW, what are your plans for virtio?
>
> To hope someone else does it.
>
>> I'd reckon that just trying to map the full SG list and failing
>> gracefully if unable would be sufficient. You could also convert the
>> element to contain a QEMUSGList and then make use of the IO helpers
>> in virtio-blk and virtio-net. It just gets a little funky for
>> simpler things like virtio-console and virtio-balloon.
>
> One way is to convert the generic transport layer to use two
> QEMUSGLists (one for incoming, one for outgoing) and leave it at
> that. virtio-blk could use the block dma helpers, virtio-net could do
> something else, and we'd add a qemu_sglist_to_buffer(sg, offset, len,
> buf) and qemu_sglist_from_buffer(sg, offset, len, buf) to allow
> low-bandwidth easy access to virtio data.
Yeah, that's probably the best way of doing it.
>
> This requires that the generic layer be able to tell where a request
> ends; don't know if that's the case now.
Maybe create a sub QEMUSGList from another QEMUSGList. The other option
is passing an offset and size to anything that takes a QEMUSGList. You
could potentially get smart with how sub SG lists were managed by making
them just store an internal offset/size. Although things start to get
overly complex at some stage.
Regards,
Anthony Liguori
* [Qemu-devel] Re: [PATCH 0/4] Block DMA helpers (v2)
From: Avi Kivity @ 2009-02-07 21:16 UTC
To: Anthony Liguori; +Cc: qemu-devel
Anthony Liguori wrote:
>>
>> This requires that the generic layer be able to tell where a request
>> ends; don't know if that's the case now.
>
> Maybe create a sub QEMUSGList from another QEMUSGList. The other
> option is passing an offset and size to anything that takes a
> QEMUSGList. You could potentially get smart with how sub SG lists
> were managed by making them just store an internal offset/size.
> Although things start to get overly complex at some stage.
Lifetime issues will kill us. I note that something resembling this
sort of cleverness is holding up copyless networking -- different parts
of an skb have different lifetimes.
But I thought of something simpler: have virtio call virtio-* to inquire
whether a ring entry terminates a request. This would also simplify the
virtio device emulations somewhat.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
* [Qemu-devel] Re: [PATCH 0/4] Block DMA helpers (v2)
From: Anthony Liguori @ 2009-02-07 23:50 UTC
To: Avi Kivity; +Cc: qemu-devel
Avi Kivity wrote:
> Anthony Liguori wrote:
>>>
>>> This requires that the generic layer be able to tell where a request
>>> ends; don't know if that's the case now.
>>
>> Maybe create a sub QEMUSGList from another QEMUSGList. The other
>> option is passing an offset and size to anything that takes a
>> QEMUSGList. You could potentially get smart with how sub SG lists
>> were managed by making them just store an internal offset/size.
>> Although things start to get overly complex at some stage.
>
> Lifetime issues will kill us. I note that something resembling this
> sort of cleverness is holding up copyless networking -- different
> parts of an skb have different lifetimes.
Yup.
> But I thought of something simpler: have virtio call virtio-* to
> inquire whether a ring entry terminates a request. This would also
> simplify the virtio device emulations somewhat.
I don't know that I follow what you mean by "terminates a request". I'm
not sure I know what problem you're talking about solving.
Regards,
Anthony Liguori
* [Qemu-devel] Re: [PATCH 0/4] Block DMA helpers (v2)
From: Avi Kivity @ 2009-02-08 9:30 UTC
To: Anthony Liguori; +Cc: qemu-devel
Anthony Liguori wrote:
>> But I thought of something simpler: have virtio call virtio-* to
>> inquire whether a ring entry terminates a request. This would also
>> simplify the virtio device emulations somewhat.
>
> I don't know that I follow what you mean by "terminates a request".
A request is composed of multiple virtio ring elements. The last ring
entry for a given request terminates it.
> I'm not sure I know what problem you're talking about solving.
The Linux guest interface is at the request level, accepting
scatter/gather lists as parameters. The qemu host interface is at the
ring element level, leaving the task of gathering ring elements to make
a request (and gathering segments to make a scatter/gather list) to users.
I think we should make the qemu virtio host interface talk at the
request level, rather than the ring entry level. If we do that, we'll
get a cleaner, easier to use interface.
But I think I know what you're getting at: virtio sglists contain a
descriptor followed by data. We could handle that by adding
    qemu_sglist_sublist(QEMUSGList *sg, QEMUSGList *sublist,
                        target_phys_addr_t offset, target_phys_addr_t len);
it would involve extra copying and allocation, but we could live with
it, I think.
--
error compiling committee.c: too many arguments to function
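
A sketch of the sublist idea floated above, built on the accessors from
patch 1/4 (illustrative only; this helper was never implemented in the series,
and it shows the extra copying and allocation mentioned):

    void qemu_sglist_sublist(QEMUSGList *sg, QEMUSGList *sublist,
                             target_phys_addr_t offset, target_phys_addr_t len)
    {
        int i;

        qemu_sglist_init(sublist, sg->nsg);
        for (i = 0; i < sg->nsg && len; ++i) {
            target_phys_addr_t elen = sg->sg[i].len;

            if (offset >= elen) {       /* skip entries before the offset */
                offset -= elen;
                continue;
            }
            elen -= offset;
            if (elen > len)
                elen = len;
            qemu_sglist_add(sublist, sg->sg[i].base + offset, elen);
            offset = 0;
            len -= elen;
        }
    }
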
* [Qemu-devel] Re: [PATCH 0/4] Block DMA helpers (v2)
From: Anthony Liguori @ 2009-02-08 17:59 UTC
To: Avi Kivity; +Cc: qemu-devel
Avi Kivity wrote:
> Anthony Liguori wrote:
>>> But I thought of something simpler: have virtio call virtio-* to
>>> inquire whether a ring entry terminates a request. This would also
>>> simplify the virtio device emulations somewhat.
>>
>> I don't know that I follow what you mean by "terminates a request".
>
> A request is composed of multiple virtio ring elements. The last ring
> entry for a given request terminates it.
>
>> I'm not sure I know what problem you're talking about solving.
>
> The Linux guest interface is at the request level, accepting
> scatter/gather lists as parameters.
Correct. More accurately, it is:
    int (*add_buf)(struct virtqueue *vq,
                   struct scatterlist sg[],
                   unsigned int out_num,
                   unsigned int in_num,
                   void *data);
> The qemu host interface is at the ring element level, leaving the
> task of gathering ring elements to make a request (and gathering
> segments to make a scatter/gather list) to users.
I don't think that's totally accurate. Where the host and guest
interface differ is that instead of having the symmetric version of
this, namely:
vdev->buf_added(vq, sg, out_num, in_num, data);
Which then would require some sort of:
put_buf(vq, data, len)
We have a virtqueue_pop(elem) where elem contains all of the information
passed to add_buf. We have a virtqueue_push() function that's analogous
to the above put_buf() and then we have a separate virtio_notify()
function which would be the symmetric form of kick(). The names are
different but the basic functions are the same.
Other than the difference of having split sg lists and using a
structure, I don't see how they are different.
>
> I think we should make the qemu virtio host interface talk at the
> request level, rather than the ring entry level. If we do that, we'll
> get a cleaner, easier to use interface.
Can you give an example? I'm having a hard time seeing what you mean.
> But I think I know what you're getting at: virtio sglists contain a
> descriptor followed by data. We could handle that by adding
>
> qemu_sglist_sublist(QEMUSGList *sg, QEMUSGList *sublist,
> target_phys_addr_t offset, target_phys_addr_t len);
>
> it would involve extra copying and allocation, but we could live with
> it, I think.
Yup.
Regards,
Anthony Liguori
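
For reference, the host-side pattern described above -- pop an element, use
its out_sg/in_sg, push it back with the number of bytes produced, then
notify -- looks roughly like this (a sketch; it assumes the virtqueue_pop(),
virtqueue_push() and virtio_notify() signatures from qemu's virtio.c of this
period, and process_request() is hypothetical):

    static void my_handle_output(VirtIODevice *vdev, VirtQueue *vq)
    {
        VirtQueueElement elem;

        while (virtqueue_pop(vq, &elem)) {
            /* elem.out_sg[0..out_num) carries guest->host data,
             * elem.in_sg[0..in_num) is where results are written */
            unsigned int done = process_request(&elem);   /* hypothetical */

            virtqueue_push(vq, &elem, done);  /* done = bytes written to in_sg */
        }
        virtio_notify(vdev, vq);
    }
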
* [Qemu-devel] Re: [PATCH 0/4] Block DMA helpers (v2)
From: Avi Kivity @ 2009-02-08 18:04 UTC
To: Anthony Liguori; +Cc: qemu-devel
Anthony Liguori wrote:
>> The qemu host interface is at the ring element level, leaving the
>> task of gathering ring elements to make a request (and gathering
>> segments to make a scatter/gather list) to users.
>
> I don't think that's totally accurate. Where the host and guest
> interface differ is that instead of having the symmetric version of
> this, namely:
>
> vdev->buf_added(vq, sg, out_num, in_num, data);
>
> Which then would require some sort of:
>
> put_buf(vq, data, len)
>
> We have a virtqueue_pop(elem) where elem contains all of the
> information passed to add_buf. We have a virtqueue_push() function
> that's analogous to the above put_buf() and then we have a separate
> virtio_notify() function which would be the symmetric form of kick().
> The names are different but the basic functions are the same.
>
> Other than the difference of having split sg lists and using a
> structure, I don't see how they are different.
>
You're right - I didn't dig deep enough.
We could replace
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
by QEMUSGList, which would be a minor improvement, but I thought things
were very different for some reason.
>>
>> I think we should make the qemu virtio host interface talk at the
>> request level, rather than the ring entry level. If we do that,
>> we'll get a cleaner, easier to use interface.
>
> Can you give an example? I'm having a hard time seeing what you mean.
An example would be how the code is structured now (as opposed to how I
thought it was structured).
--
error compiling committee.c: too many arguments to function
* [Qemu-devel] Re: [PATCH 0/4] Block DMA helpers (v2)
From: Anthony Liguori @ 2009-02-08 19:04 UTC
To: Avi Kivity; +Cc: qemu-devel
Avi Kivity wrote:
> We could replace
>
> struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
> struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
>
> by QEMUSGList, which would be a minor improvement, but I thought
> things were very different for some reason.
I'm not sure that it's just a minor improvement. To maintain this
interface, we have to map the full request. If we exposed QEMUSGLists,
then each device could choose the mapping policy. Since there's no good
reason to use virtio to DMA MMIO memory, I'm unsure which is the best
approach.
> An example would be how the code is structured now (as opposed to how
> I thought it was structured).
Okay, I was certain I was missing something obvious :-)
Regards,
Anthony Liguori
* [Qemu-devel] [PATCH 3/4] Introduce block dma helpers
From: Avi Kivity @ 2009-02-04 12:25 UTC
To: qemu-devel, Anthony Liguori
These helpers perform read/write requests on entire scatter/gather lists,
relieving the device emulation code from mapping and unmapping physical
memory, and from looping when map resources are exhausted.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
dma-helpers.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
dma.h | 8 ++++
2 files changed, 126 insertions(+), 1 deletions(-)
diff --git a/dma-helpers.c b/dma-helpers.c
index 315834e..9679f5d 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -1,5 +1,5 @@
#include "dma.h"
-
+#include "block_int.h"
void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint)
{
@@ -27,3 +27,120 @@ void qemu_sglist_destroy(QEMUSGList *qsg)
qemu_free(qsg->sg);
}
+typedef struct {
+ BlockDriverState *bs;
+ BlockDriverAIOCB *acb;
+ QEMUSGList *sg;
+ uint64_t sector_num;
+ int is_write;
+ int sg_cur_index;
+ target_phys_addr_t sg_cur_byte;
+ QEMUIOVector iov;
+ QEMUBH *bh;
+} DMABlockState;
+
+static void dma_bdrv_cb(void *opaque, int ret);
+
+static void reschedule_dma(void *opaque)
+{
+ DMABlockState *dbs = (DMABlockState *)opaque;
+
+ qemu_bh_delete(dbs->bh);
+ dbs->bh = NULL;
+ dma_bdrv_cb(opaque, 0);
+}
+
+static void continue_after_map_failure(void *opaque)
+{
+ DMABlockState *dbs = (DMABlockState *)opaque;
+
+ dbs->bh = qemu_bh_new(reschedule_dma, dbs);
+ qemu_bh_schedule(dbs->bh);
+}
+
+static void dma_bdrv_cb(void *opaque, int ret)
+{
+ DMABlockState *dbs = (DMABlockState *)opaque;
+ target_phys_addr_t cur_addr, cur_len;
+ void *mem;
+ int i;
+
+ dbs->sector_num += dbs->iov.size / 512;
+ for (i = 0; i < dbs->iov.niov; ++i) {
+ cpu_physical_memory_unmap(dbs->iov.iov[i].iov_base,
+ dbs->iov.iov[i].iov_len, !dbs->is_write,
+ dbs->iov.iov[i].iov_len);
+ }
+ qemu_iovec_reset(&dbs->iov);
+
+ if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
+ dbs->acb->cb(dbs->acb->opaque, ret);
+ qemu_iovec_destroy(&dbs->iov);
+ qemu_aio_release(dbs->acb);
+ qemu_free(dbs);
+ return;
+ }
+
+ while (dbs->sg_cur_index < dbs->sg->nsg) {
+ cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
+ cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
+ mem = cpu_physical_memory_map(cur_addr, &cur_len, !dbs->is_write);
+ if (!mem)
+ break;
+ qemu_iovec_add(&dbs->iov, mem, cur_len);
+ dbs->sg_cur_byte += cur_len;
+ if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
+ dbs->sg_cur_byte = 0;
+ ++dbs->sg_cur_index;
+ }
+ }
+
+ if (dbs->iov.size == 0) {
+ cpu_register_map_client(dbs, continue_after_map_failure);
+ return;
+ }
+
+ if (dbs->is_write) {
+ bdrv_aio_writev(dbs->bs, dbs->sector_num, &dbs->iov,
+ dbs->iov.size / 512, dma_bdrv_cb, dbs);
+ } else {
+ bdrv_aio_readv(dbs->bs, dbs->sector_num, &dbs->iov,
+ dbs->iov.size / 512, dma_bdrv_cb, dbs);
+ }
+}
+
+static BlockDriverAIOCB *dma_bdrv_io(
+ BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num,
+ BlockDriverCompletionFunc *cb, void *opaque,
+ int is_write)
+{
+ DMABlockState *dbs = qemu_malloc(sizeof(*dbs));
+
+ dbs->bs = bs;
+ dbs->acb = qemu_aio_get(bs, cb, opaque);
+ dbs->sg = sg;
+ dbs->sector_num = sector_num;
+ dbs->sg_cur_index = 0;
+ dbs->sg_cur_byte = 0;
+ dbs->is_write = is_write;
+ dbs->bh = NULL;
+ qemu_iovec_init(&dbs->iov, sg->nsg);
+ dma_bdrv_cb(dbs, 0);
+ return dbs->acb;
+}
+
+
+BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
+ QEMUSGList *sg, uint64_t sector,
+ void (*cb)(void *opaque, int ret), void *opaque)
+{
+ return dma_bdrv_io(bs, sg, sector, cb, opaque, 0);
+}
+
+BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
+ QEMUSGList *sg, uint64_t sector,
+ void (*cb)(void *opaque, int ret), void *opaque)
+{
+ return dma_bdrv_io(bs, sg, sector, cb, opaque, 1);
+}
+
diff --git a/dma.h b/dma.h
index 3b56fa6..5ce54fb 100644
--- a/dma.h
+++ b/dma.h
@@ -3,6 +3,7 @@
#include <stdio.h>
#include "cpu.h"
+#include "block.h"
typedef struct {
target_phys_addr_t base;
@@ -21,4 +22,11 @@ void qemu_sglist_add(QEMUSGList *qsg, target_phys_addr_t base,
target_phys_addr_t len);
void qemu_sglist_destroy(QEMUSGList *qsg);
+BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
+ QEMUSGList *sg, uint64_t sector,
+ BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
+ QEMUSGList *sg, uint64_t sector,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
#endif
--
1.6.1.1