qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Blue Swirl" <blauwirbel@gmail.com>
To: qemu-devel@nongnu.org
Cc: Paul Brook <paul@codesourcery.com>
Subject: Re: [Qemu-devel] Faster, generic IO/DMA model with vectored AIO?
Date: Tue, 30 Oct 2007 22:09:10 +0200	[thread overview]
Message-ID: <f43fc5580710301309p35d83da0r81f767bbcd1967df@mail.gmail.com> (raw)
In-Reply-To: <f43fc5580710291233l703e6c11yc9a6e7cc8d690e5@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1206 bytes --]

On 10/29/07, Blue Swirl <blauwirbel@gmail.com> wrote:
> We could cache the resolved addresses to overcome the additional setup
> overhead. Each stage should install cache invalidation callbacks or a
> method to call for recalculation of the addresses. For example IOMMU
> or ESPDMA mappings change very often.

I meant to write "don't change very often."

This version actually resolves the host memory address so that
scsi-disk could (with some additional plumbing) write directly to the
final destination.

I think both pre- and postprocessing hooks may be needed, but those
are not implemented yet.

What about error handling? For example, the first page is OK but the
second is not. Truncate all further blocks and install a
post-processing hook that raises a bus error?

Example output:
esp
DMADriverAIOCB 0x27433f0
IO ranges:
base 0000000000000000 len 0000000000000800
Prehooks:
Posthooks:
espdma
DMADriverAIOCB 0x27433f0
IO ranges:
base 00000000fe00000a len 0000000000000800
Prehooks:
Posthooks:
iommu
DMADriverAIOCB 0x27433f0
IO ranges:
base 0000000007fe100a len 0000000000000800
Prehooks:
Posthooks:
physical
DMADriverAIOCB 0x27433f0
IO ranges:
base 00002b8e6f82200a len 0000000000000800
Prehooks:
Posthooks:

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: gdma_aiov.diff --]
[-- Type: text/x-diff; name=gdma_aiov.diff, Size: 16048 bytes --]

Index: qemu/vl.h
===================================================================
--- qemu.orig/vl.h	2007-10-29 16:59:37.000000000 +0000
+++ qemu/vl.h	2007-10-30 19:08:35.000000000 +0000
@@ -746,6 +746,109 @@
 
 #include "hw/irq.h"
 
+/* Generic DMA API */
+
+typedef void DMADriverCompletionFunc(void *opaque, int ret);
+
+typedef struct qemu_iolist {
+    target_phys_addr_t iov_base;
+    target_phys_addr_t iov_len;
+    struct qemu_iolist *next;
+} qemu_iolist;
+
+typedef struct DMADriverAIOCB DMADriverAIOCB;
+
+typedef DMADriverAIOCB *
+DMATranslationHandler(void *opaque, DMADriverAIOCB *request, int is_write);
+
+typedef struct DMACompletionEntry {
+    DMATranslationHandler *func;
+    void *opaque;
+    struct DMACompletionEntry *next;
+} DMACompletionEntry;
+
+struct DMADriverAIOCB {
+    qemu_iolist *iolist;
+    DMACompletionEntry *prehook;
+    DMACompletionEntry *posthook;
+    struct DMADriverAIOCB *next;
+};
+
+typedef struct qemu_bus {
+    unsigned int bus_bits;
+    DMATranslationHandler *north_handler;
+    void *north_handler_opaque;
+    DMATranslationHandler *south_handler;
+    void *south_handler_opaque;
+} qemu_bus;
+
+static inline qemu_bus *
+bus_init(unsigned int bus_bits,
+         DMATranslationHandler north_handler,
+         void *north_handler_opaque,
+         DMATranslationHandler south_handler,
+         void *south_handler_opaque)
+{
+    qemu_bus *bus;
+
+    bus = qemu_mallocz(sizeof(qemu_bus));
+    bus->bus_bits = bus_bits;
+    bus->north_handler = north_handler;
+    bus->north_handler_opaque = north_handler_opaque;
+    bus->south_handler = south_handler;
+    bus->south_handler_opaque = south_handler_opaque;
+    return bus;
+}
+
+/* Direction CPU->bridge->device/memory */
+static inline DMADriverAIOCB *
+bus_translate_south(qemu_bus *bus, DMADriverAIOCB *request, int is_write)
+{
+    return bus->south_handler(bus->south_handler_opaque, request, is_write);
+}
+
+/* From device towards CPU/memory (DMA) */
+static inline DMADriverAIOCB *
+bus_translate_north(qemu_bus *bus, DMADriverAIOCB *request, int is_write)
+{
+    return bus->north_handler(bus->north_handler_opaque, request, is_write);
+}
+
+static inline DMADriverAIOCB *
+bus_build_aiocb(target_phys_addr_t addr, target_phys_addr_t len)
+{
+    DMADriverAIOCB *d;
+
+    d = qemu_mallocz(sizeof(DMADriverAIOCB));
+    d->iolist = qemu_mallocz(sizeof(qemu_iolist));
+    d->iolist->iov_base = addr;
+    d->iolist->iov_len = len;
+    return d;
+}
+
+#if 1 || DEBUG_GDMA
+static inline void
+bus_dump_aiocb(DMADriverAIOCB *d)
+{
+    qemu_iolist *io;
+    DMACompletionEntry *e;
+
+    fprintf(stderr, "DMADriverAIOCB %p\nIO ranges:\n", d);
+    for (io = d->iolist; io != NULL; io = io->next) {
+        fprintf(stderr, "base " TARGET_FMT_plx " len " TARGET_FMT_plx "\n",
+                io->iov_base, io->iov_len);
+    }
+    fprintf(stderr, "Prehooks:\n");
+    for (e = d->prehook; e != NULL; e = e->next) {
+        fprintf(stderr, "func %p opaque %p\n", e->func, e->opaque);
+    }
+    fprintf(stderr, "Posthooks:\n");
+    for (e = d->posthook; e != NULL; e = e->next) {
+        fprintf(stderr, "func %p opaque %p\n", e->func, e->opaque);
+    }
+}
+#endif
+
 /* ISA bus */
 
 extern target_phys_addr_t isa_mem_base;
@@ -1253,7 +1356,8 @@
 extern QEMUMachine ss5_machine, ss10_machine;
 
 /* iommu.c */
-void *iommu_init(target_phys_addr_t addr);
+void *iommu_init(target_phys_addr_t addr, qemu_bus *parent_bus,
+                 qemu_bus **bus);
 void sparc_iommu_memory_rw(void *opaque, target_phys_addr_t addr,
                                  uint8_t *buf, int len, int is_write);
 static inline void sparc_iommu_memory_read(void *opaque,
@@ -1308,11 +1412,13 @@
 /* esp.c */
 void esp_scsi_attach(void *opaque, BlockDriverState *bd, int id);
 void *esp_init(BlockDriverState **bd, target_phys_addr_t espaddr,
-               void *dma_opaque, qemu_irq irq, qemu_irq *reset);
+               void *dma_opaque, qemu_irq irq, qemu_irq *reset,
+               qemu_bus *parent_bus, qemu_bus **bus);
 
 /* sparc32_dma.c */
 void *sparc32_dma_init(target_phys_addr_t daddr, qemu_irq parent_irq,
-                       void *iommu, qemu_irq **dev_irq, qemu_irq **reset);
+                       void *iommu, qemu_irq **dev_irq, qemu_irq **reset,
+                       qemu_bus *parent_bus, qemu_bus **bus);
 void ledma_memory_read(void *opaque, target_phys_addr_t addr,
                        uint8_t *buf, int len, int do_bswap);
 void ledma_memory_write(void *opaque, target_phys_addr_t addr,
@@ -1428,6 +1534,8 @@
    scsi_{read,write}_data.  */
 void scsi_read_data(SCSIDevice *s, uint32_t tag);
 int scsi_write_data(SCSIDevice *s, uint32_t tag);
+void scsi_read_data_aio(SCSIDevice *s, uint32_t tag, DMADriverAIOCB *d);
+int scsi_write_data_aio(SCSIDevice *s, uint32_t tag, DMADriverAIOCB *d);
 void scsi_cancel_io(SCSIDevice *s, uint32_t tag);
 uint8_t *scsi_get_buf(SCSIDevice *s, uint32_t tag);
 
Index: qemu/hw/sun4m.c
===================================================================
--- qemu.orig/hw/sun4m.c	2007-10-29 16:59:37.000000000 +0000
+++ qemu/hw/sun4m.c	2007-10-30 19:09:12.000000000 +0000
@@ -306,6 +306,32 @@
     env->halted = 1;
 }
 
+static DMADriverAIOCB *
+physical_memory_bus_translate_north(void *opaque, DMADriverAIOCB *request,
+                                    int is_write)
+{
+    qemu_iolist *io;
+
+    for (io = request->iolist; io != NULL; io = io->next) {
+        if (io->iov_base < phys_ram_size)
+            io->iov_base += (unsigned long)phys_ram_base;
+        else
+            io->iov_len = 0;
+    }
+    fprintf(stderr, "physical\n");
+    bus_dump_aiocb(request);
+    return request;
+}
+
+static DMADriverAIOCB *
+physical_memory_bus_translate_south(void *opaque,
+                                    DMADriverAIOCB *request,
+                                    int is_write)
+{
+    // Does not exist?
+    return request;
+}
+
 static void *sun4m_hw_init(const struct hwdef *hwdef, int RAM_size,
                            DisplayState *ds, const char *cpu_model)
 
@@ -317,6 +343,8 @@
     qemu_irq *cpu_irqs[MAX_CPUS], *slavio_irq, *slavio_cpu_irq,
         *espdma_irq, *ledma_irq;
     qemu_irq *esp_reset, *le_reset;
+    qemu_bus *memory_bus, *iommu_bus, *espdma_bus, *ledma_bus, *esp_bus,
+        *scsi_bus;
 
     /* init CPUs */
     sparc_find_by_name(cpu_model, &def);
@@ -345,7 +373,12 @@
     /* allocate RAM */
     cpu_register_physical_memory(0, RAM_size, 0);
 
-    iommu = iommu_init(hwdef->iommu_base);
+    memory_bus = bus_init(TARGET_PHYS_ADDR_BITS,
+                          physical_memory_bus_translate_north,
+                          NULL,
+                          physical_memory_bus_translate_south,
+                          NULL);
+    iommu = iommu_init(hwdef->iommu_base, memory_bus, &iommu_bus);
     slavio_intctl = slavio_intctl_init(hwdef->intctl_base,
                                        hwdef->intctl_base + 0x10000ULL,
                                        &hwdef->intbit_to_level[0],
@@ -354,11 +387,12 @@
                                        hwdef->clock_irq);
 
     espdma = sparc32_dma_init(hwdef->dma_base, slavio_irq[hwdef->esp_irq],
-                              iommu, &espdma_irq, &esp_reset);
+                              iommu, &espdma_irq, &esp_reset, iommu_bus,
+                              &espdma_bus);
 
     ledma = sparc32_dma_init(hwdef->dma_base + 16ULL,
                              slavio_irq[hwdef->le_irq], iommu, &ledma_irq,
-                             &le_reset);
+                             &le_reset, iommu_bus, &ledma_bus);
 
     if (graphic_depth != 8 && graphic_depth != 24) {
         fprintf(stderr, "qemu: Unsupported depth: %d\n", graphic_depth);
@@ -392,7 +426,7 @@
     fdctrl_init(slavio_irq[hwdef->fd_irq], 0, 1, hwdef->fd_base, fd_table);
 
     main_esp = esp_init(bs_table, hwdef->esp_base, espdma, *espdma_irq,
-                        esp_reset);
+                        esp_reset, espdma_bus, &esp_bus);
 
     for (i = 0; i < MAX_DISKS; i++) {
         if (bs_table[i]) {
Index: qemu/hw/iommu.c
===================================================================
--- qemu.orig/hw/iommu.c	2007-10-29 16:59:37.000000000 +0000
+++ qemu/hw/iommu.c	2007-10-30 19:40:15.000000000 +0000
@@ -104,6 +104,7 @@
     target_phys_addr_t addr;
     uint32_t regs[IOMMU_NREGS];
     target_phys_addr_t iostart;
+    qemu_bus *bus;
 } IOMMUState;
 
 static uint32_t iommu_mem_readw(void *opaque, target_phys_addr_t addr)
@@ -244,6 +245,64 @@
     s->regs[IOMMU_AFAR] = addr;
 }
 
+static DMADriverAIOCB *
+iommu_bus_translate_north(void *opaque, DMADriverAIOCB *request, int is_write)
+{
+    IOMMUState *s = opaque;
+    qemu_iolist *io, *new_io, *next_io;
+    uint32_t flags;
+    int l, first;
+    target_phys_addr_t addr, prev_addr, len, page, phys_addr;
+
+    for (io = request->iolist; io != NULL; io = next_io) {
+        addr = io->iov_base;
+        len = io->iov_len;
+        next_io = io->next;
+        first = 1;
+        while (len > 0) {
+            page = addr & TARGET_PAGE_MASK;
+            l = (page + TARGET_PAGE_SIZE) - addr;
+            if (l > len)
+                l = len;
+            flags = iommu_page_get_flags(s, page);
+            if (!(flags & IOPTE_VALID)) {
+                return NULL;
+            }
+            if (is_write) {
+                if (!(flags & IOPTE_WRITE)) {
+                    return NULL;
+                }
+            }
+            phys_addr = iommu_translate_pa(s, addr, flags);
+            if (!first && addr != (prev_addr + TARGET_PAGE_SIZE)) {
+                new_io = qemu_mallocz(sizeof(qemu_iolist));
+                new_io->iov_base = phys_addr;
+                new_io->iov_len = len;
+                new_io->next = io->next;
+                io->next = new_io;
+            } else {
+                io->iov_base = phys_addr;
+                io->iov_len = len;
+            }
+            prev_addr = page;
+            len -= l;
+            addr += l;
+            first = 0;
+        }
+    }
+    fprintf(stderr, "iommu\n");
+    bus_dump_aiocb(request);
+    bus_translate_north(s->bus, request, is_write);
+    return request;
+}
+
+static DMADriverAIOCB *
+iommu_bus_translate_south(void *opaque, DMADriverAIOCB *request, int is_write)
+{
+    // 1:1 mapping
+    return request;
+}
+
 void sparc_iommu_memory_rw(void *opaque, target_phys_addr_t addr,
                            uint8_t *buf, int len, int is_write)
 {
@@ -311,7 +370,7 @@
     s->regs[IOMMU_CTRL] = IOMMU_VERSION;
 }
 
-void *iommu_init(target_phys_addr_t addr)
+void *iommu_init(target_phys_addr_t addr, qemu_bus *parent_bus, qemu_bus **bus)
 {
     IOMMUState *s;
     int iommu_io_memory;
@@ -321,9 +380,12 @@
         return NULL;
 
     s->addr = addr;
+    s->bus = parent_bus;
 
     iommu_io_memory = cpu_register_io_memory(0, iommu_mem_read, iommu_mem_write, s);
     cpu_register_physical_memory(addr, IOMMU_NREGS * 4, iommu_io_memory);
+    *bus = bus_init(32, iommu_bus_translate_north, s,
+                    iommu_bus_translate_south, s);
 
     register_savevm("iommu", addr, 2, iommu_save, iommu_load, s);
     qemu_register_reset(iommu_reset, s);
Index: qemu/hw/sparc32_dma.c
===================================================================
--- qemu.orig/hw/sparc32_dma.c	2007-10-29 16:59:37.000000000 +0000
+++ qemu/hw/sparc32_dma.c	2007-10-30 19:29:06.000000000 +0000
@@ -60,6 +60,7 @@
     qemu_irq irq;
     void *iommu;
     qemu_irq dev_reset;
+    qemu_bus *bus;
 };
 
 /* Note: on sparc, the lance 16 bit bus is swapped */
@@ -128,6 +129,27 @@
     }
 }
 
+static DMADriverAIOCB *
+espdma_bus_translate_north(void *opaque, DMADriverAIOCB *request, int is_write)
+{
+    DMAState *s = opaque;
+    qemu_iolist *io;
+
+    for (io = request->iolist; io != NULL; io = io->next)
+        io->iov_base = (target_phys_addr_t)s->dmaregs[1];
+    fprintf(stderr, "espdma\n");
+    bus_dump_aiocb(request);
+    bus_translate_north(s->bus, request, is_write);
+    return request;
+}
+
+static DMADriverAIOCB *
+espdma_bus_translate_south(void *opaque, DMADriverAIOCB *request, int is_write)
+{
+    // 1:1 mapping
+    return request;
+}
+
 void espdma_memory_read(void *opaque, uint8_t *buf, int len)
 {
     DMAState *s = opaque;
@@ -238,7 +260,8 @@
 }
 
 void *sparc32_dma_init(target_phys_addr_t daddr, qemu_irq parent_irq,
-                       void *iommu, qemu_irq **dev_irq, qemu_irq **reset)
+                       void *iommu, qemu_irq **dev_irq, qemu_irq **reset,
+                       qemu_bus *parent_bus, qemu_bus **bus)
 {
     DMAState *s;
     int dma_io_memory;
@@ -258,6 +281,9 @@
     *dev_irq = qemu_allocate_irqs(dma_set_irq, s, 1);
 
     *reset = &s->dev_reset;
+    s->bus = parent_bus;
+    *bus = bus_init(32, espdma_bus_translate_north, s,
+                    espdma_bus_translate_south, s);
 
     return s;
 }
Index: qemu/hw/esp.c
===================================================================
--- qemu.orig/hw/esp.c	2007-10-29 16:59:37.000000000 +0000
+++ qemu/hw/esp.c	2007-10-30 19:27:46.000000000 +0000
@@ -74,6 +74,7 @@
     uint8_t *async_buf;
     uint32_t async_len;
     void *dma_opaque;
+    qemu_bus *bus;
 };
 
 #define STAT_DO 0x00
@@ -144,15 +145,25 @@
     datalen = scsi_send_command(s->current_dev, 0, &buf[1], lun);
     s->ti_size = datalen;
     if (datalen != 0) {
+        DMADriverAIOCB *d;
+
         s->rregs[4] = STAT_IN | STAT_TC;
         s->dma_left = 0;
         s->dma_counter = 0;
         if (datalen > 0) {
+            d = bus_build_aiocb(0, datalen);
+            fprintf(stderr, "esp\n");
+            bus_dump_aiocb(d);
+            bus_translate_north(s->bus, d, 1);
             s->rregs[4] |= STAT_DI;
-            scsi_read_data(s->current_dev, 0);
+            scsi_read_data_aio(s->current_dev, 0, d);
         } else {
+            d = bus_build_aiocb(0, -datalen);
+            fprintf(stderr, "esp\n");
+            bus_dump_aiocb(d);
+            bus_translate_north(s->bus, d, 0);
             s->rregs[4] |= STAT_DO;
-            scsi_write_data(s->current_dev, 0);
+            scsi_write_data_aio(s->current_dev, 0, d);
         }
     }
     s->rregs[5] = INTR_BS | INTR_FC;
@@ -330,6 +341,22 @@
     }
 }
 
+static DMADriverAIOCB *
+esp_bus_translate_north(void *opaque, DMADriverAIOCB *request, int is_write)
+{
+    // 1:1 mapping?
+    fprintf(stderr, "esp\n");
+    bus_dump_aiocb(request);
+    return request;
+}
+
+static DMADriverAIOCB *
+esp_bus_translate_south(void *opaque, DMADriverAIOCB *request, int is_write)
+{
+    // 1:1 mapping
+    return request;
+}
+
 static void esp_reset(void *opaque)
 {
     ESPState *s = opaque;
@@ -575,7 +602,8 @@
 }
 
 void *esp_init(BlockDriverState **bd, target_phys_addr_t espaddr,
-               void *dma_opaque, qemu_irq irq, qemu_irq *reset)
+               void *dma_opaque, qemu_irq irq, qemu_irq *reset,
+               qemu_bus *parent_bus, qemu_bus **bus)
 {
     ESPState *s;
     int esp_io_memory;
@@ -587,9 +615,11 @@
     s->bd = bd;
     s->irq = irq;
     s->dma_opaque = dma_opaque;
-
+    s->bus = parent_bus;
     esp_io_memory = cpu_register_io_memory(0, esp_mem_read, esp_mem_write, s);
     cpu_register_physical_memory(espaddr, ESP_SIZE, esp_io_memory);
+    *bus = bus_init(32, esp_bus_translate_north, s,
+                    esp_bus_translate_south, s);
 
     esp_reset(s);
 
Index: qemu/hw/scsi-disk.c
===================================================================
--- qemu.orig/hw/scsi-disk.c	2007-10-29 16:59:37.000000000 +0000
+++ qemu/hw/scsi-disk.c	2007-10-30 18:55:19.000000000 +0000
@@ -199,6 +199,16 @@
     r->sector_count -= n;
 }
 
+void scsi_read_data_aio(SCSIDevice *s, uint32_t tag, DMADriverAIOCB *d)
+{
+    scsi_read_data(s, tag);
+}
+
+int scsi_write_data_aio(SCSIDevice *s, uint32_t tag, DMADriverAIOCB *d)
+{
+    return scsi_write_data(s, tag);
+}
+
 static void scsi_write_complete(void * opaque, int ret)
 {
     SCSIRequest *r = (SCSIRequest *)opaque;

  reply	other threads:[~2007-10-30 20:09 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-10-27 12:56 [Qemu-devel] Faster, generic IO/DMA model with vectored AIO? Blue Swirl
2007-10-27 16:53 ` [Qemu-devel] " Blue Swirl
2007-10-28  1:29 ` [Qemu-devel] " Paul Brook
2007-10-28  9:09   ` Blue Swirl
2007-10-28 19:10     ` Jamie Lokier
2007-10-29 19:33       ` Blue Swirl
2007-10-30 20:09         ` Blue Swirl [this message]
2007-10-28 20:55     ` Blue Swirl

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f43fc5580710301309p35d83da0r81f767bbcd1967df@mail.gmail.com \
    --to=blauwirbel@gmail.com \
    --cc=paul@codesourcery.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).