qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Avi Kivity <avi@redhat.com>
To: qemu-devel@nongnu.org, Blue Swirl <blauwirbel@gmail.com>,
	Anthony Liguori <anthony@codemonkey.ws>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Alex Williamson <alex.williamson@redhat.com>,
	liu ping fan <qemulist@gmail.com>,
	Paolo Bonzini <pbonzini@redhat.com>
Subject: [Qemu-devel] [RFC v1 4/7] pci: switch iommu to using the memory API
Date: Thu, 11 Oct 2012 15:27:00 +0200	[thread overview]
Message-ID: <1349962023-560-5-git-send-email-avi@redhat.com> (raw)
In-Reply-To: <1349962023-560-1-git-send-email-avi@redhat.com>

Instead of requesting a DMAContext from the bus implementation, use a
MemoryRegion.  This can be initialized using memory_region_init_iommu()
(or memory_region_init_alias() for simple, static translations).

Add a destructor, since setups that have per-device translations will
need to return a different iommu region for every device.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 hw/pci.c           | 59 +++++++++++++++++++++++++++++++++---------------------
 hw/pci.h           |  7 +++++--
 hw/pci_internals.h |  5 +++--
 hw/spapr.h         |  2 ++
 hw/spapr_iommu.c   | 35 ++++++++++++++------------------
 hw/spapr_pci.c     | 26 ++++++++++++++++++++----
 hw/spapr_pci.h     |  1 +
 7 files changed, 84 insertions(+), 51 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 7adf61b..02e1989 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -274,6 +274,21 @@ int pci_find_domain(const PCIBus *bus)
     return -1;
 }
 
+static MemoryRegion *pci_default_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+    MemoryRegion *mr = g_new(MemoryRegion, 1);
+
+    /* FIXME: inherit memory region from bus creator */
+    memory_region_init_alias(mr, "iommu-nop", get_system_memory(), 0, INT64_MAX);
+    return mr;
+}
+
+static void pci_default_iommu_dtor(MemoryRegion *mr)
+{
+    memory_region_destroy(mr);
+    g_free(mr);
+}
+
 void pci_bus_new_inplace(PCIBus *bus, DeviceState *parent,
                          const char *name,
                          MemoryRegion *address_space_mem,
@@ -285,6 +300,7 @@ void pci_bus_new_inplace(PCIBus *bus, DeviceState *parent,
     bus->devfn_min = devfn_min;
     bus->address_space_mem = address_space_mem;
     bus->address_space_io = address_space_io;
+    pci_setup_iommu(bus, pci_default_iommu, pci_default_iommu_dtor, NULL);
 
     /* host bridge */
     QLIST_INIT(&bus->child);
@@ -775,21 +791,16 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                      PCI_SLOT(devfn), PCI_FUNC(devfn), name, bus->devices[devfn]->name);
         return NULL;
     }
+
     pci_dev->bus = bus;
-    if (bus->dma_context_fn) {
-        pci_dev->dma = bus->dma_context_fn(bus, bus->dma_context_opaque, devfn);
-    } else {
-        /* FIXME: Make dma_context_fn use MemoryRegions instead, so this path is
-         * taken unconditionally */
-        /* FIXME: inherit memory region from bus creator */
-        memory_region_init_alias(&pci_dev->bus_master_enable_region, "bus master",
-                                 get_system_memory(), 0,
-                                 memory_region_size(get_system_memory()));
-        memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
-        address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region);
-        pci_dev->dma = g_new(DMAContext, 1);
-        dma_context_init(pci_dev->dma, &pci_dev->bus_master_as, NULL, NULL, NULL);
-    }
+    pci_dev->iommu = bus->iommu_fn(bus, bus->iommu_opaque, devfn);
+    memory_region_init_alias(&pci_dev->bus_master_enable_region, "bus master",
+                             pci_dev->iommu, 0, memory_region_size(pci_dev->iommu));
+    memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
+    address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region);
+    pci_dev->dma = g_new(DMAContext, 1);
+    dma_context_init(pci_dev->dma, &pci_dev->bus_master_as, NULL, NULL, NULL);
+
     pci_dev->devfn = devfn;
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     pci_dev->irq_state = 0;
@@ -843,12 +854,12 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
     pci_dev->bus->devices[pci_dev->devfn] = NULL;
     pci_config_free(pci_dev);
 
-    if (!pci_dev->bus->dma_context_fn) {
-        address_space_destroy(&pci_dev->bus_master_as);
-        memory_region_destroy(&pci_dev->bus_master_enable_region);
-        g_free(pci_dev->dma);
-        pci_dev->dma = NULL;
-    }
+    address_space_destroy(&pci_dev->bus_master_as);
+    memory_region_del_subregion(&pci_dev->bus_master_enable_region, pci_dev->iommu);
+    pci_dev->bus->iommu_dtor_fn(pci_dev->iommu);
+    memory_region_destroy(&pci_dev->bus_master_enable_region);
+    g_free(pci_dev->dma);
+    pci_dev->dma = NULL;
 }
 
 static void pci_unregister_io_regions(PCIDevice *pci_dev)
@@ -2092,10 +2103,12 @@ static void pci_device_class_init(ObjectClass *klass, void *data)
     k->props = pci_props;
 }
 
-void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque)
+void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, PCIIOMMUDestructorFunc dtor,
+                     void *opaque)
 {
-    bus->dma_context_fn = fn;
-    bus->dma_context_opaque = opaque;
+    bus->iommu_fn = fn;
+    bus->iommu_dtor_fn = dtor;
+    bus->iommu_opaque = opaque;
 }
 
 static TypeInfo pci_device_type_info = {
diff --git a/hw/pci.h b/hw/pci.h
index a65e490..370354a 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -213,6 +213,7 @@ struct PCIDevice {
     PCIIORegion io_regions[PCI_NUM_REGIONS];
     AddressSpace bus_master_as;
     MemoryRegion bus_master_enable_region;
+    MemoryRegion *iommu;
     DMAContext *dma;
 
     /* do not access the following fields */
@@ -351,9 +352,11 @@ int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp,
 
 void pci_device_deassert_intx(PCIDevice *dev);
 
-typedef DMAContext *(*PCIDMAContextFunc)(PCIBus *, void *, int);
+typedef MemoryRegion *(*PCIIOMMUFunc)(PCIBus *, void *, int);
+typedef void (*PCIIOMMUDestructorFunc)(MemoryRegion *mr);
 
-void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque);
+void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, PCIIOMMUDestructorFunc dtor,
+                     void *opaque);
 
 static inline void
 pci_set_byte(uint8_t *config, uint8_t val)
diff --git a/hw/pci_internals.h b/hw/pci_internals.h
index c931b64..040508f 100644
--- a/hw/pci_internals.h
+++ b/hw/pci_internals.h
@@ -17,8 +17,9 @@
 
 struct PCIBus {
     BusState qbus;
-    PCIDMAContextFunc dma_context_fn;
-    void *dma_context_opaque;
+    PCIIOMMUFunc iommu_fn;
+    PCIIOMMUDestructorFunc iommu_dtor_fn;
+    void *iommu_opaque;
     uint8_t devfn_min;
     pci_set_irq_fn set_irq;
     pci_map_irq_fn map_irq;
diff --git a/hw/spapr.h b/hw/spapr.h
index ac34a17..452efba1 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -334,6 +334,8 @@ typedef struct sPAPRTCE {
 #define SPAPR_PCI_BASE_LIOBN    0x80000000
 
 void spapr_iommu_init(void);
+IOMMUTLBEntry spapr_tce_translate(DMAContext *dma, target_phys_addr_t addr,
+                                  bool is_write);
 DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
 void spapr_tce_free(DMAContext *dma);
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
index 54798a3..79e35d1 100644
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@@ -63,15 +63,11 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
     return NULL;
 }
 
-static int spapr_tce_translate(DMAContext *dma,
-                               dma_addr_t addr,
-                               target_phys_addr_t *paddr,
-                               target_phys_addr_t *len,
-                               DMADirection dir)
+IOMMUTLBEntry spapr_tce_translate(DMAContext *dma, target_phys_addr_t addr,
+                                  bool is_write)
 {
     sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
-    enum sPAPRTCEAccess access = (dir == DMA_DIRECTION_FROM_DEVICE)
-        ? SPAPR_TCE_WO : SPAPR_TCE_RO;
+    enum sPAPRTCEAccess access = is_write ? SPAPR_TCE_WO : SPAPR_TCE_RO;
     uint64_t tce;
 
 #ifdef DEBUG_TCE
@@ -84,29 +80,27 @@ static int spapr_tce_translate(DMAContext *dma,
 #ifdef DEBUG_TCE
         fprintf(stderr, "spapr_tce_translate out of bounds\n");
 #endif
-        return -EFAULT;
+        return (IOMMUTLBEntry) { .valid = false };
     }
 
     tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT].tce;
 
     /* Check TCE */
     if (!(tce & access)) {
-        return -EPERM;
+        return (IOMMUTLBEntry) { .valid = false };
     }
 
-    /* How much til end of page ? */
-    *len = ((~addr) & SPAPR_TCE_PAGE_MASK) + 1;
-
-    /* Translate */
-    *paddr = (tce & ~SPAPR_TCE_PAGE_MASK) |
-        (addr & SPAPR_TCE_PAGE_MASK);
-
 #ifdef DEBUG_TCE
     fprintf(stderr, " ->  *paddr=0x" TARGET_FMT_plx ", *len=0x"
-            TARGET_FMT_plx "\n", *paddr, *len);
+            TARGET_FMT_plx "\n", (tce & ~SPAPR_TCE_PAGE_MASK), SPAPR_TCE_PAGE_MASK + 1);
 #endif
 
-    return 0;
+    return (IOMMUTLBEntry) {
+        .device_addr = addr & SPAPR_TCE_PAGE_MASK,
+        .translated_addr = (tce & ~SPAPR_TCE_PAGE_MASK),
+        .addr_mask = SPAPR_TCE_PAGE_MASK,
+        .valid = true,
+    };
 }
 
 DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size)
@@ -118,7 +112,7 @@ DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size)
     }
 
     tcet = g_malloc0(sizeof(*tcet));
-    dma_context_init(&tcet->dma, &address_space_memory, spapr_tce_translate, NULL, NULL);
+    dma_context_init(&tcet->dma, &address_space_memory, NULL, NULL, NULL);
 
     tcet->liobn = liobn;
     tcet->window_size = window_size;
@@ -253,7 +247,8 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
         return 0;
     }
 
-    if (iommu->translate == spapr_tce_translate) {
+    /* FIXME: WHAT?? */
+    if (true) {
         sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, iommu);
         return spapr_dma_dt(fdt, node_off, propname,
                 tcet->liobn, 0, tcet->window_size);
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index 661c05b..24f5b46 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -506,14 +506,30 @@ static void spapr_msi_write(void *opaque, target_phys_addr_t addr,
 /*
  * PHB PCI device
  */
-static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque,
-                                            int devfn)
+static MemoryRegion *spapr_pci_dma_iommu_new(PCIBus *bus, void *opaque, int devfn)
 {
     sPAPRPHBState *phb = opaque;
 
-    return phb->dma;
+    return &phb->iommu;
 }
 
+static void spapr_pci_dma_iommu_destroy(MemoryRegion *iommu)
+{
+    /* iommu is shared among devices, do nothing */
+}
+
+static IOMMUTLBEntry spapr_phb_translate(MemoryRegion *iommu, target_phys_addr_t addr,
+                                         bool is_write)
+{
+    sPAPRPHBState *phb = container_of(iommu, sPAPRPHBState, iommu);
+
+    return spapr_tce_translate(phb->dma, addr, is_write);
+}
+
+static MemoryRegionIOMMUOps spapr_iommu_ops = {
+    .translate = spapr_phb_translate,
+};
+
 static int spapr_phb_init(SysBusDevice *s)
 {
     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
@@ -576,7 +592,9 @@ static int spapr_phb_init(SysBusDevice *s)
     sphb->dma_window_start = 0;
     sphb->dma_window_size = 0x40000000;
     sphb->dma = spapr_tce_new_dma_context(sphb->dma_liobn, sphb->dma_window_size);
-    pci_setup_iommu(bus, spapr_pci_dma_context_fn, sphb);
+    memory_region_init_iommu(&sphb->iommu, &spapr_iommu_ops, get_system_memory(),
+                             "iommu-spapr", INT64_MAX);
+    pci_setup_iommu(bus, spapr_pci_dma_iommu_new, spapr_pci_dma_iommu_destroy, sphb);
 
     QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
 
diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
index 670dc62..171db45 100644
--- a/hw/spapr_pci.h
+++ b/hw/spapr_pci.h
@@ -45,6 +45,7 @@ typedef struct sPAPRPHBState {
     target_phys_addr_t mem_win_addr, mem_win_size, io_win_addr, io_win_size;
     target_phys_addr_t msi_win_addr;
     MemoryRegion memwindow, iowindow, msiwindow;
+    MemoryRegion iommu;
 
     uint32_t dma_liobn;
     uint64_t dma_window_start;
-- 
1.7.12

  parent reply	other threads:[~2012-10-11 13:27 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-11 13:26 [Qemu-devel] [RFC v1 0/7] IOMMU support Avi Kivity
2012-10-11 13:26 ` [Qemu-devel] [RFC v1 1/7] memory: fix address space initialization/destruction Avi Kivity
2012-10-11 13:31   ` Paolo Bonzini
2012-10-11 13:33     ` Avi Kivity
2012-10-13  9:14       ` Blue Swirl
2012-10-11 13:26 ` [Qemu-devel] [RFC v1 2/7] memory: limit sections in the radix tree to the actual address space size Avi Kivity
2012-10-11 13:26 ` [Qemu-devel] [RFC v1 3/7] memory: iommu support Avi Kivity
2012-10-11 13:42   ` Paolo Bonzini
2012-10-11 13:45     ` Avi Kivity
2012-10-11 13:54       ` Paolo Bonzini
2012-10-11 13:57         ` Avi Kivity
2012-10-12  2:51           ` Benjamin Herrenschmidt
2012-10-15 16:54             ` Avi Kivity
2012-10-12  2:45     ` Benjamin Herrenschmidt
2012-10-13  9:30       ` Blue Swirl
2012-10-13 11:37         ` Benjamin Herrenschmidt
2012-10-11 14:29   ` Avi Kivity
2012-10-11 13:27 ` Avi Kivity [this message]
2012-10-11 13:53   ` [Qemu-devel] [RFC v1 4/7] pci: switch iommu to using the memory API Paolo Bonzini
2012-10-11 13:56     ` Avi Kivity
2012-10-13  9:13   ` Blue Swirl
2012-10-15 10:31     ` Avi Kivity
2012-10-11 13:27 ` [Qemu-devel] [RFC v1 5/7] i440fx: add an iommu Avi Kivity
2012-10-11 13:27 ` [Qemu-devel] [RFC v1 6/7] vfio: abort if an emulated iommu is used Avi Kivity
2012-10-11 13:27 ` [Qemu-devel] [RFC v1 7/7] vhost: " Avi Kivity
2012-10-11 13:31   ` Michael S. Tsirkin
2012-10-11 13:34     ` Avi Kivity
2012-10-11 13:44       ` Michael S. Tsirkin
2012-10-11 13:44         ` Avi Kivity
2012-10-11 14:35           ` Michael S. Tsirkin
2012-10-11 14:35             ` Avi Kivity
2012-10-11 15:34               ` Michael S. Tsirkin
2012-10-11 15:48                 ` Avi Kivity
2012-10-11 19:38                   ` Alex Williamson
2012-10-15 10:24                     ` Avi Kivity
2012-10-15  8:44                   ` liu ping fan
2012-10-15 10:32                     ` Avi Kivity
2012-10-12  2:36 ` [Qemu-devel] [RFC v1 0/7] IOMMU support Benjamin Herrenschmidt
2012-10-15 10:45   ` Avi Kivity
2012-10-15 19:52     ` Benjamin Herrenschmidt
2012-10-16  9:30       ` Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1349962023-560-5-git-send-email-avi@redhat.com \
    --to=avi@redhat.com \
    --cc=alex.williamson@redhat.com \
    --cc=anthony@codemonkey.ws \
    --cc=blauwirbel@gmail.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemulist@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).