* [Qemu-devel] [RFC PATCH 1/4] pci: memory access API and IOMMU support
2010-08-04 22:32 [Qemu-devel] [RFC PATCH 0/4] AMD IOMMU emulation 2nd version Eduard - Gabriel Munteanu
@ 2010-08-04 22:32 ` Eduard - Gabriel Munteanu
2010-08-05 21:23 ` Blue Swirl
2010-08-04 22:32 ` [Qemu-devel] [RFC PATCH 2/4] AMD IOMMU emulation Eduard - Gabriel Munteanu
` (3 subsequent siblings)
4 siblings, 1 reply; 15+ messages in thread
From: Eduard - Gabriel Munteanu @ 2010-08-04 22:32 UTC (permalink / raw)
To: joro; +Cc: kvm, qemu-devel, avi, Eduard - Gabriel Munteanu, paul
PCI devices should access memory through pci_memory_*() instead of
cpu_physical_memory_*(). This also provides support for translation and
access checking in case an IOMMU is emulated.
Memory maps are treated as remote IOTLBs (that is, translation caches
belonging to the IOMMU-aware device itself). Clients (devices) must
provide callbacks for map invalidation in case these maps are
persistent beyond the current I/O context, e.g. AIO DMA transfers.
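As an illustration, device code would end up looking roughly like this
(a sketch only; MyDevState and the descriptor layout are made up, while
the pci_memory_read()/pci_stl() calls are what this patch provides):

static void mydev_complete_request(MyDevState *s, pci_addr_t desc_addr)
{
    uint8_t desc[16];

    /* Goes through the IOMMU, if one is present, instead of
       cpu_physical_memory_read(). */
    pci_memory_read(&s->dev, desc_addr, desc, sizeof(desc));

    /* ... process the descriptor ... */

    /* Write back a hypothetical "done" flag. */
    pci_stl(&s->dev, desc_addr + 12, 1);
}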
Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
---
hw/pci.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
hw/pci.h | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++
qemu-common.h | 1 +
3 files changed, 276 insertions(+), 0 deletions(-)
diff --git a/hw/pci.c b/hw/pci.c
index 6871728..ce2734b 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -58,6 +58,10 @@ struct PCIBus {
Keep a count of the number of devices with raised IRQs. */
int nirq;
int *irq_count;
+
+#ifdef CONFIG_PCI_IOMMU
+ PCIIOMMU *iommu;
+#endif
};
static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent);
@@ -2029,6 +2033,147 @@ static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent)
}
}
+#ifdef CONFIG_PCI_IOMMU
+
+void pci_register_iommu(PCIDevice *dev, PCIIOMMU *iommu)
+{
+ dev->bus->iommu = iommu;
+}
+
+void pci_memory_rw(PCIDevice *dev,
+ pci_addr_t addr,
+ uint8_t *buf,
+ pci_addr_t len,
+ int is_write)
+{
+ int err, plen;
+ unsigned perms;
+ PCIIOMMU *iommu = dev->bus->iommu;
+ target_phys_addr_t paddr;
+
+ if (!iommu || !iommu->translate)
+ return cpu_physical_memory_rw(addr, buf, len, is_write);
+
+ perms = is_write ? IOMMU_PERM_WRITE : IOMMU_PERM_READ;
+
+ while (len) {
+ err = iommu->translate(iommu, dev, addr, &paddr, &plen, perms);
+ if (err)
+ return;
+
+ /* The translation might be valid for larger regions. */
+ if (plen > len)
+ plen = len;
+
+ cpu_physical_memory_rw(paddr, buf, plen, is_write);
+
+ len -= plen;
+ addr += plen;
+ buf += plen;
+ }
+}
+
+void *pci_memory_map(PCIDevice *dev,
+ PCIInvalidateIOTLBFunc *cb,
+ void *opaque,
+ pci_addr_t addr,
+ target_phys_addr_t *len,
+ int is_write)
+{
+ int err, plen;
+ unsigned perms;
+ PCIIOMMU *iommu = dev->bus->iommu;
+ target_phys_addr_t paddr;
+
+ if (!iommu || !iommu->translate)
+ return cpu_physical_memory_map(addr, len, is_write);
+
+ perms = is_write ? IOMMU_PERM_WRITE : IOMMU_PERM_READ;
+
+ plen = *len;
+ err = iommu->translate(iommu, dev, addr, &paddr, &plen, perms);
+ if (err)
+ return NULL;
+
+ /*
+ * If this is true, the virtual region is contiguous,
+ * but the translated physical region isn't. We just
+ * clamp *len, much like cpu_physical_memory_map() does.
+ */
+ if (plen < *len)
+ *len = plen;
+
+ /* We treat maps as remote TLBs to cope with stuff like AIO. */
+ if (cb && iommu->register_iotlb_invalidator)
+ iommu->register_iotlb_invalidator(iommu, dev, addr, cb, opaque);
+
+ return cpu_physical_memory_map(paddr, len, is_write);
+}
+
+void pci_memory_unmap(PCIDevice *dev,
+ void *buffer,
+ target_phys_addr_t len,
+ int is_write,
+ target_phys_addr_t access_len)
+{
+ cpu_physical_memory_unmap(buffer, len, is_write, access_len);
+}
+
+#define DEFINE_PCI_LD(suffix, size) \
+uint##size##_t pci_ld##suffix(PCIDevice *dev, pci_addr_t addr) \
+{ \
+ PCIIOMMU *iommu = dev->bus->iommu; \
+ target_phys_addr_t paddr; \
+ int plen, err; \
+ \
+ if (!iommu || !iommu->translate) \
+ return ld##suffix##_phys(addr); \
+ \
+ err = iommu->translate(iommu, dev, \
+ addr, &paddr, &plen, IOMMU_PERM_READ); \
+ if (err || (plen < size / 8)) \
+ return 0; \
+ \
+ return ld##suffix##_phys(paddr); \
+}
+
+#define DEFINE_PCI_ST(suffix, size) \
+void pci_st##suffix(PCIDevice *dev, pci_addr_t addr, uint##size##_t val) \
+{ \
+ PCIIOMMU *iommu = dev->bus->iommu; \
+ target_phys_addr_t paddr; \
+ int plen, err; \
+ \
+ if (!iommu || !iommu->translate) { \
+ st##suffix##_phys(addr, val); \
+ return; \
+ } \
+ \
+ err = iommu->translate(iommu, dev, \
+ addr, &paddr, &plen, IOMMU_PERM_WRITE); \
+ if (err || (plen < size / 8)) \
+ return; \
+ \
+ st##suffix##_phys(paddr, val); \
+}
+
+#else /* !defined(CONFIG_PCI_IOMMU) */
+
+#define DEFINE_PCI_LD(suffix, size)
+#define DEFINE_PCI_ST(suffix, size)
+
+#endif /* CONFIG_PCI_IOMMU */
+
+DEFINE_PCI_LD(ub, 8)
+DEFINE_PCI_LD(uw, 16)
+DEFINE_PCI_LD(l, 32)
+DEFINE_PCI_LD(q, 64)
+
+DEFINE_PCI_ST(b, 8)
+DEFINE_PCI_ST(w, 16)
+DEFINE_PCI_ST(l, 32)
+DEFINE_PCI_ST(q, 64)
+
static PCIDeviceInfo bridge_info = {
.qdev.name = "pci-bridge",
.qdev.size = sizeof(PCIBridge),
diff --git a/hw/pci.h b/hw/pci.h
index 4bd8a1a..bd8c21b 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -430,4 +430,134 @@ static inline int ranges_overlap(uint64_t first1, uint64_t len1,
return !(last2 < first1 || last1 < first2);
}
+/*
+ * Memory I/O and PCI IOMMU definitions.
+ */
+
+typedef target_phys_addr_t pci_addr_t;
+
+typedef int PCIInvalidateIOTLBFunc(void *opaque);
+
+#ifndef CONFIG_PCI_IOMMU
+
+static inline void pci_memory_rw(PCIDevice *dev,
+ pci_addr_t addr,
+ uint8_t *buf,
+ pci_addr_t len,
+ int is_write)
+{
+ cpu_physical_memory_rw(addr, buf, len, is_write);
+}
+
+static inline void *pci_memory_map(PCIDevice *dev,
+ PCIInvalidateIOTLBFunc *cb,
+ void *opaque,
+ pci_addr_t addr,
+ target_phys_addr_t *len,
+ int is_write)
+{
+ return cpu_physical_memory_map(addr, len, is_write);
+}
+
+static inline void pci_memory_unmap(PCIDevice *dev,
+ void *buffer,
+ target_phys_addr_t len,
+ int is_write,
+ target_phys_addr_t access_len)
+{
+ cpu_physical_memory_unmap(buffer, len, is_write, access_len);
+}
+
+#define DECLARE_PCI_LD(suffix, size) \
+static inline uint##size##_t pci_ld##suffix(PCIDevice *dev, \
+ pci_addr_t addr) \
+{ \
+ return ld##suffix##_phys(addr); \
+}
+
+#define DECLARE_PCI_ST(suffix, size) \
+static inline void pci_st##suffix(PCIDevice *dev, \
+ pci_addr_t addr, \
+ uint##size##_t val) \
+{ \
+ st##suffix##_phys(addr, val); \
+}
+
+#else /* defined(CONFIG_PCI_IOMMU) */
+
+struct PCIIOMMU {
+ void *opaque;
+
+ void (*register_iotlb_invalidator)(PCIIOMMU *iommu,
+ PCIDevice *dev,
+ pci_addr_t addr,
+ PCIInvalidateIOTLBFunc *cb,
+ void *opaque);
+ int (*translate)(PCIIOMMU *iommu,
+ PCIDevice *dev,
+ pci_addr_t addr,
+ target_phys_addr_t *paddr,
+ int *len,
+ unsigned perms);
+};
+
+#define IOMMU_PERM_READ (1 << 0)
+#define IOMMU_PERM_WRITE (1 << 1)
+#define IOMMU_PERM_RW (IOMMU_PERM_READ | IOMMU_PERM_WRITE)
+
+extern void pci_memory_rw(PCIDevice *dev,
+ pci_addr_t addr,
+ uint8_t *buf,
+ pci_addr_t len,
+ int is_write);
+extern void *pci_memory_map(PCIDevice *dev,
+ PCIInvalidateIOTLBFunc *cb,
+ void *opaque,
+ pci_addr_t addr,
+ target_phys_addr_t *len,
+ int is_write);
+extern void pci_memory_unmap(PCIDevice *dev,
+ void *buffer,
+ target_phys_addr_t len,
+ int is_write,
+ target_phys_addr_t access_len);
+extern void pci_register_iommu(PCIDevice *dev,
+ PCIIOMMU *iommu);
+
+#define DECLARE_PCI_LD(suffix, size) \
+extern uint##size##_t pci_ld##suffix(PCIDevice *dev, pci_addr_t addr);
+
+#define DECLARE_PCI_ST(suffix, size) \
+extern void pci_st##suffix(PCIDevice *dev, \
+ pci_addr_t addr, \
+ uint##size##_t val);
+
+#endif /* CONFIG_PCI_IOMMU */
+
+static inline void pci_memory_read(PCIDevice *dev,
+ pci_addr_t addr,
+ uint8_t *buf,
+ pci_addr_t len)
+{
+ pci_memory_rw(dev, addr, buf, len, 0);
+}
+
+static inline void pci_memory_write(PCIDevice *dev,
+ pci_addr_t addr,
+ const uint8_t *buf,
+ pci_addr_t len)
+{
+ pci_memory_rw(dev, addr, (uint8_t *) buf, len, 1);
+}
+
+DECLARE_PCI_LD(ub, 8)
+DECLARE_PCI_LD(uw, 16)
+DECLARE_PCI_LD(l, 32)
+DECLARE_PCI_LD(q, 64)
+
+DECLARE_PCI_ST(b, 8)
+DECLARE_PCI_ST(w, 16)
+DECLARE_PCI_ST(l, 32)
+DECLARE_PCI_ST(q, 64)
+
#endif
diff --git a/qemu-common.h b/qemu-common.h
index 3fb2f0b..8daf962 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -219,6 +219,7 @@ typedef struct PCIHostState PCIHostState;
typedef struct PCIExpressHost PCIExpressHost;
typedef struct PCIBus PCIBus;
typedef struct PCIDevice PCIDevice;
+typedef struct PCIIOMMU PCIIOMMU;
typedef struct SerialState SerialState;
typedef struct IRQState *qemu_irq;
typedef struct PCMCIACardState PCMCIACardState;
--
1.7.1
* Re: [Qemu-devel] [RFC PATCH 1/4] pci: memory access API and IOMMU support
2010-08-04 22:32 ` [Qemu-devel] [RFC PATCH 1/4] pci: memory access API and IOMMU support Eduard - Gabriel Munteanu
@ 2010-08-05 21:23 ` Blue Swirl
2010-08-06 0:21 ` Eduard - Gabriel Munteanu
0 siblings, 1 reply; 15+ messages in thread
From: Blue Swirl @ 2010-08-05 21:23 UTC (permalink / raw)
To: Eduard - Gabriel Munteanu; +Cc: joro, paul, qemu-devel, kvm, avi
On Wed, Aug 4, 2010 at 10:32 PM, Eduard - Gabriel Munteanu
<eduard.munteanu@linux360.ro> wrote:
> PCI devices should access memory through pci_memory_*() instead of
> cpu_physical_memory_*(). This also provides support for translation and
> access checking in case an IOMMU is emulated.
>
> Memory maps are treated as remote IOTLBs (that is, translation caches
> belonging to the IOMMU-aware device itself). Clients (devices) must
> provide callbacks for map invalidation in case these maps are
> persistent beyond the current I/O context, e.g. AIO DMA transfers.
>
> Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
> ---
> hw/pci.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> hw/pci.h | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++
> qemu-common.h | 1 +
> 3 files changed, 276 insertions(+), 0 deletions(-)
>
> diff --git a/hw/pci.c b/hw/pci.c
> index 6871728..ce2734b 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -58,6 +58,10 @@ struct PCIBus {
> Keep a count of the number of devices with raised IRQs. */
> int nirq;
> int *irq_count;
> +
> +#ifdef CONFIG_PCI_IOMMU
The code should not be conditional.
> + PCIIOMMU *iommu;
> +#endif
> };
>
> static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent);
> @@ -2029,6 +2033,147 @@ static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent)
> }
> }
>
> +#ifdef CONFIG_PCI_IOMMU
> +
> +void pci_register_iommu(PCIDevice *dev, PCIIOMMU *iommu)
> +{
> + dev->bus->iommu = iommu;
> +}
> +
> +void pci_memory_rw(PCIDevice *dev,
> + pci_addr_t addr,
> + uint8_t *buf,
> + pci_addr_t len,
> + int is_write)
> +{
> + int err, plen;
> + unsigned perms;
> + PCIIOMMU *iommu = dev->bus->iommu;
> + target_phys_addr_t paddr;
> +
> + if (!iommu || !iommu->translate)
> + return cpu_physical_memory_rw(addr, buf, len, is_write);
Instead of these kinds of checks, please add default handlers which
call cpu_physical_memory_rw() etc.
> +
> + perms = is_write ? IOMMU_PERM_WRITE : IOMMU_PERM_READ;
Is this useful? How about just passing is_write as perms?
> +
> + while (len) {
> + err = iommu->translate(iommu, dev, addr, &paddr, &plen, perms);
> + if (err)
> + return;
> +
> + /* The translation might be valid for larger regions. */
> + if (plen > len)
> + plen = len;
> +
> + cpu_physical_memory_rw(paddr, buf, plen, is_write);
> +
> + len -= plen;
> + addr += plen;
> + buf += plen;
> + }
> +}
> +
> +void *pci_memory_map(PCIDevice *dev,
> + PCIInvalidateIOTLBFunc *cb,
> + void *opaque,
> + pci_addr_t addr,
> + target_phys_addr_t *len,
> + int is_write)
> +{
> + int err, plen;
> + unsigned perms;
> + PCIIOMMU *iommu = dev->bus->iommu;
> + target_phys_addr_t paddr;
> +
> + if (!iommu || !iommu->translate)
> + return cpu_physical_memory_map(addr, len, is_write);
> +
> + perms = is_write ? IOMMU_PERM_WRITE : IOMMU_PERM_READ;
> +
> + plen = *len;
> + err = iommu->translate(iommu, dev, addr, &paddr, &plen, perms);
> + if (err)
> + return NULL;
> +
> + /*
> + * If this is true, the virtual region is contiguous,
> + * but the translated physical region isn't. We just
> + * clamp *len, much like cpu_physical_memory_map() does.
> + */
> + if (plen < *len)
> + *len = plen;
> +
> + /* We treat maps as remote TLBs to cope with stuff like AIO. */
> + if (cb && iommu->register_iotlb_invalidator)
> + iommu->register_iotlb_invalidator(iommu, dev, addr, cb, opaque);
> +
> + return cpu_physical_memory_map(paddr, len, is_write);
> +}
> +
> +void pci_memory_unmap(PCIDevice *dev,
> + void *buffer,
> + target_phys_addr_t len,
> + int is_write,
> + target_phys_addr_t access_len)
> +{
> + cpu_physical_memory_unmap(buffer, len, is_write, access_len);
> +}
> +
> +#define DEFINE_PCI_LD(suffix, size) \
> +uint##size##_t pci_ld##suffix(PCIDevice *dev, pci_addr_t addr) \
> +{ \
> + PCIIOMMU *iommu = dev->bus->iommu; \
> + target_phys_addr_t paddr; \
> + int plen, err; \
> + \
> + if (!iommu || !iommu->translate) \
> + return ld##suffix##_phys(addr); \
> + \
> + err = iommu->translate(iommu, dev, \
> + addr, &paddr, &plen, IOMMU_PERM_READ); \
> + if (err || (plen < size / 8)) \
> + return 0; \
> + \
> + return ld##suffix##_phys(paddr); \
> +}
> +
> +#define DEFINE_PCI_ST(suffix, size) \
> +void pci_st##suffix(PCIDevice *dev, pci_addr_t addr, uint##size##_t val) \
> +{ \
> + PCIIOMMU *iommu = dev->bus->iommu; \
> + target_phys_addr_t paddr; \
> + int plen, err; \
> + \
> + if (!iommu || !iommu->translate) { \
> + st##suffix##_phys(addr, val); \
> + return; \
> + } \
> + \
> + err = iommu->translate(iommu, dev, \
> + addr, &paddr, &plen, IOMMU_PERM_WRITE); \
> + if (err || (plen < size / 8)) \
> + return; \
> + \
> + st##suffix##_phys(paddr, val); \
> +}
> +
> +#else /* !defined(CONFIG_PCI_IOMMU) */
> +
> +#define DEFINE_PCI_LD(suffix, size)
> +#define DEFINE_PCI_ST(suffix, size)
> +
> +#endif /* CONFIG_PCI_IOMMU */
> +
> +DEFINE_PCI_LD(ub, 8)
> +DEFINE_PCI_LD(uw, 16)
> +DEFINE_PCI_LD(l, 32)
> +DEFINE_PCI_LD(q, 64)
> +
> +DEFINE_PCI_ST(b, 8)
> +DEFINE_PCI_ST(w, 16)
> +DEFINE_PCI_ST(l, 32)
> +DEFINE_PCI_ST(q, 64)
> +
> static PCIDeviceInfo bridge_info = {
> .qdev.name = "pci-bridge",
> .qdev.size = sizeof(PCIBridge),
> diff --git a/hw/pci.h b/hw/pci.h
> index 4bd8a1a..bd8c21b 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -430,4 +430,134 @@ static inline int ranges_overlap(uint64_t first1, uint64_t len1,
> return !(last2 < first1 || last1 < first2);
> }
>
> +/*
> + * Memory I/O and PCI IOMMU definitions.
> + */
> +
> +typedef target_phys_addr_t pci_addr_t;
There is already pcibus_t.
> +
> +typedef int PCIInvalidateIOTLBFunc(void *opaque);
I think some type safety tricks could be used here, for example with PCIDevice *.
> +
> +#ifndef CONFIG_PCI_IOMMU
> +
> +static inline void pci_memory_rw(PCIDevice *dev,
> + pci_addr_t addr,
> + uint8_t *buf,
> + pci_addr_t len,
> + int is_write)
> +{
> + cpu_physical_memory_rw(addr, buf, len, is_write);
> +}
> +
> +static inline void *pci_memory_map(PCIDevice *dev,
> + PCIInvalidateIOTLBFunc *cb,
> + void *opaque,
> + pci_addr_t addr,
> + target_phys_addr_t *len,
> + int is_write)
> +{
> > + return cpu_physical_memory_map(addr, len, is_write);
> +}
> +
> +static inline void pci_memory_unmap(PCIDevice *dev,
> + void *buffer,
> + target_phys_addr_t len,
> + int is_write,
> + target_phys_addr_t access_len)
> +{
> + cpu_physical_memory_unmap(buffer, len, is_write, access_len);
> +}
> +
> +#define DECLARE_PCI_LD(suffix, size) \
> +static inline uint##size##_t pci_ld##suffix(PCIDevice *dev, \
> + pci_addr_t addr) \
> +{ \
> + return ld##suffix##_phys(addr); \
> +}
> +
> +#define DECLARE_PCI_ST(suffix, size) \
> +static inline void pci_st##suffix(PCIDevice *dev, \
> + pci_addr_t addr, \
> + uint##size##_t val) \
> +{ \
> + st##suffix##_phys(addr, val); \
> +}
> +
> +#else /* defined(CONFIG_PCI_IOMMU) */
> +
> +struct PCIIOMMU {
> + void *opaque;
> +
> + void (*register_iotlb_invalidator)(PCIIOMMU *iommu,
> + PCIDevice *dev,
> + pci_addr_t addr,
> + PCIInvalidateIOTLBFunc *cb,
> + void *opaque);
> + int (*translate)(PCIIOMMU *iommu,
> + PCIDevice *dev,
> + pci_addr_t addr,
> + target_phys_addr_t *paddr,
> + int *len,
> + unsigned perms);
> +};
> +
> +#define IOMMU_PERM_READ (1 << 0)
> +#define IOMMU_PERM_WRITE (1 << 1)
> +#define IOMMU_PERM_RW (IOMMU_PERM_READ | IOMMU_PERM_WRITE)
> +
> +extern void pci_memory_rw(PCIDevice *dev,
> + pci_addr_t addr,
> + uint8_t *buf,
> + pci_addr_t len,
> + int is_write);
> +extern void *pci_memory_map(PCIDevice *dev,
> + PCIInvalidateIOTLBFunc *cb,
> + void *opaque,
> + pci_addr_t addr,
> + target_phys_addr_t *len,
> + int is_write);
> +extern void pci_memory_unmap(PCIDevice *dev,
> + void *buffer,
> + target_phys_addr_t len,
> + int is_write,
> + target_phys_addr_t access_len);
> +extern void pci_register_iommu(PCIDevice *dev,
> + PCIIOMMU *iommu);
> +
> +#define DECLARE_PCI_LD(suffix, size) \
> +extern uint##size##_t pci_ld##suffix(PCIDevice *dev, pci_addr_t addr);
> +
> +#define DECLARE_PCI_ST(suffix, size) \
> +extern void pci_st##suffix(PCIDevice *dev, \
> + pci_addr_t addr, \
> + uint##size##_t val);
> +
> +#endif /* CONFIG_PCI_IOMMU */
> +
> +static inline void pci_memory_read(PCIDevice *dev,
> + pci_addr_t addr,
> + uint8_t *buf,
> + pci_addr_t len)
> +{
> + pci_memory_rw(dev, addr, buf, len, 0);
> +}
> +
> +static inline void pci_memory_write(PCIDevice *dev,
> + pci_addr_t addr,
> + const uint8_t *buf,
> + pci_addr_t len)
> +{
> + pci_memory_rw(dev, addr, (uint8_t *) buf, len, 1);
> +}
> +
> +DECLARE_PCI_LD(ub, 8)
> +DECLARE_PCI_LD(uw, 16)
> +DECLARE_PCI_LD(l, 32)
> +DECLARE_PCI_LD(q, 64)
> +
> +DECLARE_PCI_ST(b, 8)
> +DECLARE_PCI_ST(w, 16)
> +DECLARE_PCI_ST(l, 32)
> +DECLARE_PCI_ST(q, 64)
> +
> #endif
> diff --git a/qemu-common.h b/qemu-common.h
> index 3fb2f0b..8daf962 100644
> --- a/qemu-common.h
> +++ b/qemu-common.h
> @@ -219,6 +219,7 @@ typedef struct PCIHostState PCIHostState;
> typedef struct PCIExpressHost PCIExpressHost;
> typedef struct PCIBus PCIBus;
> typedef struct PCIDevice PCIDevice;
> +typedef struct PCIIOMMU PCIIOMMU;
> typedef struct SerialState SerialState;
> typedef struct IRQState *qemu_irq;
> typedef struct PCMCIACardState PCMCIACardState;
> --
> 1.7.1
>
>
>
* Re: [Qemu-devel] [RFC PATCH 1/4] pci: memory access API and IOMMU support
2010-08-05 21:23 ` Blue Swirl
@ 2010-08-06 0:21 ` Eduard - Gabriel Munteanu
0 siblings, 0 replies; 15+ messages in thread
From: Eduard - Gabriel Munteanu @ 2010-08-06 0:21 UTC (permalink / raw)
To: Blue Swirl; +Cc: joro, paul, qemu-devel, kvm, avi
On Thu, Aug 05, 2010 at 09:23:30PM +0000, Blue Swirl wrote:
> On Wed, Aug 4, 2010 at 10:32 PM, Eduard - Gabriel Munteanu
[snip]
> > @@ -58,6 +58,10 @@ struct PCIBus {
> >        Keep a count of the number of devices with raised IRQs.  */
> >     int nirq;
> >     int *irq_count;
> > +
> > +#ifdef CONFIG_PCI_IOMMU
>
> The code should not be conditional.
>
> > +    PCIIOMMU *iommu;
> > +#endif
> > };
> >
> > static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent);
> > @@ -2029,6 +2033,147 @@ static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent)
> >     }
> > }
> >
> > +#ifdef CONFIG_PCI_IOMMU
> > +
> > +void pci_register_iommu(PCIDevice *dev, PCIIOMMU *iommu)
> > +{
> > +    dev->bus->iommu = iommu;
> > +}
> > +
> > +void pci_memory_rw(PCIDevice *dev,
> > +                   pci_addr_t addr,
> > +                   uint8_t *buf,
> > +                   pci_addr_t len,
> > +                   int is_write)
> > +{
> > +    int err, plen;
> > +    unsigned perms;
> > +    PCIIOMMU *iommu = dev->bus->iommu;
> > +    target_phys_addr_t paddr;
> > +
> > +    if (!iommu || !iommu->translate)
> > +        return cpu_physical_memory_rw(addr, buf, len, is_write);
>
> Instead of these kinds of checks, please add default handlers which
> call cpu_physical_memory_rw() etc.
>
Ok. I'm trying to minimize impact (non-inlineable function calls) when
the IOMMU is disabled at compile-time. I think I can do it some other
way, as you suggest.
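Something along these lines, perhaps (just a sketch, the names are
illustrative):

static int pci_nop_translate(PCIIOMMU *iommu, PCIDevice *dev,
                             pci_addr_t addr, target_phys_addr_t *paddr,
                             int *len, unsigned perms)
{
    *paddr = addr;      /* identity mapping */
    *len = INT_MAX;     /* valid for any length */
    return 0;
}

If every bus gets such a PCIIOMMU by default, pci_memory_rw() and
friends can call iommu->translate() unconditionally.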
> > +
> > +    perms = is_write ? IOMMU_PERM_WRITE : IOMMU_PERM_READ;
>
> Is this useful? How about just passing is_write as perms?
>
Only in theory: it might come in handy if we ever support RW operations,
like read-modify-write memory maps. Also, write permissions include
zero-byte reads for the AMD IOMMU, so IOMMU_PERM_* could be further
refined.
I'm happy to remove it, though.
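For example (purely hypothetical, not part of this patch), a
read-modify-write map would simply pass IOMMU_PERM_RW, and the flag set
could later grow something like:

#define IOMMU_PERM_ZERO_READ  (1 << 2)  /* zero-byte reads allowed on
                                           write-only mappings */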
[snip]
> > +/*
> > + * Memory I/O and PCI IOMMU definitions.
> > + */
> > +
> > +typedef target_phys_addr_t pci_addr_t;
>
> There is already pcibus_t.
>
Thanks, I'll use that.
> > +
> > +typedef int PCIInvalidateIOTLBFunc(void *opaque);
>
> I think some type safety tricks could be used with for example PCIDevice *.
>
Note that 'opaque' belongs to the caller (the code that requests
memory maps).
Some device might make multiple maps that can be invalidated separately.
The actual state that describes the map might not be straightforward to
recover from a PCIDevice.
We could add another parameter to PCIInvalidateIOTLBFunc(), but since
the main user is DMA code, it's going to complicate things further.
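For illustration, a device doing AIO might keep per-map state like this
(hypothetical code; only pci_memory_map() and PCIInvalidateIOTLBFunc
come from the patch):

typedef struct MyDMAMap {
    void *host;
    target_phys_addr_t len;
    int valid;
} MyDMAMap;

static int my_map_invalidate(void *opaque)
{
    MyDMAMap *m = opaque;

    m->valid = 0;       /* force a re-map before the next AIO step */
    return 0;
}

...
    m->len = size;
    m->host = pci_memory_map(&s->dev, my_map_invalidate, m,
                             iova, &m->len, is_write);
    m->valid = (m->host != NULL);

Here the opaque is the map itself, not the PCIDevice, which is why I'd
rather keep it a plain void *.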
[snip]
Eduard
* [Qemu-devel] [RFC PATCH 2/4] AMD IOMMU emulation
2010-08-04 22:32 [Qemu-devel] [RFC PATCH 0/4] AMD IOMMU emulation 2nd version Eduard - Gabriel Munteanu
2010-08-04 22:32 ` [Qemu-devel] [RFC PATCH 1/4] pci: memory access API and IOMMU support Eduard - Gabriel Munteanu
@ 2010-08-04 22:32 ` Eduard - Gabriel Munteanu
2010-08-05 21:31 ` Blue Swirl
2010-08-04 22:32 ` [Qemu-devel] [RFC PATCH 3/4] ide: use the PCI memory access interface Eduard - Gabriel Munteanu
` (2 subsequent siblings)
4 siblings, 1 reply; 15+ messages in thread
From: Eduard - Gabriel Munteanu @ 2010-08-04 22:32 UTC (permalink / raw)
To: joro; +Cc: kvm, qemu-devel, avi, Eduard - Gabriel Munteanu, paul
This introduces emulation for the AMD IOMMU, described in "AMD I/O
Virtualization Technology (IOMMU) Specification".
Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
---
Makefile.target | 2 +
configure | 10 +
hw/amd_iommu.c | 671 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
hw/pc.c | 4 +
hw/pc.h | 3 +
hw/pci_ids.h | 2 +
hw/pci_regs.h | 1 +
7 files changed, 693 insertions(+), 0 deletions(-)
create mode 100644 hw/amd_iommu.c
diff --git a/Makefile.target b/Makefile.target
index 70a9c1b..86226a0 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -219,6 +219,8 @@ obj-i386-y += pcspk.o i8254.o
obj-i386-$(CONFIG_KVM_PIT) += i8254-kvm.o
obj-i386-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += device-assignment.o
+obj-i386-$(CONFIG_AMD_IOMMU) += amd_iommu.o
+
# Hardware support
obj-ia64-y += ide.o pckbd.o vga.o $(SOUND_HW) dma.o $(AUDIODRV)
obj-ia64-y += fdc.o mc146818rtc.o serial.o i8259.o ipf.o
diff --git a/configure b/configure
index af50607..7448603 100755
--- a/configure
+++ b/configure
@@ -317,6 +317,7 @@ io_thread="no"
mixemu="no"
kvm_cap_pit=""
kvm_cap_device_assignment=""
+amd_iommu="no"
kerneldir=""
aix="no"
blobs="yes"
@@ -629,6 +630,8 @@ for opt do
;;
--enable-kvm-device-assignment) kvm_cap_device_assignment="yes"
;;
+ --enable-amd-iommu-emul) amd_iommu="yes"
+ ;;
--enable-profiler) profiler="yes"
;;
--enable-cocoa)
@@ -871,6 +874,8 @@ echo " --disable-kvm-pit disable KVM pit support"
echo " --enable-kvm-pit enable KVM pit support"
echo " --disable-kvm-device-assignment disable KVM device assignment support"
echo " --enable-kvm-device-assignment enable KVM device assignment support"
+echo " --disable-amd-iommu-emul disable AMD IOMMU emulation"
+echo " --enable-amd-iommu-emul enable AMD IOMMU emulation"
echo " --disable-nptl disable usermode NPTL support"
echo " --enable-nptl enable usermode NPTL support"
echo " --enable-system enable all system emulation targets"
@@ -2251,6 +2256,7 @@ echo "Install blobs $blobs"
echo "KVM support $kvm"
echo "KVM PIT support $kvm_cap_pit"
echo "KVM device assig. $kvm_cap_device_assignment"
+echo "AMD IOMMU emul. $amd_iommu"
echo "fdt support $fdt"
echo "preadv support $preadv"
echo "fdatasync $fdatasync"
@@ -2645,6 +2651,10 @@ case "$target_arch2" in
x86_64)
TARGET_BASE_ARCH=i386
target_phys_bits=64
+ if test "$amd_iommu" = "yes"; then
+ echo "CONFIG_AMD_IOMMU=y" >> $config_target_mak
+ echo "CONFIG_PCI_IOMMU=y" >> $config_host_mak
+ fi
;;
ia64)
target_phys_bits=64
diff --git a/hw/amd_iommu.c b/hw/amd_iommu.c
new file mode 100644
index 0000000..ff9903e
--- /dev/null
+++ b/hw/amd_iommu.c
@@ -0,0 +1,671 @@
+/*
+ * AMD IOMMU emulation
+ *
+ * Copyright (c) 2010 Eduard - Gabriel Munteanu
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "pc.h"
+#include "hw.h"
+#include "pci.h"
+#include "qlist.h"
+
+/* Capability registers */
+#define CAPAB_HEADER 0x00
+#define CAPAB_REV_TYPE 0x02
+#define CAPAB_FLAGS 0x03
+#define CAPAB_BAR_LOW 0x04
+#define CAPAB_BAR_HIGH 0x08
+#define CAPAB_RANGE 0x0C
+#define CAPAB_MISC 0x10
+
+#define CAPAB_SIZE 0x14
+
+/* Capability header data */
+#define CAPAB_FLAG_IOTLBSUP (1 << 0)
+#define CAPAB_FLAG_HTTUNNEL (1 << 1)
+#define CAPAB_FLAG_NPCACHE (1 << 2)
+#define CAPAB_INIT_REV (1 << 3)
+#define CAPAB_INIT_TYPE 3
+#define CAPAB_INIT_REV_TYPE (CAPAB_INIT_REV | CAPAB_INIT_TYPE)
+#define CAPAB_INIT_FLAGS (CAPAB_FLAG_NPCACHE | CAPAB_FLAG_HTTUNNEL)
+#define CAPAB_INIT_MISC (64 << 15) | (48 << 8)
+#define CAPAB_BAR_MASK ~((1UL << 14) - 1)
+
+/* MMIO registers */
+#define MMIO_DEVICE_TABLE 0x0000
+#define MMIO_COMMAND_BASE 0x0008
+#define MMIO_EVENT_BASE 0x0010
+#define MMIO_CONTROL 0x0018
+#define MMIO_EXCL_BASE 0x0020
+#define MMIO_EXCL_LIMIT 0x0028
+#define MMIO_COMMAND_HEAD 0x2000
+#define MMIO_COMMAND_TAIL 0x2008
+#define MMIO_EVENT_HEAD 0x2010
+#define MMIO_EVENT_TAIL 0x2018
+#define MMIO_STATUS 0x2020
+
+#define MMIO_SIZE 0x4000
+
+#define MMIO_DEVTAB_SIZE_MASK ((1ULL << 12) - 1)
+#define MMIO_DEVTAB_BASE_MASK (((1ULL << 52) - 1) & ~MMIO_DEVTAB_SIZE_MASK)
+#define MMIO_DEVTAB_ENTRY_SIZE 32
+#define MMIO_DEVTAB_SIZE_UNIT 4096
+
+#define MMIO_CMDBUF_SIZE_BYTE (MMIO_COMMAND_BASE + 7)
+#define MMIO_CMDBUF_SIZE_MASK 0x0F
+#define MMIO_CMDBUF_BASE_MASK MMIO_DEVTAB_BASE_MASK
+#define MMIO_CMDBUF_DEFAULT_SIZE 8
+#define MMIO_CMDBUF_HEAD_MASK (((1ULL << 19) - 1) & ~0x0F)
+#define MMIO_CMDBUF_TAIL_MASK MMIO_EVTLOG_HEAD_MASK
+
+#define MMIO_EVTLOG_SIZE_BYTE (MMIO_EVENT_BASE + 7)
+#define MMIO_EVTLOG_SIZE_MASK MMIO_CMDBUF_SIZE_MASK
+#define MMIO_EVTLOG_BASE_MASK MMIO_CMDBUF_BASE_MASK
+#define MMIO_EVTLOG_DEFAULT_SIZE MMIO_CMDBUF_DEFAULT_SIZE
+#define MMIO_EVTLOG_HEAD_MASK (((1ULL << 19) - 1) & ~0x0F)
+#define MMIO_EVTLOG_TAIL_MASK MMIO_EVTLOG_HEAD_MASK
+
+#define MMIO_EXCL_BASE_MASK MMIO_DEVTAB_BASE_MASK
+#define MMIO_EXCL_ENABLED_MASK (1ULL << 0)
+#define MMIO_EXCL_ALLOW_MASK (1ULL << 1)
+#define MMIO_EXCL_LIMIT_MASK MMIO_DEVTAB_BASE_MASK
+#define MMIO_EXCL_LIMIT_LOW 0xFFF
+
+#define MMIO_CONTROL_IOMMUEN (1ULL << 0)
+#define MMIO_CONTROL_HTTUNEN (1ULL << 1)
+#define MMIO_CONTROL_EVENTLOGEN (1ULL << 2)
+#define MMIO_CONTROL_EVENTINTEN (1ULL << 3)
+#define MMIO_CONTROL_COMWAITINTEN (1ULL << 4)
+#define MMIO_CONTROL_CMDBUFEN (1ULL << 12)
+
+#define MMIO_STATUS_EVTLOG_OF (1ULL << 0)
+#define MMIO_STATUS_EVTLOG_INTR (1ULL << 1)
+#define MMIO_STATUS_COMWAIT_INTR (1ULL << 2)
+#define MMIO_STATUS_EVTLOG_RUN (1ULL << 3)
+#define MMIO_STATUS_CMDBUF_RUN (1ULL << 4)
+
+#define CMDBUF_ID_BYTE 0x07
+#define CMDBUF_ID_RSHIFT 4
+#define CMDBUF_ENTRY_SIZE 0x10
+
+#define CMD_COMPLETION_WAIT 0x01
+#define CMD_INVAL_DEVTAB_ENTRY 0x02
+#define CMD_INVAL_IOMMU_PAGES 0x03
+#define CMD_INVAL_IOTLB_PAGES 0x04
+#define CMD_INVAL_INTR_TABLE 0x05
+
+#define DEVTAB_ENTRY_SIZE 32
+
+/* Device table entry bits 0:63 */
+#define DEV_VALID (1ULL << 0)
+#define DEV_TRANSLATION_VALID (1ULL << 1)
+#define DEV_MODE_MASK 0x7
+#define DEV_MODE_RSHIFT 9
+#define DEV_PT_ROOT_MASK 0xFFFFFFFFFF000
+#define DEV_PT_ROOT_RSHIFT 12
+#define DEV_PERM_SHIFT 61
+#define DEV_PERM_READ (1ULL << 61)
+#define DEV_PERM_WRITE (1ULL << 62)
+
+/* Device table entry bits 64:127 */
+#define DEV_DOMAIN_ID_MASK ((1ULL << 16) - 1)
+#define DEV_IOTLB_SUPPORT (1ULL << 17)
+#define DEV_SUPPRESS_PF (1ULL << 18)
+#define DEV_SUPPRESS_ALL_PF (1ULL << 19)
+#define DEV_IOCTL_MASK ~3
+#define DEV_IOCTL_RSHIFT 20
+#define DEV_IOCTL_DENY 0
+#define DEV_IOCTL_PASSTHROUGH 1
+#define DEV_IOCTL_TRANSLATE 2
+#define DEV_CACHE (1ULL << 37)
+#define DEV_SNOOP_DISABLE (1ULL << 38)
+#define DEV_EXCL (1ULL << 39)
+
+struct amd_iommu_invalidator {
+ int devfn;
+ PCIInvalidateIOTLBFunc *func;
+ void *opaque;
+ QLIST_ENTRY(amd_iommu_invalidator) list;
+};
+
+struct amd_iommu_state {
+ PCIDevice dev;
+
+ int capab_offset;
+ unsigned char *capab;
+
+ int mmio_index;
+ target_phys_addr_t mmio_addr;
+ unsigned char *mmio_buf;
+ int mmio_enabled;
+
+ int enabled;
+ int ats_enabled;
+
+ target_phys_addr_t devtab;
+ size_t devtab_len;
+
+ target_phys_addr_t cmdbuf;
+ int cmdbuf_enabled;
+ size_t cmdbuf_len;
+ size_t cmdbuf_head;
+ size_t cmdbuf_tail;
+ int completion_wait_intr;
+
+ target_phys_addr_t evtlog;
+ int evtlog_enabled;
+ int evtlog_intr;
+ size_t evtlog_len;
+ size_t evtlog_head;
+ size_t evtlog_tail;
+
+ target_phys_addr_t excl_base;
+ target_phys_addr_t excl_limit;
+ int excl_enabled;
+ int excl_allow;
+
+ QLIST_HEAD(, amd_iommu_invalidator) invalidators;
+};
+
+static void amd_iommu_register_invalidator(PCIIOMMU *iommu,
+ PCIDevice *dev,
+ pci_addr_t addr,
+ PCIInvalidateIOTLBFunc *cb,
+ void *opaque)
+{
+ struct amd_iommu_invalidator *inval;
+ struct amd_iommu_state *st = iommu->opaque;
+
+ inval = qemu_malloc(sizeof(struct amd_iommu_invalidator));
+ inval->devfn = dev->devfn;
+ inval->func = cb;
+ inval->opaque = opaque;
+
+ QLIST_INSERT_HEAD(&st->invalidators, inval, list);
+}
+
+static void amd_iommu_completion_wait(struct amd_iommu_state *st,
+ uint8_t *cmd)
+{
+ uint64_t addr;
+
+ if (cmd[0] & 1) {
+ addr = le64_to_cpu(*(uint64_t *) cmd) & 0xFFFFFFFFFFFF8;
+ cpu_physical_memory_write(addr, cmd + 8, 8);
+ }
+
+ if (cmd[0] & 2)
+ st->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_COMWAIT_INTR;
+}
+
+static void amd_iommu_inval_iotlb(struct amd_iommu_state *st,
+ uint8_t *cmd)
+{
+ struct amd_iommu_invalidator *inval;
+ int devfn = *(uint16_t *) cmd;
+
+ QLIST_FOREACH(inval, &st->invalidators, list) {
+ if (inval->devfn == devfn) {
+ inval->func(inval->opaque);
+ QLIST_REMOVE(inval, list);
+ }
+ }
+}
+
+static void amd_iommu_cmdbuf_run(struct amd_iommu_state *st)
+{
+ uint8_t cmd[16];
+ int type;
+
+ if (!st->cmdbuf_enabled)
+ return;
+
+ /* Check if there's work to do. */
+ if (st->cmdbuf_head == st->cmdbuf_tail)
+ return;
+
+ cpu_physical_memory_read(st->cmdbuf + st->cmdbuf_head, cmd, 16);
+ type = cmd[CMDBUF_ID_BYTE] >> CMDBUF_ID_RSHIFT;
+ switch (type) {
+ case CMD_COMPLETION_WAIT:
+ amd_iommu_completion_wait(st, cmd);
+ break;
+ case CMD_INVAL_DEVTAB_ENTRY:
+ break;
+ case CMD_INVAL_IOMMU_PAGES:
+ break;
+ case CMD_INVAL_IOTLB_PAGES:
+ amd_iommu_inval_iotlb(st, cmd);
+ break;
+ case CMD_INVAL_INTR_TABLE:
+ break;
+ default:
+ break;
+ }
+
+ /* Increment and wrap head pointer. */
+ st->cmdbuf_head += CMDBUF_ENTRY_SIZE;
+ if (st->cmdbuf_head >= st->cmdbuf_len)
+ st->cmdbuf_head = 0;
+}
+
+static uint32_t amd_iommu_mmio_buf_read(struct amd_iommu_state *st,
+ size_t offset,
+ size_t size)
+{
+ ssize_t i;
+ uint32_t ret;
+
+ if (!size)
+ return 0;
+
+ ret = st->mmio_buf[offset + size - 1];
+ for (i = size - 2; i >= 0; i--) {
+ ret <<= 8;
+ ret |= st->mmio_buf[offset + i];
+ }
+
+ return ret;
+}
+
+static void amd_iommu_mmio_buf_write(struct amd_iommu_state *st,
+ size_t offset,
+ size_t size,
+ uint32_t val)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ st->mmio_buf[offset + i] = val & 0xFF;
+ val >>= 8;
+ }
+}
+
+static void amd_iommu_update_mmio(struct amd_iommu_state *st,
+ target_phys_addr_t addr)
+{
+ size_t reg = addr & ~0x07;
+ uint64_t *base = (uint64_t *) &st->mmio_buf[reg];
+ uint64_t val = *base;
+
+ switch (reg) {
+ case MMIO_CONTROL:
+ st->enabled = !!(val & MMIO_CONTROL_IOMMUEN);
+ st->ats_enabled = !!(val & MMIO_CONTROL_HTTUNEN);
+ st->evtlog_enabled = st->enabled &&
+ !!(val & MMIO_CONTROL_EVENTLOGEN);
+ st->evtlog_intr = !!(val & MMIO_CONTROL_EVENTINTEN);
+ st->completion_wait_intr = !!(val & MMIO_CONTROL_COMWAITINTEN);
+ st->cmdbuf_enabled = st->enabled &&
+ !!(val & MMIO_CONTROL_CMDBUFEN);
+
+ /* Update status flags depending on the control register. */
+ if (st->cmdbuf_enabled)
+ st->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_CMDBUF_RUN;
+ else
+ st->mmio_buf[MMIO_STATUS] &= ~MMIO_STATUS_CMDBUF_RUN;
+ if (st->evtlog_enabled)
+ st->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_EVTLOG_RUN;
+ else
+ st->mmio_buf[MMIO_STATUS] &= ~MMIO_STATUS_EVTLOG_RUN;
+
+ amd_iommu_cmdbuf_run(st);
+ break;
+ case MMIO_DEVICE_TABLE:
+ st->devtab = (target_phys_addr_t) (val & MMIO_DEVTAB_BASE_MASK);
+ st->devtab_len = ((val & MMIO_DEVTAB_SIZE_MASK) + 1) *
+ (MMIO_DEVTAB_SIZE_UNIT / MMIO_DEVTAB_ENTRY_SIZE);
+ break;
+ case MMIO_COMMAND_BASE:
+ st->cmdbuf = (target_phys_addr_t) (val & MMIO_CMDBUF_BASE_MASK);
+ st->cmdbuf_len = 1UL << (st->mmio_buf[MMIO_CMDBUF_SIZE_BYTE] &
+ MMIO_CMDBUF_SIZE_MASK);
+ amd_iommu_cmdbuf_run(st);
+ break;
+ case MMIO_COMMAND_HEAD:
+ st->cmdbuf_head = val & MMIO_CMDBUF_HEAD_MASK;
+ amd_iommu_cmdbuf_run(st);
+ break;
+ case MMIO_COMMAND_TAIL:
+ st->cmdbuf_tail = val & MMIO_CMDBUF_TAIL_MASK;
+ amd_iommu_cmdbuf_run(st);
+ break;
+ case MMIO_EVENT_BASE:
+ st->evtlog = (target_phys_addr_t) (val & MMIO_EVTLOG_BASE_MASK);
+ st->evtlog_len = 1UL << (st->mmio_buf[MMIO_EVTLOG_SIZE_BYTE] &
+ MMIO_EVTLOG_SIZE_MASK);
+ break;
+ case MMIO_EVENT_HEAD:
+ st->evtlog_head = val & MMIO_EVTLOG_HEAD_MASK;
+ break;
+ case MMIO_EVENT_TAIL:
+ st->evtlog_tail = val & MMIO_EVTLOG_TAIL_MASK;
+ break;
+ case MMIO_EXCL_BASE:
+ st->excl_base = (target_phys_addr_t) (val & MMIO_EXCL_BASE_MASK);
+ st->excl_enabled = val & MMIO_EXCL_ENABLED_MASK;
+ st->excl_allow = val & MMIO_EXCL_ALLOW_MASK;
+ break;
+ case MMIO_EXCL_LIMIT:
+ st->excl_limit = (target_phys_addr_t) ((val & MMIO_EXCL_LIMIT_MASK) |
+ MMIO_EXCL_LIMIT_LOW);
+ break;
+ default:
+ break;
+ }
+}
+
+static uint32_t amd_iommu_mmio_readb(void *opaque, target_phys_addr_t addr)
+{
+ struct amd_iommu_state *st = opaque;
+
+ return amd_iommu_mmio_buf_read(st, addr, 1);
+}
+
+static uint32_t amd_iommu_mmio_readw(void *opaque, target_phys_addr_t addr)
+{
+ struct amd_iommu_state *st = opaque;
+
+ return amd_iommu_mmio_buf_read(st, addr, 2);
+}
+
+static uint32_t amd_iommu_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+ struct amd_iommu_state *st = opaque;
+
+ return amd_iommu_mmio_buf_read(st, addr, 4);
+}
+
+static void amd_iommu_mmio_writeb(void *opaque,
+ target_phys_addr_t addr,
+ uint32_t val)
+{
+ struct amd_iommu_state *st = opaque;
+
+ amd_iommu_mmio_buf_write(st, addr, 1, val);
+ amd_iommu_update_mmio(st, addr);
+}
+
+static void amd_iommu_mmio_writew(void *opaque,
+ target_phys_addr_t addr,
+ uint32_t val)
+{
+ struct amd_iommu_state *st = opaque;
+
+ amd_iommu_mmio_buf_write(st, addr, 2, val);
+ amd_iommu_update_mmio(st, addr);
+}
+
+static void amd_iommu_mmio_writel(void *opaque,
+ target_phys_addr_t addr,
+ uint32_t val)
+{
+ struct amd_iommu_state *st = opaque;
+
+ amd_iommu_mmio_buf_write(st, addr, 4, val);
+ amd_iommu_update_mmio(st, addr);
+}
+
+static CPUReadMemoryFunc * const amd_iommu_mmio_read[] = {
+ amd_iommu_mmio_readb,
+ amd_iommu_mmio_readw,
+ amd_iommu_mmio_readl,
+};
+
+static CPUWriteMemoryFunc * const amd_iommu_mmio_write[] = {
+ amd_iommu_mmio_writeb,
+ amd_iommu_mmio_writew,
+ amd_iommu_mmio_writel,
+};
+
+static void amd_iommu_init_mmio(struct amd_iommu_state *st)
+{
+ st->mmio_buf[MMIO_CMDBUF_SIZE_BYTE] = MMIO_CMDBUF_DEFAULT_SIZE;
+ st->mmio_buf[MMIO_EVTLOG_SIZE_BYTE] = MMIO_EVTLOG_DEFAULT_SIZE;
+}
+
+static void amd_iommu_enable_mmio(struct amd_iommu_state *st)
+{
+ target_phys_addr_t addr;
+
+ st->mmio_index = cpu_register_io_memory(amd_iommu_mmio_read,
+ amd_iommu_mmio_write, st);
+ if (st->mmio_index < 0)
+ return;
+
+ addr = le64_to_cpu(*(uint64_t *) &st->capab[CAPAB_BAR_LOW]) & CAPAB_BAR_MASK;
+ cpu_register_physical_memory(addr, MMIO_SIZE, st->mmio_index);
+
+ st->mmio_addr = addr;
+ st->mmio_buf = qemu_mallocz(MMIO_SIZE);
+ st->mmio_enabled = 1;
+ amd_iommu_init_mmio(st);
+}
+
+static uint32_t amd_iommu_read_capab(PCIDevice *pci_dev,
+ uint32_t addr, int len)
+{
+ return pci_default_cap_read_config(pci_dev, addr, len);
+}
+
+static void amd_iommu_write_capab(PCIDevice *dev,
+ uint32_t addr, uint32_t val, int len)
+{
+ struct amd_iommu_state *st;
+ unsigned char *capab;
+ int reg;
+
+ st = DO_UPCAST(struct amd_iommu_state, dev, dev);
+ capab = st->capab;
+ reg = (addr - 0x40) & ~0x3; /* Get the 32-bit register. */
+
+ switch (reg) {
+ case CAPAB_HEADER:
+ case CAPAB_MISC:
+ /* Read-only. */
+ return;
+ case CAPAB_BAR_LOW:
+ case CAPAB_BAR_HIGH:
+ case CAPAB_RANGE:
+ if (st->mmio_enabled)
+ return;
+ pci_default_cap_write_config(dev, addr, val, len);
+ break;
+ default:
+ return;
+ }
+
+ if (capab[CAPAB_BAR_LOW] & 0x1)
+ amd_iommu_enable_mmio(st);
+}
+
+static int amd_iommu_init_capab(PCIDevice *dev)
+{
+ struct amd_iommu_state *st;
+ unsigned char *capab;
+
+ st = DO_UPCAST(struct amd_iommu_state, dev, dev);
+ capab = st->dev.config + st->capab_offset;
+
+ capab[CAPAB_REV_TYPE] = CAPAB_INIT_REV_TYPE;
+ capab[CAPAB_FLAGS] = CAPAB_INIT_FLAGS;
+ capab[CAPAB_BAR_LOW] = 0;
+ capab[CAPAB_BAR_HIGH] = 0;
+ capab[CAPAB_RANGE] = 0;
+ *((uint32_t *) &capab[CAPAB_MISC]) = cpu_to_le32(CAPAB_INIT_MISC);
+
+ st->capab = capab;
+ st->dev.cap.length = CAPAB_SIZE;
+
+ return 0;
+}
+
+static int amd_iommu_translate(PCIIOMMU *iommu,
+ PCIDevice *dev,
+ pci_addr_t addr,
+ target_phys_addr_t *paddr,
+ int *len,
+ unsigned perms);
+
+static int amd_iommu_pci_initfn(PCIDevice *dev)
+{
+ struct amd_iommu_state *st;
+ PCIIOMMU *iommu;
+ int err;
+
+ st = DO_UPCAST(struct amd_iommu_state, dev, dev);
+
+ pci_config_set_vendor_id(st->dev.config, PCI_VENDOR_ID_AMD);
+ pci_config_set_device_id(st->dev.config, PCI_DEVICE_ID_AMD_IOMMU);
+ pci_config_set_class(st->dev.config, PCI_CLASS_SYSTEM_IOMMU);
+
+ st->capab_offset = pci_add_capability(&st->dev,
+ PCI_CAP_ID_SEC,
+ CAPAB_SIZE);
+ err = pci_enable_capability_support(&st->dev, st->capab_offset,
+ amd_iommu_read_capab,
+ amd_iommu_write_capab,
+ amd_iommu_init_capab);
+ if (err)
+ return err;
+
+ iommu = qemu_mallocz(sizeof(PCIIOMMU));
+ iommu->opaque = st;
+ iommu->translate = amd_iommu_translate;
+ iommu->register_iotlb_invalidator = amd_iommu_register_invalidator;
+ pci_register_iommu(dev, iommu);
+
+ return 0;
+}
+
+static const VMStateDescription vmstate_amd_iommu = {
+ .name = "amd-iommu",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .fields = (VMStateField []) {
+ VMSTATE_PCI_DEVICE(dev, struct amd_iommu_state),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static PCIDeviceInfo amd_iommu_pci_info = {
+ .qdev.name = "amd-iommu",
+ .qdev.desc = "AMD IOMMU",
+ .qdev.size = sizeof(struct amd_iommu_state),
+ .qdev.vmsd = &vmstate_amd_iommu,
+ .init = amd_iommu_pci_initfn,
+};
+
+void amd_iommu_init(PCIBus *bus)
+{
+ pci_create_simple(bus, -1, "amd-iommu");
+}
+
+static void amd_iommu_register(void)
+{
+ pci_qdev_register(&amd_iommu_pci_info);
+}
+
+device_init(amd_iommu_register);
+
+static void amd_iommu_page_fault(struct amd_iommu_state *st,
+ int devfn,
+ unsigned domid,
+ target_phys_addr_t addr,
+ int present,
+ int is_write)
+{
+ uint16_t entry[8];
+ uint64_t *entry_addr = (uint64_t *) &entry[4];
+
+ entry[0] = cpu_to_le16(devfn);
+ entry[1] = 0;
+ entry[2] = cpu_to_le16(domid);
+ entry[3] = (2UL << 12) | (!!present << 4) | (!!is_write << 5);
+ *entry_addr = cpu_to_le64(addr);
+
+ cpu_physical_memory_write((target_phys_addr_t) st->evtlog + st->evtlog_tail, (uint8_t *) &entry, 16);
+ st->evtlog_tail += 16;
+}
+
+static inline uint64_t amd_iommu_get_perms(uint64_t entry)
+{
+ return (entry & (DEV_PERM_READ | DEV_PERM_WRITE)) >> DEV_PERM_SHIFT;
+}
+
+static int amd_iommu_translate(PCIIOMMU *iommu,
+ PCIDevice *dev,
+ pci_addr_t addr,
+ target_phys_addr_t *paddr,
+ int *len,
+ unsigned perms)
+{
+ int devfn, present;
+ target_phys_addr_t entry_addr, pte_addr;
+ uint64_t entry[4], pte, page_offset, pte_perms;
+ unsigned level, domid;
+ struct amd_iommu_state *st = iommu->opaque;
+
+ if (!st->enabled)
+ goto no_translation;
+
+ /* Get device table entry. */
+ devfn = dev->devfn;
+ entry_addr = st->devtab + devfn * DEVTAB_ENTRY_SIZE;
+ cpu_physical_memory_read(entry_addr, (uint8_t *) entry, 32);
+
+ pte = entry[0];
+ if (!(pte & DEV_VALID) || !(pte & DEV_TRANSLATION_VALID)) {
+ goto no_translation;
+ }
+ domid = entry[1] & DEV_DOMAIN_ID_MASK;
+ level = (pte >> DEV_MODE_RSHIFT) & DEV_MODE_MASK;
+ while (level > 0) {
+ /*
+ * Check permissions: the bitwise
+ * implication perms -> pte_perms must be true.
+ */
+ pte_perms = amd_iommu_get_perms(pte);
+ present = pte & 1;
+ if (!present || perms != (perms & pte_perms)) {
+ amd_iommu_page_fault(st, devfn, domid, addr,
+ present, !!(perms & IOMMU_PERM_WRITE));
+ return -EPERM;
+ }
+
+ /* Go to the next lower level. */
+ pte_addr = pte & DEV_PT_ROOT_MASK;
+ pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3;
+ pte = ldq_phys(pte_addr);
+ level = (pte >> DEV_MODE_RSHIFT) & DEV_MODE_MASK;
+ }
+ page_offset = addr & 4095;
+ *paddr = (pte & DEV_PT_ROOT_MASK) + page_offset;
+ *len = 4096 - page_offset;
+
+ return 0;
+
+no_translation:
+ *paddr = addr;
+ *len = INT_MAX;
+ return 0;
+}
diff --git a/hw/pc.c b/hw/pc.c
index 186e322..4c929f9 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1066,6 +1066,10 @@ void pc_pci_device_init(PCIBus *pci_bus)
int max_bus;
int bus;
+#ifdef CONFIG_AMD_IOMMU
+ amd_iommu_init(pci_bus);
+#endif
+
max_bus = drive_get_max_bus(IF_SCSI);
for (bus = 0; bus <= max_bus; bus++) {
pci_create_simple(pci_bus, -1, "lsi53c895a");
diff --git a/hw/pc.h b/hw/pc.h
index 3ef2f75..255ad93 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -191,4 +191,7 @@ void extboot_init(BlockDriverState *bs);
int e820_add_entry(uint64_t, uint64_t, uint32_t);
+/* amd_iommu.c */
+void amd_iommu_init(PCIBus *bus);
+
#endif
diff --git a/hw/pci_ids.h b/hw/pci_ids.h
index 39e9f1d..d790312 100644
--- a/hw/pci_ids.h
+++ b/hw/pci_ids.h
@@ -26,6 +26,7 @@
#define PCI_CLASS_MEMORY_RAM 0x0500
+#define PCI_CLASS_SYSTEM_IOMMU 0x0806
#define PCI_CLASS_SYSTEM_OTHER 0x0880
#define PCI_CLASS_SERIAL_USB 0x0c03
@@ -56,6 +57,7 @@
#define PCI_VENDOR_ID_AMD 0x1022
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
+#define PCI_DEVICE_ID_AMD_IOMMU 0x0000 /* FIXME */
#define PCI_VENDOR_ID_MOTOROLA 0x1057
#define PCI_DEVICE_ID_MOTOROLA_MPC106 0x0002
diff --git a/hw/pci_regs.h b/hw/pci_regs.h
index 1c675dc..6399b5d 100644
--- a/hw/pci_regs.h
+++ b/hw/pci_regs.h
@@ -216,6 +216,7 @@
#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */
#define PCI_CAP_ID_SSVID 0x0D /* Bridge subsystem vendor/device ID */
#define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */
+#define PCI_CAP_ID_SEC 0x0F /* Secure Device (AMD IOMMU) */
#define PCI_CAP_ID_EXP 0x10 /* PCI Express */
#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */
#define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */
--
1.7.1
* Re: [Qemu-devel] [RFC PATCH 2/4] AMD IOMMU emulation
2010-08-04 22:32 ` [Qemu-devel] [RFC PATCH 2/4] AMD IOMMU emulation Eduard - Gabriel Munteanu
@ 2010-08-05 21:31 ` Blue Swirl
2010-08-06 0:41 ` Eduard - Gabriel Munteanu
0 siblings, 1 reply; 15+ messages in thread
From: Blue Swirl @ 2010-08-05 21:31 UTC (permalink / raw)
To: Eduard - Gabriel Munteanu; +Cc: joro, paul, qemu-devel, kvm, avi
On Wed, Aug 4, 2010 at 10:32 PM, Eduard - Gabriel Munteanu
<eduard.munteanu@linux360.ro> wrote:
> This introduces emulation for the AMD IOMMU, described in "AMD I/O
> Virtualization Technology (IOMMU) Specification".
>
> Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
> ---
> Makefile.target | 2 +
> configure | 10 +
> hw/amd_iommu.c | 671 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> hw/pc.c | 4 +
> hw/pc.h | 3 +
> hw/pci_ids.h | 2 +
> hw/pci_regs.h | 1 +
> 7 files changed, 693 insertions(+), 0 deletions(-)
> create mode 100644 hw/amd_iommu.c
>
> diff --git a/Makefile.target b/Makefile.target
> index 70a9c1b..86226a0 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -219,6 +219,8 @@ obj-i386-y += pcspk.o i8254.o
> obj-i386-$(CONFIG_KVM_PIT) += i8254-kvm.o
> obj-i386-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += device-assignment.o
>
> +obj-i386-$(CONFIG_AMD_IOMMU) += amd_iommu.o
Make this unconditional.
> +
> # Hardware support
> obj-ia64-y += ide.o pckbd.o vga.o $(SOUND_HW) dma.o $(AUDIODRV)
> obj-ia64-y += fdc.o mc146818rtc.o serial.o i8259.o ipf.o
> diff --git a/configure b/configure
> index af50607..7448603 100755
> --- a/configure
> +++ b/configure
> @@ -317,6 +317,7 @@ io_thread="no"
> mixemu="no"
> kvm_cap_pit=""
> kvm_cap_device_assignment=""
> +amd_iommu="no"
> kerneldir=""
> aix="no"
> blobs="yes"
> @@ -629,6 +630,8 @@ for opt do
> ;;
> --enable-kvm-device-assignment) kvm_cap_device_assignment="yes"
> ;;
> + --enable-amd-iommu-emul) amd_iommu="yes"
> + ;;
> --enable-profiler) profiler="yes"
> ;;
> --enable-cocoa)
> @@ -871,6 +874,8 @@ echo " --disable-kvm-pit disable KVM pit support"
> echo " --enable-kvm-pit enable KVM pit support"
> echo " --disable-kvm-device-assignment disable KVM device assignment support"
> echo " --enable-kvm-device-assignment enable KVM device assignment support"
> +echo " --disable-amd-iommu-emul disable AMD IOMMU emulation"
> +echo " --enable-amd-iommu-emul enable AMD IOMMU emulation"
> echo " --disable-nptl disable usermode NPTL support"
> echo " --enable-nptl enable usermode NPTL support"
> echo " --enable-system enable all system emulation targets"
> @@ -2251,6 +2256,7 @@ echo "Install blobs $blobs"
> echo "KVM support $kvm"
> echo "KVM PIT support $kvm_cap_pit"
> echo "KVM device assig. $kvm_cap_device_assignment"
> +echo "AMD IOMMU emul. $amd_iommu"
> echo "fdt support $fdt"
> echo "preadv support $preadv"
> echo "fdatasync $fdatasync"
> @@ -2645,6 +2651,10 @@ case "$target_arch2" in
> x86_64)
> TARGET_BASE_ARCH=i386
> target_phys_bits=64
> + if test "$amd_iommu" = "yes"; then
> + echo "CONFIG_AMD_IOMMU=y" >> $config_target_mak
> + echo "CONFIG_PCI_IOMMU=y" >> $config_host_mak
> + fi
Drop all configure changes.
> ;;
> ia64)
> target_phys_bits=64
> diff --git a/hw/amd_iommu.c b/hw/amd_iommu.c
> new file mode 100644
> index 0000000..ff9903e
> --- /dev/null
> +++ b/hw/amd_iommu.c
> @@ -0,0 +1,671 @@
> +/*
> + * AMD IOMMU emulation
> + *
> + * Copyright (c) 2010 Eduard - Gabriel Munteanu
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "pc.h"
> +#include "hw.h"
> +#include "pci.h"
> +#include "qlist.h"
> +
> +/* Capability registers */
> +#define CAPAB_HEADER 0x00
> +#define CAPAB_REV_TYPE 0x02
> +#define CAPAB_FLAGS 0x03
> +#define CAPAB_BAR_LOW 0x04
> +#define CAPAB_BAR_HIGH 0x08
> +#define CAPAB_RANGE 0x0C
> +#define CAPAB_MISC 0x10
> +
> +#define CAPAB_SIZE 0x14
> +
> +/* Capability header data */
> +#define CAPAB_FLAG_IOTLBSUP (1 << 0)
> +#define CAPAB_FLAG_HTTUNNEL (1 << 1)
> +#define CAPAB_FLAG_NPCACHE (1 << 2)
> +#define CAPAB_INIT_REV (1 << 3)
> +#define CAPAB_INIT_TYPE 3
> > +#define CAPAB_INIT_REV_TYPE (CAPAB_INIT_REV | CAPAB_INIT_TYPE)
> +#define CAPAB_INIT_FLAGS (CAPAB_FLAG_NPCACHE | CAPAB_FLAG_HTTUNNEL)
> +#define CAPAB_INIT_MISC (64 << 15) | (48 << 8)
> +#define CAPAB_BAR_MASK ~((1UL << 14) - 1)
> +
> +/* MMIO registers */
> +#define MMIO_DEVICE_TABLE 0x0000
> +#define MMIO_COMMAND_BASE 0x0008
> +#define MMIO_EVENT_BASE 0x0010
> +#define MMIO_CONTROL 0x0018
> +#define MMIO_EXCL_BASE 0x0020
> +#define MMIO_EXCL_LIMIT 0x0028
> +#define MMIO_COMMAND_HEAD 0x2000
> +#define MMIO_COMMAND_TAIL 0x2008
> +#define MMIO_EVENT_HEAD 0x2010
> +#define MMIO_EVENT_TAIL 0x2018
> +#define MMIO_STATUS 0x2020
> +
> +#define MMIO_SIZE 0x4000
> +
> +#define MMIO_DEVTAB_SIZE_MASK ((1ULL << 12) - 1)
> +#define MMIO_DEVTAB_BASE_MASK (((1ULL << 52) - 1) & ~MMIO_DEVTAB_SIZE_MASK)
> +#define MMIO_DEVTAB_ENTRY_SIZE 32
> +#define MMIO_DEVTAB_SIZE_UNIT 4096
> +
> +#define MMIO_CMDBUF_SIZE_BYTE (MMIO_COMMAND_BASE + 7)
> +#define MMIO_CMDBUF_SIZE_MASK 0x0F
> +#define MMIO_CMDBUF_BASE_MASK MMIO_DEVTAB_BASE_MASK
> +#define MMIO_CMDBUF_DEFAULT_SIZE 8
> +#define MMIO_CMDBUF_HEAD_MASK (((1ULL << 19) - 1) & ~0x0F)
> +#define MMIO_CMDBUF_TAIL_MASK MMIO_EVTLOG_HEAD_MASK
> +
> +#define MMIO_EVTLOG_SIZE_BYTE (MMIO_EVENT_BASE + 7)
> +#define MMIO_EVTLOG_SIZE_MASK MMIO_CMDBUF_SIZE_MASK
> +#define MMIO_EVTLOG_BASE_MASK MMIO_CMDBUF_BASE_MASK
> +#define MMIO_EVTLOG_DEFAULT_SIZE MMIO_CMDBUF_DEFAULT_SIZE
> +#define MMIO_EVTLOG_HEAD_MASK (((1ULL << 19) - 1) & ~0x0F)
> +#define MMIO_EVTLOG_TAIL_MASK MMIO_EVTLOG_HEAD_MASK
> +
> +#define MMIO_EXCL_BASE_MASK MMIO_DEVTAB_BASE_MASK
> +#define MMIO_EXCL_ENABLED_MASK (1ULL << 0)
> +#define MMIO_EXCL_ALLOW_MASK (1ULL << 1)
> +#define MMIO_EXCL_LIMIT_MASK MMIO_DEVTAB_BASE_MASK
> +#define MMIO_EXCL_LIMIT_LOW 0xFFF
> +
> +#define MMIO_CONTROL_IOMMUEN (1ULL << 0)
> +#define MMIO_CONTROL_HTTUNEN (1ULL << 1)
> +#define MMIO_CONTROL_EVENTLOGEN (1ULL << 2)
> +#define MMIO_CONTROL_EVENTINTEN (1ULL << 3)
> +#define MMIO_CONTROL_COMWAITINTEN (1ULL << 4)
> +#define MMIO_CONTROL_CMDBUFEN (1ULL << 12)
> +
> +#define MMIO_STATUS_EVTLOG_OF (1ULL << 0)
> +#define MMIO_STATUS_EVTLOG_INTR (1ULL << 1)
> +#define MMIO_STATUS_COMWAIT_INTR (1ULL << 2)
> +#define MMIO_STATUS_EVTLOG_RUN (1ULL << 3)
> +#define MMIO_STATUS_CMDBUF_RUN (1ULL << 4)
> +
> +#define CMDBUF_ID_BYTE 0x07
> +#define CMDBUF_ID_RSHIFT 4
> +#define CMDBUF_ENTRY_SIZE 0x10
> +
> +#define CMD_COMPLETION_WAIT 0x01
> +#define CMD_INVAL_DEVTAB_ENTRY 0x02
> +#define CMD_INVAL_IOMMU_PAGES 0x03
> +#define CMD_INVAL_IOTLB_PAGES 0x04
> +#define CMD_INVAL_INTR_TABLE 0x05
> +
> +#define DEVTAB_ENTRY_SIZE 32
> +
> +/* Device table entry bits 0:63 */
> +#define DEV_VALID (1ULL << 0)
> +#define DEV_TRANSLATION_VALID (1ULL << 1)
> +#define DEV_MODE_MASK 0x7
> +#define DEV_MODE_RSHIFT 9
> +#define DEV_PT_ROOT_MASK 0xFFFFFFFFFF000
> +#define DEV_PT_ROOT_RSHIFT 12
> +#define DEV_PERM_SHIFT 61
> +#define DEV_PERM_READ (1ULL << 61)
> +#define DEV_PERM_WRITE (1ULL << 62)
> +
> +/* Device table entry bits 64:127 */
> +#define DEV_DOMAIN_ID_MASK ((1ULL << 16) - 1)
> +#define DEV_IOTLB_SUPPORT (1ULL << 17)
> +#define DEV_SUPPRESS_PF (1ULL << 18)
> +#define DEV_SUPPRESS_ALL_PF (1ULL << 19)
> +#define DEV_IOCTL_MASK ~3
> +#define DEV_IOCTL_RSHIFT 20
> +#define DEV_IOCTL_DENY 0
> +#define DEV_IOCTL_PASSTHROUGH 1
> +#define DEV_IOCTL_TRANSLATE 2
> +#define DEV_CACHE (1ULL << 37)
> +#define DEV_SNOOP_DISABLE (1ULL << 38)
> +#define DEV_EXCL (1ULL << 39)
> +
> +struct amd_iommu_invalidator {
> + int devfn;
> + PCIInvalidateIOTLBFunc *func;
> + void *opaque;
> + QLIST_ENTRY(amd_iommu_invalidator) list;
> +};
This should be AMDIOMMUInvalidator with typedef.
> +
> +struct amd_iommu_state {
> + PCIDevice dev;
> +
> + int capab_offset;
> + unsigned char *capab;
> +
> + int mmio_index;
> + target_phys_addr_t mmio_addr;
> + unsigned char *mmio_buf;
> + int mmio_enabled;
> +
> + int enabled;
> + int ats_enabled;
> +
> + target_phys_addr_t devtab;
> + size_t devtab_len;
> +
> + target_phys_addr_t cmdbuf;
> + int cmdbuf_enabled;
> + size_t cmdbuf_len;
> + size_t cmdbuf_head;
> + size_t cmdbuf_tail;
> + int completion_wait_intr;
> +
> + target_phys_addr_t evtlog;
> + int evtlog_enabled;
> + int evtlog_intr;
> + size_t evtlog_len;
> + size_t evtlog_head;
> + size_t evtlog_tail;
> +
> + target_phys_addr_t excl_base;
> + target_phys_addr_t excl_limit;
> + int excl_enabled;
> + int excl_allow;
> +
> + QLIST_HEAD(, amd_iommu_invalidator) invalidators;
> +};
Likewise, AMDIOMMUState.
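I.e. something like this (same fields, just the usual QEMU naming):

typedef struct AMDIOMMUInvalidator {
    int devfn;
    PCIInvalidateIOTLBFunc *func;
    void *opaque;
    QLIST_ENTRY(AMDIOMMUInvalidator) list;
} AMDIOMMUInvalidator;

typedef struct AMDIOMMUState AMDIOMMUState;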
> +static void amd_iommu_register_invalidator(PCIIOMMU *iommu,
> + PCIDevice *dev,
> + pci_addr_t addr,
> + PCIInvalidateIOTLBFunc *cb,
> + void *opaque)
> +{
> + struct amd_iommu_invalidator *inval;
> + struct amd_iommu_state *st = iommu->opaque;
> +
> + inval = qemu_malloc(sizeof(struct amd_iommu_invalidator));
> + inval->devfn = dev->devfn;
> + inval->func = cb;
> + inval->opaque = opaque;
> +
> + QLIST_INSERT_HEAD(&st->invalidators, inval, list);
> +}
> +
> +static void amd_iommu_completion_wait(struct amd_iommu_state *st,
> + uint8_t *cmd)
> +{
> + uint64_t addr;
> +
> + if (cmd[0] & 1) {
> + addr = le64_to_cpu(*(uint64_t *) cmd) & 0xFFFFFFFFFFFF8;
> + cpu_physical_memory_write(addr, cmd + 8, 8);
> + }
> +
> + if (cmd[0] & 2)
> + st->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_COMWAIT_INTR;
> +}
> +
> +static void amd_iommu_inval_iotlb(struct amd_iommu_state *st,
> + uint8_t *cmd)
> +{
> + struct amd_iommu_invalidator *inval;
> + int devfn = *(uint16_t *) cmd;
> +
> + QLIST_FOREACH(inval, &st->invalidators, list) {
> + if (inval->devfn == devfn) {
> + inval->func(inval->opaque);
> + QLIST_REMOVE(inval, list);
> + }
> + }
> +}
> +
> +static void amd_iommu_cmdbuf_run(struct amd_iommu_state *st)
> +{
> + uint8_t cmd[16];
> + int type;
> +
> + if (!st->cmdbuf_enabled)
> + return;
> +
> + /* Check if there's work to do. */
> + if (st->cmdbuf_head == st->cmdbuf_tail)
> + return;
> +
> + cpu_physical_memory_read(st->cmdbuf + st->cmdbuf_head, cmd, 16);
> + type = cmd[CMDBUF_ID_BYTE] >> CMDBUF_ID_RSHIFT;
> + switch (type) {
> + case CMD_COMPLETION_WAIT:
> + amd_iommu_completion_wait(st, cmd);
> + break;
> + case CMD_INVAL_DEVTAB_ENTRY:
> + break;
> + case CMD_INVAL_IOMMU_PAGES:
> + break;
> + case CMD_INVAL_IOTLB_PAGES:
> + amd_iommu_inval_iotlb(st, cmd);
> + break;
> + case CMD_INVAL_INTR_TABLE:
> + break;
> + default:
> + break;
> + }
> +
> + /* Increment and wrap head pointer. */
> + st->cmdbuf_head += CMDBUF_ENTRY_SIZE;
> + if (st->cmdbuf_head >= st->cmdbuf_len)
> + st->cmdbuf_head = 0;
> +}
> +
> +static uint32_t amd_iommu_mmio_buf_read(struct amd_iommu_state *st,
> + size_t offset,
> + size_t size)
> +{
> + ssize_t i;
> + uint32_t ret;
> +
> + if (!size)
> + return 0;
> +
> + ret = st->mmio_buf[offset + size - 1];
> + for (i = size - 2; i >= 0; i--) {
> + ret <<= 8;
> + ret |= st->mmio_buf[offset + i];
> + }
> +
> + return ret;
> +}
> +
> +static void amd_iommu_mmio_buf_write(struct amd_iommu_state *st,
> + size_t offset,
> + size_t size,
> + uint32_t val)
> +{
> + size_t i;
> +
> + for (i = 0; i < size; i++) {
> + st->mmio_buf[offset + i] = val & 0xFF;
> + val >>= 8;
> + }
> +}
> +
> +static void amd_iommu_update_mmio(struct amd_iommu_state *st,
> + target_phys_addr_t addr)
> +{
> + size_t reg = addr & ~0x07;
> + uint64_t *base = (uint64_t *) &st->mmio_buf[reg];
> + uint64_t val = *base;
> +
> + switch (reg) {
> + case MMIO_CONTROL:
> + st->enabled = !!(val & MMIO_CONTROL_IOMMUEN);
> + st->ats_enabled = !!(val & MMIO_CONTROL_HTTUNEN);
> + st->evtlog_enabled = st->enabled &&
> + !!(val & MMIO_CONTROL_EVENTLOGEN);
> + st->evtlog_intr = !!(val & MMIO_CONTROL_EVENTINTEN);
> + st->completion_wait_intr = !!(val & MMIO_CONTROL_COMWAITINTEN);
> + st->cmdbuf_enabled = st->enabled &&
> + !!(val & MMIO_CONTROL_CMDBUFEN);
> +
> + /* Update status flags depending on the control register. */
> + if (st->cmdbuf_enabled)
> + st->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_CMDBUF_RUN;
> + else
> + st->mmio_buf[MMIO_STATUS] &= ~MMIO_STATUS_CMDBUF_RUN;
> + if (st->evtlog_enabled)
> + st->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_EVTLOG_RUN;
> + else
> + st->mmio_buf[MMIO_STATUS] &= ~MMIO_STATUS_EVTLOG_RUN;
> +
> + amd_iommu_cmdbuf_run(st);
> + break;
> + case MMIO_DEVICE_TABLE:
> + st->devtab = (target_phys_addr_t) (val & MMIO_DEVTAB_BASE_MASK);
> + st->devtab_len = ((val & MMIO_DEVTAB_SIZE_MASK) + 1) *
> + (MMIO_DEVTAB_SIZE_UNIT / MMIO_DEVTAB_ENTRY_SIZE);
> + break;
> + case MMIO_COMMAND_BASE:
> + st->cmdbuf = (target_phys_addr_t) (val & MMIO_CMDBUF_BASE_MASK);
> + st->cmdbuf_len = 1UL << (st->mmio_buf[MMIO_CMDBUF_SIZE_BYTE] &
> + MMIO_CMDBUF_SIZE_MASK);
> + amd_iommu_cmdbuf_run(st);
> + break;
> + case MMIO_COMMAND_HEAD:
> + st->cmdbuf_head = val & MMIO_CMDBUF_HEAD_MASK;
> + amd_iommu_cmdbuf_run(st);
> + break;
> + case MMIO_COMMAND_TAIL:
> + st->cmdbuf_tail = val & MMIO_CMDBUF_TAIL_MASK;
> + amd_iommu_cmdbuf_run(st);
> + break;
> + case MMIO_EVENT_BASE:
> + st->evtlog = (target_phys_addr_t) (val & MMIO_EVTLOG_BASE_MASK);
> + st->evtlog_len = 1UL << (st->mmio_buf[MMIO_EVTLOG_SIZE_BYTE] &
> + MMIO_EVTLOG_SIZE_MASK);
> + break;
> + case MMIO_EVENT_HEAD:
> + st->evtlog_head = val & MMIO_EVTLOG_HEAD_MASK;
> + break;
> + case MMIO_EVENT_TAIL:
> + st->evtlog_tail = val & MMIO_EVTLOG_TAIL_MASK;
> + break;
> + case MMIO_EXCL_BASE:
> + st->excl_base = (target_phys_addr_t) (val & MMIO_EXCL_BASE_MASK);
> + st->excl_enabled = val & MMIO_EXCL_ENABLED_MASK;
> + st->excl_allow = val & MMIO_EXCL_ALLOW_MASK;
> + break;
> + case MMIO_EXCL_LIMIT:
> + st->excl_limit = (target_phys_addr_t) ((val & MMIO_EXCL_LIMIT_MASK) |
> + MMIO_EXCL_LIMIT_LOW);
> + break;
> + default:
> + break;
> + }
> +}
> +
> +static uint32_t amd_iommu_mmio_readb(void *opaque, target_phys_addr_t addr)
> +{
> + struct amd_iommu_state *st = opaque;
> +
> + return amd_iommu_mmio_buf_read(st, addr, 1);
> +}
> +
> +static uint32_t amd_iommu_mmio_readw(void *opaque, target_phys_addr_t addr)
> +{
> + struct amd_iommu_state *st = opaque;
> +
> + return amd_iommu_mmio_buf_read(st, addr, 2);
> +}
> +
> +static uint32_t amd_iommu_mmio_readl(void *opaque, target_phys_addr_t addr)
> +{
> + struct amd_iommu_state *st = opaque;
> +
> + return amd_iommu_mmio_buf_read(st, addr, 4);
> +}
> +
> +static void amd_iommu_mmio_writeb(void *opaque,
> + target_phys_addr_t addr,
> + uint32_t val)
> +{
> + struct amd_iommu_state *st = opaque;
> +
> + amd_iommu_mmio_buf_write(st, addr, 1, val);
> + amd_iommu_update_mmio(st, addr);
> +}
> +
> +static void amd_iommu_mmio_writew(void *opaque,
> + target_phys_addr_t addr,
> + uint32_t val)
> +{
> + struct amd_iommu_state *st = opaque;
> +
> + amd_iommu_mmio_buf_write(st, addr, 2, val);
> + amd_iommu_update_mmio(st, addr);
> +}
> +
> +static void amd_iommu_mmio_writel(void *opaque,
> + target_phys_addr_t addr,
> + uint32_t val)
> +{
> + struct amd_iommu_state *st = opaque;
> +
> + amd_iommu_mmio_buf_write(st, addr, 4, val);
> + amd_iommu_update_mmio(st, addr);
> +}
> +
> +static CPUReadMemoryFunc * const amd_iommu_mmio_read[] = {
> + amd_iommu_mmio_readb,
> + amd_iommu_mmio_readw,
> + amd_iommu_mmio_readl,
> +};
> +
> +static CPUWriteMemoryFunc * const amd_iommu_mmio_write[] = {
> + amd_iommu_mmio_writeb,
> + amd_iommu_mmio_writew,
> + amd_iommu_mmio_writel,
> +};
> +
> +static void amd_iommu_init_mmio(struct amd_iommu_state *st)
> +{
> + st->mmio_buf[MMIO_CMDBUF_SIZE_BYTE] = MMIO_CMDBUF_DEFAULT_SIZE;
> + st->mmio_buf[MMIO_EVTLOG_SIZE_BYTE] = MMIO_EVTLOG_DEFAULT_SIZE;
> +}
> +
> +static void amd_iommu_enable_mmio(struct amd_iommu_state *st)
> +{
> + target_phys_addr_t addr;
> +
> + st->mmio_index = cpu_register_io_memory(amd_iommu_mmio_read,
> + amd_iommu_mmio_write, st);
> + if (st->mmio_index < 0)
> + return;
> +
> + addr = le64_to_cpu(*(uint64_t *) &st->capab[CAPAB_BAR_LOW]) & CAPAB_BAR_MASK;
> + cpu_register_physical_memory(addr, MMIO_SIZE, st->mmio_index);
> +
> + st->mmio_addr = addr;
> + st->mmio_buf = qemu_mallocz(MMIO_SIZE);
> + st->mmio_enabled = 1;
> + amd_iommu_init_mmio(st);
> +}
> +
> +static uint32_t amd_iommu_read_capab(PCIDevice *pci_dev,
> + uint32_t addr, int len)
> +{
> + return pci_default_cap_read_config(pci_dev, addr, len);
> +}
> +
> +static void amd_iommu_write_capab(PCIDevice *dev,
> + uint32_t addr, uint32_t val, int len)
> +{
> + struct amd_iommu_state *st;
> + unsigned char *capab;
> + int reg;
> +
> + st = DO_UPCAST(struct amd_iommu_state, dev, dev);
> + capab = st->capab;
> + reg = (addr - 0x40) & ~0x3; /* Get the 32-bit register. */
> +
> + switch (reg) {
> + case CAPAB_HEADER:
> + case CAPAB_MISC:
> + /* Read-only. */
> + return;
> + case CAPAB_BAR_LOW:
> + case CAPAB_BAR_HIGH:
> + case CAPAB_RANGE:
> + if (st->mmio_enabled)
> + return;
> + pci_default_cap_write_config(dev, addr, val, len);
> + break;
> + default:
> + return;
> + }
> +
> + if (capab[CAPAB_BAR_LOW] & 0x1)
> + amd_iommu_enable_mmio(st);
> +}
> +
> +static int amd_iommu_init_capab(PCIDevice *dev)
> +{
> + struct amd_iommu_state *st;
> + unsigned char *capab;
> +
> + st = DO_UPCAST(struct amd_iommu_state, dev, dev);
> + capab = st->dev.config + st->capab_offset;
> +
> + capab[CAPAB_REV_TYPE] = CAPAB_REV_TYPE;
> + capab[CAPAB_FLAGS] = CAPAB_FLAGS;
> + capab[CAPAB_BAR_LOW] = 0;
> + capab[CAPAB_BAR_HIGH] = 0;
> + capab[CAPAB_RANGE] = 0;
> + *((uint32_t *) &capab[CAPAB_MISC]) = cpu_to_le32(CAPAB_INIT_MISC);
> +
> + st->capab = capab;
> + st->dev.cap.length = CAPAB_SIZE;
> +
> + return 0;
> +}
> +
> +static int amd_iommu_translate(PCIIOMMU *iommu,
> + PCIDevice *dev,
> + pci_addr_t addr,
> + target_phys_addr_t *paddr,
> + int *len,
> + unsigned perms);
Please move the implementation here to avoid this declaration.
> +static int amd_iommu_pci_initfn(PCIDevice *dev)
> +{
> + struct amd_iommu_state *st;
> + PCIIOMMU *iommu;
> + int err;
> +
> + st = DO_UPCAST(struct amd_iommu_state, dev, dev);
> +
> + pci_config_set_vendor_id(st->dev.config, PCI_VENDOR_ID_AMD);
> + pci_config_set_device_id(st->dev.config, PCI_DEVICE_ID_AMD_IOMMU);
> + pci_config_set_class(st->dev.config, PCI_CLASS_SYSTEM_IOMMU);
> +
> + st->capab_offset = pci_add_capability(&st->dev,
> + PCI_CAP_ID_SEC,
> + CAPAB_SIZE);
> + err = pci_enable_capability_support(&st->dev, st->capab_offset,
> + amd_iommu_read_capab,
> + amd_iommu_write_capab,
> + amd_iommu_init_capab);
> + if (err)
> + return err;
> +
> + iommu = qemu_mallocz(sizeof(PCIIOMMU));
> + iommu->opaque = st;
> + iommu->translate = amd_iommu_translate;
> + iommu->register_iotlb_invalidator = amd_iommu_register_invalidator;
> + pci_register_iommu(dev, iommu);
I'd avoid the structure and just pass the stuff to pci_register_iommu
as function arguments.
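A rough sketch of that suggestion (the PCIIOMMUTranslateFunc and
PCIRegisterIOTLBInvalidatorFunc typedef names here are made up for
illustration, they are not existing QEMU types):

void pci_register_iommu(PCIDevice *dev,
                        PCIIOMMUTranslateFunc *translate,
                        PCIRegisterIOTLBInvalidatorFunc *register_inval,
                        void *opaque);

and the device init function would then simply do:

    pci_register_iommu(dev, amd_iommu_translate,
                       amd_iommu_register_invalidator, st);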
> +
> + return 0;
> +}
> +
> +static const VMStateDescription vmstate_amd_iommu = {
> + .name = "amd-iommu",
> + .version_id = 1,
> + .minimum_version_id = 1,
> + .minimum_version_id_old = 1,
> + .fields = (VMStateField []) {
> + VMSTATE_PCI_DEVICE(dev, struct amd_iommu_state),
> + VMSTATE_END_OF_LIST()
> + }
> +};
> +
> +static PCIDeviceInfo amd_iommu_pci_info = {
> + .qdev.name = "amd-iommu",
> + .qdev.desc = "AMD IOMMU",
> + .qdev.size = sizeof(struct amd_iommu_state),
> + .qdev.vmsd = &vmstate_amd_iommu,
> + .init = amd_iommu_pci_initfn,
> +};
> +
> +void amd_iommu_init(PCIBus *bus)
> +{
> + pci_create_simple(bus, -1, "amd-iommu");
> +}
Just open code this in pc.c.
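That is, drop amd_iommu_init() entirely and let pc_pci_device_init() make the
call directly, something like (sketch):

    pci_create_simple(pci_bus, -1, "amd-iommu");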
> +
> +static void amd_iommu_register(void)
> +{
> + pci_qdev_register(&amd_iommu_pci_info);
> +}
> +
> +device_init(amd_iommu_register);
> +
> +static void amd_iommu_page_fault(struct amd_iommu_state *st,
> + int devfn,
> + unsigned domid,
> + target_phys_addr_t addr,
> + int present,
> + int is_write)
> +{
> + uint16_t entry[8];
> + uint64_t *entry_addr = (uint64_t *) &entry[4];
> +
> + entry[0] = cpu_to_le16(devfn);
> + entry[1] = 0;
> + entry[2] = cpu_to_le16(domid);
> + entry[3] = (2UL << 12) | (!!present << 4) | (!!is_write << 5);
> + *entry_addr = cpu_to_le64(addr);
> +
> + cpu_physical_memory_write((target_phys_addr_t) st->evtlog + st->evtlog_tail, (uint8_t *) &entry, sizeof(entry));
> + st->evtlog_tail += sizeof(entry);
> +}
> +
> +static inline uint64_t amd_iommu_get_perms(uint64_t entry)
> +{
> + return (entry & (DEV_PERM_READ | DEV_PERM_WRITE)) >> DEV_PERM_SHIFT;
> +}
> +
> +static int amd_iommu_translate(PCIIOMMU *iommu,
> + PCIDevice *dev,
> + pci_addr_t addr,
> + target_phys_addr_t *paddr,
> + int *len,
> + unsigned perms)
> +{
> + int devfn, present;
> + target_phys_addr_t entry_addr, pte_addr;
> + uint64_t entry[4], pte, page_offset, pte_perms;
> + unsigned level, domid;
> + struct amd_iommu_state *st = iommu->opaque;
> +
> + if (!st->enabled)
> + goto no_translation;
> +
> + /* Get device table entry. */
> + devfn = dev->devfn;
> + entry_addr = st->devtab + devfn * DEVTAB_ENTRY_SIZE;
> + cpu_physical_memory_read(entry_addr, (uint8_t *) entry, 32);
> +
> + pte = entry[0];
> + if (!(pte & DEV_VALID) || !(pte & DEV_TRANSLATION_VALID)) {
> + goto no_translation;
> + }
> + domid = entry[1] & DEV_DOMAIN_ID_MASK;
> + level = (pte >> DEV_MODE_RSHIFT) & DEV_MODE_MASK;
> + while (level > 0) {
> + /*
> + * Check permissions: the bitwise
> + * implication perms -> entry_perms must be true.
> + */
> + pte_perms = amd_iommu_get_perms(pte);
> + present = pte & 1;
> + if (!present || perms != (perms & pte_perms)) {
> + amd_iommu_page_fault(st, devfn, domid, addr,
> + present, !!(perms & IOMMU_PERM_WRITE));
> + return -EPERM;
> + }
> +
> + /* Go to the next lower level. */
> + pte_addr = pte & DEV_PT_ROOT_MASK;
> + pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3;
> + pte = ldq_phys(pte_addr);
> + level = (pte >> DEV_MODE_RSHIFT) & DEV_MODE_MASK;
> + }
> + page_offset = addr & 4095;
> + *paddr = (pte & DEV_PT_ROOT_MASK) + page_offset;
> + *len = 4096 - page_offset;
> +
> + return 0;
> +
> +no_translation:
> + *paddr = addr;
> + *len = INT_MAX;
> + return 0;
> +}
> diff --git a/hw/pc.c b/hw/pc.c
> index 186e322..4c929f9 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -1066,6 +1066,10 @@ void pc_pci_device_init(PCIBus *pci_bus)
> int max_bus;
> int bus;
>
> +#ifdef CONFIG_AMD_IOMMU
> + amd_iommu_init(pci_bus);
> +#endif
> +
> max_bus = drive_get_max_bus(IF_SCSI);
> for (bus = 0; bus <= max_bus; bus++) {
> pci_create_simple(pci_bus, -1, "lsi53c895a");
> diff --git a/hw/pc.h b/hw/pc.h
> index 3ef2f75..255ad93 100644
> --- a/hw/pc.h
> +++ b/hw/pc.h
> @@ -191,4 +191,7 @@ void extboot_init(BlockDriverState *bs);
>
> int e820_add_entry(uint64_t, uint64_t, uint32_t);
>
> +/* amd_iommu.c */
> +void amd_iommu_init(PCIBus *bus);
> +
> #endif
> diff --git a/hw/pci_ids.h b/hw/pci_ids.h
> index 39e9f1d..d790312 100644
> --- a/hw/pci_ids.h
> +++ b/hw/pci_ids.h
> @@ -26,6 +26,7 @@
>
> #define PCI_CLASS_MEMORY_RAM 0x0500
>
> +#define PCI_CLASS_SYSTEM_IOMMU 0x0806
> #define PCI_CLASS_SYSTEM_OTHER 0x0880
>
> #define PCI_CLASS_SERIAL_USB 0x0c03
> @@ -56,6 +57,7 @@
>
> #define PCI_VENDOR_ID_AMD 0x1022
> #define PCI_DEVICE_ID_AMD_LANCE 0x2000
> +#define PCI_DEVICE_ID_AMD_IOMMU 0x0000 /* FIXME */
>
> #define PCI_VENDOR_ID_MOTOROLA 0x1057
> #define PCI_DEVICE_ID_MOTOROLA_MPC106 0x0002
> diff --git a/hw/pci_regs.h b/hw/pci_regs.h
> index 1c675dc..6399b5d 100644
> --- a/hw/pci_regs.h
> +++ b/hw/pci_regs.h
> @@ -216,6 +216,7 @@
> #define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */
> #define PCI_CAP_ID_SSVID 0x0D /* Bridge subsystem vendor/device ID */
> #define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */
> +#define PCI_CAP_ID_SEC 0x0F /* Secure Device (AMD IOMMU) */
Indentation seems to be off.
> #define PCI_CAP_ID_EXP 0x10 /* PCI Express */
> #define PCI_CAP_ID_MSIX 0x11 /* MSI-X */
> #define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */
> --
> 1.7.1
>
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC PATCH 2/4] AMD IOMMU emulation
2010-08-05 21:31 ` Blue Swirl
@ 2010-08-06 0:41 ` Eduard - Gabriel Munteanu
0 siblings, 0 replies; 15+ messages in thread
From: Eduard - Gabriel Munteanu @ 2010-08-06 0:41 UTC (permalink / raw)
To: Blue Swirl; +Cc: joro, paul, qemu-devel, kvm, avi
On Thu, Aug 05, 2010 at 09:31:58PM +0000, Blue Swirl wrote:
> On Wed, Aug 4, 2010 at 10:32 PM, Eduard - Gabriel Munteanu
> <eduard.munteanu@linux360.ro> wrote:
[snip]
> > diff --git a/Makefile.target b/Makefile.target
> > index 70a9c1b..86226a0 100644
> > --- a/Makefile.target
> > +++ b/Makefile.target
> > @@ -219,6 +219,8 @@ obj-i386-y += pcspk.o i8254.o
> > obj-i386-$(CONFIG_KVM_PIT) += i8254-kvm.o
> > obj-i386-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += device-assignment.o
> >
> > +obj-i386-$(CONFIG_AMD_IOMMU) += amd_iommu.o
>
> Make this unconditional.
>
[snip]
>
> Drop all configure changes.
>
Alright, so it's not going to be a compile-time configurable option.
I'll make some cmdline option for it and make really sure I don't mess up
performance in hot paths.
(I'm actually happy to know it's gonna go in that way.)
[snip]
> > +struct amd_iommu_invalidator {
> > + int devfn;
> > + PCIInvalidateIOTLBFunc *func;
> > + void *opaque;
> > + QLIST_ENTRY(amd_iommu_invalidator) list;
> > +};
>
> This should be AMDIOMMUInvalidator with typedef.
>
> > +
> > +struct amd_iommu_state {
[snip]
> > +};
>
> Likewise, AMDIOMMUState.
>
[snip]
> > +static int amd_iommu_translate(PCIIOMMU *iommu,
> > + PCIDevice *dev,
> > + pci_addr_t addr,
> > + target_phys_addr_t *paddr,
> > + int *len,
> > + unsigned perms);
>
> Please move the implementation here to avoid this declaration.
>
[snip]
> > + iommu = qemu_mallocz(sizeof(PCIIOMMU));
> > + iommu->opaque = st;
> > + iommu->translate = amd_iommu_translate;
> > + iommu->register_iotlb_invalidator = amd_iommu_register_invalidator;
> > + pci_register_iommu(dev, iommu);
>
> I'd avoid the structure and just pass the stuff to pci_register_iommu
> as function arguments.
>
[snip]
> > +void amd_iommu_init(PCIBus *bus)
> > +{
> > + pci_create_simple(bus, -1, "amd-iommu");
> > +}
>
> Just open code this in pc.c.
>
Roger, I'll fix these.
[snip]
> > #define PCI_VENDOR_ID_MOTOROLA 0x1057
> > #define PCI_DEVICE_ID_MOTOROLA_MPC106 0x0002
> > diff --git a/hw/pci_regs.h b/hw/pci_regs.h
> > index 1c675dc..6399b5d 100644
> > --- a/hw/pci_regs.h
> > +++ b/hw/pci_regs.h
> > @@ -216,6 +216,7 @@
> > #define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */
> > #define PCI_CAP_ID_SSVID 0x0D /* Bridge subsystem vendor/device ID */
> > #define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */
> > +#define PCI_CAP_ID_SEC 0x0F /* Secure Device (AMD IOMMU) */
>
> Indentation seems to be off.
>
> > #define PCI_CAP_ID_EXP 0x10 /* PCI Express */
> > #define PCI_CAP_ID_MSIX 0x11 /* MSI-X */
> > #define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */
> > --
> > 1.7.1
The original has tabs instead of spaces, but my changes line up
properly. Which way should I go: convert it all to spaces, add my line
with tabs, or leave it like this? Of course, any cleanup would go in a
separate patch.
Thanks,
Eduard
^ permalink raw reply [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC PATCH 3/4] ide: use the PCI memory access interface
2010-08-04 22:32 [Qemu-devel] [RFC PATCH 0/4] AMD IOMMU emulation 2nd version Eduard - Gabriel Munteanu
2010-08-04 22:32 ` [Qemu-devel] [RFC PATCH 1/4] pci: memory access API and IOMMU support Eduard - Gabriel Munteanu
2010-08-04 22:32 ` [Qemu-devel] [RFC PATCH 2/4] AMD IOMMU emulation Eduard - Gabriel Munteanu
@ 2010-08-04 22:32 ` Eduard - Gabriel Munteanu
2010-08-04 22:32 ` [Qemu-devel] [RFC PATCH 4/4] rtl8139: " Eduard - Gabriel Munteanu
2010-08-05 21:13 ` [Qemu-devel] [RFC PATCH 0/4] AMD IOMMU emulation 2nd version Blue Swirl
4 siblings, 0 replies; 15+ messages in thread
From: Eduard - Gabriel Munteanu @ 2010-08-04 22:32 UTC (permalink / raw)
To: joro; +Cc: kvm, qemu-devel, avi, Eduard - Gabriel Munteanu, paul
Emulated PCI IDE controllers now use the memory access interface. This
also allows an emulated IOMMU to translate and check accesses.
Map invalidation results in cancelling DMA transfers. Since the guest OS
can't properly recover the DMA results in case the mapping is changed,
this is a fairly good approximation.
Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
---
dma-helpers.c | 37 +++++++++++++++++++++++++++++++++++--
dma.h | 21 ++++++++++++++++++++-
hw/ide/core.c | 15 ++++++++-------
hw/ide/internal.h | 39 +++++++++++++++++++++++++++++++++++++++
hw/ide/pci.c | 7 +++++++
5 files changed, 109 insertions(+), 10 deletions(-)
diff --git a/dma-helpers.c b/dma-helpers.c
index d4fc077..408fee3 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -10,12 +10,34 @@
#include "dma.h"
#include "block_int.h"
-void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint)
+static void *qemu_sglist_default_map(void *opaque,
+ target_phys_addr_t addr,
+ target_phys_addr_t *len,
+ int is_write)
+{
+ return cpu_physical_memory_map(addr, len, is_write);
+}
+
+static void qemu_sglist_default_unmap(void *opaque,
+ void *buffer,
+ target_phys_addr_t len,
+ int is_write,
+ target_phys_addr_t access_len)
+{
+ cpu_physical_memory_unmap(buffer, len, is_write, access_len);
+}
+
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint,
+ QEMUSGMapFunc *map, QEMUSGUnmapFunc *unmap, void *opaque)
{
qsg->sg = qemu_malloc(alloc_hint * sizeof(ScatterGatherEntry));
qsg->nsg = 0;
qsg->nalloc = alloc_hint;
qsg->size = 0;
+
+ qsg->map = map ? map : (QEMUSGMapFunc *) qemu_sglist_default_map;
+ qsg->unmap = unmap ? unmap : (QEMUSGUnmapFunc *) qemu_sglist_default_unmap;
+ qsg->opaque = opaque;
}
void qemu_sglist_add(QEMUSGList *qsg, target_phys_addr_t base,
@@ -79,6 +101,16 @@ static void dma_bdrv_unmap(DMAAIOCB *dbs)
}
}
+static void dma_bdrv_cancel(void *opaque)
+{
+ DMAAIOCB *dbs = opaque;
+
+ bdrv_aio_cancel(dbs->acb);
+ dma_bdrv_unmap(dbs);
+ qemu_iovec_destroy(&dbs->iov);
+ qemu_aio_release(dbs);
+}
+
static void dma_bdrv_cb(void *opaque, int ret)
{
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
@@ -100,7 +132,8 @@ static void dma_bdrv_cb(void *opaque, int ret)
while (dbs->sg_cur_index < dbs->sg->nsg) {
cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
- mem = cpu_physical_memory_map(cur_addr, &cur_len, !dbs->is_write);
+ mem = dbs->sg->map(dbs->sg->opaque, dma_bdrv_cancel, dbs,
+ cur_addr, &cur_len, !dbs->is_write);
if (!mem)
break;
qemu_iovec_add(&dbs->iov, mem, cur_len);
diff --git a/dma.h b/dma.h
index f3bb275..d48f35c 100644
--- a/dma.h
+++ b/dma.h
@@ -15,6 +15,19 @@
#include "hw/hw.h"
#include "block.h"
+typedef void QEMUSGInvalMapFunc(void *opaque);
+typedef void *QEMUSGMapFunc(void *opaque,
+ QEMUSGInvalMapFunc *inval_cb,
+ void *inval_opaque,
+ target_phys_addr_t addr,
+ target_phys_addr_t *len,
+ int is_write);
+typedef void QEMUSGUnmapFunc(void *opaque,
+ void *buffer,
+ target_phys_addr_t len,
+ int is_write,
+ target_phys_addr_t access_len);
+
typedef struct {
target_phys_addr_t base;
target_phys_addr_t len;
@@ -25,9 +38,15 @@ typedef struct {
int nsg;
int nalloc;
target_phys_addr_t size;
+
+ QEMUSGMapFunc *map;
+ QEMUSGUnmapFunc *unmap;
+ void *opaque;
} QEMUSGList;
-void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint);
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint,
+ QEMUSGMapFunc *map, QEMUSGUnmapFunc *unmap,
+ void *opaque);
void qemu_sglist_add(QEMUSGList *qsg, target_phys_addr_t base,
target_phys_addr_t len);
void qemu_sglist_destroy(QEMUSGList *qsg);
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 0b3b7c2..c19013a 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -435,7 +435,8 @@ static int dma_buf_prepare(BMDMAState *bm, int is_write)
} prd;
int l, len;
- qemu_sglist_init(&s->sg, s->nsector / (IDE_PAGE_SIZE / 512) + 1);
+ qemu_sglist_init(&s->sg, s->nsector / (IDE_PAGE_SIZE / 512) + 1,
+ bm->map, bm->unmap, bm->opaque);
s->io_buffer_size = 0;
for(;;) {
if (bm->cur_prd_len == 0) {
@@ -443,7 +444,7 @@ static int dma_buf_prepare(BMDMAState *bm, int is_write)
if (bm->cur_prd_last ||
(bm->cur_addr - bm->addr) >= IDE_PAGE_SIZE)
return s->io_buffer_size != 0;
- cpu_physical_memory_read(bm->cur_addr, (uint8_t *)&prd, 8);
+ bmdma_memory_read(bm, bm->cur_addr, (uint8_t *)&prd, 8);
bm->cur_addr += 8;
prd.addr = le32_to_cpu(prd.addr);
prd.size = le32_to_cpu(prd.size);
@@ -526,7 +527,7 @@ static int dma_buf_rw(BMDMAState *bm, int is_write)
if (bm->cur_prd_last ||
(bm->cur_addr - bm->addr) >= IDE_PAGE_SIZE)
return 0;
- cpu_physical_memory_read(bm->cur_addr, (uint8_t *)&prd, 8);
+ bmdma_memory_read(bm, bm->cur_addr, (uint8_t *)&prd, 8);
bm->cur_addr += 8;
prd.addr = le32_to_cpu(prd.addr);
prd.size = le32_to_cpu(prd.size);
@@ -541,11 +542,11 @@ static int dma_buf_rw(BMDMAState *bm, int is_write)
l = bm->cur_prd_len;
if (l > 0) {
if (is_write) {
- cpu_physical_memory_write(bm->cur_prd_addr,
- s->io_buffer + s->io_buffer_index, l);
+ bmdma_memory_write(bm, bm->cur_prd_addr,
+ s->io_buffer + s->io_buffer_index, l);
} else {
- cpu_physical_memory_read(bm->cur_prd_addr,
- s->io_buffer + s->io_buffer_index, l);
+ bmdma_memory_read(bm, bm->cur_prd_addr,
+ s->io_buffer + s->io_buffer_index, l);
}
bm->cur_prd_addr += l;
bm->cur_prd_len -= l;
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index eef1ee1..0f3b707 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -476,6 +476,24 @@ struct IDEDeviceInfo {
#define BM_CMD_START 0x01
#define BM_CMD_READ 0x08
+typedef void BMDMAInvalMapFunc(void *opaque);
+typedef void BMDMARWFunc(void *opaque,
+ target_phys_addr_t addr,
+ uint8_t *buf,
+ target_phys_addr_t len,
+ int is_write);
+typedef void *BMDMAMapFunc(void *opaque,
+ BMDMAInvalMapFunc *inval_cb,
+ void *inval_opaque,
+ target_phys_addr_t addr,
+ target_phys_addr_t *len,
+ int is_write);
+typedef void BMDMAUnmapFunc(void *opaque,
+ void *buffer,
+ target_phys_addr_t len,
+ int is_write,
+ target_phys_addr_t access_len);
+
struct BMDMAState {
uint8_t cmd;
uint8_t status;
@@ -495,8 +513,29 @@ struct BMDMAState {
int64_t sector_num;
uint32_t nsector;
QEMUBH *bh;
+
+ BMDMARWFunc *rw;
+ BMDMAMapFunc *map;
+ BMDMAUnmapFunc *unmap;
+ void *opaque;
};
+static inline void bmdma_memory_read(BMDMAState *bm,
+ target_phys_addr_t addr,
+ uint8_t *buf,
+ target_phys_addr_t len)
+{
+ bm->rw(bm->opaque, addr, buf, len, 0);
+}
+
+static inline void bmdma_memory_write(BMDMAState *bm,
+ target_phys_addr_t addr,
+ uint8_t *buf,
+ target_phys_addr_t len)
+{
+ bm->rw(bm->opaque, addr, buf, len, 1);
+}
+
static inline IDEState *idebus_active_if(IDEBus *bus)
{
return bus->ifs + bus->unit;
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index 4d95cc5..5879044 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -183,4 +183,11 @@ void pci_ide_create_devs(PCIDevice *dev, DriveInfo **hd_table)
continue;
ide_create_drive(d->bus+bus[i], unit[i], hd_table[i]);
}
+
+ for (i = 0; i < 2; i++) {
+ d->bmdma[i].rw = (void *) pci_memory_rw;
+ d->bmdma[i].map = (void *) pci_memory_map;
+ d->bmdma[i].unmap = (void *) pci_memory_unmap;
+ d->bmdma[i].opaque = dev;
+ }
}
--
1.7.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC PATCH 4/4] rtl8139: use the PCI memory access interface
2010-08-04 22:32 [Qemu-devel] [RFC PATCH 0/4] AMD IOMMU emulation 2nd version Eduard - Gabriel Munteanu
` (2 preceding siblings ...)
2010-08-04 22:32 ` [Qemu-devel] [RFC PATCH 3/4] ide: use the PCI memory access interface Eduard - Gabriel Munteanu
@ 2010-08-04 22:32 ` Eduard - Gabriel Munteanu
2010-08-05 21:13 ` [Qemu-devel] [RFC PATCH 0/4] AMD IOMMU emulation 2nd version Blue Swirl
4 siblings, 0 replies; 15+ messages in thread
From: Eduard - Gabriel Munteanu @ 2010-08-04 22:32 UTC (permalink / raw)
To: joro; +Cc: kvm, qemu-devel, avi, Eduard - Gabriel Munteanu, paul
This allows the device to work properly with an emulated IOMMU.
Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
---
hw/rtl8139.c | 99 ++++++++++++++++++++++++++++++++-------------------------
1 files changed, 56 insertions(+), 43 deletions(-)
diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index 72e2242..99d5f69 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -412,12 +412,6 @@ typedef struct RTL8139TallyCounters
uint16_t TxUndrn;
} RTL8139TallyCounters;
-/* Clears all tally counters */
-static void RTL8139TallyCounters_clear(RTL8139TallyCounters* counters);
-
-/* Writes tally counters to specified physical memory address */
-static void RTL8139TallyCounters_physical_memory_write(target_phys_addr_t tc_addr, RTL8139TallyCounters* counters);
-
typedef struct RTL8139State {
PCIDevice dev;
uint8_t phys[8]; /* mac address */
@@ -496,6 +490,14 @@ typedef struct RTL8139State {
} RTL8139State;
+/* Clears all tally counters */
+static void RTL8139TallyCounters_clear(RTL8139TallyCounters* counters);
+
+/* Writes tally counters to specified physical memory address */
+static void
+RTL8139TallyCounters_physical_memory_write(RTL8139State *s,
+ target_phys_addr_t tc_addr);
+
static void rtl8139_set_next_tctr_time(RTL8139State *s, int64_t current_time);
static void prom9346_decode_command(EEprom9346 *eeprom, uint8_t command)
@@ -746,6 +748,8 @@ static int rtl8139_cp_transmitter_enabled(RTL8139State *s)
static void rtl8139_write_buffer(RTL8139State *s, const void *buf, int size)
{
+ PCIDevice *dev = &s->dev;
+
if (s->RxBufAddr + size > s->RxBufferSize)
{
int wrapped = MOD2(s->RxBufAddr + size, s->RxBufferSize);
@@ -757,15 +761,15 @@ static void rtl8139_write_buffer(RTL8139State *s, const void *buf, int size)
if (size > wrapped)
{
- cpu_physical_memory_write( s->RxBuf + s->RxBufAddr,
- buf, size-wrapped );
+ pci_memory_write(dev, s->RxBuf + s->RxBufAddr,
+ buf, size-wrapped);
}
/* reset buffer pointer */
s->RxBufAddr = 0;
- cpu_physical_memory_write( s->RxBuf + s->RxBufAddr,
- buf + (size-wrapped), wrapped );
+ pci_memory_write(dev, s->RxBuf + s->RxBufAddr,
+ buf + (size-wrapped), wrapped);
s->RxBufAddr = wrapped;
@@ -774,7 +778,7 @@ static void rtl8139_write_buffer(RTL8139State *s, const void *buf, int size)
}
/* non-wrapping path or overwrapping enabled */
- cpu_physical_memory_write( s->RxBuf + s->RxBufAddr, buf, size );
+ pci_memory_write(dev, s->RxBuf + s->RxBufAddr, buf, size);
s->RxBufAddr += size;
}
@@ -814,6 +818,7 @@ static int rtl8139_can_receive(VLANClientState *nc)
static ssize_t rtl8139_do_receive(VLANClientState *nc, const uint8_t *buf, size_t size_, int do_interrupt)
{
RTL8139State *s = DO_UPCAST(NICState, nc, nc)->opaque;
+ PCIDevice *dev = &s->dev;
int size = size_;
uint32_t packet_header = 0;
@@ -968,13 +973,13 @@ static ssize_t rtl8139_do_receive(VLANClientState *nc, const uint8_t *buf, size_
uint32_t val, rxdw0,rxdw1,rxbufLO,rxbufHI;
- cpu_physical_memory_read(cplus_rx_ring_desc, (uint8_t *)&val, 4);
+ pci_memory_read(dev, cplus_rx_ring_desc, (uint8_t *)&val, 4);
rxdw0 = le32_to_cpu(val);
- cpu_physical_memory_read(cplus_rx_ring_desc+4, (uint8_t *)&val, 4);
+ pci_memory_read(dev, cplus_rx_ring_desc+4, (uint8_t *)&val, 4);
rxdw1 = le32_to_cpu(val);
- cpu_physical_memory_read(cplus_rx_ring_desc+8, (uint8_t *)&val, 4);
+ pci_memory_read(dev, cplus_rx_ring_desc+8, (uint8_t *)&val, 4);
rxbufLO = le32_to_cpu(val);
- cpu_physical_memory_read(cplus_rx_ring_desc+12, (uint8_t *)&val, 4);
+ pci_memory_read(dev, cplus_rx_ring_desc+12, (uint8_t *)&val, 4);
rxbufHI = le32_to_cpu(val);
DEBUG_PRINT(("RTL8139: +++ C+ mode RX descriptor %d %08x %08x %08x %08x\n",
@@ -1019,7 +1024,7 @@ static ssize_t rtl8139_do_receive(VLANClientState *nc, const uint8_t *buf, size_
target_phys_addr_t rx_addr = rtl8139_addr64(rxbufLO, rxbufHI);
/* receive/copy to target memory */
- cpu_physical_memory_write( rx_addr, buf, size );
+ pci_memory_write(dev, rx_addr, buf, size);
if (s->CpCmd & CPlusRxChkSum)
{
@@ -1032,7 +1037,7 @@ static ssize_t rtl8139_do_receive(VLANClientState *nc, const uint8_t *buf, size_
#else
val = 0;
#endif
- cpu_physical_memory_write( rx_addr+size, (uint8_t *)&val, 4);
+ pci_memory_write(dev, rx_addr + size, (uint8_t *)&val, 4);
/* first segment of received packet flag */
#define CP_RX_STATUS_FS (1<<29)
@@ -1081,9 +1086,9 @@ static ssize_t rtl8139_do_receive(VLANClientState *nc, const uint8_t *buf, size_
/* update ring data */
val = cpu_to_le32(rxdw0);
- cpu_physical_memory_write(cplus_rx_ring_desc, (uint8_t *)&val, 4);
+ pci_memory_write(dev, cplus_rx_ring_desc, (uint8_t *)&val, 4);
val = cpu_to_le32(rxdw1);
- cpu_physical_memory_write(cplus_rx_ring_desc+4, (uint8_t *)&val, 4);
+ pci_memory_write(dev, cplus_rx_ring_desc+4, (uint8_t *)&val, 4);
/* update tally counter */
++s->tally_counters.RxOk;
@@ -1279,50 +1284,54 @@ static void RTL8139TallyCounters_clear(RTL8139TallyCounters* counters)
counters->TxUndrn = 0;
}
-static void RTL8139TallyCounters_physical_memory_write(target_phys_addr_t tc_addr, RTL8139TallyCounters* tally_counters)
+static void
+RTL8139TallyCounters_physical_memory_write(RTL8139State *s,
+ target_phys_addr_t tc_addr)
{
+ PCIDevice *dev = &s->dev;
+ RTL8139TallyCounters *tally_counters = &s->tally_counters;
uint16_t val16;
uint32_t val32;
uint64_t val64;
val64 = cpu_to_le64(tally_counters->TxOk);
- cpu_physical_memory_write(tc_addr + 0, (uint8_t *)&val64, 8);
+ pci_memory_write(dev, tc_addr + 0, (uint8_t *)&val64, 8);
val64 = cpu_to_le64(tally_counters->RxOk);
- cpu_physical_memory_write(tc_addr + 8, (uint8_t *)&val64, 8);
+ pci_memory_write(dev, tc_addr + 8, (uint8_t *)&val64, 8);
val64 = cpu_to_le64(tally_counters->TxERR);
- cpu_physical_memory_write(tc_addr + 16, (uint8_t *)&val64, 8);
+ pci_memory_write(dev, tc_addr + 16, (uint8_t *)&val64, 8);
val32 = cpu_to_le32(tally_counters->RxERR);
- cpu_physical_memory_write(tc_addr + 24, (uint8_t *)&val32, 4);
+ pci_memory_write(dev, tc_addr + 24, (uint8_t *)&val32, 4);
val16 = cpu_to_le16(tally_counters->MissPkt);
- cpu_physical_memory_write(tc_addr + 28, (uint8_t *)&val16, 2);
+ pci_memory_write(dev, tc_addr + 28, (uint8_t *)&val16, 2);
val16 = cpu_to_le16(tally_counters->FAE);
- cpu_physical_memory_write(tc_addr + 30, (uint8_t *)&val16, 2);
+ pci_memory_write(dev, tc_addr + 30, (uint8_t *)&val16, 2);
val32 = cpu_to_le32(tally_counters->Tx1Col);
- cpu_physical_memory_write(tc_addr + 32, (uint8_t *)&val32, 4);
+ pci_memory_write(dev, tc_addr + 32, (uint8_t *)&val32, 4);
val32 = cpu_to_le32(tally_counters->TxMCol);
- cpu_physical_memory_write(tc_addr + 36, (uint8_t *)&val32, 4);
+ pci_memory_write(dev, tc_addr + 36, (uint8_t *)&val32, 4);
val64 = cpu_to_le64(tally_counters->RxOkPhy);
- cpu_physical_memory_write(tc_addr + 40, (uint8_t *)&val64, 8);
+ pci_memory_write(dev, tc_addr + 40, (uint8_t *)&val64, 8);
val64 = cpu_to_le64(tally_counters->RxOkBrd);
- cpu_physical_memory_write(tc_addr + 48, (uint8_t *)&val64, 8);
+ pci_memory_write(dev, tc_addr + 48, (uint8_t *)&val64, 8);
val32 = cpu_to_le32(tally_counters->RxOkMul);
- cpu_physical_memory_write(tc_addr + 56, (uint8_t *)&val32, 4);
+ pci_memory_write(dev, tc_addr + 56, (uint8_t *)&val32, 4);
val16 = cpu_to_le16(tally_counters->TxAbt);
- cpu_physical_memory_write(tc_addr + 60, (uint8_t *)&val16, 2);
+ pci_memory_write(dev, tc_addr + 60, (uint8_t *)&val16, 2);
val16 = cpu_to_le16(tally_counters->TxUndrn);
- cpu_physical_memory_write(tc_addr + 62, (uint8_t *)&val16, 2);
+ pci_memory_write(dev, tc_addr + 62, (uint8_t *)&val16, 2);
}
/* Loads values of tally counters from VM state file */
@@ -1758,6 +1767,8 @@ static void rtl8139_transfer_frame(RTL8139State *s, const uint8_t *buf, int size
static int rtl8139_transmit_one(RTL8139State *s, int descriptor)
{
+ PCIDevice *dev = &s->dev;
+
if (!rtl8139_transmitter_enabled(s))
{
DEBUG_PRINT(("RTL8139: +++ cannot transmit from descriptor %d: transmitter disabled\n",
@@ -1780,7 +1791,7 @@ static int rtl8139_transmit_one(RTL8139State *s, int descriptor)
DEBUG_PRINT(("RTL8139: +++ transmit reading %d bytes from host memory at 0x%08x\n",
txsize, s->TxAddr[descriptor]));
- cpu_physical_memory_read(s->TxAddr[descriptor], txbuffer, txsize);
+ pci_memory_read(dev, s->TxAddr[descriptor], txbuffer, txsize);
/* Mark descriptor as transferred */
s->TxStatus[descriptor] |= TxHostOwns;
@@ -1886,6 +1897,8 @@ static uint16_t ip_checksum(void *data, size_t len)
static int rtl8139_cplus_transmit_one(RTL8139State *s)
{
+ PCIDevice *dev = &s->dev;
+
if (!rtl8139_transmitter_enabled(s))
{
DEBUG_PRINT(("RTL8139: +++ C+ mode: transmitter disabled\n"));
@@ -1911,14 +1924,14 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
uint32_t val, txdw0,txdw1,txbufLO,txbufHI;
- cpu_physical_memory_read(cplus_tx_ring_desc, (uint8_t *)&val, 4);
+ pci_memory_read(dev, cplus_tx_ring_desc, (uint8_t *)&val, 4);
txdw0 = le32_to_cpu(val);
/* TODO: implement VLAN tagging support, VLAN tag data is read to txdw1 */
- cpu_physical_memory_read(cplus_tx_ring_desc+4, (uint8_t *)&val, 4);
+ pci_memory_read(dev, cplus_tx_ring_desc+4, (uint8_t *)&val, 4);
txdw1 = le32_to_cpu(val);
- cpu_physical_memory_read(cplus_tx_ring_desc+8, (uint8_t *)&val, 4);
+ pci_memory_read(dev, cplus_tx_ring_desc+8, (uint8_t *)&val, 4);
txbufLO = le32_to_cpu(val);
- cpu_physical_memory_read(cplus_tx_ring_desc+12, (uint8_t *)&val, 4);
+ pci_memory_read(dev, cplus_tx_ring_desc+12, (uint8_t *)&val, 4);
txbufHI = le32_to_cpu(val);
DEBUG_PRINT(("RTL8139: +++ C+ mode TX descriptor %d %08x %08x %08x %08x\n",
@@ -2025,7 +2038,8 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
DEBUG_PRINT(("RTL8139: +++ C+ mode transmit reading %d bytes from host memory at %016" PRIx64 " to offset %d\n",
txsize, (uint64_t)tx_addr, s->cplus_txbuffer_offset));
- cpu_physical_memory_read(tx_addr, s->cplus_txbuffer + s->cplus_txbuffer_offset, txsize);
+ pci_memory_read(dev, tx_addr,
+ s->cplus_txbuffer + s->cplus_txbuffer_offset, txsize);
s->cplus_txbuffer_offset += txsize;
/* seek to next Rx descriptor */
@@ -2052,10 +2066,10 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
/* update ring data */
val = cpu_to_le32(txdw0);
- cpu_physical_memory_write(cplus_tx_ring_desc, (uint8_t *)&val, 4);
+ pci_memory_write(dev, cplus_tx_ring_desc, (uint8_t *)&val, 4);
/* TODO: implement VLAN tagging support, VLAN tag data is read to txdw1 */
// val = cpu_to_le32(txdw1);
-// cpu_physical_memory_write(cplus_tx_ring_desc+4, &val, 4);
+// pci_memory_write(dev, cplus_tx_ring_desc+4, &val, 4);
/* Now decide if descriptor being processed is holding the last segment of packet */
if (txdw0 & CP_TX_LS)
@@ -2364,7 +2378,6 @@ static void rtl8139_transmit(RTL8139State *s)
static void rtl8139_TxStatus_write(RTL8139State *s, uint32_t txRegOffset, uint32_t val)
{
-
int descriptor = txRegOffset/4;
/* handle C+ transmit mode register configuration */
@@ -2381,7 +2394,7 @@ static void rtl8139_TxStatus_write(RTL8139State *s, uint32_t txRegOffset, uint32
target_phys_addr_t tc_addr = rtl8139_addr64(s->TxStatus[0] & ~0x3f, s->TxStatus[1]);
/* dump tally counters to specified memory location */
- RTL8139TallyCounters_physical_memory_write( tc_addr, &s->tally_counters);
+ RTL8139TallyCounters_physical_memory_write(s, tc_addr);
/* mark dump completed */
s->TxStatus[0] &= ~0x8;
--
1.7.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC PATCH 0/4] AMD IOMMU emulation 2nd version
2010-08-04 22:32 [Qemu-devel] [RFC PATCH 0/4] AMD IOMMU emulation 2nd version Eduard - Gabriel Munteanu
` (3 preceding siblings ...)
2010-08-04 22:32 ` [Qemu-devel] [RFC PATCH 4/4] rtl8139: " Eduard - Gabriel Munteanu
@ 2010-08-05 21:13 ` Blue Swirl
2010-08-06 14:09 ` [Qemu-devel] Question about starting 2 VMs using Qemu Anjali Kulkarni
4 siblings, 1 reply; 15+ messages in thread
From: Blue Swirl @ 2010-08-05 21:13 UTC (permalink / raw)
To: Eduard - Gabriel Munteanu; +Cc: joro, paul, qemu-devel, kvm, avi
On Wed, Aug 4, 2010 at 10:32 PM, Eduard - Gabriel Munteanu
<eduard.munteanu@linux360.ro> wrote:
> Hi,
>
> I hope I solved the issues raised by Anthony and Paul.
>
> Please have a look and tell me what you think. However, don't merge it yet (in
> case you like it); I need to test and clean up some pieces further. There are
> also some patches from the previous series that I haven't included yet.
>
>
> Thanks,
> Eduard
>
> Eduard - Gabriel Munteanu (4):
> pci: memory access API and IOMMU support
> AMD IOMMU emulation
> ide: use the PCI memory access interface
> rtl8139: use the PCI memory access interface
The patches violate CODING_STYLE regarding structure naming and use of
braces; please fix.
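For reference, QEMU's CODING_STYLE wants CamelCase typedef names for
structured types (as noted in the per-patch review) and braces around every
block, even single statements. A minimal sketch of the brace rule, applied to
one of the unbraced conditionals in the patches:

    if (err) {
        return err;
    }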
>
> Makefile.target | 2 +
> configure | 10 +
> dma-helpers.c | 37 +++-
> dma.h | 21 ++-
> hw/amd_iommu.c | 671 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> hw/ide/core.c | 15 +-
> hw/ide/internal.h | 39 +++
> hw/ide/pci.c | 7 +
> hw/pc.c | 4 +
> hw/pc.h | 3 +
> hw/pci.c | 145 ++++++++++++
> hw/pci.h | 130 +++++++++++
> hw/pci_ids.h | 2 +
> hw/pci_regs.h | 1 +
> hw/rtl8139.c | 99 +++++----
> qemu-common.h | 1 +
> 16 files changed, 1134 insertions(+), 53 deletions(-)
> create mode 100644 hw/amd_iommu.c
>
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [Qemu-devel] Question about starting 2 VMs using Qemu
2010-08-05 21:13 ` [Qemu-devel] [RFC PATCH 0/4] AMD IOMMU emulation 2nd version Blue Swirl
@ 2010-08-06 14:09 ` Anjali Kulkarni
2010-08-07 5:26 ` Mulyadi Santosa
` (3 more replies)
0 siblings, 4 replies; 15+ messages in thread
From: Anjali Kulkarni @ 2010-08-06 14:09 UTC (permalink / raw)
To: qemu-devel@nongnu.org, kvm@vger.kernel.org
Hi,
I can start my FreeBSD-based image in Qemu, and I use a tap interface for
connectivity to the host or the external world, but when I try to fire up 2
instances using tap, it fails. For the second invocation, it gives me the
error:
[root@ipg-virt01 tmp]# ./qemu-system-x86_64 -m 512 anjali.img -net
nic,model=yukon,macaddr=52:54:00:00:aa:02 -net tap,ifname=tap0,script=no
warning: could not configure /dev/net/tun: no virtual network emulation
Could not initialize device 'tap'
Am I doing something wrong? The first instance is running fine.
Anjali
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] Question about starting 2 VMs using Qemu
2010-08-06 14:09 ` [Qemu-devel] Question about starting 2 VMs using Qemu Anjali Kulkarni
@ 2010-08-07 5:26 ` Mulyadi Santosa
2010-08-07 17:43 ` [Qemu-devel] Question about e1000 NIC emulation in Qemu - chipset used Anjali Kulkarni
` (2 subsequent siblings)
3 siblings, 0 replies; 15+ messages in thread
From: Mulyadi Santosa @ 2010-08-07 5:26 UTC (permalink / raw)
To: Anjali Kulkarni; +Cc: qemu-devel@nongnu.org
Hi..
On Fri, Aug 6, 2010 at 21:09, Anjali Kulkarni <anjali@juniper.net> wrote:
> Hi,
>
> I can start my freebsd based image in Qemu, and I use tap interface for
> connectivity to the host or external world, but when I try to fire up 2
> instances, using tap, it fails. For the second invocation, it gives me the
> error:
>
> [root@ipg-virt01 tmp]# ./qemu-system-x86_64 -m 512 anjali.img -net
> nic,model=yukon,macaddr=52:54:00:00:aa:02 -net tap,ifname=tap0,script=no
Try replacing "tap0" with "tap1" and see what happens. Note: make sure
you have created the tap1 interface first using tunctl or another utility.
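For example (illustrative only; this assumes tunctl from uml-utilities is
installed, that you run the commands as root, and that "second.img" stands in
for your second disk image; note the different MAC address):

  tunctl -t tap1
  ifconfig tap1 up
  ./qemu-system-x86_64 -m 512 second.img \
      -net nic,model=yukon,macaddr=52:54:00:00:aa:03 \
      -net tap,ifname=tap1,script=no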
--
regards,
Mulyadi Santosa
Freelance Linux trainer and consultant
blog: the-hydra.blogspot.com
training: mulyaditraining.blogspot.com
^ permalink raw reply [flat|nested] 15+ messages in thread
* [Qemu-devel] Question about e1000 NIC emulation in Qemu - chipset used
2010-08-06 14:09 ` [Qemu-devel] Question about starting 2 VMs using Qemu Anjali Kulkarni
2010-08-07 5:26 ` Mulyadi Santosa
@ 2010-08-07 17:43 ` Anjali Kulkarni
2010-08-09 1:49 ` [Qemu-devel] Re: Question about starting 2 VMs using Qemu haishan
2010-08-09 22:18 ` Nirmal Guhan
3 siblings, 0 replies; 15+ messages in thread
From: Anjali Kulkarni @ 2010-08-07 17:43 UTC (permalink / raw)
To: qemu-devel@nongnu.org, kvm@vger.kernel.org
Hi,
I am using Qemu to emulate my virtual e1000 NIC, and my driver for the
e1000 is a modified version of the 82547EI driver. (Modified because it runs
in userspace and plugs into other modules, but the device-access-specific
parts are not modified.)
I wanted to know exactly which chipset Qemu emulates. Does it make a
difference if it is different from the 82547? I see that the 82547 is in the
e1000 family, so should my driver work with any e1000-family variant that
Qemu or vmware supports? For example, vmware supports the 82545EM; can I use
my driver for the 82547 (maybe with some modifications), and will it work
with the emulated e1000 NIC in vmware?
Which chipset does Qemu's e1000 emulate?
Thanks!
Anjali
^ permalink raw reply [flat|nested] 15+ messages in thread
* [Qemu-devel] Re: Question about starting 2 VMs using Qemu
2010-08-06 14:09 ` [Qemu-devel] Question about starting 2 VMs using Qemu Anjali Kulkarni
2010-08-07 5:26 ` Mulyadi Santosa
2010-08-07 17:43 ` [Qemu-devel] Question about e1000 NIC emulation in Qemu - chipset used Anjali Kulkarni
@ 2010-08-09 1:49 ` haishan
2010-08-09 22:18 ` Nirmal Guhan
3 siblings, 0 replies; 15+ messages in thread
From: haishan @ 2010-08-09 1:49 UTC (permalink / raw)
To: Anjali Kulkarni; +Cc: qemu-devel@nongnu.org, kvm@vger.kernel.org
Anjali Kulkarni wrote:
> Hi,
>
> I can start my freebsd based image in Qemu, and I use tap interface for
> connectivity to the host or external world, but when I try to fire up 2
> instances, using tap, it fails. For the second invocation, it gives me the
> error:
>
> [root@ipg-virt01 tmp]# ./qemu-system-x86_64 -m 512 anjali.img -net
> nic,model=yukon,macaddr=52:54:00:00:aa:02 -net tap,ifname=tap0,script=no
> warning: could not configure /dev/net/tun: no virtual network emulation
> Could not initialize device 'tap'
>
> Am I doing something wrong? The first instance is running fine.
>
>
Please make sure your instances have different MAC addresses.
The following settings might work:
Instance 1: macaddr=52:54:00:00:aa:02
Instance 2: macaddr=52:54:00:00:aa:03
Thanks
Shan Hai
> Anjali
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [Qemu-devel] Re: Question about starting 2 VMs using Qemu
2010-08-06 14:09 ` [Qemu-devel] Question about starting 2 VMs using Qemu Anjali Kulkarni
` (2 preceding siblings ...)
2010-08-09 1:49 ` [Qemu-devel] Re: Question about starting 2 VMs using Qemu haishan
@ 2010-08-09 22:18 ` Nirmal Guhan
3 siblings, 0 replies; 15+ messages in thread
From: Nirmal Guhan @ 2010-08-09 22:18 UTC (permalink / raw)
To: Anjali Kulkarni; +Cc: qemu-devel@nongnu.org, kvm@vger.kernel.org
Try removing ifname=tap0 and using a different macaddr for the second instance.
--Nirmal
On Fri, Aug 6, 2010 at 7:09 AM, Anjali Kulkarni <anjali@juniper.net> wrote:
> Hi,
>
> I can start my freebsd based image in Qemu, and I use tap interface for
> connectivity to the host or external world, but when I try to fire up 2
> instances, using tap, it fails. For the second invocation, it gives me the
> error:
>
> [root@ipg-virt01 tmp]# ./qemu-system-x86_64 -m 512 anjali.img -net
> nic,model=yukon,macaddr=52:54:00:00:aa:02 -net tap,ifname=tap0,script=no
> warning: could not configure /dev/net/tun: no virtual network emulation
> Could not initialize device 'tap'
>
> Am I doing something wrong? The first instance is running fine.
>
> Anjali
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 15+ messages in thread