From: "Michael S. Tsirkin" <mst@redhat.com>
To: qemu-devel@nongnu.org
Cc: Peter Maydell <peter.maydell@linaro.org>,
Sairaj Kodilkar <sarunkod@amd.com>,
Alexey Kardashevskiy <aik@amd.com>,
Vasant Hegde <vasant.hegde@amd.com>,
Alejandro Jimenez <alejandro.j.jimenez@oracle.com>,
Paolo Bonzini <pbonzini@redhat.com>,
Richard Henderson <richard.henderson@linaro.org>,
Eduardo Habkost <eduardo@habkost.net>,
Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Subject: [PULL 02/14] amd_iommu: Fix handling of devices on buses != 0
Date: Sun, 9 Nov 2025 09:35:11 -0500 [thread overview]
Message-ID: <27d6a0ec0beec45c35a70cd5f12c4530725dda2a.1762698873.git.mst@redhat.com> (raw)
In-Reply-To: <cover.1762698873.git.mst@redhat.com>
From: Sairaj Kodilkar <sarunkod@amd.com>
The AMD IOMMU is set up at boot time and uses the PCI bus number + devfn to
index into the device table and locate a device's DTE. The problem is that
before the guest starts, all PCI bus numbers are 0 as no PCI discovery has
happened yet (BIOS and/or kernel will do that later), so relying on the bus
number seen at setup time is wrong.
The immediate effect is that emulated devices cannot do DMA when placed on a
bus other than 0.
Replace the static address_spaces array with a hash table keyed by PCIBus* and
devfn, since these values do not change after the guest boots.
Co-developed-by: Alexey Kardashevskiy <aik@amd.com>
Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Reviewed-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Signed-off-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20251103203209.645434-3-alejandro.j.jimenez@oracle.com>
---
hw/i386/amd_iommu.h | 2 +-
hw/i386/amd_iommu.c | 134 ++++++++++++++++++++++++++------------------
2 files changed, 79 insertions(+), 57 deletions(-)
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index daf82fc85f..38471b95d1 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -408,7 +408,7 @@ struct AMDVIState {
bool mmio_enabled;
/* for each served device */
- AMDVIAddressSpace **address_spaces[PCI_BUS_MAX];
+ GHashTable *address_spaces;
/* list of address spaces with registered notifiers */
QLIST_HEAD(, AMDVIAddressSpace) amdvi_as_with_notifiers;
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 378e0cb55e..5c5cfd4989 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -59,7 +59,7 @@ const char *amdvi_mmio_high[] = {
};
struct AMDVIAddressSpace {
- uint8_t bus_num; /* bus number */
+ PCIBus *bus; /* PCIBus (for bus number) */
uint8_t devfn; /* device function */
AMDVIState *iommu_state; /* AMDVI - one per machine */
MemoryRegion root; /* AMDVI Root memory map region */
@@ -101,6 +101,11 @@ typedef enum AMDVIFaultReason {
AMDVI_FR_PT_ENTRY_INV, /* Failure to read PTE from guest memory */
} AMDVIFaultReason;
+typedef struct AMDVIAsKey { /* lookup key for the address_spaces hash table */
+ PCIBus *bus; /* bus the device is attached to (compared by pointer) */
+ uint8_t devfn; /* device function on that bus */
+} AMDVIAsKey;
+
uint64_t amdvi_extended_feature_register(AMDVIState *s)
{
uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES;
@@ -382,6 +387,44 @@ static guint amdvi_uint64_hash(gconstpointer v)
return (guint)*(const uint64_t *)v;
}
+/*
+ * Key-equality callback for the address_spaces hash table: two AMDVIAsKey
+ * values match only when they reference the same PCIBus object and carry
+ * the same devfn.
+ */
+static gboolean amdvi_as_equal(gconstpointer v1, gconstpointer v2)
+{
+    const AMDVIAsKey *lhs = v1;
+    const AMDVIAsKey *rhs = v2;
+
+    if (lhs->bus != rhs->bus) {
+        return FALSE;
+    }
+    return lhs->devfn == rhs->devfn;
+}
+
+/*
+ * Hash callback for AMDVIAsKey: the PCIBus pointer is truncated to a guint,
+ * shifted left by 8 bits, and the devfn is OR-ed into the low byte.
+ */
+static guint amdvi_as_hash(gconstpointer v)
+{
+    const AMDVIAsKey *k = v;
+    guint hash = (guint)(uintptr_t)k->bus;
+
+    hash <<= 8;
+    hash |= (guint)k->devfn;
+    return hash;
+}
+
+/*
+ * Fetch the address space stored in s->address_spaces under the key
+ * (bus, devfn). The result of g_hash_table_lookup() is returned as is;
+ * the caller treats a NULL result as "no address space created yet".
+ */
+static AMDVIAddressSpace *amdvi_as_lookup(AMDVIState *s, PCIBus *bus,
+                                          uint8_t devfn)
+{
+    const AMDVIAsKey probe = { .devfn = devfn, .bus = bus };
+    AMDVIAddressSpace *found = g_hash_table_lookup(s->address_spaces, &probe);
+
+    return found;
+}
+
+/*
+ * Predicate passed to g_hash_table_find(). 'key' is an AMDVIAsKey and
+ * 'user_data' points to the uint16_t devid being searched for; 'value' is
+ * not examined. An entry matches when the BDF built from the number that
+ * pci_bus_num() reports for the key's bus, plus the key's devfn, equals
+ * that devid.
+ */
+static gboolean amdvi_find_as_by_devid(gpointer key, gpointer value,
+                                       gpointer user_data)
+{
+    const AMDVIAsKey *as_key = key;
+    const uint16_t wanted = *(const uint16_t *)user_data;
+
+    return wanted == PCI_BUILD_BDF(pci_bus_num(as_key->bus), as_key->devfn);
+}
+
+/*
+ * Locate the address space whose bus number and devfn encode to 'devid',
+ * by running amdvi_find_as_by_devid() over the entries of
+ * s->address_spaces. Returns the result of g_hash_table_find(); the caller
+ * checks it for NULL before use.
+ */
+static AMDVIAddressSpace *amdvi_get_as_by_devid(AMDVIState *s, uint16_t devid)
+{
+    AMDVIAddressSpace *found;
+
+    found = g_hash_table_find(s->address_spaces,
+                              amdvi_find_as_by_devid, &devid);
+    return found;
+}
+
static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
uint64_t devid)
{
@@ -551,7 +594,7 @@ static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
static int amdvi_as_to_dte(AMDVIAddressSpace *as, uint64_t *dte)
{
- uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
+ uint16_t devid = PCI_BUILD_BDF(pci_bus_num(as->bus), as->devfn);
AMDVIState *s = as->iommu_state;
if (!amdvi_get_dte(s, devid, dte)) {
@@ -1011,25 +1054,15 @@ static void amdvi_switch_address_space(AMDVIAddressSpace *amdvi_as)
*/
static void amdvi_reset_address_translation_all(AMDVIState *s)
{
- AMDVIAddressSpace **iommu_as;
+ AMDVIAddressSpace *iommu_as;
+ GHashTableIter as_it;
- for (int bus_num = 0; bus_num < PCI_BUS_MAX; bus_num++) {
+ g_hash_table_iter_init(&as_it, s->address_spaces);
- /* Nothing to do if there are no devices on the current bus */
- if (!s->address_spaces[bus_num]) {
- continue;
- }
- iommu_as = s->address_spaces[bus_num];
-
- for (int devfn = 0; devfn < PCI_DEVFN_MAX; devfn++) {
-
- if (!iommu_as[devfn]) {
- continue;
- }
- /* Use passthrough as default mode after reset */
- iommu_as[devfn]->addr_translation = false;
- amdvi_switch_address_space(iommu_as[devfn]);
- }
+ while (g_hash_table_iter_next(&as_it, NULL, (void **)&iommu_as)) {
+ /* Use passthrough as default mode after reset */
+ iommu_as->addr_translation = false;
+ amdvi_switch_address_space(iommu_as);
}
}
@@ -1089,27 +1122,15 @@ static void enable_nodma_mode(AMDVIAddressSpace *as)
*/
static void amdvi_update_addr_translation_mode(AMDVIState *s, uint16_t devid)
{
- uint8_t bus_num, devfn, dte_mode;
+ uint8_t dte_mode;
AMDVIAddressSpace *as;
uint64_t dte[4] = { 0 };
int ret;
- /*
- * Convert the devid encoded in the command to a bus and devfn in
- * order to retrieve the corresponding address space.
- */
- bus_num = PCI_BUS_NUM(devid);
- devfn = devid & 0xff;
-
- /*
- * The main buffer of size (AMDVIAddressSpace *) * (PCI_BUS_MAX) has already
- * been allocated within AMDVIState, but must be careful to not access
- * unallocated devfn.
- */
- if (!s->address_spaces[bus_num] || !s->address_spaces[bus_num][devfn]) {
+ as = amdvi_get_as_by_devid(s, devid);
+ if (!as) {
return;
}
- as = s->address_spaces[bus_num][devfn];
ret = amdvi_as_to_dte(as, dte);
@@ -1783,7 +1804,7 @@ static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
bool is_write, IOMMUTLBEntry *ret)
{
AMDVIState *s = as->iommu_state;
- uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
+ uint16_t devid = PCI_BUILD_BDF(pci_bus_num(as->bus), as->devfn);
AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid);
uint64_t entry[4];
int dte_ret;
@@ -1858,7 +1879,7 @@ static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
}
amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret);
- trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
+ trace_amdvi_translation_result(pci_bus_num(as->bus), PCI_SLOT(as->devfn),
PCI_FUNC(as->devfn), addr, ret.translated_addr);
return ret;
}
@@ -2222,30 +2243,28 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
{
char name[128];
AMDVIState *s = opaque;
- AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
- int bus_num = pci_bus_num(bus);
+ AMDVIAddressSpace *amdvi_dev_as;
+ AMDVIAsKey *key;
- iommu_as = s->address_spaces[bus_num];
+ amdvi_dev_as = amdvi_as_lookup(s, bus, devfn);
/* allocate memory during the first run */
- if (!iommu_as) {
- iommu_as = g_new0(AMDVIAddressSpace *, PCI_DEVFN_MAX);
- s->address_spaces[bus_num] = iommu_as;
- }
-
- /* set up AMD-Vi region */
- if (!iommu_as[devfn]) {
+ if (!amdvi_dev_as) {
snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);
- iommu_as[devfn] = g_new0(AMDVIAddressSpace, 1);
- iommu_as[devfn]->bus_num = (uint8_t)bus_num;
- iommu_as[devfn]->devfn = (uint8_t)devfn;
- iommu_as[devfn]->iommu_state = s;
- iommu_as[devfn]->notifier_flags = IOMMU_NOTIFIER_NONE;
- iommu_as[devfn]->iova_tree = iova_tree_new();
- iommu_as[devfn]->addr_translation = false;
+ amdvi_dev_as = g_new0(AMDVIAddressSpace, 1);
+ key = g_new0(AMDVIAsKey, 1);
- amdvi_dev_as = iommu_as[devfn];
+ amdvi_dev_as->bus = bus;
+ amdvi_dev_as->devfn = (uint8_t)devfn;
+ amdvi_dev_as->iommu_state = s;
+ amdvi_dev_as->notifier_flags = IOMMU_NOTIFIER_NONE;
+ amdvi_dev_as->iova_tree = iova_tree_new();
+ amdvi_dev_as->addr_translation = false;
+ key->bus = bus;
+ key->devfn = devfn;
+
+ g_hash_table_insert(s->address_spaces, key, amdvi_dev_as);
/*
* Memory region relationships looks like (Address range shows
@@ -2288,7 +2307,7 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
amdvi_switch_address_space(amdvi_dev_as);
}
- return &iommu_as[devfn]->as;
+ return &amdvi_dev_as->as;
}
static const PCIIOMMUOps amdvi_iommu_ops = {
@@ -2329,7 +2348,7 @@ static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
if (!s->dma_remap && (new & IOMMU_NOTIFIER_MAP)) {
error_setg_errno(errp, ENOTSUP,
"device %02x.%02x.%x requires dma-remap=1",
- as->bus_num, PCI_SLOT(as->devfn), PCI_FUNC(as->devfn));
+ pci_bus_num(as->bus), PCI_SLOT(as->devfn), PCI_FUNC(as->devfn));
return -ENOTSUP;
}
@@ -2510,6 +2529,9 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
amdvi_uint64_equal, g_free, g_free);
+ s->address_spaces = g_hash_table_new_full(amdvi_as_hash,
+ amdvi_as_equal, g_free, g_free);
+
/* set up MMIO */
memory_region_init_io(&s->mr_mmio, OBJECT(s), &mmio_mem_ops, s,
"amdvi-mmio", AMDVI_MMIO_SIZE);
--
MST
next prev parent reply other threads:[~2025-11-09 14:36 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-09 14:35 [PULL 00/14] virtio,pci,pc: fixes for 10.2 Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 01/14] MAINTAINERS: Update entry for AMD-Vi Emulation Michael S. Tsirkin
2025-11-09 14:35 ` Michael S. Tsirkin [this message]
2025-11-09 14:35 ` [PULL 03/14] amd_iommu: Support 64-bit address for IOTLB lookup Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 04/14] vhost-user: fix shared object lookup handler logic Michael S. Tsirkin
2025-11-10 9:23 ` Albert Esteve
2025-11-10 14:37 ` Richard Henderson
2025-11-10 15:42 ` Michael S. Tsirkin
2025-11-10 15:57 ` Albert Esteve
2025-11-10 16:06 ` Michael S. Tsirkin
2025-11-10 18:54 ` Albert Esteve
2025-11-09 14:35 ` [PULL 05/14] intel_iommu: Handle PASID cache invalidation Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 06/14] intel_iommu: Reset pasid cache when system level reset Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 07/14] intel_iommu: Fix DMA failure when guest switches IOMMU domain Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 08/14] vhost-user: make vhost_set_vring_file() synchronous Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 09/14] tests/qtest/bios-tables-test: Prepare for _DSM change in the DSDT table Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 10/14] hw/pci-host/gpex-acpi: Fix _DSM function 0 support return value Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 11/14] tests/qtest/bios-tables-test: Update DSDT blobs after GPEX _DSM change Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 12/14] virtio-net: Advertise UDP tunnel GSO support by default Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 13/14] q35: increase default tseg size Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 14/14] vhost-user.rst: clarify when FDs can be sent Michael S. Tsirkin
2025-11-10 16:57 ` [PULL 00/14] virtio,pci,pc: fixes for 10.2 Richard Henderson
2025-11-17 10:27 ` Michael S. Tsirkin
2025-11-17 11:44 ` Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=27d6a0ec0beec45c35a70cd5f12c4530725dda2a.1762698873.git.mst@redhat.com \
--to=mst@redhat.com \
--cc=aik@amd.com \
--cc=alejandro.j.jimenez@oracle.com \
--cc=eduardo@habkost.net \
--cc=marcel.apfelbaum@gmail.com \
--cc=pbonzini@redhat.com \
--cc=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
--cc=richard.henderson@linaro.org \
--cc=sarunkod@amd.com \
--cc=vasant.hegde@amd.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).