From: "Michael S. Tsirkin" <mst@redhat.com>
To: qemu-devel@nongnu.org
Cc: Peter Maydell <peter.maydell@linaro.org>,
Sairaj Kodilkar <sarunkod@amd.com>,
Vasant Hegde <vasant.hegde@amd.com>,
Alejandro Jimenez <alejandro.j.jimenez@oracle.com>,
Paolo Bonzini <pbonzini@redhat.com>,
Richard Henderson <richard.henderson@linaro.org>,
Eduardo Habkost <eduardo@habkost.net>,
Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Subject: [PULL 03/14] amd_iommu: Support 64-bit address for IOTLB lookup
Date: Sun, 9 Nov 2025 09:35:14 -0500 [thread overview]
Message-ID: <637dc49a3ae6b72cdb6415dcdcdb22ff770f2c8d.1762698873.git.mst@redhat.com> (raw)
In-Reply-To: <cover.1762698873.git.mst@redhat.com>
From: Sairaj Kodilkar <sarunkod@amd.com>
The physical AMD IOMMU supports up to 64 bits of IOVA. When a device tries
to read or write from a given DMA address, the IOMMU translates the address
using the I/O page tables assigned to that device. Since the emulated IOMMU
uses per-device page tables, an ideal cache tag would need to be 68 bits
(64-bit address - 12-bit page alignment + 16-bit device ID).
The current software IOTLB implementation uses a GLib hash table with a
64-bit key to hash both the IOVA and device ID, which limits the IOVA to 60
bits. This causes a failure while setting up the device when a guest is
booted with "iommu.forcedac=1", which forces the use of DMA addresses at the
top of the 64-bit address space.
To address this issue, construct the 64-bit hash key using the upper 52 bits
of IOVA (GFN) and lower 12 bits of the device ID to avoid truncation as much
as possible (reducing hash collisions).
Fixes: d29a09ca6842 ("hw/i386: Introduce AMD IOMMU")
Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Reviewed-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Signed-off-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20251103203209.645434-4-alejandro.j.jimenez@oracle.com>
---
hw/i386/amd_iommu.h | 4 ++--
hw/i386/amd_iommu.c | 57 ++++++++++++++++++++++++++++++---------------
2 files changed, 40 insertions(+), 21 deletions(-)
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 38471b95d1..302ccca512 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -220,8 +220,8 @@
#define PAGE_SIZE_PTE_COUNT(pgsz) (1ULL << ((ctz64(pgsz) - 12) % 9))
/* IOTLB */
-#define AMDVI_IOTLB_MAX_SIZE 1024
-#define AMDVI_DEVID_SHIFT 36
+#define AMDVI_IOTLB_MAX_SIZE 1024
+#define AMDVI_GET_IOTLB_GFN(addr) (addr >> AMDVI_PAGE_SHIFT_4K)
/* default extended feature */
#define AMDVI_DEFAULT_EXT_FEATURES \
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 5c5cfd4989..d689a06eca 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -106,6 +106,11 @@ typedef struct AMDVIAsKey {
uint8_t devfn;
} AMDVIAsKey;
+typedef struct AMDVIIOTLBKey {
+ uint64_t gfn;
+ uint16_t devid;
+} AMDVIIOTLBKey;
+
uint64_t amdvi_extended_feature_register(AMDVIState *s)
{
uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES;
@@ -377,16 +382,6 @@ static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
PCI_STATUS_SIG_TARGET_ABORT);
}
-static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
-{
- return *((const uint64_t *)v1) == *((const uint64_t *)v2);
-}
-
-static guint amdvi_uint64_hash(gconstpointer v)
-{
- return (guint)*(const uint64_t *)v;
-}
-
static gboolean amdvi_as_equal(gconstpointer v1, gconstpointer v2)
{
const AMDVIAsKey *key1 = v1;
@@ -425,11 +420,30 @@ static AMDVIAddressSpace *amdvi_get_as_by_devid(AMDVIState *s, uint16_t devid)
amdvi_find_as_by_devid, &devid);
}
+static gboolean amdvi_iotlb_equal(gconstpointer v1, gconstpointer v2)
+{
+ const AMDVIIOTLBKey *key1 = v1;
+ const AMDVIIOTLBKey *key2 = v2;
+
+ return key1->devid == key2->devid && key1->gfn == key2->gfn;
+}
+
+static guint amdvi_iotlb_hash(gconstpointer v)
+{
+ const AMDVIIOTLBKey *key = v;
+ /* Use GPA and DEVID to find the bucket */
+ return (guint)(key->gfn << AMDVI_PAGE_SHIFT_4K |
+ (key->devid & ~AMDVI_PAGE_MASK_4K));
+}
+
+
static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
uint64_t devid)
{
- uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
- ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+ AMDVIIOTLBKey key = {
+ .gfn = AMDVI_GET_IOTLB_GFN(addr),
+ .devid = devid,
+ };
return g_hash_table_lookup(s->iotlb, &key);
}
@@ -451,8 +465,10 @@ static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
uint64_t devid)
{
- uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
- ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+ AMDVIIOTLBKey key = {
+ .gfn = AMDVI_GET_IOTLB_GFN(addr),
+ .devid = devid,
+ };
g_hash_table_remove(s->iotlb, &key);
}
@@ -463,8 +479,10 @@ static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
/* don't cache erroneous translations */
if (to_cache.perm != IOMMU_NONE) {
AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
- uint64_t *key = g_new(uint64_t, 1);
- uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;
+ AMDVIIOTLBKey *key = g_new(AMDVIIOTLBKey, 1);
+
+ key->gfn = AMDVI_GET_IOTLB_GFN(gpa);
+ key->devid = devid;
trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
PCI_FUNC(devid), gpa, to_cache.translated_addr);
@@ -477,7 +495,8 @@ static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
entry->perms = to_cache.perm;
entry->translated_addr = to_cache.translated_addr;
entry->page_mask = to_cache.addr_mask;
- *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+ entry->devid = devid;
+
g_hash_table_replace(s->iotlb, key, entry);
}
}
@@ -2526,8 +2545,8 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
}
}
- s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
- amdvi_uint64_equal, g_free, g_free);
+ s->iotlb = g_hash_table_new_full(amdvi_iotlb_hash,
+ amdvi_iotlb_equal, g_free, g_free);
s->address_spaces = g_hash_table_new_full(amdvi_as_hash,
amdvi_as_equal, g_free, g_free);
--
MST
next prev parent reply other threads:[~2025-11-09 14:36 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-09 14:35 [PULL 00/14] virtio,pci,pc: fixes for 10.2 Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 01/14] MAINTAINERS: Update entry for AMD-Vi Emulation Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 02/14] amd_iommu: Fix handling of devices on buses != 0 Michael S. Tsirkin
2025-11-09 14:35 ` Michael S. Tsirkin [this message]
2025-11-09 14:35 ` [PULL 04/14] vhost-user: fix shared object lookup handler logic Michael S. Tsirkin
2025-11-10 9:23 ` Albert Esteve
2025-11-10 14:37 ` Richard Henderson
2025-11-10 15:42 ` Michael S. Tsirkin
2025-11-10 15:57 ` Albert Esteve
2025-11-10 16:06 ` Michael S. Tsirkin
2025-11-10 18:54 ` Albert Esteve
2025-11-09 14:35 ` [PULL 05/14] intel_iommu: Handle PASID cache invalidation Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 06/14] intel_iommu: Reset pasid cache when system level reset Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 07/14] intel_iommu: Fix DMA failure when guest switches IOMMU domain Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 08/14] vhost-user: make vhost_set_vring_file() synchronous Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 09/14] tests/qtest/bios-tables-test: Prepare for _DSM change in the DSDT table Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 10/14] hw/pci-host/gpex-acpi: Fix _DSM function 0 support return value Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 11/14] tests/qtest/bios-tables-test: Update DSDT blobs after GPEX _DSM change Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 12/14] virtio-net: Advertise UDP tunnel GSO support by default Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 13/14] q35: increase default tseg size Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 14/14] vhost-user.rst: clarify when FDs can be sent Michael S. Tsirkin
2025-11-10 16:57 ` [PULL 00/14] virtio,pci,pc: fixes for 10.2 Richard Henderson
2025-11-17 10:27 ` Michael S. Tsirkin
2025-11-17 11:44 ` Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=637dc49a3ae6b72cdb6415dcdcdb22ff770f2c8d.1762698873.git.mst@redhat.com \
--to=mst@redhat.com \
--cc=alejandro.j.jimenez@oracle.com \
--cc=eduardo@habkost.net \
--cc=marcel.apfelbaum@gmail.com \
--cc=pbonzini@redhat.com \
--cc=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
--cc=richard.henderson@linaro.org \
--cc=sarunkod@amd.com \
--cc=vasant.hegde@amd.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).