qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: qemu-devel@nongnu.org
Cc: Peter Maydell <peter.maydell@linaro.org>,
	Sairaj Kodilkar <sarunkod@amd.com>,
	Vasant Hegde <vasant.hegde@amd.com>,
	Alejandro Jimenez <alejandro.j.jimenez@oracle.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	Eduardo Habkost <eduardo@habkost.net>,
	Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Subject: [PULL 03/14] amd_iommu: Support 64-bit address for IOTLB lookup
Date: Sun, 9 Nov 2025 09:35:14 -0500	[thread overview]
Message-ID: <637dc49a3ae6b72cdb6415dcdcdb22ff770f2c8d.1762698873.git.mst@redhat.com> (raw)
In-Reply-To: <cover.1762698873.git.mst@redhat.com>

From: Sairaj Kodilkar <sarunkod@amd.com>

The physical AMD IOMMU supports up to 64 bits of IOVA. When a device tries
to read or write from a given DMA address, the IOMMU translates the address
using the I/O page tables assigned to that device. Since the emulated IOMMU
uses per-device page tables, an ideal cache tag would need to be 68 bits
(64-bit address - 12-bit page alignment + 16-bit device ID).

The current software IOTLB implementation uses a GLib hash table with a
64-bit key to hash both the IOVA and device ID, which limits the IOVA to 60
bits. This causes a failure while setting up the device when a guest is
booted with "iommu.forcedac=1", which forces the use of DMA addresses at the
top of the 64-bit address space.

To address this issue, construct the 64-bit hash key using the upper 52 bits
of IOVA (GFN) and lower 12 bits of the device ID to avoid truncation as much
as possible (reducing hash collisions).

Fixes: d29a09ca6842 ("hw/i386: Introduce AMD IOMMU")
Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Reviewed-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Signed-off-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20251103203209.645434-4-alejandro.j.jimenez@oracle.com>
---
 hw/i386/amd_iommu.h |  4 ++--
 hw/i386/amd_iommu.c | 57 ++++++++++++++++++++++++++++++---------------
 2 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 38471b95d1..302ccca512 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -220,8 +220,8 @@
 #define PAGE_SIZE_PTE_COUNT(pgsz)       (1ULL << ((ctz64(pgsz) - 12) % 9))
 
 /* IOTLB */
-#define AMDVI_IOTLB_MAX_SIZE 1024
-#define AMDVI_DEVID_SHIFT    36
+#define AMDVI_IOTLB_MAX_SIZE        1024
+#define AMDVI_GET_IOTLB_GFN(addr)   (addr >> AMDVI_PAGE_SHIFT_4K)
 
 /* default extended feature */
 #define AMDVI_DEFAULT_EXT_FEATURES \
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 5c5cfd4989..d689a06eca 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -106,6 +106,11 @@ typedef struct AMDVIAsKey {
     uint8_t devfn;
 } AMDVIAsKey;
 
+typedef struct AMDVIIOTLBKey {
+    uint64_t gfn;
+    uint16_t devid;
+} AMDVIIOTLBKey;
+
 uint64_t amdvi_extended_feature_register(AMDVIState *s)
 {
     uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES;
@@ -377,16 +382,6 @@ static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
              PCI_STATUS_SIG_TARGET_ABORT);
 }
 
-static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
-{
-    return *((const uint64_t *)v1) == *((const uint64_t *)v2);
-}
-
-static guint amdvi_uint64_hash(gconstpointer v)
-{
-    return (guint)*(const uint64_t *)v;
-}
-
 static gboolean amdvi_as_equal(gconstpointer v1, gconstpointer v2)
 {
     const AMDVIAsKey *key1 = v1;
@@ -425,11 +420,30 @@ static AMDVIAddressSpace *amdvi_get_as_by_devid(AMDVIState *s, uint16_t devid)
                              amdvi_find_as_by_devid, &devid);
 }
 
+static gboolean amdvi_iotlb_equal(gconstpointer v1, gconstpointer v2)
+{
+    const AMDVIIOTLBKey *key1 = v1;
+    const AMDVIIOTLBKey *key2 = v2;
+
+    return key1->devid == key2->devid && key1->gfn == key2->gfn;
+}
+
+static guint amdvi_iotlb_hash(gconstpointer v)
+{
+    const AMDVIIOTLBKey *key = v;
+    /* Use GPA and DEVID to find the bucket */
+    return (guint)(key->gfn << AMDVI_PAGE_SHIFT_4K |
+                   (key->devid & ~AMDVI_PAGE_MASK_4K));
+}
+
+
 static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
                                            uint64_t devid)
 {
-    uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
-                   ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+    AMDVIIOTLBKey key = {
+        .gfn = AMDVI_GET_IOTLB_GFN(addr),
+        .devid = devid,
+    };
     return g_hash_table_lookup(s->iotlb, &key);
 }
 
@@ -451,8 +465,10 @@ static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
 static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
                                     uint64_t devid)
 {
-    uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
-                   ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+    AMDVIIOTLBKey key = {
+        .gfn = AMDVI_GET_IOTLB_GFN(addr),
+        .devid = devid,
+    };
     g_hash_table_remove(s->iotlb, &key);
 }
 
@@ -463,8 +479,10 @@ static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
     /* don't cache erroneous translations */
     if (to_cache.perm != IOMMU_NONE) {
         AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
-        uint64_t *key = g_new(uint64_t, 1);
-        uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;
+        AMDVIIOTLBKey *key = g_new(AMDVIIOTLBKey, 1);
+
+        key->gfn = AMDVI_GET_IOTLB_GFN(gpa);
+        key->devid = devid;
 
         trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                 PCI_FUNC(devid), gpa, to_cache.translated_addr);
@@ -477,7 +495,8 @@ static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
         entry->perms = to_cache.perm;
         entry->translated_addr = to_cache.translated_addr;
         entry->page_mask = to_cache.addr_mask;
-        *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+        entry->devid = devid;
+
         g_hash_table_replace(s->iotlb, key, entry);
     }
 }
@@ -2526,8 +2545,8 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
         }
     }
 
-    s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
-                                     amdvi_uint64_equal, g_free, g_free);
+    s->iotlb = g_hash_table_new_full(amdvi_iotlb_hash,
+                                     amdvi_iotlb_equal, g_free, g_free);
 
     s->address_spaces = g_hash_table_new_full(amdvi_as_hash,
                                      amdvi_as_equal, g_free, g_free);
-- 
MST



  parent reply	other threads:[~2025-11-09 14:36 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-09 14:35 [PULL 00/14] virtio,pci,pc: fixes for 10.2 Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 01/14] MAINTAINERS: Update entry for AMD-Vi Emulation Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 02/14] amd_iommu: Fix handling of devices on buses != 0 Michael S. Tsirkin
2025-11-09 14:35 ` Michael S. Tsirkin [this message]
2025-11-09 14:35 ` [PULL 04/14] vhost-user: fix shared object lookup handler logic Michael S. Tsirkin
2025-11-10  9:23   ` Albert Esteve
2025-11-10 14:37     ` Richard Henderson
2025-11-10 15:42     ` Michael S. Tsirkin
2025-11-10 15:57       ` Albert Esteve
2025-11-10 16:06         ` Michael S. Tsirkin
2025-11-10 18:54           ` Albert Esteve
2025-11-09 14:35 ` [PULL 05/14] intel_iommu: Handle PASID cache invalidation Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 06/14] intel_iommu: Reset pasid cache when system level reset Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 07/14] intel_iommu: Fix DMA failure when guest switches IOMMU domain Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 08/14] vhost-user: make vhost_set_vring_file() synchronous Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 09/14] tests/qtest/bios-tables-test: Prepare for _DSM change in the DSDT table Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 10/14] hw/pci-host/gpex-acpi: Fix _DSM function 0 support return value Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 11/14] tests/qtest/bios-tables-test: Update DSDT blobs after GPEX _DSM change Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 12/14] virtio-net: Advertise UDP tunnel GSO support by default Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 13/14] q35: increase default tseg size Michael S. Tsirkin
2025-11-09 14:35 ` [PULL 14/14] vhost-user.rst: clarify when FDs can be sent Michael S. Tsirkin
2025-11-10 16:57 ` [PULL 00/14] virtio,pci,pc: fixes for 10.2 Richard Henderson
2025-11-17 10:27 ` Michael S. Tsirkin
2025-11-17 11:44   ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=637dc49a3ae6b72cdb6415dcdcdb22ff770f2c8d.1762698873.git.mst@redhat.com \
    --to=mst@redhat.com \
    --cc=alejandro.j.jimenez@oracle.com \
    --cc=eduardo@habkost.net \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=sarunkod@amd.com \
    --cc=vasant.hegde@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).