qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] Fix DMA failure when there is domain switch in
@ 2025-10-15 10:19 Zhenzhong Duan
  2025-10-15 10:20 ` [PATCH 1/3] intel_iommu: Handle PASID cache invalidation Zhenzhong Duan
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Zhenzhong Duan @ 2025-10-15 10:19 UTC (permalink / raw)
  To: qemu-devel
  Cc: mst, jasowang, peterx, yi.l.liu, clement.mathieu--drif,
	Zhenzhong Duan

Hi,

This fixes a DMA failure in the guest when the user switches the IOMMU
domain manually, e.g., echo [DMA|identity] > /sys/kernel/iommu_groups/6/type

The first two patches come from [PATCH v6 00/22] intel_iommu: Enable first stage translation for passthrough device,
which added basic support for PASID cache invalidation; the third patch
adds the fix.

Thanks
Zhenzhong

Zhenzhong Duan (3):
  intel_iommu: Handle PASID cache invalidation
  intel_iommu: Reset pasid cache when system level reset
  intel_iommu: Fix DMA failure when guest switches IOMMU domain

 hw/i386/intel_iommu_internal.h |  20 +++-
 include/hw/i386/intel_iommu.h  |   6 ++
 hw/i386/intel_iommu.c          | 168 ++++++++++++++++++++++++++++++---
 hw/i386/trace-events           |   4 +
 4 files changed, 185 insertions(+), 13 deletions(-)

-- 
2.47.1



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/3] intel_iommu: Handle PASID cache invalidation
  2025-10-15 10:19 [PATCH 0/3] Fix DMA failure when there is domain switch in Zhenzhong Duan
@ 2025-10-15 10:20 ` Zhenzhong Duan
  2025-10-15 12:38   ` Yi Liu
  2025-10-15 10:20 ` [PATCH 2/3] intel_iommu: Reset pasid cache when system level reset Zhenzhong Duan
  2025-10-15 10:20 ` [PATCH 3/3] intel_iommu: Fix DMA failure when guest switches IOMMU domain Zhenzhong Duan
  2 siblings, 1 reply; 8+ messages in thread
From: Zhenzhong Duan @ 2025-10-15 10:20 UTC (permalink / raw)
  To: qemu-devel
  Cc: mst, jasowang, peterx, yi.l.liu, clement.mathieu--drif,
	Zhenzhong Duan

Add a new entry, VTDPASIDCacheEntry, in VTDAddressSpace to cache the PASID
entry, track PASID usage, and support future PASID-tagged DMA address
translation in the vIOMMU.

When the guest triggers a PASID cache invalidation, QEMU captures it and
updates or invalidates the PASID cache.

The vIOMMU emulator can figure out the reason by fetching the latest guest
PASID entry from memory and comparing it with the cached PASID entry, if the
cached entry is valid.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu_internal.h |  19 ++++-
 include/hw/i386/intel_iommu.h  |   6 ++
 hw/i386/intel_iommu.c          | 150 ++++++++++++++++++++++++++++++---
 hw/i386/trace-events           |   3 +
 4 files changed, 165 insertions(+), 13 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 0f6a1237e4..80193ff28b 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -316,6 +316,8 @@ typedef enum VTDFaultReason {
                                   * request while disabled */
     VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
 
+    VTD_FR_RTADDR_INV_TTM = 0x31,  /* Invalid TTM in RTADDR */
+
     VTD_FR_SM_PRE_ABS = 0x47,   /* SCT.8 : PRE bit in a present SM CE is 0 */
 
     /* PASID directory entry access failure */
@@ -517,6 +519,15 @@ typedef union VTDPRDesc VTDPRDesc;
 #define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
 #define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
 
+/* PASID-cache Invalidate Descriptor (pc_inv_dsc) fields */
+#define VTD_INV_DESC_PASIDC_G(x)        extract64((x)->val[0], 4, 2)
+#define VTD_INV_DESC_PASIDC_G_DSI       0
+#define VTD_INV_DESC_PASIDC_G_PASID_SI  1
+#define VTD_INV_DESC_PASIDC_G_GLOBAL    3
+#define VTD_INV_DESC_PASIDC_DID(x)      extract64((x)->val[0], 16, 16)
+#define VTD_INV_DESC_PASIDC_PASID(x)    extract64((x)->val[0], 32, 20)
+#define VTD_INV_DESC_PASIDC_RSVD_VAL0   0xfff000000000f1c0ULL
+
 /* Page Request Descriptor */
 /* For the low 64-bit of 128-bit */
 #define VTD_PRD_TYPE            (1ULL)
@@ -603,6 +614,12 @@ typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
 #define VTD_SM_CONTEXT_ENTRY_PRE            0x10ULL
 
+typedef struct VTDPASIDCacheInfo {
+    uint8_t type;
+    uint16_t did;
+    uint32_t pasid;
+} VTDPASIDCacheInfo;
+
 /* PASID Table Related Definitions */
 #define VTD_PASID_DIR_BASE_ADDR_MASK  (~0xfffULL)
 #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
@@ -624,7 +641,7 @@ typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_SM_PASID_ENTRY_PT          (4ULL << 6)
 
 #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted guest-address-width */
-#define VTD_SM_PASID_ENTRY_DID(val)    ((val) & VTD_DOMAIN_ID_MASK)
+#define VTD_SM_PASID_ENTRY_DID(x)      extract64((x)->val[1], 0, 16)
 
 #define VTD_SM_PASID_ENTRY_FLPM          3ULL
 #define VTD_SM_PASID_ENTRY_FLPTPTR       (~0xfffULL)
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 47730ac3c7..6e68734b3c 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -95,6 +95,11 @@ struct VTDPASIDEntry {
     uint64_t val[8];
 };
 
+typedef struct VTDPASIDCacheEntry {
+    struct VTDPASIDEntry pasid_entry;
+    bool valid;
+} VTDPASIDCacheEntry;
+
 struct VTDAddressSpace {
     PCIBus *bus;
     uint8_t devfn;
@@ -107,6 +112,7 @@ struct VTDAddressSpace {
     MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */
     IntelIOMMUState *iommu_state;
     VTDContextCacheEntry context_cache_entry;
+    VTDPASIDCacheEntry pasid_cache_entry;
     QLIST_ENTRY(VTDAddressSpace) next;
     /* Superset of notifier flags that this address space has */
     IOMMUNotifierFlag notifier_flags;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 6a168d5107..66f45f89cb 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1607,7 +1607,7 @@ static uint16_t vtd_get_domain_id(IntelIOMMUState *s,
 
     if (s->root_scalable) {
         vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
-        return VTD_SM_PASID_ENTRY_DID(pe.val[1]);
+        return VTD_SM_PASID_ENTRY_DID(&pe);
     }
 
     return VTD_CONTEXT_ENTRY_DID(ce->hi);
@@ -3051,6 +3051,135 @@ static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
     return true;
 }
 
+static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
+                                            VTDPASIDEntry *pe)
+{
+    IntelIOMMUState *s = vtd_as->iommu_state;
+    VTDContextEntry ce;
+    int ret;
+
+    if (!s->root_scalable) {
+        return -VTD_FR_RTADDR_INV_TTM;
+    }
+
+    ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn,
+                                   &ce);
+    if (ret) {
+        return ret;
+    }
+
+    return vtd_ce_get_rid2pasid_entry(s, &ce, pe, vtd_as->pasid);
+}
+
+/*
+ * Update or invalidate pasid cache based on the value in memory.
+ */
+static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
+                                        gpointer user_data)
+{
+    VTDPASIDCacheInfo *pc_info = user_data;
+    VTDAddressSpace *vtd_as = value;
+    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
+    VTDPASIDEntry pe;
+    uint16_t did;
+
+    if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
+        /*
+         * No valid pasid entry in guest memory. e.g. pasid entry was modified
+         * to be either all-zero or non-present. Either case means existing
+         * pasid cache should be invalidated.
+         */
+        pc_entry->valid = false;
+        return;
+    }
+
+    /*
+     * VTD_INV_DESC_PASIDC_G_DSI and VTD_INV_DESC_PASIDC_G_PASID_SI require
+     * DID check. If DID doesn't match the value in cache or memory, then
+     * it's not a pasid entry we want to invalidate.
+     */
+    switch (pc_info->type) {
+    case VTD_INV_DESC_PASIDC_G_PASID_SI:
+        if (pc_info->pasid != vtd_as->pasid) {
+            return;
+        }
+        /* Fall through */
+    case VTD_INV_DESC_PASIDC_G_DSI:
+        if (pc_entry->valid) {
+            did = VTD_SM_PASID_ENTRY_DID(&pc_entry->pasid_entry);
+            if (pc_info->did == did) {
+                break;
+            }
+        }
+        did = VTD_SM_PASID_ENTRY_DID(&pe);
+        if (pc_info->did == did) {
+            break;
+        }
+        return;
+    }
+
+    pc_entry->pasid_entry = pe;
+    pc_entry->valid = true;
+}
+
+static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
+{
+    if (!s->root_scalable || !s->dmar_enabled) {
+        return;
+    }
+
+    vtd_iommu_lock(s);
+    g_hash_table_foreach(s->vtd_address_spaces, vtd_pasid_cache_sync_locked,
+                         pc_info);
+    vtd_iommu_unlock(s);
+}
+
+static bool vtd_process_pasid_desc(IntelIOMMUState *s,
+                                   VTDInvDesc *inv_desc)
+{
+    uint16_t did;
+    uint32_t pasid;
+    VTDPASIDCacheInfo pc_info = {};
+    uint64_t mask[4] = {VTD_INV_DESC_PASIDC_RSVD_VAL0, VTD_INV_DESC_ALL_ONE,
+                        VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
+
+    if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true,
+                                     __func__, "pasid cache inv")) {
+        return false;
+    }
+
+    did = VTD_INV_DESC_PASIDC_DID(inv_desc);
+    pasid = VTD_INV_DESC_PASIDC_PASID(inv_desc);
+    pc_info.type = VTD_INV_DESC_PASIDC_G(inv_desc);
+
+    switch (pc_info.type) {
+    case VTD_INV_DESC_PASIDC_G_DSI:
+        trace_vtd_inv_desc_pasid_cache_dsi(did);
+        pc_info.did = did;
+        break;
+
+    case VTD_INV_DESC_PASIDC_G_PASID_SI:
+        /* PASID selective implies a DID selective */
+        trace_vtd_inv_desc_pasid_cache_psi(did, pasid);
+        pc_info.did = did;
+        pc_info.pasid = pasid ?: PCI_NO_PASID;
+        break;
+
+    case VTD_INV_DESC_PASIDC_G_GLOBAL:
+        trace_vtd_inv_desc_pasid_cache_gsi();
+        break;
+
+    default:
+        error_report_once("invalid granularity field in PASID-cache invalidate "
+                          "descriptor, hi: 0x%"PRIx64" lo: 0x%" PRIx64,
+                           inv_desc->val[1], inv_desc->val[0]);
+        return false;
+    }
+
+    vtd_pasid_cache_sync(s, &pc_info);
+    return true;
+}
+
 static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
                                      VTDInvDesc *inv_desc)
 {
@@ -3266,6 +3395,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
         }
         break;
 
+    case VTD_INV_DESC_PC:
+        trace_vtd_inv_desc("pasid-cache", inv_desc.val[1], inv_desc.val[0]);
+        if (!vtd_process_pasid_desc(s, &inv_desc)) {
+            return false;
+        }
+        break;
+
     case VTD_INV_DESC_PIOTLB:
         trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]);
         if (!vtd_process_piotlb_desc(s, &inv_desc)) {
@@ -3308,16 +3444,6 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
         }
         break;
 
-    /*
-     * TODO: the entity of below two cases will be implemented in future series.
-     * To make guest (which integrates scalable mode support patch set in
-     * iommu driver) work, just return true is enough so far.
-     */
-    case VTD_INV_DESC_PC:
-        if (s->scalable_mode) {
-            break;
-        }
-    /* fallthrough */
     default:
         error_report_once("%s: invalid inv desc: hi=%"PRIx64", lo=%"PRIx64
                           " (unknown type)", __func__, inv_desc.hi,
@@ -5005,7 +5131,7 @@ static int vtd_pri_perform_implicit_invalidation(VTDAddressSpace *vtd_as,
         return -EINVAL;
     }
     pgtt = VTD_PE_GET_TYPE(&pe);
-    domain_id = VTD_SM_PASID_ENTRY_DID(pe.val[1]);
+    domain_id = VTD_SM_PASID_ENTRY_DID(&pe);
     ret = 0;
     switch (pgtt) {
     case VTD_SM_PASID_ENTRY_FLT:
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index ac9e1a10aa..298addb24d 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -24,6 +24,9 @@ vtd_inv_qi_head(uint16_t head) "read head %d"
 vtd_inv_qi_tail(uint16_t head) "write tail %d"
 vtd_inv_qi_fetch(void) ""
 vtd_context_cache_reset(void) ""
+vtd_inv_desc_pasid_cache_gsi(void) ""
+vtd_inv_desc_pasid_cache_dsi(uint16_t domain) "Domain selective PC invalidation domain 0x%"PRIx16
+vtd_inv_desc_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID selective PC invalidation domain 0x%"PRIx16" pasid 0x%"PRIx32
 vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
 vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
 vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
-- 
2.47.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/3] intel_iommu: Reset pasid cache when system level reset
  2025-10-15 10:19 [PATCH 0/3] Fix DMA failure when there is domain switch in Zhenzhong Duan
  2025-10-15 10:20 ` [PATCH 1/3] intel_iommu: Handle PASID cache invalidation Zhenzhong Duan
@ 2025-10-15 10:20 ` Zhenzhong Duan
  2025-10-15 10:20 ` [PATCH 3/3] intel_iommu: Fix DMA failure when guest switches IOMMU domain Zhenzhong Duan
  2 siblings, 0 replies; 8+ messages in thread
From: Zhenzhong Duan @ 2025-10-15 10:20 UTC (permalink / raw)
  To: qemu-devel
  Cc: mst, jasowang, peterx, yi.l.liu, clement.mathieu--drif,
	Zhenzhong Duan

Reset the PASID cache on system-level reset. For PASID_0, the vtd_as is
allocated by the PCI subsystem and never removed, so just mark its PASID
cache entry invalid.

Since vtd_pasid_cache_sync_locked() already handles PASID cache
invalidation, reuse it to invalidate the PASID cache on system-level
reset.

Currently only IOMMUFD-backed VFIO devices cache the PASID entry, so we
don't need to care about emulated devices.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
---
 hw/i386/intel_iommu_internal.h |  1 +
 hw/i386/intel_iommu.c          | 18 ++++++++++++++++--
 hw/i386/trace-events           |  1 +
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 80193ff28b..f6f2b7b8d5 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -618,6 +618,7 @@ typedef struct VTDPASIDCacheInfo {
     uint8_t type;
     uint16_t did;
     uint32_t pasid;
+    bool reset;
 } VTDPASIDCacheInfo;
 
 /* PASID Table Related Definitions */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 66f45f89cb..d656e9c256 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -86,6 +86,18 @@ struct vtd_iotlb_key {
 
 static void vtd_address_space_refresh_all(IntelIOMMUState *s);
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
+static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
+                                        gpointer user_data);
+
+static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
+{
+    VTDPASIDCacheInfo pc_info = { .reset = true };
+
+    trace_vtd_pasid_cache_reset();
+    g_hash_table_foreach(s->vtd_address_spaces,
+                         vtd_pasid_cache_sync_locked, &pc_info);
+}
+
 
 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
                             uint64_t wmask, uint64_t w1cmask)
@@ -381,6 +393,7 @@ static void vtd_reset_caches(IntelIOMMUState *s)
     vtd_iommu_lock(s);
     vtd_reset_iotlb_locked(s);
     vtd_reset_context_cache_locked(s);
+    vtd_pasid_cache_reset_locked(s);
     vtd_iommu_unlock(s);
 }
 
@@ -3083,11 +3096,12 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
     VTDPASIDEntry pe;
     uint16_t did;
 
-    if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
+    if (vtd_dev_get_pe_from_pasid(vtd_as, &pe) || pc_info->reset) {
         /*
          * No valid pasid entry in guest memory. e.g. pasid entry was modified
          * to be either all-zero or non-present. Either case means existing
-         * pasid cache should be invalidated.
+         * pasid cache should be invalidated. This also applies to system level
+         * reset where the whole guest memory is treated as zeroed.
          */
         pc_entry->valid = false;
         return;
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 298addb24d..b704f4f90c 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -24,6 +24,7 @@ vtd_inv_qi_head(uint16_t head) "read head %d"
 vtd_inv_qi_tail(uint16_t head) "write tail %d"
 vtd_inv_qi_fetch(void) ""
 vtd_context_cache_reset(void) ""
+vtd_pasid_cache_reset(void) ""
 vtd_inv_desc_pasid_cache_gsi(void) ""
 vtd_inv_desc_pasid_cache_dsi(uint16_t domain) "Domain selective PC invalidation domain 0x%"PRIx16
 vtd_inv_desc_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID selective PC invalidation domain 0x%"PRIx16" pasid 0x%"PRIx32
-- 
2.47.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/3] intel_iommu: Fix DMA failure when guest switches IOMMU domain
  2025-10-15 10:19 [PATCH 0/3] Fix DMA failure when there is domain switch in Zhenzhong Duan
  2025-10-15 10:20 ` [PATCH 1/3] intel_iommu: Handle PASID cache invalidation Zhenzhong Duan
  2025-10-15 10:20 ` [PATCH 2/3] intel_iommu: Reset pasid cache when system level reset Zhenzhong Duan
@ 2025-10-15 10:20 ` Zhenzhong Duan
  2025-10-15 12:42   ` Yi Liu
  2 siblings, 1 reply; 8+ messages in thread
From: Zhenzhong Duan @ 2025-10-15 10:20 UTC (permalink / raw)
  To: qemu-devel
  Cc: mst, jasowang, peterx, yi.l.liu, clement.mathieu--drif,
	Zhenzhong Duan

The kernel allows the user to switch the IOMMU domain, e.g., between the DMA
and identity domains. When this happens in IOMMU scalable mode, a PASID
cache invalidation request is sent. This request is ignored by the vIOMMU,
which leaves the device bound to the wrong address space, so DMA fails.

This issue exists in scalable mode with both first-stage and second-stage
translation, and for both emulated and passthrough devices.

Take a network device as an example; the sequence below triggers the issue:

1. start a guest with iommu=pt
2. echo 0000:01:00.0 > /sys/bus/pci/drivers/virtio-pci/unbind
3. echo DMA > /sys/kernel/iommu_groups/6/type
4. echo 0000:01:00.0 > /sys/bus/pci/drivers/virtio-pci/bind
5. Ping test

Fix it by switching the address space in the invalidation handler.

Fixes: 4a4f219e8a10 ("intel_iommu: add scalable-mode option to make scalable mode work")
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index d656e9c256..30275a4f23 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3104,7 +3104,7 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
          * reset where the whole guest memory is treated as zeroed.
          */
         pc_entry->valid = false;
-        return;
+        goto switch_as;
     }
 
     /*
@@ -3134,6 +3134,10 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
 
     pc_entry->pasid_entry = pe;
     pc_entry->valid = true;
+
+switch_as:
+    vtd_switch_address_space(vtd_as);
+    vtd_address_space_sync(vtd_as);
 }
 
 static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
-- 
2.47.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/3] intel_iommu: Handle PASID cache invalidation
  2025-10-15 10:20 ` [PATCH 1/3] intel_iommu: Handle PASID cache invalidation Zhenzhong Duan
@ 2025-10-15 12:38   ` Yi Liu
  2025-10-16  2:45     ` Duan, Zhenzhong
  0 siblings, 1 reply; 8+ messages in thread
From: Yi Liu @ 2025-10-15 12:38 UTC (permalink / raw)
  To: Zhenzhong Duan, qemu-devel; +Cc: mst, jasowang, peterx, clement.mathieu--drif

On 2025/10/15 18:20, Zhenzhong Duan wrote:
> Adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the pasid
> entry and track PASID usage and future PASID tagged DMA address translation
> support in vIOMMU.
> 
> When guest triggers pasid cache invalidation, QEMU will capture it and
> update or invalidate pasid cache.
> 
> vIOMMU emulator could figure out the reason by fetching latest guest pasid
> entry in memory and compare it with cached PASID entry if it's valid.
> 
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   hw/i386/intel_iommu_internal.h |  19 ++++-
>   include/hw/i386/intel_iommu.h  |   6 ++
>   hw/i386/intel_iommu.c          | 150 ++++++++++++++++++++++++++++++---
>   hw/i386/trace-events           |   3 +
>   4 files changed, 165 insertions(+), 13 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 0f6a1237e4..80193ff28b 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -316,6 +316,8 @@ typedef enum VTDFaultReason {
>                                     * request while disabled */
>       VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>   
> +    VTD_FR_RTADDR_INV_TTM = 0x31,  /* Invalid TTM in RTADDR */
> +
>       VTD_FR_SM_PRE_ABS = 0x47,   /* SCT.8 : PRE bit in a present SM CE is 0 */
>   
>       /* PASID directory entry access failure */
> @@ -517,6 +519,15 @@ typedef union VTDPRDesc VTDPRDesc;
>   #define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
>   #define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>   
> +/* PASID-cache Invalidate Descriptor (pc_inv_dsc) fields */
> +#define VTD_INV_DESC_PASIDC_G(x)        extract64((x)->val[0], 4, 2)
> +#define VTD_INV_DESC_PASIDC_G_DSI       0
> +#define VTD_INV_DESC_PASIDC_G_PASID_SI  1
> +#define VTD_INV_DESC_PASIDC_G_GLOBAL    3
> +#define VTD_INV_DESC_PASIDC_DID(x)      extract64((x)->val[0], 16, 16)
> +#define VTD_INV_DESC_PASIDC_PASID(x)    extract64((x)->val[0], 32, 20)
> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0   0xfff000000000f1c0ULL
> +
>   /* Page Request Descriptor */
>   /* For the low 64-bit of 128-bit */
>   #define VTD_PRD_TYPE            (1ULL)
> @@ -603,6 +614,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
>   #define VTD_SM_CONTEXT_ENTRY_PRE            0x10ULL
>   
> +typedef struct VTDPASIDCacheInfo {
> +    uint8_t type;
> +    uint16_t did;
> +    uint32_t pasid;
> +} VTDPASIDCacheInfo;
> +
>   /* PASID Table Related Definitions */
>   #define VTD_PASID_DIR_BASE_ADDR_MASK  (~0xfffULL)
>   #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
> @@ -624,7 +641,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>   #define VTD_SM_PASID_ENTRY_PT          (4ULL << 6)
>   
>   #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted guest-address-width */
> -#define VTD_SM_PASID_ENTRY_DID(val)    ((val) & VTD_DOMAIN_ID_MASK)
> +#define VTD_SM_PASID_ENTRY_DID(x)      extract64((x)->val[1], 0, 16)

I think this can be done in a separate patch.

>   
>   #define VTD_SM_PASID_ENTRY_FLPM          3ULL
>   #define VTD_SM_PASID_ENTRY_FLPTPTR       (~0xfffULL)
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 47730ac3c7..6e68734b3c 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -95,6 +95,11 @@ struct VTDPASIDEntry {
>       uint64_t val[8];
>   };
>   
> +typedef struct VTDPASIDCacheEntry {
> +    struct VTDPASIDEntry pasid_entry;
> +    bool valid;
> +} VTDPASIDCacheEntry;
> +
>   struct VTDAddressSpace {
>       PCIBus *bus;
>       uint8_t devfn;
> @@ -107,6 +112,7 @@ struct VTDAddressSpace {
>       MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */
>       IntelIOMMUState *iommu_state;
>       VTDContextCacheEntry context_cache_entry;
> +    VTDPASIDCacheEntry pasid_cache_entry;
>       QLIST_ENTRY(VTDAddressSpace) next;
>       /* Superset of notifier flags that this address space has */
>       IOMMUNotifierFlag notifier_flags;
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 6a168d5107..66f45f89cb 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -1607,7 +1607,7 @@ static uint16_t vtd_get_domain_id(IntelIOMMUState *s,
>   
>       if (s->root_scalable) {
>           vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
> -        return VTD_SM_PASID_ENTRY_DID(pe.val[1]);
> +        return VTD_SM_PASID_ENTRY_DID(&pe);
>       }
>   
>       return VTD_CONTEXT_ENTRY_DID(ce->hi);
> @@ -3051,6 +3051,135 @@ static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
>       return true;
>   }
>   
> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
> +                                            VTDPASIDEntry *pe)
> +{
> +    IntelIOMMUState *s = vtd_as->iommu_state;
> +    VTDContextEntry ce;
> +    int ret;
> +
> +    if (!s->root_scalable) {
> +        return -VTD_FR_RTADDR_INV_TTM;
> +    }
> +
> +    ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn,
> +                                   &ce);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    return vtd_ce_get_rid2pasid_entry(s, &ce, pe, vtd_as->pasid);
> +}
> +
> +/*
> + * Update or invalidate pasid cache based on the value in memory.

s/the value in memory./the pasid entry in guest memory.

> + */
> +static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
> +                                        gpointer user_data)
> +{
> +    VTDPASIDCacheInfo *pc_info = user_data;
> +    VTDAddressSpace *vtd_as = value;
> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
> +    VTDPASIDEntry pe;
> +    uint16_t did;
> +
> +    if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
> +        /*
> +         * No valid pasid entry in guest memory. e.g. pasid entry was modified
> +         * to be either all-zero or non-present. Either case means existing
> +         * pasid cache should be invalidated.
> +         */
> +        pc_entry->valid = false;
> +        return;
> +    }
> +
> +    /*
> +     * VTD_INV_DESC_PASIDC_G_DSI and VTD_INV_DESC_PASIDC_G_PASID_SI require
> +     * DID check. If DID doesn't match the value in cache or memory, then
> +     * it's not a pasid entry we want to invalidate.
> +     */
> +    switch (pc_info->type) {
> +    case VTD_INV_DESC_PASIDC_G_PASID_SI:
> +        if (pc_info->pasid != vtd_as->pasid) {
> +            return;
> +        }
> +        /* Fall through */
> +    case VTD_INV_DESC_PASIDC_G_DSI:
> +        if (pc_entry->valid) {
> +            did = VTD_SM_PASID_ENTRY_DID(&pc_entry->pasid_entry);
> +            if (pc_info->did == did) {
> +                break;
> +            }
> +        }
> +        did = VTD_SM_PASID_ENTRY_DID(&pe);
> +        if (pc_info->did == did) {
> +            break;
> +        }

hmmm. how about below?

         /*
          * For a newly set pasid entry, the iommu driver is supposed to
          * invalidate the pasid cache with the did configured in the pasid
          * entry when caching-mode is reported. Otherwise the qemu vIOMMU
          * just skips it.
          */
         if pc_entry->valid) {
             did = VTD_SM_PASID_ENTRY_DID(&pc_entry->pasid_entry);
         } else {
             did = VTD_SM_PASID_ENTRY_DID(&pe);
         }

         if (pc_info->did != did) {
             return;
         }

Regards,
Yi Liu
> +        return;
> +    }
> +
> +    pc_entry->pasid_entry = pe;
> +    pc_entry->valid = true;
> +}
> +
> +static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> +{
> +    if (!s->root_scalable || !s->dmar_enabled) {
> +        return;
> +    }
> +
> +    vtd_iommu_lock(s);
> +    g_hash_table_foreach(s->vtd_address_spaces, vtd_pasid_cache_sync_locked,
> +                         pc_info);
> +    vtd_iommu_unlock(s);
> +}
> +
> +static bool vtd_process_pasid_desc(IntelIOMMUState *s,
> +                                   VTDInvDesc *inv_desc)
> +{
> +    uint16_t did;
> +    uint32_t pasid;
> +    VTDPASIDCacheInfo pc_info = {};
> +    uint64_t mask[4] = {VTD_INV_DESC_PASIDC_RSVD_VAL0, VTD_INV_DESC_ALL_ONE,
> +                        VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
> +
> +    if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true,
> +                                     __func__, "pasid cache inv")) {
> +        return false;
> +    }
> +
> +    did = VTD_INV_DESC_PASIDC_DID(inv_desc);
> +    pasid = VTD_INV_DESC_PASIDC_PASID(inv_desc);
> +    pc_info.type = VTD_INV_DESC_PASIDC_G(inv_desc);
> +
> +    switch (pc_info.type) {
> +    case VTD_INV_DESC_PASIDC_G_DSI:
> +        trace_vtd_inv_desc_pasid_cache_dsi(did);
> +        pc_info.did = did;
> +        break;
> +
> +    case VTD_INV_DESC_PASIDC_G_PASID_SI:
> +        /* PASID selective implies a DID selective */
> +        trace_vtd_inv_desc_pasid_cache_psi(did, pasid);
> +        pc_info.did = did;
> +        pc_info.pasid = pasid ?: PCI_NO_PASID;
> +        break;
> +
> +    case VTD_INV_DESC_PASIDC_G_GLOBAL:
> +        trace_vtd_inv_desc_pasid_cache_gsi();
> +        break;
> +
> +    default:
> +        error_report_once("invalid granularity field in PASID-cache invalidate "
> +                          "descriptor, hi: 0x%"PRIx64" lo: 0x%" PRIx64,
> +                           inv_desc->val[1], inv_desc->val[0]);
> +        return false;
> +    }
> +
> +    vtd_pasid_cache_sync(s, &pc_info);
> +    return true;
> +}
> +
>   static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
>                                        VTDInvDesc *inv_desc)
>   {
> @@ -3266,6 +3395,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
>           }
>           break;
>   
> +    case VTD_INV_DESC_PC:
> +        trace_vtd_inv_desc("pasid-cache", inv_desc.val[1], inv_desc.val[0]);
> +        if (!vtd_process_pasid_desc(s, &inv_desc)) {
> +            return false;
> +        }
> +        break;
> +
>       case VTD_INV_DESC_PIOTLB:
>           trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]);
>           if (!vtd_process_piotlb_desc(s, &inv_desc)) {
> @@ -3308,16 +3444,6 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
>           }
>           break;
>   
> -    /*
> -     * TODO: the entity of below two cases will be implemented in future series.
> -     * To make guest (which integrates scalable mode support patch set in
> -     * iommu driver) work, just return true is enough so far.
> -     */
> -    case VTD_INV_DESC_PC:
> -        if (s->scalable_mode) {
> -            break;
> -        }
> -    /* fallthrough */
>       default:
>           error_report_once("%s: invalid inv desc: hi=%"PRIx64", lo=%"PRIx64
>                             " (unknown type)", __func__, inv_desc.hi,
> @@ -5005,7 +5131,7 @@ static int vtd_pri_perform_implicit_invalidation(VTDAddressSpace *vtd_as,
>           return -EINVAL;
>       }
>       pgtt = VTD_PE_GET_TYPE(&pe);
> -    domain_id = VTD_SM_PASID_ENTRY_DID(pe.val[1]);
> +    domain_id = VTD_SM_PASID_ENTRY_DID(&pe);
>       ret = 0;
>       switch (pgtt) {
>       case VTD_SM_PASID_ENTRY_FLT:
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index ac9e1a10aa..298addb24d 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -24,6 +24,9 @@ vtd_inv_qi_head(uint16_t head) "read head %d"
>   vtd_inv_qi_tail(uint16_t head) "write tail %d"
>   vtd_inv_qi_fetch(void) ""
>   vtd_context_cache_reset(void) ""
> +vtd_inv_desc_pasid_cache_gsi(void) ""
> +vtd_inv_desc_pasid_cache_dsi(uint16_t domain) "Domain selective PC invalidation domain 0x%"PRIx16
> +vtd_inv_desc_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID selective PC invalidation domain 0x%"PRIx16" pasid 0x%"PRIx32
>   vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
>   vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
>   vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 3/3] intel_iommu: Fix DMA failure when guest switches IOMMU domain
  2025-10-15 10:20 ` [PATCH 3/3] intel_iommu: Fix DMA failure when guest switches IOMMU domain Zhenzhong Duan
@ 2025-10-15 12:42   ` Yi Liu
  2025-10-16  3:14     ` Duan, Zhenzhong
  0 siblings, 1 reply; 8+ messages in thread
From: Yi Liu @ 2025-10-15 12:42 UTC (permalink / raw)
  To: Zhenzhong Duan, qemu-devel; +Cc: mst, jasowang, peterx, clement.mathieu--drif

On 2025/10/15 18:20, Zhenzhong Duan wrote:
> The kernel allows the user to switch IOMMU domain, e.g., switch between DMA
> and identity domain. When this happens in IOMMU scalable mode, a pasid
> cache invalidation request is sent; this request is ignored by vIOMMU,
> which leaves the device bound to the wrong address space, so DMA fails.
> 
> This issue exists in scalable mode with both first stage and second
> stage translations, both emulated and passthrough devices.

Does it affect emulated devices? The domain switching should have
IOTLB/PIOTLB invalidation, right? Then the emulated device should
not be affected.

> 
> Take a network device for example; the sequence below triggers the issue:
> 
> 1. start a guest with iommu=pt
> 2. echo 0000:01:00.0 > /sys/bus/pci/drivers/virtio-pci/unbind
> 3. echo DMA > /sys/kernel/iommu_groups/6/type
> 4. echo 0000:01:00.0 > /sys/bus/pci/drivers/virtio-pci/bind
> 5. Ping test
> 
> Fix it by switching address space in invalidation handler.

a good catch.

> 
> Fixes: 4a4f219e8a10 ("intel_iommu: add scalable-mode option to make scalable mode work")
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   hw/i386/intel_iommu.c | 6 +++++-
>   1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index d656e9c256..30275a4f23 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -3104,7 +3104,7 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>            * reset where the whole guest memory is treated as zeroed.
>            */
>           pc_entry->valid = false;
> -        return;
> +        goto switch_as;
>       }
>   
>       /*
> @@ -3134,6 +3134,10 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>   
>       pc_entry->pasid_entry = pe;
>       pc_entry->valid = true;
> +
> +switch_as:
> +    vtd_switch_address_space(vtd_as);
> +    vtd_address_space_sync(vtd_as);
>   }
>   
>   static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)

The change looks good to me. You might want to adjust a bit per the
comment in patch 01.

Regards,
Yi Liu


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH 1/3] intel_iommu: Handle PASID cache invalidation
  2025-10-15 12:38   ` Yi Liu
@ 2025-10-16  2:45     ` Duan, Zhenzhong
  0 siblings, 0 replies; 8+ messages in thread
From: Duan, Zhenzhong @ 2025-10-16  2:45 UTC (permalink / raw)
  To: Liu, Yi L, qemu-devel@nongnu.org
  Cc: mst@redhat.com, jasowang@redhat.com, peterx@redhat.com,
	clement.mathieu--drif@eviden.com



>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH 1/3] intel_iommu: Handle PASID cache invalidation
>
>On 2025/10/15 18:20, Zhenzhong Duan wrote:
>> Add a new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the pasid
>> entry, track PASID usage, and support future PASID-tagged DMA address
>> translation in vIOMMU.
>>
>> When guest triggers pasid cache invalidation, QEMU will capture it and
>> update or invalidate pasid cache.
>>
>> vIOMMU emulator could figure out the reason by fetching latest guest pasid
>> entry in memory and compare it with cached PASID entry if it's valid.
>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   hw/i386/intel_iommu_internal.h |  19 ++++-
>>   include/hw/i386/intel_iommu.h  |   6 ++
>>   hw/i386/intel_iommu.c          | 150
>++++++++++++++++++++++++++++++---
>>   hw/i386/trace-events           |   3 +
>>   4 files changed, 165 insertions(+), 13 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h
>b/hw/i386/intel_iommu_internal.h
>> index 0f6a1237e4..80193ff28b 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -316,6 +316,8 @@ typedef enum VTDFaultReason {
>>                                     * request while disabled */
>>       VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>>
>> +    VTD_FR_RTADDR_INV_TTM = 0x31,  /* Invalid TTM in RTADDR */
>> +
>>       VTD_FR_SM_PRE_ABS = 0x47,   /* SCT.8 : PRE bit in a present SM
>CE is 0 */
>>
>>       /* PASID directory entry access failure */
>> @@ -517,6 +519,15 @@ typedef union VTDPRDesc VTDPRDesc;
>>   #define VTD_INV_DESC_PIOTLB_RSVD_VAL0
>0xfff000000000f1c0ULL
>>   #define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>>
>> +/* PASID-cache Invalidate Descriptor (pc_inv_dsc) fields */
>> +#define VTD_INV_DESC_PASIDC_G(x)        extract64((x)->val[0], 4, 2)
>> +#define VTD_INV_DESC_PASIDC_G_DSI       0
>> +#define VTD_INV_DESC_PASIDC_G_PASID_SI  1
>> +#define VTD_INV_DESC_PASIDC_G_GLOBAL    3
>> +#define VTD_INV_DESC_PASIDC_DID(x)      extract64((x)->val[0], 16,
>16)
>> +#define VTD_INV_DESC_PASIDC_PASID(x)    extract64((x)->val[0], 32,
>20)
>> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0   0xfff000000000f1c0ULL
>> +
>>   /* Page Request Descriptor */
>>   /* For the low 64-bit of 128-bit */
>>   #define VTD_PRD_TYPE            (1ULL)
>> @@ -603,6 +614,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1
>0xffffffffffe00000ULL
>>   #define VTD_SM_CONTEXT_ENTRY_PRE            0x10ULL
>>
>> +typedef struct VTDPASIDCacheInfo {
>> +    uint8_t type;
>> +    uint16_t did;
>> +    uint32_t pasid;
>> +} VTDPASIDCacheInfo;
>> +
>>   /* PASID Table Related Definitions */
>>   #define VTD_PASID_DIR_BASE_ADDR_MASK  (~0xfffULL)
>>   #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
>> @@ -624,7 +641,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>   #define VTD_SM_PASID_ENTRY_PT          (4ULL << 6)
>>
>>   #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted
>guest-address-width */
>> -#define VTD_SM_PASID_ENTRY_DID(val)    ((val) &
>VTD_DOMAIN_ID_MASK)
>> +#define VTD_SM_PASID_ENTRY_DID(x)      extract64((x)->val[1], 0, 16)
>
>I think this can be done in a separate patch.

OK, nesting series has a patch handling this kind of cleanup, will move it there.

>
>>
>>   #define VTD_SM_PASID_ENTRY_FLPM          3ULL
>>   #define VTD_SM_PASID_ENTRY_FLPTPTR       (~0xfffULL)
>> diff --git a/include/hw/i386/intel_iommu.h
>b/include/hw/i386/intel_iommu.h
>> index 47730ac3c7..6e68734b3c 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -95,6 +95,11 @@ struct VTDPASIDEntry {
>>       uint64_t val[8];
>>   };
>>
>> +typedef struct VTDPASIDCacheEntry {
>> +    struct VTDPASIDEntry pasid_entry;
>> +    bool valid;
>> +} VTDPASIDCacheEntry;
>> +
>>   struct VTDAddressSpace {
>>       PCIBus *bus;
>>       uint8_t devfn;
>> @@ -107,6 +112,7 @@ struct VTDAddressSpace {
>>       MemoryRegion iommu_ir_fault; /* Interrupt region for catching
>fault */
>>       IntelIOMMUState *iommu_state;
>>       VTDContextCacheEntry context_cache_entry;
>> +    VTDPASIDCacheEntry pasid_cache_entry;
>>       QLIST_ENTRY(VTDAddressSpace) next;
>>       /* Superset of notifier flags that this address space has */
>>       IOMMUNotifierFlag notifier_flags;
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 6a168d5107..66f45f89cb 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -1607,7 +1607,7 @@ static uint16_t
>vtd_get_domain_id(IntelIOMMUState *s,
>>
>>       if (s->root_scalable) {
>>           vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
>> -        return VTD_SM_PASID_ENTRY_DID(pe.val[1]);
>> +        return VTD_SM_PASID_ENTRY_DID(&pe);
>>       }
>>
>>       return VTD_CONTEXT_ENTRY_DID(ce->hi);
>> @@ -3051,6 +3051,135 @@ static bool
>vtd_process_piotlb_desc(IntelIOMMUState *s,
>>       return true;
>>   }
>>
>> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
>> +                                            VTDPASIDEntry *pe)
>> +{
>> +    IntelIOMMUState *s = vtd_as->iommu_state;
>> +    VTDContextEntry ce;
>> +    int ret;
>> +
>> +    if (!s->root_scalable) {
>> +        return -VTD_FR_RTADDR_INV_TTM;
>> +    }
>> +
>> +    ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
>vtd_as->devfn,
>> +                                   &ce);
>> +    if (ret) {
>> +        return ret;
>> +    }
>> +
>> +    return vtd_ce_get_rid2pasid_entry(s, &ce, pe, vtd_as->pasid);
>> +}
>> +
>> +/*
>> + * Update or invalidate pasid cache based on the value in memory.
>
>s/the value in memory./the pasid entry in guest memory.

OK

>
>> + */
>> +static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>> +                                        gpointer user_data)
>> +{
>> +    VTDPASIDCacheInfo *pc_info = user_data;
>> +    VTDAddressSpace *vtd_as = value;
>> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> +    VTDPASIDEntry pe;
>> +    uint16_t did;
>> +
>> +    if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
>> +        /*
>> +         * No valid pasid entry in guest memory. e.g. pasid entry was
>modified
>> +         * to be either all-zero or non-present. Either case means
>existing
>> +         * pasid cache should be invalidated.
>> +         */
>> +        pc_entry->valid = false;
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * VTD_INV_DESC_PASIDC_G_DSI and
>VTD_INV_DESC_PASIDC_G_PASID_SI require
>> +     * DID check. If DID doesn't match the value in cache or memory,
>then
>> +     * it's not a pasid entry we want to invalidate.
>> +     */
>> +    switch (pc_info->type) {
>> +    case VTD_INV_DESC_PASIDC_G_PASID_SI:
>> +        if (pc_info->pasid != vtd_as->pasid) {
>> +            return;
>> +        }
>> +        /* Fall through */
>> +    case VTD_INV_DESC_PASIDC_G_DSI:
>> +        if (pc_entry->valid) {
>> +            did = VTD_SM_PASID_ENTRY_DID(&pc_entry->pasid_entry);
>> +            if (pc_info->did == did) {
>> +                break;
>> +            }
>> +        }
>> +        did = VTD_SM_PASID_ENTRY_DID(&pe);
>> +        if (pc_info->did == did) {
>> +            break;
>> +        }
>
>hmmm. how about below?
>
>         /*
>          * For a newly set pasid entry, the iommu driver is supposed to
>          * invalidate the pasid cache with the did configured in the pasid
>          * entry when caching-mode is reported. Otherwise qemu vIOMMU just
>          * skips it.
>          */
>         if (pc_entry->valid) {
>             did = VTD_SM_PASID_ENTRY_DID(&pc_entry->pasid_entry);
>         } else {
>             did = VTD_SM_PASID_ENTRY_DID(&pe);
>         }
>
>         if (pc_info->did != did) {
>             return;
>         }

Yes, looks cleaner, will do.

Thanks
Zhenzhong

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH 3/3] intel_iommu: Fix DMA failure when guest switches IOMMU domain
  2025-10-15 12:42   ` Yi Liu
@ 2025-10-16  3:14     ` Duan, Zhenzhong
  0 siblings, 0 replies; 8+ messages in thread
From: Duan, Zhenzhong @ 2025-10-16  3:14 UTC (permalink / raw)
  To: Liu, Yi L, qemu-devel@nongnu.org
  Cc: mst@redhat.com, jasowang@redhat.com, peterx@redhat.com,
	clement.mathieu--drif@eviden.com



>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH 3/3] intel_iommu: Fix DMA failure when guest switches
>IOMMU domain
>
>On 2025/10/15 18:20, Zhenzhong Duan wrote:
>> The kernel allows the user to switch IOMMU domain, e.g., switch between DMA
>> and identity domain. When this happens in IOMMU scalable mode, a pasid
>> cache invalidation request is sent; this request is ignored by vIOMMU,
>> which leaves the device bound to the wrong address space, so DMA fails.
>>
>> This issue exists in scalable mode with both first stage and second
>> stage translations, both emulated and passthrough devices.
>
>Does it affect emulated devices? The domain switching should have
>IOTLB/PIOTLB invalidation, right? Then the emulated device should
>not be affected.

Yes, because we missed the address space switch in vIOMMU, vtd_iommu_translate isn't called even with a DMA domain.

With a vhost emulated net card, I get the errors below and the guest hangs.

qemu-system-x86_64: Fail to lookup the translated address fffff000
qemu-system-x86_64: unable to start vhost net: 14: falling back on userspace virtio
qemu-system-x86_64: Guest says index 65535 is available
qemu-system-x86_64: Guest moved used index from 0 to 65535

Thanks
Zhenzhong

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2025-10-16  3:15 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-10-15 10:19 [PATCH 0/3] Fix DMA failure when there is domain switch in Zhenzhong Duan
2025-10-15 10:20 ` [PATCH 1/3] intel_iommu: Handle PASID cache invalidation Zhenzhong Duan
2025-10-15 12:38   ` Yi Liu
2025-10-16  2:45     ` Duan, Zhenzhong
2025-10-15 10:20 ` [PATCH 2/3] intel_iommu: Reset pasid cache when system level reset Zhenzhong Duan
2025-10-15 10:20 ` [PATCH 3/3] intel_iommu: Fix DMA failure when guest switches IOMMU domain Zhenzhong Duan
2025-10-15 12:42   ` Yi Liu
2025-10-16  3:14     ` Duan, Zhenzhong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).