qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
To: qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com,
	eric.auger@redhat.com, mst@redhat.com, jasowang@redhat.com,
	peterx@redhat.com, ddutile@redhat.com, jgg@nvidia.com,
	nicolinc@nvidia.com, shameerali.kolothum.thodi@huawei.com,
	joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
	kevin.tian@intel.com, yi.l.liu@intel.com, chao.p.peng@intel.com,
	Zhenzhong Duan <zhenzhong.duan@intel.com>,
	Yi Sun <yi.y.sun@linux.intel.com>
Subject: [PATCH v3 10/20] intel_iommu: Handle PASID entry removing and updating
Date: Tue,  8 Jul 2025 07:05:51 -0400	[thread overview]
Message-ID: <20250708110601.633308-11-zhenzhong.duan@intel.com> (raw)
In-Reply-To: <20250708110601.633308-1-zhenzhong.duan@intel.com>

This adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the
pasid entry and track PASID usage and future PASID tagged DMA address
translation support in vIOMMU.

VTDAddressSpace of PCI_NO_PASID is allocated when device is plugged and
never freed. For other pasid, VTDAddressSpace instance is created/destroyed
per the guest pasid entry set up/destroy.

When guest modifies a PASID entry, QEMU will capture the guest pasid selective
pasid cache invalidation, allocate or remove a VTDAddressSpace instance per the
invalidation reasons:

    a) a present pasid entry moved to non-present
    b) a present pasid entry to be a present entry
    c) a non-present pasid entry moved to present

This patch handles a) and b), following patch will handle c).

vIOMMU emulator could figure out the reason by fetching latest guest pasid entry
and compare it with cached PASID entry.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu.c          | 194 +++++++++++++++++++++++++++++++--
 hw/i386/intel_iommu_internal.h |  27 ++++-
 hw/i386/trace-events           |   3 +
 include/hw/i386/intel_iommu.h  |   6 +
 4 files changed, 218 insertions(+), 12 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 38e7f7b7be..5bda439de6 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1675,7 +1675,7 @@ static uint16_t vtd_get_domain_id(IntelIOMMUState *s,
 
     if (s->root_scalable) {
         vtd_ce_get_pasid_entry(s, ce, &pe, pasid);
-        return VTD_SM_PASID_ENTRY_DID(pe.val[1]);
+        return VTD_SM_PASID_ENTRY_DID(&pe);
     }
 
     return VTD_CONTEXT_ENTRY_DID(ce->hi);
@@ -3103,6 +3103,181 @@ static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
     return true;
 }
 
+static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
+                                            uint32_t pasid, VTDPASIDEntry *pe)
+{
+    IntelIOMMUState *s = vtd_as->iommu_state;
+    VTDContextEntry ce;
+    int ret;
+
+    if (!s->root_scalable) {
+        return -VTD_FR_RTADDR_INV_TTM;
+    }
+
+    ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn,
+                                   &ce);
+    if (ret) {
+        return ret;
+    }
+
+    return vtd_ce_get_pasid_entry(s, &ce, pe, pasid);
+}
+
+static bool vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
+{
+    return !memcmp(p1, p2, sizeof(*p1));
+}
+
+/*
+ * This function is used to update or clear cached pasid entry in vtd_as.
+ * vtd_as is released when corresponding cached pasid entry is cleared,
+ * except for PCI_NO_PASID which vtd_as is owen by PCI sub-system.
+ */
+static gboolean vtd_flush_pasid_locked(gpointer key, gpointer value,
+                                       gpointer user_data)
+{
+    VTDPASIDCacheInfo *pc_info = user_data;
+    VTDAddressSpace *vtd_as = value;
+    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
+    VTDPASIDEntry pe;
+    uint16_t did;
+    uint32_t pasid;
+    int ret;
+
+    if (!pc_entry->valid) {
+        return false;
+    }
+    did = VTD_SM_PASID_ENTRY_DID(&pc_entry->pasid_entry);
+
+    if (vtd_as_to_iommu_pasid_locked(vtd_as, &pasid)) {
+        goto remove;
+    }
+
+    switch (pc_info->type) {
+    case VTD_PASID_CACHE_PASIDSI:
+        if (pc_info->pasid != pasid) {
+            return false;
+        }
+        /* fall through */
+    case VTD_PASID_CACHE_DOMSI:
+        if (pc_info->did != did) {
+            return false;
+        }
+        /* fall through */
+    case VTD_PASID_CACHE_GLOBAL_INV:
+        break;
+    default:
+        error_setg(&error_fatal, "invalid pc_info->type for flush");
+    }
+
+    /*
+     * pasid cache invalidation may indicate a present pasid
+     * entry to present pasid entry modification. To cover such
+     * case, vIOMMU emulator needs to fetch latest guest pasid
+     * entry and compares with cached pasid entry, then update
+     * pasid cache.
+     */
+    ret = vtd_dev_get_pe_from_pasid(vtd_as, pasid, &pe);
+    if (ret) {
+        /*
+         * No valid pasid entry in guest memory. e.g. pasid entry
+         * was modified to be either all-zero or non-present. Either
+         * case means existing pasid cache should be removed.
+         */
+        goto remove;
+    }
+
+    /* No need to update if cached pasid entry is latest */
+    if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
+        pc_entry->pasid_entry = pe;
+    }
+    return false;
+
+remove:
+    pc_entry->valid = false;
+
+    /*
+     * Don't remove address space of PCI_NO_PASID which is created for PCI
+     * sub-system.
+     */
+    if (vtd_as->pasid == PCI_NO_PASID) {
+        return false;
+    }
+    return true;
+}
+
+/* Update the pasid cache in vIOMMU */
+static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
+{
+    if (!s->flts || !s->root_scalable || !s->dmar_enabled) {
+        return;
+    }
+
+    /*
+     * Regards to a pasid cache invalidation, e.g. a PSI.
+     * It could be either cases of below:
+     * a) a present pasid entry moved to non-present
+     * b) a present pasid entry to be a present entry
+     * c) a non-present pasid entry moved to present
+     */
+    vtd_iommu_lock(s);
+    /*
+     * a,b): loop all the existing vtd_as instances for pasid cache remove
+       or update.
+     */
+    g_hash_table_foreach_remove(s->vtd_address_spaces, vtd_flush_pasid_locked,
+                                pc_info);
+    vtd_iommu_unlock(s);
+}
+
+static bool vtd_process_pasid_desc(IntelIOMMUState *s,
+                                   VTDInvDesc *inv_desc)
+{
+    uint16_t did;
+    uint32_t pasid;
+    VTDPASIDCacheInfo pc_info;
+    uint64_t mask[4] = {VTD_INV_DESC_PASIDC_RSVD_VAL0, VTD_INV_DESC_ALL_ONE,
+                        VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
+
+    if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true,
+                                     __func__, "pasid cache inv")) {
+        return false;
+    }
+
+    did = VTD_INV_DESC_PASIDC_DID(inv_desc);
+    pasid = VTD_INV_DESC_PASIDC_PASID(inv_desc);
+
+    switch (VTD_INV_DESC_PASIDC_G(inv_desc)) {
+    case VTD_INV_DESC_PASIDC_G_DSI:
+        trace_vtd_pasid_cache_dsi(did);
+        pc_info.type = VTD_PASID_CACHE_DOMSI;
+        pc_info.did = did;
+        break;
+
+    case VTD_INV_DESC_PASIDC_G_PASID_SI:
+        /* PASID selective implies a DID selective */
+        trace_vtd_pasid_cache_psi(did, pasid);
+        pc_info.type = VTD_PASID_CACHE_PASIDSI;
+        pc_info.did = did;
+        pc_info.pasid = pasid;
+        break;
+
+    case VTD_INV_DESC_PASIDC_G_GLOBAL:
+        trace_vtd_pasid_cache_gsi();
+        pc_info.type = VTD_PASID_CACHE_GLOBAL_INV;
+        break;
+
+    default:
+        error_report_once("invalid granularity field in PASID-cache invalidate "
+                          "descriptor, hi: 0x%"PRIx64" lo: 0x%" PRIx64,
+                           inv_desc->val[1], inv_desc->val[0]);
+        return false;
+    }
+
+    vtd_pasid_cache_sync(s, &pc_info);
+    return true;
+}
+
 static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
                                      VTDInvDesc *inv_desc)
 {
@@ -3264,6 +3439,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
         }
         break;
 
+    case VTD_INV_DESC_PC:
+        trace_vtd_inv_desc("pasid-cache", inv_desc.val[1], inv_desc.val[0]);
+        if (!vtd_process_pasid_desc(s, &inv_desc)) {
+            return false;
+        }
+        break;
+
     case VTD_INV_DESC_PIOTLB:
         trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]);
         if (!vtd_process_piotlb_desc(s, &inv_desc)) {
@@ -3299,16 +3481,6 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
         }
         break;
 
-    /*
-     * TODO: the entity of below two cases will be implemented in future series.
-     * To make guest (which integrates scalable mode support patch set in
-     * iommu driver) work, just return true is enough so far.
-     */
-    case VTD_INV_DESC_PC:
-        if (s->scalable_mode) {
-            break;
-        }
-    /* fallthrough */
     default:
         error_report_once("%s: invalid inv desc: hi=%"PRIx64", lo=%"PRIx64
                           " (unknown type)", __func__, inv_desc.hi,
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 18bc22fc72..87059d26aa 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -315,6 +315,7 @@ typedef enum VTDFaultReason {
                                   * request while disabled */
     VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
 
+    VTD_FR_RTADDR_INV_TTM = 0x31,  /* Invalid TTM in RTADDR */
     /* PASID directory entry access failure */
     VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
     /* The Present(P) field of pasid directory entry is 0 */
@@ -492,6 +493,15 @@ typedef union VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
 #define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
 
+/* PASID-cache Invalidate Descriptor (pc_inv_dsc) fields */
+#define VTD_INV_DESC_PASIDC_G(x)        extract64((x)->val[0], 4, 2)
+#define VTD_INV_DESC_PASIDC_G_DSI       0
+#define VTD_INV_DESC_PASIDC_G_PASID_SI  1
+#define VTD_INV_DESC_PASIDC_G_GLOBAL    3
+#define VTD_INV_DESC_PASIDC_DID(x)      extract64((x)->val[0], 16, 16)
+#define VTD_INV_DESC_PASIDC_PASID(x)    extract64((x)->val[0], 32, 20)
+#define VTD_INV_DESC_PASIDC_RSVD_VAL0   0xfff000000000f1c0ULL
+
 /* Information about page-selective IOTLB invalidate */
 struct VTDIOTLBPageInvInfo {
     uint16_t domain_id;
@@ -552,6 +562,21 @@ typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL | ~VTD_HAW_MASK(aw))
 #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
 
+typedef enum VTDPCInvType {
+    /* VTD spec defined PASID cache invalidation type */
+    VTD_PASID_CACHE_DOMSI = VTD_INV_DESC_PASIDC_G_DSI,
+    VTD_PASID_CACHE_PASIDSI = VTD_INV_DESC_PASIDC_G_PASID_SI,
+    VTD_PASID_CACHE_GLOBAL_INV = VTD_INV_DESC_PASIDC_G_GLOBAL,
+} VTDPCInvType;
+
+typedef struct VTDPASIDCacheInfo {
+    VTDPCInvType type;
+    uint16_t did;
+    uint32_t pasid;
+    PCIBus *bus;
+    uint16_t devfn;
+} VTDPASIDCacheInfo;
+
 /* PASID Table Related Definitions */
 #define VTD_PASID_DIR_BASE_ADDR_MASK  (~0xfffULL)
 #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
@@ -573,7 +598,7 @@ typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_SM_PASID_ENTRY_PT          (4ULL << 6)
 
 #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted guest-address-width */
-#define VTD_SM_PASID_ENTRY_DID(val)    ((val) & VTD_DOMAIN_ID_MASK)
+#define VTD_SM_PASID_ENTRY_DID(x)      extract64((x)->val[1], 0, 16)
 
 #define VTD_SM_PASID_ENTRY_FLPM          3ULL
 #define VTD_SM_PASID_ENTRY_FLPTPTR       (~0xfffULL)
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index ac9e1a10aa..ae5bbfcdc0 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -24,6 +24,9 @@ vtd_inv_qi_head(uint16_t head) "read head %d"
 vtd_inv_qi_tail(uint16_t head) "write tail %d"
 vtd_inv_qi_fetch(void) ""
 vtd_context_cache_reset(void) ""
+vtd_pasid_cache_gsi(void) ""
+vtd_pasid_cache_dsi(uint16_t domain) "Domain selective PC invalidation domain 0x%"PRIx16
+vtd_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID selective PC invalidation domain 0x%"PRIx16" pasid 0x%"PRIx32
 vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
 vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
 vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 50f9b27a45..0e3826f6f0 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -95,6 +95,11 @@ struct VTDPASIDEntry {
     uint64_t val[8];
 };
 
+typedef struct VTDPASIDCacheEntry {
+    struct VTDPASIDEntry pasid_entry;
+    bool valid;
+} VTDPASIDCacheEntry;
+
 struct VTDAddressSpace {
     PCIBus *bus;
     uint8_t devfn;
@@ -107,6 +112,7 @@ struct VTDAddressSpace {
     MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */
     IntelIOMMUState *iommu_state;
     VTDContextCacheEntry context_cache_entry;
+    VTDPASIDCacheEntry pasid_cache_entry;
     QLIST_ENTRY(VTDAddressSpace) next;
     /* Superset of notifier flags that this address space has */
     IOMMUNotifierFlag notifier_flags;
-- 
2.47.1



  parent reply	other threads:[~2025-07-08 21:24 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-08 11:05 [PATCH v3 00/20] intel_iommu: Enable stage-1 translation for passthrough device Zhenzhong Duan
2025-07-08 11:05 ` [PATCH v3 01/20] intel_iommu: Rename vtd_ce_get_rid2pasid_entry to vtd_ce_get_pasid_entry Zhenzhong Duan
2025-07-08 11:05 ` [PATCH v3 02/20] hw/pci: Introduce pci_device_get_viommu_cap() Zhenzhong Duan
2025-07-09  0:39   ` Nicolin Chen
2025-07-09  3:38     ` Duan, Zhenzhong
2025-07-09  4:03       ` Nicolin Chen
2025-07-09  5:52         ` Duan, Zhenzhong
2025-07-09 17:55     ` Donald Dutile
2025-07-09 19:20       ` Nicolin Chen
2025-07-10  1:22         ` Donald Dutile
2025-07-15 15:28           ` Eric Auger
2025-07-15 16:42             ` Donald Dutile
2025-07-10  8:11         ` Shameerali Kolothum Thodi via
2025-07-10 17:01           ` Donald Dutile
2025-07-10 17:07             ` Donald Dutile
2025-07-10 17:25               ` Nicolin Chen
2025-07-10 17:16           ` Nicolin Chen
2025-07-11 13:18             ` Shameerali Kolothum Thodi via
2025-07-15 15:36   ` Eric Auger
2025-07-08 11:05 ` [PATCH v3 03/20] intel_iommu: Implement get_viommu_cap() callback Zhenzhong Duan
2025-07-15 16:32   ` Eric Auger
2025-07-08 11:05 ` [PATCH v3 04/20] vfio/iommufd: Force creating nested parent domain Zhenzhong Duan
2025-07-15 16:36   ` Eric Auger
2025-07-08 11:05 ` [PATCH v3 05/20] hw/pci: Export pci_device_get_iommu_bus_devfn() and return bool Zhenzhong Duan
2025-07-15 16:40   ` Eric Auger
2025-07-16  3:29     ` Duan, Zhenzhong
2025-07-08 11:05 ` [PATCH v3 06/20] intel_iommu: Introduce a new structure VTDHostIOMMUDevice Zhenzhong Duan
2025-07-08 11:05 ` [PATCH v3 07/20] intel_iommu: Check for compatibility with IOMMUFD backed device when x-flts=on Zhenzhong Duan
2025-07-16  9:22   ` Eric Auger
2025-07-16 10:31     ` Duan, Zhenzhong
2025-07-16 12:09       ` Eric Auger
2025-07-17  3:47         ` Duan, Zhenzhong
2025-07-17  6:48           ` Eric Auger
2025-07-17  7:03             ` Duan, Zhenzhong
2025-07-08 11:05 ` [PATCH v3 08/20] intel_iommu: Fail passthrough device under PCI bridge if x-flts=on Zhenzhong Duan
2025-07-16  9:53   ` Eric Auger
2025-07-17  3:24     ` Duan, Zhenzhong
2025-07-08 11:05 ` [PATCH v3 09/20] intel_iommu: Introduce two helpers vtd_as_from/to_iommu_pasid_locked Zhenzhong Duan
2025-07-16 12:53   ` Eric Auger
2025-07-17  3:48     ` Duan, Zhenzhong
2025-07-08 11:05 ` Zhenzhong Duan [this message]
2025-07-16 15:10   ` [PATCH v3 10/20] intel_iommu: Handle PASID entry removing and updating Eric Auger
2025-07-17  7:02     ` Duan, Zhenzhong
2025-07-08 11:05 ` [PATCH v3 11/20] intel_iommu: Handle PASID entry adding Zhenzhong Duan
2025-07-16 16:44   ` Eric Auger
2025-07-17  7:15     ` Duan, Zhenzhong
2025-07-08 11:05 ` [PATCH v3 12/20] intel_iommu: Introduce a new pasid cache invalidation type FORCE_RESET Zhenzhong Duan
2025-07-08 11:05 ` [PATCH v3 13/20] intel_iommu: Stick to system MR for IOMMUFD backed host device when x-fls=on Zhenzhong Duan
2025-07-08 11:05 ` [PATCH v3 14/20] intel_iommu: Bind/unbind guest page table to host Zhenzhong Duan
2025-07-08 11:05 ` [PATCH v3 15/20] intel_iommu: Replay pasid bindings after context cache invalidation Zhenzhong Duan
2025-07-08 11:05 ` [PATCH v3 16/20] intel_iommu: Propagate PASID-based iotlb invalidation to host Zhenzhong Duan
2025-07-08 11:05 ` [PATCH v3 17/20] intel_iommu: Replay all pasid bindings when either SRTP or TE bit is changed Zhenzhong Duan
2025-07-08 11:05 ` [PATCH v3 18/20] vfio: Add a new element bypass_ro in VFIOContainerBase Zhenzhong Duan
2025-07-08 11:06 ` [PATCH v3 19/20] Workaround for ERRATA_772415_SPR17 Zhenzhong Duan
2025-07-08 11:06 ` [PATCH v3 20/20] intel_iommu: Enable host device when x-flts=on in scalable mode Zhenzhong Duan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250708110601.633308-11-zhenzhong.duan@intel.com \
    --to=zhenzhong.duan@intel.com \
    --cc=alex.williamson@redhat.com \
    --cc=chao.p.peng@intel.com \
    --cc=clement.mathieu--drif@eviden.com \
    --cc=clg@redhat.com \
    --cc=ddutile@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joao.m.martins@oracle.com \
    --cc=kevin.tian@intel.com \
    --cc=mst@redhat.com \
    --cc=nicolinc@nvidia.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=shameerali.kolothum.thodi@huawei.com \
    --cc=yi.l.liu@intel.com \
    --cc=yi.y.sun@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).