qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
To: qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
	mst@redhat.com, jasowang@redhat.com, peterx@redhat.com,
	ddutile@redhat.com, jgg@nvidia.com, nicolinc@nvidia.com,
	skolothumtho@nvidia.com, joao.m.martins@oracle.com,
	clement.mathieu--drif@eviden.com, kevin.tian@intel.com,
	yi.l.liu@intel.com, chao.p.peng@intel.com,
	Zhenzhong Duan <zhenzhong.duan@intel.com>
Subject: [PATCH v8 11/23] intel_iommu_accel: Stick to system MR for IOMMUFD backed host device when x-flts=on
Date: Mon, 17 Nov 2025 04:37:14 -0500	[thread overview]
Message-ID: <20251117093729.1121324-12-zhenzhong.duan@intel.com> (raw)
In-Reply-To: <20251117093729.1121324-1-zhenzhong.duan@intel.com>

When guest enables scalable mode and setup first stage page table, we don't
want to use IOMMU MR but rather continue using the system MR for IOMMUFD
backed host device.

Then default HWPT in VFIO contains GPA->HPA mappings which could be reused
as nesting parent HWPT to construct nested HWPT in vIOMMU.

Move vtd_as_key into intel_iommu_internal.h as it's also used by accel code.

Suggested-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu_accel.h    |  6 ++++++
 hw/i386/intel_iommu_internal.h | 11 +++++++++++
 hw/i386/intel_iommu.c          | 28 +++++++++++++++-------------
 hw/i386/intel_iommu_accel.c    | 18 ++++++++++++++++++
 4 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
index 7ebf137a1a..dbe6ee6982 100644
--- a/hw/i386/intel_iommu_accel.h
+++ b/hw/i386/intel_iommu_accel.h
@@ -15,6 +15,7 @@
 #ifdef CONFIG_VTD_ACCEL
 bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
                           Error **errp);
+VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as);
 #else
 static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
                                         VTDHostIOMMUDevice *vtd_hiod,
@@ -24,5 +25,10 @@ static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
                "host IOMMU is incompatible with guest first stage translation");
     return false;
 }
+
+static inline VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as)
+{
+    return NULL;
+}
 #endif
 #endif
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 02522f64e0..d8dad18304 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -685,4 +685,15 @@ typedef struct VTDHostIOMMUDevice {
     uint8_t devfn;
     HostIOMMUDevice *hiod;
 } VTDHostIOMMUDevice;
+
+/*
+ * PCI bus number (or SID) is not reliable since the device is usaully
+ * initialized before guest can configure the PCI bridge
+ * (SECONDARY_BUS_NUMBER).
+ */
+struct vtd_as_key {
+    PCIBus *bus;
+    uint8_t devfn;
+    uint32_t pasid;
+};
 #endif
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4ebf56a74f..29e0281af8 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -56,17 +56,6 @@
 #define VTD_PE_GET_SS_LEVEL(pe) \
     (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW))
 
-/*
- * PCI bus number (or SID) is not reliable since the device is usaully
- * initialized before guest can configure the PCI bridge
- * (SECONDARY_BUS_NUMBER).
- */
-struct vtd_as_key {
-    PCIBus *bus;
-    uint8_t devfn;
-    uint32_t pasid;
-};
-
 /* bus/devfn is PCI device's real BDF not the aliased one */
 struct vtd_hiod_key {
     PCIBus *bus;
@@ -1731,12 +1720,25 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as)
 /* Return whether the device is using IOMMU translation. */
 static bool vtd_switch_address_space(VTDAddressSpace *as)
 {
+    IntelIOMMUState *s;
     bool use_iommu, pt;
 
     assert(as);
 
-    use_iommu = as->iommu_state->dmar_enabled && !vtd_as_pt_enabled(as);
-    pt = as->iommu_state->dmar_enabled && vtd_as_pt_enabled(as);
+    s = as->iommu_state;
+    use_iommu = s->dmar_enabled && !vtd_as_pt_enabled(as);
+    pt = s->dmar_enabled && vtd_as_pt_enabled(as);
+
+    /*
+     * When guest enables scalable mode and sets up first stage page table,
+     * we stick to system MR for IOMMUFD backed host device. Then its
+     * default hwpt contains GPA->HPA mappings which is used directly if
+     * PGTT=PT and used as nesting parent if PGTT=FST. Otherwise fall back
+     * to original processing.
+     */
+    if (s->root_scalable && s->fsts && vtd_find_hiod_iommufd(as)) {
+        use_iommu = false;
+    }
 
     trace_vtd_switch_address_space(pci_bus_num(as->bus),
                                    VTD_PCI_SLOT(as->devfn),
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index ead6c42879..ebfc503d64 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -50,3 +50,21 @@ bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
                "host IOMMU is incompatible with guest first stage translation");
     return false;
 }
+
+VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as)
+{
+    IntelIOMMUState *s = as->iommu_state;
+    struct vtd_as_key key = {
+        .bus = as->bus,
+        .devfn = as->devfn,
+    };
+    VTDHostIOMMUDevice *vtd_hiod = g_hash_table_lookup(s->vtd_host_iommu_dev,
+                                                       &key);
+
+    if (vtd_hiod && vtd_hiod->hiod &&
+        object_dynamic_cast(OBJECT(vtd_hiod->hiod),
+                            TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
+        return vtd_hiod;
+    }
+    return NULL;
+}
-- 
2.47.1



  parent reply	other threads:[~2025-11-17  9:40 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-17  9:37 [PATCH v8 00/23] intel_iommu: Enable first stage translation for passthrough device Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 01/23] intel_iommu: Rename vtd_ce_get_rid2pasid_entry to vtd_ce_get_pasid_entry Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 02/23] intel_iommu: Delete RPS capability related supporting code Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 03/23] intel_iommu: Update terminology to match VTD spec Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 04/23] hw/pci: Export pci_device_get_iommu_bus_devfn() and return bool Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 05/23] hw/pci: Introduce pci_device_get_viommu_flags() Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 06/23] intel_iommu: Implement get_viommu_flags() callback Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 07/23] intel_iommu: Introduce a new structure VTDHostIOMMUDevice Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 08/23] vfio/iommufd: Force creating nesting parent HWPT Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 09/23] intel_iommu_accel: Check for compatibility with IOMMUFD backed device when x-flts=on Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 10/23] intel_iommu_accel: Fail passthrough device under PCI bridge if x-flts=on Zhenzhong Duan
2025-11-17  9:37 ` Zhenzhong Duan [this message]
2025-11-17  9:37 ` [PATCH v8 12/23] intel_iommu: Add some macros and inline functions Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 13/23] intel_iommu_accel: Bind/unbind guest page table to host Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 14/23] intel_iommu_accel: Propagate PASID-based iotlb invalidation " Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 15/23] intel_iommu: Replay all pasid bindings when either SRTP or TE bit is changed Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 16/23] intel_iommu: Replay pasid bindings after context cache invalidation Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 17/23] vfio/listener: Bypass readonly region for dirty tracking Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 18/23] intel_iommu: Add migration support with x-flts=on Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 19/23] hw/pci: Introduce pci_device_get_host_iommu_quirks() Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 20/23] intel_iommu_accel: Implement get_host_iommu_quirks() callback Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 21/23] Workaround for ERRATA_772415_SPR17 Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 22/23] intel_iommu: Enable host device when x-flts=on in scalable mode Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 23/23] docs/devel: Add IOMMUFD nesting documentation Zhenzhong Duan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251117093729.1121324-12-zhenzhong.duan@intel.com \
    --to=zhenzhong.duan@intel.com \
    --cc=alex@shazbot.org \
    --cc=chao.p.peng@intel.com \
    --cc=clement.mathieu--drif@eviden.com \
    --cc=clg@redhat.com \
    --cc=ddutile@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joao.m.martins@oracle.com \
    --cc=kevin.tian@intel.com \
    --cc=mst@redhat.com \
    --cc=nicolinc@nvidia.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=skolothumtho@nvidia.com \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).