From: Zhenzhong Duan <zhenzhong.duan@intel.com>
To: qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com,
eric.auger@redhat.com, mst@redhat.com, jasowang@redhat.com,
peterx@redhat.com, ddutile@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, shameerali.kolothum.thodi@huawei.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
kevin.tian@intel.com, yi.l.liu@intel.com, chao.p.peng@intel.com,
Zhenzhong Duan <zhenzhong.duan@intel.com>,
Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
Paolo Bonzini <pbonzini@redhat.com>,
Richard Henderson <richard.henderson@linaro.org>,
Eduardo Habkost <eduardo@habkost.net>
Subject: [PATCH rfcv3 16/21] intel_iommu: ERRATA_772415 workaround
Date: Wed, 21 May 2025 19:14:46 +0800 [thread overview]
Message-ID: <20250521111452.3316354-17-zhenzhong.duan@intel.com> (raw)
In-Reply-To: <20250521111452.3316354-1-zhenzhong.duan@intel.com>
On a system influenced by ERRATA_772415, IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17
is repored by IOMMU_DEVICE_GET_HW_INFO. Due to this errata, even the readonly
range mapped on stage-2 page table could still be written.
Reference from 4th Gen Intel Xeon Processor Scalable Family Specification
Update, Errata Details, SPR17.
[0] https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/eagle-stream/sapphire-rapids-specification-update
We utilize the new added IOMMUFD container/ioas/hwpt management framework in
VTD. Add a check to create new VTDIOASContainer to only hold RW mappings,
then this VTDIOASContainer can be used as backend for device with
ERRATA_772415. See below diagram for details:
IntelIOMMUState
|
V
.------------------. .------------------. .-------------------.
| VTDIOASContainer |--->| VTDIOASContainer |--->| VTDIOASContainer |-->...
| (iommufd0,RW&RO) | | (iommufd1,RW&RO) | | (iommufd0,only RW)|
.------------------. .------------------. .-------------------.
| | |
| .-->... |
V V
.-------------------. .-------------------. .---------------.
| VTDS2Hwpt(CC) |--->| VTDS2Hwpt(non-CC) |-->... | VTDS2Hwpt(CC) |-->...
.-------------------. .-------------------. .---------------.
| | | |
| | | |
.-----------. .-----------. .------------. .------------.
| IOMMUFD | | IOMMUFD | | IOMMUFD | | IOMMUFD |
| Device(CC)| | Device(CC)| | Device | | Device(CC) |
| (iommufd0)| | (iommufd0)| | (non-CC) | | (errata) |
| | | | | (iommufd0) | | (iommufd0) |
.-----------. .-----------. .------------. .------------.
Changed to pass VTDHostIOMMUDevice pointer to vtd_check_hdev() so errata
could be saved.
Suggested-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_internal.h | 1 +
include/hw/i386/intel_iommu.h | 1 +
hw/i386/intel_iommu.c | 25 +++++++++++++++++--------
3 files changed, 19 insertions(+), 8 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index e76f43bb8f..75d840f9fe 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -654,5 +654,6 @@ typedef struct VTDHostIOMMUDevice {
PCIBus *bus;
uint8_t devfn;
HostIOMMUDevice *hiod;
+ uint32_t errata;
} VTDHostIOMMUDevice;
#endif
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 594281c1d3..9b156dc32e 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -103,6 +103,7 @@ typedef struct VTDPASIDCacheEntry {
typedef struct VTDIOASContainer {
struct IOMMUFDBackend *iommufd;
uint32_t ioas_id;
+ uint32_t errata;
MemoryListener listener;
QLIST_HEAD(, VTDS2Hwpt) s2_hwpt_list;
QLIST_ENTRY(VTDIOASContainer) next;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 3269a66ac7..9ffc2a8ffc 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2437,7 +2437,8 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
}
#ifdef CONFIG_IOMMUFD
-static bool iommufd_listener_skipped_section(MemoryRegionSection *section)
+static bool iommufd_listener_skipped_section(VTDIOASContainer *container,
+ MemoryRegionSection *section)
{
return !memory_region_is_ram(section->mr) ||
memory_region_is_protected(section->mr) ||
@@ -2447,7 +2448,8 @@ static bool iommufd_listener_skipped_section(MemoryRegionSection *section)
* are never accessed by the CPU and beyond the address width of
* some IOMMU hardware. TODO: VFIO should tell us the IOMMU width.
*/
- section->offset_within_address_space & (1ULL << 63);
+ section->offset_within_address_space & (1ULL << 63) ||
+ (container->errata && section->readonly);
}
static void iommufd_listener_region_add_s2domain(MemoryListener *listener,
@@ -2463,7 +2465,7 @@ static void iommufd_listener_region_add_s2domain(MemoryListener *listener,
Error *err = NULL;
int ret;
- if (iommufd_listener_skipped_section(section)) {
+ if (iommufd_listener_skipped_section(container, section)) {
return;
}
iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
@@ -2514,7 +2516,7 @@ static void iommufd_listener_region_del_s2domain(MemoryListener *listener,
Int128 llend, llsize;
int ret;
- if (iommufd_listener_skipped_section(section)) {
+ if (iommufd_listener_skipped_section(container, section)) {
return;
}
iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
@@ -2770,7 +2772,8 @@ static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
/* try to attach to an existing container in this space */
QLIST_FOREACH(container, &s->containers, next) {
- if (container->iommufd != iommufd) {
+ if (container->iommufd != iommufd ||
+ container->errata != vtd_hiod->errata) {
continue;
}
@@ -2797,6 +2800,7 @@ static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
container = g_malloc0(sizeof(*container));
container->iommufd = iommufd;
container->ioas_id = ioas_id;
+ container->errata = vtd_hiod->errata;
QLIST_INIT(&container->s2_hwpt_list);
if (vtd_device_attach_container(vtd_hiod, container, pasid, pe, hwpt,
@@ -5355,9 +5359,10 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
return vtd_dev_as;
}
-static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
+static bool vtd_check_hiod(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
Error **errp)
{
+ HostIOMMUDevice *hiod = vtd_hiod->hiod;
HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
int ret;
@@ -5399,7 +5404,7 @@ static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
}
/*
- * HOST_IOMMU_DEVICE_CAP_NESTING/FS1GP are VTD vendor specific
+ * HOST_IOMMU_DEVICE_CAP_NESTING/FS1GP/ERRATA are VTD vendor specific
* capabilities, so get_cap() should never fail on them now that
* HOST_IOMMU_DEVICE_IOMMU_HW_INFO_TYPE_INTEL_VTD type check passed
* above.
@@ -5416,6 +5421,9 @@ static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
return false;
}
+ ret = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_ERRATA, errp);
+ vtd_hiod->errata = ret;
+
error_setg(errp, "host device is uncompatible with stage-1 translation");
return false;
}
@@ -5447,7 +5455,8 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
vtd_hiod->iommu_state = s;
vtd_hiod->hiod = hiod;
- if (!vtd_check_hiod(s, hiod, errp)) {
+ if (!vtd_check_hiod(s, vtd_hiod, errp)) {
+ g_free(vtd_hiod);
vtd_iommu_unlock(s);
return false;
}
--
2.34.1
next prev parent reply other threads:[~2025-05-21 11:21 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-21 11:14 [PATCH rfcv3 00/21] intel_iommu: Enable stage-1 translation for passthrough device Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 01/21] backends/iommufd: Add a helper to invalidate user-managed HWPT Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 02/21] vfio/iommufd: Add properties and handlers to TYPE_HOST_IOMMU_DEVICE_IOMMUFD Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 03/21] vfio/iommufd: Initialize iommufd specific members in HostIOMMUDeviceIOMMUFD Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 04/21] vfio/iommufd: Implement [at|de]tach_hwpt handlers Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 05/21] vfio/iommufd: Save vendor specific device info Zhenzhong Duan
2025-05-21 21:57 ` Nicolin Chen
2025-05-22 9:21 ` Duan, Zhenzhong
2025-05-22 19:35 ` Nicolin Chen
2025-05-26 12:15 ` Cédric Le Goater
2025-05-27 2:12 ` Duan, Zhenzhong
2025-05-21 11:14 ` [PATCH rfcv3 06/21] iommufd: Implement query of host VTD IOMMU's capability Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 07/21] intel_iommu: Rename vtd_ce_get_rid2pasid_entry to vtd_ce_get_pasid_entry Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 08/21] intel_iommu: Optimize context entry cache utilization Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 09/21] intel_iommu: Check for compatibility with IOMMUFD backed device when x-flts=on Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 10/21] intel_iommu: Introduce a new structure VTDHostIOMMUDevice Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 11/21] intel_iommu: Introduce two helpers vtd_as_from/to_iommu_pasid_locked Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 12/21] intel_iommu: Handle PASID entry removing and updating Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 13/21] intel_iommu: Handle PASID entry adding Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 14/21] intel_iommu: Introduce a new pasid cache invalidation type FORCE_RESET Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 15/21] intel_iommu: Bind/unbind guest page table to host Zhenzhong Duan
2025-05-21 22:49 ` Nicolin Chen
2025-05-22 6:50 ` Duan, Zhenzhong
2025-05-22 19:29 ` Nicolin Chen
2025-05-23 6:26 ` Yi Liu
2025-05-26 3:34 ` Duan, Zhenzhong
2025-05-23 6:22 ` Yi Liu
2025-05-23 6:52 ` Duan, Zhenzhong
2025-05-23 21:12 ` Nicolin Chen
2025-05-26 3:46 ` Duan, Zhenzhong
2025-05-26 7:24 ` Yi Liu
2025-05-26 17:35 ` Nicolin Chen
2025-05-28 7:12 ` Duan, Zhenzhong
2025-06-12 12:53 ` Yi Liu
2025-06-12 14:06 ` Shameerali Kolothum Thodi via
2025-06-16 6:04 ` Nicolin Chen
2025-06-16 3:24 ` Duan, Zhenzhong
2025-06-16 6:34 ` Nicolin Chen
2025-06-16 8:54 ` Duan, Zhenzhong
2025-06-16 9:36 ` Yi Liu
2025-06-16 10:16 ` Duan, Zhenzhong
2025-06-17 7:04 ` Yi Liu
2025-06-16 5:59 ` Nicolin Chen
2025-06-16 7:38 ` Yi Liu
2025-06-17 3:22 ` Nicolin Chen
2025-06-17 6:48 ` Yi Liu
2025-06-16 5:47 ` Nicolin Chen
2025-06-16 8:15 ` Duan, Zhenzhong
2025-06-17 3:14 ` Nicolin Chen
2025-06-17 12:37 ` Jason Gunthorpe
2025-06-17 13:03 ` Yi Liu
2025-06-17 13:11 ` Jason Gunthorpe
2025-06-18 2:51 ` Duan, Zhenzhong
2025-06-18 3:40 ` Yi Liu
2025-06-18 11:43 ` Jason Gunthorpe
2025-05-21 11:14 ` Zhenzhong Duan [this message]
2025-05-21 11:14 ` [PATCH rfcv3 17/21] intel_iommu: Replay pasid binds after context cache invalidation Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 18/21] intel_iommu: Propagate PASID-based iotlb invalidation to host Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 19/21] intel_iommu: Refresh pasid bind when either SRTP or TE bit is changed Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 20/21] intel_iommu: Bypass replay in stage-1 page table mode Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 21/21] intel_iommu: Enable host device when x-flts=on in scalable mode Zhenzhong Duan
2025-05-26 12:19 ` [PATCH rfcv3 00/21] intel_iommu: Enable stage-1 translation for passthrough device Cédric Le Goater
2025-05-27 2:16 ` Duan, Zhenzhong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250521111452.3316354-17-zhenzhong.duan@intel.com \
--to=zhenzhong.duan@intel.com \
--cc=alex.williamson@redhat.com \
--cc=chao.p.peng@intel.com \
--cc=clement.mathieu--drif@eviden.com \
--cc=clg@redhat.com \
--cc=ddutile@redhat.com \
--cc=eduardo@habkost.net \
--cc=eric.auger@redhat.com \
--cc=jasowang@redhat.com \
--cc=jgg@nvidia.com \
--cc=joao.m.martins@oracle.com \
--cc=kevin.tian@intel.com \
--cc=marcel.apfelbaum@gmail.com \
--cc=mst@redhat.com \
--cc=nicolinc@nvidia.com \
--cc=pbonzini@redhat.com \
--cc=peterx@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=richard.henderson@linaro.org \
--cc=shameerali.kolothum.thodi@huawei.com \
--cc=yi.l.liu@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).