From: Zhenzhong Duan <zhenzhong.duan@intel.com>
To: qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com,
eric.auger@redhat.com, mst@redhat.com, jasowang@redhat.com,
peterx@redhat.com, jgg@nvidia.com, nicolinc@nvidia.com,
shameerali.kolothum.thodi@huawei.com, joao.m.martins@oracle.com,
clement.mathieu--drif@eviden.com, kevin.tian@intel.com,
yi.l.liu@intel.com, chao.p.peng@intel.com,
Zhenzhong Duan <zhenzhong.duan@intel.com>,
Paolo Bonzini <pbonzini@redhat.com>,
Richard Henderson <richard.henderson@linaro.org>,
Eduardo Habkost <eduardo@habkost.net>,
Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Subject: [PATCH rfcv2 15/20] intel_iommu: ERRATA_772415 workaround
Date: Wed, 19 Feb 2025 16:22:23 +0800 [thread overview]
Message-ID: <20250219082228.3303163-16-zhenzhong.duan@intel.com> (raw)
In-Reply-To: <20250219082228.3303163-1-zhenzhong.duan@intel.com>
On a system affected by ERRATA_772415, IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17
is reported by IOMMU_DEVICE_GET_HW_INFO. Due to this erratum, even a read-only
range mapped in the stage-2 page table could still be written.
For reference, see the 4th Gen Intel Xeon Processor Scalable Family
Specification Update, Errata Details, SPR17 [0].
[0] https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/eagle-stream/sapphire-rapids-specification-update
We utilize the newly added IOMMUFD container/ioas/hwpt management framework in
VTD. Add a check to create a new VTDIOASContainer to hold RW-only mappings;
this VTDIOASContainer can then be used as the backend for a device with
ERRATA_772415. See the diagram below for details:
IntelIOMMUState
|
V
.------------------. .------------------. .-------------------.
| VTDIOASContainer |--->| VTDIOASContainer |--->| VTDIOASContainer |-->...
| (iommufd0,RW&RO) | | (iommufd1,RW&RO) | | (iommufd0,RW only)|
.------------------. .------------------. .-------------------.
| | |
| .-->... |
V V
.-------------------. .-------------------. .---------------.
| VTDS2Hwpt(CC) |--->| VTDS2Hwpt(non-CC) |-->... | VTDS2Hwpt(CC) |-->...
.-------------------. .-------------------. .---------------.
| | | |
| | | |
.-----------. .-----------. .------------. .------------.
| IOMMUFD | | IOMMUFD | | IOMMUFD | | IOMMUFD |
| Device(CC)| | Device(CC)| | Device | | Device(CC) |
| (iommufd0)| | (iommufd0)| | (non-CC) | | (errata) |
| | | | | (iommufd0) | | (iommufd0) |
.-----------. .-----------. .------------. .------------.
vtd_check_hiod() is changed to take a VTDHostIOMMUDevice pointer so that the
errata can be saved.
Suggested-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_internal.h | 1 +
include/hw/i386/intel_iommu.h | 1 +
hw/i386/intel_iommu.c | 26 +++++++++++++++++++-------
3 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 23b7e236b0..8558781af8 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -654,5 +654,6 @@ typedef struct VTDHostIOMMUDevice {
PCIBus *bus;
uint8_t devfn;
HostIOMMUDevice *hiod;
+ uint32_t errata;
} VTDHostIOMMUDevice;
#endif
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 594281c1d3..9b156dc32e 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -103,6 +103,7 @@ typedef struct VTDPASIDCacheEntry {
typedef struct VTDIOASContainer {
struct IOMMUFDBackend *iommufd;
uint32_t ioas_id;
+ uint32_t errata;
MemoryListener listener;
QLIST_HEAD(, VTDS2Hwpt) s2_hwpt_list;
QLIST_ENTRY(VTDIOASContainer) next;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e36ac44110..dae1716629 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2443,7 +2443,8 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
}
#ifdef CONFIG_IOMMUFD
-static bool iommufd_listener_skipped_section(MemoryRegionSection *section)
+static bool iommufd_listener_skipped_section(VTDIOASContainer *container,
+ MemoryRegionSection *section)
{
return !memory_region_is_ram(section->mr) ||
memory_region_is_protected(section->mr) ||
@@ -2453,7 +2454,8 @@ static bool iommufd_listener_skipped_section(MemoryRegionSection *section)
* are never accessed by the CPU and beyond the address width of
* some IOMMU hardware. TODO: VFIO should tell us the IOMMU width.
*/
- section->offset_within_address_space & (1ULL << 63);
+ section->offset_within_address_space & (1ULL << 63) ||
+ (container->errata && section->readonly);
}
static void iommufd_listener_region_add_s2domain(MemoryListener *listener,
@@ -2469,7 +2471,7 @@ static void iommufd_listener_region_add_s2domain(MemoryListener *listener,
Error *err = NULL;
int ret;
- if (iommufd_listener_skipped_section(section)) {
+ if (iommufd_listener_skipped_section(container, section)) {
return;
}
iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
@@ -2520,7 +2522,7 @@ static void iommufd_listener_region_del_s2domain(MemoryListener *listener,
Int128 llend, llsize;
int ret;
- if (iommufd_listener_skipped_section(section)) {
+ if (iommufd_listener_skipped_section(container, section)) {
return;
}
iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
@@ -2776,7 +2778,8 @@ static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
/* try to attach to an existing container in this space */
QLIST_FOREACH(container, &s->containers, next) {
- if (container->iommufd != iommufd) {
+ if (container->iommufd != iommufd ||
+ container->errata != vtd_hiod->errata) {
continue;
}
@@ -2803,6 +2806,7 @@ static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
container = g_malloc0(sizeof(*container));
container->iommufd = iommufd;
container->ioas_id = ioas_id;
+ container->errata = vtd_hiod->errata;
QLIST_INIT(&container->s2_hwpt_list);
if (vtd_device_attach_container(vtd_hiod, container, pasid, pe, hwpt,
@@ -5329,9 +5333,10 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
return vtd_dev_as;
}
-static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
+static bool vtd_check_hiod(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
Error **errp)
{
+ HostIOMMUDevice *hiod = vtd_hiod->hiod;
HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
int ret;
@@ -5388,6 +5393,12 @@ static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
return false;
}
+ ret = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_ERRATA, errp);
+ if (ret < 0) {
+ return false;
+ }
+ vtd_hiod->errata = ret;
+
error_setg(errp, "host device is uncompatible with stage-1 translation");
return false;
}
@@ -5419,7 +5430,8 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
vtd_hiod->iommu_state = s;
vtd_hiod->hiod = hiod;
- if (!vtd_check_hiod(s, hiod, errp)) {
+ if (!vtd_check_hiod(s, vtd_hiod, errp)) {
+ g_free(vtd_hiod);
vtd_iommu_unlock(s);
return false;
}
--
2.34.1
next prev parent reply other threads:[~2025-02-19 8:30 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-19 8:22 [PATCH rfcv2 00/20] intel_iommu: Enable stage-1 translation for passthrough device Zhenzhong Duan
2025-02-19 8:22 ` [PATCH rfcv2 01/20] backends/iommufd: Add helpers for invalidating user-managed HWPT Zhenzhong Duan
2025-02-20 16:47 ` Eric Auger
2025-02-28 2:26 ` Duan, Zhenzhong
2025-02-24 10:03 ` Shameerali Kolothum Thodi via
2025-02-28 9:36 ` Duan, Zhenzhong
2025-02-19 8:22 ` [PATCH rfcv2 02/20] vfio/iommufd: Add properties and handlers to TYPE_HOST_IOMMU_DEVICE_IOMMUFD Zhenzhong Duan
2025-02-20 17:42 ` Eric Auger
2025-02-28 5:39 ` Duan, Zhenzhong
2025-02-19 8:22 ` [PATCH rfcv2 03/20] HostIOMMUDevice: Introduce realize_late callback Zhenzhong Duan
2025-02-20 17:48 ` Eric Auger
2025-02-28 8:16 ` Duan, Zhenzhong
2025-03-06 15:53 ` Eric Auger
2025-04-07 11:19 ` Cédric Le Goater
2025-04-08 8:00 ` Cédric Le Goater
2025-04-09 8:27 ` Duan, Zhenzhong
2025-04-09 9:58 ` Cédric Le Goater
2025-02-19 8:22 ` [PATCH rfcv2 04/20] vfio/iommufd: Implement HostIOMMUDeviceClass::realize_late() handler Zhenzhong Duan
2025-02-20 18:07 ` Eric Auger
2025-02-28 8:23 ` Duan, Zhenzhong
2025-02-19 8:22 ` [PATCH rfcv2 05/20] vfio/iommufd: Implement [at|de]tach_hwpt handlers Zhenzhong Duan
2025-02-20 18:13 ` Eric Auger
2025-02-28 8:24 ` Duan, Zhenzhong
2025-03-06 15:56 ` Eric Auger
2025-02-19 8:22 ` [PATCH rfcv2 06/20] host_iommu_device: Define two new capabilities HOST_IOMMU_DEVICE_CAP_[NESTING|FS1GP] Zhenzhong Duan
2025-02-20 18:41 ` Eric Auger
2025-02-20 18:44 ` Eric Auger
2025-02-28 8:29 ` Duan, Zhenzhong
2025-03-06 15:59 ` Eric Auger
2025-03-06 19:45 ` Nicolin Chen
2025-03-10 3:48 ` Duan, Zhenzhong
2025-02-19 8:22 ` [PATCH rfcv2 07/20] iommufd: Implement query of HOST_IOMMU_DEVICE_CAP_[NESTING|FS1GP] Zhenzhong Duan
2025-02-20 19:00 ` Eric Auger
2025-02-28 8:32 ` Duan, Zhenzhong
2025-02-19 8:22 ` [PATCH rfcv2 08/20] iommufd: Implement query of HOST_IOMMU_DEVICE_CAP_ERRATA Zhenzhong Duan
2025-02-20 18:55 ` Eric Auger
2025-02-28 8:31 ` Duan, Zhenzhong
2025-02-19 8:22 ` [PATCH rfcv2 09/20] intel_iommu: Rename vtd_ce_get_rid2pasid_entry to vtd_ce_get_pasid_entry Zhenzhong Duan
2025-02-21 6:39 ` CLEMENT MATHIEU--DRIF
2025-02-21 10:11 ` Eric Auger
2025-02-28 8:47 ` Duan, Zhenzhong
2025-02-19 8:22 ` [PATCH rfcv2 10/20] intel_iommu: Optimize context entry cache utilization Zhenzhong Duan
2025-02-21 10:00 ` Eric Auger
2025-02-28 8:34 ` Duan, Zhenzhong
2025-02-19 8:22 ` [PATCH rfcv2 11/20] intel_iommu: Check for compatibility with IOMMUFD backed device when x-flts=on Zhenzhong Duan
2025-02-21 12:49 ` Eric Auger
2025-02-21 14:18 ` Eric Auger
2025-02-28 8:57 ` Duan, Zhenzhong
2025-02-19 8:22 ` [PATCH rfcv2 12/20] intel_iommu: Introduce a new structure VTDHostIOMMUDevice Zhenzhong Duan
2025-02-21 13:03 ` Eric Auger
2025-02-28 8:58 ` Duan, Zhenzhong
2025-02-19 8:22 ` [PATCH rfcv2 13/20] intel_iommu: Add PASID cache management infrastructure Zhenzhong Duan
2025-02-21 17:02 ` Eric Auger
2025-02-28 9:35 ` Duan, Zhenzhong
2025-02-19 8:22 ` [PATCH rfcv2 14/20] intel_iommu: Bind/unbind guest page table to host Zhenzhong Duan
2025-02-19 8:22 ` Zhenzhong Duan [this message]
2025-02-19 8:22 ` [PATCH rfcv2 16/20] intel_iommu: Replay pasid binds after context cache invalidation Zhenzhong Duan
2025-02-19 8:22 ` [PATCH rfcv2 17/20] intel_iommu: Propagate PASID-based iotlb invalidation to host Zhenzhong Duan
2025-02-19 8:22 ` [PATCH rfcv2 18/20] intel_iommu: Refresh pasid bind when either SRTP or TE bit is changed Zhenzhong Duan
2025-02-19 8:22 ` [PATCH rfcv2 19/20] intel_iommu: Bypass replay in stage-1 page table mode Zhenzhong Duan
2025-02-19 8:22 ` [PATCH rfcv2 20/20] intel_iommu: Enable host device when x-flts=on in scalable mode Zhenzhong Duan
2025-02-20 19:03 ` [PATCH rfcv2 00/20] intel_iommu: Enable stage-1 translation for passthrough device Eric Auger
2025-02-21 6:08 ` Duan, Zhenzhong
2025-04-05 3:01 ` Donald Dutile
2025-05-19 8:37 ` Duan, Zhenzhong
2025-05-19 15:39 ` Donald Dutile
2025-05-20 9:13 ` Duan, Zhenzhong
2025-05-20 10:47 ` Donald Dutile
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250219082228.3303163-16-zhenzhong.duan@intel.com \
--to=zhenzhong.duan@intel.com \
--cc=alex.williamson@redhat.com \
--cc=chao.p.peng@intel.com \
--cc=clement.mathieu--drif@eviden.com \
--cc=clg@redhat.com \
--cc=eduardo@habkost.net \
--cc=eric.auger@redhat.com \
--cc=jasowang@redhat.com \
--cc=jgg@nvidia.com \
--cc=joao.m.martins@oracle.com \
--cc=kevin.tian@intel.com \
--cc=marcel.apfelbaum@gmail.com \
--cc=mst@redhat.com \
--cc=nicolinc@nvidia.com \
--cc=pbonzini@redhat.com \
--cc=peterx@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=richard.henderson@linaro.org \
--cc=shameerali.kolothum.thodi@huawei.com \
--cc=yi.l.liu@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).