qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
To: qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com,
	eric.auger@redhat.com, mst@redhat.com, jasowang@redhat.com,
	peterx@redhat.com, ddutile@redhat.com, jgg@nvidia.com,
	nicolinc@nvidia.com, shameerali.kolothum.thodi@huawei.com,
	joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
	kevin.tian@intel.com, yi.l.liu@intel.com, chao.p.peng@intel.com,
	Zhenzhong Duan <zhenzhong.duan@intel.com>,
	Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	Eduardo Habkost <eduardo@habkost.net>
Subject: [PATCH rfcv3 16/21] intel_iommu: ERRATA_772415 workaround
Date: Wed, 21 May 2025 19:14:46 +0800	[thread overview]
Message-ID: <20250521111452.3316354-17-zhenzhong.duan@intel.com> (raw)
In-Reply-To: <20250521111452.3316354-1-zhenzhong.duan@intel.com>

On a system affected by ERRATA_772415, IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17
is reported by IOMMU_DEVICE_GET_HW_INFO. Due to this erratum, even a read-only
range mapped in the stage-2 page table could still be written.

For reference, see the 4th Gen Intel Xeon Processor Scalable Family
Specification Update, Errata Details, SPR17 [0].

[0] https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/eagle-stream/sapphire-rapids-specification-update

We utilize the newly added IOMMUFD container/ioas/hwpt management framework in
VTD. Add a check to create a new VTDIOASContainer that holds only RW mappings;
this VTDIOASContainer can then be used as the backend for devices with
ERRATA_772415. See the diagram below for details:

      IntelIOMMUState
             |
             V
    .------------------.    .------------------.    .-------------------.
    | VTDIOASContainer |--->| VTDIOASContainer |--->| VTDIOASContainer  |-->...
    | (iommufd0,RW&RO) |    | (iommufd1,RW&RO) |    | (iommufd0,only RW)|
    .------------------.    .------------------.    .-------------------.
             |                       |                              |
             |                       .-->...                        |
             V                                                      V
      .-------------------.    .-------------------.          .---------------.
      |   VTDS2Hwpt(CC)   |--->| VTDS2Hwpt(non-CC) |-->...    | VTDS2Hwpt(CC) |-->...
      .-------------------.    .-------------------.          .---------------.
          |            |               |                            |
          |            |               |                            |
    .-----------.  .-----------.  .------------.              .------------.
    | IOMMUFD   |  | IOMMUFD   |  | IOMMUFD    |              | IOMMUFD    |
    | Device(CC)|  | Device(CC)|  | Device     |              | Device(CC) |
    | (iommufd0)|  | (iommufd0)|  | (non-CC)   |              | (errata)   |
    |           |  |           |  | (iommufd0) |              | (iommufd0) |
    .-----------.  .-----------.  .------------.              .------------.

Also change vtd_check_hiod() to take a VTDHostIOMMUDevice pointer so that the
errata value can be saved in it.

Suggested-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu_internal.h |  1 +
 include/hw/i386/intel_iommu.h  |  1 +
 hw/i386/intel_iommu.c          | 25 +++++++++++++++++--------
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index e76f43bb8f..75d840f9fe 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -654,5 +654,6 @@ typedef struct VTDHostIOMMUDevice {
     PCIBus *bus;
     uint8_t devfn;
     HostIOMMUDevice *hiod;
+    uint32_t errata;
 } VTDHostIOMMUDevice;
 #endif
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 594281c1d3..9b156dc32e 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -103,6 +103,7 @@ typedef struct VTDPASIDCacheEntry {
 typedef struct VTDIOASContainer {
     struct IOMMUFDBackend *iommufd;
     uint32_t ioas_id;
+    uint32_t errata;
     MemoryListener listener;
     QLIST_HEAD(, VTDS2Hwpt) s2_hwpt_list;
     QLIST_ENTRY(VTDIOASContainer) next;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 3269a66ac7..9ffc2a8ffc 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2437,7 +2437,8 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
 }
 
 #ifdef CONFIG_IOMMUFD
-static bool iommufd_listener_skipped_section(MemoryRegionSection *section)
+static bool iommufd_listener_skipped_section(VTDIOASContainer *container,
+                                             MemoryRegionSection *section)
 {
     return !memory_region_is_ram(section->mr) ||
            memory_region_is_protected(section->mr) ||
@@ -2447,7 +2448,8 @@ static bool iommufd_listener_skipped_section(MemoryRegionSection *section)
             * are never accessed by the CPU and beyond the address width of
             * some IOMMU hardware.  TODO: VFIO should tell us the IOMMU width.
             */
-           section->offset_within_address_space & (1ULL << 63);
+           section->offset_within_address_space & (1ULL << 63) ||
+           (container->errata && section->readonly);
 }
 
 static void iommufd_listener_region_add_s2domain(MemoryListener *listener,
@@ -2463,7 +2465,7 @@ static void iommufd_listener_region_add_s2domain(MemoryListener *listener,
     Error *err = NULL;
     int ret;
 
-    if (iommufd_listener_skipped_section(section)) {
+    if (iommufd_listener_skipped_section(container, section)) {
         return;
     }
     iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
@@ -2514,7 +2516,7 @@ static void iommufd_listener_region_del_s2domain(MemoryListener *listener,
     Int128 llend, llsize;
     int ret;
 
-    if (iommufd_listener_skipped_section(section)) {
+    if (iommufd_listener_skipped_section(container, section)) {
         return;
     }
     iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
@@ -2770,7 +2772,8 @@ static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
 
     /* try to attach to an existing container in this space */
     QLIST_FOREACH(container, &s->containers, next) {
-        if (container->iommufd != iommufd) {
+        if (container->iommufd != iommufd ||
+            container->errata != vtd_hiod->errata) {
             continue;
         }
 
@@ -2797,6 +2800,7 @@ static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
     container = g_malloc0(sizeof(*container));
     container->iommufd = iommufd;
     container->ioas_id = ioas_id;
+    container->errata = vtd_hiod->errata;
     QLIST_INIT(&container->s2_hwpt_list);
 
     if (vtd_device_attach_container(vtd_hiod, container, pasid, pe, hwpt,
@@ -5355,9 +5359,10 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
     return vtd_dev_as;
 }
 
-static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
+static bool vtd_check_hiod(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
                            Error **errp)
 {
+    HostIOMMUDevice *hiod = vtd_hiod->hiod;
     HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
     int ret;
 
@@ -5399,7 +5404,7 @@ static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
     }
 
     /*
-     * HOST_IOMMU_DEVICE_CAP_NESTING/FS1GP are VTD vendor specific
+     * HOST_IOMMU_DEVICE_CAP_NESTING/FS1GP/ERRATA are VTD vendor specific
      * capabilities, so get_cap() should never fail on them now that
      * HOST_IOMMU_DEVICE_IOMMU_HW_INFO_TYPE_INTEL_VTD type check passed
      * above.
@@ -5416,6 +5421,9 @@ static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
         return false;
     }
 
+    ret = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_ERRATA, errp);
+    vtd_hiod->errata = ret;
+
     error_setg(errp, "host device is uncompatible with stage-1 translation");
     return false;
 }
@@ -5447,7 +5455,8 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
     vtd_hiod->iommu_state = s;
     vtd_hiod->hiod = hiod;
 
-    if (!vtd_check_hiod(s, hiod, errp)) {
+    if (!vtd_check_hiod(s, vtd_hiod, errp)) {
+        g_free(vtd_hiod);
         vtd_iommu_unlock(s);
         return false;
     }
-- 
2.34.1



  parent reply	other threads:[~2025-05-21 11:21 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-21 11:14 [PATCH rfcv3 00/21] intel_iommu: Enable stage-1 translation for passthrough device Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 01/21] backends/iommufd: Add a helper to invalidate user-managed HWPT Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 02/21] vfio/iommufd: Add properties and handlers to TYPE_HOST_IOMMU_DEVICE_IOMMUFD Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 03/21] vfio/iommufd: Initialize iommufd specific members in HostIOMMUDeviceIOMMUFD Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 04/21] vfio/iommufd: Implement [at|de]tach_hwpt handlers Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 05/21] vfio/iommufd: Save vendor specific device info Zhenzhong Duan
2025-05-21 21:57   ` Nicolin Chen
2025-05-22  9:21     ` Duan, Zhenzhong
2025-05-22 19:35       ` Nicolin Chen
2025-05-26 12:15   ` Cédric Le Goater
2025-05-27  2:12     ` Duan, Zhenzhong
2025-05-21 11:14 ` [PATCH rfcv3 06/21] iommufd: Implement query of host VTD IOMMU's capability Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 07/21] intel_iommu: Rename vtd_ce_get_rid2pasid_entry to vtd_ce_get_pasid_entry Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 08/21] intel_iommu: Optimize context entry cache utilization Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 09/21] intel_iommu: Check for compatibility with IOMMUFD backed device when x-flts=on Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 10/21] intel_iommu: Introduce a new structure VTDHostIOMMUDevice Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 11/21] intel_iommu: Introduce two helpers vtd_as_from/to_iommu_pasid_locked Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 12/21] intel_iommu: Handle PASID entry removing and updating Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 13/21] intel_iommu: Handle PASID entry adding Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 14/21] intel_iommu: Introduce a new pasid cache invalidation type FORCE_RESET Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 15/21] intel_iommu: Bind/unbind guest page table to host Zhenzhong Duan
2025-05-21 22:49   ` Nicolin Chen
2025-05-22  6:50     ` Duan, Zhenzhong
2025-05-22 19:29       ` Nicolin Chen
2025-05-23  6:26         ` Yi Liu
2025-05-26  3:34         ` Duan, Zhenzhong
2025-05-23  6:22     ` Yi Liu
2025-05-23  6:52       ` Duan, Zhenzhong
2025-05-23 21:12       ` Nicolin Chen
2025-05-26  3:46         ` Duan, Zhenzhong
2025-05-26  7:24         ` Yi Liu
2025-05-26 17:35           ` Nicolin Chen
2025-05-28  7:12             ` Duan, Zhenzhong
2025-06-12 12:53               ` Yi Liu
2025-06-12 14:06                 ` Shameerali Kolothum Thodi via
2025-06-16  6:04                   ` Nicolin Chen
2025-06-16  3:24                 ` Duan, Zhenzhong
2025-06-16  6:34                   ` Nicolin Chen
2025-06-16  8:54                     ` Duan, Zhenzhong
2025-06-16  9:36                       ` Yi Liu
2025-06-16 10:16                         ` Duan, Zhenzhong
2025-06-17  7:04                           ` Yi Liu
2025-06-16  5:59                 ` Nicolin Chen
2025-06-16  7:38                   ` Yi Liu
2025-06-17  3:22                     ` Nicolin Chen
2025-06-17  6:48                       ` Yi Liu
2025-06-16  5:47               ` Nicolin Chen
2025-06-16  8:15                 ` Duan, Zhenzhong
2025-06-17  3:14                   ` Nicolin Chen
2025-06-17 12:37                     ` Jason Gunthorpe
2025-06-17 13:03                       ` Yi Liu
2025-06-17 13:11                         ` Jason Gunthorpe
2025-06-18  2:51                           ` Duan, Zhenzhong
2025-06-18  3:40                           ` Yi Liu
2025-06-18 11:43                             ` Jason Gunthorpe
2025-05-21 11:14 ` Zhenzhong Duan [this message]
2025-05-21 11:14 ` [PATCH rfcv3 17/21] intel_iommu: Replay pasid binds after context cache invalidation Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 18/21] intel_iommu: Propagate PASID-based iotlb invalidation to host Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 19/21] intel_iommu: Refresh pasid bind when either SRTP or TE bit is changed Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 20/21] intel_iommu: Bypass replay in stage-1 page table mode Zhenzhong Duan
2025-05-21 11:14 ` [PATCH rfcv3 21/21] intel_iommu: Enable host device when x-flts=on in scalable mode Zhenzhong Duan
2025-05-26 12:19 ` [PATCH rfcv3 00/21] intel_iommu: Enable stage-1 translation for passthrough device Cédric Le Goater
2025-05-27  2:16   ` Duan, Zhenzhong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250521111452.3316354-17-zhenzhong.duan@intel.com \
    --to=zhenzhong.duan@intel.com \
    --cc=alex.williamson@redhat.com \
    --cc=chao.p.peng@intel.com \
    --cc=clement.mathieu--drif@eviden.com \
    --cc=clg@redhat.com \
    --cc=ddutile@redhat.com \
    --cc=eduardo@habkost.net \
    --cc=eric.auger@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joao.m.martins@oracle.com \
    --cc=kevin.tian@intel.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=nicolinc@nvidia.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=shameerali.kolothum.thodi@huawei.com \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).