qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
To: qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com,
	eric.auger@redhat.com, mst@redhat.com, jasowang@redhat.com,
	peterx@redhat.com, jgg@nvidia.com, nicolinc@nvidia.com,
	shameerali.kolothum.thodi@huawei.com, joao.m.martins@oracle.com,
	clement.mathieu--drif@eviden.com, kevin.tian@intel.com,
	yi.l.liu@intel.com, chao.p.peng@intel.com,
	Zhenzhong Duan <zhenzhong.duan@intel.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	Eduardo Habkost <eduardo@habkost.net>,
	Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Subject: [PATCH rfcv2 15/20] intel_iommu: ERRATA_772415 workaround
Date: Wed, 19 Feb 2025 16:22:23 +0800	[thread overview]
Message-ID: <20250219082228.3303163-16-zhenzhong.duan@intel.com> (raw)
In-Reply-To: <20250219082228.3303163-1-zhenzhong.duan@intel.com>

On a system influenced by ERRATA_772415, IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17
is repored by IOMMU_DEVICE_GET_HW_INFO. Due to this errata, even the readonly
range mapped on stage-2 page table could still be written.

Reference from 4th Gen Intel Xeon Processor Scalable Family Specification
Update, Errata Details, SPR17.

[0] https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/eagle-stream/sapphire-rapids-specification-update

We utilize the new added IOMMUFD container/ioas/hwpt management framework in
VTD. Add a check to create new VTDIOASContainer to hold RW-only mappings,
then this VTDIOASContainer can be used as backend for device with
ERRATA_772415. See below diagram for details:

      IntelIOMMUState
             |
             V
    .------------------.    .------------------.    .-------------------.
    | VTDIOASContainer |--->| VTDIOASContainer |--->| VTDIOASContainer  |-->...
    | (iommufd0,RW&RO) |    | (iommufd1,RW&RO) |    | (iommufd0,RW only)|
    .------------------.    .------------------.    .-------------------.
             |                       |                              |
             |                       .-->...                        |
             V                                                      V
      .-------------------.    .-------------------.          .---------------.
      |   VTDS2Hwpt(CC)   |--->| VTDS2Hwpt(non-CC) |-->...    | VTDS2Hwpt(CC) |-->...
      .-------------------.    .-------------------.          .---------------.
          |            |               |                            |
          |            |               |                            |
    .-----------.  .-----------.  .------------.              .------------.
    | IOMMUFD   |  | IOMMUFD   |  | IOMMUFD    |              | IOMMUFD    |
    | Device(CC)|  | Device(CC)|  | Device     |              | Device(CC) |
    | (iommufd0)|  | (iommufd0)|  | (non-CC)   |              | (errata)   |
    |           |  |           |  | (iommufd0) |              | (iommufd0) |
    .-----------.  .-----------.  .------------.              .------------.

Changed to pass VTDHostIOMMUDevice pointer to vtd_check_hdev() so errata
could be saved.

Suggested-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu_internal.h |  1 +
 include/hw/i386/intel_iommu.h  |  1 +
 hw/i386/intel_iommu.c          | 26 +++++++++++++++++++-------
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 23b7e236b0..8558781af8 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -654,5 +654,6 @@ typedef struct VTDHostIOMMUDevice {
     PCIBus *bus;
     uint8_t devfn;
     HostIOMMUDevice *hiod;
+    uint32_t errata;
 } VTDHostIOMMUDevice;
 #endif
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 594281c1d3..9b156dc32e 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -103,6 +103,7 @@ typedef struct VTDPASIDCacheEntry {
 typedef struct VTDIOASContainer {
     struct IOMMUFDBackend *iommufd;
     uint32_t ioas_id;
+    uint32_t errata;
     MemoryListener listener;
     QLIST_HEAD(, VTDS2Hwpt) s2_hwpt_list;
     QLIST_ENTRY(VTDIOASContainer) next;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e36ac44110..dae1716629 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2443,7 +2443,8 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
 }
 
 #ifdef CONFIG_IOMMUFD
-static bool iommufd_listener_skipped_section(MemoryRegionSection *section)
+static bool iommufd_listener_skipped_section(VTDIOASContainer *container,
+                                             MemoryRegionSection *section)
 {
     return !memory_region_is_ram(section->mr) ||
            memory_region_is_protected(section->mr) ||
@@ -2453,7 +2454,8 @@ static bool iommufd_listener_skipped_section(MemoryRegionSection *section)
             * are never accessed by the CPU and beyond the address width of
             * some IOMMU hardware.  TODO: VFIO should tell us the IOMMU width.
             */
-           section->offset_within_address_space & (1ULL << 63);
+           section->offset_within_address_space & (1ULL << 63) ||
+           (container->errata && section->readonly);
 }
 
 static void iommufd_listener_region_add_s2domain(MemoryListener *listener,
@@ -2469,7 +2471,7 @@ static void iommufd_listener_region_add_s2domain(MemoryListener *listener,
     Error *err = NULL;
     int ret;
 
-    if (iommufd_listener_skipped_section(section)) {
+    if (iommufd_listener_skipped_section(container, section)) {
         return;
     }
     iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
@@ -2520,7 +2522,7 @@ static void iommufd_listener_region_del_s2domain(MemoryListener *listener,
     Int128 llend, llsize;
     int ret;
 
-    if (iommufd_listener_skipped_section(section)) {
+    if (iommufd_listener_skipped_section(container, section)) {
         return;
     }
     iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
@@ -2776,7 +2778,8 @@ static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
 
     /* try to attach to an existing container in this space */
     QLIST_FOREACH(container, &s->containers, next) {
-        if (container->iommufd != iommufd) {
+        if (container->iommufd != iommufd ||
+            container->errata != vtd_hiod->errata) {
             continue;
         }
 
@@ -2803,6 +2806,7 @@ static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
     container = g_malloc0(sizeof(*container));
     container->iommufd = iommufd;
     container->ioas_id = ioas_id;
+    container->errata = vtd_hiod->errata;
     QLIST_INIT(&container->s2_hwpt_list);
 
     if (vtd_device_attach_container(vtd_hiod, container, pasid, pe, hwpt,
@@ -5329,9 +5333,10 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
     return vtd_dev_as;
 }
 
-static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
+static bool vtd_check_hiod(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
                            Error **errp)
 {
+    HostIOMMUDevice *hiod = vtd_hiod->hiod;
     HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
     int ret;
 
@@ -5388,6 +5393,12 @@ static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
         return false;
     }
 
+    ret = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_ERRATA, errp);
+    if (ret < 0) {
+        return false;
+    }
+    vtd_hiod->errata = ret;
+
     error_setg(errp, "host device is uncompatible with stage-1 translation");
     return false;
 }
@@ -5419,7 +5430,8 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
     vtd_hiod->iommu_state = s;
     vtd_hiod->hiod = hiod;
 
-    if (!vtd_check_hiod(s, hiod, errp)) {
+    if (!vtd_check_hiod(s, vtd_hiod, errp)) {
+        g_free(vtd_hiod);
         vtd_iommu_unlock(s);
         return false;
     }
-- 
2.34.1



  parent reply	other threads:[~2025-02-19  8:30 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-19  8:22 [PATCH rfcv2 00/20] intel_iommu: Enable stage-1 translation for passthrough device Zhenzhong Duan
2025-02-19  8:22 ` [PATCH rfcv2 01/20] backends/iommufd: Add helpers for invalidating user-managed HWPT Zhenzhong Duan
2025-02-20 16:47   ` Eric Auger
2025-02-28  2:26     ` Duan, Zhenzhong
2025-02-24 10:03   ` Shameerali Kolothum Thodi via
2025-02-28  9:36     ` Duan, Zhenzhong
2025-02-19  8:22 ` [PATCH rfcv2 02/20] vfio/iommufd: Add properties and handlers to TYPE_HOST_IOMMU_DEVICE_IOMMUFD Zhenzhong Duan
2025-02-20 17:42   ` Eric Auger
2025-02-28  5:39     ` Duan, Zhenzhong
2025-02-19  8:22 ` [PATCH rfcv2 03/20] HostIOMMUDevice: Introduce realize_late callback Zhenzhong Duan
2025-02-20 17:48   ` Eric Auger
2025-02-28  8:16     ` Duan, Zhenzhong
2025-03-06 15:53       ` Eric Auger
2025-04-07 11:19   ` Cédric Le Goater
2025-04-08  8:00     ` Cédric Le Goater
2025-04-09  8:27       ` Duan, Zhenzhong
2025-04-09  9:58         ` Cédric Le Goater
2025-02-19  8:22 ` [PATCH rfcv2 04/20] vfio/iommufd: Implement HostIOMMUDeviceClass::realize_late() handler Zhenzhong Duan
2025-02-20 18:07   ` Eric Auger
2025-02-28  8:23     ` Duan, Zhenzhong
2025-02-19  8:22 ` [PATCH rfcv2 05/20] vfio/iommufd: Implement [at|de]tach_hwpt handlers Zhenzhong Duan
2025-02-20 18:13   ` Eric Auger
2025-02-28  8:24     ` Duan, Zhenzhong
2025-03-06 15:56       ` Eric Auger
2025-02-19  8:22 ` [PATCH rfcv2 06/20] host_iommu_device: Define two new capabilities HOST_IOMMU_DEVICE_CAP_[NESTING|FS1GP] Zhenzhong Duan
2025-02-20 18:41   ` Eric Auger
2025-02-20 18:44     ` Eric Auger
2025-02-28  8:29     ` Duan, Zhenzhong
2025-03-06 15:59       ` Eric Auger
2025-03-06 19:45         ` Nicolin Chen
2025-03-10  3:48           ` Duan, Zhenzhong
2025-02-19  8:22 ` [PATCH rfcv2 07/20] iommufd: Implement query of HOST_IOMMU_DEVICE_CAP_[NESTING|FS1GP] Zhenzhong Duan
2025-02-20 19:00   ` Eric Auger
2025-02-28  8:32     ` Duan, Zhenzhong
2025-02-19  8:22 ` [PATCH rfcv2 08/20] iommufd: Implement query of HOST_IOMMU_DEVICE_CAP_ERRATA Zhenzhong Duan
2025-02-20 18:55   ` Eric Auger
2025-02-28  8:31     ` Duan, Zhenzhong
2025-02-19  8:22 ` [PATCH rfcv2 09/20] intel_iommu: Rename vtd_ce_get_rid2pasid_entry to vtd_ce_get_pasid_entry Zhenzhong Duan
2025-02-21  6:39   ` CLEMENT MATHIEU--DRIF
2025-02-21 10:11   ` Eric Auger
2025-02-28  8:47     ` Duan, Zhenzhong
2025-02-19  8:22 ` [PATCH rfcv2 10/20] intel_iommu: Optimize context entry cache utilization Zhenzhong Duan
2025-02-21 10:00   ` Eric Auger
2025-02-28  8:34     ` Duan, Zhenzhong
2025-02-19  8:22 ` [PATCH rfcv2 11/20] intel_iommu: Check for compatibility with IOMMUFD backed device when x-flts=on Zhenzhong Duan
2025-02-21 12:49   ` Eric Auger
2025-02-21 14:18     ` Eric Auger
2025-02-28  8:57     ` Duan, Zhenzhong
2025-02-19  8:22 ` [PATCH rfcv2 12/20] intel_iommu: Introduce a new structure VTDHostIOMMUDevice Zhenzhong Duan
2025-02-21 13:03   ` Eric Auger
2025-02-28  8:58     ` Duan, Zhenzhong
2025-02-19  8:22 ` [PATCH rfcv2 13/20] intel_iommu: Add PASID cache management infrastructure Zhenzhong Duan
2025-02-21 17:02   ` Eric Auger
2025-02-28  9:35     ` Duan, Zhenzhong
2025-02-19  8:22 ` [PATCH rfcv2 14/20] intel_iommu: Bind/unbind guest page table to host Zhenzhong Duan
2025-02-19  8:22 ` Zhenzhong Duan [this message]
2025-02-19  8:22 ` [PATCH rfcv2 16/20] intel_iommu: Replay pasid binds after context cache invalidation Zhenzhong Duan
2025-02-19  8:22 ` [PATCH rfcv2 17/20] intel_iommu: Propagate PASID-based iotlb invalidation to host Zhenzhong Duan
2025-02-19  8:22 ` [PATCH rfcv2 18/20] intel_iommu: Refresh pasid bind when either SRTP or TE bit is changed Zhenzhong Duan
2025-02-19  8:22 ` [PATCH rfcv2 19/20] intel_iommu: Bypass replay in stage-1 page table mode Zhenzhong Duan
2025-02-19  8:22 ` [PATCH rfcv2 20/20] intel_iommu: Enable host device when x-flts=on in scalable mode Zhenzhong Duan
2025-02-20 19:03 ` [PATCH rfcv2 00/20] intel_iommu: Enable stage-1 translation for passthrough device Eric Auger
2025-02-21  6:08   ` Duan, Zhenzhong
2025-04-05  3:01 ` Donald Dutile
2025-05-19  8:37   ` Duan, Zhenzhong
2025-05-19 15:39     ` Donald Dutile
2025-05-20  9:13       ` Duan, Zhenzhong
2025-05-20 10:47         ` Donald Dutile

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250219082228.3303163-16-zhenzhong.duan@intel.com \
    --to=zhenzhong.duan@intel.com \
    --cc=alex.williamson@redhat.com \
    --cc=chao.p.peng@intel.com \
    --cc=clement.mathieu--drif@eviden.com \
    --cc=clg@redhat.com \
    --cc=eduardo@habkost.net \
    --cc=eric.auger@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joao.m.martins@oracle.com \
    --cc=kevin.tian@intel.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=nicolinc@nvidia.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=shameerali.kolothum.thodi@huawei.com \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).