qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
To: qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com,
	eric.auger@redhat.com, peterx@redhat.com, jasowang@redhat.com,
	mst@redhat.com, jgg@nvidia.com, nicolinc@nvidia.com,
	joao.m.martins@oracle.com, kevin.tian@intel.com,
	yi.l.liu@intel.com, yi.y.sun@intel.com, chao.p.peng@intel.com,
	Zhenzhong Duan <zhenzhong.duan@intel.com>,
	Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	Eduardo Habkost <eduardo@habkost.net>
Subject: [PATCH rfcv1 06/23] intel_iommu: check and sync host IOMMU cap/ecap in scalable modern mode
Date: Mon, 15 Jan 2024 18:37:18 +0800	[thread overview]
Message-ID: <20240115103735.132209-7-zhenzhong.duan@intel.com> (raw)
In-Reply-To: <20240115103735.132209-1-zhenzhong.duan@intel.com>

When vIOMMU is configured in scalable modern mode, stage-1 page table is
supported. We need to check and sync host side cap/ecap with vIOMMU
cap/ecap.

This happens when PCIe device (i.e., VFIO case) sets IOMMUFDDevice to vIOMMU.
Some of the bits in cap/ecap is user controllable, then user setting is
compared with host cap/ecap for compatibility, i.e., if intel_iommu is
configured in scalable modern but VTD_ECAP_NEST isn't set in host ecap,
that device will fail to attach. For other bits not controlled by user,
i.e. VTD_CAP/ECAP_MASK bits, host cap/ecap is picked.

Below is the sequence to initial and finalize vIOMMU cap/ecap:

vtd_cap_init() initializes iommu->cap/ecap. ---- vtd_cap_init()
iommu->host_cap/ecap is initialized as iommu->cap/ecap.  ---- vtd_init()
iommu->host_cap/ecap is updated some bits(VTD_CAP/ECAP_MASK) with host setting. ---- vtd_sync_hw_info()
iommu->cap/ecap is finalized as iommu->host_cap/ecap.  ---- vtd_machine_done_hook()

iommu->host_cap/ecap is a temporary storage to hold intermediate value
when synthesize host cap/ecap with vIOMMU's initial configured cap/ecap.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu_internal.h | 10 ++++
 hw/i386/intel_iommu.c          | 83 ++++++++++++++++++++++++++++++----
 2 files changed, 85 insertions(+), 8 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index ee4a784a35..6d881adf9b 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -191,13 +191,19 @@
 #define VTD_ECAP_PT                 (1ULL << 6)
 #define VTD_ECAP_SC                 (1ULL << 7)
 #define VTD_ECAP_MHMV               (15ULL << 20)
+#define VTD_ECAP_NEST               (1ULL << 26)
 #define VTD_ECAP_SRS                (1ULL << 31)
 #define VTD_ECAP_EAFS               (1ULL << 34)
+#define VTD_ECAP_PSS(val)           (((val) & 0x1fULL) << 35)
 #define VTD_ECAP_PASID              (1ULL << 40)
 #define VTD_ECAP_SMTS               (1ULL << 43)
 #define VTD_ECAP_SLTS               (1ULL << 46)
 #define VTD_ECAP_FLTS               (1ULL << 47)
 
+#define VTD_ECAP_MASK               (VTD_ECAP_SRS | VTD_ECAP_EAFS)
+#define VTD_GET_PSS(val)            (((val) >> 35) & 0x1f)
+#define VTD_ECAP_PSS_MASK           (0x1fULL << 35)
+
 /* CAP_REG */
 /* (offset >> 4) << 24 */
 #define VTD_CAP_FRO                 (DMAR_FRCD_REG_OFFSET << 20)
@@ -214,11 +220,15 @@
 #define VTD_CAP_DRAIN_WRITE         (1ULL << 54)
 #define VTD_CAP_DRAIN_READ          (1ULL << 55)
 #define VTD_CAP_FL1GP               (1ULL << 56)
+#define VTD_CAP_FL5LP               (1ULL << 60)
 #define VTD_CAP_DRAIN               (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
 #define VTD_CAP_CM                  (1ULL << 7)
 #define VTD_PASID_ID_SHIFT          20
 #define VTD_PASID_ID_MASK           ((1ULL << VTD_PASID_ID_SHIFT) - 1)
 
+
+#define VTD_CAP_MASK                (VTD_CAP_FL1GP | VTD_CAP_FL5LP)
+
 /* Supported Adjusted Guest Address Widths */
 #define VTD_CAP_SAGAW_SHIFT         8
 #define VTD_CAP_SAGAW_MASK          (0x1fULL << VTD_CAP_SAGAW_SHIFT)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 1d007c33a8..c0973aaccb 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3819,19 +3819,82 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
     return vtd_dev_as;
 }
 
+static bool vtd_check_hw_info(IntelIOMMUState *s, struct iommu_hw_info_vtd *vtd,
+                              Error **errp)
+{
+    if (!(vtd->ecap_reg & VTD_ECAP_NEST)) {
+        error_setg(errp, "Need nested translation on host in modern mode");
+        return false;
+    }
+
+    return true;
+}
+
+/* cap/ecap are readonly after vIOMMU finalized */
+static bool vtd_check_hw_info_finalized(IntelIOMMUState *s,
+                                        struct iommu_hw_info_vtd *vtd,
+                                        Error **errp)
+{
+    if (s->cap & ~vtd->cap_reg & VTD_CAP_MASK) {
+        error_setg(errp, "vIOMMU cap %lx isn't compatible with host %llx",
+                   s->cap, vtd->cap_reg);
+        return false;
+    }
+
+    if (s->ecap & ~vtd->ecap_reg & VTD_ECAP_MASK) {
+        error_setg(errp, "vIOMMU ecap %lx isn't compatible with host %llx",
+                   s->ecap, vtd->ecap_reg);
+        return false;
+    }
+
+    if (s->ecap & vtd->ecap_reg & VTD_ECAP_PASID &&
+        VTD_GET_PSS(s->ecap) > VTD_GET_PSS(vtd->ecap_reg)) {
+        error_setg(errp, "vIOMMU pasid bits %lu > host pasid bits %llu",
+                   VTD_GET_PSS(s->ecap), VTD_GET_PSS(vtd->ecap_reg));
+        return false;
+    }
+
+    return true;
+}
+
 static bool vtd_sync_hw_info(IntelIOMMUState *s, struct iommu_hw_info_vtd *vtd,
                              Error **errp)
 {
-    uint64_t addr_width;
+    uint64_t cap, ecap, addr_width, pasid_bits;
 
-    addr_width = (vtd->cap_reg >> 16) & 0x3fULL;
-    if (s->aw_bits > addr_width) {
-        error_setg(errp, "User aw-bits: %u > host address width: %lu",
-                   s->aw_bits, addr_width);
+    if (!s->scalable_modern) {
+        addr_width = (vtd->cap_reg >> 16) & 0x3fULL;
+        if (s->aw_bits > addr_width) {
+            error_setg(errp, "User aw-bits: %u > host address width: %lu",
+                       s->aw_bits, addr_width);
+            return false;
+        }
+        return true;
+    }
+
+    if (!vtd_check_hw_info(s, vtd, errp)) {
         return false;
     }
 
-    /* TODO: check and sync host cap/ecap into vIOMMU cap/ecap */
+    if (s->cap_finalized) {
+        return vtd_check_hw_info_finalized(s, vtd, errp);
+    }
+
+    /* sync host cap/ecap to vIOMMU */
+
+    cap = s->host_cap & vtd->cap_reg & VTD_CAP_MASK;
+    s->host_cap &= ~VTD_CAP_MASK;
+    s->host_cap |= cap;
+    ecap = s->host_ecap & vtd->ecap_reg & VTD_ECAP_MASK;
+    s->host_ecap &= ~VTD_ECAP_MASK;
+    s->host_ecap |= ecap;
+
+    pasid_bits = VTD_GET_PSS(vtd->ecap_reg);
+    if (s->host_ecap & VTD_ECAP_PASID &&
+        VTD_GET_PSS(s->host_ecap) > pasid_bits) {
+        s->host_ecap &= ~VTD_ECAP_PSS_MASK;
+        s->host_ecap |= VTD_ECAP_PSS(pasid_bits);
+    }
 
     return true;
 }
@@ -3873,9 +3936,13 @@ static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn,
 
     assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
 
-    /* None IOMMUFD case */
-    if (!idev) {
+    if (!s->scalable_modern && !idev) {
+        /* Legacy vIOMMU and non-IOMMUFD backend */
         return 0;
+    } else if (!idev) {
+        /* Modern vIOMMU and non-IOMMUFD backend */
+        error_setg(errp, "Need IOMMUFD backend to setup nested page table");
+        return -1;
     }
 
     if (!vtd_check_idev(s, idev, errp)) {
-- 
2.34.1



  parent reply	other threads:[~2024-01-15 10:40 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-15 10:37 [PATCH rfcv1 00/23] intel_iommu: Enable stage-1 translation Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 01/23] Update linux header to support nested hwpt alloc Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 02/23] backends/iommufd: add helpers for allocating user-managed HWPT Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 03/23] backends/iommufd_device: introduce IOMMUFDDevice targeted interface Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 04/23] vfio: implement IOMMUFDDevice interface callbacks Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 05/23] intel_iommu: add a placeholder variable for scalable modern mode Zhenzhong Duan
2024-01-15 10:37 ` Zhenzhong Duan [this message]
2024-01-15 10:37 ` [PATCH rfcv1 07/23] intel_iommu: process PASID cache invalidation Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 08/23] intel_iommu: add PASID cache management infrastructure Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 09/23] vfio/iommufd_device: Add ioas_id in IOMMUFDDevice and pass to vIOMMU Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 10/23] intel_iommu: bind/unbind guest page table to host Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 11/23] intel_iommu: ERRATA_772415 workaround Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 12/23] intel_iommu: replay pasid binds after context cache invalidation Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 13/23] intel_iommu: process PASID-based iotlb invalidation Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 14/23] intel_iommu: propagate PASID-based iotlb invalidation to host Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 15/23] intel_iommu: process PASID-based Device-TLB invalidation Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 16/23] intel_iommu: rename slpte in iotlb_entry to pte Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 17/23] intel_iommu: implement firt level translation Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 18/23] intel_iommu: fix the fault reason report Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 19/23] intel_iommu: introduce pasid iotlb cache Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 20/23] intel_iommu: piotlb invalidation should notify unmap Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 21/23] intel_iommu: invalidate piotlb when flush pasid Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 22/23] intel_iommu: refresh pasid bind after pasid cache force reset Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 23/23] intel_iommu: modify x-scalable-mode to be string option Zhenzhong Duan
     [not found]   ` <CGME20240131144013eucas1p22d46339ae42f54dd59c23e8b95502dda@eucas1p2.samsung.com>
2024-01-31 14:40     ` Joel Granados
2024-01-31 15:24       ` Yi Liu
2024-02-04 21:05         ` Joel Granados
2024-01-22  4:29 ` [PATCH rfcv1 00/23] intel_iommu: Enable stage-1 translation Jason Wang
2024-01-22  5:59   ` Duan, Zhenzhong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240115103735.132209-7-zhenzhong.duan@intel.com \
    --to=zhenzhong.duan@intel.com \
    --cc=alex.williamson@redhat.com \
    --cc=chao.p.peng@intel.com \
    --cc=clg@redhat.com \
    --cc=eduardo@habkost.net \
    --cc=eric.auger@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joao.m.martins@oracle.com \
    --cc=kevin.tian@intel.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=nicolinc@nvidia.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=yi.l.liu@intel.com \
    --cc=yi.y.sun@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).