From: Zhenzhong Duan <zhenzhong.duan@intel.com>
To: qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com,
eric.auger@redhat.com, peterx@redhat.com, jasowang@redhat.com,
mst@redhat.com, jgg@nvidia.com, nicolinc@nvidia.com,
joao.m.martins@oracle.com, kevin.tian@intel.com,
yi.l.liu@intel.com, yi.y.sun@intel.com, chao.p.peng@intel.com,
Yi Sun <yi.y.sun@linux.intel.com>,
Zhenzhong Duan <zhenzhong.duan@intel.com>,
Paolo Bonzini <pbonzini@redhat.com>,
Richard Henderson <richard.henderson@linaro.org>,
Eduardo Habkost <eduardo@habkost.net>,
Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Subject: [PATCH rfcv1 6/6] intel_iommu: add a framework to check and sync host IOMMU cap/ecap
Date: Mon, 15 Jan 2024 18:13:13 +0800 [thread overview]
Message-ID: <20240115101313.131139-7-zhenzhong.duan@intel.com> (raw)
In-Reply-To: <20240115101313.131139-1-zhenzhong.duan@intel.com>
From: Yi Liu <yi.l.liu@intel.com>
Add a framework to check and synchronize host IOMMU cap/ecap with
vIOMMU cap/ecap.
Currently only stage-2 translation is supported, which is backed by a
shadow page table on the host side, so we don't need an exact match of
every cap/ecap bit between vIOMMU and host. However, we can still
use this framework to ensure that the host and vIOMMU address widths
are compatible at least, i.e., vIOMMU's aw_bits <= host aw_bits,
a check that was missing before.
When stage-1 translation is supported in the future, a.k.a. scalable
modern mode, we need to ensure compatibility of every bit. Some
bits are user controllable; they should be checked against the host side
to ensure compatibility. Other bits are not; they should be synced
into vIOMMU cap/ecap for compatibility.
The sequence will be:
vtd_cap_init() initializes iommu->cap/ecap. ---- vtd_cap_init()
iommu->host_cap/ecap is initialized as iommu->cap/ecap. ---- vtd_init()
iommu->host_cap/ecap is checked against host cap/ecap and some of its bits are updated from it. ---- vtd_sync_hw_info()
iommu->cap/ecap is finalized as iommu->host_cap/ecap. ---- vtd_machine_done_hook()
iommu->host_cap/ecap is temporary storage that holds the intermediate value
while synthesizing host cap/ecap with vIOMMU's initially configured cap/ecap.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
include/hw/i386/intel_iommu.h | 4 ++
hw/i386/intel_iommu.c | 78 +++++++++++++++++++++++++++++++----
2 files changed, 75 insertions(+), 7 deletions(-)
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index c65fdde56f..b8abbcce12 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -292,6 +292,9 @@ struct IntelIOMMUState {
uint64_t cap; /* The value of capability reg */
uint64_t ecap; /* The value of extended capability reg */
+ uint64_t host_cap; /* The value of host capability reg */
+ uint64_t host_ecap; /* The value of host ext-capability reg */
+
uint32_t context_cache_gen; /* Should be in [1,MAX] */
GHashTable *iotlb; /* IOTLB */
@@ -314,6 +317,7 @@ struct IntelIOMMUState {
bool dma_translation; /* Whether DMA translation supported */
bool pasid; /* Whether to support PASID */
+ bool cap_finalized; /* Whether VTD capability finalized */
/*
* Protects IOMMU states in general. Currently it protects the
* per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace.
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4c1d058ebd..be03fcbf52 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3819,6 +3819,47 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
return vtd_dev_as;
}
+/*
+ * Check the vIOMMU configuration against the host IOMMU capabilities
+ * reported in @vtd.
+ *
+ * Only the address width is validated for now: the address width
+ * configured for the vIOMMU (s->aw_bits) must not exceed the width the
+ * host hardware reports in its capability register.
+ *
+ * Returns true when the configuration is compatible; otherwise sets
+ * @errp and returns false.
+ */
+static bool vtd_sync_hw_info(IntelIOMMUState *s, struct iommu_hw_info_vtd *vtd,
+                             Error **errp)
+{
+    /*
+     * CAP_REG.MGAW (bits 21:16) holds the maximum guest address width
+     * minus one, so add one to obtain the width in bits.  The result is
+     * at most 64, so an unsigned int is wide enough and matches the "%u"
+     * conversion below on every host.
+     */
+    unsigned int addr_width = ((vtd->cap_reg >> 16) & 0x3fULL) + 1;
+
+    if (s->aw_bits > addr_width) {
+        error_setg(errp, "User aw-bits: %u > host address width: %u",
+                   s->aw_bits, addr_width);
+        return false;
+    }
+
+    /* TODO: check and sync host cap/ecap into vIOMMU cap/ecap */
+
+    return true;
+}
+
+/*
+ * A virtual VT-d that wants nested translation has to know the
+ * capabilities of the host IOMMU behind each assigned device.
+ *
+ * Fetch the host IOMMU hardware info of @idev, require it to be of the
+ * Intel VT-d type, and hand it to vtd_sync_hw_info() for the actual
+ * compatibility check.
+ *
+ * Returns true on success; otherwise sets @errp and returns false.
+ */
+static bool vtd_check_idev(IntelIOMMUState *s, IOMMUFDDevice *idev,
+                           Error **errp)
+{
+    enum iommu_hw_info_type hw_type = IOMMU_HW_INFO_TYPE_INTEL_VTD;
+    struct iommu_hw_info_vtd hw_info;
+    bool ok = false;
+
+    if (iommufd_device_get_info(idev, &hw_type, sizeof(hw_info), &hw_info)) {
+        /* A non-zero return from the query is treated as failure. */
+        error_setg(errp, "Failed to get IOMMU capability!!!");
+    } else if (hw_type != IOMMU_HW_INFO_TYPE_INTEL_VTD) {
+        /* The query may rewrite hw_type; anything but VT-d is rejected. */
+        error_setg(errp, "IOMMU hardware is not compatible!!!");
+    } else {
+        ok = vtd_sync_hw_info(s, &hw_info, errp);
+    }
+
+    return ok;
+}
+
static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn,
IOMMUFDDevice *idev, Error **errp)
{
@@ -3837,6 +3878,10 @@ static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn,
return 0;
}
+ if (!vtd_check_idev(s, idev, errp)) {
+ return -1;
+ }
+
vtd_iommu_lock(s);
vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
@@ -4071,10 +4116,11 @@ static void vtd_init(IntelIOMMUState *s)
{
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
- memset(s->csr, 0, DMAR_REG_SIZE);
- memset(s->wmask, 0, DMAR_REG_SIZE);
- memset(s->w1cmask, 0, DMAR_REG_SIZE);
- memset(s->womask, 0, DMAR_REG_SIZE);
+ /* CAP/ECAP are initialized in machine create done stage */
+ memset(s->csr + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG);
+ memset(s->wmask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG);
+ memset(s->w1cmask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG);
+ memset(s->womask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG);
s->root = 0;
s->root_scalable = false;
@@ -4110,13 +4156,16 @@ static void vtd_init(IntelIOMMUState *s)
vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP;
}
- vtd_cap_init(s);
+ if (!s->cap_finalized) {
+ vtd_cap_init(s);
+ s->host_cap = s->cap;
+ s->host_ecap = s->ecap;
+ }
+
vtd_reset_caches(s);
/* Define registers with default values and bit semantics */
vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0);
- vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0);
- vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0);
vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0);
vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL);
vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0);
@@ -4241,6 +4290,12 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
return true;
}
+/*
+ * Define the CAP and ECAP registers from the capability values
+ * currently stored in @s (s->cap and s->ecap).
+ */
+static void vtd_setup_capability_reg(IntelIOMMUState *s)
+{
+    const uint64_t cap_val = s->cap;
+    const uint64_t ecap_val = s->ecap;
+
+    vtd_define_quad(s, DMAR_CAP_REG, cap_val, 0, 0);
+    vtd_define_quad(s, DMAR_ECAP_REG, ecap_val, 0, 0);
+}
+
static int vtd_machine_done_notify_one(Object *child, void *unused)
{
IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
@@ -4259,6 +4314,14 @@ static int vtd_machine_done_notify_one(Object *child, void *unused)
static void vtd_machine_done_hook(Notifier *notifier, void *unused)
{
+ IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
+ /*
+  * Finalize the vIOMMU capabilities: copy the values accumulated in
+  * host_cap/host_ecap into cap/ecap and mark them as final.
+  */
+ iommu->cap = iommu->host_cap;
+ iommu->ecap = iommu->host_ecap;
+ iommu->cap_finalized = true; /* vtd_init() skips vtd_cap_init() once set */
+ /* Define the CAP/ECAP registers from the finalized values. */
+ vtd_setup_capability_reg(iommu);
+ /* Recursive child walk from the object root with the per-object callback. */
object_child_foreach_recursive(object_get_root(),
vtd_machine_done_notify_one, NULL);
}
@@ -4292,6 +4355,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
QLIST_INIT(&s->vtd_as_with_notifiers);
qemu_mutex_init(&s->iommu_lock);
+ s->cap_finalized = false;
memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
"intel_iommu", DMAR_REG_SIZE);
memory_region_add_subregion(get_system_memory(),
--
2.34.1
next prev parent reply other threads:[~2024-01-15 10:18 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-15 10:13 [PATCH rfcv1 0/6] Check and sync host IOMMU cap/ecap with vIOMMU Zhenzhong Duan
2024-01-15 10:13 ` [PATCH rfcv1 1/6] backends/iommufd_device: introduce IOMMUFDDevice Zhenzhong Duan
2024-01-17 14:11 ` Eric Auger
2024-01-18 2:58 ` Duan, Zhenzhong
2024-01-18 12:42 ` Eric Auger
2024-01-19 7:31 ` Duan, Zhenzhong
2024-01-22 16:25 ` Cédric Le Goater
2024-01-23 5:51 ` Duan, Zhenzhong
2024-01-23 10:10 ` Eric Auger
2024-01-15 10:13 ` [PATCH rfcv1 2/6] hw/pci: introduce pci_device_set/unset_iommu_device() Zhenzhong Duan
2024-01-17 14:11 ` Eric Auger
2024-01-18 7:58 ` Duan, Zhenzhong
2024-01-22 16:55 ` Cédric Le Goater
2024-01-23 6:37 ` Duan, Zhenzhong
2024-01-23 7:40 ` Cédric Le Goater
2024-01-23 9:25 ` Duan, Zhenzhong
2024-01-23 10:18 ` Eric Auger
2024-01-24 9:23 ` Duan, Zhenzhong
2024-01-15 10:13 ` [PATCH rfcv1 3/6] intel_iommu: add set/unset_iommu_device callback Zhenzhong Duan
2024-01-17 15:44 ` Eric Auger
2024-01-18 8:43 ` Duan, Zhenzhong
2024-01-18 12:34 ` Eric Auger
2024-01-19 7:27 ` Duan, Zhenzhong
2024-01-22 17:09 ` Cédric Le Goater
2024-01-23 9:46 ` Duan, Zhenzhong
2024-01-15 10:13 ` [PATCH rfcv1 4/6] vfio: initialize IOMMUFDDevice and pass to vIOMMU Zhenzhong Duan
2024-01-17 15:37 ` Joao Martins
2024-01-18 8:17 ` Duan, Zhenzhong
2024-01-18 10:17 ` Yi Liu
2024-01-18 10:20 ` Joao Martins
2024-01-17 17:30 ` Eric Auger
2024-01-18 9:23 ` Duan, Zhenzhong
2024-01-22 17:15 ` Cédric Le Goater
2024-01-23 9:46 ` Duan, Zhenzhong
2024-01-23 12:54 ` Cédric Le Goater
2024-01-24 9:26 ` Duan, Zhenzhong
2024-01-15 10:13 ` [PATCH rfcv1 5/6] intel_iommu: extract out vtd_cap_init to initialize cap/ecap Zhenzhong Duan
2024-01-17 17:36 ` Eric Auger
2024-01-15 10:13 ` Zhenzhong Duan [this message]
2024-01-17 17:56 ` [PATCH rfcv1 6/6] intel_iommu: add a framework to check and sync host IOMMU cap/ecap Eric Auger
2024-01-18 9:30 ` Duan, Zhenzhong
2024-01-18 12:40 ` Eric Auger
2024-01-19 11:55 ` Duan, Zhenzhong
2024-01-23 13:10 ` Eric Auger
2024-01-23 8:39 ` Cédric Le Goater
2024-01-23 10:01 ` Duan, Zhenzhong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240115101313.131139-7-zhenzhong.duan@intel.com \
--to=zhenzhong.duan@intel.com \
--cc=alex.williamson@redhat.com \
--cc=chao.p.peng@intel.com \
--cc=clg@redhat.com \
--cc=eduardo@habkost.net \
--cc=eric.auger@redhat.com \
--cc=jasowang@redhat.com \
--cc=jgg@nvidia.com \
--cc=joao.m.martins@oracle.com \
--cc=kevin.tian@intel.com \
--cc=marcel.apfelbaum@gmail.com \
--cc=mst@redhat.com \
--cc=nicolinc@nvidia.com \
--cc=pbonzini@redhat.com \
--cc=peterx@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=richard.henderson@linaro.org \
--cc=yi.l.liu@intel.com \
--cc=yi.y.sun@intel.com \
--cc=yi.y.sun@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).