Linux IOMMU Development
 help / color / mirror / Atom feed
From: Yi Liu <yi.l.liu@intel.com>
To: joro@8bytes.org, alex.williamson@redhat.com, jgg@nvidia.com,
	kevin.tian@intel.com, robin.murphy@arm.com,
	baolu.lu@linux.intel.com
Cc: cohuck@redhat.com, eric.auger@redhat.com, nicolinc@nvidia.com,
	kvm@vger.kernel.org, mjrosato@linux.ibm.com,
	chao.p.peng@linux.intel.com, yi.l.liu@intel.com,
	yi.y.sun@linux.intel.com, peterx@redhat.com, jasowang@redhat.com,
	shameerali.kolothum.thodi@huawei.com, lulu@redhat.com,
	suravee.suthikulpanit@amd.com, iommu@lists.linux.dev,
	linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org,
	zhenzhong.duan@intel.com
Subject: [PATCH v3 10/10] iommu/vt-d: Disallow nesting on domains with read-only mappings
Date: Thu, 11 May 2023 07:51:10 -0700	[thread overview]
Message-ID: <20230511145110.27707-11-yi.l.liu@intel.com> (raw)
In-Reply-To: <20230511145110.27707-1-yi.l.liu@intel.com>

From: Lu Baolu <baolu.lu@linux.intel.com>

When remapping hardware is configured by system software in scalable mode
as Nested (PGTT=011b) and with PWSNP field Set in the PASID-table-entry,
it may Set Accessed bit and Dirty bit (and Extended Access bit if enabled)
in first-stage page-table entries even when second-stage mappings indicate
that corresponding first-stage page-table is Read-Only.

As the result, contents of pages designated by VMM as Read-Only can be
modified by IOMMU via PML5E (PML4E for 4-level tables) access as part of
address translation process due to DMAs issued by Guest.

Disallow the nested translation when there are read-only pages in the
corresponding second-stage mappings. And, no read-only pages are allowed
to be configured in the second-stage table of a nested translation.
For the latter, an alternative is to disallow read-only mappings in
any stage-2 domain as long as it's ever been used as a parent. In this
way, we can simply replace the user counter with a flag.

In concept if the user understands this errata and does expect to
enable nested translation it should never install any RO mapping
in stage-2 in the entire VM life cycle."

Reference from Sapphire Rapids Specification Update [1], errata details,
SPR17.

[1] https://www.intel.com/content/www/us/en/content-details/772415/content-details.html

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/iommu/intel/iommu.c  | 13 +++++++++++++
 drivers/iommu/intel/iommu.h  |  4 ++++
 drivers/iommu/intel/nested.c | 22 ++++++++++++++++++++--
 include/uapi/linux/iommufd.h | 12 +++++++++++-
 4 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 20d4ae1cb8a6..42288bd449a0 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2150,6 +2150,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 	struct dma_pte *first_pte = NULL, *pte = NULL;
 	unsigned int largepage_lvl = 0;
 	unsigned long lvl_pages = 0;
+	unsigned long flags;
 	phys_addr_t pteval;
 	u64 attr;
 
@@ -2159,6 +2160,17 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
 		return -EINVAL;
 
+	if (!(prot & DMA_PTE_WRITE) && !domain->read_only_mapped) {
+		spin_lock_irqsave(&domain->lock, flags);
+		if (domain->nested_users > 0) {
+			spin_unlock_irqrestore(&domain->lock, flags);
+			return -EINVAL;
+		}
+
+		domain->read_only_mapped = true;
+		spin_unlock_irqrestore(&domain->lock, flags);
+	}
+
 	attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
 	attr |= DMA_FL_PTE_PRESENT;
 	if (domain->use_first_level) {
@@ -4756,6 +4768,7 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length)
 	if (!vtd)
 		return ERR_PTR(-ENOMEM);
 
+	vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17;
 	vtd->cap_reg = iommu->cap;
 	vtd->ecap_reg = iommu->ecap;
 	*length = sizeof(*vtd);
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 581596d90c1b..95644c6815af 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -616,6 +616,10 @@ struct dmar_domain {
 			int		agaw;
 			/* maximum mapped address */
 			u64		max_addr;
+			/* domain has mappings with read-only permission */
+			bool		read_only_mapped;
+			/* user nested domain count */
+			int		nested_users;
 		};
 
 		/* Nested user domain */
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index d13fbcd3f5a6..9092ce28382c 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -61,7 +61,14 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
 
 static void intel_nested_domain_free(struct iommu_domain *domain)
 {
-	kfree(to_dmar_domain(domain));
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	struct dmar_domain *s2_domain = dmar_domain->s2_domain;
+	unsigned long flags;
+
+	spin_lock_irqsave(&s2_domain->lock, flags);
+	s2_domain->nested_users--;
+	spin_unlock_irqrestore(&s2_domain->lock, flags);
+	kfree(dmar_domain);
 }
 
 static void intel_nested_invalidate(struct device *dev,
@@ -143,14 +150,25 @@ struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *s2_domain,
 					       const union iommu_domain_user_data *user_data)
 {
 	const struct iommu_hwpt_intel_vtd *vtd = (struct iommu_hwpt_intel_vtd *)user_data;
+	struct dmar_domain *s2_dmar_domain = to_dmar_domain(s2_domain);
 	struct dmar_domain *domain;
+	unsigned long flags;
 
 	domain = kzalloc(sizeof(*domain), GFP_KERNEL_ACCOUNT);
 	if (!domain)
 		return NULL;
 
+	spin_lock_irqsave(&s2_dmar_domain->lock, flags);
+	if (s2_dmar_domain->read_only_mapped) {
+		spin_unlock_irqrestore(&s2_dmar_domain->lock, flags);
+		kfree(domain);
+		return NULL;
+	}
+	s2_dmar_domain->nested_users++;
+	spin_unlock_irqrestore(&s2_dmar_domain->lock, flags);
+
 	domain->use_first_level = true;
-	domain->s2_domain = to_dmar_domain(s2_domain);
+	domain->s2_domain = s2_dmar_domain;
 	domain->s1_pgtbl = vtd->pgtbl_addr;
 	domain->s1_cfg = *vtd;
 	domain->domain.ops = &intel_nested_domain_ops;
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index b46270a4cf46..8626f36e0353 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -471,10 +471,20 @@ enum iommu_hw_info_type {
 	IOMMU_HW_INFO_TYPE_INTEL_VTD,
 };
 
+/**
+ * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info
+ * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow nesting on domains
+ *                                   with read-only mapping.
+ *                                   https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
+ */
+enum iommu_hw_info_vtd_flags {
+	IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0,
+};
+
 /**
  * struct iommu_hw_info_vtd - Intel VT-d hardware information
  *
- * @flags: Must be 0
+ * @flags: Combination of enum iommu_hw_info_vtd_flags
  * @__reserved: Must be 0
  *
  * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec
-- 
2.34.1


  parent reply	other threads:[~2023-05-11 14:51 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-11 14:51 [PATCH v3 00/10] Add Intel VT-d nested translation Yi Liu
2023-05-11 14:51 ` [PATCH v3 01/10] iommufd: Add data structure for Intel VT-d stage-1 domain allocation Yi Liu
2023-05-24  6:59   ` Tian, Kevin
2023-05-25  2:28   ` Zhang, Tina
2023-05-29 20:00     ` Jason Gunthorpe
2023-05-29 19:53   ` Jason Gunthorpe
2023-05-11 14:51 ` [PATCH v3 02/10] iommu/vt-d: Extend dmar_domain to support nested domain Yi Liu
2023-05-24  7:02   ` Tian, Kevin
2023-05-26  2:56     ` Baolu Lu
2023-05-11 14:51 ` [PATCH v3 03/10] iommu/vt-d: Add helper for nested domain allocation Yi Liu
2023-05-11 14:51 ` [PATCH v3 04/10] iommu/vt-d: Add helper to setup pasid nested translation Yi Liu
2023-05-24  7:16   ` Tian, Kevin
2023-05-26  4:16     ` Baolu Lu
2023-06-07  8:34       ` Liu, Yi L
2023-06-08  3:32         ` Liu, Yi L
2023-06-08  3:35       ` Liu, Yi L
2023-06-08  3:37         ` Baolu Lu
2023-05-11 14:51 ` [PATCH v3 05/10] iommu/vt-d: Make domain attach helpers to be extern Yi Liu
2023-05-11 14:51 ` [PATCH v3 06/10] iommu/vt-d: Set the nested domain to a device Yi Liu
2023-05-24  7:22   ` Tian, Kevin
2023-05-26  4:24     ` Baolu Lu
2023-05-11 14:51 ` [PATCH v3 07/10] iommu/vt-d: Add iotlb flush for nested domain Yi Liu
2023-05-24  7:33   ` Tian, Kevin
2023-06-08  7:14     ` Liu, Yi L
2023-06-08  8:07       ` Baolu Lu
2023-06-20  6:22         ` Liu, Yi L
2023-05-11 14:51 ` [PATCH v3 08/10] iommu/vt-d: Add nested domain allocation Yi Liu
2023-05-11 14:51 ` [PATCH v3 09/10] iommu/vt-d: Implement hw_info for iommu capability query Yi Liu
2023-05-11 14:51 ` Yi Liu [this message]
2023-05-24  7:44   ` [PATCH v3 10/10] iommu/vt-d: Disallow nesting on domains with read-only mappings Tian, Kevin
2023-05-26  4:28     ` Baolu Lu
2023-05-24  8:59 ` [PATCH v3 00/10] Add Intel VT-d nested translation Tian, Kevin
2023-05-25 18:06   ` Alex Williamson
2023-05-26 11:25     ` Tian, Kevin
2023-05-29 18:43   ` Jason Gunthorpe
2023-05-30  0:16     ` Alex Williamson
2023-05-30 16:42       ` Jason Gunthorpe
2023-06-14  8:07     ` Tian, Kevin
2023-06-14 11:52       ` Jason Gunthorpe
2023-06-16  2:29         ` Tian, Kevin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230511145110.27707-11-yi.l.liu@intel.com \
    --to=yi.l.liu@intel.com \
    --cc=alex.williamson@redhat.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=chao.p.peng@linux.intel.com \
    --cc=cohuck@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=iommu@lists.linux.dev \
    --cc=jasowang@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=lulu@redhat.com \
    --cc=mjrosato@linux.ibm.com \
    --cc=nicolinc@nvidia.com \
    --cc=peterx@redhat.com \
    --cc=robin.murphy@arm.com \
    --cc=shameerali.kolothum.thodi@huawei.com \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=yi.y.sun@linux.intel.com \
    --cc=zhenzhong.duan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox