From: Lu Baolu <baolu.lu@linux.intel.com>
To: Joerg Roedel <joro@8bytes.org>
Cc: Kyung Min Park <kyung.min.park@intel.com>,
iommu@lists.linux-foundation.org,
Chuck Lever <chuck.lever@oracle.com>,
Bjorn Helgaas <bhelgaas@google.com>,
Robin Murphy <robin.murphy@arm.com>
Subject: [PATCH 4/7] iommu/vt-d: Add iotlb_sync_map callback
Date: Thu, 4 Feb 2021 09:43:58 +0800 [thread overview]
Message-ID: <20210204014401.2846425-5-baolu.lu@linux.intel.com> (raw)
In-Reply-To: <20210204014401.2846425-1-baolu.lu@linux.intel.com>
Some Intel VT-d hardware implementations don't support memory coherency
for page table walks (indicated by the Page-Walk-coherency bit in the
ecap register), so software must flush the corresponding CPU cache
lines explicitly after each page table entry update.
The iommu_map_sg() code iterates through the given scatter-gather list
and invokes iommu_map() for each element in the scatter-gather list,
which calls into the vendor IOMMU driver through the iommu_ops callback.
As a result, a single sg mapping may lead to multiple cache line flushes,
which has degraded I/O performance since commit
<c588072bba6b5> ("iommu/vt-d: Convert intel iommu driver to the iommu
ops").
Fix this by adding an iotlb_sync_map callback and centralizing the clflush
operations after all sg mappings.
Fixes: c588072bba6b5 ("iommu/vt-d: Convert intel iommu driver to the iommu ops")
Reported-by: Chuck Lever <chuck.lever@oracle.com>
Link: https://lore.kernel.org/linux-iommu/D81314ED-5673-44A6-B597-090E3CB83EB0@oracle.com/
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Cc: Robin Murphy <robin.murphy@arm.com>
[ cel: removed @first_pte, which is no longer used ]
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Link: https://lore.kernel.org/linux-iommu/161177763962.1311.15577661784296014186.stgit@manet.1015granger.net
---
drivers/iommu/intel/iommu.c | 90 ++++++++++++++++++++++++-------------
1 file changed, 60 insertions(+), 30 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index ecd36e456de3..ecbd05d8a1fc 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2283,9 +2283,9 @@ static int
__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long phys_pfn, unsigned long nr_pages, int prot)
{
- struct dma_pte *first_pte = NULL, *pte = NULL;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
+ struct dma_pte *pte = NULL;
phys_addr_t pteval;
u64 attr;
@@ -2314,7 +2314,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
phys_pfn, nr_pages);
- first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
+ pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
if (!pte)
return -ENOMEM;
/* It is large page*/
@@ -2375,34 +2375,14 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
* recalculate 'pte' and switch back to smaller pages for the
* end of the mapping, if the trailing size is not enough to
* use another superpage (i.e. nr_pages < lvl_pages).
+ *
+ * We leave clflush for the leaf pte changes to iotlb_sync_map()
+ * callback.
*/
pte++;
if (!nr_pages || first_pte_in_page(pte) ||
- (largepage_lvl > 1 && nr_pages < lvl_pages)) {
- domain_flush_cache(domain, first_pte,
- (void *)pte - (void *)first_pte);
+ (largepage_lvl > 1 && nr_pages < lvl_pages))
pte = NULL;
- }
- }
-
- return 0;
-}
-
-static int
-domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
- unsigned long phys_pfn, unsigned long nr_pages, int prot)
-{
- int iommu_id, ret;
- struct intel_iommu *iommu;
-
- /* Do the real mapping first */
- ret = __domain_mapping(domain, iov_pfn, phys_pfn, nr_pages, prot);
- if (ret)
- return ret;
-
- for_each_domain_iommu(iommu_id, domain) {
- iommu = g_iommus[iommu_id];
- __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
}
return 0;
@@ -4943,7 +4923,6 @@ static int intel_iommu_map(struct iommu_domain *domain,
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
u64 max_addr;
int prot = 0;
- int ret;
if (iommu_prot & IOMMU_READ)
prot |= DMA_PTE_READ;
@@ -4969,9 +4948,8 @@ static int intel_iommu_map(struct iommu_domain *domain,
/* Round up size to next multiple of PAGE_SIZE, if it and
the low bits of hpa would take us onto the next page */
size = aligned_nrpages(hpa, size);
- ret = domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
- hpa >> VTD_PAGE_SHIFT, size, prot);
- return ret;
+ return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
+ hpa >> VTD_PAGE_SHIFT, size, prot);
}
static size_t intel_iommu_unmap(struct iommu_domain *domain,
@@ -5454,6 +5432,57 @@ static bool risky_device(struct pci_dev *pdev)
return false;
}
+static void clflush_sync_map(struct dmar_domain *domain, unsigned long clf_pfn,
+ unsigned long clf_pages)
+{
+ struct dma_pte *first_pte = NULL, *pte = NULL;
+ unsigned long lvl_pages = 0;
+ int level = 0;
+
+ while (clf_pages > 0) {
+ if (!pte) {
+ level = 0;
+ pte = pfn_to_dma_pte(domain, clf_pfn, &level);
+ if (WARN_ON(!pte))
+ return;
+ first_pte = pte;
+ lvl_pages = lvl_to_nr_pages(level);
+ }
+
+ if (WARN_ON(!lvl_pages || clf_pages < lvl_pages))
+ return;
+
+ clf_pages -= lvl_pages;
+ clf_pfn += lvl_pages;
+ pte++;
+
+ if (!clf_pages || first_pte_in_page(pte) ||
+ (level > 1 && clf_pages < lvl_pages)) {
+ domain_flush_cache(domain, first_pte,
+ (void *)pte - (void *)first_pte);
+ pte = NULL;
+ }
+ }
+}
+
+static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ unsigned long pages = aligned_nrpages(iova, size);
+ unsigned long pfn = iova >> VTD_PAGE_SHIFT;
+ struct intel_iommu *iommu;
+ int iommu_id;
+
+ if (!dmar_domain->iommu_coherency)
+ clflush_sync_map(dmar_domain, pfn, pages);
+
+ for_each_domain_iommu(iommu_id, dmar_domain) {
+ iommu = g_iommus[iommu_id];
+ __mapping_notify_one(iommu, dmar_domain, pfn, pages);
+ }
+}
+
const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
.domain_alloc = intel_iommu_domain_alloc,
@@ -5466,6 +5495,7 @@ const struct iommu_ops intel_iommu_ops = {
.aux_detach_dev = intel_iommu_aux_detach_device,
.aux_get_pasid = intel_iommu_aux_get_pasid,
.map = intel_iommu_map,
+ .iotlb_sync_map = intel_iommu_iotlb_sync_map,
.unmap = intel_iommu_unmap,
.flush_iotlb_all = intel_flush_iotlb_all,
.iotlb_sync = intel_iommu_tlb_sync,
--
2.25.1
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
next prev parent reply other threads:[~2021-02-04 1:53 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-02-04 1:43 [PATCH 0/7] [PULL REQUEST] iommu/vt-d: Update for v5.12 Lu Baolu
2021-02-04 1:43 ` [PATCH 1/7] iommu/vt-d: Fix 'physical' typos Lu Baolu
2021-02-04 1:43 ` [PATCH 2/7] iommu/vt-d: Audit IOMMU Capabilities and add helper functions Lu Baolu
2021-02-04 1:43 ` [PATCH 3/7] iommu/vt-d: Move capability check code to cap_audit files Lu Baolu
2021-02-04 1:43 ` Lu Baolu [this message]
2021-02-04 1:43 ` [PATCH 5/7] iommu/vt-d: Add new enum value and structure for SATC Lu Baolu
2021-02-04 1:44 ` [PATCH 6/7] iommu/vt-d: Parse SATC reporting structure Lu Baolu
2021-02-04 1:44 ` [PATCH 7/7] iommu/vt-d: Apply SATC policy Lu Baolu
2021-02-04 5:32 ` [PATCH 0/7] [PULL REQUEST] iommu/vt-d: Update for v5.12 Lu Baolu
2021-02-04 11:52 ` Lu Baolu
2021-02-04 13:42 ` Joerg Roedel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210204014401.2846425-5-baolu.lu@linux.intel.com \
--to=baolu.lu@linux.intel.com \
--cc=bhelgaas@google.com \
--cc=chuck.lever@oracle.com \
--cc=iommu@lists.linux-foundation.org \
--cc=joro@8bytes.org \
--cc=kyung.min.park@intel.com \
--cc=robin.murphy@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox