From: David Woodhouse <dwmw2@infradead.org>
To: "linux-mm@kvack.org" <linux-mm@kvack.org>
Cc: iommu@lists.linux-foundation.org, Sudeep Dutt <sudeep.dutt@intel.com>
Subject: [RFC PATCH] iommu/vt-d: Add IOTLB flush support for kernel addresses
Date: Tue, 20 Oct 2015 16:52:59 +0100 [thread overview]
Message-ID: <1445356379.4486.56.camel@infradead.org> (raw)
[-- Attachment #1: Type: text/plain, Size: 6764 bytes --]
On top of the tree at git.infradead.org/users/dwmw2/linux-svm.git
(http:// or git://).
For userspace addresses, we use the MMU notifiers and flush the IOTLB
as appropriate.
However, we need to do it for kernel addresses too — which basically
means adding a hook to flush_tlb_kernel_range(). Does this look
reasonable? I was trying to avoid it and insist on supporting addresses
within the kernel's static mapping only. But it doesn't look like
that's a reasonable thing to require.
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
arch/x86/mm/tlb.c | 2 ++
drivers/iommu/intel-svm.c | 37 ++++++++++++++++++++++++++++++++++---
include/linux/intel-iommu.h | 6 +++++-
include/linux/intel-svm.h | 13 +++++--------
4 files changed, 46 insertions(+), 12 deletions(-)
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 0a48ccf..61d9533 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -44,14 +44,11 @@ struct svm_dev_ops {
/*
* The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only
- * for access to kernel addresses. No IOTLB flushes are automatically done
- * for kernel mappings; it is valid only for access to the kernel's static
- * 1:1 mapping of physical memory — not to vmalloc or even module mappings.
- * A future API addition may permit the use of such ranges, by means of an
- * explicit IOTLB flush call (akin to the DMA API's unmap method).
- *
- * It is unlikely that we will ever hook into flush_tlb_kernel_range() to
- * do such IOTLB flushes automatically.
+ * for access to kernel addresses. IOTLB flushes are performed as required
+ * by means of a hook from flush_tlb_kernel_range(). This flag is mutually
+ * exclusive with the SVM_FLAG_PRIVATE_PASID flag — there can be only one
+ * PASID used for kernel mode, to keep the performance implications of the
+ * IOTLB flush hook relatively sane.
*/
#define SVM_FLAG_SUPERVISOR_MODE (1<<1)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 8ddb5d0..40ebe83 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,6 +6,7 @@
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/cpu.h>
+#include <linux/intel-iommu.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
@@ -266,6 +267,7 @@ static void do_kernel_range_flush(void *info)
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
+ intel_iommu_flush_kernel_pasid(start, end);
/* Balance as user space task's flush, a bit conservative */
if (end == TLB_FLUSH_ALL ||
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index a584df0..f8ca3c1 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -23,6 +23,7 @@
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
+#include <asm/tlbflush.h>
static irqreturn_t prq_event_thread(int irq, void *d);
@@ -264,6 +265,26 @@ static const struct mmu_notifier_ops intel_mmuops = {
.invalidate_range = intel_invalidate_range,
};
+void intel_iommu_flush_kernel_pasid(unsigned long start, unsigned long end)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ unsigned long pages;
+
+ if (end == TLB_FLUSH_ALL)
+ pages = end;
+ else
+ pages = (end - start) >> VTD_PAGE_SHIFT;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ struct intel_svm *svm = rcu_dereference(iommu->kernel_svm);
+ if (svm)
+ intel_flush_svm_range(svm, start, pages, 0, 1);
+ }
+ rcu_read_unlock();
+}
+
static DEFINE_MUTEX(pasid_mutex);
int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
@@ -286,6 +307,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
pasid_max = 1 << 20;
if ((flags & SVM_FLAG_SUPERVISOR_MODE)) {
+ if (flags & SVM_FLAG_PRIVATE_PASID)
+ return -EINVAL;
if (!ecap_srs(iommu->ecap))
return -EINVAL;
} else if (pasid) {
@@ -294,7 +317,9 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
}
mutex_lock(&pasid_mutex);
- if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
+ if (flags & SVM_FLAG_SUPERVISOR_MODE)
+ svm = iommu->kernel_svm;
+ else if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
int i;
idr_for_each_entry(&iommu->pasid_idr, svm, i) {
@@ -378,8 +403,10 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
}
iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1;
mm = NULL;
- } else
+ } else {
iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
+ rcu_assign_pointer(iommu->kernel_svm, svm);
+ }
wmb();
}
list_add_rcu(&sdev->list, &svm->devs);
@@ -432,8 +459,12 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
mmu_notifier_unregister(&svm->notifier, svm->mm);
idr_remove(&svm->iommu->pasid_idr, svm->pasid);
- if (svm->mm)
+ if (svm->mm) {
mmput(svm->mm);
+ } else {
+ rcu_assign_pointer(iommu->kernel_svm, NULL);
+ synchronize_rcu();
+ }
/* We mandate that no page faults may be outstanding
* for the PASID when intel_svm_unbind_mm() is called.
* If that is not obeyed, subtle errors will happen.
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 821273c..169bc84 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -391,6 +391,7 @@ enum {
struct pasid_entry;
struct pasid_state_entry;
struct page_req_dsc;
+struct intel_svm;
struct intel_iommu {
void __iomem *reg; /* Pointer to hardware regs, virtual addr */
@@ -426,6 +427,7 @@ struct intel_iommu {
struct page_req_dsc *prq;
unsigned char prq_name[16]; /* Name for PRQ interrupt */
struct idr pasid_idr;
+ struct intel_svm __rcu *kernel_svm;
#endif
struct q_inval *qi; /* Queued invalidation info */
u32 *iommu_state; /* Store iommu states between suspend and resume.*/
@@ -496,8 +498,10 @@ struct intel_svm {
extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev);
extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
+extern void intel_iommu_flush_kernel_pasid(unsigned long start, unsigned long end);
+#else
+#define intel_iommu_flush_kernel_pasid(start, end) do { ; } while(0)
#endif
-
extern const struct attribute_group *intel_iommu_groups[];
#endif
--
David Woodhouse Open Source Technology Centre
David.Woodhouse@intel.com Intel Corporation
[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]
next reply other threads:[~2015-10-20 15:54 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-10-20 15:52 David Woodhouse [this message]
2015-10-20 16:03 ` [RFC PATCH] iommu/vt-d: Add IOTLB flush support for kernel addresses Joerg Roedel
2015-10-20 16:17 ` David Woodhouse
2015-10-23 10:20 ` Joerg Roedel
2015-10-23 10:33 ` David Woodhouse
2015-10-23 11:03 ` Joerg Roedel
2015-10-23 11:37 ` David Woodhouse
2015-10-23 12:42 ` Joerg Roedel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1445356379.4486.56.camel@infradead.org \
--to=dwmw2@infradead.org \
--cc=iommu@lists.linux-foundation.org \
--cc=linux-mm@kvack.org \
--cc=sudeep.dutt@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).