From: Alexey Kardashevskiy <aik@ozlabs.ru>
To: linuxppc-dev@lists.ozlabs.org
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>,
Alex Williamson <alex.williamson@redhat.com>,
Paul Mackerras <paulus@samba.org>,
linux-kernel@vger.kernel.org
Subject: [PATCH kernel v8 28/31] powerpc/mmu: Add userspace-to-physical addresses translation cache
Date: Fri, 10 Apr 2015 16:31:10 +1000 [thread overview]
Message-ID: <1428647473-11738-29-git-send-email-aik@ozlabs.ru> (raw)
In-Reply-To: <1428647473-11738-1-git-send-email-aik@ozlabs.ru>
We are adding support for DMA memory pre-registration to be used in
conjunction with VFIO. The idea is that userspace which is going to
run a guest may want to pre-register a userspace memory region so that
it is all pinned once and never goes away. Once this is done,
the hypervisor does not have to pin/unpin pages on every DMA map/unmap
request. This avoids pinning the same memory multiple times
and helps in-kernel acceleration of DMA requests.
This adds a list of memory regions to mm_context_t. Each region consists
of a header and a list of physical addresses. This adds an API to:
1. register/unregister memory regions;
2. do final cleanup (which puts all pre-registered pages);
3. do userspace to physical address translation;
4. manage a mapped pages counter; when it is zero, it is safe to
unregister the region.
Multiple registrations of the same region are allowed; a kref is used to
track the number of registrations.
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
Changes:
v8:
* s/mm_iommu_table_group_mem_t/struct mm_iommu_table_group_mem_t/
* fixed error fallback loop (s/[i]/[j]/)
---
arch/powerpc/include/asm/mmu-hash64.h | 3 +
arch/powerpc/include/asm/mmu_context.h | 17 +++
arch/powerpc/mm/Makefile | 1 +
arch/powerpc/mm/mmu_context_hash64.c | 6 +
arch/powerpc/mm/mmu_context_hash64_iommu.c | 215 +++++++++++++++++++++++++++++
5 files changed, 242 insertions(+)
create mode 100644 arch/powerpc/mm/mmu_context_hash64_iommu.c
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 4f13c3e..83214c4 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -535,6 +535,9 @@ typedef struct {
/* for 4K PTE fragment support */
void *pte_frag;
#endif
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ struct list_head iommu_group_mem_list;
+#endif
} mm_context_t;
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 73382eb..d6116ca 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -16,6 +16,23 @@
*/
extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
extern void destroy_context(struct mm_struct *mm);
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+struct mm_iommu_table_group_mem_t;
+
+extern bool mm_iommu_preregistered(void);
+extern long mm_iommu_alloc(unsigned long ua, unsigned long entries,
+ struct mm_iommu_table_group_mem_t **pmem);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_get(unsigned long ua,
+ unsigned long entries);
+extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem);
+extern void mm_iommu_cleanup(mm_context_t *ctx);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua,
+ unsigned long size);
+extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+ unsigned long ua, unsigned long *hpa);
+extern long mm_iommu_mapped_update(struct mm_iommu_table_group_mem_t *mem,
+ bool inc);
+#endif
extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 438dcd3..49fbfc7 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -35,3 +35,4 @@ obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
obj-$(CONFIG_HIGHMEM) += highmem.o
obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
+obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_hash64_iommu.o
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 178876ae..eb3080c 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -89,6 +89,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
#ifdef CONFIG_PPC_64K_PAGES
mm->context.pte_frag = NULL;
#endif
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
+#endif
return 0;
}
@@ -132,6 +135,9 @@ static inline void destroy_pagetable_page(struct mm_struct *mm)
void destroy_context(struct mm_struct *mm)
{
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ mm_iommu_cleanup(&mm->context);
+#endif
#ifdef CONFIG_PPC_ICSWX
drop_cop(mm->context.acop, mm);
diff --git a/arch/powerpc/mm/mmu_context_hash64_iommu.c b/arch/powerpc/mm/mmu_context_hash64_iommu.c
new file mode 100644
index 0000000..af7668c
--- /dev/null
+++ b/arch/powerpc/mm/mmu_context_hash64_iommu.c
@@ -0,0 +1,215 @@
+/*
+ * IOMMU helpers in MMU context.
+ *
+ * Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/rculist.h>
+#include <linux/vmalloc.h>
+#include <linux/kref.h>
+#include <asm/mmu_context.h>
+
+struct mm_iommu_table_group_mem_t {
+ struct list_head next; /* link in mm->context.iommu_group_mem_list */
+ struct rcu_head rcu; /* passed to call_rcu() to defer freeing */
+ struct kref kref; /* one reference per VFIO container */
+ atomic_t mapped; /* number of currently mapped pages */
+ u64 ua; /* userspace address of the first page of the region */
+ u64 entries; /* number of entries in hpas[], one per pinned page */
+ u64 *hpas; /* vmalloc'ed array of page addresses (pfn << PAGE_SHIFT) */
+};
+
+/*
+ * mm_iommu_preregistered() - tell whether the current task's mm has any
+ * pre-registered memory regions.
+ *
+ * Returns false when there is no current mm or when its region list is
+ * empty, true otherwise.
+ */
+bool mm_iommu_preregistered(void)
+{
+	if (!current || !current->mm)
+		return false;
+
+	return !list_empty(&current->mm->context.iommu_group_mem_list);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
+
+/*
+ * mm_iommu_alloc() - pin and register a userspace memory region.
+ * @ua: userspace address of the first page of the region
+ * @entries: number of pages in the region
+ * @pmem: on success, receives the newly created region descriptor
+ *
+ * Pins every page of the region with get_user_pages_fast() (for write),
+ * stores its address (pfn << PAGE_SHIFT) in hpas[] and adds the
+ * descriptor to the current mm's list of pre-registered regions.
+ *
+ * Returns 0 on success, -EBUSY if a region with exactly this start and
+ * size is already registered, -EINVAL if the region overlaps a registered
+ * one or its size/end address wraps around, -ENOMEM on allocation failure
+ * and -EFAULT if a page could not be pinned (pages pinned so far are
+ * released again).
+ *
+ * NOTE(review): the list is walked and modified here without any lock
+ * visible in this file; presumably callers serialise registration --
+ * confirm against the VFIO caller.
+ */
+long mm_iommu_alloc(unsigned long ua, unsigned long entries,
+		struct mm_iommu_table_group_mem_t **pmem)
+{
+	struct list_head *regions = &current->mm->context.iommu_group_mem_list;
+	struct mm_iommu_table_group_mem_t *mem;
+	struct page *page = NULL;
+	unsigned long i;
+	long ret;
+
+	/*
+	 * Reject sizes whose byte length or end address would wrap around;
+	 * this also keeps the hpas[] allocation size from overflowing.
+	 */
+	if (entries > (ULONG_MAX >> PAGE_SHIFT) ||
+			ua + (entries << PAGE_SHIFT) < ua)
+		return -EINVAL;
+
+	list_for_each_entry_rcu(mem, regions, next) {
+		if ((mem->ua == ua) && (mem->entries == entries))
+			return -EBUSY;
+
+		/* Overlap? */
+		if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
+				(ua < (mem->ua + (mem->entries << PAGE_SHIFT))))
+			return -EINVAL;
+	}
+
+	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+	if (!mem)
+		return -ENOMEM;
+
+	mem->hpas = vzalloc(entries * sizeof(mem->hpas[0]));
+	if (!mem->hpas) {
+		ret = -ENOMEM;
+		goto free_mem;
+	}
+
+	for (i = 0; i < entries; ++i) {
+		if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
+					1/* pages */, 1/* iswrite */, &page)) {
+			ret = -EFAULT;
+			goto unpin;
+		}
+
+		mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
+	}
+
+	kref_init(&mem->kref);
+	atomic_set(&mem->mapped, 0);
+	mem->ua = ua;
+	mem->entries = entries;
+	*pmem = mem;
+
+	list_add_rcu(&mem->next, regions);
+
+	return 0;
+
+unpin:
+	/* Drop the references taken on the i pages pinned so far. */
+	while (i--)
+		put_page(pfn_to_page(mem->hpas[i] >> PAGE_SHIFT));
+	vfree(mem->hpas);
+free_mem:
+	kfree(mem);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_alloc);
+
+/*
+ * Release the page reference held for every populated hpas[] slot of @mem
+ * and clear each slot once its page has been put.
+ */
+static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
+{
+	long idx = 0;
+	struct page *pg = NULL;
+
+	while (idx < mem->entries) {
+		u64 *slot = &mem->hpas[idx++];
+
+		/* A zero slot holds no page reference. */
+		if (*slot) {
+			pg = pfn_to_page(*slot >> PAGE_SHIFT);
+			if (pg) {
+				put_page(pg);
+				*slot = 0;
+			}
+		}
+	}
+}
+
+/*
+ * RCU callback (queued with call_rcu() by mm_iommu_release()): unpin the
+ * pages of the region embedding @head, then free its hpas[] array and
+ * the descriptor itself.
+ */
+static void mm_iommu_free(struct rcu_head *head)
+{
+	struct mm_iommu_table_group_mem_t *region;
+
+	region = container_of(head, struct mm_iommu_table_group_mem_t, rcu);
+
+	mm_iommu_unpin(region);
+	vfree(region->hpas);
+	kfree(region);
+}
+
+/*
+ * kref release callback: unlink the region embedding @kref from the list
+ * and hand it to call_rcu() so that mm_iommu_free() disposes of it.
+ */
+static void mm_iommu_release(struct kref *kref)
+{
+	struct mm_iommu_table_group_mem_t *region;
+
+	region = container_of(kref, struct mm_iommu_table_group_mem_t, kref);
+
+	list_del_rcu(&region->next);
+	call_rcu(&region->rcu, mm_iommu_free);
+}
+
+/*
+ * mm_iommu_get() - take a reference on an already registered region.
+ * @ua: userspace address the region starts at
+ * @entries: number of pages in the region
+ *
+ * Searches the current mm's region list for a region whose start and
+ * size match exactly.  Returns that region with its kref incremented, or
+ * NULL if there is no such region.
+ */
+struct mm_iommu_table_group_mem_t *mm_iommu_get(unsigned long ua,
+		unsigned long entries)
+{
+	struct mm_iommu_table_group_mem_t *mem;
+
+	list_for_each_entry_rcu(mem, &current->mm->context.iommu_group_mem_list,
+			next) {
+		if ((mem->ua == ua) && (mem->entries == entries)) {
+			kref_get(&mem->kref);
+			return mem;
+		}
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_get);
+
+/*
+ * mm_iommu_put() - drop one reference on a registered region.
+ * @mem: region to release
+ *
+ * Returns -EBUSY, leaving the reference count untouched, while the
+ * region's mapped counter is non-zero; otherwise drops one kref (the
+ * last put triggers mm_iommu_release()) and returns 0.
+ */
+long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem)
+{
+	const bool in_use = atomic_read(&mem->mapped) != 0;
+
+	/* @mem must not be touched after kref_put(): it may be gone. */
+	if (!in_use)
+		kref_put(&mem->kref, mm_iommu_release);
+
+	return in_use ? -EBUSY : 0;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_put);
+
+/*
+ * mm_iommu_lookup() - find the registered region containing a range.
+ * @ua: userspace address of the start of the range
+ * @size: length of the range in bytes
+ *
+ * Returns the first region of the current mm that fully contains
+ * [@ua, @ua + @size), or NULL if there is none.  No reference is taken
+ * on the returned region.
+ */
+struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua,
+		unsigned long size)
+{
+	struct mm_iommu_table_group_mem_t *mem;
+
+	list_for_each_entry_rcu(mem,
+			&current->mm->context.iommu_group_mem_list,
+			next) {
+		const u64 bytes = mem->entries << PAGE_SHIFT;
+
+		if ((ua < mem->ua) || (size > bytes))
+			continue;
+
+		/*
+		 * Compare offsets rather than end addresses so that a
+		 * wrapping "ua + size" cannot pass the containment test.
+		 */
+		if ((ua - mem->ua) <= (bytes - size))
+			return mem;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_lookup);
+
+/*
+ * mm_iommu_ua_to_hpa() - translate a userspace address within a region.
+ * @mem: registered region expected to contain @ua
+ * @ua: userspace address to translate
+ * @hpa: receives the recorded page address combined with the in-page
+ *       offset of @ua
+ *
+ * Returns 0 on success or -EFAULT (leaving *@hpa untouched) if @ua lies
+ * outside the region.
+ */
+long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+		unsigned long ua, unsigned long *hpa)
+{
+	unsigned long entry;
+
+	/* Addresses below the region would make the subtraction wrap. */
+	if (ua < mem->ua)
+		return -EFAULT;
+
+	/* Validate the index before forming any pointer into hpas[]. */
+	entry = (ua - mem->ua) >> PAGE_SHIFT;
+	if (entry >= mem->entries)
+		return -EFAULT;
+
+	*hpa = mem->hpas[entry] | (ua & ~PAGE_MASK);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
+
+/*
+ * mm_iommu_mapped_update() - adjust the mapped pages counter of a region.
+ * @mem: region whose counter is updated
+ * @inc: true to increment, false to decrement
+ *
+ * When incrementing, always returns 0.  When decrementing, returns the
+ * result of atomic_dec_if_positive() on the counter.
+ */
+long mm_iommu_mapped_update(struct mm_iommu_table_group_mem_t *mem, bool inc)
+{
+	if (!inc)
+		return atomic_dec_if_positive(&mem->mapped);
+
+	atomic_inc(&mem->mapped);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_mapped_update);
+
+/*
+ * mm_iommu_cleanup() - release every region still registered in @ctx.
+ * @ctx: MMU context whose region list is emptied
+ *
+ * Calls mm_iommu_release() directly on each remaining region, whatever
+ * its reference count, until the list is empty.
+ */
+void mm_iommu_cleanup(mm_context_t *ctx)
+{
+	struct list_head *regions = &ctx->iommu_group_mem_list;
+	struct mm_iommu_table_group_mem_t *region;
+
+	/* mm_iommu_release() unlinks the entry, so the list shrinks. */
+	while (!list_empty(regions)) {
+		region = list_first_entry(regions,
+				struct mm_iommu_table_group_mem_t, next);
+		mm_iommu_release(&region->kref);
+	}
+}
--
2.0.0
next prev parent reply other threads:[~2015-04-10 6:33 UTC|newest]
Thread overview: 74+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-04-10 6:30 [PATCH kernel v8 00/31] powerpc/iommu/vfio: Enable Dynamic DMA windows Alexey Kardashevskiy
2015-04-10 6:30 ` [PATCH kernel v8 01/31] vfio: powerpc/spapr: Move page pinning from arch code to VFIO IOMMU driver Alexey Kardashevskiy
2015-04-15 3:56 ` David Gibson
2015-04-10 6:30 ` [PATCH kernel v8 02/31] vfio: powerpc/spapr: Do cleanup when releasing the group Alexey Kardashevskiy
2015-04-15 4:00 ` David Gibson
2015-04-10 6:30 ` [PATCH kernel v8 03/31] vfio: powerpc/spapr: Check that IOMMU page is fully contained by system page Alexey Kardashevskiy
2015-04-15 4:03 ` David Gibson
2015-04-10 6:30 ` [PATCH kernel v8 04/31] vfio: powerpc/spapr: Use it_page_size Alexey Kardashevskiy
2015-04-10 6:30 ` [PATCH kernel v8 05/31] vfio: powerpc/spapr: Move locked_vm accounting to helpers Alexey Kardashevskiy
2015-04-15 4:09 ` David Gibson
2015-04-10 6:30 ` [PATCH kernel v8 06/31] vfio: powerpc/spapr: Disable DMA mappings on disabled container Alexey Kardashevskiy
2015-04-15 7:05 ` David Gibson
2015-04-10 6:30 ` [PATCH kernel v8 07/31] vfio: powerpc/spapr: Moving pinning/unpinning to helpers Alexey Kardashevskiy
2015-04-15 7:10 ` David Gibson
2015-04-15 12:09 ` Alexey Kardashevskiy
2015-04-10 6:30 ` [PATCH kernel v8 08/31] vfio: powerpc/spapr: Rework groups attaching Alexey Kardashevskiy
2015-04-15 7:14 ` David Gibson
2015-04-10 6:30 ` [PATCH kernel v8 09/31] powerpc/powernv: Do not set "read" flag if direction==DMA_NONE Alexey Kardashevskiy
2015-04-15 7:17 ` David Gibson
2015-04-10 6:30 ` [PATCH kernel v8 10/31] powerpc/iommu: Move tce_xxx callbacks from ppc_md to iommu_table Alexey Kardashevskiy
2015-04-15 7:23 ` David Gibson
2015-04-10 6:30 ` [PATCH kernel v8 11/31] powerpc/iommu: Introduce iommu_table_alloc() helper Alexey Kardashevskiy
2015-04-16 5:31 ` David Gibson
2015-04-10 6:30 ` [PATCH kernel v8 12/31] powerpc/spapr: vfio: Switch from iommu_table to new iommu_table_group Alexey Kardashevskiy
2015-04-16 5:55 ` David Gibson
2015-04-16 15:48 ` Alexey Kardashevskiy
2015-04-20 2:36 ` David Gibson
2015-04-17 9:46 ` Alexey Kardashevskiy
2015-04-20 2:37 ` David Gibson
2015-04-10 6:30 ` [PATCH kernel v8 13/31] vfio: powerpc/spapr: powerpc/iommu: Rework IOMMU ownership control Alexey Kardashevskiy
2015-04-16 6:00 ` David Gibson
2015-04-10 6:30 ` [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: " Alexey Kardashevskiy
2015-04-16 6:07 ` David Gibson
2015-04-17 10:09 ` Alexey Kardashevskiy
2015-04-20 2:44 ` David Gibson
2015-04-20 6:55 ` Alexey Kardashevskiy
2015-04-21 9:43 ` David Gibson
2015-04-21 11:47 ` Alexey Kardashevskiy
2015-04-22 5:22 ` David Gibson
2015-04-10 6:30 ` [PATCH kernel v8 15/31] powerpc/iommu: Fix IOMMU ownership control functions Alexey Kardashevskiy
2015-04-10 21:28 ` Alex Williamson
2015-04-16 6:10 ` David Gibson
2015-04-17 10:16 ` Alexey Kardashevskiy
2015-04-20 2:46 ` David Gibson
2015-04-20 6:34 ` Alexey Kardashevskiy
2015-04-21 7:12 ` David Gibson
2015-04-10 6:30 ` [PATCH kernel v8 16/31] powerpc/powernv/ioda/ioda2: Rework tce_build()/tce_free() Alexey Kardashevskiy
2015-04-16 6:17 ` David Gibson
2015-04-10 6:30 ` [PATCH kernel v8 17/31] powerpc/iommu/powernv: Release replaced TCE Alexey Kardashevskiy
2015-04-16 6:26 ` David Gibson
2015-04-17 10:37 ` Alexey Kardashevskiy
2015-04-20 2:50 ` David Gibson
2015-04-10 6:31 ` [PATCH kernel v8 18/31] powerpc/powernv/ioda2: Rework iommu_table creation Alexey Kardashevskiy
2015-04-16 6:29 ` David Gibson
2015-04-10 6:31 ` [PATCH kernel v8 19/31] powerpc/powernv/ioda2: Introduce pnv_pci_ioda2_create_table/pnc_pci_free_table Alexey Kardashevskiy
2015-04-16 6:42 ` David Gibson
2015-04-10 6:31 ` [PATCH kernel v8 20/31] powerpc/powernv/ioda2: Introduce pnv_pci_ioda2_set_window Alexey Kardashevskiy
2015-04-16 6:43 ` David Gibson
2015-04-10 6:31 ` [PATCH kernel v8 21/31] powerpc/iommu: Split iommu_free_table into 2 helpers Alexey Kardashevskiy
2015-04-16 6:46 ` David Gibson
2015-04-16 16:29 ` Alexey Kardashevskiy
2015-04-20 2:51 ` David Gibson
2015-04-10 6:31 ` [PATCH kernel v8 22/31] powerpc/powernv: Implement multilevel TCE tables Alexey Kardashevskiy
2015-04-10 6:31 ` [PATCH kernel v8 23/31] powerpc/powernv: Change prototypes to receive iommu Alexey Kardashevskiy
2015-04-10 6:31 ` [PATCH kernel v8 24/31] powerpc/powernv/ioda: Define and implement DMA table/window management callbacks Alexey Kardashevskiy
2015-04-10 6:31 ` [PATCH kernel v8 25/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework ownership Alexey Kardashevskiy
2015-04-10 6:31 ` [PATCH kernel v8 26/31] powerpc/iommu: Add userspace view of TCE table Alexey Kardashevskiy
2015-04-10 21:31 ` Alex Williamson
2015-04-10 6:31 ` [PATCH kernel v8 27/31] powerpc/iommu/ioda2: Add get_table_size() to calculate the size of fiture table Alexey Kardashevskiy
2015-04-10 6:31 ` Alexey Kardashevskiy [this message]
2015-04-10 6:31 ` [PATCH kernel v8 29/31] vfio: powerpc/spapr: Register memory and define IOMMU v2 Alexey Kardashevskiy
2015-04-10 6:31 ` [PATCH kernel v8 30/31] vfio: powerpc/spapr: Support multiple groups in one container if possible Alexey Kardashevskiy
2015-04-10 6:31 ` [PATCH kernel v8 31/31] vfio: powerpc/spapr: Support Dynamic DMA windows Alexey Kardashevskiy
2015-04-10 22:13 ` [PATCH kernel v8 00/31] powerpc/iommu/vfio: Enable " Alex Williamson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1428647473-11738-29-git-send-email-aik@ozlabs.ru \
--to=aik@ozlabs.ru \
--cc=alex.williamson@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=paulus@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).