From: Qiaowei Ren <qiaowei.ren@intel.com>
To: "H. Peter Anvin" <hpa@zytor.com>,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>,
Dave Hansen <dave.hansen@intel.com>
Cc: x86@kernel.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Qiaowei Ren <qiaowei.ren@intel.com>
Subject: [PATCH v8 09/10] x86, mpx: cleanup unused bound tables
Date: Thu, 11 Sep 2014 16:46:49 +0800
Message-ID: <1410425210-24789-10-git-send-email-qiaowei.ren@intel.com>
In-Reply-To: <1410425210-24789-1-git-send-email-qiaowei.ren@intel.com>
Since the kernel allocates these bounds tables on demand, without
userspace's knowledge, it is also responsible for freeing them when the
associated mappings go away.

The solution here is to hook do_munmap() and check whether the process
is MPX-enabled. If it is, any bounds tables covered by the virtual
address region being unmapped are freed as well.
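For reference, the directory/table address arithmetic that the cleanup
path relies on can be sketched as a standalone userspace program. This
is only an illustration: the constants mirror the 64-bit layout used
elsewhere in this series, and the helpers are hypothetical restatements
of the MPX_GET_*_OFFSET macros, not kernel code.

#include <stdio.h>

/* Assumed 64-bit MPX layout constants (mirroring asm/mpx.h) */
#define MPX_IGN_BITS		3	/* low address bits ignored	*/
#define MPX_BT_ENTRY_OFFSET	17	/* 2^17 entries per bounds table */
#define MPX_BT_ENTRY_SHIFT	5	/* 32-byte bounds table entries	*/
#define MPX_BD_ENTRY_OFFSET	28	/* 2^28 bounds directory entries */
#define MPX_BD_ENTRY_SHIFT	3	/* 8-byte directory entries	*/

#define MPX_BD_ENTRY_MASK	((1UL<<MPX_BD_ENTRY_OFFSET)-1)
#define MPX_BT_ENTRY_MASK	((1UL<<MPX_BT_ENTRY_OFFSET)-1)

/* Byte offset of the bounds directory entry covering 'addr' */
static unsigned long bd_entry_offset(unsigned long addr)
{
	return ((addr >> (MPX_BT_ENTRY_OFFSET + MPX_IGN_BITS))
		& MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT;
}

/* Byte offset of the bounds table entry covering 'addr' */
static unsigned long bt_entry_offset(unsigned long addr)
{
	return ((addr >> MPX_IGN_BITS) & MPX_BT_ENTRY_MASK)
		<< MPX_BT_ENTRY_SHIFT;
}

int main(void)
{
	unsigned long start = 0x00007f0000100000UL;
	unsigned long end   = start + 0x2000;

	/*
	 * Two addresses are covered by the same bounds table exactly
	 * when their bounds directory offsets match; this is the
	 * bde_start == bde_end case the unmap path handles specially.
	 */
	printf("bd offsets: %#lx %#lx\n",
	       bd_entry_offset(start), bd_entry_offset(end - 1));
	printf("bt offsets: %#lx %#lx\n",
	       bt_entry_offset(start), bt_entry_offset(end - 1));
	return 0;
}

With bits [47:20] indexing the directory, each directory entry covers
1MB of address space, which is why an munmap() region smaller than that
may share its bounds table with neighboring VMAs.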
Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com>
---
arch/x86/include/asm/mmu_context.h | 16 +++
arch/x86/include/asm/mpx.h | 9 ++
arch/x86/mm/mpx.c | 252 ++++++++++++++++++++++++++++++++++++
include/asm-generic/mmu_context.h | 6 +
mm/mmap.c | 2 +
5 files changed, 285 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 166af2a..d13e01c 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -10,6 +10,7 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
+#include <asm/mpx.h>
#ifndef CONFIG_PARAVIRT
#include <asm-generic/mm_hooks.h>
@@ -102,4 +103,19 @@ do { \
} while (0)
#endif
+static inline void arch_unmap(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+#ifdef CONFIG_X86_INTEL_MPX
+ /*
+ * Check whether this vma comes from an MPX-enabled application.
+ * If so, release the bounds tables related to this vma.
+ */
+ if (mm->bd_addr && !(vma->vm_flags & VM_MPX))
+ mpx_unmap(mm, start, end);
+
+#endif
+}
+
#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
index 6cb0853..e848a74 100644
--- a/arch/x86/include/asm/mpx.h
+++ b/arch/x86/include/asm/mpx.h
@@ -42,6 +42,13 @@
#define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT))
#define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT))
+#define MPX_BD_ENTRY_MASK ((1<<MPX_BD_ENTRY_OFFSET)-1)
+#define MPX_BT_ENTRY_MASK ((1<<MPX_BT_ENTRY_OFFSET)-1)
+#define MPX_GET_BD_ENTRY_OFFSET(addr) ((((addr)>>(MPX_BT_ENTRY_OFFSET+ \
+ MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT)
+#define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \
+ MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT)
+
#define MPX_BNDSTA_ERROR_CODE 0x3
#define MPX_BNDCFG_ENABLE_FLAG 0x1
#define MPX_BD_ENTRY_VALID_FLAG 0x1
@@ -63,6 +70,8 @@ struct mpx_insn {
#define MAX_MPX_INSN_SIZE 15
unsigned long mpx_mmap(unsigned long len);
+void mpx_unmap(struct mm_struct *mm,
+ unsigned long start, unsigned long end);
#ifdef CONFIG_X86_INTEL_MPX
int do_mpx_bt_fault(struct xsave_struct *xsave_buf);
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index e1b28e6..feb1f01 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -1,7 +1,16 @@
+/*
+ * mpx.c - Memory Protection eXtensions
+ *
+ * Copyright (c) 2014, Intel Corporation.
+ * Qiaowei Ren <qiaowei.ren@intel.com>
+ * Dave Hansen <dave.hansen@intel.com>
+ */
+
#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <asm/mpx.h>
#include <asm/mman.h>
+#include <asm/mmu_context.h>
#include <linux/sched/sysctl.h>
static const char *mpx_mapping_name(struct vm_area_struct *vma)
@@ -77,3 +86,246 @@ out:
up_write(&mm->mmap_sem);
return ret;
}
+
+/*
+ * Get the base of the bounds table pointed to by a specific
+ * bounds directory entry.
+ */
+static int get_bt_addr(long __user *bd_entry, unsigned long *bt_addr)
+{
+ int valid;
+
+ if (!access_ok(VERIFY_READ, (bd_entry), sizeof(*(bd_entry))))
+ return -EFAULT;
+
+ pagefault_disable();
+ if (get_user(*bt_addr, bd_entry))
+ goto out;
+ pagefault_enable();
+
+ valid = *bt_addr & MPX_BD_ENTRY_VALID_FLAG;
+ *bt_addr &= MPX_BT_ADDR_MASK;
+
+ /*
+ * If this bounds directory entry is nonzero while its valid
+ * bit is zero, something unexpected happened; a SIGSEGV will
+ * be raised.
+ */
+ if (!valid && *bt_addr)
+ return -EINVAL;
+ if (!valid)
+ return -ENOENT;
+
+ return 0;
+
+out:
+ pagefault_enable();
+ return -EFAULT;
+}
+
+/*
+ * Free the backing physical pages of the bounds table at 'bt_addr'.
+ * Assume start...end lies within that bounds table.
+ */
+static int __must_check zap_bt_entries(struct mm_struct *mm,
+ unsigned long bt_addr,
+ unsigned long start, unsigned long end)
+{
+ struct vm_area_struct *vma;
+
+ /* Find the vma which overlaps this bounds table */
+ vma = find_vma(mm, bt_addr);
+ /*
+ * The table entry comes from userspace and could be
+ * pointing anywhere, so make sure it is at least
+ * pointing to valid memory.
+ */
+ if (!vma || !(vma->vm_flags & VM_MPX) ||
+ vma->vm_start > bt_addr ||
+ vma->vm_end < bt_addr+MPX_BT_SIZE_BYTES)
+ return -EINVAL;
+
+ zap_page_range(vma, start, end - start, NULL);
+ return 0;
+}
+
+static int __must_check unmap_single_bt(struct mm_struct *mm,
+ long __user *bd_entry, unsigned long bt_addr)
+{
+ int ret;
+
+ pagefault_disable();
+ ret = user_atomic_cmpxchg_inatomic(&bt_addr, bd_entry,
+ bt_addr | MPX_BD_ENTRY_VALID_FLAG, 0);
+ pagefault_enable();
+ if (ret)
+ return -EFAULT;
+
+ /*
+ * To avoid recursion, do_munmap() checks the VM_MPX flag to
+ * detect when it is unmapping a bounds table.
+ */
+ return do_munmap(mm, bt_addr & MPX_BT_ADDR_MASK, MPX_BT_SIZE_BYTES);
+}
+
+/*
+ * If the bounds table pointed to by bounds directory entry
+ * 'bd_entry' is not shared, unmap the whole bounds table.
+ * Otherwise, free only the backing physical pages of the bounds
+ * table entries covered by the virtual address region start...end.
+ */
+static int __must_check unmap_shared_bt(struct mm_struct *mm,
+ long __user *bd_entry, unsigned long start,
+ unsigned long end, bool prev_shared, bool next_shared)
+{
+ unsigned long bt_addr;
+ int ret;
+
+ ret = get_bt_addr(bd_entry, &bt_addr);
+ if (ret)
+ return ret;
+
+ if (prev_shared && next_shared)
+ ret = zap_bt_entries(mm, bt_addr,
+ bt_addr+MPX_GET_BT_ENTRY_OFFSET(start),
+ bt_addr+MPX_GET_BT_ENTRY_OFFSET(end));
+ else if (prev_shared)
+ ret = zap_bt_entries(mm, bt_addr,
+ bt_addr+MPX_GET_BT_ENTRY_OFFSET(start),
+ bt_addr+MPX_BT_SIZE_BYTES);
+ else if (next_shared)
+ ret = zap_bt_entries(mm, bt_addr, bt_addr,
+ bt_addr+MPX_GET_BT_ENTRY_OFFSET(end));
+ else
+ ret = unmap_single_bt(mm, bd_entry, bt_addr);
+
+ return ret;
+}
+
+/*
+ * A virtual address region being munmap()ed might share bounds
+ * tables with adjacent VMAs. We only need to free the backing
+ * physical memory of the shared bounds table entries covered by
+ * this virtual address region.
+ *
+ * The VMAs covering the virtual address region start...end have
+ * already been split if necessary and removed from the VMA list.
+ */
+static int __must_check unmap_side_bts(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ int ret;
+ long __user *bde_start, *bde_end;
+ struct vm_area_struct *prev, *next;
+ bool prev_shared = false, next_shared = false;
+
+ bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start);
+ bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1);
+
+ /*
+ * Check whether bde_start and bde_end are shared with adjacent
+ * VMAs. Because the VMAs covering the virtual address region
+ * start...end have already been removed from the VMA list, if
+ * next is not NULL it will satisfy start < end <= next->vm_start.
+ * And if prev is not NULL, prev->vm_end <= start < end.
+ */
+ next = find_vma_prev(mm, start, &prev);
+ if (prev && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(prev->vm_end-1))
+ == bde_start)
+ prev_shared = true;
+ if (next && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(next->vm_start))
+ == bde_end)
+ next_shared = true;
+
+ /*
+ * The virtual address region being munmap()ed is covered by
+ * only one bounds table.
+ *
+ * In this case, if the table is also shared with adjacent
+ * VMAs, only part of the backing physical memory of the bounds
+ * table needs to be freed. Otherwise the whole bounds table
+ * needs to be unmapped.
+ */
+ if (bde_start == bde_end) {
+ return unmap_shared_bt(mm, bde_start, start, end,
+ prev_shared, next_shared);
+ }
+
+ /*
+ * If more than one bounds table is covered by the virtual
+ * address region being munmap()ed, we need to check separately
+ * whether bde_start and bde_end are shared with adjacent VMAs.
+ */
+ ret = unmap_shared_bt(mm, bde_start, start, end, prev_shared, false);
+ if (ret)
+ return ret;
+
+ ret = unmap_shared_bt(mm, bde_end, start, end, false, next_shared);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int __must_check mpx_try_unmap(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ int ret;
+ long __user *bd_entry, *bde_start, *bde_end;
+ unsigned long bt_addr;
+
+ /*
+ * Unmap the bounds tables pointed to by the start/end bounds
+ * directory entries, or free only part of their backing
+ * physical memory if they are shared with adjacent VMAs.
+ */
+ ret = unmap_side_bts(mm, start, end);
+ if (ret == -EFAULT)
+ return ret;
+
+ /*
+ * Unmap those bounds tables which are entirely covered by this
+ * virtual address region.
+ */
+ bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start);
+ bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1);
+ for (bd_entry = bde_start + 1; bd_entry < bde_end; bd_entry++) {
+ ret = get_bt_addr(bd_entry, &bt_addr);
+ /*
+ * A fault means we have to drop mmap_sem,
+ * perform the fault, and retry this somehow.
+ */
+ if (ret == -EFAULT)
+ return ret;
+ /*
+ * Any other issue (like a bad bounds-directory)
+ * we can try the next one.
+ */
+ if (ret)
+ continue;
+
+ ret = unmap_single_bt(mm, bd_entry, bt_addr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Free unused bounds tables covered by a virtual address region
+ * being munmap()ed. Assume end > start.
+ *
+ * This function is called by do_munmap(), and the VMAs covering
+ * the virtual address region start...end have already been split
+ * if necessary and removed from the VMA list.
+ */
+void mpx_unmap(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ int ret;
+
+ ret = mpx_try_unmap(mm, start, end);
+ if (ret == -EINVAL)
+ force_sig(SIGSEGV, current);
+}
diff --git a/include/asm-generic/mmu_context.h b/include/asm-generic/mmu_context.h
index a7eec91..ac558ca 100644
--- a/include/asm-generic/mmu_context.h
+++ b/include/asm-generic/mmu_context.h
@@ -42,4 +42,10 @@ static inline void activate_mm(struct mm_struct *prev_mm,
{
}
+static inline void arch_unmap(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+}
+
#endif /* __ASM_GENERIC_MMU_CONTEXT_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index c1f2ea4..abe533f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2583,6 +2583,8 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
/* Fix up all other VM information */
remove_vma_list(mm, vma);
+ arch_unmap(mm, vma, start, end);
+
return 0;
}
--
1.7.1
Thread overview: 65+ messages
2014-09-11 8:46 [PATCH v8 00/10] Intel MPX support Qiaowei Ren
2014-09-11 8:46 ` [PATCH v8 01/10] x86, mpx: introduce VM_MPX to indicate that a VMA is MPX specific Qiaowei Ren
2014-09-11 8:46 ` [PATCH v8 02/10] x86, mpx: add MPX specific mmap interface Qiaowei Ren
2014-09-11 8:46 ` [PATCH v8 03/10] x86, mpx: add macro cpu_has_mpx Qiaowei Ren
2014-09-11 8:46 ` [PATCH v8 04/10] x86, mpx: hook #BR exception handler to allocate bound tables Qiaowei Ren
2014-09-12 22:58 ` Dave Hansen
2014-09-13 7:24 ` Ren, Qiaowei
2014-09-24 14:40 ` Dave Hansen
2014-09-11 8:46 ` [PATCH v8 05/10] x86, mpx: extend siginfo structure to include bound violation information Qiaowei Ren
2014-09-11 8:46 ` [PATCH v8 06/10] mips: sync struct siginfo with general version Qiaowei Ren
2014-09-11 22:13 ` Thomas Gleixner
2014-09-12 2:54 ` Ren, Qiaowei
2014-09-12 8:17 ` Thomas Gleixner
2014-09-13 7:13 ` Ren, Qiaowei
2014-09-11 8:46 ` [PATCH v8 07/10] x86, mpx: decode MPX instruction to get bound violation information Qiaowei Ren
2014-09-11 22:18 ` Thomas Gleixner
2014-09-11 22:32 ` Dave Hansen
2014-09-11 22:35 ` H. Peter Anvin
2014-09-11 23:37 ` Thomas Gleixner
2014-09-12 4:44 ` H. Peter Anvin
2014-09-12 13:10 ` Thomas Gleixner
2014-09-12 13:39 ` H. Peter Anvin
2014-09-12 17:48 ` Thomas Gleixner
2014-09-12 17:52 ` Thomas Gleixner
2014-09-12 19:07 ` H. Peter Anvin
2014-09-11 8:46 ` [PATCH v8 08/10] x86, mpx: add prctl commands PR_MPX_REGISTER, PR_MPX_UNREGISTER Qiaowei Ren
2014-09-11 15:03 ` Dave Hansen
2014-09-12 3:10 ` Ren, Qiaowei
2014-09-11 23:28 ` Thomas Gleixner
2014-09-12 0:10 ` Dave Hansen
2014-09-12 8:11 ` Thomas Gleixner
2014-09-12 9:24 ` Thomas Gleixner
2014-09-12 14:36 ` Dave Hansen
2014-09-12 17:34 ` Thomas Gleixner
2014-09-12 18:42 ` Thomas Gleixner
2014-09-12 20:35 ` Dave Hansen
2014-09-12 20:18 ` Dave Hansen
2014-09-13 9:01 ` Thomas Gleixner
2014-09-12 15:22 ` Dave Hansen
2014-09-12 17:42 ` Thomas Gleixner
2014-09-12 20:33 ` Dave Hansen
2014-09-15 0:00 ` One Thousand Gnomes
2014-09-16 3:20 ` Ren, Qiaowei
2014-09-16 4:17 ` Dave Hansen
2014-09-16 7:50 ` Kevin Easton
2014-09-18 0:40 ` Ren, Qiaowei
2014-09-18 3:23 ` Kevin Easton
2014-09-18 2:37 ` Ren, Qiaowei
2014-09-18 4:43 ` Dave Hansen
2014-09-18 7:17 ` Kevin Easton
2014-09-18 6:20 ` Dave Hansen
2014-09-11 8:46 ` Qiaowei Ren [this message]
2014-09-11 14:59 ` [PATCH v8 09/10] x86, mpx: cleanup unused bound tables Dave Hansen
2014-09-12 3:02 ` Ren, Qiaowei
2014-09-12 4:59 ` Dave Hansen
2014-09-15 20:53 ` Dave Hansen
2014-09-16 8:06 ` Ren, Qiaowei
2014-09-11 8:46 ` [PATCH v8 10/10] x86, mpx: add documentation on Intel MPX Qiaowei Ren
2014-09-12 0:51 ` [PATCH v8 00/10] Intel MPX support Dave Hansen
2014-09-12 19:21 ` Thomas Gleixner
2014-09-12 21:23 ` Dave Hansen
2014-09-13 9:25 ` Thomas Gleixner
2014-09-12 21:31 ` Dave Hansen
2014-09-12 22:08 ` Dave Hansen
2014-09-13 9:39 ` Thomas Gleixner