From: Peter Zijlstra <peterz@infradead.org>
To: torvalds@linux-foundation.org, paulmck@linux.vnet.ibm.com,
tglx@linutronix.de, akpm@linux-foundation.org, riel@redhat.com,
mgorman@suse.de, oleg@redhat.com, mingo@redhat.com,
minchan@kernel.org, kamezawa.hiroyu@jp.fujitsu.com,
viro@zeniv.linux.org.uk, laijs@cn.fujitsu.com, dave@stgolabs.net
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
Peter Zijlstra <peterz@infradead.org>
Subject: [RFC][PATCH 1/6] mm: Dont assume page-table invariance during faults
Date: Mon, 20 Oct 2014 23:56:34 +0200 [thread overview]
Message-ID: <20141020222841.244195829@infradead.org> (raw)
In-Reply-To: 20141020215633.717315139@infradead.org
[-- Attachment #1: peterz-mm-kill-pte-pointer.patch --]
[-- Type: text/plain, Size: 6982 bytes --]
One of the side effects of speculating on faults (without holding
mmap_sem) is that we can race with free_pgtables() and therefore we
cannot assume the page-tables will stick around.
Remove the reliance on the pte pointer.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
mm/memory.c | 76 ++++++++++++++++--------------------------------------------
1 file changed, 21 insertions(+), 55 deletions(-)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1933,31 +1933,6 @@ int apply_to_page_range(struct mm_struct
}
EXPORT_SYMBOL_GPL(apply_to_page_range);
-/*
- * handle_pte_fault chooses page fault handler according to an entry
- * which was read non-atomically. Before making any commitment, on
- * those architectures or configurations (e.g. i386 with PAE) which
- * might give a mix of unmatched parts, do_swap_page and do_nonlinear_fault
- * must check under lock before unmapping the pte and proceeding
- * (but do_wp_page is only called after already making such a check;
- * and do_anonymous_page can safely check later on).
- */
-static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
- pte_t *page_table, pte_t orig_pte)
-{
- int same = 1;
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
- if (sizeof(pte_t) > sizeof(unsigned long)) {
- spinlock_t *ptl = pte_lockptr(mm, pmd);
- spin_lock(ptl);
- same = pte_same(*page_table, orig_pte);
- spin_unlock(ptl);
- }
-#endif
- pte_unmap(page_table);
- return same;
-}
-
static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
{
debug_dma_assert_idle(src);
@@ -2407,21 +2382,18 @@ EXPORT_SYMBOL(unmap_mapping_range);
* as does filemap_fault().
*/
static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd,
+ unsigned long address, pmd_t *pmd,
unsigned int flags, pte_t orig_pte)
{
spinlock_t *ptl;
struct page *page, *swapcache;
struct mem_cgroup *memcg;
swp_entry_t entry;
- pte_t pte;
+ pte_t *page_table, pte;
int locked;
int exclusive = 0;
int ret = 0;
- if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
- goto out;
-
entry = pte_to_swp_entry(orig_pte);
if (unlikely(non_swap_entry(entry))) {
if (is_migration_entry(entry)) {
@@ -2624,15 +2596,13 @@ static inline int check_stack_guard_page
* We return with mmap_sem still held, but pte unmapped and unlocked.
*/
static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd,
+ unsigned long address, pmd_t *pmd,
unsigned int flags)
{
struct mem_cgroup *memcg;
struct page *page;
spinlock_t *ptl;
- pte_t entry;
-
- pte_unmap(page_table);
+ pte_t entry, *page_table;
/* Check if we need to add a guard page to the stack */
if (check_stack_guard_page(vma, address) < 0)
@@ -3031,13 +3001,12 @@ static int do_shared_fault(struct mm_str
* return value. See filemap_fault() and __lock_page_or_retry().
*/
static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd,
+ unsigned long address, pmd_t *pmd,
unsigned int flags, pte_t orig_pte)
{
pgoff_t pgoff = (((address & PAGE_MASK)
- vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
- pte_unmap(page_table);
if (!(flags & FAULT_FLAG_WRITE))
return do_read_fault(mm, vma, address, pmd, pgoff, flags,
orig_pte);
@@ -3059,16 +3028,13 @@ static int do_linear_fault(struct mm_str
* return value. See filemap_fault() and __lock_page_or_retry().
*/
static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd,
+ unsigned long address, pmd_t *pmd,
unsigned int flags, pte_t orig_pte)
{
pgoff_t pgoff;
flags |= FAULT_FLAG_NONLINEAR;
- if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
- return 0;
-
if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
/*
* Page table corrupted: show pte and kill process.
@@ -3103,7 +3069,7 @@ static int numa_migrate_prep(struct page
}
static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
+ unsigned long addr, pte_t pte, pmd_t *pmd)
{
struct page *page = NULL;
spinlock_t *ptl;
@@ -3112,6 +3078,7 @@ static int do_numa_page(struct mm_struct
int target_nid;
bool migrated = false;
int flags = 0;
+ pte_t *ptep;
/*
* The "pte" at this point cannot be used safely without
@@ -3122,8 +3089,7 @@ static int do_numa_page(struct mm_struct
* the _PAGE_NUMA bit and it is not really expected that there
* would be concurrent hardware modifications to the PTE.
*/
- ptl = pte_lockptr(mm, pmd);
- spin_lock(ptl);
+ ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
if (unlikely(!pte_same(*ptep, pte))) {
pte_unmap_unlock(ptep, ptl);
goto out;
@@ -3195,34 +3161,32 @@ static int do_numa_page(struct mm_struct
*/
static int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
- pte_t *pte, pmd_t *pmd, unsigned int flags)
+ pte_t entry, pmd_t *pmd, unsigned int flags)
{
- pte_t entry;
spinlock_t *ptl;
+ pte_t *pte;
- entry = ACCESS_ONCE(*pte);
if (!pte_present(entry)) {
if (pte_none(entry)) {
if (vma->vm_ops) {
if (likely(vma->vm_ops->fault))
return do_linear_fault(mm, vma, address,
- pte, pmd, flags, entry);
+ pmd, flags, entry);
}
return do_anonymous_page(mm, vma, address,
- pte, pmd, flags);
+ pmd, flags);
}
if (pte_file(entry))
return do_nonlinear_fault(mm, vma, address,
- pte, pmd, flags, entry);
+ pmd, flags, entry);
return do_swap_page(mm, vma, address,
- pte, pmd, flags, entry);
+ pmd, flags, entry);
}
if (pte_numa(entry))
- return do_numa_page(mm, vma, address, entry, pte, pmd);
+ return do_numa_page(mm, vma, address, entry, pmd);
- ptl = pte_lockptr(mm, pmd);
- spin_lock(ptl);
+ pte = pte_offset_map_lock(mm, pmd, address, &ptl);
if (unlikely(!pte_same(*pte, entry)))
goto unlock;
if (flags & FAULT_FLAG_WRITE) {
@@ -3261,7 +3225,7 @@ static int __handle_mm_fault(struct mm_s
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
- pte_t *pte;
+ pte_t *pte, entry;
if (unlikely(is_vm_hugetlb_page(vma)))
return hugetlb_fault(mm, vma, address, flags);
@@ -3331,8 +3295,10 @@ static int __handle_mm_fault(struct mm_s
* safe to run pte_offset_map().
*/
pte = pte_offset_map(pmd, address);
+ entry = ACCESS_ONCE(*pte);
+ pte_unmap(pte);
- return handle_pte_fault(mm, vma, address, pte, pmd, flags);
+ return handle_pte_fault(mm, vma, address, entry, pmd, flags);
}
/*
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2014-10-20 22:41 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-10-20 21:56 [RFC][PATCH 0/6] Another go at speculative page faults Peter Zijlstra
2014-10-20 21:56 ` Peter Zijlstra [this message]
2014-10-20 21:56 ` [RFC][PATCH 2/6] mm: Prepare for FAULT_FLAG_SPECULATIVE Peter Zijlstra
2014-10-20 21:56 ` [RFC][PATCH 3/6] mm: VMA sequence count Peter Zijlstra
2014-10-22 11:26 ` Kirill A. Shutemov
2014-10-22 11:39 ` Peter Zijlstra
2014-10-22 11:53 ` Kirill A. Shutemov
2014-10-22 12:15 ` Peter Zijlstra
2014-10-22 13:44 ` Peter Zijlstra
2014-10-23 12:36 ` Kirill A. Shutemov
2014-10-23 14:22 ` Peter Zijlstra
2014-10-23 15:05 ` Kirill A. Shutemov
2014-10-20 21:56 ` [RFC][PATCH 4/6] SRCU free VMAs Peter Zijlstra
2014-10-20 23:41 ` Linus Torvalds
2014-10-21 8:07 ` Peter Zijlstra
2014-10-24 15:16 ` Christoph Lameter
2014-10-24 15:51 ` Peter Zijlstra
2014-10-24 17:08 ` Christoph Lameter
2014-10-21 8:22 ` Peter Zijlstra
2014-10-23 10:14 ` Lai Jiangshan
2014-10-23 11:03 ` Peter Zijlstra
2014-10-24 3:33 ` Lai Jiangshan
2014-10-24 7:26 ` Peter Zijlstra
2014-10-20 21:56 ` [RFC][PATCH 5/6] mm: Provide speculative fault infrastructure Peter Zijlstra
2014-10-21 8:35 ` Kirill A. Shutemov
2014-10-21 10:41 ` Peter Zijlstra
2014-10-21 19:00 ` Peter Zijlstra
2014-10-20 21:56 ` [RFC][PATCH 6/6] mm,x86: Add speculative pagefault handling Peter Zijlstra
2014-10-21 0:07 ` [RFC][PATCH 0/6] Another go at speculative page faults Andy Lutomirski
2014-10-21 8:11 ` Peter Zijlstra
2014-10-21 16:23 ` Ingo Molnar
2014-10-21 17:09 ` Kirill A. Shutemov
2014-10-21 17:56 ` Peter Zijlstra
2014-10-23 10:40 ` Lai Jiangshan
2014-10-23 11:04 ` Peter Zijlstra
2014-10-24 7:54 ` Ingo Molnar
2014-10-24 13:14 ` Peter Zijlstra
2014-10-28 5:32 ` Namhyung Kim
2014-10-21 17:25 ` Peter Zijlstra
2014-10-22 12:35 ` Ingo Molnar
2014-10-22 7:34 ` Davidlohr Bueso
2014-10-22 11:29 ` Kirill A. Shutemov
2014-10-22 11:45 ` Peter Zijlstra
2014-10-22 11:55 ` Kirill A. Shutemov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20141020222841.244195829@infradead.org \
--to=peterz@infradead.org \
--cc=akpm@linux-foundation.org \
--cc=dave@stgolabs.net \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=laijs@cn.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@suse.de \
--cc=minchan@kernel.org \
--cc=mingo@redhat.com \
--cc=oleg@redhat.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=riel@redhat.com \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).