From: tip-bot for Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-tip-commits@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, hpa@zytor.com, mingo@kernel.org,
torvalds@linux-foundation.org, a.p.zijlstra@chello.nl,
pjt@google.com, riel@redhat.com, akpm@linux-foundation.org,
tglx@linutronix.de
Subject: [tip:sched/numa] mm/mpol: Use special PROT_NONE to migrate pages
Date: Wed, 26 Sep 2012 23:06:20 -0700 [thread overview]
Message-ID: <tip-e98gyl8kr9jzooh2s4piuils@git.kernel.org> (raw)
Commit-ID: 39d6cb39a81744473e13c693a9f988a9e342018b
Gitweb: http://git.kernel.org/tip/39d6cb39a81744473e13c693a9f988a9e342018b
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Tue, 17 Jul 2012 22:54:51 +0200
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 26 Sep 2012 11:48:35 +0200
mm/mpol: Use special PROT_NONE to migrate pages
Combine our previous PROT_NONE, mpol_misplaced() and
migrate_misplaced_page() pieces into an effective migrate-on-fault
scheme.
Suggested-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Paul Turner <pjt@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-e98gyl8kr9jzooh2s4piuils@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
mm/huge_memory.c | 41 ++++++++++++++++++++++++++++++++++++++++-
mm/memory.c | 42 ++++++++++++++++++++++++++++++++++++------
2 files changed, 76 insertions(+), 7 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5d7b114..a147d29 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -17,6 +17,7 @@
#include <linux/khugepaged.h>
#include <linux/freezer.h>
#include <linux/mman.h>
+#include <linux/migrate.h>
#include <asm/tlb.h>
#include <asm/pgalloc.h>
#include "internal.h"
@@ -766,12 +767,48 @@ void do_huge_pmd_prot_none(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned int flags, pmd_t entry)
{
unsigned long haddr = address & HPAGE_PMD_MASK;
+ struct page *page = NULL;
+ int node;
spin_lock(&mm->page_table_lock);
if (unlikely(!pmd_same(*pmd, entry)))
goto out_unlock;
- /* do fancy stuff */
+ if (unlikely(pmd_trans_splitting(entry))) {
+ spin_unlock(&mm->page_table_lock);
+ wait_split_huge_page(vma->anon_vma, pmd);
+ return;
+ }
+
+#ifdef CONFIG_NUMA
+ page = pmd_page(entry);
+ VM_BUG_ON(!PageCompound(page) || !PageHead(page));
+
+ get_page(page);
+ spin_unlock(&mm->page_table_lock);
+
+ /*
+ * XXX should we serialize against split_huge_page ?
+ */
+
+ node = mpol_misplaced(page, vma, haddr);
+ if (node == -1)
+ goto do_fixup;
+
+ /*
+ * Due to lacking code to migrate thp pages, we'll split
+ * (which preserves the special PROT_NONE) and re-take the
+ * fault on the normal pages.
+ */
+ split_huge_page(page);
+ put_page(page);
+ return;
+
+do_fixup:
+ spin_lock(&mm->page_table_lock);
+ if (unlikely(!pmd_same(*pmd, entry)))
+ goto out_unlock;
+#endif
/* change back to regular protection */
entry = pmd_modify(entry, vma->vm_page_prot);
@@ -780,6 +817,8 @@ void do_huge_pmd_prot_none(struct mm_struct *mm, struct vm_area_struct *vma,
out_unlock:
spin_unlock(&mm->page_table_lock);
+ if (page)
+ put_page(page);
}
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
diff --git a/mm/memory.c b/mm/memory.c
index bea2ed5..d896a24 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -57,6 +57,7 @@
#include <linux/swapops.h>
#include <linux/elf.h>
#include <linux/gfp.h>
+#include <linux/migrate.h>
#include <asm/io.h>
#include <asm/pgalloc.h>
@@ -3441,17 +3442,42 @@ static int do_prot_none(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *ptep, pmd_t *pmd,
unsigned int flags, pte_t entry)
{
+ struct page *page = NULL;
spinlock_t *ptl;
- int ret = 0;
+ int node;
- if (!pte_unmap_same(mm, pmd, ptep, entry))
- goto out;
+ ptl = pte_lockptr(mm, pmd);
+ spin_lock(ptl);
+ if (unlikely(!pte_same(*ptep, entry)))
+ goto unlock;
+#ifdef CONFIG_NUMA
/*
- * Do fancy stuff...
+ * For NUMA systems we use the special PROT_NONE maps to drive
+ * lazy page migration, see MPOL_MF_LAZY and related.
*/
+ page = vm_normal_page(vma, address, entry);
+ if (!page)
+ goto do_fixup_locked;
+
+ get_page(page);
+ pte_unmap_unlock(ptep, ptl);
+
+ node = mpol_misplaced(page, vma, address);
+ if (node == -1)
+ goto do_fixup;
/*
+ * Page migration will install a new pte with vma->vm_page_prot,
+ * otherwise fall-through to the fixup. Next time,.. perhaps.
+ */
+ if (!migrate_misplaced_page(mm, page, node)) {
+ put_page(page);
+ return 0;
+ }
+
+do_fixup:
+ /*
* OK, nothing to do,.. change the protection back to what it
* ought to be.
*/
@@ -3459,6 +3485,9 @@ static int do_prot_none(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(!pte_same(*ptep, entry)))
goto unlock;
+do_fixup_locked:
+#endif /* CONFIG_NUMA */
+
flush_cache_page(vma, address, pte_pfn(entry));
ptep_modify_prot_start(mm, address, ptep);
@@ -3468,8 +3497,9 @@ static int do_prot_none(struct mm_struct *mm, struct vm_area_struct *vma,
update_mmu_cache(vma, address, ptep);
unlock:
pte_unmap_unlock(ptep, ptl);
-out:
- return ret;
+ if (page)
+ put_page(page);
+ return 0;
}
/*
next reply other threads:[~2012-09-27 6:06 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-09-27 6:06 tip-bot for Peter Zijlstra [this message]
2012-10-02 20:14 ` [patch sched/numa] mm/mpol: Fix build warning for UMA kernels David Rientjes
2012-10-04 10:33 ` [tip:sched/numa] " tip-bot for David Rientjes
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=tip-e98gyl8kr9jzooh2s4piuils@git.kernel.org \
--to=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-tip-commits@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=pjt@google.com \
--cc=riel@redhat.com \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).