linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@kernel.org>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Paul Turner <pjt@google.com>,
	Lee Schermerhorn <Lee.Schermerhorn@hp.com>,
	Christoph Lameter <cl@linux.com>, Rik van Riel <riel@redhat.com>,
	Mel Gorman <mgorman@suse.de>,
	Andrew Morton <akpm@linux-foundation.org>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Thomas Gleixner <tglx@linutronix.de>,
	Lee Schermerhorn <lee.schermerhorn@hp.com>
Subject: [PATCH 15/31] mm/mpol: Add MPOL_MF_LAZY
Date: Tue, 13 Nov 2012 18:13:38 +0100	[thread overview]
Message-ID: <1352826834-11774-16-git-send-email-mingo@kernel.org> (raw)
In-Reply-To: <1352826834-11774-1-git-send-email-mingo@kernel.org>

From: Lee Schermerhorn <lee.schermerhorn@hp.com>

This patch adds another mbind() flag to request "lazy migration".  The
flag, MPOL_MF_LAZY, modifies MPOL_MF_MOVE* such that the selected
pages are marked PROT_NONE. The pages will be migrated in the fault
path on "first touch", if the policy dictates at that time.

"Lazy Migration" will allow testing of migrate-on-fault via mbind().
Also allows applications to specify that only subsequently touched
pages be migrated to obey new policy, instead of all pages in range.
This can be useful for multi-threaded applications working on a
large shared data area that is initialized by an initial thread
resulting in all pages on one [or a few, if overflowed] nodes.
After PROT_NONE, the pages in regions assigned to the worker threads
will be automatically migrated local to the threads on 1st touch.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
[ nearly complete rewrite.. ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-7rsodo9x8zvm5awru5o7zo0y@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/mempolicy.h | 13 ++++++++---
 mm/mempolicy.c                 | 49 +++++++++++++++++++++++++++---------------
 2 files changed, 42 insertions(+), 20 deletions(-)

diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 472de8a..6a1baae 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -49,9 +49,16 @@ enum mpol_rebind_step {
 
 /* Flags for mbind */
 #define MPOL_MF_STRICT	(1<<0)	/* Verify existing pages in the mapping */
-#define MPOL_MF_MOVE	(1<<1)	/* Move pages owned by this process to conform to mapping */
-#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to mapping */
-#define MPOL_MF_INTERNAL (1<<3)	/* Internal flags start here */
+#define MPOL_MF_MOVE	 (1<<1)	/* Move pages owned by this process to conform
+				   to policy */
+#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to policy */
+#define MPOL_MF_LAZY	 (1<<3)	/* Modifies '_MOVE:  lazy migrate on fault */
+#define MPOL_MF_INTERNAL (1<<4)	/* Internal flags start here */
+
+#define MPOL_MF_VALID	(MPOL_MF_STRICT   | 	\
+			 MPOL_MF_MOVE     | 	\
+			 MPOL_MF_MOVE_ALL |	\
+			 MPOL_MF_LAZY)
 
 /*
  * Internal flags that share the struct mempolicy flags word with
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1b2890c..5ee326c 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -583,22 +583,32 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		return ERR_PTR(-EFAULT);
 	prev = NULL;
 	for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
+		unsigned long endvma = vma->vm_end;
+
+		if (endvma > end)
+			endvma = end;
+		if (vma->vm_start > start)
+			start = vma->vm_start;
+
 		if (!(flags & MPOL_MF_DISCONTIG_OK)) {
 			if (!vma->vm_next && vma->vm_end < end)
 				return ERR_PTR(-EFAULT);
 			if (prev && prev->vm_end < vma->vm_start)
 				return ERR_PTR(-EFAULT);
 		}
-		if (!is_vm_hugetlb_page(vma) &&
-		    ((flags & MPOL_MF_STRICT) ||
+
+		if (is_vm_hugetlb_page(vma))
+			goto next;
+
+		if (flags & MPOL_MF_LAZY) {
+			change_prot_none(vma, start, endvma);
+			goto next;
+		}
+
+		if ((flags & MPOL_MF_STRICT) ||
 		     ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
-				vma_migratable(vma)))) {
-			unsigned long endvma = vma->vm_end;
+		      vma_migratable(vma))) {
 
-			if (endvma > end)
-				endvma = end;
-			if (vma->vm_start > start)
-				start = vma->vm_start;
 			err = check_pgd_range(vma, start, endvma, nodes,
 						flags, private);
 			if (err) {
@@ -606,6 +616,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 				break;
 			}
 		}
+next:
 		prev = vma;
 	}
 	return first;
@@ -1137,8 +1148,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 	int err;
 	LIST_HEAD(pagelist);
 
-	if (flags & ~(unsigned long)(MPOL_MF_STRICT |
-				     MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+  	if (flags & ~(unsigned long)MPOL_MF_VALID)
 		return -EINVAL;
 	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
 		return -EPERM;
@@ -1161,6 +1171,9 @@ static long do_mbind(unsigned long start, unsigned long len,
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 
+	if (flags & MPOL_MF_LAZY)
+		new->flags |= MPOL_F_MOF;
+
 	/*
 	 * If we are using the default policy then operation
 	 * on discontinuous address spaces is okay after all
@@ -1197,21 +1210,23 @@ static long do_mbind(unsigned long start, unsigned long len,
 	vma = check_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
 
-	err = PTR_ERR(vma);
-	if (!IS_ERR(vma)) {
-		int nr_failed = 0;
-
+	err = PTR_ERR(vma);	/* maybe ... */
+	if (!IS_ERR(vma) && mode != MPOL_NOOP)
 		err = mbind_range(mm, start, end, new);
 
+	if (!err) {
+		int nr_failed = 0;
+
 		if (!list_empty(&pagelist)) {
+			WARN_ON_ONCE(flags & MPOL_MF_LAZY);
 			nr_failed = migrate_pages(&pagelist, new_vma_page,
-						(unsigned long)vma,
-						false, MIGRATE_SYNC);
+						  (unsigned long)vma,
+						  false, MIGRATE_SYNC);
 			if (nr_failed)
 				putback_lru_pages(&pagelist);
 		}
 
-		if (!err && nr_failed && (flags & MPOL_MF_STRICT))
+		if (nr_failed && (flags & MPOL_MF_STRICT))
 			err = -EIO;
 	} else
 		putback_lru_pages(&pagelist);
-- 
1.7.11.7

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2012-11-13 17:15 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-11-13 17:13 [PATCH 00/31] Latest numa/core patches, v15 Ingo Molnar
2012-11-13 17:13 ` [PATCH 01/31] mm/generic: Only flush the local TLB in ptep_set_access_flags() Ingo Molnar
2012-11-13 17:13 ` [PATCH 02/31] x86/mm: Only do a local tlb flush " Ingo Molnar
2012-11-13 17:13 ` [PATCH 03/31] sched, numa, mm: Make find_busiest_queue() a method Ingo Molnar
2012-11-13 17:13 ` [PATCH 04/31] sched, numa, mm: Describe the NUMA scheduling problem formally Ingo Molnar
2012-11-13 17:13 ` [PATCH 05/31] sched, numa, mm, s390/thp: Implement pmd_pgprot() for s390 Ingo Molnar
2012-11-13 17:13 ` [PATCH 06/31] mm/thp: Preserve pgprot across huge page split Ingo Molnar
2012-11-13 17:13 ` [PATCH 07/31] x86/mm: Introduce pte_accessible() Ingo Molnar
2012-11-13 17:13 ` [PATCH 08/31] mm: Only flush the TLB when clearing an accessible pte Ingo Molnar
2012-11-13 17:13 ` [PATCH 09/31] sched, numa, mm, MIPS/thp: Add pmd_pgprot() implementation Ingo Molnar
2012-11-13 17:13 ` [PATCH 10/31] mm/pgprot: Move the pgprot_modify() fallback definition to mm.h Ingo Molnar
2012-11-13 17:13 ` [PATCH 11/31] mm/mpol: Make MPOL_LOCAL a real policy Ingo Molnar
2012-11-13 17:13 ` [PATCH 12/31] mm/mpol: Add MPOL_MF_NOOP Ingo Molnar
2012-11-13 17:13 ` [PATCH 13/31] mm/mpol: Check for misplaced page Ingo Molnar
2012-11-13 17:13 ` [PATCH 14/31] mm/mpol: Create special PROT_NONE infrastructure Ingo Molnar
2012-11-13 17:13 ` Ingo Molnar [this message]
2012-11-13 17:13 ` [PATCH 16/31] numa, mm: Support NUMA hinting page faults from gup/gup_fast Ingo Molnar
2012-11-13 17:13 ` [PATCH 17/31] mm/migrate: Introduce migrate_misplaced_page() Ingo Molnar
2012-11-13 17:13 ` [PATCH 18/31] mm/mpol: Use special PROT_NONE to migrate pages Ingo Molnar
2012-11-13 17:13 ` [PATCH 19/31] x86/mm: Completely drop the TLB flush from ptep_set_access_flags() Ingo Molnar
2012-11-13 17:13 ` [PATCH 20/31] sched, numa, mm: Introduce sched_feat_numa() Ingo Molnar
2012-11-13 17:13 ` [PATCH 21/31] sched, numa, mm: Implement THP migration Ingo Molnar
2012-11-13 18:48   ` Johannes Weiner
2012-11-14  2:23     ` Hugh Dickins
2012-11-14  2:27       ` [PATCH 1/2] sched, numa, mm: Add memcg support to do_huge_pmd_numa_page() Hugh Dickins
2012-11-14  2:29       ` [PATCH 2/2] sched, numa, mm: Fixes and cleanups in do_huge_pmd_numa_page() Hugh Dickins
2012-11-14  4:28       ` [PATCH 21/31] sched, numa, mm: Implement THP migration Andrew Morton
2012-11-13 17:13 ` [PATCH 22/31] sched, numa, mm: Add last_cpu to page flags Ingo Molnar
2012-11-13 17:13 ` [PATCH 23/31] sched, numa, mm, arch: Add variable locality exception Ingo Molnar
2012-11-13 17:13 ` [PATCH 24/31] sched, numa, mm: Add credits for NUMA placement Ingo Molnar
2012-11-13 17:13 ` [PATCH 25/31] sched, mm, x86: Add the ARCH_SUPPORTS_NUMA_BALANCING flag Ingo Molnar
2012-11-13 17:13 ` [PATCH 26/31] sched, numa, mm: Add the scanning page fault machinery Ingo Molnar
2012-11-13 17:13 ` [PATCH 27/31] sched, numa, mm: Add adaptive NUMA affinity support Ingo Molnar
2012-11-13 17:13 ` [PATCH 28/31] sched, numa, mm: Implement constant, per task Working Set Sampling (WSS) rate Ingo Molnar
2012-11-13 17:13 ` [PATCH 29/31] sched, numa, mm: Count WS scanning against present PTEs, not virtual memory ranges Ingo Molnar
2012-11-13 17:13 ` [PATCH 30/31] sched, numa, mm: Implement slow start for working set sampling Ingo Molnar
2012-11-13 17:13 ` [PATCH 31/31] mm: Allow the migration of shared pages Ingo Molnar
2012-11-13 17:54 ` [PATCH 00/31] Latest numa/core patches, v15 Mel Gorman
2012-11-14  7:52   ` Ingo Molnar
2012-11-14 11:36     ` Mel Gorman
2012-11-14 12:03     ` Mel Gorman
2012-11-17  8:45 ` Alex Shi
2012-11-18 19:32   ` Linus Torvalds
  -- strict thread matches above, loose matches on Subject: below --
2012-10-25 12:16 [PATCH 00/31] numa/core patches Peter Zijlstra
2012-10-25 12:16 ` [PATCH 15/31] mm/mpol: Add MPOL_MF_LAZY Peter Zijlstra
2012-11-01 12:01   ` Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1352826834-11774-16-git-send-email-mingo@kernel.org \
    --to=mingo@kernel.org \
    --cc=Lee.Schermerhorn@hp.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=cl@linux.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=pjt@google.com \
    --cc=riel@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).