linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] mempolicy: refix mbind_range() vma issue
@ 2011-12-09 16:55 kosaki.motohiro
  2011-12-12 11:20 ` Johannes Weiner
  0 siblings, 1 reply; 7+ messages in thread
From: kosaki.motohiro @ 2011-12-09 16:55 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: KOSAKI Motohiro, Minchan Kim, Johannes Weiner, Caspar Zhang

From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

commit 8aacc9f550 (mm/mempolicy.c: fix pgoff in mbind vma merge) is
a slightly incorrect fix. It doesn't handle vma merge case 4 (see the
mmap.c#vma_merge() source comment).

This patch fixes it.

testcase:  mbind_vma_test.c
=====================================================
 #include <numaif.h>
 #include <numa.h>
 #include <sys/mman.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <string.h>

/* Page size in bytes; assigned from getpagesize() in init(). */
static unsigned long pagesize;
/* Start of the six-page read/write window the test cases operate on. */
void* mmap_addr;
/* Node mask handed to mbind(); init() allocates it and sets bit 0. */
struct bitmask *nmask;
/* Holds the shell command formatted from rubysrc. */
char buf[1024];
/* Pipe opened with popen() to read the command's output. */
FILE *file;
/* Output captured from the command; compared against the expected string. */
char retbuf[10240] = "";

/*
 * printf-style template for the checker command.  It takes the pid and the
 * start/end addresses of the window, runs "pmap -q" on the pid and prints,
 * comma-separated, the second pmap column divided by 4 for every line whose
 * address lies in [vstart, vend).
 * NOTE(review): the division by 4 presumably turns KB into 4 KiB pages -- confirm.
 */
char *rubysrc = "ruby -e '\
  pid = %d; \
  vstart = 0x%llx; \
  vend = 0x%llx; \
  s = `pmap -q #{pid}`; \
  rary = []; \
  s.each_line {|line|; \
    ary=line.split(\" \"); \
    addr = ary[0].to_i(16); \
    if(vstart <= addr && addr < vend) then \
      rary.push(ary[1].to_i()/4); \
    end; \
  }; \
  print rary.join(\",\"); \
'";

/*
 * Set up the fixture for one test case: an eight-page PROT_NONE anonymous
 * mapping whose middle six pages are re-mapped read/write and populated.
 * mmap_addr is left pointing at the first of those six pages.
 * Exits with status 1 if a mapping cannot be created.
 */
void init(void)
{
	char *base;

	/* init() runs once per case; allocate the node mask only the first time. */
	if (nmask == NULL) {
		nmask = numa_allocate_nodemask();
		numa_bitmask_setbit(nmask, 0);
	}

	pagesize = getpagesize();

	/* Anonymous mappings take fd -1; some implementations require it. */
	base = mmap(NULL, pagesize*8, PROT_NONE,
		    MAP_ANON|MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED) {
		perror("mmap ");
		exit(1);
	}

	if (mmap(base + pagesize, pagesize*6, PROT_READ|PROT_WRITE,
		 MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0) == MAP_FAILED) {
		perror("mmap ");
		exit(1);
	}

	mmap_addr = base + pagesize;

	/* Touch every page so that it is populated. */
	memset(mmap_addr, 0, pagesize*6);
}

/*
 * Tear down the fixture: unmap the eight pages that start one page below
 * mmap_addr and clear the command and result buffers for the next case.
 */
void fin(void)
{
	munmap((char *)mmap_addr - pagesize, pagesize*8);

	memset(retbuf, 0, sizeof(retbuf));
	memset(buf, 0, sizeof(buf));
}

/*
 * Apply MPOL_BIND with the global node mask to `len` pages starting at page
 * `index` of the test window.  On failure, report it and exit with mbind()'s
 * return value.
 */
void mem_bind(int index, int len)
{
	char *target = (char *)mmap_addr + pagesize*index;
	int rc = mbind(target, pagesize*len,
		       MPOL_BIND, nmask->maskp, nmask->size, 0);

	if (rc == 0)
		return;

	perror("mbind ");
	exit(rc);
}

/*
 * Apply MPOL_INTERLEAVE with the global node mask to `len` pages starting at
 * page `index` of the test window.  On failure, report it and exit with
 * mbind()'s return value.
 */
void mem_interleave(int index, int len)
{
	char *target = (char *)mmap_addr + pagesize*index;
	int rc = mbind(target, pagesize*len,
		       MPOL_INTERLEAVE, nmask->maskp, nmask->size, 0);

	if (rc == 0)
		return;

	perror("mbind ");
	exit(rc);
}

/*
 * Reset `len` pages starting at page `index` of the test window to
 * MPOL_DEFAULT (no node mask).  On failure, report it and exit with mbind()'s
 * return value.
 */
void mem_unbind(int index, int len)
{
	char *target = (char *)mmap_addr + pagesize*index;
	int rc = mbind(target, pagesize*len,
		       MPOL_DEFAULT, NULL, 0, 0);

	if (rc == 0)
		return;

	perror("mbind ");
	exit(rc);
}

/*
 * Compare the captured output with the expected string and report the result
 * on stderr.  A mismatch is only reported; the program keeps running so that
 * the remaining cases still execute.
 *
 * expected: string the test case wants to see
 * value:    string that was actually captured
 * name:     test case label used in the report
 * line:     source line of the call site, printed on failure
 */
void Assert(const char *expected, const char *value, const char *name, int line)
{
	if (strcmp(expected, value) == 0) {
		fprintf(stderr, "%s: passed\n", name);
		return;
	}

	fprintf(stderr, "%s: %d: test failed. expect '%s', actual '%s'\n",
		name, line, expected, value);
}

/*
      AAAA
    PPPPPPNNNNNN
    might become
    PPNNNNNNNNNN
    case 4 below
*/
/*
 * Bind pages 0-3 of the window, then reset pages 2-3 to the default policy.
 * The checker output is expected to be "2,4".
 */
void case4(void)
{
	size_t got;

	init();
	/* rubysrc formats both addresses with %llx, so pass integers of that width. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)mmap_addr + pagesize*6);

	mem_bind(0, 4);
	mem_unbind(2, 2);

	file = popen(buf, "r");
	if (file == NULL) {
		perror("popen ");
		exit(1);
	}
	/* Read bytes rather than one buffer-sized item, and terminate explicitly. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	pclose(file);
	Assert("2,4", retbuf, "case4", __LINE__);

	fin();
}

/*
       AAAA
 PPPPPPNNNNNN
 might become
 PPPPPPPPPPNN
 case 5 below
*/
/*
 * Bind pages 0-1 of the window, then bind pages 2-3 with the same policy.
 * The checker output is expected to be "4,2".
 */
void case5(void)
{
	size_t got;

	init();
	/* rubysrc formats both addresses with %llx, so pass integers of that width. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)mmap_addr + pagesize*6);

	mem_bind(0, 2);
	mem_bind(2, 2);

	file = popen(buf, "r");
	if (file == NULL) {
		perror("popen ");
		exit(1);
	}
	/* Read bytes rather than one buffer-sized item, and terminate explicitly. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	pclose(file);
	Assert("4,2", retbuf, "case5", __LINE__);

	fin();
}

/*
	    AAAA
	PPPPNNNNXXXX
	might become
	PPPPPPPPPPPP 6
*/
/*
 * Bind pages 0-1 and pages 4-5 of the window, then bind the gap (pages 2-3)
 * with the same policy.  The checker output is expected to be "6".
 */
void case6(void)
{
	size_t got;

	init();
	/* rubysrc formats both addresses with %llx, so pass integers of that width. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)mmap_addr + pagesize*6);

	mem_bind(0, 2);
	mem_bind(4, 2);
	mem_bind(2, 2);

	file = popen(buf, "r");
	if (file == NULL) {
		perror("popen ");
		exit(1);
	}
	/* Read bytes rather than one buffer-sized item, and terminate explicitly. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	pclose(file);
	Assert("6", retbuf, "case6", __LINE__);

	fin();
}

/*
    AAAA
PPPPNNNNXXXX
might become
PPPPPPPPXXXX 7
*/
/*
 * Bind pages 0-1, interleave pages 4-5, then bind the gap (pages 2-3).
 * The checker output is expected to be "4,2".
 */
void case7(void)
{
	size_t got;

	init();
	/* rubysrc formats both addresses with %llx, so pass integers of that width. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)mmap_addr + pagesize*6);

	mem_bind(0, 2);
	mem_interleave(4, 2);
	mem_bind(2, 2);

	file = popen(buf, "r");
	if (file == NULL) {
		perror("popen ");
		exit(1);
	}
	/* Read bytes rather than one buffer-sized item, and terminate explicitly. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	pclose(file);
	Assert("4,2", retbuf, "case7", __LINE__);

	fin();
}

/*
    AAAA
PPPPNNNNXXXX
might become
PPPPNNNNNNNN 8
*/
/*
 * Bind pages 0-1, interleave pages 4-5, then interleave the gap (pages 2-3).
 * The checker output is expected to be "2,4".
 */
void case8(void)
{
	size_t got;

	init();
	/* rubysrc formats both addresses with %llx, so pass integers of that width. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)mmap_addr + pagesize*6);

	mem_bind(0, 2);
	mem_interleave(4, 2);
	mem_interleave(2, 2);

	file = popen(buf, "r");
	if (file == NULL) {
		perror("popen ");
		exit(1);
	}
	/* Read bytes rather than one buffer-sized item, and terminate explicitly. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	pclose(file);
	Assert("2,4", retbuf, "case8", __LINE__);

	fin();
}

int main(int argc, char** argv)
{
	case4();
	case5();
	case6();
	case7();
	case8();

	return 0;
}
=============================================================

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
CC: Caspar Zhang <caspar@casparzhang.com>
---
 mm/mempolicy.c |    7 ++++++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index adc3954..fd07eae 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -636,6 +636,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 	struct vm_area_struct *prev;
 	struct vm_area_struct *vma;
 	int err = 0;
+	pgoff_t pgoff;
 	unsigned long vmstart;
 	unsigned long vmend;
 
@@ -643,13 +644,17 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 	if (!vma || vma->vm_start > start)
 		return -EFAULT;
 
+	if (start > vma->vm_start)
+		prev = vma;
+
 	for (; vma && vma->vm_start < end; prev = vma, vma = next) {
 		next = vma->vm_next;
 		vmstart = max(start, vma->vm_start);
 		vmend   = min(end, vma->vm_end);
 
+		pgoff = vma->vm_pgoff + ((vmstart - vma->vm_start) >> PAGE_SHIFT);
 		prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
-				  vma->anon_vma, vma->vm_file, vma->vm_pgoff,
+				  vma->anon_vma, vma->vm_file, pgoff,
 				  new_pol);
 		if (prev) {
 			vma = prev;
-- 
1.7.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] mempolicy: refix mbind_range() vma issue
  2011-12-09 16:55 [PATCH] mempolicy: refix mbind_range() vma issue kosaki.motohiro
@ 2011-12-12 11:20 ` Johannes Weiner
  2011-12-20 18:17   ` [PATCH] [v2] " kosaki.motohiro
  2011-12-20 18:19   ` [PATCH] " KOSAKI Motohiro
  0 siblings, 2 replies; 7+ messages in thread
From: Johannes Weiner @ 2011-12-12 11:20 UTC (permalink / raw)
  To: kosaki.motohiro
  Cc: linux-mm, linux-kernel, KOSAKI Motohiro, Minchan Kim,
	Caspar Zhang

On Fri, Dec 09, 2011 at 11:55:09AM -0500, kosaki.motohiro@gmail.com wrote:
> From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> 
> commit 8aacc9f550 (mm/mempolicy.c: fix pgoff in mbind vma merge) is
> slightly incorrect fix. It doesn't handle vma merge case 4 (see
> mmap.c#vma_merge() source comment).
> 
> This patch fixes it.
> 
> testcase:  mbind_vma_test.c
> =====================================================
>  #include <numaif.h>
>  #include <numa.h>
>  #include <sys/mman.h>
>  #include <stdio.h>
>  #include <unistd.h>
>  #include <stdlib.h>
>  #include <string.h>
> 
> static unsigned long pagesize;
> void* mmap_addr;
> struct bitmask *nmask;
> char buf[1024];
> FILE *file;
> char retbuf[10240] = "";
> 
> char *rubysrc = "ruby -e '\
>   pid = %d; \
>   vstart = 0x%llx; \
>   vend = 0x%llx; \
>   s = `pmap -q #{pid}`; \
>   rary = []; \
>   s.each_line {|line|; \
>     ary=line.split(\" \"); \
>     addr = ary[0].to_i(16); \
>     if(vstart <= addr && addr < vend) then \
>       rary.push(ary[1].to_i()/4); \
>     end; \
>   }; \
>   print rary.join(\",\"); \
> '";

;-)

But thanks for going through the test cases so meticulously!

That being said, would you mind including the output before and after
this patch in the changelog?

> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

for the fix itself, but I think the changelog needs a bit more
information:

> @@ -636,6 +636,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
>  	struct vm_area_struct *prev;
>  	struct vm_area_struct *vma;
>  	int err = 0;
> +	pgoff_t pgoff;
>  	unsigned long vmstart;
>  	unsigned long vmend;
>  
> @@ -643,13 +644,17 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
>  	if (!vma || vma->vm_start > start)
>  		return -EFAULT;
>  
> +	if (start > vma->vm_start)
> +		prev = vma;
> +
>  	for (; vma && vma->vm_start < end; prev = vma, vma = next) {
>  		next = vma->vm_next;
>  		vmstart = max(start, vma->vm_start);
>  		vmend   = min(end, vma->vm_end);
>  
> +		pgoff = vma->vm_pgoff + ((vmstart - vma->vm_start) >> PAGE_SHIFT);
>  		prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
> -				  vma->anon_vma, vma->vm_file, vma->vm_pgoff,
> +				  vma->anon_vma, vma->vm_file, pgoff,
>  				  new_pol);
>  		if (prev) {
>  			vma = prev;

This is essentially a revert of the aforementioned commit.

What you added instead is the fixing of @prev: only when mbind is
vma-aligned can the new area be potentially merged into the preceding
one.  Otherwise that original vma is the one we need to check for
compatibility with the mbind range and leave the original prev alone:

	[prev         ][vma            ]
	                    |start

	[prev         ][vma][mbind vma ]

This should NOT attempt to merge mbind vma with prev (and forget about
and leak vma, iirc), but check if vma and the mbind vma are compatible
or should be separate areas.

Could you please add something to that extent to the changelog?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] [v2] mempolicy: refix mbind_range() vma issue
  2011-12-12 11:20 ` Johannes Weiner
@ 2011-12-20 18:17   ` kosaki.motohiro
  2011-12-20 19:28     ` Johannes Weiner
  2011-12-20 18:19   ` [PATCH] " KOSAKI Motohiro
  1 sibling, 1 reply; 7+ messages in thread
From: kosaki.motohiro @ 2011-12-20 18:17 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: KOSAKI Motohiro, Johannes Weiner, Minchan Kim, Caspar Zhang,
	Andrew Morton, Stephen Wilson, Andrea Arcangeli

From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

commit 8aacc9f550 (mm/mempolicy.c: fix pgoff in mbind vma merge) is
a slightly incorrect fix.

Why? Consider the following case.

1. map 4 pages of a file at offset 0

   [0123]

2. map 2 pages just after the first mapping of the same file but with
   page offset 2

   [0123][23]

3. mbind() 2 pages from the first mapping at offset 2.
   mbind_range() should treat the new vma as

   [0123][23]
     |23|
     mbind vma

   but it does

   [0123][23]
     |01|
     mbind vma

   Oops. Then it performs a wrong vma merge and split ([01][0123] or similar).

This patch fixes it.

[testcase]
  test result - before the patch

	case4: 126: test failed. expect '2,4', actual '2,2,2'
       	case5: passed
	case6: passed
	case7: passed
	case8: passed
	case_n: 246: test failed. expect '4,2', actual '1,4'

	------------[ cut here ]------------
	kernel BUG at mm/filemap.c:135!
	invalid opcode: 0000 [#4] SMP DEBUG_PAGEALLOC

	(snip long bug on messages)

  test result - after the patch

	case4: passed
       	case5: passed
	case6: passed
	case7: passed
	case8: passed
	case_n: passed

  source:  mbind_vma_test.c
============================================================
 #include <numaif.h>
 #include <numa.h>
 #include <sys/mman.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <string.h>

/* Page size in bytes; assigned from getpagesize() in init(). */
static unsigned long pagesize;
/* Start of the six-page read/write window the test cases operate on. */
void* mmap_addr;
/* Node mask handed to mbind(); init() allocates it and sets bit 0. */
struct bitmask *nmask;
/* Holds the shell command formatted from rubysrc. */
char buf[1024];
/* Pipe opened with popen() to read the command's output. */
FILE *file;
/* Output captured from the command; compared against the expected string. */
char retbuf[10240] = "";
/* Descriptor of the temporary file that init() creates and maps. */
int mapped_fd;

/*
 * printf-style template for the checker command.  It takes the pid and the
 * start/end addresses of the window, runs "pmap -q" on the pid and prints,
 * comma-separated, the second pmap column divided by 4 for every line whose
 * address lies in [vstart, vend).
 * NOTE(review): the division by 4 presumably turns KB into 4 KiB pages -- confirm.
 */
char *rubysrc = "ruby -e '\
  pid = %d; \
  vstart = 0x%llx; \
  vend = 0x%llx; \
  s = `pmap -q #{pid}`; \
  rary = []; \
  s.each_line {|line|; \
    ary=line.split(\" \"); \
    addr = ary[0].to_i(16); \
    if(vstart <= addr && addr < vend) then \
      rary.push(ary[1].to_i()/4); \
    end; \
  }; \
  print rary.join(\",\"); \
'";

/*
 * Set up the fixture for one test case: create an unlinked temporary file,
 * map eight pages of it MAP_SHARED with PROT_NONE, make the middle six pages
 * read/write and populate them.  mmap_addr is left pointing at the first of
 * those six pages and mapped_fd at the backing file.
 * Exits with status 1 if any step fails.
 */
void init(void)
{
	char *base;
	/* Separate name so the global command buffer `buf` is not shadowed. */
	char path[] = "mbind_vma_XXXXXX";

	/* init() runs once per case; allocate the node mask only the first time. */
	if (nmask == NULL) {
		nmask = numa_allocate_nodemask();
		numa_bitmask_setbit(nmask, 0);
	}

	pagesize = getpagesize();

	mapped_fd = mkstemp(path);
	if (mapped_fd == -1) {
		perror("mkstemp ");
		exit(1);
	}
	unlink(path);

	/* Grow the file past the mapped range by writing one byte at offset 8 pages. */
	if (lseek(mapped_fd, pagesize*8, SEEK_SET) < 0) {
		perror("lseek ");
		exit(1);
	}
	/* Anything other than a full one-byte write leaves the file too short. */
	if (write(mapped_fd, "\0", 1) != 1) {
		perror("write ");
		exit(1);
	}

	base = mmap(NULL, pagesize*8, PROT_NONE,
		    MAP_SHARED, mapped_fd, 0);
	if (base == MAP_FAILED) {
		perror("mmap ");
		exit(1);
	}

	if (mprotect(base + pagesize, pagesize*6, PROT_READ|PROT_WRITE) < 0) {
		perror("mprotect ");
		exit(1);
	}

	mmap_addr = base + pagesize;

	/* Touch every page so that it is populated. */
	memset(mmap_addr, 0, pagesize*6);
}

void fin(void)
{
	void* addr = mmap_addr - pagesize;
	munmap(addr, pagesize*8);

	memset(buf, 0, sizeof(buf));
	memset(retbuf, 0, sizeof(retbuf));
}

/*
 * Apply MPOL_BIND with the global node mask to `len` pages starting at page
 * `index` of the test window.  On failure, report it and exit with mbind()'s
 * return value.
 */
void mem_bind(int index, int len)
{
	char *target = (char *)mmap_addr + pagesize*index;
	int rc = mbind(target, pagesize*len,
		       MPOL_BIND, nmask->maskp, nmask->size, 0);

	if (rc == 0)
		return;

	perror("mbind ");
	exit(rc);
}

/*
 * Apply MPOL_INTERLEAVE with the global node mask to `len` pages starting at
 * page `index` of the test window.  On failure, report it and exit with
 * mbind()'s return value.
 */
void mem_interleave(int index, int len)
{
	char *target = (char *)mmap_addr + pagesize*index;
	int rc = mbind(target, pagesize*len,
		       MPOL_INTERLEAVE, nmask->maskp, nmask->size, 0);

	if (rc == 0)
		return;

	perror("mbind ");
	exit(rc);
}

/*
 * Reset `len` pages starting at page `index` of the test window to
 * MPOL_DEFAULT (no node mask).  On failure, report it and exit with mbind()'s
 * return value.
 */
void mem_unbind(int index, int len)
{
	char *target = (char *)mmap_addr + pagesize*index;
	int rc = mbind(target, pagesize*len,
		       MPOL_DEFAULT, NULL, 0, 0);

	if (rc == 0)
		return;

	perror("mbind ");
	exit(rc);
}

/*
 * Compare the captured output with the expected string and report the result
 * on stderr.  A mismatch is only reported; the program keeps running so that
 * the remaining cases still execute.
 *
 * expected: string the test case wants to see
 * value:    string that was actually captured
 * name:     test case label used in the report
 * line:     source line of the call site, printed on failure
 */
void Assert(const char *expected, const char *value, const char *name, int line)
{
	if (strcmp(expected, value) == 0) {
		fprintf(stderr, "%s: passed\n", name);
		return;
	}

	fprintf(stderr, "%s: %d: test failed. expect '%s', actual '%s'\n",
		name, line, expected, value);
}

/*
      AAAA
    PPPPPPNNNNNN
    might become
    PPNNNNNNNNNN
    case 4 below
*/
/*
 * Bind pages 0-3 of the window, then reset pages 2-3 to the default policy.
 * The checker output is expected to be "2,4".
 */
void case4(void)
{
	size_t got;

	init();
	/* rubysrc formats both addresses with %llx, so pass integers of that width. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)mmap_addr + pagesize*6);

	mem_bind(0, 4);
	mem_unbind(2, 2);

	file = popen(buf, "r");
	if (file == NULL) {
		perror("popen ");
		exit(1);
	}
	/* Read bytes rather than one buffer-sized item, and terminate explicitly. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	pclose(file);
	Assert("2,4", retbuf, "case4", __LINE__);

	fin();
}

/*
       AAAA
 PPPPPPNNNNNN
 might become
 PPPPPPPPPPNN
 case 5 below
*/
/*
 * Bind pages 0-1 of the window, then bind pages 2-3 with the same policy.
 * The checker output is expected to be "4,2".
 */
void case5(void)
{
	size_t got;

	init();
	/* rubysrc formats both addresses with %llx, so pass integers of that width. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)mmap_addr + pagesize*6);

	mem_bind(0, 2);
	mem_bind(2, 2);

	file = popen(buf, "r");
	if (file == NULL) {
		perror("popen ");
		exit(1);
	}
	/* Read bytes rather than one buffer-sized item, and terminate explicitly. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	pclose(file);
	Assert("4,2", retbuf, "case5", __LINE__);

	fin();
}

/*
	    AAAA
	PPPPNNNNXXXX
	might become
	PPPPPPPPPPPP 6
*/
/*
 * Bind pages 0-1 and pages 4-5 of the window, then bind the gap (pages 2-3)
 * with the same policy.  The checker output is expected to be "6".
 */
void case6(void)
{
	size_t got;

	init();
	/* rubysrc formats both addresses with %llx, so pass integers of that width. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)mmap_addr + pagesize*6);

	mem_bind(0, 2);
	mem_bind(4, 2);
	mem_bind(2, 2);

	file = popen(buf, "r");
	if (file == NULL) {
		perror("popen ");
		exit(1);
	}
	/* Read bytes rather than one buffer-sized item, and terminate explicitly. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	pclose(file);
	Assert("6", retbuf, "case6", __LINE__);

	fin();
}

/*
    AAAA
PPPPNNNNXXXX
might become
PPPPPPPPXXXX 7
*/
/*
 * Bind pages 0-1, interleave pages 4-5, then bind the gap (pages 2-3).
 * The checker output is expected to be "4,2".
 */
void case7(void)
{
	size_t got;

	init();
	/* rubysrc formats both addresses with %llx, so pass integers of that width. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)mmap_addr + pagesize*6);

	mem_bind(0, 2);
	mem_interleave(4, 2);
	mem_bind(2, 2);

	file = popen(buf, "r");
	if (file == NULL) {
		perror("popen ");
		exit(1);
	}
	/* Read bytes rather than one buffer-sized item, and terminate explicitly. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	pclose(file);
	Assert("4,2", retbuf, "case7", __LINE__);

	fin();
}

/*
    AAAA
PPPPNNNNXXXX
might become
PPPPNNNNNNNN 8
*/
/*
 * Bind pages 0-1, interleave pages 4-5, then interleave the gap (pages 2-3).
 * The checker output is expected to be "2,4".
 */
void case8(void)
{
	size_t got;

	init();
	/* rubysrc formats both addresses with %llx, so pass integers of that width. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)mmap_addr + pagesize*6);

	mem_bind(0, 2);
	mem_interleave(4, 2);
	mem_interleave(2, 2);

	file = popen(buf, "r");
	if (file == NULL) {
		perror("popen ");
		exit(1);
	}
	/* Read bytes rather than one buffer-sized item, and terminate explicitly. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	pclose(file);
	Assert("2,4", retbuf, "case8", __LINE__);

	fin();
}

/*
 * Re-map the last two pages of the window from file offset 3 pages, so they
 * sit next to the preceding pages with a non-contiguous file offset, then
 * reset pages 2-3 to the default policy.  The policy was never changed, so
 * the layout must stay as it is; the checker output is expected to be "4,2".
 */
void case_n(void)
{
	size_t got;

	init();
	/* rubysrc formats both addresses with %llx, so pass integers of that width. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)mmap_addr + pagesize*6);

	/* make redundant mappings [0][1234][34][7] */
	if (mmap((char *)mmap_addr + pagesize*4, pagesize*2, PROT_READ|PROT_WRITE,
		 MAP_FIXED|MAP_SHARED, mapped_fd, pagesize*3) == MAP_FAILED) {
		perror("mmap ");
		exit(1);
	}

	/* Expect to do nothing. */
	mem_unbind(2, 2);

	file = popen(buf, "r");
	if (file == NULL) {
		perror("popen ");
		exit(1);
	}
	/* Read bytes rather than one buffer-sized item, and terminate explicitly. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	pclose(file);
	Assert("4,2", retbuf, "case_n", __LINE__);

	fin();
}

int main(int argc, char** argv)
{
	case4();
	case5();
	case6();
	case7();
	case8();
	case_n();

	return 0;
}
=============================================================

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Caspar Zhang <caspar@casparzhang.com>
---
 mm/mempolicy.c |   11 ++++++++++-
 1 files changed, 10 insertions(+), 1 deletions(-)


changed from v1:
 - added mpol_equal() check.
 - added an explanation of why current upstream code is broken.
 - added one testcase to reproduce the problem Hannes described.

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index adc3954..c3fdbcb 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -636,6 +636,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 	struct vm_area_struct *prev;
 	struct vm_area_struct *vma;
 	int err = 0;
+	pgoff_t pgoff;
 	unsigned long vmstart;
 	unsigned long vmend;
 
@@ -643,13 +644,21 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 	if (!vma || vma->vm_start > start)
 		return -EFAULT;
 
+	if (start > vma->vm_start)
+		prev = vma;
+
 	for (; vma && vma->vm_start < end; prev = vma, vma = next) {
 		next = vma->vm_next;
 		vmstart = max(start, vma->vm_start);
 		vmend   = min(end, vma->vm_end);
 
+		if (mpol_equal(vma_policy(vma), new_pol))
+			continue;
+
+		pgoff = vma->vm_pgoff +
+			((vmstart - vma->vm_start) >> PAGE_SHIFT);
 		prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
-				  vma->anon_vma, vma->vm_file, vma->vm_pgoff,
+				  vma->anon_vma, vma->vm_file, pgoff,
 				  new_pol);
 		if (prev) {
 			vma = prev;
-- 
1.7.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] mempolicy: refix mbind_range() vma issue
  2011-12-12 11:20 ` Johannes Weiner
  2011-12-20 18:17   ` [PATCH] [v2] " kosaki.motohiro
@ 2011-12-20 18:19   ` KOSAKI Motohiro
  1 sibling, 0 replies; 7+ messages in thread
From: KOSAKI Motohiro @ 2011-12-20 18:19 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: linux-mm, linux-kernel, KOSAKI Motohiro, Minchan Kim,
	Caspar Zhang

>> +		pgoff = vma->vm_pgoff + ((vmstart - vma->vm_start)>>  PAGE_SHIFT);
>>   		prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
>> -				  vma->anon_vma, vma->vm_file, vma->vm_pgoff,
>> +				  vma->anon_vma, vma->vm_file, pgoff,
>>   				  new_pol);
>>   		if (prev) {
>>   			vma = prev;
>
> This is essentially a revert of the aforementioned commit.
>
> What you added instead is the fixing of @prev: only when mbind is
> vma-aligned can the new area be potentially merged into the preceding
> one.  Otherwise that original vma is the one we need to check for
> compatibility with the mbind range and leave the original prev alone:
>
> 	[prev         ][vma            ]
> 	                    |start
>
> 	[prev         ][vma][mbind vma ]
>
> This should NOT attempt to merge mbind vma with prev (and forget about
> and leak vma, iirc), but check if vma and the mbind vma are compatible
> or should be separate areas.
>
> Could you please add something to that extent to the changelog?

While writing a new test case, I found one bug in my patch. So I've
sent a new patch with a detailed explanation of the bug. :)

Thanks.


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] [v2] mempolicy: refix mbind_range() vma issue
  2011-12-20 18:17   ` [PATCH] [v2] " kosaki.motohiro
@ 2011-12-20 19:28     ` Johannes Weiner
  2011-12-20 23:37       ` Andrew Morton
  0 siblings, 1 reply; 7+ messages in thread
From: Johannes Weiner @ 2011-12-20 19:28 UTC (permalink / raw)
  To: kosaki.motohiro
  Cc: linux-mm, linux-kernel, KOSAKI Motohiro, Minchan Kim,
	Caspar Zhang, Andrew Morton, Stephen Wilson, Andrea Arcangeli

On Tue, Dec 20, 2011 at 01:17:10PM -0500, kosaki.motohiro@gmail.com wrote:
> From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> 
> commit 8aacc9f550 (mm/mempolicy.c: fix pgoff in mbind vma merge) is
> slightly incorrect fix.
> 
> Why? Think following case.
> 
> 1. map 4 pages of a file at offset 0
> 
>    [0123]
> 
> 2. map 2 pages just after the first mapping of the same file but with
>    page offset 2
> 
>    [0123][23]
> 
> 3. mbind() 2 pages from the first mapping at offset 2.
>    mbind_range() should treat new vma is,
> 
>    [0123][23]
>      |23|
>      mbind vma
> 
>    but it does
> 
>    [0123][23]
>      |01|
>      mbind vma
> 
>    Oops. then, it makes wrong vma merge and splitting ([01][0123] or similar).
> 
> This patch fixes it.
> 
> [testcase]
>   test result - before the patch
> 
> 	case4: 126: test failed. expect '2,4', actual '2,2,2'
>        	case5: passed
> 	case6: passed
> 	case7: passed
> 	case8: passed
> 	case_n: 246: test failed. expect '4,2', actual '1,4'
> 
> 	------------[ cut here ]------------
> 	kernel BUG at mm/filemap.c:135!
> 	invalid opcode: 0000 [#4] SMP DEBUG_PAGEALLOC
> 
> 	(snip long bug on messages)
> 
>   test result - after the patch
> 
> 	case4: passed
>        	case5: passed
> 	case6: passed
> 	case7: passed
> 	case8: passed
> 	case_n: passed

> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Minchan Kim <minchan.kim@gmail.com>
> CC: Caspar Zhang <caspar@casparzhang.com>

Looks good to me now, thanks.

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

Since this can corrupt virtual mappings and was released with 3.2, I
think we also want this:

Cc: stable@kernel.org [3.2.x]

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] [v2] mempolicy: refix mbind_range() vma issue
  2011-12-20 19:28     ` Johannes Weiner
@ 2011-12-20 23:37       ` Andrew Morton
  2011-12-20 23:54         ` Johannes Weiner
  0 siblings, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2011-12-20 23:37 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: kosaki.motohiro, linux-mm, linux-kernel, KOSAKI Motohiro,
	Minchan Kim, Caspar Zhang, Stephen Wilson, Andrea Arcangeli

On Tue, 20 Dec 2011 20:28:50 +0100
Johannes Weiner <hannes@cmpxchg.org> wrote:

> > Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> > Cc: Johannes Weiner <hannes@cmpxchg.org>
> > Cc: Minchan Kim <minchan.kim@gmail.com>
> > CC: Caspar Zhang <caspar@casparzhang.com>
> 
> Looks good to me now, thanks.
> 
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> 
> Since this can corrupt virtual mappings and was released with 3.2, I
> think we also want this:
> 
> Cc: stable@kernel.org [3.2.x]

I assume you meant 3.1.x  And into mainline for 3.2?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] [v2] mempolicy: refix mbind_range() vma issue
  2011-12-20 23:37       ` Andrew Morton
@ 2011-12-20 23:54         ` Johannes Weiner
  0 siblings, 0 replies; 7+ messages in thread
From: Johannes Weiner @ 2011-12-20 23:54 UTC (permalink / raw)
  To: Andrew Morton
  Cc: kosaki.motohiro, linux-mm, linux-kernel, KOSAKI Motohiro,
	Minchan Kim, Caspar Zhang, Stephen Wilson, Andrea Arcangeli

On Tue, Dec 20, 2011 at 03:37:57PM -0800, Andrew Morton wrote:
> On Tue, 20 Dec 2011 20:28:50 +0100
> Johannes Weiner <hannes@cmpxchg.org> wrote:
> 
> > > Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> > > Cc: Johannes Weiner <hannes@cmpxchg.org>
> > > Cc: Minchan Kim <minchan.kim@gmail.com>
> > > CC: Caspar Zhang <caspar@casparzhang.com>
> > 
> > Looks good to me now, thanks.
> > 
> > Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> > 
> > Since this can corrupt virtual mappings and was released with 3.2, I
> > think we also want this:
> > 
> > Cc: stable@kernel.org [3.2.x]
> 
> I assume you meant 3.1.x  And into mainline for 3.2?

Yes.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2011-12-20 23:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-12-09 16:55 [PATCH] mempolicy: refix mbind_range() vma issue kosaki.motohiro
2011-12-12 11:20 ` Johannes Weiner
2011-12-20 18:17   ` [PATCH] [v2] " kosaki.motohiro
2011-12-20 19:28     ` Johannes Weiner
2011-12-20 23:37       ` Andrew Morton
2011-12-20 23:54         ` Johannes Weiner
2011-12-20 18:19   ` [PATCH] " KOSAKI Motohiro

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).