diff for duplicates of <20161117074538.GA1713@aaronlu.sh.intel.com> diff --git a/a/2.txt b/N1/2.txt index 8b13789..951fca6 100644 --- a/a/2.txt +++ b/N1/2.txt @@ -1 +1,123 @@ +>From c529dfa6bdfc643a9c3debb4b61b9b0c13b0862e Mon Sep 17 00:00:00 2001 +From: Aaron Lu <aaron.lu@intel.com> +Date: Thu, 17 Nov 2016 15:11:08 +0800 +Subject: [PATCH] mremap: add a 2s delay for MAP_FIXED case +Add a 2s delay for MAP_FIXED case to enlarge the race window so that we +can hit the race in user space. + +Signed-off-by: Aaron Lu <aaron.lu@intel.com> +--- + fs/exec.c | 2 +- + include/linux/mm.h | 2 +- + mm/mremap.c | 19 ++++++++++++------- + 3 files changed, 14 insertions(+), 9 deletions(-) + +diff --git a/fs/exec.c b/fs/exec.c +index 4e497b9ee71e..1e49ce9a23bd 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -619,7 +619,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) + * process cleanup to remove whatever mess we made. + */ + if (length != move_page_tables(vma, old_start, +- vma, new_start, length, false)) ++ vma, new_start, length, false, false)) + return -ENOMEM; + + lru_add_drain(); +diff --git a/include/linux/mm.h b/include/linux/mm.h +index a92c8d73aeaf..5e35fe3d914a 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1392,7 +1392,7 @@ int vma_is_stack_for_current(struct vm_area_struct *vma); + extern unsigned long move_page_tables(struct vm_area_struct *vma, + unsigned long old_addr, struct vm_area_struct *new_vma, + unsigned long new_addr, unsigned long len, +- bool need_rmap_locks); ++ bool need_rmap_locks, bool delay); + extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, + unsigned long end, pgprot_t newprot, + int dirty_accountable, int prot_numa); +diff --git a/mm/mremap.c b/mm/mremap.c +index da22ad2a5678..8e35279ca622 100644 +--- a/mm/mremap.c ++++ b/mm/mremap.c +@@ -22,6 +22,7 @@ + #include <linux/mmu_notifier.h> + #include <linux/uaccess.h> + #include <linux/mm-arch-hooks.h> ++#include <linux/delay.h> + + #include <asm/cacheflush.h> + #include <asm/tlbflush.h> +@@ -166,7 +167,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, + unsigned long move_page_tables(struct vm_area_struct *vma, + unsigned long old_addr, struct vm_area_struct *new_vma, + unsigned long new_addr, unsigned long len, +- bool need_rmap_locks) ++ bool need_rmap_locks, bool delay) + { + unsigned long extent, next, old_end; + pmd_t *old_pmd, *new_pmd; +@@ -224,8 +225,11 @@ unsigned long move_page_tables(struct vm_area_struct *vma, + new_vma, new_pmd, new_addr, need_rmap_locks); + need_flush = true; + } +- if (likely(need_flush)) ++ if (likely(need_flush)) { ++ if (delay) ++ msleep(2000); + flush_tlb_range(vma, old_end-len, old_addr); ++ } + + mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); + +@@ -234,7 +238,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, + + static unsigned long move_vma(struct vm_area_struct *vma, + unsigned long old_addr, unsigned long old_len, +- unsigned long new_len, unsigned long new_addr, bool *locked) ++ unsigned long new_len, unsigned long new_addr, ++ bool *locked, bool delay) + { + struct mm_struct *mm = vma->vm_mm; + struct vm_area_struct *new_vma; +@@ -273,7 +278,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, + return -ENOMEM; + + moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len, +- need_rmap_locks); ++ need_rmap_locks, delay); + if (moved_len < old_len) { + err = -ENOMEM; + } else if (vma->vm_ops && vma->vm_ops->mremap) { +@@ -287,7 +292,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, + * and then proceed to unmap new area instead of old. + */ + move_page_tables(new_vma, new_addr, vma, old_addr, moved_len, +- true); ++ true, delay); + vma = new_vma; + old_len = new_len; + old_addr = new_addr; +@@ -442,7 +447,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, + if (offset_in_page(ret)) + goto out1; + +- ret = move_vma(vma, addr, old_len, new_len, new_addr, locked); ++ ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, true); + if (!(offset_in_page(ret))) + goto out; + out1: +@@ -576,7 +581,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, + goto out; + } + +- ret = move_vma(vma, addr, old_len, new_len, new_addr, &locked); ++ ret = move_vma(vma, addr, old_len, new_len, new_addr, &locked, false); + } + out: + if (offset_in_page(ret)) { +-- +2.5.5 diff --git a/N1/3.hdr b/N1/3.hdr new file mode 100644 index 0000000..9800ddf --- /dev/null +++ b/N1/3.hdr @@ -0,0 +1,2 @@ +Content-Type: text/plain; charset=us-ascii +Content-Disposition: attachment; filename="raceremap.c" diff --git a/N1/3.txt b/N1/3.txt new file mode 100644 index 0000000..f2fe637 --- /dev/null +++ b/N1/3.txt @@ -0,0 +1,120 @@ +#define _GNU_SOURCE +#define _XOPEN_SOURCE 500 +#include <sched.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <assert.h> +#include <sys/io.h> + +#define BUFLEN 4096 + +static char wistmpfile[] = "/mnt/willitscale.XXXXXX"; + +char *testcase_description = "mremap"; + +char *buf; +char *newbuf = (char *)0x700000000000; +#define FILE_SIZE (4096*128) + +static void mdelay(int ms) +{ + int i; + + // gain io permission for the delay + assert(ioperm(0x80, 8, 1) == 0); + + for (i = 0; i < ms; i++) + inb(0x80); +} + +void testcase_prepare(void) +{ + int fd = mkstemp(wistmpfile); + + assert(fd >= 0); + assert(pwrite(fd, "X", 1, FILE_SIZE-1) == 1); + buf = mmap(NULL, FILE_SIZE, PROT_READ|PROT_WRITE, + MAP_SHARED, fd, 0); + assert(buf != (void *)-1); + close(fd); +} + +static volatile int step = 0; + +void testcase(unsigned long long *iterations) +{ + int cpu = sched_getcpu(); + int fd = open(wistmpfile, O_RDWR); + off_t offset = sched_getcpu() * BUFLEN; + long counterread = 0; + long *counterbuf = (void *)&buf[offset]; + assert(fd >= 0); + + printf("cpu%d runs\n", cpu); + + while (1) { + int ret; + + if (cpu == 0) { + void *tmpbuf; + + // wait for step 1 done + while (step < 1); + + // step 2: start mremap to have the old PTE emptied + printf("cpu%d: going to remap\n", cpu); + step = 2; + tmpbuf = mremap(buf, FILE_SIZE, FILE_SIZE, + MREMAP_FIXED | MREMAP_MAYMOVE, + newbuf); + assert(tmpbuf == newbuf); + printf("cpu%d: remap done\n", cpu); + pause(); + } + + // step 1: dirty the old PTE + *counterbuf = 1; + + step = 1; + while (step < 2); + + // step 3: clean this page + // delay a little while to give mremap some time + // to empty the old PTE and setup new PTE + mdelay(1000); + printf("cpu%d: going to clean the page\n", cpu); + posix_fadvise(fd, offset, BUFLEN, POSIX_FADV_DONTNEED); + + // step 4: now the page is cleaned, its new PTE is + // write protected but since mremap didn't flush tlb + // for the old PTE yet, we could still access the old + // addr and that will not dirty anything + printf("cpu%d: going to write 2\n", cpu); + *counterbuf = 2; + printf("cpu%d wrote 2\n", cpu); + + // step 5: drop this page from page cache and then + // read it back to verify if the last write gets lost + // munmap the page first, or the FADV_DONTNEED won't + // kick the page out of page cache + munmap(newbuf + offset, BUFLEN); + posix_fadvise(fd, offset, BUFLEN, POSIX_FADV_DONTNEED); + ret = pread(fd, &counterread, sizeof(counterread), offset); + assert(ret == sizeof(counterread)); + + if (counterread != 2) { + printf("*cpu%d wrote 2 gets lost\n", cpu); + fflush(stdout); + } + exit(0); + } +} + +void testcase_cleanup(void) +{ + unlink(wistmpfile); +} diff --git a/a/content_digest b/N1/content_digest index e7edd89..46240e8 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -288,5 +288,251 @@ "\01:2\0" "fn\00001-mremap-add-a-2s-delay-for-MAP_FIXED-case.patch\0" "b\0" + ">From c529dfa6bdfc643a9c3debb4b61b9b0c13b0862e Mon Sep 17 00:00:00 2001\n" + "From: Aaron Lu <aaron.lu@intel.com>\n" + "Date: Thu, 17 Nov 2016 15:11:08 +0800\n" + "Subject: [PATCH] mremap: add a 2s delay for MAP_FIXED case\n" + "\n" + "Add a 2s delay for MAP_FIXED case to enlarge the race window so that we\n" + "can hit the race in user space.\n" + "\n" + "Signed-off-by: Aaron Lu <aaron.lu@intel.com>\n" + "---\n" + " fs/exec.c | 2 +-\n" + " include/linux/mm.h | 2 +-\n" + " mm/mremap.c | 19 ++++++++++++-------\n" + " 3 files changed, 14 insertions(+), 9 deletions(-)\n" + "\n" + "diff --git a/fs/exec.c b/fs/exec.c\n" + "index 4e497b9ee71e..1e49ce9a23bd 100644\n" + "--- a/fs/exec.c\n" + "+++ b/fs/exec.c\n" + "@@ -619,7 +619,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)\n" + " \t * process cleanup to remove whatever mess we made.\n" + " \t */\n" + " \tif (length != move_page_tables(vma, old_start,\n" + "-\t\t\t\t vma, new_start, length, false))\n" + "+\t\t\t\t vma, new_start, length, false, false))\n" + " \t\treturn -ENOMEM;\n" + " \n" + " \tlru_add_drain();\n" + "diff --git a/include/linux/mm.h b/include/linux/mm.h\n" + "index a92c8d73aeaf..5e35fe3d914a 100644\n" + "--- a/include/linux/mm.h\n" + "+++ b/include/linux/mm.h\n" + "@@ -1392,7 +1392,7 @@ int vma_is_stack_for_current(struct vm_area_struct *vma);\n" + " extern unsigned long move_page_tables(struct vm_area_struct *vma,\n" + " \t\tunsigned long old_addr, struct vm_area_struct *new_vma,\n" + " \t\tunsigned long new_addr, unsigned long len,\n" + "-\t\tbool need_rmap_locks);\n" + "+\t\tbool need_rmap_locks, bool delay);\n" + " extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,\n" + " \t\t\t unsigned long end, pgprot_t newprot,\n" + " \t\t\t int dirty_accountable, int prot_numa);\n" + "diff --git a/mm/mremap.c b/mm/mremap.c\n" + "index da22ad2a5678..8e35279ca622 100644\n" + "--- a/mm/mremap.c\n" + "+++ b/mm/mremap.c\n" + "@@ -22,6 +22,7 @@\n" + " #include <linux/mmu_notifier.h>\n" + " #include <linux/uaccess.h>\n" + " #include <linux/mm-arch-hooks.h>\n" + "+#include <linux/delay.h>\n" + " \n" + " #include <asm/cacheflush.h>\n" + " #include <asm/tlbflush.h>\n" + "@@ -166,7 +167,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,\n" + " unsigned long move_page_tables(struct vm_area_struct *vma,\n" + " \t\tunsigned long old_addr, struct vm_area_struct *new_vma,\n" + " \t\tunsigned long new_addr, unsigned long len,\n" + "-\t\tbool need_rmap_locks)\n" + "+\t\tbool need_rmap_locks, bool delay)\n" + " {\n" + " \tunsigned long extent, next, old_end;\n" + " \tpmd_t *old_pmd, *new_pmd;\n" + "@@ -224,8 +225,11 @@ unsigned long move_page_tables(struct vm_area_struct *vma,\n" + " \t\t\t new_vma, new_pmd, new_addr, need_rmap_locks);\n" + " \t\tneed_flush = true;\n" + " \t}\n" + "-\tif (likely(need_flush))\n" + "+\tif (likely(need_flush)) {\n" + "+\t\tif (delay)\n" + "+\t\t\tmsleep(2000);\n" + " \t\tflush_tlb_range(vma, old_end-len, old_addr);\n" + "+\t}\n" + " \n" + " \tmmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);\n" + " \n" + "@@ -234,7 +238,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,\n" + " \n" + " static unsigned long move_vma(struct vm_area_struct *vma,\n" + " \t\tunsigned long old_addr, unsigned long old_len,\n" + "-\t\tunsigned long new_len, unsigned long new_addr, bool *locked)\n" + "+\t\tunsigned long new_len, unsigned long new_addr,\n" + "+\t\tbool *locked, bool delay)\n" + " {\n" + " \tstruct mm_struct *mm = vma->vm_mm;\n" + " \tstruct vm_area_struct *new_vma;\n" + "@@ -273,7 +278,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,\n" + " \t\treturn -ENOMEM;\n" + " \n" + " \tmoved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,\n" + "-\t\t\t\t need_rmap_locks);\n" + "+\t\t\t\t need_rmap_locks, delay);\n" + " \tif (moved_len < old_len) {\n" + " \t\terr = -ENOMEM;\n" + " \t} else if (vma->vm_ops && vma->vm_ops->mremap) {\n" + "@@ -287,7 +292,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,\n" + " \t\t * and then proceed to unmap new area instead of old.\n" + " \t\t */\n" + " \t\tmove_page_tables(new_vma, new_addr, vma, old_addr, moved_len,\n" + "-\t\t\t\t true);\n" + "+\t\t\t\t true, delay);\n" + " \t\tvma = new_vma;\n" + " \t\told_len = new_len;\n" + " \t\told_addr = new_addr;\n" + "@@ -442,7 +447,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,\n" + " \tif (offset_in_page(ret))\n" + " \t\tgoto out1;\n" + " \n" + "-\tret = move_vma(vma, addr, old_len, new_len, new_addr, locked);\n" + "+\tret = move_vma(vma, addr, old_len, new_len, new_addr, locked, true);\n" + " \tif (!(offset_in_page(ret)))\n" + " \t\tgoto out;\n" + " out1:\n" + "@@ -576,7 +581,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,\n" + " \t\t\tgoto out;\n" + " \t\t}\n" + " \n" + "-\t\tret = move_vma(vma, addr, old_len, new_len, new_addr, &locked);\n" + "+\t\tret = move_vma(vma, addr, old_len, new_len, new_addr, &locked, false);\n" + " \t}\n" + " out:\n" + " \tif (offset_in_page(ret)) {\n" + "-- \n" + 2.5.5 + "\01:3\0" + "fn\0raceremap.c\0" + "b\0" + "#define _GNU_SOURCE\n" + "#define _XOPEN_SOURCE 500\n" + "#include <sched.h>\n" + "#include <sys/mman.h>\n" + "#include <fcntl.h>\n" + "#include <stdio.h>\n" + "#include <stdlib.h>\n" + "#include <unistd.h>\n" + "#include <string.h>\n" + "#include <assert.h>\n" + "#include <sys/io.h>\n" + "\n" + "#define BUFLEN 4096\n" + "\n" + "static char wistmpfile[] = \"/mnt/willitscale.XXXXXX\";\n" + "\n" + "char *testcase_description = \"mremap\";\n" + "\n" + "char *buf;\n" + "char *newbuf = (char *)0x700000000000;\n" + "#define FILE_SIZE (4096*128)\n" + "\n" + "static void mdelay(int ms)\n" + "{\n" + "\tint i;\n" + "\n" + "\t// gain io permission for the delay\n" + "\tassert(ioperm(0x80, 8, 1) == 0);\n" + "\n" + "\tfor (i = 0; i < ms; i++)\n" + "\t\tinb(0x80);\n" + "}\n" + "\n" + "void testcase_prepare(void)\n" + "{\n" + "\tint fd = mkstemp(wistmpfile);\n" + "\n" + "\tassert(fd >= 0);\n" + "\tassert(pwrite(fd, \"X\", 1, FILE_SIZE-1) == 1);\n" + "\tbuf = mmap(NULL, FILE_SIZE, PROT_READ|PROT_WRITE,\n" + "\t\t\tMAP_SHARED, fd, 0);\n" + "\tassert(buf != (void *)-1);\n" + "\tclose(fd);\n" + "}\n" + "\n" + "static volatile int step = 0;\n" + "\n" + "void testcase(unsigned long long *iterations)\n" + "{\n" + "\tint cpu = sched_getcpu();\n" + "\tint fd = open(wistmpfile, O_RDWR);\n" + "\toff_t offset = sched_getcpu() * BUFLEN;\n" + "\tlong counterread = 0;\n" + "\tlong *counterbuf = (void *)&buf[offset];\n" + "\tassert(fd >= 0);\n" + "\n" + "\tprintf(\"cpu%d runs\\n\", cpu);\n" + "\n" + "\twhile (1) {\n" + "\t\tint ret;\n" + "\n" + "\t\tif (cpu == 0) {\n" + "\t\t\tvoid *tmpbuf;\n" + "\n" + "\t\t\t// wait for step 1 done\n" + "\t\t\twhile (step < 1);\n" + "\n" + "\t\t\t// step 2: start mremap to have the old PTE emptied\n" + "\t\t\tprintf(\"cpu%d: going to remap\\n\", cpu);\n" + "\t\t\tstep = 2;\n" + "\t\t\ttmpbuf = mremap(buf, FILE_SIZE, FILE_SIZE,\n" + "\t\t\t\t\tMREMAP_FIXED | MREMAP_MAYMOVE,\n" + "\t\t\t\t\tnewbuf);\n" + "\t\t\tassert(tmpbuf == newbuf);\n" + "\t\t\tprintf(\"cpu%d: remap done\\n\", cpu);\n" + "\t\t\tpause();\n" + "\t\t}\n" + "\n" + "\t\t// step 1: dirty the old PTE\n" + "\t\t*counterbuf = 1;\n" + "\n" + "\t\tstep = 1;\n" + "\t\twhile (step < 2);\n" + "\n" + "\t\t// step 3: clean this page\n" + "\t\t// delay a little while to give mremap some time\n" + "\t\t// to empty the old PTE and setup new PTE\n" + "\t\tmdelay(1000);\n" + "\t\tprintf(\"cpu%d: going to clean the page\\n\", cpu);\n" + "\t\tposix_fadvise(fd, offset, BUFLEN, POSIX_FADV_DONTNEED);\n" + "\n" + "\t\t// step 4: now the page is cleaned, its new PTE is\n" + "\t\t// write protected but since mremap didn't flush tlb\n" + "\t\t// for the old PTE yet, we could still access the old\n" + "\t\t// addr and that will not dirty anything\n" + "\t\tprintf(\"cpu%d: going to write 2\\n\", cpu);\n" + "\t\t*counterbuf = 2;\n" + "\t\tprintf(\"cpu%d wrote 2\\n\", cpu);\n" + "\n" + "\t\t// step 5: drop this page from page cache and then\n" + "\t\t// read it back to verify if the last write gets lost\n" + "\t\t// munmap the page first, or the FADV_DONTNEED won't\n" + "\t\t// kick the page out of page cache\n" + "\t\tmunmap(newbuf + offset, BUFLEN);\n" + "\t\tposix_fadvise(fd, offset, BUFLEN, POSIX_FADV_DONTNEED);\n" + "\t\tret = pread(fd, &counterread, sizeof(counterread), offset);\n" + "\t\tassert(ret == sizeof(counterread));\n" + "\n" + "\t\tif (counterread != 2) {\n" + "\t\t\tprintf(\"*cpu%d wrote 2 gets lost\\n\", cpu);\n" + "\t\t\tfflush(stdout);\n" + "\t\t}\n" + "\t\texit(0);\n" + "\t}\n" + "}\n" + "\n" + "void testcase_cleanup(void)\n" + "{\n" + "\tunlink(wistmpfile);\n" + } -2099d584f06473d669e209ac906b9b0a125b6a25e612b696af46e4d35335b643 +51e80e539a13dd3714a345190f5c6883fe92bd35081fc15501352fc50a9afc54
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.