* [PATCH] mm: Add MREMAP_DONTUNMAP to mremap(). @ 2020-01-23 1:46 Brian Geffon [not found] ` <20200123014627.71720-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> 2020-01-27 4:46 ` [PATCH] " Dan Carpenter 0 siblings, 2 replies; 21+ messages in thread From: Brian Geffon @ 2020-01-23 1:46 UTC (permalink / raw) To: linux-mm-Bw31MaZKKs3YtjvyW6yDsg Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, Brian Geffon, Sonny Rao, Minchan Kim, Joel Fernandes, Lokesh Gidra, linux-kernel-u79uwXL29TY76Z2rM5mHXA, linux-api-u79uwXL29TY76Z2rM5mHXA, Yu Zhao, Jesse Barnes MREMAP_DONTUNMAP is an additional flag that can be used with MREMAP_FIXED to move a mapping to a new address. Normally, mremap(2) would then tear down the old vma so subsequent accesses to the vma cause a segfault. However, with this new flag it will keep the old vma with zapping PTEs so any access to the old VMA after that point will result in a pagefault. This feature will find a use in ChromeOS along with userfaultfd. Specifically we will want to register a VMA with userfaultfd and then pull it out from under a running process. By using MREMAP_DONTUNMAP we don't have to worry about mprotecting and then potentially racing with VMA permission changes from a running process. This feature also has a use case in Android, Lokesh Gidra has said that "As part of using userfaultfd for GC, We'll have to move the physical pages of the java heap to a separate location. For this purpose mremap will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java heap, its virtual mapping will be removed as well. Therefore, we'll require performing mmap immediately after. This is not only time consuming but also opens a time window where a native thread may call mmap and reserve the java heap's address range for its own usage. This flag solves the problem." Signed-off-by: Brian Geffon <bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> --- include/uapi/linux/mman.h | 5 +++-- mm/mremap.c | 37 ++++++++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index fc1a64c3447b..923cc162609c 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -5,8 +5,9 @@ #include <asm/mman.h> #include <asm-generic/hugetlb_encode.h> -#define MREMAP_MAYMOVE 1 -#define MREMAP_FIXED 2 +#define MREMAP_MAYMOVE 1 +#define MREMAP_FIXED 2 +#define MREMAP_DONTUNMAP 4 #define OVERCOMMIT_GUESS 0 #define OVERCOMMIT_ALWAYS 1 diff --git a/mm/mremap.c b/mm/mremap.c index 122938dcec15..bf97c3eb538b 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -318,8 +318,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, static unsigned long move_vma(struct vm_area_struct *vma, unsigned long old_addr, unsigned long old_len, unsigned long new_len, unsigned long new_addr, - bool *locked, struct vm_userfaultfd_ctx *uf, - struct list_head *uf_unmap) + bool *locked, unsigned long flags, + struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma; @@ -408,6 +408,13 @@ static unsigned long move_vma(struct vm_area_struct *vma, if (unlikely(vma->vm_flags & VM_PFNMAP)) untrack_pfn_moved(vma); + if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) { + if (vm_flags & VM_ACCOUNT) + vma->vm_flags |= VM_ACCOUNT; + + goto out; + } + if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) { /* OOM: unable to split vma, just get accounts right */ vm_unacct_memory(excess >> PAGE_SHIFT); @@ -422,6 +429,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, vma->vm_next->vm_flags |= VM_ACCOUNT; } +out: if (vm_flags & VM_LOCKED) { mm->locked_vm += new_len >> PAGE_SHIFT; *locked = true; @@ -497,7 +505,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, static unsigned long mremap_to(unsigned long addr, unsigned long old_len, unsigned long new_addr, unsigned long new_len, bool *locked, - struct vm_userfaultfd_ctx *uf, + unsigned long flags, struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap_early, struct list_head *uf_unmap) { @@ -545,6 +553,17 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, old_len = new_len; } + /* + * MREMAP_DONTUNMAP expands by old_len + (new_len - old_len), we will + * check that we can expand by old_len and vma_to_resize will handle + * the vma growing. + */ + if (unlikely(flags & MREMAP_DONTUNMAP && !may_expand_vm(mm, + vma->vm_flags, old_len >> PAGE_SHIFT))) { + ret = -ENOMEM; + goto out; + } + vma = vma_to_resize(addr, old_len, new_len, &charged); if (IS_ERR(vma)) { ret = PTR_ERR(vma); @@ -561,7 +580,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, if (IS_ERR_VALUE(ret)) goto out1; - ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf, + ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf, uf_unmap); if (!(offset_in_page(ret))) goto out; @@ -609,12 +628,15 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, addr = untagged_addr(addr); new_addr = untagged_addr(new_addr); - if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) + if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP)) return ret; if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE)) return ret; + if (flags & MREMAP_DONTUNMAP && !(flags & MREMAP_FIXED)) + return ret; + if (offset_in_page(addr)) return ret; @@ -634,7 +656,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, if (flags & MREMAP_FIXED) { ret = mremap_to(addr, old_len, new_addr, new_len, - &locked, &uf, &uf_unmap_early, &uf_unmap); + &locked, flags, &uf, &uf_unmap_early, + &uf_unmap); goto out; } @@ -712,7 +735,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, } ret = move_vma(vma, addr, old_len, new_len, new_addr, - &locked, &uf, &uf_unmap); + &locked, flags, &uf, &uf_unmap); } out: if (offset_in_page(ret)) { -- 2.25.0.341.g760bfbb309-goog ^ permalink raw reply related [flat|nested] 21+ messages in thread
[parent not found: <20200123014627.71720-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH] mm: Add MREMAP_DONTUNMAP to mremap(). [not found] ` <20200123014627.71720-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> @ 2020-01-23 3:02 ` Andy Lutomirski 2020-01-23 19:03 ` Brian Geffon 2020-01-24 19:06 ` [PATCH v2] " Brian Geffon 2020-01-27 5:30 ` [PATCH v3] " Brian Geffon 2 siblings, 1 reply; 21+ messages in thread From: Andy Lutomirski @ 2020-01-23 3:02 UTC (permalink / raw) To: Brian Geffon Cc: linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, Sonny Rao, Minchan Kim, Joel Fernandes, Lokesh Gidra, linux-kernel-u79uwXL29TY76Z2rM5mHXA, linux-api-u79uwXL29TY76Z2rM5mHXA, Yu Zhao, Jesse Barnes > On Jan 22, 2020, at 5:46 PM, Brian Geffon <bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote: > > MREMAP_DONTUNMAP is an additional flag that can be used with > MREMAP_FIXED to move a mapping to a new address. Normally, mremap(2) > would then tear down the old vma so subsequent accesses to the vma > cause a segfault. However, with this new flag it will keep the old > vma with zapping PTEs so any access to the old VMA after that point > will result in a pagefault. This needs a vastly better description. Perhaps: When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is set, the source mapping will not be removed. Instead it will be cleared as if a brand new anonymous, private mapping had been created atomically as part of the mremap() call. If a userfaultfd was watching the source, it will continue to watch the new mapping. For a mapping that is shared or not anonymous, MREMAP_DONTUNMAP will cause the mremap() call to fail. Or is it something else? > > This feature will find a use in ChromeOS along with userfaultfd. > Specifically we will want to register a VMA with userfaultfd and then > pull it out from under a running process. By using MREMAP_DONTUNMAP we > don't have to worry about mprotecting and then potentially racing with > VMA permission changes from a running process. Does this mean you yank it out but you want to replace it simultaneously? > > This feature also has a use case in Android, Lokesh Gidra has said > that "As part of using userfaultfd for GC, We'll have to move the physical > pages of the java heap to a separate location. For this purpose mremap > will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java > heap, its virtual mapping will be removed as well. Therefore, we'll > require performing mmap immediately after. This is not only time consuming > but also opens a time window where a native thread may call mmap and > reserve the java heap's address range for its own usage. This flag > solves the problem." Cute. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] mm: Add MREMAP_DONTUNMAP to mremap(). 2020-01-23 3:02 ` Andy Lutomirski @ 2020-01-23 19:03 ` Brian Geffon 0 siblings, 0 replies; 21+ messages in thread From: Brian Geffon @ 2020-01-23 19:03 UTC (permalink / raw) To: Andy Lutomirski Cc: linux-mm, Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, Sonny Rao, Minchan Kim, Joel Fernandes, Lokesh Gidra, LKML, linux-api, Yu Zhao, Jesse Barnes Andy, Thanks, yes, that's a much clearer description of the feature. I'll make sure to update the description with subsequent patches and with later man page updates. Brian On Wed, Jan 22, 2020 at 7:02 PM Andy Lutomirski <luto@amacapital.net> wrote: > > > > > On Jan 22, 2020, at 5:46 PM, Brian Geffon <bgeffon@google.com> wrote: > > > > MREMAP_DONTUNMAP is an additional flag that can be used with > > MREMAP_FIXED to move a mapping to a new address. Normally, mremap(2) > > would then tear down the old vma so subsequent accesses to the vma > > cause a segfault. However, with this new flag it will keep the old > > vma with zapping PTEs so any access to the old VMA after that point > > will result in a pagefault. > > This needs a vastly better description. Perhaps: > > When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is set, the source mapping will not be removed. Instead it will be cleared as if a brand new anonymous, private mapping had been created atomically as part of the mremap() call. If a userfaultfd was watching the source, it will continue to watch the new mapping. For a mapping that is shared or not anonymous, MREMAP_DONTUNMAP will cause the mremap() call to fail. > > Or is it something else? > > > > > This feature will find a use in ChromeOS along with userfaultfd. > > Specifically we will want to register a VMA with userfaultfd and then > > pull it out from under a running process. By using MREMAP_DONTUNMAP we > > don't have to worry about mprotecting and then potentially racing with > > VMA permission changes from a running process. > > Does this mean you yank it out but you want to replace it simultaneously? > > > > > This feature also has a use case in Android, Lokesh Gidra has said > > that "As part of using userfaultfd for GC, We'll have to move the physical > > pages of the java heap to a separate location. For this purpose mremap > > will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java > > heap, its virtual mapping will be removed as well. Therefore, we'll > > require performing mmap immediately after. This is not only time consuming > > but also opens a time window where a native thread may call mmap and > > reserve the java heap's address range for its own usage. This flag > > solves the problem." > > Cute. ^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap(). [not found] ` <20200123014627.71720-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> 2020-01-23 3:02 ` Andy Lutomirski @ 2020-01-24 19:06 ` Brian Geffon [not found] ` <20200124190625.257659-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> 2020-01-27 5:30 ` [PATCH v3] " Brian Geffon 2 siblings, 1 reply; 21+ messages in thread From: Brian Geffon @ 2020-01-24 19:06 UTC (permalink / raw) To: Andrew Morton Cc: Michael S . Tsirkin, Brian Geffon, Arnd Bergmann, linux-kernel-u79uwXL29TY76Z2rM5mHXA, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, linux-api-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is set, the source mapping will not be removed. Instead it will be cleared as if a brand new anonymous, private mapping had been created atomically as part of the mremap() call. If a userfaultfd was watching the source, it will continue to watch the new mapping. For a mapping that is shared or not anonymous, MREMAP_DONTUNMAP will cause the mremap() call to fail. MREMAP_DONTUNMAP implies that MREMAP_FIXED is also used. The final result is two equally sized VMAs where the destination contains the PTEs of the source. We hope to use this in Chrome OS where with userfaultfd we could write an anonymous mapping to disk without having to STOP the process or worry about VMA permission changes. This feature also has a use case in Android, Lokesh Gidra has said that "As part of using userfaultfd for GC, We'll have to move the physical pages of the java heap to a separate location. For this purpose mremap will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java heap, its virtual mapping will be removed as well. Therefore, we'll require performing mmap immediately after. This is not only time consuming but also opens a time window where a native thread may call mmap and reserve the java heap's address range for its own usage. This flag solves the problem." Signed-off-by: Brian Geffon <bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> --- include/uapi/linux/mman.h | 5 +++-- mm/mremap.c | 37 ++++++++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index fc1a64c3447b..923cc162609c 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -5,8 +5,9 @@ #include <asm/mman.h> #include <asm-generic/hugetlb_encode.h> -#define MREMAP_MAYMOVE 1 -#define MREMAP_FIXED 2 +#define MREMAP_MAYMOVE 1 +#define MREMAP_FIXED 2 +#define MREMAP_DONTUNMAP 4 #define OVERCOMMIT_GUESS 0 #define OVERCOMMIT_ALWAYS 1 diff --git a/mm/mremap.c b/mm/mremap.c index 122938dcec15..bf97c3eb538b 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -318,8 +318,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, static unsigned long move_vma(struct vm_area_struct *vma, unsigned long old_addr, unsigned long old_len, unsigned long new_len, unsigned long new_addr, - bool *locked, struct vm_userfaultfd_ctx *uf, - struct list_head *uf_unmap) + bool *locked, unsigned long flags, + struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma; @@ -408,6 +408,13 @@ static unsigned long move_vma(struct vm_area_struct *vma, if (unlikely(vma->vm_flags & VM_PFNMAP)) untrack_pfn_moved(vma); + if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) { + if (vm_flags & VM_ACCOUNT) + vma->vm_flags |= VM_ACCOUNT; + + goto out; + } + if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) { /* OOM: unable to split vma, just get accounts right */ vm_unacct_memory(excess >> PAGE_SHIFT); @@ -422,6 +429,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, vma->vm_next->vm_flags |= VM_ACCOUNT; } +out: if (vm_flags & VM_LOCKED) { mm->locked_vm += new_len >> PAGE_SHIFT; *locked = true; @@ -497,7 +505,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, static unsigned long mremap_to(unsigned long addr, unsigned long old_len, unsigned long new_addr, unsigned long new_len, bool *locked, - struct vm_userfaultfd_ctx *uf, + unsigned long flags, struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap_early, struct list_head *uf_unmap) { @@ -545,6 +553,17 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, old_len = new_len; } + /* + * MREMAP_DONTUNMAP expands by old_len + (new_len - old_len), we will + * check that we can expand by old_len and vma_to_resize will handle + * the vma growing. + */ + if (unlikely(flags & MREMAP_DONTUNMAP && !may_expand_vm(mm, + vma->vm_flags, old_len >> PAGE_SHIFT))) { + ret = -ENOMEM; + goto out; + } + vma = vma_to_resize(addr, old_len, new_len, &charged); if (IS_ERR(vma)) { ret = PTR_ERR(vma); @@ -561,7 +580,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, if (IS_ERR_VALUE(ret)) goto out1; - ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf, + ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf, uf_unmap); if (!(offset_in_page(ret))) goto out; @@ -609,12 +628,15 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, addr = untagged_addr(addr); new_addr = untagged_addr(new_addr); - if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) + if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP)) return ret; if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE)) return ret; + if (flags & MREMAP_DONTUNMAP && !(flags & MREMAP_FIXED)) + return ret; + if (offset_in_page(addr)) return ret; @@ -634,7 +656,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, if (flags & MREMAP_FIXED) { ret = mremap_to(addr, old_len, new_addr, new_len, - &locked, &uf, &uf_unmap_early, &uf_unmap); + &locked, flags, &uf, &uf_unmap_early, + &uf_unmap); goto out; } @@ -712,7 +735,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, } ret = move_vma(vma, addr, old_len, new_len, new_addr, - &locked, &uf, &uf_unmap); + &locked, flags, &uf, &uf_unmap); } out: if (offset_in_page(ret)) { -- 2.25.0.341.g760bfbb309-goog ^ permalink raw reply related [flat|nested] 21+ messages in thread
[parent not found: <20200124190625.257659-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap(). [not found] ` <20200124190625.257659-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> @ 2020-01-26 5:16 ` Nathan Chancellor 2020-01-27 2:21 ` Brian Geffon 2020-01-26 22:06 ` Kirill A. Shutemov 2020-01-27 10:13 ` Florian Weimer 2 siblings, 1 reply; 21+ messages in thread From: Nathan Chancellor @ 2020-01-26 5:16 UTC (permalink / raw) To: Brian Geffon Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, linux-kernel-u79uwXL29TY76Z2rM5mHXA, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, linux-api-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes, clang-built-linux-/JYPxA39Uh5TLH3MbocFFw Hi Brian, On Fri, Jan 24, 2020 at 11:06:25AM -0800, Brian Geffon wrote: > When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is > set, the source mapping will not be removed. Instead it will be > cleared as if a brand new anonymous, private mapping had been created > atomically as part of the mremap() call. If a userfaultfd was watching > the source, it will continue to watch the new mapping. For a mapping > that is shared or not anonymous, MREMAP_DONTUNMAP will cause the > mremap() call to fail. MREMAP_DONTUNMAP implies that MREMAP_FIXED is > also used. The final result is two equally sized VMAs where the > destination contains the PTEs of the source. > > We hope to use this in Chrome OS where with userfaultfd we could write > an anonymous mapping to disk without having to STOP the process or worry > about VMA permission changes. > > This feature also has a use case in Android, Lokesh Gidra has said > that "As part of using userfaultfd for GC, We'll have to move the physical > pages of the java heap to a separate location. For this purpose mremap > will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java > heap, its virtual mapping will be removed as well. Therefore, we'll > require performing mmap immediately after. This is not only time consuming > but also opens a time window where a native thread may call mmap and > reserve the java heap's address range for its own usage. This flag > solves the problem." > > Signed-off-by: Brian Geffon <bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> > --- > include/uapi/linux/mman.h | 5 +++-- > mm/mremap.c | 37 ++++++++++++++++++++++++++++++------- > 2 files changed, 33 insertions(+), 9 deletions(-) > > diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h > index fc1a64c3447b..923cc162609c 100644 > --- a/include/uapi/linux/mman.h > +++ b/include/uapi/linux/mman.h > @@ -5,8 +5,9 @@ > #include <asm/mman.h> > #include <asm-generic/hugetlb_encode.h> > > -#define MREMAP_MAYMOVE 1 > -#define MREMAP_FIXED 2 > +#define MREMAP_MAYMOVE 1 > +#define MREMAP_FIXED 2 > +#define MREMAP_DONTUNMAP 4 > > #define OVERCOMMIT_GUESS 0 > #define OVERCOMMIT_ALWAYS 1 > diff --git a/mm/mremap.c b/mm/mremap.c > index 122938dcec15..bf97c3eb538b 100644 > --- a/mm/mremap.c > +++ b/mm/mremap.c > @@ -318,8 +318,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, > static unsigned long move_vma(struct vm_area_struct *vma, > unsigned long old_addr, unsigned long old_len, > unsigned long new_len, unsigned long new_addr, > - bool *locked, struct vm_userfaultfd_ctx *uf, > - struct list_head *uf_unmap) > + bool *locked, unsigned long flags, > + struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap) > { > struct mm_struct *mm = vma->vm_mm; > struct vm_area_struct *new_vma; > @@ -408,6 +408,13 @@ static unsigned long move_vma(struct vm_area_struct *vma, > if (unlikely(vma->vm_flags & VM_PFNMAP)) > untrack_pfn_moved(vma); > > + if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) { > + if (vm_flags & VM_ACCOUNT) > + vma->vm_flags |= VM_ACCOUNT; > + > + goto out; > + } > + > if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) { > /* OOM: unable to split vma, just get accounts right */ > vm_unacct_memory(excess >> PAGE_SHIFT); > @@ -422,6 +429,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, > vma->vm_next->vm_flags |= VM_ACCOUNT; > } > > +out: > if (vm_flags & VM_LOCKED) { > mm->locked_vm += new_len >> PAGE_SHIFT; > *locked = true; > @@ -497,7 +505,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, > > static unsigned long mremap_to(unsigned long addr, unsigned long old_len, > unsigned long new_addr, unsigned long new_len, bool *locked, > - struct vm_userfaultfd_ctx *uf, > + unsigned long flags, struct vm_userfaultfd_ctx *uf, > struct list_head *uf_unmap_early, > struct list_head *uf_unmap) > { > @@ -545,6 +553,17 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, > old_len = new_len; > } > > + /* > + * MREMAP_DONTUNMAP expands by old_len + (new_len - old_len), we will > + * check that we can expand by old_len and vma_to_resize will handle > + * the vma growing. > + */ > + if (unlikely(flags & MREMAP_DONTUNMAP && !may_expand_vm(mm, > + vma->vm_flags, old_len >> PAGE_SHIFT))) { We received a Clang build report that vma is used uninitialized here (they aren't being publicly sent to LKML due to GCC vs Clang warning/error overlap): https://groups.google.com/d/msg/clang-built-linux/gE5wRaeHdSI/xVA0MBQVEgAJ Sure enough, vma is initialized first in the next block. Not sure if this section should be moved below that initialization or if something else should be done to resolve it but that dereference will obviously be fatal. Cheers, Nathan > + ret = -ENOMEM; > + goto out; > + } > + > vma = vma_to_resize(addr, old_len, new_len, &charged); > if (IS_ERR(vma)) { > ret = PTR_ERR(vma); > @@ -561,7 +580,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, > if (IS_ERR_VALUE(ret)) > goto out1; > > - ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf, > + ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf, > uf_unmap); > if (!(offset_in_page(ret))) > goto out; > @@ -609,12 +628,15 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, > addr = untagged_addr(addr); > new_addr = untagged_addr(new_addr); > > - if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) > + if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP)) > return ret; > > if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE)) > return ret; > > + if (flags & MREMAP_DONTUNMAP && !(flags & MREMAP_FIXED)) > + return ret; > + > if (offset_in_page(addr)) > return ret; > > @@ -634,7 +656,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, > > if (flags & MREMAP_FIXED) { > ret = mremap_to(addr, old_len, new_addr, new_len, > - &locked, &uf, &uf_unmap_early, &uf_unmap); > + &locked, flags, &uf, &uf_unmap_early, > + &uf_unmap); > goto out; > } > > @@ -712,7 +735,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, > } > > ret = move_vma(vma, addr, old_len, new_len, new_addr, > - &locked, &uf, &uf_unmap); > + &locked, flags, &uf, &uf_unmap); > } > out: > if (offset_in_page(ret)) { > -- > 2.25.0.341.g760bfbb309-goog > ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap(). 2020-01-26 5:16 ` Nathan Chancellor @ 2020-01-27 2:21 ` Brian Geffon 0 siblings, 0 replies; 21+ messages in thread From: Brian Geffon @ 2020-01-27 2:21 UTC (permalink / raw) To: Nathan Chancellor Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML, linux-mm, linux-api-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes, clang-built-linux-/JYPxA39Uh5TLH3MbocFFw Hi Nathan, Thank you! That was an oversight on my part. I'll address it in the next patch. Brian On Sat, Jan 25, 2020 at 9:16 PM Nathan Chancellor <natechancellor-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote: > > Hi Brian, > > On Fri, Jan 24, 2020 at 11:06:25AM -0800, Brian Geffon wrote: > > When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is > > set, the source mapping will not be removed. Instead it will be > > cleared as if a brand new anonymous, private mapping had been created > > atomically as part of the mremap() call. If a userfaultfd was watching > > the source, it will continue to watch the new mapping. For a mapping > > that is shared or not anonymous, MREMAP_DONTUNMAP will cause the > > mremap() call to fail. MREMAP_DONTUNMAP implies that MREMAP_FIXED is > > also used. The final result is two equally sized VMAs where the > > destination contains the PTEs of the source. > > > > We hope to use this in Chrome OS where with userfaultfd we could write > > an anonymous mapping to disk without having to STOP the process or worry > > about VMA permission changes. > > > > This feature also has a use case in Android, Lokesh Gidra has said > > that "As part of using userfaultfd for GC, We'll have to move the physical > > pages of the java heap to a separate location. For this purpose mremap > > will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java > > heap, its virtual mapping will be removed as well. Therefore, we'll > > require performing mmap immediately after. This is not only time consuming > > but also opens a time window where a native thread may call mmap and > > reserve the java heap's address range for its own usage. This flag > > solves the problem." > > > > Signed-off-by: Brian Geffon <bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> > > --- > > include/uapi/linux/mman.h | 5 +++-- > > mm/mremap.c | 37 ++++++++++++++++++++++++++++++------- > > 2 files changed, 33 insertions(+), 9 deletions(-) > > > > diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h > > index fc1a64c3447b..923cc162609c 100644 > > --- a/include/uapi/linux/mman.h > > +++ b/include/uapi/linux/mman.h > > @@ -5,8 +5,9 @@ > > #include <asm/mman.h> > > #include <asm-generic/hugetlb_encode.h> > > > > -#define MREMAP_MAYMOVE 1 > > -#define MREMAP_FIXED 2 > > +#define MREMAP_MAYMOVE 1 > > +#define MREMAP_FIXED 2 > > +#define MREMAP_DONTUNMAP 4 > > > > #define OVERCOMMIT_GUESS 0 > > #define OVERCOMMIT_ALWAYS 1 > > diff --git a/mm/mremap.c b/mm/mremap.c > > index 122938dcec15..bf97c3eb538b 100644 > > --- a/mm/mremap.c > > +++ b/mm/mremap.c > > @@ -318,8 +318,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, > > static unsigned long move_vma(struct vm_area_struct *vma, > > unsigned long old_addr, unsigned long old_len, > > unsigned long new_len, unsigned long new_addr, > > - bool *locked, struct vm_userfaultfd_ctx *uf, > > - struct list_head *uf_unmap) > > + bool *locked, unsigned long flags, > > + struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap) > > { > > struct mm_struct *mm = vma->vm_mm; > > struct vm_area_struct *new_vma; > > @@ -408,6 +408,13 @@ static unsigned long move_vma(struct vm_area_struct *vma, > > if (unlikely(vma->vm_flags & VM_PFNMAP)) > > untrack_pfn_moved(vma); > > > > + if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) { > > + if (vm_flags & VM_ACCOUNT) > > + vma->vm_flags |= VM_ACCOUNT; > > + > > + goto out; > > + } > > + > > if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) { > > /* OOM: unable to split vma, just get accounts right */ > > vm_unacct_memory(excess >> PAGE_SHIFT); > > @@ -422,6 +429,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, > > vma->vm_next->vm_flags |= VM_ACCOUNT; > > } > > > > +out: > > if (vm_flags & VM_LOCKED) { > > mm->locked_vm += new_len >> PAGE_SHIFT; > > *locked = true; > > @@ -497,7 +505,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, > > > > static unsigned long mremap_to(unsigned long addr, unsigned long old_len, > > unsigned long new_addr, unsigned long new_len, bool *locked, > > - struct vm_userfaultfd_ctx *uf, > > + unsigned long flags, struct vm_userfaultfd_ctx *uf, > > struct list_head *uf_unmap_early, > > struct list_head *uf_unmap) > > { > > @@ -545,6 +553,17 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, > > old_len = new_len; > > } > > > > + /* > > + * MREMAP_DONTUNMAP expands by old_len + (new_len - old_len), we will > > + * check that we can expand by old_len and vma_to_resize will handle > > + * the vma growing. > > + */ > > + if (unlikely(flags & MREMAP_DONTUNMAP && !may_expand_vm(mm, > > + vma->vm_flags, old_len >> PAGE_SHIFT))) { > > We received a Clang build report that vma is used uninitialized here > (they aren't being publicly sent to LKML due to GCC vs Clang > warning/error overlap): > > https://groups.google.com/d/msg/clang-built-linux/gE5wRaeHdSI/xVA0MBQVEgAJ > > Sure enough, vma is initialized first in the next block. Not sure if > this section should be moved below that initialization or if something > else should be done to resolve it but that dereference will obviously be > fatal. > > Cheers, > Nathan > > > + ret = -ENOMEM; > > + goto out; > > + } > > + > > vma = vma_to_resize(addr, old_len, new_len, &charged); > > if (IS_ERR(vma)) { > > ret = PTR_ERR(vma); > > @@ -561,7 +580,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, > > if (IS_ERR_VALUE(ret)) > > goto out1; > > > > - ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf, > > + ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf, > > uf_unmap); > > if (!(offset_in_page(ret))) > > goto out; > > @@ -609,12 +628,15 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, > > addr = untagged_addr(addr); > > new_addr = untagged_addr(new_addr); > > > > - if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) > > + if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP)) > > return ret; > > > > if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE)) > > return ret; > > > > + if (flags & MREMAP_DONTUNMAP && !(flags & MREMAP_FIXED)) > > + return ret; > > + > > if (offset_in_page(addr)) > > return ret; > > > > @@ -634,7 +656,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, > > > > if (flags & MREMAP_FIXED) { > > ret = mremap_to(addr, old_len, new_addr, new_len, > > - &locked, &uf, &uf_unmap_early, &uf_unmap); > > + &locked, flags, &uf, &uf_unmap_early, > > + &uf_unmap); > > goto out; > > } > > > > @@ -712,7 +735,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, > > } > > > > ret = move_vma(vma, addr, old_len, new_len, new_addr, > > - &locked, &uf, &uf_unmap); > > + &locked, flags, &uf, &uf_unmap); > > } > > out: > > if (offset_in_page(ret)) { > > -- > > 2.25.0.341.g760bfbb309-goog > > ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap(). [not found] ` <20200124190625.257659-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> 2020-01-26 5:16 ` Nathan Chancellor @ 2020-01-26 22:06 ` Kirill A. Shutemov 2020-01-28 1:35 ` Brian Geffon 2020-01-27 10:13 ` Florian Weimer 2 siblings, 1 reply; 21+ messages in thread From: Kirill A. Shutemov @ 2020-01-26 22:06 UTC (permalink / raw) To: Brian Geffon Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, linux-kernel-u79uwXL29TY76Z2rM5mHXA, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, linux-api-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes On Fri, Jan 24, 2020 at 11:06:25AM -0800, Brian Geffon wrote: > When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is > set, the source mapping will not be removed. Instead it will be > cleared as if a brand new anonymous, private mapping had been created > atomically as part of the mremap() call. If a userfaultfd was watching > the source, it will continue to watch the new mapping. For a mapping > that is shared or not anonymous, MREMAP_DONTUNMAP will cause the > mremap() call to fail. MREMAP_DONTUNMAP implies that MREMAP_FIXED is > also used. Implies? From code it looks like it requires MREMAP_FIXED. And MREMAP_FIXED requires MREMAP_MAYMOVE. That's strange flag chaining. I don't really see need in such dependencies. It maybe indeed implied, not requied. > The final result is two equally sized VMAs where the > destination contains the PTEs of the source. Could you clarify rmap situation here? How the rmap hierarchy will look like before and after the operation. Can the new VMA merge with the old one? Sounds fishy to me. > We hope to use this in Chrome OS where with userfaultfd we could write > an anonymous mapping to disk without having to STOP the process or worry > about VMA permission changes. > > This feature also has a use case in Android, Lokesh Gidra has said > that "As part of using userfaultfd for GC, We'll have to move the physical > pages of the java heap to a separate location. For this purpose mremap > will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java > heap, its virtual mapping will be removed as well. Therefore, we'll > require performing mmap immediately after. This is not only time consuming > but also opens a time window where a native thread may call mmap and > reserve the java heap's address range for its own usage. This flag > solves the problem." -- Kirill A. Shutemov ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap(). 2020-01-26 22:06 ` Kirill A. Shutemov @ 2020-01-28 1:35 ` Brian Geffon [not found] ` <CADyq12xCK_3MhGi88Am5P6DVZvrW8vqtyJMHO0zjNhvhYegm1w-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 0 siblings, 1 reply; 21+ messages in thread From: Brian Geffon @ 2020-01-28 1:35 UTC (permalink / raw) To: Kirill A. Shutemov Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML, linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes Hi Kirill, Thanks for taking the time to look at this. I'll update the wording to make it clear that MREMAP_FIXED is required with MREMAP_DONTUNMAP. Regarding rmap, you're completely right I'm going to roll a new patch which will call unlink_anon_vmas() to make sure the rmap is correct, I'll also explicitly check that the vma is anonymous and not shared returning EINVAL if not, how does that sound? Brian On Sun, Jan 26, 2020 at 2:06 PM Kirill A. Shutemov <kirill@shutemov.name> wrote: > > On Fri, Jan 24, 2020 at 11:06:25AM -0800, Brian Geffon wrote: > > When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is > > set, the source mapping will not be removed. Instead it will be > > cleared as if a brand new anonymous, private mapping had been created > > atomically as part of the mremap() call. If a userfaultfd was watching > > the source, it will continue to watch the new mapping. For a mapping > > that is shared or not anonymous, MREMAP_DONTUNMAP will cause the > > mremap() call to fail. MREMAP_DONTUNMAP implies that MREMAP_FIXED is > > also used. > > Implies? From code it looks like it requires MREMAP_FIXED. And > MREMAP_FIXED requires MREMAP_MAYMOVE. That's strange flag chaining. > I don't really see need in such dependencies. It maybe indeed implied, not > requied. > > > The final result is two equally sized VMAs where the > > destination contains the PTEs of the source. > > Could you clarify rmap situation here? How the rmap hierarchy will look > like before and after the operation. Can the new VMA merge with the old > one? Sounds fishy to me. > > > We hope to use this in Chrome OS where with userfaultfd we could write > > an anonymous mapping to disk without having to STOP the process or worry > > about VMA permission changes. > > > > This feature also has a use case in Android, Lokesh Gidra has said > > that "As part of using userfaultfd for GC, We'll have to move the physical > > pages of the java heap to a separate location. For this purpose mremap > > will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java > > heap, its virtual mapping will be removed as well. Therefore, we'll > > require performing mmap immediately after. This is not only time consuming > > but also opens a time window where a native thread may call mmap and > > reserve the java heap's address range for its own usage. This flag > > solves the problem." > > -- > Kirill A. Shutemov ^ permalink raw reply [flat|nested] 21+ messages in thread
[parent not found: <CADyq12xCK_3MhGi88Am5P6DVZvrW8vqtyJMHO0zjNhvhYegm1w-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap(). [not found] ` <CADyq12xCK_3MhGi88Am5P6DVZvrW8vqtyJMHO0zjNhvhYegm1w-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2020-01-29 10:46 ` Kirill A. Shutemov 2020-02-01 21:03 ` Brian Geffon 2020-02-02 4:17 ` Brian Geffon 0 siblings, 2 replies; 21+ messages in thread From: Kirill A. Shutemov @ 2020-01-29 10:46 UTC (permalink / raw) To: Brian Geffon Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML, linux-mm, linux-api-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes On Mon, Jan 27, 2020 at 05:35:40PM -0800, Brian Geffon wrote: > Hi Kirill, > Thanks for taking the time to look at this. I'll update the wording to > make it clear that MREMAP_FIXED is required with MREMAP_DONTUNMAP. I still think that chaining flags is strange. The new flag requires all existing. And based on the use case you probably don't really need 'fixed' semantics all the time. The user should be fine with moving the mapping *somewhere*, not neccessary to the given address. BTW, name of the flag is confusing. My initial reaction was that it is variant of MREMAP_FIXED that does't anything at the target address. Like MAP_FIXED vs. MAP_FIXED_NOREPLACE. Any better options for the flag name? (I have none) > Regarding rmap, you're completely right I'm going to roll a new patch > which will call unlink_anon_vmas() to make sure the rmap is correct, > I'll also explicitly check that the vma is anonymous and not shared > returning EINVAL if not, how does that sound? Fine, I guess. But let me see the end result. -- Kirill A. Shutemov ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap(). 2020-01-29 10:46 ` Kirill A. Shutemov @ 2020-02-01 21:03 ` Brian Geffon 2020-02-02 4:17 ` Brian Geffon 1 sibling, 0 replies; 21+ messages in thread From: Brian Geffon @ 2020-02-01 21:03 UTC (permalink / raw) To: Kirill A. Shutemov Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML, linux-mm, linux-api-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes Hi Kirill, On Wed, Jan 29, 2020 at 11:46 AM Kirill A. Shutemov <kirill-oKw7cIdHH8eLwutG50LtGA@public.gmane.org> wrote: > And based on the use case you probably don't really need 'fixed' > semantics all the time. The user should be fine with moving the mapping > *somewhere*, not neccessary to the given address This is true and and it simplifies things a bit as for the outlined use cases the user would not be required to mmap the destination before hand. Part of the reason I chose to require MREMAP_FIXED was because mremap need not move the mapping if it can shrink/grow in place and it seemed a bit awkward to have "MUSTMOVE" behavior without MAP_FIXED. I'll make this change to drop the requirement on MREMAP_FIXED in my next patch. > BTW, name of the flag is confusing. My initial reaction was that it is > variant of MREMAP_FIXED that does't anything at the target address. > Like MAP_FIXED vs. MAP_FIXED_NOREPLACE. > > Any better options for the flag name? (I have none) I see your point. Perhaps MREMAP_MOVEPAGES or MREMAP_KEEP_SOURCE? I struggle to come up with a single name that encapsulates this behavior but I'll try to think of other ideas before I mail the next patch. Given that we will drop the requirement on MREMAP_FIXED, perhaps MOVEPAGES is the better option as it captures that the mapping WILL be moved? Thanks again for taking the time to look at this. Best, Brian ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap(). 2020-01-29 10:46 ` Kirill A. Shutemov 2020-02-01 21:03 ` Brian Geffon @ 2020-02-02 4:17 ` Brian Geffon 2020-02-03 13:09 ` Kirill A. Shutemov 1 sibling, 1 reply; 21+ messages in thread From: Brian Geffon @ 2020-02-02 4:17 UTC (permalink / raw) To: Kirill A. Shutemov Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML, linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes On Wed, Jan 29, 2020 at 11:46 AM Kirill A. Shutemov <kirill@shutemov.name> wrote: > Any better options for the flag name? (I have none) The other option is that it's broken up into two new flags the first MREMAP_MUSTMOVE which can be used regardless of whether or not you're leaving the original mapping mapped. This would do exactly what it describes: move the mapping to a new address with or without MREMAP_FIXED, this keeps consistency with MAYMOVE. The second flag would be the new MREMAP_DONTUNMAP flag which requires MREMAP_MUSTMOVE, again with or without MREMAP_FIXED. What are your thoughts on this? ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap(). 2020-02-02 4:17 ` Brian Geffon @ 2020-02-03 13:09 ` Kirill A. Shutemov 2020-02-07 20:42 ` Brian Geffon 0 siblings, 1 reply; 21+ messages in thread From: Kirill A. Shutemov @ 2020-02-03 13:09 UTC (permalink / raw) To: Brian Geffon Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML, linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes On Sun, Feb 02, 2020 at 05:17:53AM +0100, Brian Geffon wrote: > On Wed, Jan 29, 2020 at 11:46 AM Kirill A. Shutemov > <kirill@shutemov.name> wrote: > > Any better options for the flag name? (I have none) > > The other option is that it's broken up into two new flags the first > MREMAP_MUSTMOVE which can be used regardless of whether or not you're > leaving the original mapping mapped. This would do exactly what it > describes: move the mapping to a new address with or without > MREMAP_FIXED, this keeps consistency with MAYMOVE. > > The second flag would be the new MREMAP_DONTUNMAP flag which requires > MREMAP_MUSTMOVE, again with or without MREMAP_FIXED. > > What are your thoughts on this? Sounds reasonable. MREMAP_DONTUNMAP doesn't really convey that you move pages to the new mapping, but leave empty mapping behind. But I guess there's only so much you can encode into the name. (Patch to the man page should do the rest) -- Kirill A. Shutemov ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap(). 2020-02-03 13:09 ` Kirill A. Shutemov @ 2020-02-07 20:42 ` Brian Geffon [not found] ` <CADyq12x98QspiWSqNui1OH8+FEUzVyJwxia+ho00S2+Q+PmTjw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 0 siblings, 1 reply; 21+ messages in thread From: Brian Geffon @ 2020-02-07 20:42 UTC (permalink / raw) To: Kirill A. Shutemov Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML, linux-mm, linux-api, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes Hi Kirill, I started a new thread https://lkml.org/lkml/2020/2/7/640 for my v4 patch. But I wanted to quickly address your comments. Regarding the concern around the rmap, no changes actually need to be made. If we were to unlink_anon_vma(vma) and then set vma->anon_vma = NULL, that would be fine but then as soon as there was a fault the same anon_vma would be attached since it's a private anonymous mapping. So there is really nothing to do regarding the rmap. I considered the two flag approach but since I could not come up with a concrete use case of MREMAP_MUSTMOVE I decided to just leave the single MREMAP_DONTUNMAP flag, the two flag approach would be only for clarifying the operations so I'm not sure it's worth it. (Still trying to come up with a better name). But I've attached a man page diff to the latest patch. Thanks, Brian On Mon, Feb 3, 2020 at 5:09 AM Kirill A. Shutemov <kirill@shutemov.name> wrote: > > On Sun, Feb 02, 2020 at 05:17:53AM +0100, Brian Geffon wrote: > > On Wed, Jan 29, 2020 at 11:46 AM Kirill A. Shutemov > > <kirill@shutemov.name> wrote: > > > Any better options for the flag name? (I have none) > > > > The other option is that it's broken up into two new flags the first > > MREMAP_MUSTMOVE which can be used regardless of whether or not you're > > leaving the original mapping mapped. This would do exactly what it > > describes: move the mapping to a new address with or without > > MREMAP_FIXED, this keeps consistency with MAYMOVE. > > > > The second flag would be the new MREMAP_DONTUNMAP flag which requires > > MREMAP_MUSTMOVE, again with or without MREMAP_FIXED. > > > > What are your thoughts on this? > > Sounds reasonable. > > MREMAP_DONTUNMAP doesn't really convey that you move pages to the new > mapping, but leave empty mapping behind. But I guess there's only so much > you can encode into the name. (Patch to the man page should do the rest) > > -- > Kirill A. Shutemov ^ permalink raw reply [flat|nested] 21+ messages in thread
[parent not found: <CADyq12x98QspiWSqNui1OH8+FEUzVyJwxia+ho00S2+Q+PmTjw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap(). [not found] ` <CADyq12x98QspiWSqNui1OH8+FEUzVyJwxia+ho00S2+Q+PmTjw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2020-02-10 10:35 ` Kirill A. Shutemov 0 siblings, 0 replies; 21+ messages in thread From: Kirill A. Shutemov @ 2020-02-10 10:35 UTC (permalink / raw) To: Brian Geffon Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML, linux-mm, linux-api-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes On Fri, Feb 07, 2020 at 12:42:15PM -0800, Brian Geffon wrote: > Hi Kirill, > I started a new thread https://lkml.org/lkml/2020/2/7/640 for my v4 > patch. But I wanted to quickly address your comments. Regarding the > concern around the rmap, no changes actually need to be made. If we > were to unlink_anon_vma(vma) and then set vma->anon_vma = NULL, that > would be fine but then as soon as there was a fault the same anon_vma > would be attached since it's a private anonymous mapping. So there is > really nothing to do regarding the rmap. Okay. My worry was that we create a new VMA with the same anon_vma *and* vm_pgoff, but I just realized we can do the same with the current mremap(2) plus following mmap(2) in the old place. So it's not regression. I guess we are fine here. > I considered the two flag approach but since I could not come up with > a concrete use case of MREMAP_MUSTMOVE I decided to just leave the > single MREMAP_DONTUNMAP flag, the two flag approach would be only for > clarifying the operations so I'm not sure it's worth it. (Still trying > to come up with a better name). But I've attached a man page diff to > the latest patch. At least it doesn't have 'FIXED' semantics forced on user. It's fine with me. -- Kirill A. Shutemov ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap(). [not found] ` <20200124190625.257659-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> 2020-01-26 5:16 ` Nathan Chancellor 2020-01-26 22:06 ` Kirill A. Shutemov @ 2020-01-27 10:13 ` Florian Weimer [not found] ` <87imkxxl5d.fsf-fjB847h8rq1N9UpBYOmNkhcY2uh10dtjAL8bYrjMMd8@public.gmane.org> 2 siblings, 1 reply; 21+ messages in thread From: Florian Weimer @ 2020-01-27 10:13 UTC (permalink / raw) To: Brian Geffon Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, linux-kernel-u79uwXL29TY76Z2rM5mHXA, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, linux-api-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes * Brian Geffon: > When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is > set, the source mapping will not be removed. Instead it will be > cleared as if a brand new anonymous, private mapping had been created > atomically as part of the mremap() call. If a userfaultfd was watching > the source, it will continue to watch the new mapping. For a mapping > that is shared or not anonymous, MREMAP_DONTUNMAP will cause the > mremap() call to fail. MREMAP_DONTUNMAP implies that MREMAP_FIXED is > also used. The final result is two equally sized VMAs where the > destination contains the PTEs of the source. What will be the protection flags of the source mapping? Will they remain unchanged? Or PROT_NONE? Thanks, Florian ^ permalink raw reply [flat|nested] 21+ messages in thread
[parent not found: <87imkxxl5d.fsf-fjB847h8rq1N9UpBYOmNkhcY2uh10dtjAL8bYrjMMd8@public.gmane.org>]
* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap(). [not found] ` <87imkxxl5d.fsf-fjB847h8rq1N9UpBYOmNkhcY2uh10dtjAL8bYrjMMd8@public.gmane.org> @ 2020-01-27 22:33 ` Brian Geffon [not found] ` <CADyq12xCpTzLpYC16FjnM60tHhCfnccNfg6JJuqcBd_6ACDGcQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 0 siblings, 1 reply; 21+ messages in thread From: Brian Geffon @ 2020-01-27 22:33 UTC (permalink / raw) To: Florian Weimer Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML, linux-mm, linux-api-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes Hi Florian, copy_vma will make a copy of the existing VMA leaving the old VMA unchanged, so the source keeps its existing protections, this is what makes it very useful along with userfaultfd. Thanks, Brian On Mon, Jan 27, 2020 at 2:13 AM Florian Weimer <fweimer-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote: > > * Brian Geffon: > > > When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is > > set, the source mapping will not be removed. Instead it will be > > cleared as if a brand new anonymous, private mapping had been created > > atomically as part of the mremap() call. If a userfaultfd was watching > > the source, it will continue to watch the new mapping. For a mapping > > that is shared or not anonymous, MREMAP_DONTUNMAP will cause the > > mremap() call to fail. MREMAP_DONTUNMAP implies that MREMAP_FIXED is > > also used. The final result is two equally sized VMAs where the > > destination contains the PTEs of the source. > > What will be the protection flags of the source mapping? Will they > remain unchanged? Or PROT_NONE? > > Thanks, > Florian > ^ permalink raw reply [flat|nested] 21+ messages in thread
[parent not found: <CADyq12xCpTzLpYC16FjnM60tHhCfnccNfg6JJuqcBd_6ACDGcQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [PATCH v2] mm: Add MREMAP_DONTUNMAP to mremap(). [not found] ` <CADyq12xCpTzLpYC16FjnM60tHhCfnccNfg6JJuqcBd_6ACDGcQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2020-01-30 12:19 ` Florian Weimer 0 siblings, 0 replies; 21+ messages in thread From: Florian Weimer @ 2020-01-30 12:19 UTC (permalink / raw) To: Brian Geffon Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, LKML, linux-mm, linux-api-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes * Brian Geffon: > Hi Florian, > copy_vma will make a copy of the existing VMA leaving the old VMA > unchanged, so the source keeps its existing protections, this is what > makes it very useful along with userfaultfd. I see. On the other hand, it's impossible to get the PROT_NONE behavior by a subsequent mprotect call because the mremap has to fail in some cases in vm.overcommit_memory=2 mode. But maybe that other behavior can be provided with a different flag if it turns out to be useful in the future. Thanks, Florian ^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v3] mm: Add MREMAP_DONTUNMAP to mremap(). [not found] ` <20200123014627.71720-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> 2020-01-23 3:02 ` Andy Lutomirski 2020-01-24 19:06 ` [PATCH v2] " Brian Geffon @ 2020-01-27 5:30 ` Brian Geffon [not found] ` <20200127053056.213679-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> 2 siblings, 1 reply; 21+ messages in thread From: Brian Geffon @ 2020-01-27 5:30 UTC (permalink / raw) To: Andrew Morton Cc: Michael S . Tsirkin, Brian Geffon, Arnd Bergmann, linux-kernel-u79uwXL29TY76Z2rM5mHXA, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, linux-api-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes, Nathan Chancellor When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is set, the source mapping will not be removed. Instead it will be cleared as if a brand new anonymous, private mapping had been created atomically as part of the mremap() call. If a userfaultfd was watching the source, it will continue to watch the new mapping. For a mapping that is shared or not anonymous, MREMAP_DONTUNMAP will cause the mremap() call to fail. MREMAP_DONTUNMAP requires that MREMAP_FIXED is also used. The final result is two equally sized VMAs where the destination contains the PTEs of the source. We hope to use this in Chrome OS where with userfaultfd we could write an anonymous mapping to disk without having to STOP the process or worry about VMA permission changes. This feature also has a use case in Android, Lokesh Gidra has said that "As part of using userfaultfd for GC, We'll have to move the physical pages of the java heap to a separate location. For this purpose mremap will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java heap, its virtual mapping will be removed as well. Therefore, we'll require performing mmap immediately after. This is not only time consuming but also opens a time window where a native thread may call mmap and reserve the java heap's address range for its own usage. This flag solves the problem." Signed-off-by: Brian Geffon <bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> --- include/uapi/linux/mman.h | 5 +++-- mm/mremap.c | 38 +++++++++++++++++++++++++++++++------- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index fc1a64c3447b..923cc162609c 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -5,8 +5,9 @@ #include <asm/mman.h> #include <asm-generic/hugetlb_encode.h> -#define MREMAP_MAYMOVE 1 -#define MREMAP_FIXED 2 +#define MREMAP_MAYMOVE 1 +#define MREMAP_FIXED 2 +#define MREMAP_DONTUNMAP 4 #define OVERCOMMIT_GUESS 0 #define OVERCOMMIT_ALWAYS 1 diff --git a/mm/mremap.c b/mm/mremap.c index 122938dcec15..1d164e5fdff0 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -318,8 +318,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, static unsigned long move_vma(struct vm_area_struct *vma, unsigned long old_addr, unsigned long old_len, unsigned long new_len, unsigned long new_addr, - bool *locked, struct vm_userfaultfd_ctx *uf, - struct list_head *uf_unmap) + bool *locked, unsigned long flags, + struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma; @@ -408,6 +408,13 @@ static unsigned long move_vma(struct vm_area_struct *vma, if (unlikely(vma->vm_flags & VM_PFNMAP)) untrack_pfn_moved(vma); + if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) { + if (vm_flags & VM_ACCOUNT) + vma->vm_flags |= VM_ACCOUNT; + + goto out; + } + if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) { /* OOM: unable to split vma, just get accounts right */ vm_unacct_memory(excess >> PAGE_SHIFT); @@ -422,6 +429,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, vma->vm_next->vm_flags |= VM_ACCOUNT; } +out: if (vm_flags & VM_LOCKED) { mm->locked_vm += new_len >> PAGE_SHIFT; *locked = true; @@ -497,7 +505,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, static unsigned long mremap_to(unsigned long addr, unsigned long old_len, unsigned long new_addr, unsigned long new_len, bool *locked, - struct vm_userfaultfd_ctx *uf, + unsigned long flags, struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap_early, struct list_head *uf_unmap) { @@ -551,6 +559,17 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, goto out; } + /* + * MREMAP_DONTUNMAP expands by old_len + (new_len - old_len), we will + * check that we can expand by old_len and vma_to_resize will handle + * the vma growing. + */ + if (unlikely(flags & MREMAP_DONTUNMAP && !may_expand_vm(mm, + vma->vm_flags, old_len >> PAGE_SHIFT))) { + ret = -ENOMEM; + goto out; + } + map_flags = MAP_FIXED; if (vma->vm_flags & VM_MAYSHARE) map_flags |= MAP_SHARED; @@ -561,7 +580,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, if (IS_ERR_VALUE(ret)) goto out1; - ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf, + ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf, uf_unmap); if (!(offset_in_page(ret))) goto out; @@ -609,12 +628,16 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, addr = untagged_addr(addr); new_addr = untagged_addr(new_addr); - if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) + if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP)) { return ret; + } if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE)) return ret; + if (flags & MREMAP_DONTUNMAP && !(flags & MREMAP_FIXED)) + return ret; + if (offset_in_page(addr)) return ret; @@ -634,7 +657,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, if (flags & MREMAP_FIXED) { ret = mremap_to(addr, old_len, new_addr, new_len, - &locked, &uf, &uf_unmap_early, &uf_unmap); + &locked, flags, &uf, &uf_unmap_early, + &uf_unmap); goto out; } @@ -712,7 +736,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, } ret = move_vma(vma, addr, old_len, new_len, new_addr, - &locked, &uf, &uf_unmap); + &locked, flags, &uf, &uf_unmap); } out: if (offset_in_page(ret)) { -- 2.25.0.341.g760bfbb309-goog ^ permalink raw reply related [flat|nested] 21+ messages in thread
[parent not found: <20200127053056.213679-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH v3] mm: Add MREMAP_DONTUNMAP to mremap(). [not found] ` <20200127053056.213679-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> @ 2020-01-28 15:26 ` Will Deacon 2020-01-30 10:12 ` Will Deacon 0 siblings, 1 reply; 21+ messages in thread From: Will Deacon @ 2020-01-28 15:26 UTC (permalink / raw) To: Brian Geffon Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, linux-kernel-u79uwXL29TY76Z2rM5mHXA, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, linux-api-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes, Nathan Chancellor Hi Brian, On Sun, Jan 26, 2020 at 09:30:56PM -0800, Brian Geffon wrote: > When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is > set, the source mapping will not be removed. Instead it will be > cleared as if a brand new anonymous, private mapping had been created > atomically as part of the mremap() call. If a userfaultfd was watching > the source, it will continue to watch the new mapping. For a mapping > that is shared or not anonymous, MREMAP_DONTUNMAP will cause the > mremap() call to fail. MREMAP_DONTUNMAP requires that MREMAP_FIXED is > also used. The final result is two equally sized VMAs where the > destination contains the PTEs of the source. > > We hope to use this in Chrome OS where with userfaultfd we could write > an anonymous mapping to disk without having to STOP the process or worry > about VMA permission changes. > > This feature also has a use case in Android, Lokesh Gidra has said > that "As part of using userfaultfd for GC, We'll have to move the physical > pages of the java heap to a separate location. For this purpose mremap > will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java > heap, its virtual mapping will be removed as well. Therefore, we'll > require performing mmap immediately after. This is not only time consuming > but also opens a time window where a native thread may call mmap and > reserve the java heap's address range for its own usage. This flag > solves the problem." Hmm, this sounds like you're dealing with a multi-threaded environment, yet your change only supports private mappings. How does that work? It's also worrying because, with two private mappings of the same anonymous memory live simultaneously, you run the risk of hitting D-cache aliasing issues on some architectures and losing coherency between them as a result (even in a single-threaded scenario). Is userspace just supposed to deal with this, or should we be enforcing SHMLBA alignment? > Signed-off-by: Brian Geffon <bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> > --- > include/uapi/linux/mman.h | 5 +++-- > mm/mremap.c | 38 +++++++++++++++++++++++++++++++------- > 2 files changed, 34 insertions(+), 9 deletions(-) Could you also a include a patch to update the mremap man page, please? https://www.kernel.org/doc/man-pages/patches.html Cheers, Will ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v3] mm: Add MREMAP_DONTUNMAP to mremap(). 2020-01-28 15:26 ` Will Deacon @ 2020-01-30 10:12 ` Will Deacon 0 siblings, 0 replies; 21+ messages in thread From: Will Deacon @ 2020-01-30 10:12 UTC (permalink / raw) To: Brian Geffon Cc: Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, linux-kernel-u79uwXL29TY76Z2rM5mHXA, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, linux-api-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski, Andrea Arcangeli, Sonny Rao, Minchan Kim, Joel Fernandes, Yu Zhao, Jesse Barnes, Nathan Chancellor On Tue, Jan 28, 2020 at 03:26:41PM +0000, Will Deacon wrote: > On Sun, Jan 26, 2020 at 09:30:56PM -0800, Brian Geffon wrote: > > When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is > > set, the source mapping will not be removed. Instead it will be > > cleared as if a brand new anonymous, private mapping had been created > > atomically as part of the mremap() call. If a userfaultfd was watching > > the source, it will continue to watch the new mapping. For a mapping > > that is shared or not anonymous, MREMAP_DONTUNMAP will cause the > > mremap() call to fail. MREMAP_DONTUNMAP requires that MREMAP_FIXED is > > also used. The final result is two equally sized VMAs where the > > destination contains the PTEs of the source. > > > > We hope to use this in Chrome OS where with userfaultfd we could write > > an anonymous mapping to disk without having to STOP the process or worry > > about VMA permission changes. > > > > This feature also has a use case in Android, Lokesh Gidra has said > > that "As part of using userfaultfd for GC, We'll have to move the physical > > pages of the java heap to a separate location. For this purpose mremap > > will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java > > heap, its virtual mapping will be removed as well. Therefore, we'll > > require performing mmap immediately after. This is not only time consuming > > but also opens a time window where a native thread may call mmap and > > reserve the java heap's address range for its own usage. This flag > > solves the problem." > > Hmm, this sounds like you're dealing with a multi-threaded environment, > yet your change only supports private mappings. How does that work? Sorry, this was badly worded. I was trying to understand how the GC is implememented, and whether everything was part of the same process or if things like memfds or something else were being used to share memory. Having spoken to Brian off-list, it's all one process... > It's also worrying because, with two private mappings of the same anonymous > memory live simultaneously, you run the risk of hitting D-cache aliasing > issues on some architectures and losing coherency between them as a result > (even in a single-threaded scenario). Is userspace just supposed to deal > with this, or should we be enforcing SHMLBA alignment? ... and this was me completely misreading the patch. The old mapping is still torn down, but then replaced with a new private mapping rather than being unmapped. However, looks like there are some issues handling shared mappings with this patch (and possibly mlock()?), so I'll wait for a new spin. Will ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] mm: Add MREMAP_DONTUNMAP to mremap(). 2020-01-23 1:46 [PATCH] mm: Add MREMAP_DONTUNMAP to mremap() Brian Geffon [not found] ` <20200123014627.71720-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> @ 2020-01-27 4:46 ` Dan Carpenter 1 sibling, 0 replies; 21+ messages in thread From: Dan Carpenter @ 2020-01-27 4:46 UTC (permalink / raw) To: kbuild Cc: kbuild-all, linux-mm, Andrew Morton, Michael S . Tsirkin, Arnd Bergmann, Brian Geffon, Sonny Rao, Minchan Kim, Joel Fernandes, Lokesh Gidra, linux-kernel, linux-api, Yu Zhao, Jesse Barnes Hi Brian, url: https://github.com/0day-ci/linux/commits/Brian-Geffon/mm-Add-MREMAP_DONTUNMAP-to-mremap/20200125-013342 base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 4703d9119972bf586d2cca76ec6438f819ffa30e If you fix the issue, kindly add following tag Reported-by: kbuild test robot <lkp@intel.com> Reported-by: Dan Carpenter <dan.carpenter@oracle.com> smatch warnings: mm/mremap.c:561 mremap_to() error: potentially dereferencing uninitialized 'vma'. # https://github.com/0day-ci/linux/commit/98663ca05501623c3da7f0f30be8ba7d632cf010 git remote add linux-review https://github.com/0day-ci/linux git remote update linux-review git checkout 98663ca05501623c3da7f0f30be8ba7d632cf010 vim +/vma +561 mm/mremap.c 81909b842107ef Michel Lespinasse 2013-02-22 506 static unsigned long mremap_to(unsigned long addr, unsigned long old_len, 72f87654c69690 Pavel Emelyanov 2017-02-22 507 unsigned long new_addr, unsigned long new_len, bool *locked, 98663ca0550162 Brian Geffon 2020-01-22 508 unsigned long flags, struct vm_userfaultfd_ctx *uf, b22823719302e8 Mike Rapoport 2017-08-02 509 struct list_head *uf_unmap_early, 897ab3e0c49e24 Mike Rapoport 2017-02-24 510 struct list_head *uf_unmap) ecc1a8993751de Al Viro 2009-11-24 511 { ecc1a8993751de Al Viro 2009-11-24 512 struct mm_struct *mm = current->mm; ecc1a8993751de Al Viro 2009-11-24 513 struct vm_area_struct *vma; ecc1a8993751de Al Viro 2009-11-24 514 unsigned long ret = -EINVAL; ecc1a8993751de Al Viro 2009-11-24 515 unsigned long charged = 0; 097eed103862f9 Al Viro 2009-11-24 516 unsigned long map_flags; ecc1a8993751de Al Viro 2009-11-24 517 f19cb115a25f3f Alexander Kuleshov 2015-11-05 518 if (offset_in_page(new_addr)) ecc1a8993751de Al Viro 2009-11-24 519 goto out; ecc1a8993751de Al Viro 2009-11-24 520 ecc1a8993751de Al Viro 2009-11-24 521 if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len) ecc1a8993751de Al Viro 2009-11-24 522 goto out; ecc1a8993751de Al Viro 2009-11-24 523 9943242ca46814 Oleg Nesterov 2015-09-04 524 /* Ensure the old/new locations do not overlap */ 9943242ca46814 Oleg Nesterov 2015-09-04 525 if (addr + old_len > new_addr && new_addr + new_len > addr) ecc1a8993751de Al Viro 2009-11-24 526 goto out; ecc1a8993751de Al Viro 2009-11-24 527 ea2c3f6f554561 Oscar Salvador 2019-03-05 528 /* ea2c3f6f554561 Oscar Salvador 2019-03-05 529 * move_vma() need us to stay 4 maps below the threshold, otherwise ea2c3f6f554561 Oscar Salvador 2019-03-05 530 * it will bail out at the very beginning. ea2c3f6f554561 Oscar Salvador 2019-03-05 531 * That is a problem if we have already unmaped the regions here ea2c3f6f554561 Oscar Salvador 2019-03-05 532 * (new_addr, and old_addr), because userspace will not know the ea2c3f6f554561 Oscar Salvador 2019-03-05 533 * state of the vma's after it gets -ENOMEM. ea2c3f6f554561 Oscar Salvador 2019-03-05 534 * So, to avoid such scenario we can pre-compute if the whole ea2c3f6f554561 Oscar Salvador 2019-03-05 535 * operation has high chances to success map-wise. ea2c3f6f554561 Oscar Salvador 2019-03-05 536 * Worst-scenario case is when both vma's (new_addr and old_addr) get ea2c3f6f554561 Oscar Salvador 2019-03-05 537 * split in 3 before unmaping it. ea2c3f6f554561 Oscar Salvador 2019-03-05 538 * That means 2 more maps (1 for each) to the ones we already hold. ea2c3f6f554561 Oscar Salvador 2019-03-05 539 * Check whether current map count plus 2 still leads us to 4 maps below ea2c3f6f554561 Oscar Salvador 2019-03-05 540 * the threshold, otherwise return -ENOMEM here to be more safe. ea2c3f6f554561 Oscar Salvador 2019-03-05 541 */ ea2c3f6f554561 Oscar Salvador 2019-03-05 542 if ((mm->map_count + 2) >= sysctl_max_map_count - 3) ea2c3f6f554561 Oscar Salvador 2019-03-05 543 return -ENOMEM; ea2c3f6f554561 Oscar Salvador 2019-03-05 544 b22823719302e8 Mike Rapoport 2017-08-02 545 ret = do_munmap(mm, new_addr, new_len, uf_unmap_early); ecc1a8993751de Al Viro 2009-11-24 546 if (ret) ecc1a8993751de Al Viro 2009-11-24 547 goto out; ecc1a8993751de Al Viro 2009-11-24 548 ecc1a8993751de Al Viro 2009-11-24 549 if (old_len >= new_len) { 897ab3e0c49e24 Mike Rapoport 2017-02-24 550 ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap); ecc1a8993751de Al Viro 2009-11-24 551 if (ret && old_len != new_len) ecc1a8993751de Al Viro 2009-11-24 552 goto out; ecc1a8993751de Al Viro 2009-11-24 553 old_len = new_len; ecc1a8993751de Al Viro 2009-11-24 554 } ecc1a8993751de Al Viro 2009-11-24 555 98663ca0550162 Brian Geffon 2020-01-22 556 /* 98663ca0550162 Brian Geffon 2020-01-22 557 * MREMAP_DONTUNMAP expands by old_len + (new_len - old_len), we will 98663ca0550162 Brian Geffon 2020-01-22 558 * check that we can expand by old_len and vma_to_resize will handle 98663ca0550162 Brian Geffon 2020-01-22 559 * the vma growing. 98663ca0550162 Brian Geffon 2020-01-22 560 */ 98663ca0550162 Brian Geffon 2020-01-22 @561 if (unlikely(flags & MREMAP_DONTUNMAP && !may_expand_vm(mm, 98663ca0550162 Brian Geffon 2020-01-22 562 vma->vm_flags, old_len >> PAGE_SHIFT))) { ^^^^^^^^^^^^^ 98663ca0550162 Brian Geffon 2020-01-22 563 ret = -ENOMEM; 98663ca0550162 Brian Geffon 2020-01-22 564 goto out; 98663ca0550162 Brian Geffon 2020-01-22 565 } 98663ca0550162 Brian Geffon 2020-01-22 566 ecc1a8993751de Al Viro 2009-11-24 567 vma = vma_to_resize(addr, old_len, new_len, &charged); ^^^^^^^^^^^^^^^^^^^^ ecc1a8993751de Al Viro 2009-11-24 568 if (IS_ERR(vma)) { ecc1a8993751de Al Viro 2009-11-24 569 ret = PTR_ERR(vma); ecc1a8993751de Al Viro 2009-11-24 570 goto out; ecc1a8993751de Al Viro 2009-11-24 571 } ecc1a8993751de Al Viro 2009-11-24 572 097eed103862f9 Al Viro 2009-11-24 573 map_flags = MAP_FIXED; ^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2020-02-10 10:35 UTC | newest] Thread overview: 21+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2020-01-23 1:46 [PATCH] mm: Add MREMAP_DONTUNMAP to mremap() Brian Geffon [not found] ` <20200123014627.71720-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> 2020-01-23 3:02 ` Andy Lutomirski 2020-01-23 19:03 ` Brian Geffon 2020-01-24 19:06 ` [PATCH v2] " Brian Geffon [not found] ` <20200124190625.257659-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> 2020-01-26 5:16 ` Nathan Chancellor 2020-01-27 2:21 ` Brian Geffon 2020-01-26 22:06 ` Kirill A. Shutemov 2020-01-28 1:35 ` Brian Geffon [not found] ` <CADyq12xCK_3MhGi88Am5P6DVZvrW8vqtyJMHO0zjNhvhYegm1w-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 2020-01-29 10:46 ` Kirill A. Shutemov 2020-02-01 21:03 ` Brian Geffon 2020-02-02 4:17 ` Brian Geffon 2020-02-03 13:09 ` Kirill A. Shutemov 2020-02-07 20:42 ` Brian Geffon [not found] ` <CADyq12x98QspiWSqNui1OH8+FEUzVyJwxia+ho00S2+Q+PmTjw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 2020-02-10 10:35 ` Kirill A. Shutemov 2020-01-27 10:13 ` Florian Weimer [not found] ` <87imkxxl5d.fsf-fjB847h8rq1N9UpBYOmNkhcY2uh10dtjAL8bYrjMMd8@public.gmane.org> 2020-01-27 22:33 ` Brian Geffon [not found] ` <CADyq12xCpTzLpYC16FjnM60tHhCfnccNfg6JJuqcBd_6ACDGcQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 2020-01-30 12:19 ` Florian Weimer 2020-01-27 5:30 ` [PATCH v3] " Brian Geffon [not found] ` <20200127053056.213679-1-bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> 2020-01-28 15:26 ` Will Deacon 2020-01-30 10:12 ` Will Deacon 2020-01-27 4:46 ` [PATCH] " Dan Carpenter
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).