From mboxrd@z Thu Jan 1 00:00:00 1970 From: Oleg Nesterov Subject: Re: [PATCH 1/1] mm: mempolicy: fix mbind_range() && vma_adjust() interaction Date: Tue, 9 Jul 2013 21:43:21 +0200 Message-ID: <20130709194321.GA31104@redhat.com> References: <1372901537-31033-1-git-send-email-ccross@android.com> <20130704202232.GA19287@redhat.com> <20130708180424.GA6490@redhat.com> <20130708180501.GB6490@redhat.com> <20130709152836.GA10033@redhat.com> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Return-path: Content-Disposition: inline In-Reply-To: <20130709152836.GA10033@redhat.com> Sender: owner-linux-mm@kvack.org To: KOSAKI Motohiro Cc: Colin Cross , Andrew Morton , Hugh Dickins , Linus Torvalds , "Hampson, Steven T" , lkml , Kyungmin Park , Christoph Hellwig , John Stultz , Rob Landley , Arnd Bergmann , Cyrill Gorcunov , David Rientjes , Davidlohr Bueso , Kees Cook , Al Viro , Mel Gorman , Michel Lespinasse , Rik van Riel , Konstantin Khlebnikov , Peter Zijlstra List-Id: linux-arch.vger.kernel.org On 07/09, Oleg Nesterov wrote: > > I can be easily wrong, but to me vma_adjust() and its usage looks a bit > overcomplicated. Perhaps it makes sense to distinguish mmapped/hole cases. > mbind_range/madvise/etc need vma_join(vma, ...), not prev/anon_vma/file. > Perhaps. not sure. And I am just curious if something like below makes any sense... Suppose we add the new helper, vma_update(vma, new). Note that "new" is the fake vma, it just describes how do want to change vma. static bool can_merge_vma(struct vm_area_struct *prev, struct vm_area_struct *next) { if (prev->vm_end != next->vm_start) return false; if (prev->vm_pgoff + vma_pages(prev) != next->vm_pgoff) return false; if (prev->vm_file != next->vm_file) return false; if (prev->vm_flags != next->vm_flags) return false; if (!mpol_equal(vma_policy(prev), vma_policy(next))) return false; if (prev->anon_vma != next->anon_vma) /* WRONG, FIXME !!! */ return false; if (next->vm_ops && next->vm_ops->close) return false; return true; } struct vm_area_struct * vma_update(struct vm_area_struct *vma, struct vm_area_struct *new) { struct vm_area_struct *prev = vma->vm_prev; struct vm_area_struct *next = vma->vm_next; struct vm_area_struct *new_ret; unsigned long new_end; int err; /* prev/next != vma means we can merge with prev/next */ if (!prev || !can_merge_vma(prev, new)) prev = vma; if (!next || !can_merge_vma(new, next)) next = vma; if (new->vm_start > vma->vm_start) { /* prev == vma */ if (next != vma) { /* vma shrinks, next grows, case 4 */ new_end = new->vm_start; new_ret = next; goto adjust; } err = split_vma(vma->vm_mm, vma, new->vm_start, 1); if (err) return ERR_PTR(err); } if (new->vm_end < vma->vm_end) { /* next == vma */ if (prev != vma) { /* prev grows, vma shrinks, case 5 */ new_end = new->vm_end; new_ret = vma; goto adjust; } err = split_vma(vma->vm_mm, vma, new->vm_end, 0); if (err) return ERR_PTR(err); } if (prev == next) /* true if split_vma() was called */ return vma; new_end = next->vm_end; new_ret = prev; adjust: err = vma_adjust(prev, prev->vm_start, new_end, prev->vm_pgoff, NULL); if (err) return ERR_PTR(err); khugepaged_enter_vma_merge(new_ret); return new_ret; } Now we can change madvise_behavior() and other similar users as below. As for mmap_region() we can add another helper which simply tries to expand prev/next (case 1-3). Oleg. --- a/mm/madvise.c +++ b/mm/madvise.c @@ -46,10 +46,9 @@ static long madvise_behavior(struct vm_area_struct * vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, int behavior) { - struct mm_struct * mm = vma->vm_mm; - int error = 0; - pgoff_t pgoff; unsigned long new_flags = vma->vm_flags; + struct vm_area_struct new; + int error = 0; switch (behavior) { case MADV_NORMAL: @@ -100,34 +99,21 @@ static long madvise_behavior(struct vm_area_struct * vma, goto out; } - pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); - *prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma, - vma->vm_file, pgoff, vma_policy(vma)); - if (*prev) { - vma = *prev; - goto success; - } - - *prev = vma; + new = *vma; + new.vm_flags = new_flags; + new.vm_start = start; + new.vm_end = end; + vma = vma_update(vma, &new); - if (start != vma->vm_start) { - error = split_vma(mm, vma, start, 1); - if (error) - goto out; - } - - if (end != vma->vm_end) { - error = split_vma(mm, vma, end, 0); - if (error) - goto out; + if (IS_ERR(vma)) { + error = PTR_ERR(vma); + goto out; } - -success: /* * vm_flags is protected by the mmap_sem held in write mode. */ vma->vm_flags = new_flags; - + *prev = vma; out: if (error == -ENOMEM) error = -EAGAIN; -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org