From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1758155AbYFTPB6 (ORCPT ); Fri, 20 Jun 2008 11:01:58 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1755920AbYFTPBu (ORCPT ); Fri, 20 Jun 2008 11:01:50 -0400 Received: from e5.ny.us.ibm.com ([32.97.182.145]:55768 "EHLO e5.ny.us.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755624AbYFTPBt (ORCPT ); Fri, 20 Jun 2008 11:01:49 -0400 From: Balbir Singh To: Andrew Morton Cc: YAMAMOTO Takashi , Paul Menage , linux-kernel@vger.kernel.org, linux-mm@kvack.org, Balbir Singh , KAMEZAWA Hiroyuki Date: Fri, 20 Jun 2008 20:31:32 +0530 Message-Id: <20080620150132.16094.29151.sendpatchset@localhost.localdomain> Subject: [0/2] memrlimit improve error handling Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org The memrlimit cgroup does not handle error cases that occur after may_expand_vm() has succeeded: the address space charge is not undone when a later check fails. This bug was reported by Kamezawa, with the test case below to reproduce it: [root@iridium kamezawa]# cat /opt/cgroup/test/memrlimit.usage_in_bytes 71921664 [root@iridium kamezawa]# ulimit -s 3 [root@iridium kamezawa]# ls Killed [root@iridium kamezawa]# ls Killed [root@iridium kamezawa]# ls Killed [root@iridium kamezawa]# ls Killed [root@iridium kamezawa]# ls Killed [root@iridium kamezawa]# ulimit -s unlimited [root@iridium kamezawa]# cat /opt/cgroup/test/memrlimit.usage_in_bytes 72368128 [root@iridium kamezawa]# Note that usage_in_bytes has grown even though every ls was killed. This patch fixes the reported problem by uncharging the memrlimit cgroup on these error paths. 
Reported-By: kamezawa.hiroyu@jp.fujitsu.com Signed-off-by: Balbir Singh --- mm/mmap.c | 36 +++++++++++++++++++++++++----------- mm/mremap.c | 6 ++++++ 2 files changed, 31 insertions(+), 11 deletions(-) diff -puN mm/mmap.c~memrlimit-cgroup-add-better-error-handling mm/mmap.c --- linux-2.6.26-rc5/mm/mmap.c~memrlimit-cgroup-add-better-error-handling 2008-06-19 21:12:46.000000000 +0530 +++ linux-2.6.26-rc5-balbir/mm/mmap.c 2008-06-19 21:39:45.000000000 +0530 @@ -1123,7 +1123,7 @@ munmap_back: */ charged = len >> PAGE_SHIFT; if (security_vm_enough_memory(charged)) - return -ENOMEM; + goto undo_charge; vm_flags |= VM_ACCOUNT; } } @@ -1245,6 +1245,8 @@ free_vma: unacct_error: if (charged) vm_unacct_memory(charged); +undo_charge: + memrlimit_cgroup_uncharge_as(mm, len >> PAGE_SHIFT); return error; } @@ -1540,14 +1542,15 @@ static int acct_stack_growth(struct vm_a struct mm_struct *mm = vma->vm_mm; struct rlimit *rlim = current->signal->rlim; unsigned long new_start; + int ret = -ENOMEM; /* address space limit tests */ if (!may_expand_vm(mm, grow)) - return -ENOMEM; + goto out; /* Stack limit test */ if (size > rlim[RLIMIT_STACK].rlim_cur) - return -ENOMEM; + goto undo_charge; /* mlock limit tests */ if (vma->vm_flags & VM_LOCKED) { @@ -1556,21 +1559,23 @@ static int acct_stack_growth(struct vm_a locked = mm->locked_vm + grow; limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; if (locked > limit && !capable(CAP_IPC_LOCK)) - return -ENOMEM; + goto undo_charge; } /* Check to ensure the stack will not grow into a hugetlb-only region */ new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start : vma->vm_end - size; - if (is_hugepage_only_range(vma->vm_mm, new_start, size)) - return -EFAULT; + if (is_hugepage_only_range(vma->vm_mm, new_start, size)) { + ret = -EFAULT; + goto undo_charge; + } /* * Overcommit.. This must be the final test, as it will * update security statistics. 
*/ if (security_vm_enough_memory(grow)) - return -ENOMEM; + goto undo_charge; /* Ok, everything looks good - let it rip */ mm->total_vm += grow; @@ -1578,6 +1583,11 @@ static int acct_stack_growth(struct vm_a mm->locked_vm += grow; vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow); return 0; +undo_charge: + /* Undo memrlimit charge */ + memrlimit_cgroup_uncharge_as(mm, grow); +out: + return ret; } #if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64) @@ -1982,6 +1992,7 @@ unsigned long do_brk(unsigned long addr, struct rb_node ** rb_link, * rb_parent; pgoff_t pgoff = addr >> PAGE_SHIFT; int error; + int ret = -ENOMEM; len = PAGE_ALIGN(len); if (!len) @@ -2035,13 +2046,13 @@ unsigned long do_brk(unsigned long addr, /* Check against address space limits *after* clearing old maps... */ if (!may_expand_vm(mm, len >> PAGE_SHIFT)) - return -ENOMEM; + return ret; if (mm->map_count > sysctl_max_map_count) - return -ENOMEM; + goto undo_charge; if (security_vm_enough_memory(len >> PAGE_SHIFT)) - return -ENOMEM; + goto undo_charge; /* Can we just expand an old private anonymous mapping? 
*/ vma = vma_merge(mm, prev, addr, addr + len, flags, @@ -2055,7 +2066,7 @@ unsigned long do_brk(unsigned long addr, vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (!vma) { vm_unacct_memory(len >> PAGE_SHIFT); - return -ENOMEM; + goto undo_charge; } vma->vm_mm = mm; @@ -2073,6 +2084,9 @@ out: mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages; } return addr; +undo_charge: + memrlimit_cgroup_uncharge_as(mm, len >> PAGE_SHIFT); + return ret; } EXPORT_SYMBOL(do_brk); diff -puN mm/mremap.c~memrlimit-cgroup-add-better-error-handling mm/mremap.c --- linux-2.6.26-rc5/mm/mremap.c~memrlimit-cgroup-add-better-error-handling 2008-06-19 21:12:46.000000000 +0530 +++ linux-2.6.26-rc5-balbir/mm/mremap.c 2008-06-19 22:00:02.000000000 +0530 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -256,6 +257,7 @@ unsigned long do_mremap(unsigned long ad struct vm_area_struct *vma; unsigned long ret = -EINVAL; unsigned long charged = 0; + int vm_expanded = 0; if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) goto out; @@ -349,6 +351,7 @@ unsigned long do_mremap(unsigned long ad goto out; } + vm_expanded = 1; if (vma->vm_flags & VM_ACCOUNT) { charged = (new_len - old_len) >> PAGE_SHIFT; if (security_vm_enough_memory(charged)) @@ -411,6 +414,9 @@ out: if (ret & ~PAGE_MASK) vm_unacct_memory(charged); out_nc: + if (vm_expanded) + memrlimit_cgroup_uncharge_as(mm, + (new_len - old_len) >> PAGE_SHIFT); return ret; } _ -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL