From: Balbir Singh <balbir@linux.vnet.ibm.com>
To: linux-mm@kvack.org
Cc: Hugh Dickins <hugh@veritas.com>,
Sudhir Kumar <skumar@linux.vnet.ibm.com>,
YAMAMOTO Takashi <yamamoto@valinux.co.jp>,
Paul Menage <menage@google.com>,
lizf@cn.fujitsu.com, linux-kernel@vger.kernel.org,
taka@valinux.co.jp, David Rientjes <rientjes@google.com>,
Pavel Emelianov <xemul@openvz.org>,
Balbir Singh <balbir@linux.vnet.ibm.com>,
Andrew Morton <akpm@linux-foundation.org>,
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Subject: [RFC][2/3] Account and control virtual address space allocations
Date: Sun, 16 Mar 2008 23:00:05 +0530 [thread overview]
Message-ID: <20080316173005.8812.88290.sendpatchset@localhost.localdomain> (raw)
In-Reply-To: <20080316172942.8812.56051.sendpatchset@localhost.localdomain>
This patch implements accounting and control of virtual address space.
Accounting is done when the virtual address space of any task/mm_struct
belonging to the cgroup is incremented or decremented. This patch
fails the expansion if the cgroup goes over its limit. A new function
mem_cgroup_update_as() is added to deal with the accounting of the virtual
address space usage of cgroups.
TODOs
1. IA64 has code in perfmon.c pfm_smpl_buffer_alloc(), which increments
the total_vm of the mm_struct. This code has not yet been brought into
virtual address space control
2. Only when CONFIG_MMU is enabled, is the virtual address space control
enabled. Should we do this for nommu cases as well? My suspicion is
that we don't have to.
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---
arch/x86/kernel/ptrace.c | 10 +++++++++-
include/linux/memcontrol.h | 7 +++++++
init/Kconfig | 4 +++-
kernel/fork.c | 9 +++++++--
mm/memcontrol.c | 37 +++++++++++++++++++++++++++++++++++++
mm/memory.c | 5 +++++
mm/mmap.c | 22 ++++++++++++++++++++--
mm/mremap.c | 21 ++++++++++++++++++---
8 files changed, 106 insertions(+), 9 deletions(-)
diff -puN mm/memcontrol.c~memory-controller-virtual-address-space-accounting-and-control mm/memcontrol.c
--- linux-2.6.25-rc5/mm/memcontrol.c~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/mm/memcontrol.c 2008-03-16 22:57:40.000000000 +0530
@@ -525,6 +525,32 @@ unsigned long mem_cgroup_isolate_pages(u
}
/*
+ * Check if the current cgroup exceeds its address space limit.
+ * Returns 0 on success and 1 on failure.
+ */
+int mem_cgroup_update_as(struct mm_struct *mm, long nr_pages)
+{
+ int ret = 0;
+ struct mem_cgroup *mem;
+ if (mem_cgroup_subsys.disabled)
+ return ret;
+
+ rcu_read_lock();
+ mem = rcu_dereference(mm->mem_cgroup);
+ css_get(&mem->css);
+ rcu_read_unlock();
+
+ if (nr_pages > 0) {
+ if (res_counter_charge(&mem->as_res, (nr_pages * PAGE_SIZE)))
+ ret = 1;
+ } else
+ res_counter_uncharge(&mem->as_res, (-nr_pages * PAGE_SIZE));
+
+ css_put(&mem->css);
+ return ret;
+}
+
+/*
* Charge the memory controller for page usage.
* Return
* 0 if the charge was successful
@@ -1103,6 +1129,17 @@ static void mem_cgroup_move_task(struct
goto out;
css_get(&mem->css);
+ /*
+ * For address space accounting, the charges are migrated.
+ * We need to migrate it since all the future uncharge/charge will
+ * now happen to the new cgroup. For consistency, we need to migrate
+ * all charges, otherwise we could end up dropping charges from
+ * the new cgroup (even though they were incurred in the current
+ * group).
+ */
+ if (res_counter_charge(&mem->as_res, mm->total_vm))
+ goto out;
+ res_counter_uncharge(&old_mem->as_res, mm->total_vm);
rcu_assign_pointer(mm->mem_cgroup, mem);
css_put(&old_mem->css);
diff -puN include/linux/memcontrol.h~memory-controller-virtual-address-space-accounting-and-control include/linux/memcontrol.h
--- linux-2.6.25-rc5/include/linux/memcontrol.h~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/include/linux/memcontrol.h 2008-03-16 22:57:40.000000000 +0530
@@ -54,6 +54,7 @@ int task_in_mem_cgroup(struct task_struc
extern int mem_cgroup_prepare_migration(struct page *page);
extern void mem_cgroup_end_migration(struct page *page);
extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
+extern int mem_cgroup_update_as(struct mm_struct *mm, long nr_pages);
/*
* For memory reclaim.
@@ -172,6 +173,12 @@ static inline long mem_cgroup_calc_recla
{
return 0;
}
+
+static inline int mem_cgroup_update_as(struct mm_struct *mm, long nr_pages)
+{
+ return 0;
+}
+
#endif /* CONFIG_CGROUP_MEM_CONT */
#endif /* _LINUX_MEMCONTROL_H */
diff -puN mm/mmap.c~memory-controller-virtual-address-space-accounting-and-control mm/mmap.c
--- linux-2.6.25-rc5/mm/mmap.c~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/mm/mmap.c 2008-03-16 22:57:40.000000000 +0530
@@ -1117,6 +1117,9 @@ munmap_back:
}
}
+ if (mem_cgroup_update_as(mm, len >> PAGE_SHIFT))
+ return -ENOMEM;
+
/*
* Can we just expand an old private anonymous mapping?
* The VM_SHARED test is necessary because shmem_zero_setup
@@ -1226,8 +1229,11 @@ unmap_and_free_vma:
free_vma:
kmem_cache_free(vm_area_cachep, vma);
unacct_error:
- if (charged)
+ if (charged) {
+ mem_cgroup_update_as(mm, -charged);
vm_unacct_memory(charged);
+ }
+unacct_as_error:
return error;
}
@@ -1555,6 +1561,9 @@ static int acct_stack_growth(struct vm_a
if (security_vm_enough_memory(grow))
return -ENOMEM;
+ if (mem_cgroup_update_as(mm, grow))
+ return -ENOMEM;
+
/* Ok, everything looks good - let it rip */
mm->total_vm += grow;
if (vma->vm_flags & VM_LOCKED)
@@ -2003,9 +2012,14 @@ unsigned long do_brk(unsigned long addr,
if (mm->map_count > sysctl_max_map_count)
return -ENOMEM;
- if (security_vm_enough_memory(len >> PAGE_SHIFT))
+ if (mem_cgroup_update_as(mm, (len >> PAGE_SHIFT)))
return -ENOMEM;
+ if (security_vm_enough_memory(len >> PAGE_SHIFT)) {
+ mem_cgroup_update_as(mm, -(len >> PAGE_SHIFT));
+ return -ENOMEM;
+ }
+
/* Can we just expand an old private anonymous mapping? */
if (vma_merge(mm, prev, addr, addr + len, flags,
NULL, NULL, pgoff, NULL))
@@ -2236,6 +2250,9 @@ int install_special_mapping(struct mm_st
if (unlikely(vma == NULL))
return -ENOMEM;
+ if (mem_cgroup_update_as(mm, len >> PAGE_SHIFT))
+ return -ENOMEM;
+
vma->vm_mm = mm;
vma->vm_start = addr;
vma->vm_end = addr + len;
@@ -2248,6 +2265,7 @@ int install_special_mapping(struct mm_st
if (unlikely(insert_vm_struct(mm, vma))) {
kmem_cache_free(vm_area_cachep, vma);
+ mem_cgroup_update_as(mm, -(len >> PAGE_SHIFT));
return -ENOMEM;
}
diff -puN arch/x86/kernel/ptrace.c~memory-controller-virtual-address-space-accounting-and-control arch/x86/kernel/ptrace.c
--- linux-2.6.25-rc5/arch/x86/kernel/ptrace.c~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/arch/x86/kernel/ptrace.c 2008-03-16 22:57:40.000000000 +0530
@@ -20,6 +20,7 @@
#include <linux/audit.h>
#include <linux/seccomp.h>
#include <linux/signal.h>
+#include <linux/memcontrol.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -787,6 +788,8 @@ static int ptrace_bts_realloc(struct tas
current->mm->total_vm -= old_size;
current->mm->locked_vm -= old_size;
+ mem_cgroup_update_as(current->mm, -old_size);
+
if (size == 0)
goto out;
@@ -816,10 +819,15 @@ static int ptrace_bts_realloc(struct tas
goto out;
}
+ if (mem_cgroup_update_as(current->mm, size))
+ goto out;
+
ret = ds_allocate((void **)&child->thread.ds_area_msr,
size << PAGE_SHIFT);
- if (ret < 0)
+ if (ret < 0) {
+ mem_cgroup_update_as(current->mm, -size);
goto out;
+ }
current->mm->total_vm += size;
current->mm->locked_vm += size;
diff -puN kernel/fork.c~memory-controller-virtual-address-space-accounting-and-control kernel/fork.c
--- linux-2.6.25-rc5/kernel/fork.c~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/kernel/fork.c 2008-03-16 22:57:40.000000000 +0530
@@ -53,6 +53,7 @@
#include <linux/tty.h>
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
+#include <linux/memcontrol.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -237,6 +238,7 @@ static int dup_mmap(struct mm_struct *mm
for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
struct file *file;
+ unsigned int len = vma_pages(mpnt);
if (mpnt->vm_flags & VM_DONTCOPY) {
long pages = vma_pages(mpnt);
@@ -247,11 +249,12 @@ static int dup_mmap(struct mm_struct *mm
}
charge = 0;
if (mpnt->vm_flags & VM_ACCOUNT) {
- unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
if (security_vm_enough_memory(len))
goto fail_nomem;
charge = len;
}
+ if (mem_cgroup_update_as(mm, len))
+ goto fail_nomem_as;
tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (!tmp)
goto fail_nomem;
@@ -311,8 +314,10 @@ out:
fail_nomem_policy:
kmem_cache_free(vm_area_cachep, tmp);
fail_nomem:
- retval = -ENOMEM;
+ mem_cgroup_update_as(mm, -charge);
vm_unacct_memory(charge);
+fail_nomem_as:
+ retval = -ENOMEM;
goto out;
}
diff -puN mm/mremap.c~memory-controller-virtual-address-space-accounting-and-control mm/mremap.c
--- linux-2.6.25-rc5/mm/mremap.c~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/mm/mremap.c 2008-03-16 22:57:40.000000000 +0530
@@ -174,10 +174,15 @@ static unsigned long move_vma(struct vm_
if (mm->map_count >= sysctl_max_map_count - 3)
return -ENOMEM;
+ if (mem_cgroup_update_as(mm, new_len >> PAGE_SHIFT))
+ return -ENOMEM;
+
new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
- if (!new_vma)
+ if (!new_vma) {
+ mem_cgroup_update_as(mm, -(new_len >> PAGE_SHIFT));
return -ENOMEM;
+ }
moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
if (moved_len < old_len) {
@@ -187,6 +192,7 @@ static unsigned long move_vma(struct vm_
* and then proceed to unmap new area instead of old.
*/
move_page_tables(new_vma, new_addr, vma, old_addr, moved_len);
+ mem_cgroup_update_as(mm, -(new_len >> PAGE_SHIFT));
vma = new_vma;
old_len = new_len;
old_addr = new_addr;
@@ -347,10 +353,17 @@ unsigned long do_mremap(unsigned long ad
goto out;
}
+ if (mem_cgroup_update_as(mm, (new_len - old_len) >> PAGE_SHIFT)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
if (vma->vm_flags & VM_ACCOUNT) {
charged = (new_len - old_len) >> PAGE_SHIFT;
- if (security_vm_enough_memory(charged))
+ if (security_vm_enough_memory(charged)) {
+ mem_cgroup_update_as(mm, -charged);
goto out_nc;
+ }
}
/* old_len exactly to the end of the area..
@@ -406,8 +419,10 @@ unsigned long do_mremap(unsigned long ad
ret = move_vma(vma, addr, old_len, new_len, new_addr);
}
out:
- if (ret & ~PAGE_MASK)
+ if (ret & ~PAGE_MASK) {
vm_unacct_memory(charged);
+ mem_cgroup_update_as(mm, -charged);
+ }
out_nc:
return ret;
}
diff -puN init/Kconfig~memory-controller-virtual-address-space-accounting-and-control init/Kconfig
--- linux-2.6.25-rc5/init/Kconfig~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/init/Kconfig 2008-03-16 22:57:40.000000000 +0530
@@ -369,7 +369,9 @@ config CGROUP_MEM_RES_CTLR
depends on CGROUPS && RESOURCE_COUNTERS
help
Provides a memory resource controller that manages both page cache and
- RSS memory.
+ RSS memory. It also provide accounting and control of address
+ space allocations (along the lines of RLIMIT_AS) for cgroups
+ when CONFIG_MMU is enabled.
Note that setting this option increases fixed memory overhead
associated with each page of memory in the system by 4/8 bytes
diff -puN mm/swapfile.c~memory-controller-virtual-address-space-accounting-and-control mm/swapfile.c
diff -puN mm/memory.c~memory-controller-virtual-address-space-accounting-and-control mm/memory.c
--- linux-2.6.25-rc5/mm/memory.c~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/mm/memory.c 2008-03-16 22:57:40.000000000 +0530
@@ -838,6 +838,11 @@ unsigned long unmap_vmas(struct mmu_gath
if (vma->vm_flags & VM_ACCOUNT)
*nr_accounted += (end - start) >> PAGE_SHIFT;
+ /*
+ * Unaccount used virtual memory for cgroups
+ */
+ mem_cgroup_update_as(vma->vm_mm,
+ ((long)(start - end)) >> PAGE_SHIFT);
while (start != end) {
if (!tlb_start_valid) {
_
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
WARNING: multiple messages have this Message-ID (diff)
From: Balbir Singh <balbir@linux.vnet.ibm.com>
To: linux-mm@kvack.org
Cc: Hugh Dickins <hugh@veritas.com>,
Sudhir Kumar <skumar@linux.vnet.ibm.com>,
YAMAMOTO Takashi <yamamoto@valinux.co.jp>,
Paul Menage <menage@google.com>,
lizf@cn.fujitsu.com, linux-kernel@vger.kernel.org,
taka@valinux.co.jp, David Rientjes <rientjes@google.com>,
Pavel Emelianov <xemul@openvz.org>,
Balbir Singh <balbir@linux.vnet.ibm.com>,
Andrew Morton <akpm@linux-foundation.org>,
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Subject: [RFC][2/3] Account and control virtual address space allocations
Date: Sun, 16 Mar 2008 23:00:05 +0530 [thread overview]
Message-ID: <20080316173005.8812.88290.sendpatchset@localhost.localdomain> (raw)
In-Reply-To: <20080316172942.8812.56051.sendpatchset@localhost.localdomain>
This patch implements accounting and control of virtual address space.
Accounting is done when the virtual address space of any task/mm_struct
belonging to the cgroup is incremented or decremented. This patch
fails the expansion if the cgroup goes over its limit. A new function
mem_cgroup_update_as() is added to deal with the accounting of the virtual
address space usage of cgroups.
TODOs
1. IA64 has code in perfmon.c pfm_smpl_buffer_alloc(), which increments
the total_vm of the mm_struct. This code has not yet been brought into
virtual address space control
2. Only when CONFIG_MMU is enabled, is the virtual address space control
enabled. Should we do this for nommu cases as well? My suspicion is
that we don't have to.
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---
arch/x86/kernel/ptrace.c | 10 +++++++++-
include/linux/memcontrol.h | 7 +++++++
init/Kconfig | 4 +++-
kernel/fork.c | 9 +++++++--
mm/memcontrol.c | 37 +++++++++++++++++++++++++++++++++++++
mm/memory.c | 5 +++++
mm/mmap.c | 22 ++++++++++++++++++++--
mm/mremap.c | 21 ++++++++++++++++++---
8 files changed, 106 insertions(+), 9 deletions(-)
diff -puN mm/memcontrol.c~memory-controller-virtual-address-space-accounting-and-control mm/memcontrol.c
--- linux-2.6.25-rc5/mm/memcontrol.c~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/mm/memcontrol.c 2008-03-16 22:57:40.000000000 +0530
@@ -525,6 +525,32 @@ unsigned long mem_cgroup_isolate_pages(u
}
/*
+ * Check if the current cgroup exceeds its address space limit.
+ * Returns 0 on success and 1 on failure.
+ */
+int mem_cgroup_update_as(struct mm_struct *mm, long nr_pages)
+{
+ int ret = 0;
+ struct mem_cgroup *mem;
+ if (mem_cgroup_subsys.disabled)
+ return ret;
+
+ rcu_read_lock();
+ mem = rcu_dereference(mm->mem_cgroup);
+ css_get(&mem->css);
+ rcu_read_unlock();
+
+ if (nr_pages > 0) {
+ if (res_counter_charge(&mem->as_res, (nr_pages * PAGE_SIZE)))
+ ret = 1;
+ } else
+ res_counter_uncharge(&mem->as_res, (-nr_pages * PAGE_SIZE));
+
+ css_put(&mem->css);
+ return ret;
+}
+
+/*
* Charge the memory controller for page usage.
* Return
* 0 if the charge was successful
@@ -1103,6 +1129,17 @@ static void mem_cgroup_move_task(struct
goto out;
css_get(&mem->css);
+ /*
+ * For address space accounting, the charges are migrated.
+ * We need to migrate it since all the future uncharge/charge will
+ * now happen to the new cgroup. For consistency, we need to migrate
+ * all charges, otherwise we could end up dropping charges from
+ * the new cgroup (even though they were incurred in the current
+ * group).
+ */
+ if (res_counter_charge(&mem->as_res, mm->total_vm))
+ goto out;
+ res_counter_uncharge(&old_mem->as_res, mm->total_vm);
rcu_assign_pointer(mm->mem_cgroup, mem);
css_put(&old_mem->css);
diff -puN include/linux/memcontrol.h~memory-controller-virtual-address-space-accounting-and-control include/linux/memcontrol.h
--- linux-2.6.25-rc5/include/linux/memcontrol.h~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/include/linux/memcontrol.h 2008-03-16 22:57:40.000000000 +0530
@@ -54,6 +54,7 @@ int task_in_mem_cgroup(struct task_struc
extern int mem_cgroup_prepare_migration(struct page *page);
extern void mem_cgroup_end_migration(struct page *page);
extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
+extern int mem_cgroup_update_as(struct mm_struct *mm, long nr_pages);
/*
* For memory reclaim.
@@ -172,6 +173,12 @@ static inline long mem_cgroup_calc_recla
{
return 0;
}
+
+static inline int mem_cgroup_update_as(struct mm_struct *mm, long nr_pages)
+{
+ return 0;
+}
+
#endif /* CONFIG_CGROUP_MEM_CONT */
#endif /* _LINUX_MEMCONTROL_H */
diff -puN mm/mmap.c~memory-controller-virtual-address-space-accounting-and-control mm/mmap.c
--- linux-2.6.25-rc5/mm/mmap.c~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/mm/mmap.c 2008-03-16 22:57:40.000000000 +0530
@@ -1117,6 +1117,9 @@ munmap_back:
}
}
+ if (mem_cgroup_update_as(mm, len >> PAGE_SHIFT))
+ return -ENOMEM;
+
/*
* Can we just expand an old private anonymous mapping?
* The VM_SHARED test is necessary because shmem_zero_setup
@@ -1226,8 +1229,11 @@ unmap_and_free_vma:
free_vma:
kmem_cache_free(vm_area_cachep, vma);
unacct_error:
- if (charged)
+ if (charged) {
+ mem_cgroup_update_as(mm, -charged);
vm_unacct_memory(charged);
+ }
+unacct_as_error:
return error;
}
@@ -1555,6 +1561,9 @@ static int acct_stack_growth(struct vm_a
if (security_vm_enough_memory(grow))
return -ENOMEM;
+ if (mem_cgroup_update_as(mm, grow))
+ return -ENOMEM;
+
/* Ok, everything looks good - let it rip */
mm->total_vm += grow;
if (vma->vm_flags & VM_LOCKED)
@@ -2003,9 +2012,14 @@ unsigned long do_brk(unsigned long addr,
if (mm->map_count > sysctl_max_map_count)
return -ENOMEM;
- if (security_vm_enough_memory(len >> PAGE_SHIFT))
+ if (mem_cgroup_update_as(mm, (len >> PAGE_SHIFT)))
return -ENOMEM;
+ if (security_vm_enough_memory(len >> PAGE_SHIFT)) {
+ mem_cgroup_update_as(mm, -(len >> PAGE_SHIFT));
+ return -ENOMEM;
+ }
+
/* Can we just expand an old private anonymous mapping? */
if (vma_merge(mm, prev, addr, addr + len, flags,
NULL, NULL, pgoff, NULL))
@@ -2236,6 +2250,9 @@ int install_special_mapping(struct mm_st
if (unlikely(vma == NULL))
return -ENOMEM;
+ if (mem_cgroup_update_as(mm, len >> PAGE_SHIFT))
+ return -ENOMEM;
+
vma->vm_mm = mm;
vma->vm_start = addr;
vma->vm_end = addr + len;
@@ -2248,6 +2265,7 @@ int install_special_mapping(struct mm_st
if (unlikely(insert_vm_struct(mm, vma))) {
kmem_cache_free(vm_area_cachep, vma);
+ mem_cgroup_update_as(mm, -(len >> PAGE_SHIFT));
return -ENOMEM;
}
diff -puN arch/x86/kernel/ptrace.c~memory-controller-virtual-address-space-accounting-and-control arch/x86/kernel/ptrace.c
--- linux-2.6.25-rc5/arch/x86/kernel/ptrace.c~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/arch/x86/kernel/ptrace.c 2008-03-16 22:57:40.000000000 +0530
@@ -20,6 +20,7 @@
#include <linux/audit.h>
#include <linux/seccomp.h>
#include <linux/signal.h>
+#include <linux/memcontrol.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -787,6 +788,8 @@ static int ptrace_bts_realloc(struct tas
current->mm->total_vm -= old_size;
current->mm->locked_vm -= old_size;
+ mem_cgroup_update_as(current->mm, -old_size);
+
if (size == 0)
goto out;
@@ -816,10 +819,15 @@ static int ptrace_bts_realloc(struct tas
goto out;
}
+ if (mem_cgroup_update_as(current->mm, size))
+ goto out;
+
ret = ds_allocate((void **)&child->thread.ds_area_msr,
size << PAGE_SHIFT);
- if (ret < 0)
+ if (ret < 0) {
+ mem_cgroup_update_as(current->mm, -size);
goto out;
+ }
current->mm->total_vm += size;
current->mm->locked_vm += size;
diff -puN kernel/fork.c~memory-controller-virtual-address-space-accounting-and-control kernel/fork.c
--- linux-2.6.25-rc5/kernel/fork.c~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/kernel/fork.c 2008-03-16 22:57:40.000000000 +0530
@@ -53,6 +53,7 @@
#include <linux/tty.h>
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
+#include <linux/memcontrol.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -237,6 +238,7 @@ static int dup_mmap(struct mm_struct *mm
for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
struct file *file;
+ unsigned int len = vma_pages(mpnt);
if (mpnt->vm_flags & VM_DONTCOPY) {
long pages = vma_pages(mpnt);
@@ -247,11 +249,12 @@ static int dup_mmap(struct mm_struct *mm
}
charge = 0;
if (mpnt->vm_flags & VM_ACCOUNT) {
- unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
if (security_vm_enough_memory(len))
goto fail_nomem;
charge = len;
}
+ if (mem_cgroup_update_as(mm, len))
+ goto fail_nomem_as;
tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (!tmp)
goto fail_nomem;
@@ -311,8 +314,10 @@ out:
fail_nomem_policy:
kmem_cache_free(vm_area_cachep, tmp);
fail_nomem:
- retval = -ENOMEM;
+ mem_cgroup_update_as(mm, -charge);
vm_unacct_memory(charge);
+fail_nomem_as:
+ retval = -ENOMEM;
goto out;
}
diff -puN mm/mremap.c~memory-controller-virtual-address-space-accounting-and-control mm/mremap.c
--- linux-2.6.25-rc5/mm/mremap.c~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/mm/mremap.c 2008-03-16 22:57:40.000000000 +0530
@@ -174,10 +174,15 @@ static unsigned long move_vma(struct vm_
if (mm->map_count >= sysctl_max_map_count - 3)
return -ENOMEM;
+ if (mem_cgroup_update_as(mm, new_len >> PAGE_SHIFT))
+ return -ENOMEM;
+
new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
- if (!new_vma)
+ if (!new_vma) {
+ mem_cgroup_update_as(mm, -(new_len >> PAGE_SHIFT));
return -ENOMEM;
+ }
moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
if (moved_len < old_len) {
@@ -187,6 +192,7 @@ static unsigned long move_vma(struct vm_
* and then proceed to unmap new area instead of old.
*/
move_page_tables(new_vma, new_addr, vma, old_addr, moved_len);
+ mem_cgroup_update_as(mm, -(new_len >> PAGE_SHIFT));
vma = new_vma;
old_len = new_len;
old_addr = new_addr;
@@ -347,10 +353,17 @@ unsigned long do_mremap(unsigned long ad
goto out;
}
+ if (mem_cgroup_update_as(mm, (new_len - old_len) >> PAGE_SHIFT)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
if (vma->vm_flags & VM_ACCOUNT) {
charged = (new_len - old_len) >> PAGE_SHIFT;
- if (security_vm_enough_memory(charged))
+ if (security_vm_enough_memory(charged)) {
+ mem_cgroup_update_as(mm, -charged);
goto out_nc;
+ }
}
/* old_len exactly to the end of the area..
@@ -406,8 +419,10 @@ unsigned long do_mremap(unsigned long ad
ret = move_vma(vma, addr, old_len, new_len, new_addr);
}
out:
- if (ret & ~PAGE_MASK)
+ if (ret & ~PAGE_MASK) {
vm_unacct_memory(charged);
+ mem_cgroup_update_as(mm, -charged);
+ }
out_nc:
return ret;
}
diff -puN init/Kconfig~memory-controller-virtual-address-space-accounting-and-control init/Kconfig
--- linux-2.6.25-rc5/init/Kconfig~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/init/Kconfig 2008-03-16 22:57:40.000000000 +0530
@@ -369,7 +369,9 @@ config CGROUP_MEM_RES_CTLR
depends on CGROUPS && RESOURCE_COUNTERS
help
Provides a memory resource controller that manages both page cache and
- RSS memory.
+ RSS memory. It also provide accounting and control of address
+ space allocations (along the lines of RLIMIT_AS) for cgroups
+ when CONFIG_MMU is enabled.
Note that setting this option increases fixed memory overhead
associated with each page of memory in the system by 4/8 bytes
diff -puN mm/swapfile.c~memory-controller-virtual-address-space-accounting-and-control mm/swapfile.c
diff -puN mm/memory.c~memory-controller-virtual-address-space-accounting-and-control mm/memory.c
--- linux-2.6.25-rc5/mm/memory.c~memory-controller-virtual-address-space-accounting-and-control 2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/mm/memory.c 2008-03-16 22:57:40.000000000 +0530
@@ -838,6 +838,11 @@ unsigned long unmap_vmas(struct mmu_gath
if (vma->vm_flags & VM_ACCOUNT)
*nr_accounted += (end - start) >> PAGE_SHIFT;
+ /*
+ * Unaccount used virtual memory for cgroups
+ */
+ mem_cgroup_update_as(vma->vm_mm,
+ ((long)(start - end)) >> PAGE_SHIFT);
while (start != end) {
if (!tlb_start_valid) {
_
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2008-03-16 17:31 UTC|newest]
Thread overview: 60+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-03-16 17:29 [RFC][0/3] Virtual address space control for cgroups Balbir Singh
2008-03-16 17:29 ` Balbir Singh
2008-03-16 17:29 ` [RFC][1/3] Add user interface for virtual address space control Balbir Singh
2008-03-16 17:29 ` Balbir Singh
2008-03-16 17:30 ` Balbir Singh [this message]
2008-03-16 17:30 ` [RFC][2/3] Account and control virtual address space allocations Balbir Singh
2008-03-17 2:02 ` Paul Menage
2008-03-17 2:02 ` Paul Menage
2008-03-17 2:57 ` Balbir Singh
2008-03-17 2:57 ` Balbir Singh
2008-03-17 3:03 ` Paul Menage
2008-03-17 3:03 ` Paul Menage
2008-03-17 11:36 ` Pavel Emelyanov
2008-03-17 11:36 ` Pavel Emelyanov
2008-03-17 12:29 ` Balbir Singh
2008-03-17 12:29 ` Balbir Singh
2008-03-17 12:40 ` Pavel Emelyanov
2008-03-17 12:40 ` Pavel Emelyanov
2008-03-17 12:51 ` Balbir Singh
2008-03-17 12:51 ` Balbir Singh
2008-03-17 13:01 ` Pavel Emelyanov
2008-03-17 13:01 ` Pavel Emelyanov
2008-03-17 14:39 ` Balbir Singh
2008-03-17 14:39 ` Balbir Singh
2008-03-17 16:53 ` Dave Hansen
2008-03-17 16:53 ` Dave Hansen
2008-03-18 1:14 ` Balbir Singh
2008-03-18 1:14 ` Balbir Singh
2008-03-18 17:11 ` Dave Hansen
2008-03-18 17:11 ` Dave Hansen
2008-03-18 17:58 ` Balbir Singh
2008-03-18 17:58 ` Balbir Singh
2008-03-17 23:35 ` YAMAMOTO Takashi
2008-03-17 23:35 ` YAMAMOTO Takashi
2008-03-18 1:10 ` Balbir Singh
2008-03-18 1:10 ` Balbir Singh
2008-03-16 17:30 ` [RFC][3/3] Update documentation for virtual address space control Balbir Singh
2008-03-16 17:30 ` Balbir Singh
2008-03-16 18:32 ` Randy Dunlap
2008-03-16 18:32 ` Randy Dunlap
2008-03-17 1:33 ` Balbir Singh
2008-03-17 1:33 ` Balbir Singh
2008-03-16 23:26 ` [RFC][0/3] Virtual address space control for cgroups Paul Menage
2008-03-16 23:26 ` Paul Menage
2008-03-17 1:47 ` Li Zefan
2008-03-17 1:47 ` Li Zefan
2008-03-17 1:57 ` Paul Menage
2008-03-17 1:57 ` Paul Menage
2008-03-17 5:08 ` Balbir Singh
2008-03-17 5:08 ` Balbir Singh
2008-03-17 5:22 ` Paul Menage
2008-03-17 5:22 ` Paul Menage
2008-03-17 15:15 ` Balbir Singh
2008-03-17 15:15 ` Balbir Singh
2008-03-17 1:50 ` Balbir Singh
2008-03-17 1:50 ` Balbir Singh
2008-03-17 1:55 ` Paul Menage
2008-03-17 1:55 ` Paul Menage
2008-03-17 3:12 ` Balbir Singh
2008-03-17 3:12 ` Balbir Singh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080316173005.8812.88290.sendpatchset@localhost.localdomain \
--to=balbir@linux.vnet.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=hugh@veritas.com \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lizf@cn.fujitsu.com \
--cc=menage@google.com \
--cc=rientjes@google.com \
--cc=skumar@linux.vnet.ibm.com \
--cc=taka@valinux.co.jp \
--cc=xemul@openvz.org \
--cc=yamamoto@valinux.co.jp \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.