* [PATCH] memory.min_usage
@ 2007-12-04 4:09 YAMAMOTO Takashi
[not found] ` <20071204040934.44AF41D0BA3-Pcsii4f/SVk@public.gmane.org>
0 siblings, 1 reply; 22+ messages in thread
From: YAMAMOTO Takashi @ 2007-12-04 4:09 UTC (permalink / raw)
To: balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg
hi,
here's a patch to implement memory.min_usage,
which controls the minimum memory usage for a cgroup.
it works similarly to mlock;
global memory reclamation doesn't reclaim memory from
cgroups whose memory usage is below the value.
setting it too high is a dangerous operation.
it's against 2.6.24-rc3-mm2 + memory.swappiness patch i posted here yesterday.
but it's logically independent from the swappiness patch.
todo:
- restrict non-root user's operation ragardless of owner of cgroupfs files?
- make oom killer aware of this?
YAMAMOTO Takashi
Signed-off-by: YAMAMOTO Takashi <yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org>
---
--- linux-2.6.24-rc3-mm2-swappiness/include/linux/memcontrol.h.BACKUP2 2007-12-03 13:52:37.000000000 +0900
+++ linux-2.6.24-rc3-mm2-swappiness/include/linux/memcontrol.h 2007-12-03 14:07:45.000000000 +0900
@@ -47,6 +47,7 @@ extern int mem_cgroup_cache_charge(struc
gfp_t gfp_mask);
int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
extern int mem_cgroup_swappiness(struct mem_cgroup *mem);
+extern int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem);
static inline struct mem_cgroup *mm_cgroup(const struct mm_struct *mm)
{
--- linux-2.6.24-rc3-mm2-swappiness/mm/memcontrol.c.BACKUP2 2007-12-03 13:52:13.000000000 +0900
+++ linux-2.6.24-rc3-mm2-swappiness/mm/memcontrol.c 2007-12-04 11:42:07.000000000 +0900
@@ -134,6 +134,7 @@ struct mem_cgroup {
unsigned long control_type; /* control RSS or RSS+Pagecache */
int prev_priority; /* for recording reclaim priority */
unsigned int swappiness; /* swappiness */
+ unsigned long long min_usage; /* XXX should be a part of res_counter? */
/*
* statistics.
*/
@@ -1096,6 +1097,22 @@ static u64 mem_cgroup_swappiness_read(st
return mem->swappiness;
}
+static int mem_cgroup_min_usage_write(struct cgroup *cont, struct cftype *cft,
+ u64 val)
+{
+ struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+
+ mem->min_usage = val;
+ return 0;
+}
+
+static u64 mem_cgroup_min_usage_read(struct cgroup *cont, struct cftype *cft)
+{
+ struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+
+ return mem->min_usage;
+}
+
static struct cftype mem_cgroup_files[] = {
{
.name = "usage_in_bytes",
@@ -1132,6 +1149,11 @@ static struct cftype mem_cgroup_files[]
.write_uint = mem_cgroup_swappiness_write,
.read_uint = mem_cgroup_swappiness_read,
},
+ {
+ .name = "min_usage_in_bytes",
+ .write_uint = mem_cgroup_min_usage_write,
+ .read_uint = mem_cgroup_min_usage_read,
+ },
};
/* XXX probably it's better to move try_to_free_mem_cgroup_pages to
@@ -1142,6 +1164,36 @@ int mem_cgroup_swappiness(struct mem_cgr
return mem->swappiness;
}
+int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem1)
+{
+ struct page_cgroup *pc;
+ int result = 1;
+
+ if (mem1 != NULL)
+ return 1;
+
+ lock_page_cgroup(page);
+ pc = page_get_page_cgroup(page);
+ if (pc) {
+ struct mem_cgroup *mem2 = pc->mem_cgroup;
+ unsigned long long min_usage;
+
+ BUG_ON(mem2 == NULL);
+ min_usage = mem2->min_usage;
+ if (min_usage != 0) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&mem2->res.lock, flags);
+ if (mem2->res.usage <= min_usage)
+ result = 0;
+ spin_unlock_irqrestore(&mem2->res.lock, flags);
+ }
+ }
+ unlock_page_cgroup(page);
+
+ return result;
+}
+
static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
{
struct mem_cgroup_per_node *pn;
--- linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c.BACKUP2 2007-12-03 13:52:22.000000000 +0900
+++ linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c 2007-12-03 14:11:42.000000000 +0900
@@ -531,6 +531,11 @@ static unsigned long shrink_page_list(st
referenced && page_mapping_inuse(page))
goto activate_locked;
+#ifdef CONFIG_CGROUP_MEM_CONT
+ if (!mem_cgroup_canreclaim(page, sc->mem_cgroup))
+ goto activate_locked;
+#endif /* CONFIG_CGROUP_MEM_CONT */
+
#ifdef CONFIG_SWAP
/*
* Anonymous process memory has backing store?
@@ -1122,6 +1127,12 @@ static void shrink_active_list(unsigned
list_add(&page->lru, &l_active);
continue;
}
+#ifdef CONFIG_CGROUP_MEM_CONT
+ if (!mem_cgroup_canreclaim(page, sc->mem_cgroup)) {
+ list_add(&page->lru, &l_active);
+ continue;
+ }
+#endif /* CONFIG_CGROUP_MEM_CONT */
list_add(&page->lru, &l_inactive);
}
^ permalink raw reply [flat|nested] 22+ messages in thread[parent not found: <20071204040934.44AF41D0BA3-Pcsii4f/SVk@public.gmane.org>]
* Re: [PATCH] memory.min_usage [not found] ` <20071204040934.44AF41D0BA3-Pcsii4f/SVk@public.gmane.org> @ 2007-12-04 5:58 ` KAMEZAWA Hiroyuki [not found] ` <20071204145831.da477f5b.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> 2007-12-04 6:03 ` [Devel] [PATCH] memory.min_usage Paul Menage ` (2 subsequent siblings) 3 siblings, 1 reply; 22+ messages in thread From: KAMEZAWA Hiroyuki @ 2007-12-04 5:58 UTC (permalink / raw) To: YAMAMOTO Takashi Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg, xemul-GEFAQzZX7r8dnm+yROfE0A, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 On Tue, 4 Dec 2007 13:09:34 +0900 (JST) yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org (YAMAMOTO Takashi) wrote: > hi, > > here's a patch to implement memory.min_usage, > which controls the minimum memory usage for a cgroup. > Thanks. > todo: > - restrict non-root user's operation ragardless of owner of cgroupfs files? I like 'only-for-root' user. or CAP_SYS_RESOURCE. > - make oom killer aware of this? > For OOM-Killer, IMHO, no care is necessary (now). This function can be considered as a kind of mlock(). A user can be aware of memory usage. But, we may need some logic/reason to request this function. > + unsigned long long min_usage; /* XXX should be a part of res_counter? */ > /* Maybe res_counter is better. (Added CC: to Pavel Emelianov) > +int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem1) > +{ > + struct page_cgroup *pc; > + int result = 1; > + > + if (mem1 != NULL) > + return 1; > + > + lock_page_cgroup(page); > + pc = page_get_page_cgroup(page); > + if (pc) { > + struct mem_cgroup *mem2 = pc->mem_cgroup; > + unsigned long long min_usage; > + > + BUG_ON(mem2 == NULL); > + min_usage = mem2->min_usage; > + if (min_usage != 0) { > + unsigned long flags; > + > + spin_lock_irqsave(&mem2->res.lock, flags); > + if (mem2->res.usage <= min_usage) > + result = 0; > + spin_unlock_irqrestore(&mem2->res.lock, flags); > + } > + } > + unlock_page_cgroup(page); > + > + return result; > +} > + How about adding lock_and_get_page_cgroup(page)/put_page_cgroup() ? == struct page_cgroup *pc lock_and_get_page_cgroup(page) { struct page_cgroup *pc; lock_page_cgroup(page); pc = page_get_page_cgroup(page); if (atomic_inc_not_zero(&pc->ref_cnt)) pc = NULL; unlock_page_cgroup(page); return pc; } struct page_cgroup *pc put_page_cgroup(struct page_cgroup *pc) { mem_cgroup_uncharge(pc); } == BTW, how about add a status to res_counter ? My (based on your) current patch uses watermark_state. Maybe we can change it to be resource_state and show following status. RES_STATE_HIT_LIMIT, RES_STATE_ABOVE_HWATER, RES_STATE_ABOVE_LWATER, RES_STATE_STABLE, ? RES_STATE_BELOW_MIN, Useful ? > static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) > { > struct mem_cgroup_per_node *pn; > --- linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c.BACKUP2 2007-12-03 13:52:22.000000000 +0900 > +++ linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c 2007-12-03 14:11:42.000000000 +0900 > @@ -531,6 +531,11 @@ static unsigned long shrink_page_list(st > referenced && page_mapping_inuse(page)) > goto activate_locked; > > +#ifdef CONFIG_CGROUP_MEM_CONT > + if (!mem_cgroup_canreclaim(page, sc->mem_cgroup)) > + goto activate_locked; > +#endif /* CONFIG_CGROUP_MEM_CONT */ > + Maybe == if (scan_global_lru(sc) && ! mem_cgroup_canreclaim(page, sc->mem-cgroup)) goto activate_locked: == > #ifdef CONFIG_SWAP > /* > * Anonymous process memory has backing store? > @@ -1122,6 +1127,12 @@ static void shrink_active_list(unsigned > list_add(&page->lru, &l_active); > continue; > } > +#ifdef CONFIG_CGROUP_MEM_CONT > + if (!mem_cgroup_canreclaim(page, sc->mem_cgroup)) { > + list_add(&page->lru, &l_active); > + continue; > + } here too. Thanks, -Kame ^ permalink raw reply [flat|nested] 22+ messages in thread
[parent not found: <20071204145831.da477f5b.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>]
* Re: [PATCH] memory.min_usage [not found] ` <20071204145831.da477f5b.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> @ 2007-12-04 7:01 ` YAMAMOTO Takashi [not found] ` <20071204070122.16DDD1D0BCD-Pcsii4f/SVk@public.gmane.org> 0 siblings, 1 reply; 22+ messages in thread From: YAMAMOTO Takashi @ 2007-12-04 7:01 UTC (permalink / raw) To: kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8, xemul-GEFAQzZX7r8dnm+yROfE0A > > +int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem1) > > +{ > > + struct page_cgroup *pc; > > + int result = 1; > > + > > + if (mem1 != NULL) > > + return 1; > > + > > + lock_page_cgroup(page); > > + pc = page_get_page_cgroup(page); > > + if (pc) { > > + struct mem_cgroup *mem2 = pc->mem_cgroup; > > + unsigned long long min_usage; > > + > > + BUG_ON(mem2 == NULL); > > + min_usage = mem2->min_usage; > > + if (min_usage != 0) { > > + unsigned long flags; > > + > > + spin_lock_irqsave(&mem2->res.lock, flags); > > + if (mem2->res.usage <= min_usage) > > + result = 0; > > + spin_unlock_irqrestore(&mem2->res.lock, flags); > > + } > > + } > > + unlock_page_cgroup(page); > > + > > + return result; > > +} > > + > > How about adding lock_and_get_page_cgroup(page)/put_page_cgroup() ? > == > struct page_cgroup *pc lock_and_get_page_cgroup(page) > { > struct page_cgroup *pc; > > lock_page_cgroup(page); > pc = page_get_page_cgroup(page); > if (atomic_inc_not_zero(&pc->ref_cnt)) > pc = NULL; > unlock_page_cgroup(page); > return pc; > } > > struct page_cgroup *pc put_page_cgroup(struct page_cgroup *pc) > { > mem_cgroup_uncharge(pc); > } > == i'm not sure if it's a good idea given that reference counting (atomic_foo) has its own costs. > BTW, how about add a status to res_counter ? > My (based on your) current patch uses watermark_state. > > Maybe we can change it to be resource_state and show following status. > > RES_STATE_HIT_LIMIT, > RES_STATE_ABOVE_HWATER, > RES_STATE_ABOVE_LWATER, > RES_STATE_STABLE, ? > RES_STATE_BELOW_MIN, > > Useful ? how about making res_counter use seq_lock? > > static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) > > { > > struct mem_cgroup_per_node *pn; > > --- linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c.BACKUP2 2007-12-03 13:52:22.000000000 +0900 > > +++ linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c 2007-12-03 14:11:42.000000000 +0900 > > @@ -531,6 +531,11 @@ static unsigned long shrink_page_list(st > > referenced && page_mapping_inuse(page)) > > goto activate_locked; > > > > +#ifdef CONFIG_CGROUP_MEM_CONT > > + if (!mem_cgroup_canreclaim(page, sc->mem_cgroup)) > > + goto activate_locked; > > +#endif /* CONFIG_CGROUP_MEM_CONT */ > > + > > Maybe > == > if (scan_global_lru(sc) && ! > mem_cgroup_canreclaim(page, sc->mem-cgroup)) > goto activate_locked: > == i don't think the decision belongs to callers. (at least it wasn't my intention.) YAMAMOTO Takashi ^ permalink raw reply [flat|nested] 22+ messages in thread
[parent not found: <20071204070122.16DDD1D0BCD-Pcsii4f/SVk@public.gmane.org>]
* Re: [PATCH] memory.min_usage [not found] ` <20071204070122.16DDD1D0BCD-Pcsii4f/SVk@public.gmane.org> @ 2007-12-04 7:27 ` KAMEZAWA Hiroyuki [not found] ` <20071204162753.c28cc550.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> 0 siblings, 1 reply; 22+ messages in thread From: KAMEZAWA Hiroyuki @ 2007-12-04 7:27 UTC (permalink / raw) To: YAMAMOTO Takashi Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8, xemul-GEFAQzZX7r8dnm+yROfE0A On Tue, 4 Dec 2007 16:01:21 +0900 (JST) yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org (YAMAMOTO Takashi) wrote: > > BTW, how about add a status to res_counter ? > > My (based on your) current patch uses watermark_state. > > > > Maybe we can change it to be resource_state and show following status. > > > > RES_STATE_HIT_LIMIT, > > RES_STATE_ABOVE_HWATER, > > RES_STATE_ABOVE_LWATER, > > RES_STATE_STABLE, ? > > RES_STATE_BELOW_MIN, > > > > Useful ? > > how about making res_counter use seq_lock? > I thought of that when I wrote hi/low watermark patches. But I didn't try.... Hmm, but maybe worth trying. Pavel, can we change res_counter->lock to seq_lock ? If we can, I'll write a patch. Thanks, -Kame ^ permalink raw reply [flat|nested] 22+ messages in thread
[parent not found: <20071204162753.c28cc550.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>]
* Re: [PATCH] memory.min_usage [not found] ` <20071204162753.c28cc550.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> @ 2007-12-04 7:46 ` KAMEZAWA Hiroyuki [not found] ` <20071204164615.fc871e44.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> 0 siblings, 1 reply; 22+ messages in thread From: KAMEZAWA Hiroyuki @ 2007-12-04 7:46 UTC (permalink / raw) To: KAMEZAWA Hiroyuki Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg, YAMAMOTO Takashi, xemul-GEFAQzZX7r8dnm+yROfE0A, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 On Tue, 4 Dec 2007 16:27:53 +0900 KAMEZAWA Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> wrote: > I thought of that when I wrote hi/low watermark patches. > But I didn't try.... > Hmm, but maybe worth trying. > But even with seqlock, we'll have to disable irq. And maybe my current high/low patch needs smp_rmb(). I'll consider a bit more. Thanks, -Kame ^ permalink raw reply [flat|nested] 22+ messages in thread
[parent not found: <20071204164615.fc871e44.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>]
* Re: [PATCH] memory.min_usage [not found] ` <20071204164615.fc871e44.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> @ 2007-12-04 7:58 ` YAMAMOTO Takashi [not found] ` <20071204075854.5850B1D0BFA-Pcsii4f/SVk@public.gmane.org> 0 siblings, 1 reply; 22+ messages in thread From: YAMAMOTO Takashi @ 2007-12-04 7:58 UTC (permalink / raw) To: kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8, xemul-GEFAQzZX7r8dnm+yROfE0A > But even with seqlock, we'll have to disable irq. for writers, sure. readers don't need to disable irq. YAMAMOTO Takashi ^ permalink raw reply [flat|nested] 22+ messages in thread
[parent not found: <20071204075854.5850B1D0BFA-Pcsii4f/SVk@public.gmane.org>]
* Re: [PATCH] memory.min_usage (seqlock for res_counter) [not found] ` <20071204075854.5850B1D0BFA-Pcsii4f/SVk@public.gmane.org> @ 2007-12-04 10:54 ` KAMEZAWA Hiroyuki [not found] ` <20071204195436.77fc911b.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> 0 siblings, 1 reply; 22+ messages in thread From: KAMEZAWA Hiroyuki @ 2007-12-04 10:54 UTC (permalink / raw) To: xemul-GEFAQzZX7r8dnm+yROfE0A Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg, YAMAMOTO Takashi, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 This is seqlock version res_counter. Maybe this this will reduce # of spin_lock. Pavel-san, How about this ? -Kame == resource counter's spinlock can be seqlock. (res_counter doesn't include any pointers.) And it will be good to avoid atomic ops if we can when we just checks value. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> include/linux/res_counter.h | 24 +++++++----------------- kernel/res_counter.c | 14 +++++++------- 2 files changed, 14 insertions(+), 24 deletions(-) Index: linux-2.6.24-rc3-mm2/include/linux/res_counter.h =================================================================== --- linux-2.6.24-rc3-mm2.orig/include/linux/res_counter.h +++ linux-2.6.24-rc3-mm2/include/linux/res_counter.h @@ -12,6 +12,7 @@ */ #include <linux/cgroup.h> +#include <linux/seqlock.h> /* * The core object. the cgroup that wishes to account for some @@ -36,7 +37,7 @@ struct res_counter { * the lock to protect all of the above. * the routines below consider this to be IRQ-safe */ - spinlock_t lock; + seqlock_t lock; }; /* @@ -101,27 +102,17 @@ int res_counter_charge(struct res_counte void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); void res_counter_uncharge(struct res_counter *counter, unsigned long val); -static inline bool res_counter_limit_check_locked(struct res_counter *cnt) -{ - if (cnt->usage < cnt->limit) - return true; - - return false; -} - -/* - * Helper function to detect if the cgroup is within it's limit or - * not. It's currently called from cgroup_rss_prepare() - */ static inline bool res_counter_check_under_limit(struct res_counter *cnt) { bool ret; - unsigned long flags; + unsigned long seq; - spin_lock_irqsave(&cnt->lock, flags); - ret = res_counter_limit_check_locked(cnt); - spin_unlock_irqrestore(&cnt->lock, flags); + do { + seq = read_seqbegin(&cnt->lock); + ret = (cnt->usage < cnt->limit); + } while (read_seqretry(&cnt->lock, seq)); return ret; } + #endif Index: linux-2.6.24-rc3-mm2/kernel/res_counter.c =================================================================== --- linux-2.6.24-rc3-mm2.orig/kernel/res_counter.c +++ linux-2.6.24-rc3-mm2/kernel/res_counter.c @@ -15,7 +15,7 @@ void res_counter_init(struct res_counter *counter) { - spin_lock_init(&counter->lock); + seqlock_init(&counter->lock); counter->limit = (unsigned long long)LLONG_MAX; } @@ -35,9 +35,9 @@ int res_counter_charge(struct res_counte int ret; unsigned long flags; - spin_lock_irqsave(&counter->lock, flags); + write_seqlock_irqsave(&counter->lock, flags); ret = res_counter_charge_locked(counter, val); - spin_unlock_irqrestore(&counter->lock, flags); + write_sequnlock_irqrestore(&counter->lock, flags); return ret; } @@ -53,9 +53,9 @@ void res_counter_uncharge(struct res_cou { unsigned long flags; - spin_lock_irqsave(&counter->lock, flags); + write_seqlock_irqsave(&counter->lock, flags); res_counter_uncharge_locked(counter, val); - spin_unlock_irqrestore(&counter->lock, flags); + write_sequnlock_irqrestore(&counter->lock, flags); } @@ -122,10 +122,10 @@ ssize_t res_counter_write(struct res_cou if (*end != '\0') goto out_free; } - spin_lock_irqsave(&counter->lock, flags); + write_seqlock_irqsave(&counter->lock, flags); val = res_counter_member(counter, member); *val = tmp; - spin_unlock_irqrestore(&counter->lock, flags); + write_sequnlock_irqrestore(&counter->lock, flags); ret = nbytes; out_free: kfree(buf); ^ permalink raw reply [flat|nested] 22+ messages in thread
[parent not found: <20071204195436.77fc911b.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>]
* Re: [PATCH] memory.min_usage (seqlock for res_counter) [not found] ` <20071204195436.77fc911b.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> @ 2007-12-04 11:10 ` Pavel Emelyanov [not found] ` <475535B2.1020801-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> 0 siblings, 1 reply; 22+ messages in thread From: Pavel Emelyanov @ 2007-12-04 11:10 UTC (permalink / raw) To: KAMEZAWA Hiroyuki Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg, YAMAMOTO Takashi, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 KAMEZAWA Hiroyuki wrote: > This is seqlock version res_counter. > Maybe this this will reduce # of spin_lock. > > Pavel-san, How about this ? AFAIS the readlock is used only in the check_under_limit(), but I think, that even if we read usage and limit values in this case non-atomically, this won't result in any dramatic sequence at all. No? On the other hand, we make the charge/uncharge routines work longer :( > -Kame > == > resource counter's spinlock can be seqlock. (res_counter doesn't include > any pointers.) > > And it will be good to avoid atomic ops if we can when we just checks > value. > > Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> > > > > > include/linux/res_counter.h | 24 +++++++----------------- > kernel/res_counter.c | 14 +++++++------- > 2 files changed, 14 insertions(+), 24 deletions(-) > > Index: linux-2.6.24-rc3-mm2/include/linux/res_counter.h > =================================================================== > --- linux-2.6.24-rc3-mm2.orig/include/linux/res_counter.h > +++ linux-2.6.24-rc3-mm2/include/linux/res_counter.h > @@ -12,6 +12,7 @@ > */ > > #include <linux/cgroup.h> > +#include <linux/seqlock.h> > > /* > * The core object. the cgroup that wishes to account for some > @@ -36,7 +37,7 @@ struct res_counter { > * the lock to protect all of the above. > * the routines below consider this to be IRQ-safe > */ > - spinlock_t lock; > + seqlock_t lock; > }; > > /* > @@ -101,27 +102,17 @@ int res_counter_charge(struct res_counte > void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); > void res_counter_uncharge(struct res_counter *counter, unsigned long val); > > -static inline bool res_counter_limit_check_locked(struct res_counter *cnt) > -{ > - if (cnt->usage < cnt->limit) > - return true; > - > - return false; > -} > - > -/* > - * Helper function to detect if the cgroup is within it's limit or > - * not. It's currently called from cgroup_rss_prepare() > - */ > static inline bool res_counter_check_under_limit(struct res_counter *cnt) > { > bool ret; > - unsigned long flags; > + unsigned long seq; > > - spin_lock_irqsave(&cnt->lock, flags); > - ret = res_counter_limit_check_locked(cnt); > - spin_unlock_irqrestore(&cnt->lock, flags); > + do { > + seq = read_seqbegin(&cnt->lock); > + ret = (cnt->usage < cnt->limit); > + } while (read_seqretry(&cnt->lock, seq)); > return ret; > } > > + > #endif > Index: linux-2.6.24-rc3-mm2/kernel/res_counter.c > =================================================================== > --- linux-2.6.24-rc3-mm2.orig/kernel/res_counter.c > +++ linux-2.6.24-rc3-mm2/kernel/res_counter.c > @@ -15,7 +15,7 @@ > > void res_counter_init(struct res_counter *counter) > { > - spin_lock_init(&counter->lock); > + seqlock_init(&counter->lock); > counter->limit = (unsigned long long)LLONG_MAX; > } > > @@ -35,9 +35,9 @@ int res_counter_charge(struct res_counte > int ret; > unsigned long flags; > > - spin_lock_irqsave(&counter->lock, flags); > + write_seqlock_irqsave(&counter->lock, flags); > ret = res_counter_charge_locked(counter, val); > - spin_unlock_irqrestore(&counter->lock, flags); > + write_sequnlock_irqrestore(&counter->lock, flags); > return ret; > } > > @@ -53,9 +53,9 @@ void res_counter_uncharge(struct res_cou > { > unsigned long flags; > > - spin_lock_irqsave(&counter->lock, flags); > + write_seqlock_irqsave(&counter->lock, flags); > res_counter_uncharge_locked(counter, val); > - spin_unlock_irqrestore(&counter->lock, flags); > + write_sequnlock_irqrestore(&counter->lock, flags); > } > > > @@ -122,10 +122,10 @@ ssize_t res_counter_write(struct res_cou > if (*end != '\0') > goto out_free; > } > - spin_lock_irqsave(&counter->lock, flags); > + write_seqlock_irqsave(&counter->lock, flags); > val = res_counter_member(counter, member); > *val = tmp; > - spin_unlock_irqrestore(&counter->lock, flags); > + write_sequnlock_irqrestore(&counter->lock, flags); > ret = nbytes; > out_free: > kfree(buf); > > ^ permalink raw reply [flat|nested] 22+ messages in thread
[parent not found: <475535B2.1020801-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>]
* Re: [PATCH] memory.min_usage (seqlock for res_counter) [not found] ` <475535B2.1020801-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> @ 2007-12-05 0:34 ` KAMEZAWA Hiroyuki [not found] ` <20071205093455.0f46b456.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> 0 siblings, 1 reply; 22+ messages in thread From: KAMEZAWA Hiroyuki @ 2007-12-05 0:34 UTC (permalink / raw) To: Pavel Emelyanov Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg, YAMAMOTO Takashi, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 On Tue, 04 Dec 2007 14:10:42 +0300 Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote: > KAMEZAWA Hiroyuki wrote: > > This is seqlock version res_counter. > > Maybe this this will reduce # of spin_lock. > > > > Pavel-san, How about this ? > > AFAIS the readlock is used only in the check_under_limit(), > but I think, that even if we read usage and limit values > in this case non-atomically, this won't result in any > dramatic sequence at all. No? > Reader can detect *any* changes in res_counter member which happens while they access res_counter between seq_begin/seq_retry. Memory barrier and "sequence" of seq_lock guarantees this. So..there is no dramatical situation. (it's used for accesing xtime.) I'm sorry if I miss your point. Thanks, -Kame ^ permalink raw reply [flat|nested] 22+ messages in thread
[parent not found: <20071205093455.0f46b456.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>]
* Re: [PATCH] memory.min_usage (seqlock for res_counter) [not found] ` <20071205093455.0f46b456.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> @ 2007-12-05 9:12 ` Pavel Emelyanov [not found] ` <47566B76.6020102-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> 0 siblings, 1 reply; 22+ messages in thread From: Pavel Emelyanov @ 2007-12-05 9:12 UTC (permalink / raw) To: KAMEZAWA Hiroyuki Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg, YAMAMOTO Takashi, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 KAMEZAWA Hiroyuki wrote: > On Tue, 04 Dec 2007 14:10:42 +0300 > Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote: > >> KAMEZAWA Hiroyuki wrote: >>> This is seqlock version res_counter. >>> Maybe this this will reduce # of spin_lock. >>> >>> Pavel-san, How about this ? >> AFAIS the readlock is used only in the check_under_limit(), >> but I think, that even if we read usage and limit values >> in this case non-atomically, this won't result in any >> dramatic sequence at all. No? >> > Reader can detect *any* changes in res_counter member which happens > while they access res_counter between seq_begin/seq_retry. > Memory barrier and "sequence" of seq_lock guarantees this. > So..there is no dramatical situation. > (it's used for accesing xtime.) > > I'm sorry if I miss your point. Sorry, let me explain it in other words. I think, that protection in reader, that guarantees that it will see the valid result, is not very important - even if we compare usage and limit not atomically nothing serious will happen (in this particular case) > Thanks, > -Kame > > ^ permalink raw reply [flat|nested] 22+ messages in thread
[parent not found: <47566B76.6020102-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>]
* Re: [PATCH] memory.min_usage (seqlock for res_counter) [not found] ` <47566B76.6020102-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> @ 2007-12-05 9:29 ` KAMEZAWA Hiroyuki [not found] ` <20071205182927.41d140a7.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> 0 siblings, 1 reply; 22+ messages in thread From: KAMEZAWA Hiroyuki @ 2007-12-05 9:29 UTC (permalink / raw) To: Pavel Emelyanov Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg, YAMAMOTO Takashi, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 On Wed, 05 Dec 2007 12:12:22 +0300 Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote: > Sorry, let me explain it in other words. > > I think, that protection in reader, that guarantees that it > will see the valid result, is not very important - even if > we compare usage and limit not atomically nothing serious > will happen (in this particular case) > Maybe there is no serious situation (now). But programmers don't assume that the function may not return trustable result. And I think it shouldn be trustable AMAP. I'd like to use seq_lock or res_counter_state, here. BTW, I'm wondering I should hold off my patches until 2.6.25-rc series if they make things complex. Thanks, -Kame ^ permalink raw reply [flat|nested] 22+ messages in thread
[parent not found: <20071205182927.41d140a7.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>]
* Re: [PATCH] memory.min_usage (seqlock for res_counter) [not found] ` <20071205182927.41d140a7.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> @ 2007-12-05 9:32 ` Pavel Emelyanov 0 siblings, 0 replies; 22+ messages in thread From: Pavel Emelyanov @ 2007-12-05 9:32 UTC (permalink / raw) To: KAMEZAWA Hiroyuki Cc: Linux Containers, minoura-jCdQPDEk3idL9jVzuh4AOg, YAMAMOTO Takashi, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 KAMEZAWA Hiroyuki wrote: > On Wed, 05 Dec 2007 12:12:22 +0300 > Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote: > >> Sorry, let me explain it in other words. >> >> I think, that protection in reader, that guarantees that it >> will see the valid result, is not very important - even if >> we compare usage and limit not atomically nothing serious >> will happen (in this particular case) >> > Maybe there is no serious situation (now). > But programmers don't assume that the function may not return trustable result. > And I think it shouldn be trustable AMAP. Well... OK. Among other possible ways to achieve this goal seqlocks is the most preferable one from my POV. Thanks :) > I'd like to use seq_lock or res_counter_state, here. > > BTW, I'm wondering I should hold off my patches until 2.6.25-rc series if they > make things complex. Actually, Andrew wrote that he will pay little attention to new functionality till 2.6.24 release, so I think that serious patches should really be held off. That's why I don't send the kmem controller yet :( > Thanks, > -Kame Thanks, Pavel ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Devel] [PATCH] memory.min_usage [not found] ` <20071204040934.44AF41D0BA3-Pcsii4f/SVk@public.gmane.org> 2007-12-04 5:58 ` KAMEZAWA Hiroyuki @ 2007-12-04 6:03 ` Paul Menage [not found] ` <6599ad830712032203t48e455cbjd25a40cf93cb453f-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 2007-12-04 13:30 ` Balbir Singh 2008-09-10 8:44 ` [PATCH][RFC] memory.min_usage again YAMAMOTO Takashi 3 siblings, 1 reply; 22+ messages in thread From: Paul Menage @ 2007-12-04 6:03 UTC (permalink / raw) To: YAMAMOTO Takashi Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 On Dec 3, 2007 8:09 PM, YAMAMOTO Takashi <yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org> wrote: > +static u64 mem_cgroup_min_usage_read(struct cgroup *cont, struct cftype *cft) > +{ > + struct mem_cgroup *mem = mem_cgroup_from_cont(cont); > + > + return mem->min_usage; > +} > + This won't be atomic on a 32-bit arch, and I don't think you have any locking. Paul ^ permalink raw reply [flat|nested] 22+ messages in thread
[parent not found: <6599ad830712032203t48e455cbjd25a40cf93cb453f-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [Devel] [PATCH] memory.min_usage [not found] ` <6599ad830712032203t48e455cbjd25a40cf93cb453f-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2007-12-04 7:02 ` YAMAMOTO Takashi 0 siblings, 0 replies; 22+ messages in thread From: YAMAMOTO Takashi @ 2007-12-04 7:02 UTC (permalink / raw) To: menage-hpIqsD4AKlfQT0dZR+AlfA Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 > On Dec 3, 2007 8:09 PM, YAMAMOTO Takashi <yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org> wrote: > > +static u64 mem_cgroup_min_usage_read(struct cgroup *cont, struct cftype *cft) > > +{ > > + struct mem_cgroup *mem = mem_cgroup_from_cont(cont); > > + > > + return mem->min_usage; > > +} > > + > > This won't be atomic on a 32-bit arch, and I don't think you have any locking. > > Paul sure. probably it's better to make it a part of res_counter. YAMAMOTO Takashi ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] memory.min_usage [not found] ` <20071204040934.44AF41D0BA3-Pcsii4f/SVk@public.gmane.org> 2007-12-04 5:58 ` KAMEZAWA Hiroyuki 2007-12-04 6:03 ` [Devel] [PATCH] memory.min_usage Paul Menage @ 2007-12-04 13:30 ` Balbir Singh 2008-09-10 8:44 ` [PATCH][RFC] memory.min_usage again YAMAMOTO Takashi 3 siblings, 0 replies; 22+ messages in thread From: Balbir Singh @ 2007-12-04 13:30 UTC (permalink / raw) To: YAMAMOTO Takashi Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg YAMAMOTO Takashi wrote: > hi, > > here's a patch to implement memory.min_usage, > which controls the minimum memory usage for a cgroup. > > it works similarly to mlock; > global memory reclamation doesn't reclaim memory from > cgroups whose memory usage is below the value. > setting it too high is a dangerous operation. > > it's against 2.6.24-rc3-mm2 + memory.swappiness patch i posted here yesterday. > but it's logically independent from the swappiness patch. > Hi, I think it's a good approach, but we need to make the reclaim dependent on priority. I think that if we stop reclaiming from a cgroup even though there is high global pressure, it might be a bad idea. > todo: > - restrict non-root user's operation ragardless of owner of cgroupfs files? Sorry, I don't understand this and I think using file permissions is a good idea. > - make oom killer aware of this? > Yes, we might also need a memory controller version of 1. overcommit 2. oom_adj* parameters > YAMAMOTO Takashi > > > Signed-off-by: YAMAMOTO Takashi <yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org> > --- > > --- linux-2.6.24-rc3-mm2-swappiness/include/linux/memcontrol.h.BACKUP2 2007-12-03 13:52:37.000000000 +0900 > +++ linux-2.6.24-rc3-mm2-swappiness/include/linux/memcontrol.h 2007-12-03 14:07:45.000000000 +0900 > @@ -47,6 +47,7 @@ extern int mem_cgroup_cache_charge(struc > gfp_t gfp_mask); > int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); > extern int mem_cgroup_swappiness(struct mem_cgroup *mem); > +extern int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem); > > static inline struct mem_cgroup *mm_cgroup(const struct mm_struct *mm) > { > --- linux-2.6.24-rc3-mm2-swappiness/mm/memcontrol.c.BACKUP2 2007-12-03 13:52:13.000000000 +0900 > +++ linux-2.6.24-rc3-mm2-swappiness/mm/memcontrol.c 2007-12-04 11:42:07.000000000 +0900 > @@ -134,6 +134,7 @@ struct mem_cgroup { > unsigned long control_type; /* control RSS or RSS+Pagecache */ > int prev_priority; /* for recording reclaim priority */ > unsigned int swappiness; /* swappiness */ > + unsigned long long min_usage; /* XXX should be a part of res_counter? */ > /* > * statistics. > */ > @@ -1096,6 +1097,22 @@ static u64 mem_cgroup_swappiness_read(st > return mem->swappiness; > } > > +static int mem_cgroup_min_usage_write(struct cgroup *cont, struct cftype *cft, > + u64 val) > +{ > + struct mem_cgroup *mem = mem_cgroup_from_cont(cont); > + > + mem->min_usage = val; > + return 0; > +} > + > +static u64 mem_cgroup_min_usage_read(struct cgroup *cont, struct cftype *cft) > +{ > + struct mem_cgroup *mem = mem_cgroup_from_cont(cont); > + > + return mem->min_usage; > +} > + > static struct cftype mem_cgroup_files[] = { > { > .name = "usage_in_bytes", > @@ -1132,6 +1149,11 @@ static struct cftype mem_cgroup_files[] > .write_uint = mem_cgroup_swappiness_write, > .read_uint = mem_cgroup_swappiness_read, > }, > + { > + .name = "min_usage_in_bytes", > + .write_uint = mem_cgroup_min_usage_write, > + .read_uint = mem_cgroup_min_usage_read, > + }, > }; > > /* XXX probably it's better to move try_to_free_mem_cgroup_pages to > @@ -1142,6 +1164,36 @@ int mem_cgroup_swappiness(struct mem_cgr > return mem->swappiness; > } > > +int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem1) > +{ > + struct page_cgroup *pc; > + int result = 1; > + > + if (mem1 != NULL) > + return 1; > + > + lock_page_cgroup(page); > + pc = page_get_page_cgroup(page); > + if (pc) { > + struct mem_cgroup *mem2 = pc->mem_cgroup; > + unsigned long long min_usage; > + > + BUG_ON(mem2 == NULL); > + min_usage = mem2->min_usage; > + if (min_usage != 0) { > + unsigned long flags; > + > + spin_lock_irqsave(&mem2->res.lock, flags); > + if (mem2->res.usage <= min_usage) > + result = 0; > + spin_unlock_irqrestore(&mem2->res.lock, flags); > + } > + } > + unlock_page_cgroup(page); > + > + return result; > +} > + > static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) > { > struct mem_cgroup_per_node *pn; > --- linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c.BACKUP2 2007-12-03 13:52:22.000000000 +0900 > +++ linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c 2007-12-03 14:11:42.000000000 +0900 > @@ -531,6 +531,11 @@ static unsigned long shrink_page_list(st > referenced && page_mapping_inuse(page)) > goto activate_locked; > > +#ifdef CONFIG_CGROUP_MEM_CONT > + if (!mem_cgroup_canreclaim(page, sc->mem_cgroup)) > + goto activate_locked; > +#endif /* CONFIG_CGROUP_MEM_CONT */ > + > #ifdef CONFIG_SWAP > /* > * Anonymous process memory has backing store? > @@ -1122,6 +1127,12 @@ static void shrink_active_list(unsigned > list_add(&page->lru, &l_active); > continue; > } > +#ifdef CONFIG_CGROUP_MEM_CONT > + if (!mem_cgroup_canreclaim(page, sc->mem_cgroup)) { > + list_add(&page->lru, &l_active); > + continue; > + } > +#endif /* CONFIG_CGROUP_MEM_CONT */ > list_add(&page->lru, &l_inactive); > } > > _______________________________________________ > Containers mailing list > Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org > https://lists.linux-foundation.org/mailman/listinfo/containers -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL ^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH][RFC] memory.min_usage again [not found] ` <20071204040934.44AF41D0BA3-Pcsii4f/SVk@public.gmane.org> ` (2 preceding siblings ...) 2007-12-04 13:30 ` Balbir Singh @ 2008-09-10 8:44 ` YAMAMOTO Takashi [not found] ` <20080910084443.8F7D85ACE-Pcsii4f/SVk@public.gmane.org> 3 siblings, 1 reply; 22+ messages in thread From: YAMAMOTO Takashi @ 2008-09-10 8:44 UTC (permalink / raw) To: linux-mm-Bw31MaZKKs3YtjvyW6yDsg Cc: containers-qjLDD68F18O7TbgM5vRIOg, xemul-GEFAQzZX7r8dnm+yROfE0A, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 hi, > hi, > > here's a patch to implement memory.min_usage, > which controls the minimum memory usage for a cgroup. > > it works similarly to mlock; > global memory reclamation doesn't reclaim memory from > cgroups whose memory usage is below the value. > setting it too high is a dangerous operation. > > it's against 2.6.24-rc3-mm2 + memory.swappiness patch i posted here yesterday. > but it's logically independent from the swappiness patch. > > todo: > - restrict non-root user's operation ragardless of owner of cgroupfs files? > - make oom killer aware of this? > > YAMAMOTO Takashi here's a new version adapted to 2.6.27-rc5-mm1. YAMAMOTO Takashi Signed-off-by: YAMAMOTO Takashi <yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org> --- diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ee1b2fc..fdf35bf 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -72,6 +72,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); +extern int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem); + #else /* CONFIG_CGROUP_MEM_RES_CTLR */ static inline void page_reset_bad_cgroup(struct page *page) { @@ -163,6 +165,13 @@ static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, { return 0; } + +static inline int mem_cgroup_canreclaim(struct page *page, + struct mem_cgroup *mem) +{ + return 1; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2979d22..a567bdb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -129,6 +129,7 @@ struct mem_cgroup { struct mem_cgroup_lru_info info; int prev_priority; /* for recording reclaim priority */ + unsigned long long min_usage; /* XXX should be a part of res_counter? */ /* * statistics. */ @@ -1004,6 +1005,28 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, return 0; } +static int mem_cgroup_min_usage_write(struct cgroup *cg, struct cftype *cft, + const char *buffer) +{ + struct mem_cgroup *mem = mem_cgroup_from_cont(cg); + unsigned long long val; + int error; + + error = res_counter_memparse_write_strategy(buffer, &val); + if (error) + return error; + + mem->min_usage = val; + return 0; +} + +static u64 mem_cgroup_min_usage_read(struct cgroup *cg, struct cftype *cft) +{ + struct mem_cgroup *mem = mem_cgroup_from_cont(cg); + + return mem->min_usage; +} + static struct cftype mem_cgroup_files[] = { { .name = "usage_in_bytes", @@ -1036,8 +1059,43 @@ static struct cftype mem_cgroup_files[] = { .name = "stat", .read_map = mem_control_stat_show, }, + { + .name = "min_usage_in_bytes", + .write_string = mem_cgroup_min_usage_write, + .read_u64 = mem_cgroup_min_usage_read, + }, }; +int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem1) +{ + struct page_cgroup *pc; + int result = 1; + + if (mem1 != NULL) + return 1; + + lock_page_cgroup(page); + pc = page_get_page_cgroup(page); + if (pc) { + struct mem_cgroup *mem2 = pc->mem_cgroup; + unsigned long long min_usage; + + BUG_ON(mem2 == NULL); + min_usage = mem2->min_usage; + if (min_usage != 0) { + unsigned long flags; + + spin_lock_irqsave(&mem2->res.lock, flags); + if (mem2->res.usage <= min_usage) + result = 0; + spin_unlock_irqrestore(&mem2->res.lock, flags); + } + } + unlock_page_cgroup(page); + + return result; +} + static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) { struct mem_cgroup_per_node *pn; diff --git a/mm/vmscan.c b/mm/vmscan.c index 33e4319..ef37968 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -673,6 +673,9 @@ static unsigned long shrink_page_list(struct list_head *page_list, referenced && page_mapping_inuse(page)) goto activate_locked; + if (!mem_cgroup_canreclaim(page, sc->mem_cgroup)) + goto activate_locked; + #ifdef CONFIG_SWAP /* * Anonymous process memory has backing store? @@ -1294,7 +1297,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, continue; } - if (page_referenced(page, 0, sc->mem_cgroup)) { + if (!mem_cgroup_canreclaim(page, sc->mem_cgroup)) { + list_add(&page->lru, &l_active); + } else if (page_referenced(page, 0, sc->mem_cgroup)) { pgmoved++; if (file) { /* Referenced file pages stay active. */ ^ permalink raw reply related [flat|nested] 22+ messages in thread
[parent not found: <20080910084443.8F7D85ACE-Pcsii4f/SVk@public.gmane.org>]
* Re: [PATCH][RFC] memory.min_usage again [not found] ` <20080910084443.8F7D85ACE-Pcsii4f/SVk@public.gmane.org> @ 2008-09-10 8:53 ` KOSAKI Motohiro 2008-09-10 15:32 ` Balbir Singh 1 sibling, 0 replies; 22+ messages in thread From: KOSAKI Motohiro @ 2008-09-10 8:53 UTC (permalink / raw) To: YAMAMOTO Takashi Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, xemul-GEFAQzZX7r8dnm+yROfE0A, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 Hi >> here's a patch to implement memory.min_usage, >> which controls the minimum memory usage for a cgroup. >> >> it works similarly to mlock; >> global memory reclamation doesn't reclaim memory from >> cgroups whose memory usage is below the value. >> setting it too high is a dangerous operation. >> >> it's against 2.6.24-rc3-mm2 + memory.swappiness patch i posted here yesterday. >> but it's logically independent from the swappiness patch. >> >> todo: >> - restrict non-root user's operation ragardless of owner of cgroupfs files? >> - make oom killer aware of this? This is really no good patch description. You should write - Why you think it is useful. - Who need it. A reviewer oftern want check to match coder's intention and actual code. ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH][RFC] memory.min_usage again [not found] ` <20080910084443.8F7D85ACE-Pcsii4f/SVk@public.gmane.org> 2008-09-10 8:53 ` KOSAKI Motohiro @ 2008-09-10 15:32 ` Balbir Singh [not found] ` <48C7E87F.2080706-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org> 1 sibling, 1 reply; 22+ messages in thread From: Balbir Singh @ 2008-09-10 15:32 UTC (permalink / raw) To: YAMAMOTO Takashi Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, xemul-GEFAQzZX7r8dnm+yROfE0A YAMAMOTO Takashi wrote: > hi, > >> hi, >> >> here's a patch to implement memory.min_usage, >> which controls the minimum memory usage for a cgroup. >> >> it works similarly to mlock; >> global memory reclamation doesn't reclaim memory from >> cgroups whose memory usage is below the value. >> setting it too high is a dangerous operation. >> Looking through the code I am a little worried, what if every cgroup is below minimum value and the system is under memory pressure, do we OOM, while we could have easily reclaimed? I would prefer to see some heuristics around such a feature, mostly around the priority that do_try_to_free_pages() to determine how desperate we are for reclaiming memory. -- Balbir ^ permalink raw reply [flat|nested] 22+ messages in thread
[parent not found: <48C7E87F.2080706-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>]
* Re: [PATCH][RFC] memory.min_usage again [not found] ` <48C7E87F.2080706-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org> @ 2008-09-12 9:46 ` KAMEZAWA Hiroyuki 2008-09-29 0:43 ` YAMAMOTO Takashi 0 siblings, 1 reply; 22+ messages in thread From: KAMEZAWA Hiroyuki @ 2008-09-12 9:46 UTC (permalink / raw) To: balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, YAMAMOTO Takashi, xemul-GEFAQzZX7r8dnm+yROfE0A On Wed, 10 Sep 2008 08:32:15 -0700 Balbir Singh <balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org> wrote: > YAMAMOTO Takashi wrote: > > hi, > > > >> hi, > >> > >> here's a patch to implement memory.min_usage, > >> which controls the minimum memory usage for a cgroup. > >> > >> it works similarly to mlock; > >> global memory reclamation doesn't reclaim memory from > >> cgroups whose memory usage is below the value. > >> setting it too high is a dangerous operation. > >> > > Looking through the code I am a little worried, what if every cgroup is below > minimum value and the system is under memory pressure, do we OOM, while we could > have easily reclaimed? > > I would prefer to see some heuristics around such a feature, mostly around the > priority that do_try_to_free_pages() to determine how desperate we are for > reclaiming memory. > Taking "priority" of memory reclaim path into account is good. == static unsigned long shrink_inactive_list(unsigned long max_scan, struct zone *zone, struct scan_control *sc, int priority, int file) == How about ignore min_usage if "priority < DEF_PRIORITY - 2" ? Thanks, -Kame ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH][RFC] memory.min_usage again 2008-09-12 9:46 ` KAMEZAWA Hiroyuki @ 2008-09-29 0:43 ` YAMAMOTO Takashi 2008-09-29 2:21 ` Balbir Singh 2008-09-29 2:55 ` KAMEZAWA Hiroyuki 0 siblings, 2 replies; 22+ messages in thread From: YAMAMOTO Takashi @ 2008-09-29 0:43 UTC (permalink / raw) To: kamezawa.hiroyu; +Cc: balbir, linux-mm, containers, xemul hi, > On Wed, 10 Sep 2008 08:32:15 -0700 > Balbir Singh <balbir@linux.vnet.ibm.com> wrote: > > > YAMAMOTO Takashi wrote: > > > hi, > > > > > >> hi, > > >> > > >> here's a patch to implement memory.min_usage, > > >> which controls the minimum memory usage for a cgroup. > > >> > > >> it works similarly to mlock; > > >> global memory reclamation doesn't reclaim memory from > > >> cgroups whose memory usage is below the value. > > >> setting it too high is a dangerous operation. > > >> > > > > Looking through the code I am a little worried, what if every cgroup is below > > minimum value and the system is under memory pressure, do we OOM, while we could > > have easily reclaimed? i'm not sure what you are worring about. can you explain a little more? under the configuration, OOM is an expected behaviour. > > > > I would prefer to see some heuristics around such a feature, mostly around the > > priority that do_try_to_free_pages() to determine how desperate we are for > > reclaiming memory. > > > Taking "priority" of memory reclaim path into account is good. > > == > static unsigned long shrink_inactive_list(unsigned long max_scan, > struct zone *zone, struct scan_control *sc, > int priority, int file) > == > How about ignore min_usage if "priority < DEF_PRIORITY - 2" ? are you suggesting ignoring mlock etc as well in that case? YAMAMOTO Takashi > > > Thanks, > -Kame > > -- > To unsubscribe, send a message with 'unsubscribe linux-mm' in > the body to majordomo@kvack.org. For more info on Linux MM, > see: http://www.linux-mm.org/ . > Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH][RFC] memory.min_usage again 2008-09-29 0:43 ` YAMAMOTO Takashi @ 2008-09-29 2:21 ` Balbir Singh 2008-09-29 2:55 ` KAMEZAWA Hiroyuki 1 sibling, 0 replies; 22+ messages in thread From: Balbir Singh @ 2008-09-29 2:21 UTC (permalink / raw) To: YAMAMOTO Takashi; +Cc: kamezawa.hiroyu, linux-mm, containers, xemul YAMAMOTO Takashi wrote: > hi, > >> On Wed, 10 Sep 2008 08:32:15 -0700 >> Balbir Singh <balbir@linux.vnet.ibm.com> wrote: >> >>> YAMAMOTO Takashi wrote: >>>> hi, >>>> >>>>> hi, >>>>> >>>>> here's a patch to implement memory.min_usage, >>>>> which controls the minimum memory usage for a cgroup. >>>>> >>>>> it works similarly to mlock; >>>>> global memory reclamation doesn't reclaim memory from >>>>> cgroups whose memory usage is below the value. >>>>> setting it too high is a dangerous operation. >>>>> >>> Looking through the code I am a little worried, what if every cgroup is below >>> minimum value and the system is under memory pressure, do we OOM, while we could >>> have easily reclaimed? > > i'm not sure what you are worring about. can you explain a little more? > under the configuration, OOM is an expected behaviour. > Yes, but an OOM will violate the min_memory right? We promise not to reclaim, but we can OOM. I would rather implement them as watermarks (best effort service, rather than a guarantee). OOMing the system sounds bad, specially if memory can be reclaimed.. No? >>> I would prefer to see some heuristics around such a feature, mostly around the >>> priority that do_try_to_free_pages() to determine how desperate we are for >>> reclaiming memory. >>> >> Taking "priority" of memory reclaim path into account is good. >> >> == >> static unsigned long shrink_inactive_list(unsigned long max_scan, >> struct zone *zone, struct scan_control *sc, >> int priority, int file) >> == >> How about ignore min_usage if "priority < DEF_PRIORITY - 2" ? > > are you suggesting ignoring mlock etc as well in that case? > No.. not at all, we will get an mlock controller as well. > YAMAMOTO Takashi > -- Balbir -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH][RFC] memory.min_usage again 2008-09-29 0:43 ` YAMAMOTO Takashi 2008-09-29 2:21 ` Balbir Singh @ 2008-09-29 2:55 ` KAMEZAWA Hiroyuki 1 sibling, 0 replies; 22+ messages in thread From: KAMEZAWA Hiroyuki @ 2008-09-29 2:55 UTC (permalink / raw) To: YAMAMOTO Takashi; +Cc: balbir, linux-mm, containers, xemul On Mon, 29 Sep 2008 09:43:32 +0900 (JST) yamamoto@valinux.co.jp (YAMAMOTO Takashi) wrote: > > > > > > I would prefer to see some heuristics around such a feature, mostly around the > > > priority that do_try_to_free_pages() to determine how desperate we are for > > > reclaiming memory. > > > > > Taking "priority" of memory reclaim path into account is good. > > > > == > > static unsigned long shrink_inactive_list(unsigned long max_scan, > > struct zone *zone, struct scan_control *sc, > > int priority, int file) > > == > > How about ignore min_usage if "priority < DEF_PRIORITY - 2" ? > > are you suggesting ignoring mlock etc as well in that case? > No. Just freeing pages, which are usually freed is good. == int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem1, int priority) { struct page_cgroup *pc; int result = 1; if (mem1 != NULL) return 1; /* global lru is busy ? */ if (priority < DEF_PEIORITY - 1) return 1; .... } == Maybe min_usage can works as *soft* mlock by this. Or another idea. Making memory.min_usage as memory.reclaim_priority_level and allows priority_level == 0 => can_reclaim() returns 1 always. priority_level == 1 => can_reclaim returns 1 if priority < DEF_PRIORITY-1. priority_level == 2 => can_reclaim returns 1 if priority < DEF_PRIORITY-2. (and only 0,1,2 are allowed.) setting min_usage will not be prefered by lru management people. This can work as "advice" to global lru. Hmm ? Thanks, -Kame -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 22+ messages in thread
end of thread, other threads:[~2008-09-29 2:55 UTC | newest]
Thread overview: 22+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-12-04 4:09 [PATCH] memory.min_usage YAMAMOTO Takashi
[not found] ` <20071204040934.44AF41D0BA3-Pcsii4f/SVk@public.gmane.org>
2007-12-04 5:58 ` KAMEZAWA Hiroyuki
[not found] ` <20071204145831.da477f5b.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-04 7:01 ` YAMAMOTO Takashi
[not found] ` <20071204070122.16DDD1D0BCD-Pcsii4f/SVk@public.gmane.org>
2007-12-04 7:27 ` KAMEZAWA Hiroyuki
[not found] ` <20071204162753.c28cc550.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-04 7:46 ` KAMEZAWA Hiroyuki
[not found] ` <20071204164615.fc871e44.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-04 7:58 ` YAMAMOTO Takashi
[not found] ` <20071204075854.5850B1D0BFA-Pcsii4f/SVk@public.gmane.org>
2007-12-04 10:54 ` [PATCH] memory.min_usage (seqlock for res_counter) KAMEZAWA Hiroyuki
[not found] ` <20071204195436.77fc911b.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-04 11:10 ` Pavel Emelyanov
[not found] ` <475535B2.1020801-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2007-12-05 0:34 ` KAMEZAWA Hiroyuki
[not found] ` <20071205093455.0f46b456.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-05 9:12 ` Pavel Emelyanov
[not found] ` <47566B76.6020102-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2007-12-05 9:29 ` KAMEZAWA Hiroyuki
[not found] ` <20071205182927.41d140a7.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-05 9:32 ` Pavel Emelyanov
2007-12-04 6:03 ` [Devel] [PATCH] memory.min_usage Paul Menage
[not found] ` <6599ad830712032203t48e455cbjd25a40cf93cb453f-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-12-04 7:02 ` YAMAMOTO Takashi
2007-12-04 13:30 ` Balbir Singh
2008-09-10 8:44 ` [PATCH][RFC] memory.min_usage again YAMAMOTO Takashi
[not found] ` <20080910084443.8F7D85ACE-Pcsii4f/SVk@public.gmane.org>
2008-09-10 8:53 ` KOSAKI Motohiro
2008-09-10 15:32 ` Balbir Singh
[not found] ` <48C7E87F.2080706-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2008-09-12 9:46 ` KAMEZAWA Hiroyuki
2008-09-29 0:43 ` YAMAMOTO Takashi
2008-09-29 2:21 ` Balbir Singh
2008-09-29 2:55 ` KAMEZAWA Hiroyuki
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox