All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] memory.min_usage
@ 2007-12-04  4:09 YAMAMOTO Takashi
       [not found] ` <20071204040934.44AF41D0BA3-Pcsii4f/SVk@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: YAMAMOTO Takashi @ 2007-12-04  4:09 UTC (permalink / raw)
  To: balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg

hi,

here's a patch to implement memory.min_usage,
which controls the minimum memory usage for a cgroup.

it works similarly to mlock;
global memory reclamation doesn't reclaim memory from
cgroups whose memory usage is below the value.
setting it too high is a dangerous operation.

it's against 2.6.24-rc3-mm2 + memory.swappiness patch i posted here yesterday.
but it's logically independent from the swappiness patch.

todo:
- restrict non-root user's operation ragardless of owner of cgroupfs files?
- make oom killer aware of this?

YAMAMOTO Takashi


Signed-off-by: YAMAMOTO Takashi <yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org>
---

--- linux-2.6.24-rc3-mm2-swappiness/include/linux/memcontrol.h.BACKUP2	2007-12-03 13:52:37.000000000 +0900
+++ linux-2.6.24-rc3-mm2-swappiness/include/linux/memcontrol.h	2007-12-03 14:07:45.000000000 +0900
@@ -47,6 +47,7 @@ extern int mem_cgroup_cache_charge(struc
 					gfp_t gfp_mask);
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 extern int mem_cgroup_swappiness(struct mem_cgroup *mem);
+extern int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem);
 
 static inline struct mem_cgroup *mm_cgroup(const struct mm_struct *mm)
 {
--- linux-2.6.24-rc3-mm2-swappiness/mm/memcontrol.c.BACKUP2	2007-12-03 13:52:13.000000000 +0900
+++ linux-2.6.24-rc3-mm2-swappiness/mm/memcontrol.c	2007-12-04 11:42:07.000000000 +0900
@@ -134,6 +134,7 @@ struct mem_cgroup {
 	unsigned long control_type;	/* control RSS or RSS+Pagecache */
 	int	prev_priority;	/* for recording reclaim priority */
 	unsigned int swappiness;	/* swappiness */
+	unsigned long long min_usage; /* XXX should be a part of res_counter? */
 	/*
 	 * statistics.
 	 */
@@ -1096,6 +1097,22 @@ static u64 mem_cgroup_swappiness_read(st
 	return mem->swappiness;
 }
 
+static int mem_cgroup_min_usage_write(struct cgroup *cont, struct cftype *cft,
+				       u64 val)
+{
+	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+
+	mem->min_usage = val;
+	return 0;
+}
+
+static u64 mem_cgroup_min_usage_read(struct cgroup *cont, struct cftype *cft)
+{
+	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+
+	return mem->min_usage;
+}
+
 static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -1132,6 +1149,11 @@ static struct cftype mem_cgroup_files[] 
 		.write_uint = mem_cgroup_swappiness_write,
 		.read_uint = mem_cgroup_swappiness_read,
 	},
+	{
+		.name = "min_usage_in_bytes",
+		.write_uint = mem_cgroup_min_usage_write,
+		.read_uint = mem_cgroup_min_usage_read,
+	},
 };
 
 /* XXX probably it's better to move try_to_free_mem_cgroup_pages to
@@ -1142,6 +1164,36 @@ int mem_cgroup_swappiness(struct mem_cgr
 	return mem->swappiness;
 }
 
+int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem1)
+{
+	struct page_cgroup *pc;
+	int result = 1;
+
+	if (mem1 != NULL)
+		return 1;
+
+	lock_page_cgroup(page);
+	pc = page_get_page_cgroup(page);
+	if (pc) {
+		struct mem_cgroup *mem2 = pc->mem_cgroup;
+		unsigned long long min_usage;
+
+		BUG_ON(mem2 == NULL);
+		min_usage = mem2->min_usage;
+		if (min_usage != 0) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&mem2->res.lock, flags);
+			if (mem2->res.usage <= min_usage)
+				result = 0;
+			spin_unlock_irqrestore(&mem2->res.lock, flags);
+		}
+	}
+	unlock_page_cgroup(page);
+
+	return result;
+}
+
 static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 {
 	struct mem_cgroup_per_node *pn;
--- linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c.BACKUP2	2007-12-03 13:52:22.000000000 +0900
+++ linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c	2007-12-03 14:11:42.000000000 +0900
@@ -531,6 +531,11 @@ static unsigned long shrink_page_list(st
 					referenced && page_mapping_inuse(page))
 			goto activate_locked;
 
+#ifdef CONFIG_CGROUP_MEM_CONT
+		if (!mem_cgroup_canreclaim(page, sc->mem_cgroup))
+			goto activate_locked;
+#endif /* CONFIG_CGROUP_MEM_CONT */
+
 #ifdef CONFIG_SWAP
 		/*
 		 * Anonymous process memory has backing store?
@@ -1122,6 +1127,12 @@ static void shrink_active_list(unsigned 
 			list_add(&page->lru, &l_active);
 			continue;
 		}
+#ifdef CONFIG_CGROUP_MEM_CONT
+		if (!mem_cgroup_canreclaim(page, sc->mem_cgroup)) {
+			list_add(&page->lru, &l_active);
+			continue;
+		}
+#endif /* CONFIG_CGROUP_MEM_CONT */
 		list_add(&page->lru, &l_inactive);
 	}

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] memory.min_usage
       [not found] ` <20071204040934.44AF41D0BA3-Pcsii4f/SVk@public.gmane.org>
@ 2007-12-04  5:58   ` KAMEZAWA Hiroyuki
       [not found]     ` <20071204145831.da477f5b.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
  2007-12-04  6:03   ` [Devel] [PATCH] memory.min_usage Paul Menage
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 26+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-12-04  5:58 UTC (permalink / raw)
  To: YAMAMOTO Takashi
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
	xemul-GEFAQzZX7r8dnm+yROfE0A,
	balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8

On Tue,  4 Dec 2007 13:09:34 +0900 (JST)
yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org (YAMAMOTO Takashi) wrote:

> hi,
> 
> here's a patch to implement memory.min_usage,
> which controls the minimum memory usage for a cgroup.
> 
Thanks.

> todo:
> - restrict non-root user's operation ragardless of owner of cgroupfs files?

I like 'only-for-root' user. or CAP_SYS_RESOURCE.

> - make oom killer aware of this?
>
For OOM-Killer, IMHO, no care is necessary (now).
This function can be considered as a kind of mlock(). A user can be aware of
memory usage.
But, we may need some logic/reason to request this function.

> +	unsigned long long min_usage; /* XXX should be a part of res_counter? */
>  	/*
Maybe res_counter is better. (Added CC: to Pavel Emelianov)


> +int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem1)
> +{
> +	struct page_cgroup *pc;
> +	int result = 1;
> +
> +	if (mem1 != NULL)
> +		return 1;
> +
> +	lock_page_cgroup(page);
> +	pc = page_get_page_cgroup(page);
> +	if (pc) {
> +		struct mem_cgroup *mem2 = pc->mem_cgroup;
> +		unsigned long long min_usage;
> +
> +		BUG_ON(mem2 == NULL);
> +		min_usage = mem2->min_usage;
> +		if (min_usage != 0) {
> +			unsigned long flags;
> +
> +			spin_lock_irqsave(&mem2->res.lock, flags);
> +			if (mem2->res.usage <= min_usage)
> +				result = 0;
> +			spin_unlock_irqrestore(&mem2->res.lock, flags);
> +		}
> +	}
> +	unlock_page_cgroup(page);
> +
> +	return result;
> +}
> +

How about adding lock_and_get_page_cgroup(page)/put_page_cgroup() ?
==
struct page_cgroup *pc lock_and_get_page_cgroup(page)
{
	struct page_cgroup *pc;

	lock_page_cgroup(page);
	pc = page_get_page_cgroup(page);
	if (atomic_inc_not_zero(&pc->ref_cnt)) 
		pc = NULL;
	unlock_page_cgroup(page);
	return pc;
}

struct page_cgroup *pc put_page_cgroup(struct page_cgroup *pc)
{
	mem_cgroup_uncharge(pc);
}
==

BTW, how about add a status to res_counter ?
My (based on your) current patch uses watermark_state.

Maybe we can change it to be resource_state and show following status.

RES_STATE_HIT_LIMIT,
RES_STATE_ABOVE_HWATER,
RES_STATE_ABOVE_LWATER,
RES_STATE_STABLE, ?
RES_STATE_BELOW_MIN,

Useful ?


>  static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
>  {
>  	struct mem_cgroup_per_node *pn;
> --- linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c.BACKUP2	2007-12-03 13:52:22.000000000 +0900
> +++ linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c	2007-12-03 14:11:42.000000000 +0900
> @@ -531,6 +531,11 @@ static unsigned long shrink_page_list(st
>  					referenced && page_mapping_inuse(page))
>  			goto activate_locked;
>  
> +#ifdef CONFIG_CGROUP_MEM_CONT
> +		if (!mem_cgroup_canreclaim(page, sc->mem_cgroup))
> +			goto activate_locked;
> +#endif /* CONFIG_CGROUP_MEM_CONT */
> +

Maybe 
==
if (scan_global_lru(sc) && !
    mem_cgroup_canreclaim(page, sc->mem-cgroup))
    goto activate_locked:
==



>  #ifdef CONFIG_SWAP

>  		/*
>  		 * Anonymous process memory has backing store?
> @@ -1122,6 +1127,12 @@ static void shrink_active_list(unsigned 
>  			list_add(&page->lru, &l_active);
>  			continue;
>  		}
> +#ifdef CONFIG_CGROUP_MEM_CONT
> +		if (!mem_cgroup_canreclaim(page, sc->mem_cgroup)) {
> +			list_add(&page->lru, &l_active);
> +			continue;
> +		}
here too.

Thanks,
-Kame

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Devel] [PATCH] memory.min_usage
       [not found] ` <20071204040934.44AF41D0BA3-Pcsii4f/SVk@public.gmane.org>
  2007-12-04  5:58   ` KAMEZAWA Hiroyuki
@ 2007-12-04  6:03   ` Paul Menage
       [not found]     ` <6599ad830712032203t48e455cbjd25a40cf93cb453f-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2007-12-04 13:30   ` Balbir Singh
  2008-09-10  8:44     ` YAMAMOTO Takashi
  3 siblings, 1 reply; 26+ messages in thread
From: Paul Menage @ 2007-12-04  6:03 UTC (permalink / raw)
  To: YAMAMOTO Takashi
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
	balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8

On Dec 3, 2007 8:09 PM, YAMAMOTO Takashi <yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org> wrote:
> +static u64 mem_cgroup_min_usage_read(struct cgroup *cont, struct cftype *cft)
> +{
> +       struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
> +
> +       return mem->min_usage;
> +}
> +

This won't be atomic on a 32-bit arch, and I don't think you have any locking.

Paul

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] memory.min_usage
       [not found]     ` <20071204145831.da477f5b.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
@ 2007-12-04  7:01       ` YAMAMOTO Takashi
       [not found]         ` <20071204070122.16DDD1D0BCD-Pcsii4f/SVk@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: YAMAMOTO Takashi @ 2007-12-04  7:01 UTC (permalink / raw)
  To: kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
	balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	xemul-GEFAQzZX7r8dnm+yROfE0A

> > +int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem1)
> > +{
> > +	struct page_cgroup *pc;
> > +	int result = 1;
> > +
> > +	if (mem1 != NULL)
> > +		return 1;
> > +
> > +	lock_page_cgroup(page);
> > +	pc = page_get_page_cgroup(page);
> > +	if (pc) {
> > +		struct mem_cgroup *mem2 = pc->mem_cgroup;
> > +		unsigned long long min_usage;
> > +
> > +		BUG_ON(mem2 == NULL);
> > +		min_usage = mem2->min_usage;
> > +		if (min_usage != 0) {
> > +			unsigned long flags;
> > +
> > +			spin_lock_irqsave(&mem2->res.lock, flags);
> > +			if (mem2->res.usage <= min_usage)
> > +				result = 0;
> > +			spin_unlock_irqrestore(&mem2->res.lock, flags);
> > +		}
> > +	}
> > +	unlock_page_cgroup(page);
> > +
> > +	return result;
> > +}
> > +
> 
> How about adding lock_and_get_page_cgroup(page)/put_page_cgroup() ?
> ==
> struct page_cgroup *pc lock_and_get_page_cgroup(page)
> {
> 	struct page_cgroup *pc;
> 
> 	lock_page_cgroup(page);
> 	pc = page_get_page_cgroup(page);
> 	if (atomic_inc_not_zero(&pc->ref_cnt)) 
> 		pc = NULL;
> 	unlock_page_cgroup(page);
> 	return pc;
> }
> 
> struct page_cgroup *pc put_page_cgroup(struct page_cgroup *pc)
> {
> 	mem_cgroup_uncharge(pc);
> }
> ==

i'm not sure if it's a good idea given that reference counting (atomic_foo)
has its own costs.

> BTW, how about add a status to res_counter ?
> My (based on your) current patch uses watermark_state.
> 
> Maybe we can change it to be resource_state and show following status.
> 
> RES_STATE_HIT_LIMIT,
> RES_STATE_ABOVE_HWATER,
> RES_STATE_ABOVE_LWATER,
> RES_STATE_STABLE, ?
> RES_STATE_BELOW_MIN,
> 
> Useful ?

how about making res_counter use seq_lock?

> >  static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
> >  {
> >  	struct mem_cgroup_per_node *pn;
> > --- linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c.BACKUP2	2007-12-03 13:52:22.000000000 +0900
> > +++ linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c	2007-12-03 14:11:42.000000000 +0900
> > @@ -531,6 +531,11 @@ static unsigned long shrink_page_list(st
> >  					referenced && page_mapping_inuse(page))
> >  			goto activate_locked;
> >  
> > +#ifdef CONFIG_CGROUP_MEM_CONT
> > +		if (!mem_cgroup_canreclaim(page, sc->mem_cgroup))
> > +			goto activate_locked;
> > +#endif /* CONFIG_CGROUP_MEM_CONT */
> > +
> 
> Maybe 
> ==
> if (scan_global_lru(sc) && !
>     mem_cgroup_canreclaim(page, sc->mem-cgroup))
>     goto activate_locked:
> ==

i don't think the decision belongs to callers.
(at least it wasn't my intention.)

YAMAMOTO Takashi

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Devel] [PATCH] memory.min_usage
       [not found]     ` <6599ad830712032203t48e455cbjd25a40cf93cb453f-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2007-12-04  7:02       ` YAMAMOTO Takashi
  0 siblings, 0 replies; 26+ messages in thread
From: YAMAMOTO Takashi @ 2007-12-04  7:02 UTC (permalink / raw)
  To: menage-hpIqsD4AKlfQT0dZR+AlfA
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
	balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8

> On Dec 3, 2007 8:09 PM, YAMAMOTO Takashi <yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org> wrote:
> > +static u64 mem_cgroup_min_usage_read(struct cgroup *cont, struct cftype *cft)
> > +{
> > +       struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
> > +
> > +       return mem->min_usage;
> > +}
> > +
> 
> This won't be atomic on a 32-bit arch, and I don't think you have any locking.
> 
> Paul

sure.  probably it's better to make it a part of res_counter.

YAMAMOTO Takashi

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] memory.min_usage
       [not found]         ` <20071204070122.16DDD1D0BCD-Pcsii4f/SVk@public.gmane.org>
@ 2007-12-04  7:27           ` KAMEZAWA Hiroyuki
       [not found]             ` <20071204162753.c28cc550.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-12-04  7:27 UTC (permalink / raw)
  To: YAMAMOTO Takashi
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
	balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	xemul-GEFAQzZX7r8dnm+yROfE0A

On Tue,  4 Dec 2007 16:01:21 +0900 (JST)
yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org (YAMAMOTO Takashi) wrote:

> > BTW, how about add a status to res_counter ?
> > My (based on your) current patch uses watermark_state.
> > 
> > Maybe we can change it to be resource_state and show following status.
> > 
> > RES_STATE_HIT_LIMIT,
> > RES_STATE_ABOVE_HWATER,
> > RES_STATE_ABOVE_LWATER,
> > RES_STATE_STABLE, ?
> > RES_STATE_BELOW_MIN,
> > 
> > Useful ?
> 
> how about making res_counter use seq_lock?
> 
I thought of that when I wrote hi/low watermark patches.
But I didn't try....
Hmm, but maybe worth trying.

Pavel, can we change res_counter->lock to seq_lock ?
If we can, I'll write a patch.

Thanks,
-Kame

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] memory.min_usage
       [not found]             ` <20071204162753.c28cc550.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
@ 2007-12-04  7:46               ` KAMEZAWA Hiroyuki
       [not found]                 ` <20071204164615.fc871e44.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-12-04  7:46 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
	YAMAMOTO Takashi, xemul-GEFAQzZX7r8dnm+yROfE0A,
	balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8

On Tue, 4 Dec 2007 16:27:53 +0900
KAMEZAWA Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> wrote:
> I thought of that when I wrote hi/low watermark patches.
> But I didn't try....
> Hmm, but maybe worth trying.
> 
But even with seqlock, we'll have to disable irq.
And maybe my current high/low patch needs smp_rmb().

I'll consider a bit more.

Thanks,
-Kame

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] memory.min_usage
       [not found]                 ` <20071204164615.fc871e44.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
@ 2007-12-04  7:58                   ` YAMAMOTO Takashi
       [not found]                     ` <20071204075854.5850B1D0BFA-Pcsii4f/SVk@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: YAMAMOTO Takashi @ 2007-12-04  7:58 UTC (permalink / raw)
  To: kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
	balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	xemul-GEFAQzZX7r8dnm+yROfE0A

> But even with seqlock, we'll have to disable irq.

for writers, sure.
readers don't need to disable irq.

YAMAMOTO Takashi

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] memory.min_usage (seqlock for res_counter)
       [not found]                     ` <20071204075854.5850B1D0BFA-Pcsii4f/SVk@public.gmane.org>
@ 2007-12-04 10:54                       ` KAMEZAWA Hiroyuki
       [not found]                         ` <20071204195436.77fc911b.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-12-04 10:54 UTC (permalink / raw)
  To: xemul-GEFAQzZX7r8dnm+yROfE0A
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
	YAMAMOTO Takashi, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8

This is seqlock version res_counter.
Maybe this this will reduce # of spin_lock.

Pavel-san, How about this ?

-Kame
==
resource counter's spinlock can be seqlock. (res_counter doesn't include
any pointers.)

And it will be good to avoid atomic ops if we can when we just checks
value.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>




 include/linux/res_counter.h |   24 +++++++-----------------
 kernel/res_counter.c        |   14 +++++++-------
 2 files changed, 14 insertions(+), 24 deletions(-)

Index: linux-2.6.24-rc3-mm2/include/linux/res_counter.h
===================================================================
--- linux-2.6.24-rc3-mm2.orig/include/linux/res_counter.h
+++ linux-2.6.24-rc3-mm2/include/linux/res_counter.h
@@ -12,6 +12,7 @@
  */
 
 #include <linux/cgroup.h>
+#include <linux/seqlock.h>
 
 /*
  * The core object. the cgroup that wishes to account for some
@@ -36,7 +37,7 @@ struct res_counter {
 	 * the lock to protect all of the above.
 	 * the routines below consider this to be IRQ-safe
 	 */
-	spinlock_t lock;
+	seqlock_t lock;
 };
 
 /*
@@ -101,27 +102,17 @@ int res_counter_charge(struct res_counte
 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
 void res_counter_uncharge(struct res_counter *counter, unsigned long val);
 
-static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
-{
-	if (cnt->usage < cnt->limit)
-		return true;
-
-	return false;
-}
-
-/*
- * Helper function to detect if the cgroup is within it's limit or
- * not. It's currently called from cgroup_rss_prepare()
- */
 static inline bool res_counter_check_under_limit(struct res_counter *cnt)
 {
 	bool ret;
-	unsigned long flags;
+	unsigned long seq;
 
-	spin_lock_irqsave(&cnt->lock, flags);
-	ret = res_counter_limit_check_locked(cnt);
-	spin_unlock_irqrestore(&cnt->lock, flags);
+	do {
+		seq = read_seqbegin(&cnt->lock);
+		ret = (cnt->usage < cnt->limit);
+	} while (read_seqretry(&cnt->lock, seq));
 	return ret;
 }
 
+
 #endif
Index: linux-2.6.24-rc3-mm2/kernel/res_counter.c
===================================================================
--- linux-2.6.24-rc3-mm2.orig/kernel/res_counter.c
+++ linux-2.6.24-rc3-mm2/kernel/res_counter.c
@@ -15,7 +15,7 @@
 
 void res_counter_init(struct res_counter *counter)
 {
-	spin_lock_init(&counter->lock);
+	seqlock_init(&counter->lock);
 	counter->limit = (unsigned long long)LLONG_MAX;
 }
 
@@ -35,9 +35,9 @@ int res_counter_charge(struct res_counte
 	int ret;
 	unsigned long flags;
 
-	spin_lock_irqsave(&counter->lock, flags);
+	write_seqlock_irqsave(&counter->lock, flags);
 	ret = res_counter_charge_locked(counter, val);
-	spin_unlock_irqrestore(&counter->lock, flags);
+	write_sequnlock_irqrestore(&counter->lock, flags);
 	return ret;
 }
 
@@ -53,9 +53,9 @@ void res_counter_uncharge(struct res_cou
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&counter->lock, flags);
+	write_seqlock_irqsave(&counter->lock, flags);
 	res_counter_uncharge_locked(counter, val);
-	spin_unlock_irqrestore(&counter->lock, flags);
+	write_sequnlock_irqrestore(&counter->lock, flags);
 }
 
 
@@ -122,10 +122,10 @@ ssize_t res_counter_write(struct res_cou
 		if (*end != '\0')
 			goto out_free;
 	}
-	spin_lock_irqsave(&counter->lock, flags);
+	write_seqlock_irqsave(&counter->lock, flags);
 	val = res_counter_member(counter, member);
 	*val = tmp;
-	spin_unlock_irqrestore(&counter->lock, flags);
+	write_sequnlock_irqrestore(&counter->lock, flags);
 	ret = nbytes;
 out_free:
 	kfree(buf);

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] memory.min_usage (seqlock for res_counter)
       [not found]                         ` <20071204195436.77fc911b.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
@ 2007-12-04 11:10                           ` Pavel Emelyanov
       [not found]                             ` <475535B2.1020801-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Pavel Emelyanov @ 2007-12-04 11:10 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
	YAMAMOTO Takashi, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8

KAMEZAWA Hiroyuki wrote:
> This is seqlock version res_counter.
> Maybe this this will reduce # of spin_lock.
> 
> Pavel-san, How about this ?

AFAIS the readlock is used only in the check_under_limit(),
but I think, that even if we read usage and limit values
in this case non-atomically, this won't result in any 
dramatic sequence at all. No?

On the other hand, we make the charge/uncharge routines
work longer :(

> -Kame
> ==
> resource counter's spinlock can be seqlock. (res_counter doesn't include
> any pointers.)
> 
> And it will be good to avoid atomic ops if we can when we just checks
> value.
> 
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
> 
> 
> 
> 
>  include/linux/res_counter.h |   24 +++++++-----------------
>  kernel/res_counter.c        |   14 +++++++-------
>  2 files changed, 14 insertions(+), 24 deletions(-)
> 
> Index: linux-2.6.24-rc3-mm2/include/linux/res_counter.h
> ===================================================================
> --- linux-2.6.24-rc3-mm2.orig/include/linux/res_counter.h
> +++ linux-2.6.24-rc3-mm2/include/linux/res_counter.h
> @@ -12,6 +12,7 @@
>   */
>  
>  #include <linux/cgroup.h>
> +#include <linux/seqlock.h>
>  
>  /*
>   * The core object. the cgroup that wishes to account for some
> @@ -36,7 +37,7 @@ struct res_counter {
>  	 * the lock to protect all of the above.
>  	 * the routines below consider this to be IRQ-safe
>  	 */
> -	spinlock_t lock;
> +	seqlock_t lock;
>  };
>  
>  /*
> @@ -101,27 +102,17 @@ int res_counter_charge(struct res_counte
>  void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
>  void res_counter_uncharge(struct res_counter *counter, unsigned long val);
>  
> -static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
> -{
> -	if (cnt->usage < cnt->limit)
> -		return true;
> -
> -	return false;
> -}
> -
> -/*
> - * Helper function to detect if the cgroup is within it's limit or
> - * not. It's currently called from cgroup_rss_prepare()
> - */
>  static inline bool res_counter_check_under_limit(struct res_counter *cnt)
>  {
>  	bool ret;
> -	unsigned long flags;
> +	unsigned long seq;
>  
> -	spin_lock_irqsave(&cnt->lock, flags);
> -	ret = res_counter_limit_check_locked(cnt);
> -	spin_unlock_irqrestore(&cnt->lock, flags);
> +	do {
> +		seq = read_seqbegin(&cnt->lock);
> +		ret = (cnt->usage < cnt->limit);
> +	} while (read_seqretry(&cnt->lock, seq));
>  	return ret;
>  }
>  
> +
>  #endif
> Index: linux-2.6.24-rc3-mm2/kernel/res_counter.c
> ===================================================================
> --- linux-2.6.24-rc3-mm2.orig/kernel/res_counter.c
> +++ linux-2.6.24-rc3-mm2/kernel/res_counter.c
> @@ -15,7 +15,7 @@
>  
>  void res_counter_init(struct res_counter *counter)
>  {
> -	spin_lock_init(&counter->lock);
> +	seqlock_init(&counter->lock);
>  	counter->limit = (unsigned long long)LLONG_MAX;
>  }
>  
> @@ -35,9 +35,9 @@ int res_counter_charge(struct res_counte
>  	int ret;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&counter->lock, flags);
> +	write_seqlock_irqsave(&counter->lock, flags);
>  	ret = res_counter_charge_locked(counter, val);
> -	spin_unlock_irqrestore(&counter->lock, flags);
> +	write_sequnlock_irqrestore(&counter->lock, flags);
>  	return ret;
>  }
>  
> @@ -53,9 +53,9 @@ void res_counter_uncharge(struct res_cou
>  {
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&counter->lock, flags);
> +	write_seqlock_irqsave(&counter->lock, flags);
>  	res_counter_uncharge_locked(counter, val);
> -	spin_unlock_irqrestore(&counter->lock, flags);
> +	write_sequnlock_irqrestore(&counter->lock, flags);
>  }
>  
>  
> @@ -122,10 +122,10 @@ ssize_t res_counter_write(struct res_cou
>  		if (*end != '\0')
>  			goto out_free;
>  	}
> -	spin_lock_irqsave(&counter->lock, flags);
> +	write_seqlock_irqsave(&counter->lock, flags);
>  	val = res_counter_member(counter, member);
>  	*val = tmp;
> -	spin_unlock_irqrestore(&counter->lock, flags);
> +	write_sequnlock_irqrestore(&counter->lock, flags);
>  	ret = nbytes;
>  out_free:
>  	kfree(buf);
> 
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] memory.min_usage
       [not found] ` <20071204040934.44AF41D0BA3-Pcsii4f/SVk@public.gmane.org>
  2007-12-04  5:58   ` KAMEZAWA Hiroyuki
  2007-12-04  6:03   ` [Devel] [PATCH] memory.min_usage Paul Menage
@ 2007-12-04 13:30   ` Balbir Singh
  2008-09-10  8:44     ` YAMAMOTO Takashi
  3 siblings, 0 replies; 26+ messages in thread
From: Balbir Singh @ 2007-12-04 13:30 UTC (permalink / raw)
  To: YAMAMOTO Takashi
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg

YAMAMOTO Takashi wrote:
> hi,
> 
> here's a patch to implement memory.min_usage,
> which controls the minimum memory usage for a cgroup.
> 
> it works similarly to mlock;
> global memory reclamation doesn't reclaim memory from
> cgroups whose memory usage is below the value.
> setting it too high is a dangerous operation.
> 
> it's against 2.6.24-rc3-mm2 + memory.swappiness patch i posted here yesterday.
> but it's logically independent from the swappiness patch.
> 

Hi,

I think it's a good approach, but we need to make the reclaim dependent
on priority. I think that if we stop reclaiming from a cgroup even
though there is high global pressure, it might be a bad idea.

> todo:
> - restrict non-root user's operation ragardless of owner of cgroupfs files?

Sorry, I don't understand this and I think using file permissions is a
good idea.

> - make oom killer aware of this?
> 

Yes, we might also need a memory controller version of

1. overcommit
2. oom_adj* parameters


> YAMAMOTO Takashi
> 
> 
> Signed-off-by: YAMAMOTO Takashi <yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org>
> ---
> 
> --- linux-2.6.24-rc3-mm2-swappiness/include/linux/memcontrol.h.BACKUP2	2007-12-03 13:52:37.000000000 +0900
> +++ linux-2.6.24-rc3-mm2-swappiness/include/linux/memcontrol.h	2007-12-03 14:07:45.000000000 +0900
> @@ -47,6 +47,7 @@ extern int mem_cgroup_cache_charge(struc
>  					gfp_t gfp_mask);
>  int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
>  extern int mem_cgroup_swappiness(struct mem_cgroup *mem);
> +extern int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem);
> 
>  static inline struct mem_cgroup *mm_cgroup(const struct mm_struct *mm)
>  {
> --- linux-2.6.24-rc3-mm2-swappiness/mm/memcontrol.c.BACKUP2	2007-12-03 13:52:13.000000000 +0900
> +++ linux-2.6.24-rc3-mm2-swappiness/mm/memcontrol.c	2007-12-04 11:42:07.000000000 +0900
> @@ -134,6 +134,7 @@ struct mem_cgroup {
>  	unsigned long control_type;	/* control RSS or RSS+Pagecache */
>  	int	prev_priority;	/* for recording reclaim priority */
>  	unsigned int swappiness;	/* swappiness */
> +	unsigned long long min_usage; /* XXX should be a part of res_counter? */
>  	/*
>  	 * statistics.
>  	 */
> @@ -1096,6 +1097,22 @@ static u64 mem_cgroup_swappiness_read(st
>  	return mem->swappiness;
>  }
> 
> +static int mem_cgroup_min_usage_write(struct cgroup *cont, struct cftype *cft,
> +				       u64 val)
> +{
> +	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
> +
> +	mem->min_usage = val;
> +	return 0;
> +}
> +
> +static u64 mem_cgroup_min_usage_read(struct cgroup *cont, struct cftype *cft)
> +{
> +	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
> +
> +	return mem->min_usage;
> +}
> +
>  static struct cftype mem_cgroup_files[] = {
>  	{
>  		.name = "usage_in_bytes",
> @@ -1132,6 +1149,11 @@ static struct cftype mem_cgroup_files[] 
>  		.write_uint = mem_cgroup_swappiness_write,
>  		.read_uint = mem_cgroup_swappiness_read,
>  	},
> +	{
> +		.name = "min_usage_in_bytes",
> +		.write_uint = mem_cgroup_min_usage_write,
> +		.read_uint = mem_cgroup_min_usage_read,
> +	},
>  };
> 
>  /* XXX probably it's better to move try_to_free_mem_cgroup_pages to
> @@ -1142,6 +1164,36 @@ int mem_cgroup_swappiness(struct mem_cgr
>  	return mem->swappiness;
>  }
> 
> +int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem1)
> +{
> +	struct page_cgroup *pc;
> +	int result = 1;
> +
> +	if (mem1 != NULL)
> +		return 1;
> +
> +	lock_page_cgroup(page);
> +	pc = page_get_page_cgroup(page);
> +	if (pc) {
> +		struct mem_cgroup *mem2 = pc->mem_cgroup;
> +		unsigned long long min_usage;
> +
> +		BUG_ON(mem2 == NULL);
> +		min_usage = mem2->min_usage;
> +		if (min_usage != 0) {
> +			unsigned long flags;
> +
> +			spin_lock_irqsave(&mem2->res.lock, flags);
> +			if (mem2->res.usage <= min_usage)
> +				result = 0;
> +			spin_unlock_irqrestore(&mem2->res.lock, flags);
> +		}
> +	}
> +	unlock_page_cgroup(page);
> +
> +	return result;
> +}
> +
>  static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
>  {
>  	struct mem_cgroup_per_node *pn;
> --- linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c.BACKUP2	2007-12-03 13:52:22.000000000 +0900
> +++ linux-2.6.24-rc3-mm2-swappiness/mm/vmscan.c	2007-12-03 14:11:42.000000000 +0900
> @@ -531,6 +531,11 @@ static unsigned long shrink_page_list(st
>  					referenced && page_mapping_inuse(page))
>  			goto activate_locked;
> 
> +#ifdef CONFIG_CGROUP_MEM_CONT
> +		if (!mem_cgroup_canreclaim(page, sc->mem_cgroup))
> +			goto activate_locked;
> +#endif /* CONFIG_CGROUP_MEM_CONT */
> +
>  #ifdef CONFIG_SWAP
>  		/*
>  		 * Anonymous process memory has backing store?
> @@ -1122,6 +1127,12 @@ static void shrink_active_list(unsigned 
>  			list_add(&page->lru, &l_active);
>  			continue;
>  		}
> +#ifdef CONFIG_CGROUP_MEM_CONT
> +		if (!mem_cgroup_canreclaim(page, sc->mem_cgroup)) {
> +			list_add(&page->lru, &l_active);
> +			continue;
> +		}
> +#endif /* CONFIG_CGROUP_MEM_CONT */
>  		list_add(&page->lru, &l_inactive);
>  	}
> 
> _______________________________________________
> Containers mailing list
> Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
> https://lists.linux-foundation.org/mailman/listinfo/containers


-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] memory.min_usage (seqlock for res_counter)
       [not found]                             ` <475535B2.1020801-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
@ 2007-12-05  0:34                               ` KAMEZAWA Hiroyuki
       [not found]                                 ` <20071205093455.0f46b456.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-12-05  0:34 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
	YAMAMOTO Takashi, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8

On Tue, 04 Dec 2007 14:10:42 +0300
Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote:

> KAMEZAWA Hiroyuki wrote:
> > This is seqlock version res_counter.
> > Maybe this this will reduce # of spin_lock.
> > 
> > Pavel-san, How about this ?
> 
> AFAIS the readlock is used only in the check_under_limit(),
> but I think, that even if we read usage and limit values
> in this case non-atomically, this won't result in any 
> dramatic sequence at all. No?
> 
Reader can detect *any* changes in res_counter member which happens
while they access res_counter between seq_begin/seq_retry.
Memory barrier and "sequence" of seq_lock guarantees this.
So..there is no dramatical situation.
(it's used for accesing xtime.)

I'm sorry if I miss your point.

Thanks,
-Kame

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] memory.min_usage (seqlock for res_counter)
       [not found]                                 ` <20071205093455.0f46b456.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
@ 2007-12-05  9:12                                   ` Pavel Emelyanov
       [not found]                                     ` <47566B76.6020102-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Pavel Emelyanov @ 2007-12-05  9:12 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
	YAMAMOTO Takashi, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8

KAMEZAWA Hiroyuki wrote:
> On Tue, 04 Dec 2007 14:10:42 +0300
> Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote:
> 
>> KAMEZAWA Hiroyuki wrote:
>>> This is seqlock version res_counter.
>>> Maybe this this will reduce # of spin_lock.
>>>
>>> Pavel-san, How about this ?
>> AFAIS the readlock is used only in the check_under_limit(),
>> but I think, that even if we read usage and limit values
>> in this case non-atomically, this won't result in any 
>> dramatic sequence at all. No?
>>
> Reader can detect *any* changes in res_counter member which happens
> while they access res_counter between seq_begin/seq_retry.
> Memory barrier and "sequence" of seq_lock guarantees this.
> So..there is no dramatical situation.
> (it's used for accesing xtime.)
> 
> I'm sorry if I miss your point.

Sorry, let me explain it in other words.

I think, that protection in reader, that guarantees that it
will see the valid result, is not very important - even if
we compare usage and limit not atomically nothing serious
will happen (in this particular case)

> Thanks,
> -Kame
> 
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] memory.min_usage (seqlock for res_counter)
       [not found]                                     ` <47566B76.6020102-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
@ 2007-12-05  9:29                                       ` KAMEZAWA Hiroyuki
       [not found]                                         ` <20071205182927.41d140a7.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-12-05  9:29 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
	YAMAMOTO Takashi, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8

On Wed, 05 Dec 2007 12:12:22 +0300
Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote:

> Sorry, let me explain it in other words.
> 
> I think, that protection in reader, that guarantees that it
> will see the valid result, is not very important - even if
> we compare usage and limit not atomically nothing serious
> will happen (in this particular case)
> 
Maybe there is no serious situation (now).
But programmers don't assume that the function may not return trustable result.
And I think it shouldn be trustable AMAP.

I'd like to use seq_lock or res_counter_state, here.

BTW, I'm wondering I should hold off my patches until 2.6.25-rc series if they
make things complex.

Thanks,
-Kame

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] memory.min_usage (seqlock for res_counter)
       [not found]                                         ` <20071205182927.41d140a7.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
@ 2007-12-05  9:32                                           ` Pavel Emelyanov
  0 siblings, 0 replies; 26+ messages in thread
From: Pavel Emelyanov @ 2007-12-05  9:32 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Linux Containers, minoura-jCdQPDEk3idL9jVzuh4AOg,
	YAMAMOTO Takashi, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8

KAMEZAWA Hiroyuki wrote:
> On Wed, 05 Dec 2007 12:12:22 +0300
> Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote:
> 
>> Sorry, let me explain it in other words.
>>
>> I think, that protection in reader, that guarantees that it
>> will see the valid result, is not very important - even if
>> we compare usage and limit not atomically nothing serious
>> will happen (in this particular case)
>>
> Maybe there is no serious situation (now).
> But programmers don't assume that the function may not return trustable result.
> And I think it shouldn be trustable AMAP.

Well... OK. Among other possible ways to achieve this goal
seqlocks is the most preferable one from my POV.

Thanks :)

> I'd like to use seq_lock or res_counter_state, here.
> 
> BTW, I'm wondering I should hold off my patches until 2.6.25-rc series if they
> make things complex.

Actually, Andrew wrote that he will pay little attention to
new functionality till 2.6.24 release, so I think that serious
patches should really be held off.

That's why I don't send the kmem controller yet :(

> Thanks,
> -Kame

Thanks,
Pavel

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH][RFC] memory.min_usage again
  2007-12-04  4:09 [PATCH] memory.min_usage YAMAMOTO Takashi
@ 2008-09-10  8:44     ` YAMAMOTO Takashi
  0 siblings, 0 replies; 26+ messages in thread
From: YAMAMOTO Takashi @ 2008-09-10  8:44 UTC (permalink / raw)
  To: linux-mm-Bw31MaZKKs3YtjvyW6yDsg
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, xemul-GEFAQzZX7r8dnm+yROfE0A,
	balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8

hi,

> hi,
> 
> here's a patch to implement memory.min_usage,
> which controls the minimum memory usage for a cgroup.
> 
> it works similarly to mlock;
> global memory reclamation doesn't reclaim memory from
> cgroups whose memory usage is below the value.
> setting it too high is a dangerous operation.
> 
> it's against 2.6.24-rc3-mm2 + memory.swappiness patch i posted here yesterday.
> but it's logically independent from the swappiness patch.
> 
> todo:
> - restrict non-root user's operation ragardless of owner of cgroupfs files?
> - make oom killer aware of this?
> 
> YAMAMOTO Takashi

here's a new version adapted to 2.6.27-rc5-mm1.

YAMAMOTO Takashi


Signed-off-by: YAMAMOTO Takashi <yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org>
---

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ee1b2fc..fdf35bf 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -72,6 +72,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
 					int priority, enum lru_list lru);
 
+extern int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem);
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 static inline void page_reset_bad_cgroup(struct page *page)
 {
@@ -163,6 +165,13 @@ static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem,
 {
 	return 0;
 }
+
+static inline int mem_cgroup_canreclaim(struct page *page,
+					struct mem_cgroup *mem)
+{
+	return 1;
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2979d22..a567bdb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -129,6 +129,7 @@ struct mem_cgroup {
 	struct mem_cgroup_lru_info info;
 
 	int	prev_priority;	/* for recording reclaim priority */
+	unsigned long long min_usage; /* XXX should be a part of res_counter? */
 	/*
 	 * statistics.
 	 */
@@ -1004,6 +1005,28 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 	return 0;
 }
 
+static int mem_cgroup_min_usage_write(struct cgroup *cg, struct cftype *cft,
+			    const char *buffer)
+{
+	struct mem_cgroup *mem = mem_cgroup_from_cont(cg);
+	unsigned long long val;
+	int error;
+
+	error = res_counter_memparse_write_strategy(buffer, &val);
+	if (error)
+		return error;
+
+	mem->min_usage = val;
+	return 0;
+}
+
+static u64 mem_cgroup_min_usage_read(struct cgroup *cg, struct cftype *cft)
+{
+	struct mem_cgroup *mem = mem_cgroup_from_cont(cg);
+
+	return mem->min_usage;
+}
+
 static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -1036,8 +1059,43 @@ static struct cftype mem_cgroup_files[] = {
 		.name = "stat",
 		.read_map = mem_control_stat_show,
 	},
+	{
+		.name = "min_usage_in_bytes",
+		.write_string = mem_cgroup_min_usage_write,
+		.read_u64 = mem_cgroup_min_usage_read,
+	},
 };
 
+int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem1)
+{
+	struct page_cgroup *pc;
+	int result = 1;
+
+	if (mem1 != NULL)
+		return 1;
+
+	lock_page_cgroup(page);
+	pc = page_get_page_cgroup(page);
+	if (pc) {
+		struct mem_cgroup *mem2 = pc->mem_cgroup;
+		unsigned long long min_usage;
+
+		BUG_ON(mem2 == NULL);
+		min_usage = mem2->min_usage;
+		if (min_usage != 0) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&mem2->res.lock, flags);
+			if (mem2->res.usage <= min_usage)
+				result = 0;
+			spin_unlock_irqrestore(&mem2->res.lock, flags);
+		}
+	}
+	unlock_page_cgroup(page);
+
+	return result;
+}
+
 static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 {
 	struct mem_cgroup_per_node *pn;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 33e4319..ef37968 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -673,6 +673,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 					referenced && page_mapping_inuse(page))
 			goto activate_locked;
 
+		if (!mem_cgroup_canreclaim(page, sc->mem_cgroup))
+			goto activate_locked;
+
 #ifdef CONFIG_SWAP
 		/*
 		 * Anonymous process memory has backing store?
@@ -1294,7 +1297,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			continue;
 		}
 
-		if (page_referenced(page, 0, sc->mem_cgroup)) {
+		if (!mem_cgroup_canreclaim(page, sc->mem_cgroup)) {
+			list_add(&page->lru, &l_active);
+		} else if (page_referenced(page, 0, sc->mem_cgroup)) {
 			pgmoved++;
 			if (file) {
 				/* Referenced file pages stay active. */

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH][RFC] memory.min_usage again
@ 2008-09-10  8:44     ` YAMAMOTO Takashi
  0 siblings, 0 replies; 26+ messages in thread
From: YAMAMOTO Takashi @ 2008-09-10  8:44 UTC (permalink / raw)
  To: linux-mm; +Cc: containers, balbir, kamezawa.hiroyu, xemul

hi,

> hi,
> 
> here's a patch to implement memory.min_usage,
> which controls the minimum memory usage for a cgroup.
> 
> it works similarly to mlock;
> global memory reclamation doesn't reclaim memory from
> cgroups whose memory usage is below the value.
> setting it too high is a dangerous operation.
> 
> it's against 2.6.24-rc3-mm2 + memory.swappiness patch i posted here yesterday.
> but it's logically independent from the swappiness patch.
> 
> todo:
> - restrict non-root user's operation ragardless of owner of cgroupfs files?
> - make oom killer aware of this?
> 
> YAMAMOTO Takashi

here's a new version adapted to 2.6.27-rc5-mm1.

YAMAMOTO Takashi


Signed-off-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
---

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ee1b2fc..fdf35bf 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -72,6 +72,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
 					int priority, enum lru_list lru);
 
+extern int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem);
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 static inline void page_reset_bad_cgroup(struct page *page)
 {
@@ -163,6 +165,13 @@ static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem,
 {
 	return 0;
 }
+
+static inline int mem_cgroup_canreclaim(struct page *page,
+					struct mem_cgroup *mem)
+{
+	return 1;
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2979d22..a567bdb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -129,6 +129,7 @@ struct mem_cgroup {
 	struct mem_cgroup_lru_info info;
 
 	int	prev_priority;	/* for recording reclaim priority */
+	unsigned long long min_usage; /* XXX should be a part of res_counter? */
 	/*
 	 * statistics.
 	 */
@@ -1004,6 +1005,28 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 	return 0;
 }
 
+static int mem_cgroup_min_usage_write(struct cgroup *cg, struct cftype *cft,
+			    const char *buffer)
+{
+	struct mem_cgroup *mem = mem_cgroup_from_cont(cg);
+	unsigned long long val;
+	int error;
+
+	error = res_counter_memparse_write_strategy(buffer, &val);
+	if (error)
+		return error;
+
+	mem->min_usage = val;
+	return 0;
+}
+
+static u64 mem_cgroup_min_usage_read(struct cgroup *cg, struct cftype *cft)
+{
+	struct mem_cgroup *mem = mem_cgroup_from_cont(cg);
+
+	return mem->min_usage;
+}
+
 static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -1036,8 +1059,43 @@ static struct cftype mem_cgroup_files[] = {
 		.name = "stat",
 		.read_map = mem_control_stat_show,
 	},
+	{
+		.name = "min_usage_in_bytes",
+		.write_string = mem_cgroup_min_usage_write,
+		.read_u64 = mem_cgroup_min_usage_read,
+	},
 };
 
+int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem1)
+{
+	struct page_cgroup *pc;
+	int result = 1;
+
+	if (mem1 != NULL)
+		return 1;
+
+	lock_page_cgroup(page);
+	pc = page_get_page_cgroup(page);
+	if (pc) {
+		struct mem_cgroup *mem2 = pc->mem_cgroup;
+		unsigned long long min_usage;
+
+		BUG_ON(mem2 == NULL);
+		min_usage = mem2->min_usage;
+		if (min_usage != 0) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&mem2->res.lock, flags);
+			if (mem2->res.usage <= min_usage)
+				result = 0;
+			spin_unlock_irqrestore(&mem2->res.lock, flags);
+		}
+	}
+	unlock_page_cgroup(page);
+
+	return result;
+}
+
 static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 {
 	struct mem_cgroup_per_node *pn;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 33e4319..ef37968 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -673,6 +673,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 					referenced && page_mapping_inuse(page))
 			goto activate_locked;
 
+		if (!mem_cgroup_canreclaim(page, sc->mem_cgroup))
+			goto activate_locked;
+
 #ifdef CONFIG_SWAP
 		/*
 		 * Anonymous process memory has backing store?
@@ -1294,7 +1297,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			continue;
 		}
 
-		if (page_referenced(page, 0, sc->mem_cgroup)) {
+		if (!mem_cgroup_canreclaim(page, sc->mem_cgroup)) {
+			list_add(&page->lru, &l_active);
+		} else if (page_referenced(page, 0, sc->mem_cgroup)) {
 			pgmoved++;
 			if (file) {
 				/* Referenced file pages stay active. */

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH][RFC] memory.min_usage again
  2008-09-10  8:44     ` YAMAMOTO Takashi
@ 2008-09-10  8:53         ` KOSAKI Motohiro
  -1 siblings, 0 replies; 26+ messages in thread
From: KOSAKI Motohiro @ 2008-09-10  8:53 UTC (permalink / raw)
  To: YAMAMOTO Takashi
  Cc: containers-qjLDD68F18O7TbgM5vRIOg,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg, xemul-GEFAQzZX7r8dnm+yROfE0A,
	balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8

Hi

>> here's a patch to implement memory.min_usage,
>> which controls the minimum memory usage for a cgroup.
>>
>> it works similarly to mlock;
>> global memory reclamation doesn't reclaim memory from
>> cgroups whose memory usage is below the value.
>> setting it too high is a dangerous operation.
>>
>> it's against 2.6.24-rc3-mm2 + memory.swappiness patch i posted here yesterday.
>> but it's logically independent from the swappiness patch.
>>
>> todo:
>> - restrict non-root user's operation ragardless of owner of cgroupfs files?
>> - make oom killer aware of this?

This is  really no good patch description.
You should write
 - Why you think it is useful.
 - Who need it.

A reviewer oftern want check to match coder's intention and actual code.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH][RFC] memory.min_usage again
@ 2008-09-10  8:53         ` KOSAKI Motohiro
  0 siblings, 0 replies; 26+ messages in thread
From: KOSAKI Motohiro @ 2008-09-10  8:53 UTC (permalink / raw)
  To: YAMAMOTO Takashi; +Cc: linux-mm, containers, balbir, kamezawa.hiroyu, xemul

Hi

>> here's a patch to implement memory.min_usage,
>> which controls the minimum memory usage for a cgroup.
>>
>> it works similarly to mlock;
>> global memory reclamation doesn't reclaim memory from
>> cgroups whose memory usage is below the value.
>> setting it too high is a dangerous operation.
>>
>> it's against 2.6.24-rc3-mm2 + memory.swappiness patch i posted here yesterday.
>> but it's logically independent from the swappiness patch.
>>
>> todo:
>> - restrict non-root user's operation ragardless of owner of cgroupfs files?
>> - make oom killer aware of this?

This is  really no good patch description.
You should write
 - Why you think it is useful.
 - Who need it.

A reviewer oftern want check to match coder's intention and actual code.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH][RFC] memory.min_usage again
  2008-09-10  8:44     ` YAMAMOTO Takashi
@ 2008-09-10 15:32         ` Balbir Singh
  -1 siblings, 0 replies; 26+ messages in thread
From: Balbir Singh @ 2008-09-10 15:32 UTC (permalink / raw)
  To: YAMAMOTO Takashi
  Cc: containers-qjLDD68F18O7TbgM5vRIOg,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg, xemul-GEFAQzZX7r8dnm+yROfE0A

YAMAMOTO Takashi wrote:
> hi,
> 
>> hi,
>>
>> here's a patch to implement memory.min_usage,
>> which controls the minimum memory usage for a cgroup.
>>
>> it works similarly to mlock;
>> global memory reclamation doesn't reclaim memory from
>> cgroups whose memory usage is below the value.
>> setting it too high is a dangerous operation.
>>

Looking through the code I am a little worried, what if every cgroup is below
minimum value and the system is under memory pressure, do we OOM, while we could
have easily reclaimed?

I would prefer to see some heuristics around such a feature, mostly around the
priority that do_try_to_free_pages() to determine how desperate we are for
reclaiming memory.

-- 
	Balbir

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH][RFC] memory.min_usage again
@ 2008-09-10 15:32         ` Balbir Singh
  0 siblings, 0 replies; 26+ messages in thread
From: Balbir Singh @ 2008-09-10 15:32 UTC (permalink / raw)
  To: YAMAMOTO Takashi; +Cc: linux-mm, containers, kamezawa.hiroyu, xemul

YAMAMOTO Takashi wrote:
> hi,
> 
>> hi,
>>
>> here's a patch to implement memory.min_usage,
>> which controls the minimum memory usage for a cgroup.
>>
>> it works similarly to mlock;
>> global memory reclamation doesn't reclaim memory from
>> cgroups whose memory usage is below the value.
>> setting it too high is a dangerous operation.
>>

Looking through the code I am a little worried, what if every cgroup is below
minimum value and the system is under memory pressure, do we OOM, while we could
have easily reclaimed?

I would prefer to see some heuristics around such a feature, mostly around the
priority that do_try_to_free_pages() to determine how desperate we are for
reclaiming memory.

-- 
	Balbir

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH][RFC] memory.min_usage again
  2008-09-10 15:32         ` Balbir Singh
@ 2008-09-12  9:46             ` KAMEZAWA Hiroyuki
  -1 siblings, 0 replies; 26+ messages in thread
From: KAMEZAWA Hiroyuki @ 2008-09-12  9:46 UTC (permalink / raw)
  To: balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
  Cc: containers-qjLDD68F18O7TbgM5vRIOg,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg, YAMAMOTO Takashi,
	xemul-GEFAQzZX7r8dnm+yROfE0A

On Wed, 10 Sep 2008 08:32:15 -0700
Balbir Singh <balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org> wrote:

> YAMAMOTO Takashi wrote:
> > hi,
> > 
> >> hi,
> >>
> >> here's a patch to implement memory.min_usage,
> >> which controls the minimum memory usage for a cgroup.
> >>
> >> it works similarly to mlock;
> >> global memory reclamation doesn't reclaim memory from
> >> cgroups whose memory usage is below the value.
> >> setting it too high is a dangerous operation.
> >>
> 
> Looking through the code I am a little worried, what if every cgroup is below
> minimum value and the system is under memory pressure, do we OOM, while we could
> have easily reclaimed?
> 
> I would prefer to see some heuristics around such a feature, mostly around the
> priority that do_try_to_free_pages() to determine how desperate we are for
> reclaiming memory.
> 
Taking "priority" of memory reclaim path into account is good.

==
static unsigned long shrink_inactive_list(unsigned long max_scan,
                        struct zone *zone, struct scan_control *sc,
                        int priority, int file)
==
How about ignore min_usage if "priority < DEF_PRIORITY - 2" ?


Thanks,
-Kame

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH][RFC] memory.min_usage again
@ 2008-09-12  9:46             ` KAMEZAWA Hiroyuki
  0 siblings, 0 replies; 26+ messages in thread
From: KAMEZAWA Hiroyuki @ 2008-09-12  9:46 UTC (permalink / raw)
  To: balbir; +Cc: YAMAMOTO Takashi, linux-mm, containers, xemul

On Wed, 10 Sep 2008 08:32:15 -0700
Balbir Singh <balbir@linux.vnet.ibm.com> wrote:

> YAMAMOTO Takashi wrote:
> > hi,
> > 
> >> hi,
> >>
> >> here's a patch to implement memory.min_usage,
> >> which controls the minimum memory usage for a cgroup.
> >>
> >> it works similarly to mlock;
> >> global memory reclamation doesn't reclaim memory from
> >> cgroups whose memory usage is below the value.
> >> setting it too high is a dangerous operation.
> >>
> 
> Looking through the code I am a little worried, what if every cgroup is below
> minimum value and the system is under memory pressure, do we OOM, while we could
> have easily reclaimed?
> 
> I would prefer to see some heuristics around such a feature, mostly around the
> priority that do_try_to_free_pages() to determine how desperate we are for
> reclaiming memory.
> 
Taking "priority" of memory reclaim path into account is good.

==
static unsigned long shrink_inactive_list(unsigned long max_scan,
                        struct zone *zone, struct scan_control *sc,
                        int priority, int file)
==
How about ignore min_usage if "priority < DEF_PRIORITY - 2" ?


Thanks,
-Kame

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH][RFC] memory.min_usage again
  2008-09-12  9:46             ` KAMEZAWA Hiroyuki
  (?)
@ 2008-09-29  0:43             ` YAMAMOTO Takashi
  2008-09-29  2:21               ` Balbir Singh
  2008-09-29  2:55               ` KAMEZAWA Hiroyuki
  -1 siblings, 2 replies; 26+ messages in thread
From: YAMAMOTO Takashi @ 2008-09-29  0:43 UTC (permalink / raw)
  To: kamezawa.hiroyu; +Cc: balbir, linux-mm, containers, xemul

hi,

> On Wed, 10 Sep 2008 08:32:15 -0700
> Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
> 
> > YAMAMOTO Takashi wrote:
> > > hi,
> > > 
> > >> hi,
> > >>
> > >> here's a patch to implement memory.min_usage,
> > >> which controls the minimum memory usage for a cgroup.
> > >>
> > >> it works similarly to mlock;
> > >> global memory reclamation doesn't reclaim memory from
> > >> cgroups whose memory usage is below the value.
> > >> setting it too high is a dangerous operation.
> > >>
> > 
> > Looking through the code I am a little worried, what if every cgroup is below
> > minimum value and the system is under memory pressure, do we OOM, while we could
> > have easily reclaimed?

i'm not sure what you are worring about.  can you explain a little more?
under the configuration, OOM is an expected behaviour.

> > 
> > I would prefer to see some heuristics around such a feature, mostly around the
> > priority that do_try_to_free_pages() to determine how desperate we are for
> > reclaiming memory.
> > 
> Taking "priority" of memory reclaim path into account is good.
> 
> ==
> static unsigned long shrink_inactive_list(unsigned long max_scan,
>                         struct zone *zone, struct scan_control *sc,
>                         int priority, int file)
> ==
> How about ignore min_usage if "priority < DEF_PRIORITY - 2" ?

are you suggesting ignoring mlock etc as well in that case?

YAMAMOTO Takashi

> 
> 
> Thanks,
> -Kame
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH][RFC] memory.min_usage again
  2008-09-29  0:43             ` YAMAMOTO Takashi
@ 2008-09-29  2:21               ` Balbir Singh
  2008-09-29  2:55               ` KAMEZAWA Hiroyuki
  1 sibling, 0 replies; 26+ messages in thread
From: Balbir Singh @ 2008-09-29  2:21 UTC (permalink / raw)
  To: YAMAMOTO Takashi; +Cc: kamezawa.hiroyu, linux-mm, containers, xemul

YAMAMOTO Takashi wrote:
> hi,
> 
>> On Wed, 10 Sep 2008 08:32:15 -0700
>> Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
>>
>>> YAMAMOTO Takashi wrote:
>>>> hi,
>>>>
>>>>> hi,
>>>>>
>>>>> here's a patch to implement memory.min_usage,
>>>>> which controls the minimum memory usage for a cgroup.
>>>>>
>>>>> it works similarly to mlock;
>>>>> global memory reclamation doesn't reclaim memory from
>>>>> cgroups whose memory usage is below the value.
>>>>> setting it too high is a dangerous operation.
>>>>>
>>> Looking through the code I am a little worried, what if every cgroup is below
>>> minimum value and the system is under memory pressure, do we OOM, while we could
>>> have easily reclaimed?
> 
> i'm not sure what you are worring about.  can you explain a little more?
> under the configuration, OOM is an expected behaviour.
> 

Yes, but an OOM will violate the min_memory right? We promise not to reclaim,
but we can OOM. I would rather implement them as watermarks (best effort
service, rather than a guarantee). OOMing the system sounds bad, specially if
memory can be reclaimed.. No?

>>> I would prefer to see some heuristics around such a feature, mostly around the
>>> priority that do_try_to_free_pages() to determine how desperate we are for
>>> reclaiming memory.
>>>
>> Taking "priority" of memory reclaim path into account is good.
>>
>> ==
>> static unsigned long shrink_inactive_list(unsigned long max_scan,
>>                         struct zone *zone, struct scan_control *sc,
>>                         int priority, int file)
>> ==
>> How about ignore min_usage if "priority < DEF_PRIORITY - 2" ?
> 
> are you suggesting ignoring mlock etc as well in that case?
>

No.. not at all, we will get an mlock controller as well.


> YAMAMOTO Takashi
> 


-- 
	Balbir

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH][RFC] memory.min_usage again
  2008-09-29  0:43             ` YAMAMOTO Takashi
  2008-09-29  2:21               ` Balbir Singh
@ 2008-09-29  2:55               ` KAMEZAWA Hiroyuki
  1 sibling, 0 replies; 26+ messages in thread
From: KAMEZAWA Hiroyuki @ 2008-09-29  2:55 UTC (permalink / raw)
  To: YAMAMOTO Takashi; +Cc: balbir, linux-mm, containers, xemul

On Mon, 29 Sep 2008 09:43:32 +0900 (JST)
yamamoto@valinux.co.jp (YAMAMOTO Takashi) wrote:
> > > 
> > > I would prefer to see some heuristics around such a feature, mostly around the
> > > priority that do_try_to_free_pages() to determine how desperate we are for
> > > reclaiming memory.
> > > 
> > Taking "priority" of memory reclaim path into account is good.
> > 
> > ==
> > static unsigned long shrink_inactive_list(unsigned long max_scan,
> >                         struct zone *zone, struct scan_control *sc,
> >                         int priority, int file)
> > ==
> > How about ignore min_usage if "priority < DEF_PRIORITY - 2" ?
> 
> are you suggesting ignoring mlock etc as well in that case?
> 

No. Just freeing pages, which are usually freed is good.

==
int mem_cgroup_canreclaim(struct page *page, struct mem_cgroup *mem1,
			  int priority)
{
	struct page_cgroup *pc;
	int result = 1;

	if (mem1 != NULL)
		return 1;
	/* global lru is busy ? */
        if (priority < DEF_PEIORITY - 1)
		return 1;
        ....
}
==
Maybe min_usage can works as *soft* mlock by this.

Or another idea.
Making memory.min_usage as memory.reclaim_priority_level and allows

  priority_level == 0 => can_reclaim() returns 1 always.
  priority_level == 1 => can_reclaim returns 1 if priority < DEF_PRIORITY-1.
  priority_level == 2 => can_reclaim returns 1 if priority < DEF_PRIORITY-2.

(and only 0,1,2 are allowed.)

setting min_usage will not be prefered by lru management people.
This can work as "advice" to global lru.

Hmm ?

Thanks,
-Kame

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2008-09-29  2:55 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-12-04  4:09 [PATCH] memory.min_usage YAMAMOTO Takashi
     [not found] ` <20071204040934.44AF41D0BA3-Pcsii4f/SVk@public.gmane.org>
2007-12-04  5:58   ` KAMEZAWA Hiroyuki
     [not found]     ` <20071204145831.da477f5b.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-04  7:01       ` YAMAMOTO Takashi
     [not found]         ` <20071204070122.16DDD1D0BCD-Pcsii4f/SVk@public.gmane.org>
2007-12-04  7:27           ` KAMEZAWA Hiroyuki
     [not found]             ` <20071204162753.c28cc550.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-04  7:46               ` KAMEZAWA Hiroyuki
     [not found]                 ` <20071204164615.fc871e44.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-04  7:58                   ` YAMAMOTO Takashi
     [not found]                     ` <20071204075854.5850B1D0BFA-Pcsii4f/SVk@public.gmane.org>
2007-12-04 10:54                       ` [PATCH] memory.min_usage (seqlock for res_counter) KAMEZAWA Hiroyuki
     [not found]                         ` <20071204195436.77fc911b.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-04 11:10                           ` Pavel Emelyanov
     [not found]                             ` <475535B2.1020801-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2007-12-05  0:34                               ` KAMEZAWA Hiroyuki
     [not found]                                 ` <20071205093455.0f46b456.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-05  9:12                                   ` Pavel Emelyanov
     [not found]                                     ` <47566B76.6020102-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2007-12-05  9:29                                       ` KAMEZAWA Hiroyuki
     [not found]                                         ` <20071205182927.41d140a7.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-12-05  9:32                                           ` Pavel Emelyanov
2007-12-04  6:03   ` [Devel] [PATCH] memory.min_usage Paul Menage
     [not found]     ` <6599ad830712032203t48e455cbjd25a40cf93cb453f-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-12-04  7:02       ` YAMAMOTO Takashi
2007-12-04 13:30   ` Balbir Singh
2008-09-10  8:44   ` [PATCH][RFC] memory.min_usage again YAMAMOTO Takashi
2008-09-10  8:44     ` YAMAMOTO Takashi
     [not found]     ` <20080910084443.8F7D85ACE-Pcsii4f/SVk@public.gmane.org>
2008-09-10  8:53       ` KOSAKI Motohiro
2008-09-10  8:53         ` KOSAKI Motohiro
2008-09-10 15:32       ` Balbir Singh
2008-09-10 15:32         ` Balbir Singh
     [not found]         ` <48C7E87F.2080706-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2008-09-12  9:46           ` KAMEZAWA Hiroyuki
2008-09-12  9:46             ` KAMEZAWA Hiroyuki
2008-09-29  0:43             ` YAMAMOTO Takashi
2008-09-29  2:21               ` Balbir Singh
2008-09-29  2:55               ` KAMEZAWA Hiroyuki

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.