[RFC 02/18] cgroup_pids: track maximum pids

public inbox for cgroups@vger.kernel.org
 help / color / mirror / Atom feed

* [RFC 02/18] cgroup_pids: track maximum pids
       [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
@ 2016-06-13 19:44 ` Topi Miettinen
       [not found]   ` <1465847065-3577-3-git-send-email-toiwoton-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2016-06-13 19:44 ` [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2 Topi Miettinen
  1 sibling, 1 reply; 16+ messages in thread
From: Topi Miettinen @ 2016-06-13 19:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Topi Miettinen, Tejun Heo, Li Zefan, Johannes Weiner,
	open list:CONTROL GROUP CGROUP

Track maximum pids in the cgroup, present it in cgroup pids.current_max.

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 kernel/cgroup_pids.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c
index 303097b..53fb21d 100644
--- a/kernel/cgroup_pids.c
+++ b/kernel/cgroup_pids.c
@@ -48,6 +48,7 @@ struct pids_cgroup {
 	 * %PIDS_MAX = (%PID_MAX_LIMIT + 1).
 	 */
 	atomic64_t			counter;
+	atomic64_t			cur_max;
 	int64_t				limit;
 };
 
@@ -72,6 +73,7 @@ pids_css_alloc(struct cgroup_subsys_state *parent)
 
 	pids->limit = PIDS_MAX;
 	atomic64_set(&pids->counter, 0);
+	atomic64_set(&pids->cur_max, 0);
 	return &pids->css;
 }
 
@@ -182,6 +184,10 @@ static int pids_can_attach(struct cgroup_taskset *tset)
 
 		pids_charge(pids, 1);
 		pids_uncharge(old_pids, 1);
+		if (atomic64_read(&pids->cur_max) <
+		    atomic64_read(&pids->counter))
+			atomic64_set(&pids->cur_max,
+				     atomic64_read(&pids->counter));
 	}
 
 	return 0;
@@ -202,6 +208,10 @@ static void pids_cancel_attach(struct cgroup_taskset *tset)
 
 		pids_charge(old_pids, 1);
 		pids_uncharge(pids, 1);
+		if (atomic64_read(&old_pids->cur_max) <
+		    atomic64_read(&old_pids->counter))
+			atomic64_set(&old_pids->cur_max,
+				     atomic64_read(&old_pids->counter));
 	}
 }
 
@@ -236,6 +246,14 @@ static void pids_free(struct task_struct *task)
 	pids_uncharge(pids, 1);
 }
 
+static void pids_fork(struct task_struct *task)
+{
+	struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id));
+
+	if (atomic64_read(&pids->cur_max) < atomic64_read(&pids->counter))
+		atomic64_set(&pids->cur_max, atomic64_read(&pids->counter));
+}
+
 static ssize_t pids_max_write(struct kernfs_open_file *of, char *buf,
 			      size_t nbytes, loff_t off)
 {
@@ -288,6 +306,14 @@ static s64 pids_current_read(struct cgroup_subsys_state *css,
 	return atomic64_read(&pids->counter);
 }
 
+static s64 pids_current_max_read(struct cgroup_subsys_state *css,
+				 struct cftype *cft)
+{
+	struct pids_cgroup *pids = css_pids(css);
+
+	return atomic64_read(&pids->cur_max);
+}
+
 static struct cftype pids_files[] = {
 	{
 		.name = "max",
@@ -300,6 +326,11 @@ static struct cftype pids_files[] = {
 		.read_s64 = pids_current_read,
 		.flags = CFTYPE_NOT_ON_ROOT,
 	},
+	{
+		.name = "current_max",
+		.read_s64 = pids_current_max_read,
+		.flags = CFTYPE_NOT_ON_ROOT,
+	},
 	{ }	/* terminate */
 };
 
@@ -313,4 +344,5 @@ struct cgroup_subsys pids_cgrp_subsys = {
 	.free		= pids_free,
 	.legacy_cftypes	= pids_files,
 	.dfl_cftypes	= pids_files,
+	.fork		= pids_fork,
 };
-- 
2.8.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2
       [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
  2016-06-13 19:44 ` [RFC 02/18] cgroup_pids: track maximum pids Topi Miettinen
@ 2016-06-13 19:44 ` Topi Miettinen
       [not found]   ` <1465847065-3577-4-git-send-email-toiwoton-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  1 sibling, 1 reply; 16+ messages in thread
From: Topi Miettinen @ 2016-06-13 19:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Topi Miettinen, Johannes Weiner, Michal Hocko, Vladimir Davydov,
	Andrew Morton,
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER MEMCG,
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER MEMCG

Present maximum used memory in cgroup memory.current_max.

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 include/linux/page_counter.h |  7 ++++++-
 mm/memcontrol.c              | 13 +++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index 7e62920..be4de17 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -9,9 +9,9 @@ struct page_counter {
 	atomic_long_t count;
 	unsigned long limit;
 	struct page_counter *parent;
+	unsigned long watermark;
 
 	/* legacy */
-	unsigned long watermark;
 	unsigned long failcnt;
 };
 
@@ -34,6 +34,11 @@ static inline unsigned long page_counter_read(struct page_counter *counter)
 	return atomic_long_read(&counter->count);
 }
 
+static inline unsigned long page_counter_read_watermark(struct page_counter *counter)
+{
+	return counter->watermark;
+}
+
 void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages);
 void page_counter_charge(struct page_counter *counter, unsigned long nr_pages);
 bool page_counter_try_charge(struct page_counter *counter,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 75e7440..5513771 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4966,6 +4966,14 @@ static u64 memory_current_read(struct cgroup_subsys_state *css,
 	return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE;
 }
 
+static u64 memory_current_max_read(struct cgroup_subsys_state *css,
+				   struct cftype *cft)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+	return (u64)page_counter_read_watermark(&memcg->memory) * PAGE_SIZE;
+}
+
 static int memory_low_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
@@ -5179,6 +5187,11 @@ static struct cftype memory_files[] = {
 		.read_u64 = memory_current_read,
 	},
 	{
+		.name = "current_max",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_u64 = memory_current_max_read,
+	},
+	{
 		.name = "low",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = memory_low_show,
-- 
2.8.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [RFC 02/18] cgroup_pids: track maximum pids
       [not found]   ` <1465847065-3577-3-git-send-email-toiwoton-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2016-06-13 21:12     ` Tejun Heo
       [not found]       ` <20160613211227.GG31708-piEFEHQLUPpN0TnZuCh8vA@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Tejun Heo @ 2016-06-13 21:12 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA, Li Zefan, Johannes Weiner,
	open list:CONTROL GROUP (CGROUP)

Hello,

On Mon, Jun 13, 2016 at 10:44:09PM +0300, Topi Miettinen wrote:
> Track maximum pids in the cgroup, present it in cgroup pids.current_max.

"max" is often used for maximum limits in cgroup.  I think "watermark"
or "high_watermark" would be a lot clearer.

> @@ -236,6 +246,14 @@ static void pids_free(struct task_struct *task)
>  	pids_uncharge(pids, 1);
>  }
>  
> +static void pids_fork(struct task_struct *task)
> +{
> +	struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id));
> +
> +	if (atomic64_read(&pids->cur_max) < atomic64_read(&pids->counter))
> +		atomic64_set(&pids->cur_max, atomic64_read(&pids->counter));
> +}

Wouldn't it make more sense to track high watermark from the charge
functions instead?  I don't get why this requires a separate fork
callback.  Also, racing atomic64_set's are racy.  The counter can end
up with a lower number than it should be.

> @@ -300,6 +326,11 @@ static struct cftype pids_files[] = {
>  		.read_s64 = pids_current_read,
>  		.flags = CFTYPE_NOT_ON_ROOT,
>  	},
> +	{
> +		.name = "current_max",

Please make this "high_watermark" field in pids.stats file.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC 02/18] cgroup_pids: track maximum pids
       [not found]       ` <20160613211227.GG31708-piEFEHQLUPpN0TnZuCh8vA@public.gmane.org>
@ 2016-06-13 21:29         ` Topi Miettinen
       [not found]           ` <17cb1a37-47b1-dbd4-6835-efad3cf6c12f-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Topi Miettinen @ 2016-06-13 21:29 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA, Li Zefan, Johannes Weiner,
	open list:CONTROL GROUP (CGROUP)

On 06/13/16 21:12, Tejun Heo wrote:
> Hello,
> 
> On Mon, Jun 13, 2016 at 10:44:09PM +0300, Topi Miettinen wrote:
>> Track maximum pids in the cgroup, present it in cgroup pids.current_max.
> 
> "max" is often used for maximum limits in cgroup.  I think "watermark"
> or "high_watermark" would be a lot clearer.

OK, I have no preference.

> 
>> @@ -236,6 +246,14 @@ static void pids_free(struct task_struct *task)
>>  	pids_uncharge(pids, 1);
>>  }
>>  
>> +static void pids_fork(struct task_struct *task)
>> +{
>> +	struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id));
>> +
>> +	if (atomic64_read(&pids->cur_max) < atomic64_read(&pids->counter))
>> +		atomic64_set(&pids->cur_max, atomic64_read(&pids->counter));
>> +}
> 
> Wouldn't it make more sense to track high watermark from the charge
> functions instead?  I don't get why this requires a separate fork
> callback.  Also, racing atomic64_set's are racy.  The counter can end
> up with a lower number than it should be.
> 

I used fork callback as I don't want to lower the watermark in all cases
where the charge can be lowered, so I'd update the watermark only when
the fork really happens.

Is there a better way to compare and set? I don't think atomic_cmpxchg()
does what's needed,

>> @@ -300,6 +326,11 @@ static struct cftype pids_files[] = {
>>  		.read_s64 = pids_current_read,
>>  		.flags = CFTYPE_NOT_ON_ROOT,
>>  	},
>> +	{
>> +		.name = "current_max",
> 
> Please make this "high_watermark" field in pids.stats file.
> 
> Thanks.
> 

OK.

-Topi

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC 02/18] cgroup_pids: track maximum pids
       [not found]           ` <17cb1a37-47b1-dbd4-6835-efad3cf6c12f-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2016-06-13 21:33             ` Tejun Heo
       [not found]               ` <20160613213354.GH31708-piEFEHQLUPpN0TnZuCh8vA@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Tejun Heo @ 2016-06-13 21:33 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA, Li Zefan, Johannes Weiner,
	open list:CONTROL GROUP (CGROUP)

Hello,

On Mon, Jun 13, 2016 at 09:29:32PM +0000, Topi Miettinen wrote:
> I used fork callback as I don't want to lower the watermark in all cases
> where the charge can be lowered, so I'd update the watermark only when
> the fork really happens.

I don't think that would make a noticeable difference.  That's where
we decide whether to grant fork or not after all and thus where the
actual usage is.

> Is there a better way to compare and set? I don't think atomic_cmpxchg()
> does what's needed,

cmpxchg loop should do what's necessary although I'm not sure how much
being strictly correct matters here.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC 02/18] cgroup_pids: track maximum pids
       [not found]               ` <20160613213354.GH31708-piEFEHQLUPpN0TnZuCh8vA@public.gmane.org>
@ 2016-06-13 21:59                 ` Topi Miettinen
       [not found]                   ` <15ef1041-35b6-cb31-ff98-8b0be7780bc3-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2016-07-17 20:11                 ` Topi Miettinen
  1 sibling, 1 reply; 16+ messages in thread
From: Topi Miettinen @ 2016-06-13 21:59 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA, Li Zefan, Johannes Weiner,
	open list:CONTROL GROUP (CGROUP)

On 06/13/16 21:33, Tejun Heo wrote:
> Hello,
> 
> On Mon, Jun 13, 2016 at 09:29:32PM +0000, Topi Miettinen wrote:
>> I used fork callback as I don't want to lower the watermark in all cases
>> where the charge can be lowered, so I'd update the watermark only when
>> the fork really happens.
> 
> I don't think that would make a noticeable difference.  That's where
> we decide whether to grant fork or not after all and thus where the
> actual usage is.
> 

You mean, increment count on cgroup_can_fork()? But what if the fork()
fails after that (signal_pending case)?

>> Is there a better way to compare and set? I don't think atomic_cmpxchg()
>> does what's needed,
> 
> cmpxchg loop should do what's necessary although I'm not sure how much
> being strictly correct matters here.
> 
> Thanks.
> 

These are not used for any decisions taken by kernel, but by the user. I
have to say I don't know where's the line between strict correctness and
less strict.

-Topi

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC 02/18] cgroup_pids: track maximum pids
       [not found]                   ` <15ef1041-35b6-cb31-ff98-8b0be7780bc3-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2016-06-13 22:09                     ` Tejun Heo
  0 siblings, 0 replies; 16+ messages in thread
From: Tejun Heo @ 2016-06-13 22:09 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA, Li Zefan, Johannes Weiner,
	open list:CONTROL GROUP (CGROUP)

On Mon, Jun 13, 2016 at 09:59:32PM +0000, Topi Miettinen wrote:
> On 06/13/16 21:33, Tejun Heo wrote:
> > Hello,
> > 
> > On Mon, Jun 13, 2016 at 09:29:32PM +0000, Topi Miettinen wrote:
> >> I used fork callback as I don't want to lower the watermark in all cases
> >> where the charge can be lowered, so I'd update the watermark only when
> >> the fork really happens.
> > 
> > I don't think that would make a noticeable difference.  That's where
> > we decide whether to grant fork or not after all and thus where the
> > actual usage is.
> > 
> 
> You mean, increment count on cgroup_can_fork()? But what if the fork()
> fails after that (signal_pending case)?

That number isn't gonna deviate by any significant amount and the
counter is to estimate what the limit should be set to to begin with.
It's logical to collect how close the usage got to can_attach failure
due to limit breach.

> >> Is there a better way to compare and set? I don't think atomic_cmpxchg()
> >> does what's needed,
> > 
> > cmpxchg loop should do what's necessary although I'm not sure how much
> > being strictly correct matters here.
> 
> These are not used for any decisions taken by kernel, but by the user. I
> have to say I don't know where's the line between strict correctness and
> less strict.

Provided that cmpxchg is done only when the counter needs to be
actually updated, it's not gonna be noticeably expensive.  Might as
well make it correct.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2
       [not found]   ` <1465847065-3577-4-git-send-email-toiwoton-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2016-06-14  7:01     ` Michal Hocko
       [not found]       ` <20160614070130.GB5681-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Michal Hocko @ 2016-06-14  7:01 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA, Johannes Weiner,
	Vladimir Davydov, Andrew Morton,
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG),
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)

On Mon 13-06-16 22:44:10, Topi Miettinen wrote:
> Present maximum used memory in cgroup memory.current_max.

It would be really much more preferable to present the usecase in the
patch description. It is true that this information is presented in the
v1 API but the current policy is to export new knobs only when there is
a reasonable usecase for it.

> Signed-off-by: Topi Miettinen <toiwoton-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> ---
>  include/linux/page_counter.h |  7 ++++++-
>  mm/memcontrol.c              | 13 +++++++++++++
>  2 files changed, 19 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
> index 7e62920..be4de17 100644
> --- a/include/linux/page_counter.h
> +++ b/include/linux/page_counter.h
> @@ -9,9 +9,9 @@ struct page_counter {
>  	atomic_long_t count;
>  	unsigned long limit;
>  	struct page_counter *parent;
> +	unsigned long watermark;
>  
>  	/* legacy */
> -	unsigned long watermark;
>  	unsigned long failcnt;
>  };
>  
> @@ -34,6 +34,11 @@ static inline unsigned long page_counter_read(struct page_counter *counter)
>  	return atomic_long_read(&counter->count);
>  }
>  
> +static inline unsigned long page_counter_read_watermark(struct page_counter *counter)
> +{
> +	return counter->watermark;
> +}
> +
>  void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages);
>  void page_counter_charge(struct page_counter *counter, unsigned long nr_pages);
>  bool page_counter_try_charge(struct page_counter *counter,
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 75e7440..5513771 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4966,6 +4966,14 @@ static u64 memory_current_read(struct cgroup_subsys_state *css,
>  	return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE;
>  }
>  
> +static u64 memory_current_max_read(struct cgroup_subsys_state *css,
> +				   struct cftype *cft)
> +{
> +	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
> +
> +	return (u64)page_counter_read_watermark(&memcg->memory) * PAGE_SIZE;
> +}
> +
>  static int memory_low_show(struct seq_file *m, void *v)
>  {
>  	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
> @@ -5179,6 +5187,11 @@ static struct cftype memory_files[] = {
>  		.read_u64 = memory_current_read,
>  	},
>  	{
> +		.name = "current_max",
> +		.flags = CFTYPE_NOT_ON_ROOT,
> +		.read_u64 = memory_current_max_read,
> +	},
> +	{
>  		.name = "low",
>  		.flags = CFTYPE_NOT_ON_ROOT,
>  		.seq_show = memory_low_show,
> -- 
> 2.8.1

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2
       [not found]       ` <20160614070130.GB5681-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
@ 2016-06-14 15:47         ` Topi Miettinen
       [not found]           ` <b9d04ccd-28d2-993a-2a40-bbed7b6289d4-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Topi Miettinen @ 2016-06-14 15:47 UTC (permalink / raw)
  To: Michal Hocko
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA, Johannes Weiner,
	Vladimir Davydov, Andrew Morton,
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG),
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)

On 06/14/16 07:01, Michal Hocko wrote:
> On Mon 13-06-16 22:44:10, Topi Miettinen wrote:
>> Present maximum used memory in cgroup memory.current_max.
> 
> It would be really much more preferable to present the usecase in the
> patch description. It is true that this information is presented in the
> v1 API but the current policy is to export new knobs only when there is
> a reasonable usecase for it.
> 

This was stated in the cover letter:
https://lkml.org/lkml/2016/6/13/857

"There are many basic ways to control processes, including capabilities,
cgroups and resource limits. However, there are far fewer ways to find out
useful values for the limits, except blind trial and error.

This patch series attempts to fix that by giving at least a nice starting
point from the actual maximum values. I looked where each limit is checked
and added a call to limit bump nearby."

"Cgroups
[RFC 02/18] cgroup_pids: track maximum pids
[RFC 03/18] memcontrol: present maximum used memory also for
[RFC 04/18] device_cgroup: track and present accessed devices

For tasks and memory cgroup limits the situation is somewhat better as the
current tasks and memory status can be easily seen with ps(1). However, any
transient tasks or temporary higher memory use might slip from the view.
Device use may be seen with advanced MAC tools, like TOMOYO, but there is no
universal method. Program sources typically give no useful indication about
memory use or how many tasks there could be."

I can add some of this to the commit message, is that sufficient for you?

>> Signed-off-by: Topi Miettinen <toiwoton-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>> ---
>>  include/linux/page_counter.h |  7 ++++++-
>>  mm/memcontrol.c              | 13 +++++++++++++
>>  2 files changed, 19 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
>> index 7e62920..be4de17 100644
>> --- a/include/linux/page_counter.h
>> +++ b/include/linux/page_counter.h
>> @@ -9,9 +9,9 @@ struct page_counter {
>>  	atomic_long_t count;
>>  	unsigned long limit;
>>  	struct page_counter *parent;
>> +	unsigned long watermark;
>>  
>>  	/* legacy */
>> -	unsigned long watermark;
>>  	unsigned long failcnt;
>>  };
>>  
>> @@ -34,6 +34,11 @@ static inline unsigned long page_counter_read(struct page_counter *counter)
>>  	return atomic_long_read(&counter->count);
>>  }
>>  
>> +static inline unsigned long page_counter_read_watermark(struct page_counter *counter)
>> +{
>> +	return counter->watermark;
>> +}
>> +
>>  void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages);
>>  void page_counter_charge(struct page_counter *counter, unsigned long nr_pages);
>>  bool page_counter_try_charge(struct page_counter *counter,
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 75e7440..5513771 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -4966,6 +4966,14 @@ static u64 memory_current_read(struct cgroup_subsys_state *css,
>>  	return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE;
>>  }
>>  
>> +static u64 memory_current_max_read(struct cgroup_subsys_state *css,
>> +				   struct cftype *cft)
>> +{
>> +	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
>> +
>> +	return (u64)page_counter_read_watermark(&memcg->memory) * PAGE_SIZE;
>> +}
>> +
>>  static int memory_low_show(struct seq_file *m, void *v)
>>  {
>>  	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
>> @@ -5179,6 +5187,11 @@ static struct cftype memory_files[] = {
>>  		.read_u64 = memory_current_read,
>>  	},
>>  	{
>> +		.name = "current_max",
>> +		.flags = CFTYPE_NOT_ON_ROOT,
>> +		.read_u64 = memory_current_max_read,
>> +	},
>> +	{
>>  		.name = "low",
>>  		.flags = CFTYPE_NOT_ON_ROOT,
>>  		.seq_show = memory_low_show,
>> -- 
>> 2.8.1
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2
       [not found]           ` <b9d04ccd-28d2-993a-2a40-bbed7b6289d4-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2016-06-14 16:04             ` Johannes Weiner
       [not found]               ` <20160614160410.GB14279-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Johannes Weiner @ 2016-06-14 16:04 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: Michal Hocko, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	Vladimir Davydov, Andrew Morton,
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG),
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)

On Tue, Jun 14, 2016 at 03:47:20PM +0000, Topi Miettinen wrote:
> On 06/14/16 07:01, Michal Hocko wrote:
> > On Mon 13-06-16 22:44:10, Topi Miettinen wrote:
> >> Present maximum used memory in cgroup memory.current_max.
> > 
> > It would be really much more preferable to present the usecase in the
> > patch description. It is true that this information is presented in the
> > v1 API but the current policy is to export new knobs only when there is
> > a reasonable usecase for it.
> > 
> 
> This was stated in the cover letter:
> https://lkml.org/lkml/2016/6/13/857
> 
> "There are many basic ways to control processes, including capabilities,
> cgroups and resource limits. However, there are far fewer ways to find out
> useful values for the limits, except blind trial and error.
> 
> This patch series attempts to fix that by giving at least a nice starting
> point from the actual maximum values. I looked where each limit is checked
> and added a call to limit bump nearby."
> 
> "Cgroups
> [RFC 02/18] cgroup_pids: track maximum pids
> [RFC 03/18] memcontrol: present maximum used memory also for
> [RFC 04/18] device_cgroup: track and present accessed devices
> 
> For tasks and memory cgroup limits the situation is somewhat better as the
> current tasks and memory status can be easily seen with ps(1). However, any
> transient tasks or temporary higher memory use might slip from the view.
> Device use may be seen with advanced MAC tools, like TOMOYO, but there is no
> universal method. Program sources typically give no useful indication about
> memory use or how many tasks there could be."
> 
> I can add some of this to the commit message, is that sufficient for you?

It's useful to have a short summary of the justification in each patch
as well. Other than that it's fine to be broader and more detailed
about your motivation in the coverletter.

I didn't catch the coverletter, though. It makes sense to CC
recipients of any of those patches on the full series, including the
cover, since even though we are specialized in certain areas of the
code, many of us are interested in the whole picture of addressing a
problem, and not just the few bits in our area without more context.

As far as the memcg part of this series goes, one concern is that page
cache is trimmed back only when there is pressure, so in all but very
few cases the high watermark you are introducing will be pegged to the
configured limit. It doesn't give a whole lot of insight.

But there are consumers that are less/not compressible than cache,
such as anonymous memory, unreclaimable slab, maybe socket buffers
etc. Having spikes in those slip through two sampling points is an
issue, indeed. Adding consumer-specific watermarks might be useful.

Thanks

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2
       [not found]               ` <20160614160410.GB14279-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
@ 2016-06-14 17:15                 ` Topi Miettinen
       [not found]                   ` <db6a51eb-d1f7-691b-11a6-ef0b7c1c9462-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Topi Miettinen @ 2016-06-14 17:15 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Michal Hocko, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	Vladimir Davydov, Andrew Morton,
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG),
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)

On 06/14/16 16:04, Johannes Weiner wrote:
> On Tue, Jun 14, 2016 at 03:47:20PM +0000, Topi Miettinen wrote:
>> On 06/14/16 07:01, Michal Hocko wrote:
>>> On Mon 13-06-16 22:44:10, Topi Miettinen wrote:
>>>> Present maximum used memory in cgroup memory.current_max.
>>>
>>> It would be really much more preferable to present the usecase in the
>>> patch description. It is true that this information is presented in the
>>> v1 API but the current policy is to export new knobs only when there is
>>> a reasonable usecase for it.
>>>
>>
>> This was stated in the cover letter:
>> https://lkml.org/lkml/2016/6/13/857
>>
>> "There are many basic ways to control processes, including capabilities,
>> cgroups and resource limits. However, there are far fewer ways to find out
>> useful values for the limits, except blind trial and error.
>>
>> This patch series attempts to fix that by giving at least a nice starting
>> point from the actual maximum values. I looked where each limit is checked
>> and added a call to limit bump nearby."
>>
>> "Cgroups
>> [RFC 02/18] cgroup_pids: track maximum pids
>> [RFC 03/18] memcontrol: present maximum used memory also for
>> [RFC 04/18] device_cgroup: track and present accessed devices
>>
>> For tasks and memory cgroup limits the situation is somewhat better as the
>> current tasks and memory status can be easily seen with ps(1). However, any
>> transient tasks or temporary higher memory use might slip from the view.
>> Device use may be seen with advanced MAC tools, like TOMOYO, but there is no
>> universal method. Program sources typically give no useful indication about
>> memory use or how many tasks there could be."
>>
>> I can add some of this to the commit message, is that sufficient for you?
> 
> It's useful to have a short summary of the justification in each patch
> as well. Other than that it's fine to be broader and more detailed
> about your motivation in the coverletter.
> 
> I didn't catch the coverletter, though. It makes sense to CC
> recipients of any of those patches on the full series, including the
> cover, since even though we are specialized in certain areas of the
> code, many of us are interested in the whole picture of addressing a
> problem, and not just the few bits in our area without more context.
> 

Thank you for this nice explanation. I suppose "git send-email
--cc-cmd=scripts/get_maintainer.pl" doesn't do this.

> As far as the memcg part of this series goes, one concern is that page
> cache is trimmed back only when there is pressure, so in all but very
> few cases the high watermark you are introducing will be pegged to the
> configured limit. It doesn't give a whole lot of insight.
> 

So using the high watermark would not give a very useful starting point
for the user who wished to configure the memory limit? What else could
be used instead?

> But there are consumers that are less/not compressible than cache,
> such as anonymous memory, unreclaimable slab, maybe socket buffers
> etc. Having spikes in those slip through two sampling points is an
> issue, indeed. Adding consumer-specific watermarks might be useful.
> 
> Thanks
> 

OK, but there's no limiting or tuning mechanism in place for now for
those, or is there? How could the results be used?

-Topi

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2
       [not found]                   ` <db6a51eb-d1f7-691b-11a6-ef0b7c1c9462-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2016-06-16 10:27                     ` Michal Hocko
  0 siblings, 0 replies; 16+ messages in thread
From: Michal Hocko @ 2016-06-16 10:27 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: Johannes Weiner, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	Vladimir Davydov, Andrew Morton,
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG),
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)

On Tue 14-06-16 17:15:06, Topi Miettinen wrote:
> On 06/14/16 16:04, Johannes Weiner wrote:
[...]
> > I didn't catch the coverletter, though. It makes sense to CC
> > recipients of any of those patches on the full series, including the
> > cover, since even though we are specialized in certain areas of the
> > code, many of us are interested in the whole picture of addressing a
> > problem, and not just the few bits in our area without more context.
> > 
> 
> Thank you for this nice explanation. I suppose "git send-email
> --cc-cmd=scripts/get_maintainer.pl" doesn't do this.

No it doesn't. What I do for this kind of series is the following. Put
an explicit CC (acked, reviews etc...) to each patch. git format-patch
$RANGE and then
$ git send-email --cc-cmd=./cc-cmd-only-cover.sh $DEFAULT_TO_CC --compose *.patch

$ cat cc-cmd-only-cover.sh
#!/bin/bash

if [[ $1 == *gitsendemail.msg* || $1 == *cover-letter* ]]; then
        grep '<.*@.*>' -h *.patch | sed 's/^.*: //' | sort | uniq
fi

A bit error prone because you have to cleanup any previous patch files
from the directory but works more or less well for me.

s 
> > As far as the memcg part of this series goes, one concern is that page
> > cache is trimmed back only when there is pressure, so in all but very
> > few cases the high watermark you are introducing will be pegged to the
> > configured limit. It doesn't give a whole lot of insight.
> > 
> 
> So using the high watermark would not give a very useful starting point
> for the user who wished to configure the memory limit? What else could
> be used instead?

we have an event notification mechanism. In v1 it is vmpressure and v2
you will get a notification when the high/max limit is hit or when we
hit the oom.
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC 02/18] cgroup_pids: track maximum pids
       [not found]               ` <20160613213354.GH31708-piEFEHQLUPpN0TnZuCh8vA@public.gmane.org>
  2016-06-13 21:59                 ` Topi Miettinen
@ 2016-07-17 20:11                 ` Topi Miettinen
       [not found]                   ` <3b03822f-c5d0-5b84-79c3-edeb8e78e2dd-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  1 sibling, 1 reply; 16+ messages in thread
From: Topi Miettinen @ 2016-07-17 20:11 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA, Li Zefan, Johannes Weiner,
	open list:CONTROL GROUP (CGROUP)

On 06/13/16 21:33, Tejun Heo wrote:
> Hello,
> 
> On Mon, Jun 13, 2016 at 09:29:32PM +0000, Topi Miettinen wrote:
>> I used fork callback as I don't want to lower the watermark in all cases
>> where the charge can be lowered, so I'd update the watermark only when
>> the fork really happens.
> 
> I don't think that would make a noticeable difference.  That's where
> we decide whether to grant fork or not after all and thus where the
> actual usage is.

I tried using only charge functions, but then the result was too low.
With fork callback, the result was as expected.

-Topi

> 
>> Is there a better way to compare and set? I don't think atomic_cmpxchg()
>> does what's needed,
> 
> cmpxchg loop should do what's necessary although I'm not sure how much
> being strictly correct matters here.
> 
> Thanks.
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC 02/18] cgroup_pids: track maximum pids
       [not found]                   ` <3b03822f-c5d0-5b84-79c3-edeb8e78e2dd-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2016-07-19  1:09                     ` Tejun Heo
  2016-07-19 16:59                       ` Topi Miettinen
  0 siblings, 1 reply; 16+ messages in thread
From: Tejun Heo @ 2016-07-19  1:09 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA, Li Zefan, Johannes Weiner,
	open list:CONTROL GROUP (CGROUP)

On Sun, Jul 17, 2016 at 08:11:31PM +0000, Topi Miettinen wrote:
> On 06/13/16 21:33, Tejun Heo wrote:
> > Hello,
> > 
> > On Mon, Jun 13, 2016 at 09:29:32PM +0000, Topi Miettinen wrote:
> >> I used fork callback as I don't want to lower the watermark in all cases
> >> where the charge can be lowered, so I'd update the watermark only when
> >> the fork really happens.
> > 
> > I don't think that would make a noticeable difference.  That's where
> > we decide whether to grant fork or not after all and thus where the
> > actual usage is.
> 
> I tried using only charge functions, but then the result was too low.
> With fork callback, the result was as expected.

Can you please elaborate in more details?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC 02/18] cgroup_pids: track maximum pids
  2016-07-19  1:09                     ` Tejun Heo
@ 2016-07-19 16:59                       ` Topi Miettinen
       [not found]                         ` <45e50dcb-7446-d203-de6e-0a59dc09a874-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Topi Miettinen @ 2016-07-19 16:59 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, Li Zefan, Johannes Weiner,
	open list:CONTROL GROUP (CGROUP)

On 07/19/16 01:09, Tejun Heo wrote:
> On Sun, Jul 17, 2016 at 08:11:31PM +0000, Topi Miettinen wrote:
>> On 06/13/16 21:33, Tejun Heo wrote:
>>> Hello,
>>>
>>> On Mon, Jun 13, 2016 at 09:29:32PM +0000, Topi Miettinen wrote:
>>>> I used fork callback as I don't want to lower the watermark in all cases
>>>> where the charge can be lowered, so I'd update the watermark only when
>>>> the fork really happens.
>>>
>>> I don't think that would make a noticeable difference.  That's where
>>> we decide whether to grant fork or not after all and thus where the
>>> actual usage is.
>>
>> I tried using only charge functions, but then the result was too low.
>> With fork callback, the result was as expected.
> 
> Can you please elaborate in more details?

With the example systemd-timesyncd case, I was only getting 1 as the
highwatermark, but there were already two tasks.

-Topi


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC 02/18] cgroup_pids: track maximum pids
       [not found]                         ` <45e50dcb-7446-d203-de6e-0a59dc09a874-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2016-07-19 18:13                           ` Tejun Heo
  0 siblings, 0 replies; 16+ messages in thread
From: Tejun Heo @ 2016-07-19 18:13 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA, Li Zefan, Johannes Weiner,
	open list:CONTROL GROUP (CGROUP)

On Tue, Jul 19, 2016 at 04:59:18PM +0000, Topi Miettinen wrote:
> With the example systemd-timesyncd case, I was only getting 1 as the
> highwatermark, but there were already two tasks.

Can you please find out why that is so?  Given that that's where we
charge pid usage, it doesn't make sense to me that you're getting
lower numbers than actual usage there.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2016-07-19 18:13 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
2016-06-13 19:44 ` [RFC 02/18] cgroup_pids: track maximum pids Topi Miettinen
     [not found]   ` <1465847065-3577-3-git-send-email-toiwoton-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2016-06-13 21:12     ` Tejun Heo
     [not found]       ` <20160613211227.GG31708-piEFEHQLUPpN0TnZuCh8vA@public.gmane.org>
2016-06-13 21:29         ` Topi Miettinen
     [not found]           ` <17cb1a37-47b1-dbd4-6835-efad3cf6c12f-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2016-06-13 21:33             ` Tejun Heo
     [not found]               ` <20160613213354.GH31708-piEFEHQLUPpN0TnZuCh8vA@public.gmane.org>
2016-06-13 21:59                 ` Topi Miettinen
     [not found]                   ` <15ef1041-35b6-cb31-ff98-8b0be7780bc3-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2016-06-13 22:09                     ` Tejun Heo
2016-07-17 20:11                 ` Topi Miettinen
     [not found]                   ` <3b03822f-c5d0-5b84-79c3-edeb8e78e2dd-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2016-07-19  1:09                     ` Tejun Heo
2016-07-19 16:59                       ` Topi Miettinen
     [not found]                         ` <45e50dcb-7446-d203-de6e-0a59dc09a874-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2016-07-19 18:13                           ` Tejun Heo
2016-06-13 19:44 ` [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2 Topi Miettinen
     [not found]   ` <1465847065-3577-4-git-send-email-toiwoton-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2016-06-14  7:01     ` Michal Hocko
     [not found]       ` <20160614070130.GB5681-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
2016-06-14 15:47         ` Topi Miettinen
     [not found]           ` <b9d04ccd-28d2-993a-2a40-bbed7b6289d4-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2016-06-14 16:04             ` Johannes Weiner
     [not found]               ` <20160614160410.GB14279-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
2016-06-14 17:15                 ` Topi Miettinen
     [not found]                   ` <db6a51eb-d1f7-691b-11a6-ef0b7c1c9462-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2016-06-16 10:27                     ` Michal Hocko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox