All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yosry Ahmed <yosry.ahmed@linux.dev>
To: inwardvessel <inwardvessel@gmail.com>
Cc: tj@kernel.org, shakeel.butt@linux.dev, mhocko@kernel.org,
	hannes@cmpxchg.org, akpm@linux-foundation.org,
	linux-mm@kvack.org, cgroups@vger.kernel.org,
	kernel-team@meta.com
Subject: Re: [PATCH 4/4 v2] cgroup: separate rstat list pointers from base stats
Date: Fri, 28 Feb 2025 20:33:40 +0000	[thread overview]
Message-ID: <Z8IdpGIwwdlk-oSk@google.com> (raw)
In-Reply-To: <20250227215543.49928-5-inwardvessel@gmail.com>

On Thu, Feb 27, 2025 at 01:55:43PM -0800, inwardvessel wrote:
> From: JP Kobryn <inwardvessel@gmail.com>
> 
> A majority of the cgroup_rstat_cpu struct size is made up of the base
> stat entities. Since only the "self" subsystem state makes use of these,
> move them into a struct of their own. This allows for a new compact
> cgroup_rstat_cpu struct that the formal subsystems can make use of.
> Where applicable, decide on whether to allocate the compact or full
> struct including the base stats.

Mentioning the memory savings in this patch's log would be helpful.

> 
> Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
> ---
>  include/linux/cgroup-defs.h | 37 ++++++++++++++----------
>  kernel/cgroup/rstat.c       | 57 +++++++++++++++++++++++++------------
>  2 files changed, 61 insertions(+), 33 deletions(-)
> 
> diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
> index 1598e1389615..b0a07c63fd46 100644
> --- a/include/linux/cgroup-defs.h
> +++ b/include/linux/cgroup-defs.h
> @@ -170,7 +170,10 @@ struct cgroup_subsys_state {
>  	struct percpu_ref refcnt;
>  
>  	/* per-cpu recursive resource statistics */
> -	struct cgroup_rstat_cpu __percpu *rstat_cpu;
> +	union {
> +		struct cgroup_rstat_cpu __percpu *rstat_cpu;
> +		struct cgroup_rstat_base_cpu __percpu *rstat_base_cpu;
> +	};
>  
>  	/*
>  	 * siblings list anchored at the parent's ->children
> @@ -356,6 +359,24 @@ struct cgroup_base_stat {
>   * resource statistics on top of it - bsync, bstat and last_bstat.
>   */
>  struct cgroup_rstat_cpu {
> +	/*
> +	 * Child cgroups with stat updates on this cpu since the last read
> +	 * are linked on the parent's ->updated_children through
> +	 * ->updated_next.
> +	 *
> +	 * In addition to being more compact, singly-linked list pointing
> +	 * to the cgroup makes it unnecessary for each per-cpu struct to
> +	 * point back to the associated cgroup.
> +	 *
> +	 * Protected by per-cpu cgroup_rstat_cpu_lock.

I just noticed, the patch that split the lock should have updated this
comment.

> +	 */
> +	struct cgroup_subsys_state *updated_children;	/* terminated by self */
> +	struct cgroup_subsys_state *updated_next;		/* NULL if not on list */
> +};
> +
> +struct cgroup_rstat_base_cpu {
> +	struct cgroup_rstat_cpu self;

Why 'self'? Why not 'rstat_cpu' like it's named in struct
cgroup_subsys_state?

> +
>  	/*
>  	 * ->bsync protects ->bstat.  These are the only fields which get
>  	 * updated in the hot path.
> @@ -382,20 +403,6 @@ struct cgroup_rstat_cpu {
>  	 * deltas to propagate to the per-cpu subtree_bstat.
>  	 */
>  	struct cgroup_base_stat last_subtree_bstat;
> -
> -	/*
> -	 * Child cgroups with stat updates on this cpu since the last read
> -	 * are linked on the parent's ->updated_children through
> -	 * ->updated_next.
> -	 *
> -	 * In addition to being more compact, singly-linked list pointing
> -	 * to the cgroup makes it unnecessary for each per-cpu struct to
> -	 * point back to the associated cgroup.
> -	 *
> -	 * Protected by per-cpu cgroup_rstat_cpu_lock.
> -	 */
> -	struct cgroup_subsys_state *updated_children;	/* terminated by self */
> -	struct cgroup_subsys_state *updated_next;		/* NULL if not on list */
>  };
>  
>  struct cgroup_freezer_state {
> diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
> index b3eaefc1fd07..c08ebe2f9568 100644
> --- a/kernel/cgroup/rstat.c
> +++ b/kernel/cgroup/rstat.c
> @@ -24,6 +24,12 @@ static struct cgroup_rstat_cpu *cgroup_rstat_cpu(
>  	return per_cpu_ptr(css->rstat_cpu, cpu);
>  }
>  
> +static struct cgroup_rstat_base_cpu *cgroup_rstat_base_cpu(
> +		struct cgroup_subsys_state *css, int cpu)
> +{
> +	return per_cpu_ptr(css->rstat_base_cpu, cpu);
> +}
> +
>  static inline bool is_base_css(struct cgroup_subsys_state *css)
>  {
>  	return css->ss == NULL;
> @@ -438,17 +444,31 @@ int cgroup_rstat_init(struct cgroup_subsys_state *css)
>  
>  	/* the root cgrp's self css has rstat_cpu preallocated */
>  	if (!css->rstat_cpu) {
> -		css->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
> -		if (!css->rstat_cpu)
> -			return -ENOMEM;
> -	}
> +		if (is_base_css(css)) {
> +			css->rstat_base_cpu = alloc_percpu(struct cgroup_rstat_base_cpu);
> +			if (!css->rstat_base_cpu)
> +				return -ENOMEM;
>  
> -	/* ->updated_children list is self terminated */
> -	for_each_possible_cpu(cpu) {
> -		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(css, cpu);
> +			for_each_possible_cpu(cpu) {
> +				struct cgroup_rstat_base_cpu *rstatc;

We should use different variable names for cgroup_rstat_base_cpu and
cgroup_rstat_cpu throughout. Maybe 'brstatc' or 'rstatbc' for the
latter?

> +
> +				rstatc = cgroup_rstat_base_cpu(css, cpu);
> +				rstatc->self.updated_children = css;
> +				u64_stats_init(&rstatc->bsync);
> +			}
> +		} else {
> +			css->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
> +			if (!css->rstat_cpu)
> +				return -ENOMEM;
> +
> +			for_each_possible_cpu(cpu) {
> +				struct cgroup_rstat_cpu *rstatc;
> +
> +				rstatc = cgroup_rstat_cpu(css, cpu);
> +				rstatc->updated_children = css;
> +			}
> +		}
>  
> -		rstatc->updated_children = css;
> -		u64_stats_init(&rstatc->bsync);

I think there's too much replication here. We can probably do something
like this (untested):

diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index de057c992f824..1750a69887a2e 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -443,34 +443,30 @@ int cgroup_rstat_init(struct cgroup_subsys_state *css)
 	int cpu;
 
 	/* the root cgrp's self css has rstat_cpu preallocated */
-	if (!css->rstat_cpu) {
-		if (is_base_css(css)) {
-			css->rstat_base_cpu = alloc_percpu(struct cgroup_rstat_base_cpu);
-			if (!css->rstat_base_cpu)
-				return -ENOMEM;
-
-			for_each_possible_cpu(cpu) {
-				struct cgroup_rstat_base_cpu *rstatc;
+	if (css->rstat_cpu)
+		return 0;
 
-				rstatc = cgroup_rstat_base_cpu(css, cpu);
-				rstatc->self.updated_children = css;
-				u64_stats_init(&rstatc->bsync);
-			}
-		} else {
-			css->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
-			if (!css->rstat_cpu)
-				return -ENOMEM;
+	if (is_base_css(css)) {
+		css->rstat_base_cpu = alloc_percpu(struct cgroup_rstat_base_cpu);
+		if (!css->rstat_base_cpu)
+			return -ENOMEM;
+	} else {
+		css->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
+		if (!css->rstat_cpu)
+			return -ENOMEM;
+	}
 
-			for_each_possible_cpu(cpu) {
-				struct cgroup_rstat_cpu *rstatc;
+	for_each_possible_cpu(cpu) {
+		struct cgroup_rstat_base_cpu *brstatc = NULL;
+		struct cgroup_rstat_cpu *rstatc;
 
-				rstatc = cgroup_rstat_cpu(css, cpu);
-				rstatc->updated_children = css;
-			}
+		if (is_base_css(css)) {
+			brstatc = cgroup_rstat_base_cpu(css, cpu);
+			u64_stats_init(&brstatc->bsync);
+			rstatc = brstatc->self;
 		}
-
+		rstatc->updated_children = css;
 	}
-
 	return 0;
 }
 
>  	}
>  
>  	return 0;
> @@ -522,9 +542,10 @@ static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
>  
>  static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
>  {
> -	struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(&cgrp->self, cpu);
> +	struct cgroup_rstat_base_cpu *rstatc = cgroup_rstat_base_cpu(
> +			&cgrp->self, cpu);
>  	struct cgroup *parent = cgroup_parent(cgrp);
> -	struct cgroup_rstat_cpu *prstatc;
> +	struct cgroup_rstat_base_cpu *prstatc;

Same here, we should use different names than rstatc and prstatc. Same
applies for the rest of the diff.

>  	struct cgroup_base_stat delta;
>  	unsigned seq;
>  
> @@ -552,25 +573,25 @@ static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
>  		cgroup_base_stat_add(&cgrp->last_bstat, &delta);
>  
>  		delta = rstatc->subtree_bstat;
> -		prstatc = cgroup_rstat_cpu(&parent->self, cpu);
> +		prstatc = cgroup_rstat_base_cpu(&parent->self, cpu);
>  		cgroup_base_stat_sub(&delta, &rstatc->last_subtree_bstat);
>  		cgroup_base_stat_add(&prstatc->subtree_bstat, &delta);
>  		cgroup_base_stat_add(&rstatc->last_subtree_bstat, &delta);
>  	}
>  }
>  
> -static struct cgroup_rstat_cpu *
> +static struct cgroup_rstat_base_cpu *
>  cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags)
>  {
> -	struct cgroup_rstat_cpu *rstatc;
> +	struct cgroup_rstat_base_cpu *rstatc;
>  
> -	rstatc = get_cpu_ptr(cgrp->self.rstat_cpu);
> +	rstatc = get_cpu_ptr(cgrp->self.rstat_base_cpu);
>  	*flags = u64_stats_update_begin_irqsave(&rstatc->bsync);
>  	return rstatc;
>  }
>  
>  static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
> -						 struct cgroup_rstat_cpu *rstatc,
> +						 struct cgroup_rstat_base_cpu *rstatc,
>  						 unsigned long flags)
>  {
>  	u64_stats_update_end_irqrestore(&rstatc->bsync, flags);
> @@ -580,7 +601,7 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
>  
>  void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
>  {
> -	struct cgroup_rstat_cpu *rstatc;
> +	struct cgroup_rstat_base_cpu *rstatc;
>  	unsigned long flags;
>  
>  	rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
> @@ -591,7 +612,7 @@ void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
>  void __cgroup_account_cputime_field(struct cgroup *cgrp,
>  				    enum cpu_usage_stat index, u64 delta_exec)
>  {
> -	struct cgroup_rstat_cpu *rstatc;
> +	struct cgroup_rstat_base_cpu *rstatc;
>  	unsigned long flags;
>  
>  	rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
> -- 
> 2.43.5
> 
> 

  parent reply	other threads:[~2025-02-28 20:33 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-27 21:55 [PATCH 0/4 v2] cgroup: separate rstat trees inwardvessel
2025-02-27 21:55 ` [PATCH 1/4 v2] cgroup: move cgroup_rstat from cgroup to cgroup_subsys_state inwardvessel
2025-02-27 22:43   ` Shakeel Butt
2025-02-28 19:04   ` Yosry Ahmed
2025-03-01  1:06     ` JP Kobryn
2025-03-01  1:25       ` Yosry Ahmed
2025-03-01  1:30         ` JP Kobryn
2025-03-03 18:18         ` Shakeel Butt
2025-03-03 18:21           ` Yosry Ahmed
2025-03-03 15:20   ` Michal Koutný
2025-03-03 19:31     ` JP Kobryn
2025-02-27 21:55 ` [PATCH 2/4 v2] cgroup: rstat lock indirection inwardvessel
2025-03-03 15:21   ` Michal Koutný
2025-02-27 21:55 ` [PATCH 3/4 v2] cgroup: separate rstat locks for subsystems inwardvessel
2025-02-27 22:52   ` Shakeel Butt
2025-02-28 16:07     ` JP Kobryn
2025-02-28 17:37   ` JP Kobryn
2025-02-28 19:20   ` Yosry Ahmed
2025-03-06 21:47     ` JP Kobryn
2025-03-01 23:00   ` kernel test robot
2025-03-03 15:22   ` Michal Koutný
2025-03-03 18:29     ` Yosry Ahmed
2025-03-03 18:40       ` Shakeel Butt
2025-03-03 19:23         ` JP Kobryn
2025-03-03 19:39           ` Shakeel Butt
2025-03-03 19:50         ` Yosry Ahmed
2025-03-03 20:09           ` Shakeel Butt
2025-03-03 18:49       ` Michal Koutný
2025-03-10 17:59         ` JP Kobryn
2025-03-11 13:49           ` Michal Koutný
2025-03-06 21:36       ` JP Kobryn
2025-03-03 23:49   ` kernel test robot
2025-02-27 21:55 ` [PATCH 4/4 v2] cgroup: separate rstat list pointers from base stats inwardvessel
2025-02-27 23:01   ` Shakeel Butt
2025-02-28 20:33   ` Yosry Ahmed [this message]
2025-02-28 18:22 ` [PATCH 0/4 v2] cgroup: separate rstat trees Yosry Ahmed
2025-03-03 15:19 ` Michal Koutný
2025-03-06  1:07   ` JP Kobryn
2025-03-11 13:49     ` Michal Koutný

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z8IdpGIwwdlk-oSk@google.com \
    --to=yosry.ahmed@linux.dev \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=inwardvessel@gmail.com \
    --cc=kernel-team@meta.com \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=shakeel.butt@linux.dev \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.