From: Yosry Ahmed <yosry.ahmed@linux.dev>
To: JP Kobryn <inwardvessel@gmail.com>
Cc: shakeel.butt@linux.dev, tj@kernel.org, mhocko@kernel.org,
hannes@cmpxchg.org, akpm@linux-foundation.org,
linux-mm@kvack.org, cgroups@vger.kernel.org,
kernel-team@meta.com
Subject: Re: [PATCH 01/11] cgroup: move rstat pointers into struct of their own
Date: Mon, 24 Feb 2025 18:36:22 +0000 [thread overview]
Message-ID: <Z7y8JkG3rBXCi_cr@google.com> (raw)
In-Reply-To: <e0ff8143-c6fd-4185-b953-d543ffd58535@gmail.com>
On Mon, Feb 24, 2025 at 09:06:21AM -0800, JP Kobryn wrote:
> On 2/20/25 8:53 AM, Yosry Ahmed wrote:
> > On Mon, Feb 17, 2025 at 07:14:38PM -0800, JP Kobryn wrote:
> > > The rstat infrastructure makes use of pointers for list management.
> > > These pointers only exist as fields in the cgroup struct, so moving them
> > > into their own struct will allow them to be used elsewhere. The base
> > > stat entities are included with them for now.
> > >
> > > Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
> > > ---
> > > include/linux/cgroup-defs.h | 90 +-----------------
> > > include/linux/cgroup_rstat.h | 92 +++++++++++++++++++
> > > kernel/cgroup/cgroup.c | 3 +-
> > > kernel/cgroup/rstat.c | 27 +++---
> > > .../selftests/bpf/progs/btf_type_tag_percpu.c | 4 +-
> > > 5 files changed, 112 insertions(+), 104 deletions(-)
> > > create mode 100644 include/linux/cgroup_rstat.h
> > >
> > > diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
> > > index 1b20d2d8ef7c..6b6cc027fe70 100644
> > > --- a/include/linux/cgroup-defs.h
> > > +++ b/include/linux/cgroup-defs.h
> > > @@ -17,7 +17,7 @@
> > > #include <linux/refcount.h>
> > > #include <linux/percpu-refcount.h>
> > > #include <linux/percpu-rwsem.h>
> > > -#include <linux/u64_stats_sync.h>
> > > +#include <linux/cgroup_rstat.h>
> > > #include <linux/workqueue.h>
> > > #include <linux/bpf-cgroup-defs.h>
> > > #include <linux/psi_types.h>
> > > @@ -321,78 +321,6 @@ struct css_set {
> > > struct rcu_head rcu_head;
> > > };
> > > -struct cgroup_base_stat {
> > > - struct task_cputime cputime;
> > > -
> > > -#ifdef CONFIG_SCHED_CORE
> > > - u64 forceidle_sum;
> > > -#endif
> > > - u64 ntime;
> > > -};
> > > -
> > > -/*
> > > - * rstat - cgroup scalable recursive statistics. Accounting is done
> > > - * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
> > > - * hierarchy on reads.
> > > - *
> > > - * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
> > > - * linked into the updated tree. On the following read, propagation only
> > > - * considers and consumes the updated tree. This makes reading O(the
> > > - * number of descendants which have been active since last read) instead of
> > > - * O(the total number of descendants).
> > > - *
> > > - * This is important because there can be a lot of (draining) cgroups which
> > > - * aren't active and stat may be read frequently. The combination can
> > > - * become very expensive. By propagating selectively, increasing reading
> > > - * frequency decreases the cost of each read.
> > > - *
> > > - * This struct hosts both the fields which implement the above -
> > > - * updated_children and updated_next - and the fields which track basic
> > > - * resource statistics on top of it - bsync, bstat and last_bstat.
> > > - */
> > > -struct cgroup_rstat_cpu {
> > > - /*
> > > - * ->bsync protects ->bstat. These are the only fields which get
> > > - * updated in the hot path.
> > > - */
> > > - struct u64_stats_sync bsync;
> > > - struct cgroup_base_stat bstat;
> > > -
> > > - /*
> > > - * Snapshots at the last reading. These are used to calculate the
> > > - * deltas to propagate to the global counters.
> > > - */
> > > - struct cgroup_base_stat last_bstat;
> > > -
> > > - /*
> > > - * This field is used to record the cumulative per-cpu time of
> > > - * the cgroup and its descendants. Currently it can be read via
> > > - * eBPF/drgn etc, and we are still trying to determine how to
> > > - * expose it in the cgroupfs interface.
> > > - */
> > > - struct cgroup_base_stat subtree_bstat;
> > > -
> > > - /*
> > > - * Snapshots at the last reading. These are used to calculate the
> > > - * deltas to propagate to the per-cpu subtree_bstat.
> > > - */
> > > - struct cgroup_base_stat last_subtree_bstat;
> > > -
> > > - /*
> > > - * Child cgroups with stat updates on this cpu since the last read
> > > - * are linked on the parent's ->updated_children through
> > > - * ->updated_next.
> > > - *
> > > - * In addition to being more compact, singly-linked list pointing
> > > - * to the cgroup makes it unnecessary for each per-cpu struct to
> > > - * point back to the associated cgroup.
> > > - *
> > > - * Protected by per-cpu cgroup_rstat_cpu_lock.
> > > - */
> > > - struct cgroup *updated_children; /* terminated by self cgroup */
> > > - struct cgroup *updated_next; /* NULL iff not on the list */
> > > -};
> > > -
> > > struct cgroup_freezer_state {
> > > /* Should the cgroup and its descendants be frozen. */
> > > bool freeze;
> > > @@ -517,23 +445,9 @@ struct cgroup {
> > > struct cgroup *old_dom_cgrp; /* used while enabling threaded */
> > > /* per-cpu recursive resource statistics */
> > > - struct cgroup_rstat_cpu __percpu *rstat_cpu;
> > > + struct cgroup_rstat rstat;
> > > struct list_head rstat_css_list;
> > > - /*
> > > - * Add padding to separate the read mostly rstat_cpu and
> > > - * rstat_css_list into a different cacheline from the following
> > > - * rstat_flush_next and *bstat fields which can have frequent updates.
> > > - */
> > > - CACHELINE_PADDING(_pad_);
> > > -
> > > - /*
> > > - * A singly-linked list of cgroup structures to be rstat flushed.
> > > - * This is a scratch field to be used exclusively by
> > > - * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock.
> > > - */
> > > - struct cgroup *rstat_flush_next;
> > > -
> > > /* cgroup basic resource statistics */
> > > struct cgroup_base_stat last_bstat;
> > > struct cgroup_base_stat bstat;
> > > diff --git a/include/linux/cgroup_rstat.h b/include/linux/cgroup_rstat.h
> > > new file mode 100644
> > > index 000000000000..f95474d6f8ab
> > > --- /dev/null
> > > +++ b/include/linux/cgroup_rstat.h
> > > @@ -0,0 +1,92 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +#ifndef _LINUX_RSTAT_H
> > > +#define _LINUX_RSTAT_H
> > > +
> > > +#include <linux/u64_stats_sync.h>
> > > +
> > > +struct cgroup_rstat_cpu;
> >
> > Why do we need the forward declaration instead of just defining struct
> > cgroup_rstat_cpu first? Also, why do we need a new header for these
> > definitions rather than just adding struct cgroup_rstat to
> > cgroup-defs.h?
>
> The new header was added so the cgroup_rstat type can be used in bpf
> cgroup-defs.h. As for the forward declaration, this was done so that
> updated_next and updated children fields of the cgroup_rstat_cpu can
> change type from from cgroup to cgroup_rstat.
>
> Regardless, based on the direction we are moving with bpf sharing the
> "self" tree, this new header will NOT be needed in v2.
Nice. Seems like the series will be simplified quite a bit by doing
this.
next prev parent reply other threads:[~2025-02-24 18:36 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-18 3:14 [PATCH 00/11] cgroup: separate rstat trees JP Kobryn
2025-02-18 3:14 ` [PATCH 01/11] cgroup: move rstat pointers into struct of their own JP Kobryn
2025-02-19 1:05 ` Shakeel Butt
2025-02-19 1:23 ` Shakeel Butt
2025-02-20 16:53 ` Yosry Ahmed
2025-02-24 17:06 ` JP Kobryn
2025-02-24 18:36 ` Yosry Ahmed [this message]
2025-02-18 3:14 ` [PATCH 02/11] cgroup: add level of indirection for cgroup_rstat struct JP Kobryn
2025-02-19 2:26 ` Shakeel Butt
2025-02-20 17:08 ` Yosry Ahmed
2025-02-19 5:57 ` kernel test robot
2025-02-18 3:14 ` [PATCH 03/11] cgroup: move cgroup_rstat from cgroup to cgroup_subsys_state JP Kobryn
2025-02-20 17:06 ` Shakeel Butt
2025-02-20 17:22 ` Yosry Ahmed
2025-02-25 19:20 ` JP Kobryn
2025-02-18 3:14 ` [PATCH 04/11] cgroup: introduce cgroup_rstat_ops JP Kobryn
2025-02-19 7:21 ` kernel test robot
2025-02-20 17:50 ` Shakeel Butt
2025-02-18 3:14 ` [PATCH 05/11] cgroup: separate rstat for bpf cgroups JP Kobryn
2025-02-21 18:14 ` Shakeel Butt
2025-02-18 3:14 ` [PATCH 06/11] cgroup: rstat lock indirection JP Kobryn
2025-02-21 22:09 ` Shakeel Butt
2025-02-18 3:14 ` [PATCH 07/11] cgroup: fetch cpu-specific lock in rstat cpu lock helpers JP Kobryn
2025-02-21 22:35 ` Shakeel Butt
2025-02-18 3:14 ` [PATCH 08/11] cgroup: rstat cpu lock indirection JP Kobryn
2025-02-19 8:48 ` kernel test robot
2025-02-22 0:18 ` Shakeel Butt
2025-02-18 3:14 ` [PATCH 09/11] cgroup: separate rstat locks for bpf cgroups JP Kobryn
2025-02-18 3:14 ` [PATCH 10/11] cgroup: separate rstat locks for subsystems JP Kobryn
2025-02-22 0:23 ` Shakeel Butt
2025-02-18 3:14 ` [PATCH 11/11] cgroup: separate rstat list pointers from base stats JP Kobryn
2025-02-22 0:28 ` Shakeel Butt
2025-02-20 15:51 ` [PATCH 00/11] cgroup: separate rstat trees Tejun Heo
2025-02-27 23:44 ` JP Kobryn
2025-02-20 17:26 ` Yosry Ahmed
2025-02-20 17:53 ` Shakeel Butt
2025-02-20 17:59 ` Yosry Ahmed
2025-02-20 18:14 ` JP Kobryn
2025-02-20 20:04 ` Yosry Ahmed
2025-02-20 20:22 ` Yosry Ahmed
2025-02-24 21:13 ` Shakeel Butt
2025-02-24 21:54 ` Yosry Ahmed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Z7y8JkG3rBXCi_cr@google.com \
--to=yosry.ahmed@linux.dev \
--cc=akpm@linux-foundation.org \
--cc=cgroups@vger.kernel.org \
--cc=hannes@cmpxchg.org \
--cc=inwardvessel@gmail.com \
--cc=kernel-team@meta.com \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=shakeel.butt@linux.dev \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.