From: Shakeel Butt <shakeel.butt@linux.dev>
To: Tejun Heo <tj@kernel.org>, Andrew Morton <akpm@linux-foundation.org>
Cc: "JP Kobryn" <inwardvessel@gmail.com>,
"Johannes Weiner" <hannes@cmpxchg.org>,
"Michal Hocko" <mhocko@kernel.org>,
"Roman Gushchin" <roman.gushchin@linux.dev>,
"Muchun Song" <muchun.song@linux.dev>,
"Vlastimil Babka" <vbabka@suse.cz>,
"Alexei Starovoitov" <ast@kernel.org>,
"Sebastian Andrzej Siewior" <bigeasy@linutronix.de>,
"Michal Koutný" <mkoutny@suse.com>,
"Harry Yoo" <harry.yoo@oracle.com>,
"Yosry Ahmed" <yosry.ahmed@linux.dev>,
bpf@vger.kernel.org, linux-mm@kvack.org, cgroups@vger.kernel.org,
linux-kernel@vger.kernel.org,
"Meta kernel team" <kernel-team@meta.com>
Subject: [PATCH v2 3/4] cgroup: remove per-cpu per-subsystem locks
Date: Wed, 11 Jun 2025 15:15:31 -0700 [thread overview]
Message-ID: <20250611221532.2513772-4-shakeel.butt@linux.dev> (raw)
In-Reply-To: <20250611221532.2513772-1-shakeel.butt@linux.dev>
The rstat update side used to insert the cgroup whose stats are updated
in the update tree and the read side flush the update tree to get the
latest uptodate stats. The per-cpu per-subsystem locks were used to
synchronize the update and flush side. However now the update side does
not access update tree but uses per-cpu lockless lists. So there is no
need for locks to synchronize update and flush side. Let's remove them.
Suggested-by: JP Kobryn <inwardvessel@gmail.com>
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
include/linux/cgroup-defs.h | 7 ---
include/trace/events/cgroup.h | 47 ---------------
kernel/cgroup/rstat.c | 107 ++--------------------------------
3 files changed, 4 insertions(+), 157 deletions(-)
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 45860fe5dd0c..bca3562e3df4 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -375,12 +375,6 @@ struct css_rstat_cpu {
* Child cgroups with stat updates on this cpu since the last read
* are linked on the parent's ->updated_children through
* ->updated_next. updated_children is terminated by its container css.
- *
- * In addition to being more compact, singly-linked list pointing to
- * the css makes it unnecessary for each per-cpu struct to point back
- * to the associated css.
- *
- * Protected by per-cpu css->ss->rstat_ss_cpu_lock.
*/
struct cgroup_subsys_state *updated_children;
struct cgroup_subsys_state *updated_next; /* NULL if not on the list */
@@ -824,7 +818,6 @@ struct cgroup_subsys {
unsigned int depends_on;
spinlock_t rstat_ss_lock;
- raw_spinlock_t __percpu *rstat_ss_cpu_lock;
struct llist_head __percpu *lhead; /* lockless update list head */
};
diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index 7d332387be6c..ba9229af9a34 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -257,53 +257,6 @@ DEFINE_EVENT(cgroup_rstat, cgroup_rstat_unlock,
TP_ARGS(cgrp, cpu, contended)
);
-/*
- * Related to per CPU locks:
- * global rstat_base_cpu_lock for base stats
- * cgroup_subsys::rstat_ss_cpu_lock for subsystem stats
- */
-DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended,
-
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
-
- TP_ARGS(cgrp, cpu, contended)
-);
-
-DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended_fastpath,
-
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
-
- TP_ARGS(cgrp, cpu, contended)
-);
-
-DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_locked,
-
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
-
- TP_ARGS(cgrp, cpu, contended)
-);
-
-DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_locked_fastpath,
-
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
-
- TP_ARGS(cgrp, cpu, contended)
-);
-
-DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock,
-
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
-
- TP_ARGS(cgrp, cpu, contended)
-);
-
-DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock_fastpath,
-
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
-
- TP_ARGS(cgrp, cpu, contended)
-);
-
#endif /* _TRACE_CGROUP_H */
/* This part must be outside protection */
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index a7550961dd12..c8a48cf83878 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -10,7 +10,6 @@
#include <trace/events/cgroup.h>
static DEFINE_SPINLOCK(rstat_base_lock);
-static DEFINE_PER_CPU(raw_spinlock_t, rstat_base_cpu_lock);
static DEFINE_PER_CPU(struct llist_head, rstat_backlog_list);
static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);
@@ -53,86 +52,6 @@ static inline struct llist_head *ss_lhead_cpu(struct cgroup_subsys *ss, int cpu)
return per_cpu_ptr(&rstat_backlog_list, cpu);
}
-static raw_spinlock_t *ss_rstat_cpu_lock(struct cgroup_subsys *ss, int cpu)
-{
- if (ss) {
- /*
- * Depending on config, the subsystem per-cpu lock type may be an
- * empty struct. In enviromnents where this is the case, allocation
- * of this field is not performed in ss_rstat_init(). Avoid a
- * cpu-based offset relative to NULL by returning early. When the
- * lock type is zero in size, the corresponding lock functions are
- * no-ops so passing them NULL is acceptable.
- */
- if (sizeof(*ss->rstat_ss_cpu_lock) == 0)
- return NULL;
-
- return per_cpu_ptr(ss->rstat_ss_cpu_lock, cpu);
- }
-
- return per_cpu_ptr(&rstat_base_cpu_lock, cpu);
-}
-
-/*
- * Helper functions for rstat per CPU locks.
- *
- * This makes it easier to diagnose locking issues and contention in
- * production environments. The parameter @fast_path determine the
- * tracepoints being added, allowing us to diagnose "flush" related
- * operations without handling high-frequency fast-path "update" events.
- */
-static __always_inline
-unsigned long _css_rstat_cpu_lock(struct cgroup_subsys_state *css, int cpu,
- const bool fast_path)
-{
- struct cgroup *cgrp = css->cgroup;
- raw_spinlock_t *cpu_lock;
- unsigned long flags;
- bool contended;
-
- /*
- * The _irqsave() is needed because the locks used for flushing are
- * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring this lock
- * with the _irq() suffix only disables interrupts on a non-PREEMPT_RT
- * kernel. The raw_spinlock_t below disables interrupts on both
- * configurations. The _irqsave() ensures that interrupts are always
- * disabled and later restored.
- */
- cpu_lock = ss_rstat_cpu_lock(css->ss, cpu);
- contended = !raw_spin_trylock_irqsave(cpu_lock, flags);
- if (contended) {
- if (fast_path)
- trace_cgroup_rstat_cpu_lock_contended_fastpath(cgrp, cpu, contended);
- else
- trace_cgroup_rstat_cpu_lock_contended(cgrp, cpu, contended);
-
- raw_spin_lock_irqsave(cpu_lock, flags);
- }
-
- if (fast_path)
- trace_cgroup_rstat_cpu_locked_fastpath(cgrp, cpu, contended);
- else
- trace_cgroup_rstat_cpu_locked(cgrp, cpu, contended);
-
- return flags;
-}
-
-static __always_inline
-void _css_rstat_cpu_unlock(struct cgroup_subsys_state *css, int cpu,
- unsigned long flags, const bool fast_path)
-{
- struct cgroup *cgrp = css->cgroup;
- raw_spinlock_t *cpu_lock;
-
- if (fast_path)
- trace_cgroup_rstat_cpu_unlock_fastpath(cgrp, cpu, false);
- else
- trace_cgroup_rstat_cpu_unlock(cgrp, cpu, false);
-
- cpu_lock = ss_rstat_cpu_lock(css->ss, cpu);
- raw_spin_unlock_irqrestore(cpu_lock, flags);
-}
-
/**
* css_rstat_updated - keep track of updated rstat_cpu
* @css: target cgroup subsystem state
@@ -335,15 +254,12 @@ static struct cgroup_subsys_state *css_rstat_updated_list(
{
struct css_rstat_cpu *rstatc = css_rstat_cpu(root, cpu);
struct cgroup_subsys_state *head = NULL, *parent, *child;
- unsigned long flags;
-
- flags = _css_rstat_cpu_lock(root, cpu, false);
css_process_update_tree(root->ss, cpu);
/* Return NULL if this subtree is not on-list */
if (!rstatc->updated_next)
- goto unlock_ret;
+ return NULL;
/*
* Unlink @root from its parent. As the updated_children list is
@@ -375,8 +291,7 @@ static struct cgroup_subsys_state *css_rstat_updated_list(
rstatc->updated_children = root;
if (child != root)
head = css_rstat_push_children(head, child, cpu);
-unlock_ret:
- _css_rstat_cpu_unlock(root, cpu, flags, false);
+
return head;
}
@@ -572,29 +487,15 @@ int __init ss_rstat_init(struct cgroup_subsys *ss)
{
int cpu;
- /*
- * Depending on config, the subsystem per-cpu lock type may be an empty
- * struct. Avoid allocating a size of zero in this case.
- */
- if (ss && sizeof(*ss->rstat_ss_cpu_lock)) {
- ss->rstat_ss_cpu_lock = alloc_percpu(raw_spinlock_t);
- if (!ss->rstat_ss_cpu_lock)
- return -ENOMEM;
- }
-
if (ss) {
ss->lhead = alloc_percpu(struct llist_head);
- if (!ss->lhead) {
- free_percpu(ss->rstat_ss_cpu_lock);
+ if (!ss->lhead)
return -ENOMEM;
- }
}
spin_lock_init(ss_rstat_lock(ss));
- for_each_possible_cpu(cpu) {
- raw_spin_lock_init(ss_rstat_cpu_lock(ss, cpu));
+ for_each_possible_cpu(cpu)
init_llist_head(ss_lhead_cpu(ss, cpu));
- }
return 0;
}
--
2.47.1
next prev parent reply other threads:[~2025-06-11 22:16 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-11 22:15 [PATCH v2 0/4] cgroup: nmi safe css_rstat_updated Shakeel Butt
2025-06-11 22:15 ` [PATCH v2 1/4] cgroup: support to enable nmi-safe css_rstat_updated Shakeel Butt
2025-06-11 22:15 ` [PATCH v2 2/4] cgroup: make css_rstat_updated nmi safe Shakeel Butt
2025-06-11 22:15 ` Shakeel Butt [this message]
2025-06-11 22:15 ` [PATCH v2 4/4] memcg: cgroup: call css_rstat_updated irrespective of in_nmi() Shakeel Butt
2025-06-16 18:15 ` [PATCH v2 0/4] cgroup: nmi safe css_rstat_updated Tejun Heo
2025-06-16 19:20 ` Shakeel Butt
2025-06-17 19:06 ` Tejun Heo
2025-06-17 19:38 ` Shakeel Butt
2025-06-16 20:08 ` JP Kobryn
2025-06-16 20:13 ` Shakeel Butt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250611221532.2513772-4-shakeel.butt@linux.dev \
--to=shakeel.butt@linux.dev \
--cc=akpm@linux-foundation.org \
--cc=ast@kernel.org \
--cc=bigeasy@linutronix.de \
--cc=bpf@vger.kernel.org \
--cc=cgroups@vger.kernel.org \
--cc=hannes@cmpxchg.org \
--cc=harry.yoo@oracle.com \
--cc=inwardvessel@gmail.com \
--cc=kernel-team@meta.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=mkoutny@suse.com \
--cc=muchun.song@linux.dev \
--cc=roman.gushchin@linux.dev \
--cc=tj@kernel.org \
--cc=vbabka@suse.cz \
--cc=yosry.ahmed@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).