From: Josh Don <joshdon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
To: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>,
Zefan Li <lizefan.x-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>,
Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>,
Ingo Molnar <mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,
Peter Zijlstra <peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>,
Juri Lelli <juri.lelli-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,
Vincent Guittot
<vincent.guittot-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
Cc: Dietmar Eggemann <dietmar.eggemann-5wv7dgnIgG8@public.gmane.org>,
Steven Rostedt <rostedt-nx8X9YLhiw1AfugRpC6u6w@public.gmane.org>,
Ben Segall <bsegall-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
Mel Gorman <mgorman-l3A5Bk7waGM@public.gmane.org>,
Daniel Bristot de Oliveira
<bristot-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,
cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Josh Don <joshdon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Subject: [PATCH 1/2] cgroup: add cpu.stat_percpu
Date: Fri, 7 Jan 2022 15:41:37 -0800 [thread overview]
Message-ID: <20220107234138.1765668-1-joshdon@google.com> (raw)
cpu.stat displays metrics aggregated across all cpus, such as cgroup
usage. It would also be useful to be able to break these down by cpu; to
that end, this patch adds a new interface, 'cpu.stat_percpu', to display
the percpu values of these stats.
Each line of the output corresponds to a particular metric. The format
of each line is the name of the metric, followed by space-delimited
percpu values, one value per possible cpu (for example,
"usage_usec <val> <val> ..."). The reason for this approach (vs. having
each line correspond to a particular cpu) is to make it easier to display
extra subsystem-specific percpu fields.
Signed-off-by: Josh Don <joshdon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
include/linux/cgroup-defs.h | 5 +
kernel/cgroup/cgroup-internal.h | 1 +
kernel/cgroup/cgroup.c | 10 ++
kernel/cgroup/rstat.c | 159 ++++++++++++++++++++++++++++----
4 files changed, 155 insertions(+), 20 deletions(-)
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index db2e147e069f..7778a011f457 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -461,6 +461,11 @@ struct cgroup {
struct cgroup_base_stat bstat;
struct prev_cputime prev_cputime; /* for printing out cputime */
+ /* Per-cpu basic resource statistics. These are NULL on root. */
+ struct cgroup_base_stat __percpu *bstat_cpu;
+ struct cgroup_base_stat __percpu *last_bstat_cpu;
+ struct prev_cputime __percpu *prev_cputime_cpu;
+
/*
* list of pidlists, up to two for each namespace (one for procs, one
* for tasks); created on demand.
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index bfbeabc17a9d..07e932c4f875 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -254,6 +254,7 @@ int cgroup_rstat_init(struct cgroup *cgrp);
void cgroup_rstat_exit(struct cgroup *cgrp);
void cgroup_rstat_boot(void);
void cgroup_base_stat_cputime_show(struct seq_file *seq);
+void cgroup_base_stat_percpu_cputime_show(struct seq_file *seq);
/*
* namespace.c
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 919194de39c8..4f5ddce529eb 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3604,6 +3604,12 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
return ret;
}
+static int cpu_stat_percpu_show(struct seq_file *seq, void *v)
+{
+ cgroup_base_stat_percpu_cputime_show(seq);
+ return 0;
+}
+
#ifdef CONFIG_PSI
static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
{
@@ -5014,6 +5020,10 @@ static struct cftype cgroup_base_files[] = {
.name = "cpu.stat",
.seq_show = cpu_stat_show,
},
+ {
+ .name = "cpu.stat_percpu",
+ .seq_show = cpu_stat_percpu_show,
+ },
#ifdef CONFIG_PSI
{
.name = "io.pressure",
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 1486768f2318..1af37333e5bf 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -253,7 +253,19 @@ int cgroup_rstat_init(struct cgroup *cgrp)
if (!cgrp->rstat_cpu) {
cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
if (!cgrp->rstat_cpu)
- return -ENOMEM;
+ goto error_nomem;
+
+ cgrp->last_bstat_cpu = alloc_percpu(struct cgroup_base_stat);
+ if (!cgrp->last_bstat_cpu)
+ goto error_nomem;
+
+ cgrp->bstat_cpu = alloc_percpu(struct cgroup_base_stat);
+ if (!cgrp->bstat_cpu)
+ goto error_nomem;
+
+ cgrp->prev_cputime_cpu = alloc_percpu(struct prev_cputime);
+ if (!cgrp->prev_cputime_cpu)
+ goto error_nomem;
}
/* ->updated_children list is self terminated */
@@ -265,6 +277,21 @@ int cgroup_rstat_init(struct cgroup *cgrp)
}
return 0;
+
+error_nomem:
+ free_percpu(cgrp->rstat_cpu);
+ cgrp->rstat_cpu = NULL;
+
+ free_percpu(cgrp->last_bstat_cpu);
+ cgrp->last_bstat_cpu = NULL;
+
+ free_percpu(cgrp->bstat_cpu);
+ cgrp->bstat_cpu = NULL;
+
+ free_percpu(cgrp->prev_cputime_cpu);
+ cgrp->prev_cputime_cpu = NULL;
+
+ return -ENOMEM;
}
void cgroup_rstat_exit(struct cgroup *cgrp)
@@ -284,6 +311,12 @@ void cgroup_rstat_exit(struct cgroup *cgrp)
free_percpu(cgrp->rstat_cpu);
cgrp->rstat_cpu = NULL;
+ free_percpu(cgrp->last_bstat_cpu);
+ cgrp->last_bstat_cpu = NULL;
+ free_percpu(cgrp->bstat_cpu);
+ cgrp->bstat_cpu = NULL;
+ free_percpu(cgrp->prev_cputime_cpu);
+ cgrp->prev_cputime_cpu = NULL;
}
void __init cgroup_rstat_boot(void)
@@ -319,22 +352,29 @@ static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
struct cgroup *parent = cgroup_parent(cgrp);
struct cgroup_base_stat cur, delta;
+ struct cgroup_base_stat *bstat_cpu, *last_bstat_cpu;
unsigned seq;
/* Root-level stats are sourced from system-wide CPU stats */
if (!parent)
return;
+ /* these are not present on root */
+ bstat_cpu = per_cpu_ptr(cgrp->bstat_cpu, cpu);
+ last_bstat_cpu = per_cpu_ptr(cgrp->last_bstat_cpu, cpu);
+
/* fetch the current per-cpu values */
do {
seq = __u64_stats_fetch_begin(&rstatc->bsync);
cur.cputime = rstatc->bstat.cputime;
} while (__u64_stats_fetch_retry(&rstatc->bsync, seq));
+
/* propagate percpu delta to global */
delta = cur;
cgroup_base_stat_sub(&delta, &rstatc->last_bstat);
cgroup_base_stat_add(&cgrp->bstat, &delta);
+ cgroup_base_stat_add(bstat_cpu, &delta);
cgroup_base_stat_add(&rstatc->last_bstat, &delta);
/* propagate global delta to parent (unless that's root) */
@@ -343,6 +383,11 @@ static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
cgroup_base_stat_sub(&delta, &cgrp->last_bstat);
cgroup_base_stat_add(&parent->bstat, &delta);
cgroup_base_stat_add(&cgrp->last_bstat, &delta);
+
+ delta = *bstat_cpu;
+ cgroup_base_stat_sub(&delta, last_bstat_cpu);
+ cgroup_base_stat_add(per_cpu_ptr(parent->bstat_cpu, cpu), &delta);
+ cgroup_base_stat_add(last_bstat_cpu, &delta);
}
}
@@ -400,6 +445,30 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
}
+/* See root_cgroup_cputime. Note that this does not first reset cputime. */
+static void root_cgroup_add_cputime_cpu(struct task_cputime *cputime, int cpu)
+{
+ struct kernel_cpustat kcpustat;
+ u64 *cpustat = kcpustat.cpustat;
+ u64 user = 0;
+ u64 sys = 0;
+
+ kcpustat_cpu_fetch(&kcpustat, cpu);
+
+ user += cpustat[CPUTIME_USER];
+ user += cpustat[CPUTIME_NICE];
+ cputime->utime += user;
+
+ sys += cpustat[CPUTIME_SYSTEM];
+ sys += cpustat[CPUTIME_IRQ];
+ sys += cpustat[CPUTIME_SOFTIRQ];
+ cputime->stime += sys;
+
+ cputime->sum_exec_runtime += user;
+ cputime->sum_exec_runtime += sys;
+ cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL];
+}
+
/*
* compute the cputime for the root cgroup by getting the per cpu data
* at a global level, then categorizing the fields in a manner consistent
@@ -414,25 +483,7 @@ static void root_cgroup_cputime(struct task_cputime *cputime)
cputime->utime = 0;
cputime->sum_exec_runtime = 0;
for_each_possible_cpu(i) {
- struct kernel_cpustat kcpustat;
- u64 *cpustat = kcpustat.cpustat;
- u64 user = 0;
- u64 sys = 0;
-
- kcpustat_cpu_fetch(&kcpustat, i);
-
- user += cpustat[CPUTIME_USER];
- user += cpustat[CPUTIME_NICE];
- cputime->utime += user;
-
- sys += cpustat[CPUTIME_SYSTEM];
- sys += cpustat[CPUTIME_IRQ];
- sys += cpustat[CPUTIME_SOFTIRQ];
- cputime->stime += sys;
-
- cputime->sum_exec_runtime += user;
- cputime->sum_exec_runtime += sys;
- cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL];
+ root_cgroup_add_cputime_cpu(cputime, i);
}
}
@@ -464,3 +515,71 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
"system_usec %llu\n",
usage, utime, stime);
}
+
+void cgroup_base_stat_percpu_cputime_show(struct seq_file *seq)
+{
+ static DEFINE_MUTEX(mutex);
+ static DEFINE_PER_CPU(struct cgroup_base_stat, cached_percpu_stats);
+ struct cgroup_base_stat *cached_bstat;
+ struct cgroup *cgrp = seq_css(seq)->cgroup;
+ u64 val;
+ int cpu;
+
+ /* protects cached_percpu_stats */
+ mutex_lock(&mutex);
+
+ if (cgroup_parent(cgrp)) {
+ struct cgroup_base_stat *bstat_cpu;
+
+ cgroup_rstat_flush_hold(cgrp);
+
+ for_each_possible_cpu(cpu) {
+ bstat_cpu = per_cpu_ptr(cgrp->bstat_cpu, cpu);
+ cached_bstat = per_cpu_ptr(&cached_percpu_stats, cpu);
+
+ cached_bstat->cputime.sum_exec_runtime =
+ bstat_cpu->cputime.sum_exec_runtime;
+ cputime_adjust(&bstat_cpu->cputime,
+ per_cpu_ptr(cgrp->prev_cputime_cpu, cpu),
+ &cached_bstat->cputime.utime,
+ &cached_bstat->cputime.stime);
+ }
+
+ cgroup_rstat_flush_release();
+ } else {
+ for_each_possible_cpu(cpu) {
+ cached_bstat = per_cpu_ptr(&cached_percpu_stats, cpu);
+ memset(cached_bstat, 0, sizeof(*cached_bstat));
+ root_cgroup_add_cputime_cpu(&cached_bstat->cputime, cpu);
+ }
+ }
+
+ seq_puts(seq, "usage_usec");
+ for_each_possible_cpu(cpu) {
+ cached_bstat = per_cpu_ptr(&cached_percpu_stats, cpu);
+ val = cached_bstat->cputime.sum_exec_runtime;
+ do_div(val, NSEC_PER_USEC);
+ seq_printf(seq, " %llu", val);
+ }
+ seq_puts(seq, "\n");
+
+ seq_puts(seq, "user_usec");
+ for_each_possible_cpu(cpu) {
+ cached_bstat = per_cpu_ptr(&cached_percpu_stats, cpu);
+ val = cached_bstat->cputime.utime;
+ do_div(val, NSEC_PER_USEC);
+ seq_printf(seq, " %llu", val);
+ }
+ seq_puts(seq, "\n");
+
+ seq_puts(seq, "system_usec");
+ for_each_possible_cpu(cpu) {
+ cached_bstat = per_cpu_ptr(&cached_percpu_stats, cpu);
+ val = cached_bstat->cputime.stime;
+ do_div(val, NSEC_PER_USEC);
+ seq_printf(seq, " %llu", val);
+ }
+ seq_puts(seq, "\n");
+
+ mutex_unlock(&mutex);
+}
--
2.34.1.575.g55b058a8bb-goog
next reply other threads:[~2022-01-07 23:41 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-07 23:41 Josh Don [this message]
2022-01-07 23:41 ` [PATCH 2/2] sched: show percpu throttled stats Josh Don
[not found] ` <20220107234138.1765668-1-joshdon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
2022-01-11 12:49 ` [PATCH 1/2] cgroup: add cpu.stat_percpu Peter Zijlstra
2022-01-11 23:38 ` Josh Don
2022-01-12 8:30 ` Peter Zijlstra
[not found] ` <CABk29NuGs_9uxgbv678W=BGGinZNiUHO5T57FHGbOG+HP-FT2g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2022-01-12 20:23 ` Tejun Heo
[not found] ` <Yd8429VrPCay9cn4-NiLfg/pYEd1N0TnZuCh8vA@public.gmane.org>
2022-01-13 1:55 ` Josh Don
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220107234138.1765668-1-joshdon@google.com \
--to=joshdon-hpiqsd4aklfqt0dzr+alfa@public.gmane.org \
--cc=bristot-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
--cc=bsegall-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org \
--cc=cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=dietmar.eggemann-5wv7dgnIgG8@public.gmane.org \
--cc=hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org \
--cc=juri.lelli-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
--cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=lizefan.x-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org \
--cc=mgorman-l3A5Bk7waGM@public.gmane.org \
--cc=mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
--cc=peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org \
--cc=rostedt-nx8X9YLhiw1AfugRpC6u6w@public.gmane.org \
--cc=tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
--cc=vincent.guittot-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).