From: Frederic Weisbecker <frederic@kernel.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>,
"Christophe Leroy (CS GROUP)" <chleroy@kernel.org>,
"Rafael J. Wysocki" <rafael@kernel.org>,
Alexander Gordeev <agordeev@linux.ibm.com>,
Anna-Maria Behnsen <anna-maria@linutronix.de>,
Ben Segall <bsegall@google.com>,
Boqun Feng <boqun.feng@gmail.com>,
Christian Borntraeger <borntraeger@linux.ibm.com>,
Dietmar Eggemann <dietmar.eggemann@arm.com>,
Heiko Carstens <hca@linux.ibm.com>,
Ingo Molnar <mingo@redhat.com>,
Jan Kiszka <jan.kiszka@siemens.com>,
Joel Fernandes <joelagnelf@nvidia.com>,
Juri Lelli <juri.lelli@redhat.com>,
Kieran Bingham <kbingham@kernel.org>,
Madhavan Srinivasan <maddy@linux.ibm.com>,
Mel Gorman <mgorman@suse.de>,
Michael Ellerman <mpe@ellerman.id.au>,
Neeraj Upadhyay <neeraj.upadhyay@kernel.org>,
Nicholas Piggin <npiggin@gmail.com>,
"Paul E . McKenney" <paulmck@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Shrikanth Hegde <sshegde@linux.ibm.com>,
Steven Rostedt <rostedt@goodmis.org>,
Sven Schnelle <svens@linux.ibm.com>,
Thomas Gleixner <tglx@linutronix.de>,
Uladzislau Rezki <urezki@gmail.com>,
Valentin Schneider <vschneid@redhat.com>,
Vasily Gorbik <gor@linux.ibm.com>,
Vincent Guittot <vincent.guittot@linaro.org>,
Viresh Kumar <viresh.kumar@linaro.org>,
Xin Zhao <jackzxcui1989@163.com>,
linux-pm@vger.kernel.org, linux-s390@vger.kernel.org,
linuxppc-dev@lists.ozlabs.org
Subject: [PATCH 11/14] tick/sched: Consolidate idle time fetching APIs
Date: Tue, 31 Mar 2026 15:16:19 +0200 [thread overview]
Message-ID: <20260331131622.30505-12-frederic@kernel.org> (raw)
In-Reply-To: <20260331131622.30505-1-frederic@kernel.org>
Fetching the idle cputime is available through a variety of accessors
all over the place depending on the different accounting flavours and
needs:
- idle vtime generic accounting can be accessed by kcpustat_field(),
kcpustat_cpu_fetch(), get_idle/iowait_time() and
get_cpu_idle/iowait_time_us()
- dynticks-idle accounting can only be accessed by get_idle/iowait_time()
or get_cpu_idle/iowait_time_us()
- CONFIG_NO_HZ_COMMON=n idle accounting can be accessed by kcpustat_field()
kcpustat_cpu_fetch(), or get_idle/iowait_time() but not by
get_cpu_idle/iowait_time_us()
Moreover get_idle/iowait_time() relies on get_cpu_idle/iowait_time_us()
with a non-sensical conversion to microseconds and back to nanoseconds
on the way.
Start consolidating the APIs with removing get_idle/iowait_time() and
make kcpustat_field() and kcpustat_cpu_fetch() work for all cases.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Tested-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
fs/proc/stat.c | 40 +++---------------------
fs/proc/uptime.c | 8 ++---
include/linux/kernel_stat.h | 34 ++++++++++++++++++---
kernel/sched/cputime.c | 61 ++++++++++++++++++++++++-------------
4 files changed, 76 insertions(+), 67 deletions(-)
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 8b444e862319..c00468a83f64 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -22,38 +22,6 @@
#define arch_irq_stat() 0
#endif
-u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
-{
- u64 idle, idle_usecs = -1ULL;
-
- if (cpu_online(cpu))
- idle_usecs = get_cpu_idle_time_us(cpu, NULL);
-
- if (idle_usecs == -1ULL)
- /* !NO_HZ or cpu offline so we can rely on cpustat.idle */
- idle = kcs->cpustat[CPUTIME_IDLE];
- else
- idle = idle_usecs * NSEC_PER_USEC;
-
- return idle;
-}
-
-static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
-{
- u64 iowait, iowait_usecs = -1ULL;
-
- if (cpu_online(cpu))
- iowait_usecs = get_cpu_iowait_time_us(cpu, NULL);
-
- if (iowait_usecs == -1ULL)
- /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
- iowait = kcs->cpustat[CPUTIME_IOWAIT];
- else
- iowait = iowait_usecs * NSEC_PER_USEC;
-
- return iowait;
-}
-
static void show_irq_gap(struct seq_file *p, unsigned int gap)
{
static const char zeros[] = " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0";
@@ -105,8 +73,8 @@ static int show_stat(struct seq_file *p, void *v)
user += cpustat[CPUTIME_USER];
nice += cpustat[CPUTIME_NICE];
system += cpustat[CPUTIME_SYSTEM];
- idle += get_idle_time(&kcpustat, i);
- iowait += get_iowait_time(&kcpustat, i);
+ idle += cpustat[CPUTIME_IDLE];
+ iowait += cpustat[CPUTIME_IOWAIT];
irq += cpustat[CPUTIME_IRQ];
softirq += cpustat[CPUTIME_SOFTIRQ];
steal += cpustat[CPUTIME_STEAL];
@@ -146,8 +114,8 @@ static int show_stat(struct seq_file *p, void *v)
user = cpustat[CPUTIME_USER];
nice = cpustat[CPUTIME_NICE];
system = cpustat[CPUTIME_SYSTEM];
- idle = get_idle_time(&kcpustat, i);
- iowait = get_iowait_time(&kcpustat, i);
+ idle = cpustat[CPUTIME_IDLE];
+ iowait = cpustat[CPUTIME_IOWAIT];
irq = cpustat[CPUTIME_IRQ];
softirq = cpustat[CPUTIME_SOFTIRQ];
steal = cpustat[CPUTIME_STEAL];
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index b5343d209381..433aa947cd57 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -18,12 +18,8 @@ static int uptime_proc_show(struct seq_file *m, void *v)
int i;
idle_nsec = 0;
- for_each_possible_cpu(i) {
- struct kernel_cpustat kcs;
-
- kcpustat_cpu_fetch(&kcs, i);
- idle_nsec += get_idle_time(&kcs, i);
- }
+ for_each_possible_cpu(i)
+ idle_nsec += kcpustat_field(CPUTIME_IDLE, i);
ktime_get_boottime_ts64(&uptime);
timens_add_boottime(&uptime);
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 9343353ac7a3..3680519d7b2c 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -110,32 +110,59 @@ extern void kcpustat_dyntick_start(u64 now);
extern void kcpustat_dyntick_stop(u64 now);
extern void kcpustat_irq_enter(u64 now);
extern void kcpustat_irq_exit(u64 now);
+extern u64 kcpustat_field_idle(int cpu);
+extern u64 kcpustat_field_iowait(int cpu);
static inline bool kcpustat_idle_dyntick(void)
{
return __this_cpu_read(kernel_cpustat.idle_dyntick);
}
#else
+static inline u64 kcpustat_field_idle(int cpu)
+{
+ return kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
+}
+static inline u64 kcpustat_field_iowait(int cpu)
+{
+ return kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
+}
+
static inline bool kcpustat_idle_dyntick(void)
{
return false;
}
#endif /* CONFIG_NO_HZ_COMMON */
+/* Fetch cputime values when vtime is disabled on a CPU */
+static inline u64 kcpustat_field_default(enum cpu_usage_stat usage, int cpu)
+{
+ if (usage == CPUTIME_IDLE)
+ return kcpustat_field_idle(cpu);
+ if (usage == CPUTIME_IOWAIT)
+ return kcpustat_field_iowait(cpu);
+ return kcpustat_cpu(cpu).cpustat[usage];
+}
+
+static inline void kcpustat_cpu_fetch_default(struct kernel_cpustat *dst, int cpu)
+{
+ *dst = kcpustat_cpu(cpu);
+ dst->cpustat[CPUTIME_IDLE] = kcpustat_field_idle(cpu);
+ dst->cpustat[CPUTIME_IOWAIT] = kcpustat_field_iowait(cpu);
+}
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern u64 kcpustat_field(enum cpu_usage_stat usage, int cpu);
extern void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu);
#else
static inline u64 kcpustat_field(enum cpu_usage_stat usage, int cpu)
{
- return kcpustat_cpu(cpu).cpustat[usage];
+ return kcpustat_field_default(usage, cpu);
}
static inline void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
{
- *dst = kcpustat_cpu(cpu);
+ kcpustat_cpu_fetch_default(dst, cpu);
}
-
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */
extern void account_user_time(struct task_struct *, u64);
@@ -145,7 +172,6 @@ extern void account_system_index_time(struct task_struct *, u64,
enum cpu_usage_stat);
extern void account_steal_time(u64);
extern void account_idle_time(u64);
-extern u64 get_idle_time(struct kernel_cpustat *kcs, int cpu);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline void account_process_tick(struct task_struct *tsk, int user)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 4a259f2700a1..ebb9eee049e0 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -490,24 +490,14 @@ void kcpustat_irq_exit(u64 now)
kcpustat_idle_start(kc, now);
}
-static u64 get_cpu_sleep_time_us(int cpu, enum cpu_usage_stat idx,
- bool compute_delta, u64 *last_update_time)
+static u64 kcpustat_field_dyntick(int cpu, enum cpu_usage_stat idx,
+ bool compute_delta, u64 now)
{
struct kernel_cpustat *kc = &kcpustat_cpu(cpu);
u64 *cpustat = kc->cpustat;
unsigned int seq;
- ktime_t now;
u64 idle;
- now = ktime_get();
- if (last_update_time)
- *last_update_time = ktime_to_us(now);
-
- if (vtime_generic_enabled_cpu(cpu)) {
- idle = kcpustat_field(idx, cpu);
- goto to_us;
- }
-
do {
seq = read_seqcount_begin(&kc->idle_sleeptime_seq);
@@ -517,12 +507,42 @@ static u64 get_cpu_sleep_time_us(int cpu, enum cpu_usage_stat idx,
idle = cpustat[idx];
} while (read_seqcount_retry(&kc->idle_sleeptime_seq, seq));
-to_us:
- do_div(idle, NSEC_PER_USEC);
-
return idle;
}
+u64 kcpustat_field_idle(int cpu)
+{
+ return kcpustat_field_dyntick(cpu, CPUTIME_IDLE,
+ !nr_iowait_cpu(cpu), ktime_get());
+}
+EXPORT_SYMBOL_GPL(kcpustat_field_idle);
+
+u64 kcpustat_field_iowait(int cpu)
+{
+ return kcpustat_field_dyntick(cpu, CPUTIME_IOWAIT,
+ nr_iowait_cpu(cpu), ktime_get());
+}
+EXPORT_SYMBOL_GPL(kcpustat_field_iowait);
+
+static u64 get_cpu_sleep_time_us(int cpu, enum cpu_usage_stat idx,
+ bool compute_delta, u64 *last_update_time)
+{
+ ktime_t now = ktime_get();
+ u64 res;
+
+ if (vtime_generic_enabled_cpu(cpu))
+ res = kcpustat_field(idx, cpu);
+ else
+ res = kcpustat_field_dyntick(cpu, idx, compute_delta, now);
+
+ do_div(res, NSEC_PER_USEC);
+
+ if (last_update_time)
+ *last_update_time = ktime_to_us(now);
+
+ return res;
+}
+
/**
* get_cpu_idle_time_us - get the total idle time of a CPU
* @cpu: CPU number to query
@@ -570,7 +590,6 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
nr_iowait_cpu(cpu), last_update_time);
}
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
-
#endif /* CONFIG_NO_HZ_COMMON */
/*
@@ -1124,8 +1143,8 @@ u64 kcpustat_field(enum cpu_usage_stat usage, int cpu)
struct rq *rq;
int err;
- if (!vtime_accounting_enabled_cpu(cpu))
- return val;
+ if (!vtime_generic_enabled_cpu(cpu))
+ return kcpustat_field_default(usage, cpu);
rq = cpu_rq(cpu);
@@ -1220,8 +1239,8 @@ void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
struct rq *rq;
int err;
- if (!vtime_accounting_enabled_cpu(cpu)) {
- *dst = *src;
+ if (!vtime_generic_enabled_cpu(cpu)) {
+ kcpustat_cpu_fetch_default(dst, cpu);
return;
}
@@ -1234,7 +1253,7 @@ void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
curr = rcu_dereference(rq->curr);
if (WARN_ON_ONCE(!curr)) {
rcu_read_unlock();
- *dst = *src;
+ kcpustat_cpu_fetch_default(dst, cpu);
return;
}
--
2.53.0
next prev parent reply other threads:[~2026-03-31 13:17 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-31 13:16 [PATCH 00/14 v3] tick/sched: Refactor idle cputime accounting Frederic Weisbecker
2026-03-31 13:16 ` [PATCH 01/14] sched/idle: Handle offlining first in idle loop Frederic Weisbecker
2026-03-31 13:59 ` Rafael J. Wysocki
2026-03-31 13:16 ` [PATCH 02/14] sched/cputime: Remove superfluous and error prone kcpustat_field() parameter Frederic Weisbecker
2026-03-31 13:16 ` [PATCH 03/14] sched/cputime: Correctly support generic vtime idle time Frederic Weisbecker
2026-03-31 13:16 ` [PATCH 04/14] powerpc/time: Prepare to stop elapsing in dynticks-idle Frederic Weisbecker
2026-03-31 13:16 ` [PATCH 05/14] s390/time: " Frederic Weisbecker
2026-03-31 13:16 ` [PATCH 06/14] tick/sched: Unify idle cputime accounting Frederic Weisbecker
2026-03-31 13:16 ` [PATCH 07/14] tick/sched: Remove nohz disabled special case in cputime fetch Frederic Weisbecker
2026-03-31 13:16 ` [PATCH 08/14] tick/sched: Move dyntick-idle cputime accounting to cputime code Frederic Weisbecker
2026-03-31 13:16 ` [PATCH 09/14] tick/sched: Remove unused fields Frederic Weisbecker
2026-03-31 13:16 ` [PATCH 10/14] tick/sched: Account tickless idle cputime only when tick is stopped Frederic Weisbecker
2026-03-31 13:16 ` Frederic Weisbecker [this message]
2026-03-31 13:16 ` [PATCH 12/14] sched/cputime: Provide get_cpu_[idle|iowait]_time_us() off-case Frederic Weisbecker
2026-03-31 13:16 ` [PATCH 13/14] sched/cputime: Handle idle irqtime gracefully Frederic Weisbecker
2026-03-31 13:16 ` [PATCH 14/14] sched/cputime: Handle dyntick-idle steal time correctly Frederic Weisbecker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260331131622.30505-12-frederic@kernel.org \
--to=frederic@kernel.org \
--cc=agordeev@linux.ibm.com \
--cc=anna-maria@linutronix.de \
--cc=boqun.feng@gmail.com \
--cc=borntraeger@linux.ibm.com \
--cc=bsegall@google.com \
--cc=chleroy@kernel.org \
--cc=dietmar.eggemann@arm.com \
--cc=gor@linux.ibm.com \
--cc=hca@linux.ibm.com \
--cc=jackzxcui1989@163.com \
--cc=jan.kiszka@siemens.com \
--cc=joelagnelf@nvidia.com \
--cc=juri.lelli@redhat.com \
--cc=kbingham@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pm@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=maddy@linux.ibm.com \
--cc=mgorman@suse.de \
--cc=mingo@redhat.com \
--cc=mpe@ellerman.id.au \
--cc=neeraj.upadhyay@kernel.org \
--cc=npiggin@gmail.com \
--cc=paulmck@kernel.org \
--cc=peterz@infradead.org \
--cc=rafael@kernel.org \
--cc=rostedt@goodmis.org \
--cc=sshegde@linux.ibm.com \
--cc=svens@linux.ibm.com \
--cc=tglx@linutronix.de \
--cc=urezki@gmail.com \
--cc=vincent.guittot@linaro.org \
--cc=viresh.kumar@linaro.org \
--cc=vschneid@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox