Linux s390 Architecture development
 help / color / mirror / Atom feed
* [PATCH 0/4] s390: Idle time accounting improvements
@ 2026-05-13 14:01 Heiko Carstens
  2026-05-13 14:01 ` [PATCH 1/4] s390/timex: Move union tod_clock type to separate header Heiko Carstens
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Heiko Carstens @ 2026-05-13 14:01 UTC (permalink / raw)
  To: Frederic Weisbecker, Peter Zijlstra, Thomas Gleixner,
	Alexander Gordeev, Sven Schnelle, Vasily Gorbik,
	Christian Borntraeger
  Cc: linux-kernel, linux-s390

This series is on top of Frederic Weisbecker's idle cpu time accounting
refactor series [1][2].

This is supposed to improve s390 idle time accounting, and brings it
back to the state it was in before arch_cpu_idle_time() was removed from
s390 [3].

As a result all cpu time accounting is done by the s390 architecture backend
again, instead of having a mix of architecture specific and common code
accounting (common code: idle, s390 architecture: everything else).

Changes since RFC [4]:

- Split into more patches

- Fix various build errors for !CONFIG_NO_HZ_COMMON

- Get rid of private seqcount implementation and access seqcount from
  kernel_cpustat directly from assembler code, as suggested by Frederic
  Weisbecker

[1] https://lore.kernel.org/all/20260508131647.43868-1-frederic@kernel.org/
[2] git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks.git timers/core-v4
[3] commit be76ea614460 ("s390/idle: remove arch_cpu_idle_time() and corresponding code")
[4] https://lore.kernel.org/all/20260225145146.1031705-1-hca@linux.ibm.com/

Heiko Carstens (4):
  s390/timex: Move union tod_clock type to separate header
  s390/irq/idle: Use stcke instead of stckf for time stamps
  s390/idle: Provide arch specific kcpustat_field_idle()/kcpustat_field_iowait()
  s390/idle: Remove idle time and count sysfs files

 arch/s390/include/asm/idle.h      |  14 ++--
 arch/s390/include/asm/lowcore.h   |   4 +-
 arch/s390/include/asm/timex.h     |  20 +----
 arch/s390/include/asm/tod_types.h |  30 +++++++
 arch/s390/include/asm/vtime.h     |   4 +-
 arch/s390/kernel/asm-offsets.c    |   7 ++
 arch/s390/kernel/entry.S          |  14 +++-
 arch/s390/kernel/idle.c           | 129 +++++++++++++++++++++++-------
 arch/s390/kernel/irq.c            |   7 +-
 arch/s390/kernel/smp.c            |  33 +-------
 arch/s390/kernel/vtime.c          |  55 +------------
 drivers/s390/cio/qdio_main.c      |   2 +-
 drivers/s390/cio/qdio_thinint.c   |   2 +-
 include/linux/kernel_stat.h       |  27 +++++++
 include/linux/vtime.h             |   6 ++
 kernel/sched/cputime.c            |   4 +-
 16 files changed, 203 insertions(+), 155 deletions(-)
 create mode 100644 arch/s390/include/asm/tod_types.h

base-commit: e64ba052ce04e363ff76d3cb8bedc5f812188acb
-- 
2.51.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/4] s390/timex: Move union tod_clock type to separate header
  2026-05-13 14:01 [PATCH 0/4] s390: Idle time accounting improvements Heiko Carstens
@ 2026-05-13 14:01 ` Heiko Carstens
  2026-05-13 14:01 ` [PATCH 2/4] s390/irq/idle: Use stcke instead of stckf for time stamps Heiko Carstens
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Heiko Carstens @ 2026-05-13 14:01 UTC (permalink / raw)
  To: Frederic Weisbecker, Peter Zijlstra, Thomas Gleixner,
	Alexander Gordeev, Sven Schnelle, Vasily Gorbik,
	Christian Borntraeger
  Cc: linux-kernel, linux-s390

Move union tod_clock type to separate header file. This is preparation
for upcoming changes in order to avoid header dependency problems.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/timex.h     | 20 +-------------------
 arch/s390/include/asm/tod_types.h | 30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 19 deletions(-)
 create mode 100644 arch/s390/include/asm/tod_types.h

diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 49447b40f038..ac3ab6c29912 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -12,6 +12,7 @@
 
 #include <linux/preempt.h>
 #include <linux/time64.h>
+#include <asm/tod_types.h>
 #include <asm/lowcore.h>
 #include <asm/machine.h>
 #include <asm/asm.h>
@@ -21,25 +22,6 @@
 
 extern u64 clock_comparator_max;
 
-union tod_clock {
-	__uint128_t val;
-	struct {
-		__uint128_t ei	:  8; /* epoch index */
-		__uint128_t tod : 64; /* bits 0-63 of tod clock */
-		__uint128_t	: 40;
-		__uint128_t pf	: 16; /* programmable field */
-	};
-	struct {
-		__uint128_t eitod : 72; /* epoch index + bits 0-63 tod clock */
-		__uint128_t	  : 56;
-	};
-	struct {
-		__uint128_t us	: 60; /* micro-seconds */
-		__uint128_t sus	: 12; /* sub-microseconds */
-		__uint128_t	: 56;
-	};
-} __packed;
-
 /* Inline functions for clock register access. */
 static inline int set_tod_clock(__u64 time)
 {
diff --git a/arch/s390/include/asm/tod_types.h b/arch/s390/include/asm/tod_types.h
new file mode 100644
index 000000000000..976fa0a1e895
--- /dev/null
+++ b/arch/s390/include/asm/tod_types.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_TOD_TYPES_H
+#define _ASM_S390_TOD_TYPES_H
+
+#include <linux/types.h>
+
+#ifndef __ASSEMBLER__
+
+union tod_clock {
+	__uint128_t val;
+	struct {
+		__uint128_t ei	:  8; /* epoch index */
+		__uint128_t tod : 64; /* bits 0-63 of tod clock */
+		__uint128_t	: 40;
+		__uint128_t pf	: 16; /* programmable field */
+	};
+	struct {
+		__uint128_t eitod : 72; /* epoch index + bits 0-63 tod clock */
+		__uint128_t	  : 56;
+	};
+	struct {
+		__uint128_t us	: 60; /* micro-seconds */
+		__uint128_t sus	: 12; /* sub-microseconds */
+		__uint128_t	: 56;
+	};
+} __packed;
+
+#endif
+#endif
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/4] s390/irq/idle: Use stcke instead of stckf for time stamps
  2026-05-13 14:01 [PATCH 0/4] s390: Idle time accounting improvements Heiko Carstens
  2026-05-13 14:01 ` [PATCH 1/4] s390/timex: Move union tod_clock type to separate header Heiko Carstens
@ 2026-05-13 14:01 ` Heiko Carstens
  2026-05-13 14:01 ` [PATCH 3/4] s390/idle: Provide arch specific kcpustat_field_idle()/kcpustat_field_iowait() Heiko Carstens
  2026-05-13 14:01 ` [PATCH 4/4] s390/idle: Remove idle time and count sysfs files Heiko Carstens
  3 siblings, 0 replies; 5+ messages in thread
From: Heiko Carstens @ 2026-05-13 14:01 UTC (permalink / raw)
  To: Frederic Weisbecker, Peter Zijlstra, Thomas Gleixner,
	Alexander Gordeev, Sven Schnelle, Vasily Gorbik,
	Christian Borntraeger
  Cc: linux-kernel, linux-s390

The upcoming cpu idle time accounting rework involves comparing and
subtracting cross cpu time stamps. Time stamps created with the stckf
instruction are only monotonic with respect to the local cpu. For cross cpu
monotonic time stamps the slightly slower stcke instruction has to
be used [1].

Convert the idle time accounting relevant usages of stckf to stcke.

[1] Principles of Operation - Setting and Inspecting the Clock

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/idle.h    | 3 ++-
 arch/s390/include/asm/lowcore.h | 4 ++--
 arch/s390/include/asm/vtime.h   | 4 ++--
 arch/s390/kernel/entry.S        | 2 +-
 arch/s390/kernel/idle.c         | 4 ++--
 arch/s390/kernel/irq.c          | 7 ++++---
 drivers/s390/cio/qdio_main.c    | 2 +-
 drivers/s390/cio/qdio_thinint.c | 2 +-
 8 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
index e4ad09a22400..6963e92b60a1 100644
--- a/arch/s390/include/asm/idle.h
+++ b/arch/s390/include/asm/idle.h
@@ -11,14 +11,15 @@
 #include <linux/percpu-defs.h>
 #include <linux/types.h>
 #include <linux/device.h>
+#include <asm/tod_types.h>
 
 struct s390_idle_data {
 	bool	      idle_dyntick;
 	unsigned long idle_count;
 	unsigned long idle_time;
-	unsigned long clock_idle_enter;
 	unsigned long timer_idle_enter;
 	unsigned long mt_cycles_enter[8];
+	union tod_clock clock_idle_enter;
 };
 
 DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 50ffe75adeb4..b7720484b2f7 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -10,6 +10,7 @@
 #define _ASM_S390_LOWCORE_H
 
 #include <linux/types.h>
+#include <asm/tod_types.h>
 #include <asm/machine.h>
 #include <asm/ptrace.h>
 #include <asm/ctlreg.h>
@@ -125,8 +126,7 @@ struct lowcore {
 	__u64	avg_steal_timer;		/* 0x0300 */
 	__u64	last_update_timer;		/* 0x0308 */
 	__u64	last_update_clock;		/* 0x0310 */
-	__u64	int_clock;			/* 0x0318 */
-	__u8	pad_0x0320[0x0328-0x0320];	/* 0x0320 */
+	union tod_clock int_clock;		/* 0x0318 */
 	__u64	clock_comparator;		/* 0x0328 */
 	__u8	pad_0x0330[0x0340-0x0330];	/* 0x0330 */
 
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
index b1db75d14e9d..da116a93d3b6 100644
--- a/arch/s390/include/asm/vtime.h
+++ b/arch/s390/include/asm/vtime.h
@@ -48,8 +48,8 @@ static inline void update_timer_idle(void)
 	 * The accounted CPU times will be subtracted again from steal_timer
 	 * when accumulated steal time is calculated in do_account_vtime().
 	 */
-	lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock;
-	lc->last_update_clock = lc->int_clock;
+	lc->steal_timer += idle->clock_idle_enter.tod - lc->last_update_clock;
+	lc->last_update_clock = lc->int_clock.tod;
 	lc->system_timer += lc->last_update_timer - idle->timer_idle_enter;
 	lc->last_update_timer = lc->sys_enter_timer;
 }
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index bb806d1ddae0..7147f3e51ace 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -379,7 +379,7 @@ SYM_CODE_END(pgm_check_handler)
 SYM_CODE_START(\name)
 	STMG_LC	%r8,%r15,__LC_SAVE_AREA
 	GET_LC	%r13
-	stckf	__LC_INT_CLOCK(%r13)
+	stcke	__LC_INT_CLOCK(%r13)
 	stpt	__LC_SYS_ENTER_TIMER(%r13)
 	STBEAR	__LC_LAST_BREAK(%r13)
 	BPOFF
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 4685d7c5bc51..36020dffb86b 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -26,7 +26,7 @@ void account_idle_time_irq(void)
 	struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
 	unsigned long idle_time;
 
-	idle_time = get_lowcore()->int_clock - idle->clock_idle_enter;
+	idle_time = get_lowcore()->int_clock.tod - idle->clock_idle_enter.tod;
 
 	/* Account time spent with enabled wait psw loaded as idle time. */
 	__atomic64_add(idle_time, &idle->idle_time);
@@ -49,7 +49,7 @@ void noinstr arch_cpu_idle(void)
 	set_cpu_flag(CIF_ENABLED_WAIT);
 	if (smp_cpu_mtid)
 		stcctm(MT_DIAG, smp_cpu_mtid, (u64 *)&idle->mt_cycles_enter);
-	idle->clock_idle_enter = get_tod_clock_fast();
+	store_tod_clock_ext(&idle->clock_idle_enter);
 	idle->timer_idle_enter = get_cpu_timer();
 	bpon();
 	__load_psw_mask(psw_mask);
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index d10a17e6531d..24f44f4a3aac 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -103,9 +103,10 @@ static const struct irq_class irqclass_sub_desc[] = {
 
 static void do_IRQ(struct pt_regs *regs, int irq)
 {
-	if (tod_after_eq(get_lowcore()->int_clock,
-			 get_lowcore()->clock_comparator))
-		/* Serve timer interrupts first. */
+	struct lowcore *lc = get_lowcore();
+
+	/* Serve timer interrupts first */
+	if (tod_after_eq(lc->int_clock.tod, lc->clock_comparator))
 		clock_comparator_work();
 	generic_handle_irq(irq);
 }
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 7e594a800525..c1e09fa34e77 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -695,7 +695,7 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr)
 		return;
 
 	qdio_deliver_irq(irq_ptr);
-	irq_ptr->last_data_irq_time = get_lowcore()->int_clock;
+	irq_ptr->last_data_irq_time = get_lowcore()->int_clock.tod;
 }
 
 static void qdio_handle_activate_check(struct qdio_irq *irq_ptr,
diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c
index 85ca8650adeb..e167aa75c3df 100644
--- a/drivers/s390/cio/qdio_thinint.c
+++ b/drivers/s390/cio/qdio_thinint.c
@@ -99,7 +99,7 @@ static inline u32 clear_shared_ind(void)
 static void tiqdio_thinint_handler(struct airq_struct *airq,
 				   struct tpi_info *tpi_info)
 {
-	u64 irq_time = get_lowcore()->int_clock;
+	u64 irq_time = get_lowcore()->int_clock.tod;
 	u32 si_used = clear_shared_ind();
 	struct qdio_irq *irq;
 
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/4] s390/idle: Provide arch specific kcpustat_field_idle()/kcpustat_field_iowait()
  2026-05-13 14:01 [PATCH 0/4] s390: Idle time accounting improvements Heiko Carstens
  2026-05-13 14:01 ` [PATCH 1/4] s390/timex: Move union tod_clock type to separate header Heiko Carstens
  2026-05-13 14:01 ` [PATCH 2/4] s390/irq/idle: Use stcke instead of stckf for time stamps Heiko Carstens
@ 2026-05-13 14:01 ` Heiko Carstens
  2026-05-13 14:01 ` [PATCH 4/4] s390/idle: Remove idle time and count sysfs files Heiko Carstens
  3 siblings, 0 replies; 5+ messages in thread
From: Heiko Carstens @ 2026-05-13 14:01 UTC (permalink / raw)
  To: Frederic Weisbecker, Peter Zijlstra, Thomas Gleixner,
	Alexander Gordeev, Sven Schnelle, Vasily Gorbik,
	Christian Borntraeger
  Cc: linux-kernel, linux-s390

The former s390 specific arch_cpu_idle_time() implementation was
removed, since its implementation was racy and reported idle time
could go backwards [1].

However this removal was not necessary, since independently of the s390
architecture specific races there exists the iowait counter update race,
which can also lead to reported idle time going backwards [2].

With Frederic Weisbecker's recent cpu idle time accounting refactoring
kernel_cpustat got a sequence counter. Use this to implement s390 specific
variants of kcpustat_field_idle() and kcpustat_field_iowait(). This is
logically a revert of [1] and moves cpu idle time accounting back into s390
architecture code, which is also more precise than the dyntick idle time
accounting by nohz/scheduler.

For comparing cross cpu time stamps it is necessary to use the stcke
instead of the stckf instruction in the irq entry path. Furthermore this
open-codes a sequence lock in assembler and C code, which is required to
store the irq entry time stamp in the per cpu idle_data structure in a
race free manner.

[1] commit be76ea614460 ("s390/idle: remove arch_cpu_idle_time() and corresponding code")
[2] commit ead70b752373 ("timers/nohz: Add a comment about broken iowait counter update race")

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/idle.h   |   5 +-
 arch/s390/kernel/asm-offsets.c |   7 +++
 arch/s390/kernel/entry.S       |  12 +++-
 arch/s390/kernel/idle.c        | 109 ++++++++++++++++++++++++++++++---
 arch/s390/kernel/vtime.c       |  55 +----------------
 include/linux/kernel_stat.h    |  27 ++++++++
 include/linux/vtime.h          |   6 ++
 kernel/sched/cputime.c         |   4 +-
 8 files changed, 158 insertions(+), 67 deletions(-)

diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
index 6963e92b60a1..f3502d5621c0 100644
--- a/arch/s390/include/asm/idle.h
+++ b/arch/s390/include/asm/idle.h
@@ -14,12 +14,15 @@
 #include <asm/tod_types.h>
 
 struct s390_idle_data {
-	bool	      idle_dyntick;
+#ifdef CONFIG_NO_HZ_COMMON
+	bool	      in_idle;
+#endif
 	unsigned long idle_count;
 	unsigned long idle_time;
 	unsigned long timer_idle_enter;
 	unsigned long mt_cycles_enter[8];
 	union tod_clock clock_idle_enter;
+	union tod_clock clock_idle_exit;
 };
 
 DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index fbd26f3e9f96..f6dd2b67dcee 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -11,9 +11,11 @@
 #include <linux/purgatory.h>
 #include <linux/pgtable.h>
 #include <linux/ftrace_regs.h>
+#include <linux/kernel_stat.h>
 #include <asm/kvm_host_types.h>
 #include <asm/stacktrace.h>
 #include <asm/ptrace.h>
+#include <asm/idle.h>
 
 int main(void)
 {
@@ -128,6 +130,7 @@ int main(void)
 	OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
 	OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
 	OFFSET(__LC_CURRENT, lowcore, current_task);
+	OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset);
 	OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
 	OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
 	OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack);
@@ -180,6 +183,10 @@ int main(void)
 	DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size));
 	DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line));
 	DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size));
+	OFFSET(__IDLE_CLOCK_EXIT, s390_idle_data, clock_idle_exit);
+#ifdef CONFIG_NO_HZ_COMMON
+	OFFSET(__KCPUSTAT_SEQUENCE, kernel_cpustat, idle_sleeptime_seq);
+#endif
 	OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs);
 	DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs));
 
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 7147f3e51ace..79a45efae23d 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -379,8 +379,19 @@ SYM_CODE_END(pgm_check_handler)
 SYM_CODE_START(\name)
 	STMG_LC	%r8,%r15,__LC_SAVE_AREA
 	GET_LC	%r13
+#ifdef CONFIG_NO_HZ_COMMON
+	larl	%r12,kernel_cpustat
+	ag	%r12,__LC_PERCPU_OFFSET(%r13)
+	asi	__KCPUSTAT_SEQUENCE(%r12),1
+#endif
 	stcke	__LC_INT_CLOCK(%r13)
 	stpt	__LC_SYS_ENTER_TIMER(%r13)
+	larl	%r10,s390_idle
+	ag	%r10,__LC_PERCPU_OFFSET(%r13)
+	mvc	__IDLE_CLOCK_EXIT(16,%r10),__LC_INT_CLOCK(%r13)
+#ifdef CONFIG_NO_HZ_COMMON
+	asi	__KCPUSTAT_SEQUENCE(%r12),1
+#endif
 	STBEAR	__LC_LAST_BREAK(%r13)
 	BPOFF
 	lmg	%r8,%r9,\lc_old_psw(%r13)
@@ -407,7 +418,6 @@ SYM_CODE_START(\name)
 	xgr	%r5,%r5
 	xgr	%r6,%r6
 	xgr	%r7,%r7
-	xgr	%r10,%r10
 	xgr	%r12,%r12
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 36020dffb86b..b5fae512fc9c 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -9,6 +9,7 @@
 
 #include <linux/kernel.h>
 #include <linux/kernel_stat.h>
+#include <linux/sched/stat.h>
 #include <linux/notifier.h>
 #include <linux/init.h>
 #include <linux/cpu.h>
@@ -21,22 +22,111 @@
 
 DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
 
-void account_idle_time_irq(void)
+static __always_inline void __account_idle_time_irq(void)
 {
 	struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
 	unsigned long idle_time;
 
-	idle_time = get_lowcore()->int_clock.tod - idle->clock_idle_enter.tod;
-
-	/* Account time spent with enabled wait psw loaded as idle time. */
+	idle_time = idle->clock_idle_exit.tod - idle->clock_idle_enter.tod;
 	__atomic64_add(idle_time, &idle->idle_time);
 	__atomic64_add_const(1, &idle->idle_count);
-
-	/* Dyntick idle time accounted by nohz/scheduler */
-	if (!idle->idle_dyntick)
-		account_idle_time(cputime_to_nsecs(idle_time));
+	account_idle_time(cputime_to_nsecs(idle_time));
 }
 
+static __always_inline void __account_idle_time_setup(void)
+{
+	struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+
+	store_tod_clock_ext(&idle->clock_idle_enter);
+	idle->timer_idle_enter = get_cpu_timer();
+	idle->clock_idle_exit = idle->clock_idle_enter;
+}
+
+#ifdef CONFIG_NO_HZ_COMMON
+
+static u64 arch_cpu_in_idle_time(int cpu)
+{
+	struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
+	union tod_clock now;
+	u64 idle_time;
+
+	if (!idle->in_idle)
+		return 0;
+	store_tod_clock_ext(&now);
+	if (tod_after(idle->clock_idle_exit.tod, idle->clock_idle_enter.tod))
+		idle_time = idle->clock_idle_exit.tod - idle->clock_idle_enter.tod;
+	else
+		idle_time = now.tod - idle->clock_idle_enter.tod;
+	return cputime_to_nsecs(idle_time);
+}
+
+static u64 arch_cpu_idle_time(int cpu, enum cpu_usage_stat idx, bool compute_delta)
+{
+	struct kernel_cpustat *kc = &kcpustat_cpu(cpu);
+	u64 *cpustat = kc->cpustat;
+	unsigned int seq;
+	u64 idle_time;
+
+	/*
+	 * The open coded seqcount writer in entry.S relies on the
+	 * raw counting mechanism without any writer protection.
+	 */
+	typecheck(typeof(kc->idle_sleeptime_seq), seqcount_t);
+	do {
+		seq = read_seqcount_begin(&kc->idle_sleeptime_seq);
+		idle_time = cpustat[idx];
+		if (compute_delta)
+			idle_time += arch_cpu_in_idle_time(cpu);
+	} while (read_seqcount_retry(&kc->idle_sleeptime_seq, seq));
+	return idle_time;
+}
+
+u64 arch_kcpustat_field_idle(int cpu)
+{
+	return arch_cpu_idle_time(cpu, CPUTIME_IDLE, !nr_iowait_cpu(cpu));
+}
+
+u64 arch_kcpustat_field_iowait(int cpu)
+{
+	return arch_cpu_idle_time(cpu, CPUTIME_IOWAIT, nr_iowait_cpu(cpu));
+}
+
+void account_idle_time_irq(void)
+{
+	struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+	struct kernel_cpustat *kc = kcpustat_this_cpu;
+
+	write_seqcount_begin(&kc->idle_sleeptime_seq);
+	idle->in_idle = false;
+	__account_idle_time_irq();
+	write_seqcount_end(&kc->idle_sleeptime_seq);
+}
+
+static __always_inline void account_idle_time_setup(void)
+{
+	struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+	struct kernel_cpustat *kc = kcpustat_this_cpu;
+
+	raw_write_seqcount_begin(&kc->idle_sleeptime_seq);
+	idle->in_idle = true;
+	__account_idle_time_setup();
+	raw_write_seqcount_end(&kc->idle_sleeptime_seq);
+}
+
+#else  /* CONFIG_NO_HZ_COMMON */
+
+void account_idle_time_irq(void)
+{
+	__account_idle_time_irq();
+}
+
+static __always_inline void account_idle_time_setup(void)
+{
+	__account_idle_time_setup();
+}
+
+#endif /* CONFIG_NO_HZ_COMMON */
+
 void noinstr arch_cpu_idle(void)
 {
 	struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
@@ -49,8 +139,7 @@ void noinstr arch_cpu_idle(void)
 	set_cpu_flag(CIF_ENABLED_WAIT);
 	if (smp_cpu_mtid)
 		stcctm(MT_DIAG, smp_cpu_mtid, (u64 *)&idle->mt_cycles_enter);
-	store_tod_clock_ext(&idle->clock_idle_enter);
-	idle->timer_idle_enter = get_cpu_timer();
+	account_idle_time_setup();
 	bpon();
 	__load_psw_mask(psw_mask);
 }
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index d1102a6f80bd..d804e1140c2e 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -140,8 +140,6 @@ static int do_account_vtime(struct task_struct *tsk)
 
 	if (hardirq_count())
 		lc->hardirq_timer += timer;
-	else if (in_serving_softirq())
-		lc->softirq_timer += timer;
 	else
 		lc->system_timer += timer;
 
@@ -241,63 +239,14 @@ EXPORT_SYMBOL_GPL(vtime_account_kernel);
 
 void vtime_account_softirq(struct task_struct *tsk)
 {
-	if (!__this_cpu_read(s390_idle.idle_dyntick))
-		get_lowcore()->softirq_timer += vtime_delta();
-	else
-		vtime_flush(tsk);
+	get_lowcore()->softirq_timer += vtime_delta();
 }
 
 void vtime_account_hardirq(struct task_struct *tsk)
 {
-	if (!__this_cpu_read(s390_idle.idle_dyntick)) {
-		get_lowcore()->hardirq_timer += vtime_delta();
-	} else {
-		/*
-		 * In dynticks mode, the idle cputime is accounted by the nohz
-		 * subsystem. Therefore the s390 timer/clocks are reset on IRQ
-		 * entry and steal time must be accounted now.
-		 */
-		vtime_flush(tsk);
-	}
+	get_lowcore()->hardirq_timer += vtime_delta();
 }
 
-#ifdef CONFIG_NO_HZ_COMMON
-/**
- * vtime_reset - Fast forward vtime entry clocks
- *
- * Called from dynticks idle IRQ entry to fast-forward the clocks to current time
- * so that the IRQ time is still accounted by vtime while nohz cputime is paused.
- */
-void vtime_reset(void)
-{
-	vtime_reset_last_update(get_lowcore());
-}
-
-/**
- * vtime_dyntick_start - Inform vtime about entry to idle-dynticks
- *
- * Called when idle enters in dyntick mode. The idle cputime that elapsed so far
- * is flushed and the tick subsystem takes over the idle cputime accounting.
- */
-void vtime_dyntick_start(void)
-{
-	__this_cpu_write(s390_idle.idle_dyntick, true);
-	vtime_flush(current);
-}
-
-/**
- * vtime_dyntick_stop - Inform vtime about exit from idle-dynticks
- *
- * Called when idle exits from dyntick mode. The vtime entry clocks are
- * fast-forward to current time and idle accounting resumes.
- */
-void vtime_dyntick_stop(void)
-{
-	vtime_reset_last_update(get_lowcore());
-	__this_cpu_write(s390_idle.idle_dyntick, false);
-}
-#endif /* CONFIG_NO_HZ_COMMON */
-
 /*
  * Sorted add to a list. List is linear searched until first bigger
  * element is found.
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index fce1392e2140..9ca6c2259dfe 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -107,6 +107,30 @@ static inline unsigned long kstat_cpu_irqs_sum(unsigned int cpu)
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
+
+#ifdef CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE
+
+static inline void kcpustat_dyntick_start(u64 now) { }
+static inline void kcpustat_dyntick_stop(u64 now) { }
+static inline void kcpustat_irq_enter(u64 now) { }
+static inline void kcpustat_irq_exit(u64 now) { }
+static inline bool kcpustat_idle_dyntick(void) { return false; }
+
+extern u64 arch_kcpustat_field_idle(int cpu);
+extern u64 arch_kcpustat_field_iowait(int cpu);
+
+static inline u64 kcpustat_field_idle(int cpu)
+{
+	return arch_kcpustat_field_idle(cpu);
+}
+
+static inline u64 kcpustat_field_iowait(int cpu)
+{
+	return arch_kcpustat_field_iowait(cpu);
+}
+
+#else /* !CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE */
+
 extern void kcpustat_dyntick_start(u64 now);
 extern void kcpustat_dyntick_stop(u64 now);
 extern void kcpustat_irq_enter(u64 now);
@@ -118,6 +142,9 @@ static inline bool kcpustat_idle_dyntick(void)
 {
 	return __this_cpu_read(kernel_cpustat.idle_dyntick);
 }
+
+#endif /* !CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE */
+
 #else
 static inline u64 kcpustat_field_idle(int cpu)
 {
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 9dc25b04a119..82825e775499 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -42,9 +42,15 @@ extern void vtime_account_irq(struct task_struct *tsk, unsigned int offset);
 extern void vtime_account_softirq(struct task_struct *tsk);
 extern void vtime_account_hardirq(struct task_struct *tsk);
 extern void vtime_flush(struct task_struct *tsk);
+#ifdef CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE
+static inline void vtime_reset(void) { }
+static inline void vtime_dyntick_start(void) { }
+static inline void vtime_dyntick_stop(void) { }
+#else
 extern void vtime_reset(void);
 extern void vtime_dyntick_start(void);
 extern void vtime_dyntick_stop(void);
+#endif
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 static inline void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { }
 static inline void vtime_account_softirq(struct task_struct *tsk) { }
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 244b57417240..ed49a1e23d17 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -421,7 +421,7 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
 						int nr_ticks) { }
 #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
 
-#ifdef CONFIG_NO_HZ_COMMON
+#if defined(CONFIG_NO_HZ_COMMON) && !defined(CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE)
 static void kcpustat_idle_stop(struct kernel_cpustat *kc, u64 now)
 {
 	u64 *cpustat = kc->cpustat;
@@ -560,7 +560,7 @@ static u64 kcpustat_field_dyntick(int cpu, enum cpu_usage_stat idx,
 {
 	return kcpustat_cpu(cpu).cpustat[idx];
 }
-#endif /* CONFIG_NO_HZ_COMMON */
+#endif /* CONFIG_NO_HZ_COMMON && !CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE */
 
 static u64 get_cpu_sleep_time_us(int cpu, enum cpu_usage_stat idx,
 				 bool compute_delta, u64 *last_update_time)
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 4/4] s390/idle: Remove idle time and count sysfs files
  2026-05-13 14:01 [PATCH 0/4] s390: Idle time accounting improvements Heiko Carstens
                   ` (2 preceding siblings ...)
  2026-05-13 14:01 ` [PATCH 3/4] s390/idle: Provide arch specific kcpustat_field_idle()/kcpustat_field_iowait() Heiko Carstens
@ 2026-05-13 14:01 ` Heiko Carstens
  3 siblings, 0 replies; 5+ messages in thread
From: Heiko Carstens @ 2026-05-13 14:01 UTC (permalink / raw)
  To: Frederic Weisbecker, Peter Zijlstra, Thomas Gleixner,
	Alexander Gordeev, Sven Schnelle, Vasily Gorbik,
	Christian Borntraeger
  Cc: linux-kernel, linux-s390

Remove the s390 specific idle_time_us and idle_count per cpu sysfs
files. They do not provide any additional value. The risk that there
are existing applications which rely on these architecture specific
files should be very low.

However if it turns out such applications exist, this can be easily
reverted.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/idle.h |  6 ------
 arch/s390/kernel/idle.c      | 20 --------------------
 arch/s390/kernel/smp.c       | 33 +--------------------------------
 3 files changed, 1 insertion(+), 58 deletions(-)

diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
index f3502d5621c0..07819f11987c 100644
--- a/arch/s390/include/asm/idle.h
+++ b/arch/s390/include/asm/idle.h
@@ -10,15 +10,12 @@
 
 #include <linux/percpu-defs.h>
 #include <linux/types.h>
-#include <linux/device.h>
 #include <asm/tod_types.h>
 
 struct s390_idle_data {
 #ifdef CONFIG_NO_HZ_COMMON
 	bool	      in_idle;
 #endif
-	unsigned long idle_count;
-	unsigned long idle_time;
 	unsigned long timer_idle_enter;
 	unsigned long mt_cycles_enter[8];
 	union tod_clock clock_idle_enter;
@@ -27,7 +24,4 @@ struct s390_idle_data {
 
 DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
 
-extern struct device_attribute dev_attr_idle_count;
-extern struct device_attribute dev_attr_idle_time_us;
-
 #endif /* _S390_IDLE_H */
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index b5fae512fc9c..7f7851c001e0 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -28,8 +28,6 @@ static __always_inline void __account_idle_time_irq(void)
 	unsigned long idle_time;
 
 	idle_time = idle->clock_idle_exit.tod - idle->clock_idle_enter.tod;
-	__atomic64_add(idle_time, &idle->idle_time);
-	__atomic64_add_const(1, &idle->idle_count);
 	account_idle_time(cputime_to_nsecs(idle_time));
 }
 
@@ -144,24 +142,6 @@ void noinstr arch_cpu_idle(void)
 	__load_psw_mask(psw_mask);
 }
 
-static ssize_t show_idle_count(struct device *dev,
-			       struct device_attribute *attr, char *buf)
-{
-	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
-
-	return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_count));
-}
-DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
-
-static ssize_t show_idle_time(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
-
-	return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_time) >> 12);
-}
-DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
-
 void arch_cpu_idle_enter(void)
 {
 }
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 50bb499cf3e5..0ba7f89b8161 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -54,7 +54,6 @@
 #include <asm/debug.h>
 #include <asm/os_info.h>
 #include <asm/sigp.h>
-#include <asm/idle.h>
 #include <asm/nmi.h>
 #include <asm/stacktrace.h>
 #include <asm/topology.h>
@@ -1085,31 +1084,6 @@ static struct attribute_group cpu_common_attr_group = {
 	.attrs = cpu_common_attrs,
 };
 
-static struct attribute *cpu_online_attrs[] = {
-	&dev_attr_idle_count.attr,
-	&dev_attr_idle_time_us.attr,
-	NULL,
-};
-
-static struct attribute_group cpu_online_attr_group = {
-	.attrs = cpu_online_attrs,
-};
-
-static int smp_cpu_online(unsigned int cpu)
-{
-	struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
-
-	return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group);
-}
-
-static int smp_cpu_pre_down(unsigned int cpu)
-{
-	struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
-
-	sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group);
-	return 0;
-}
-
 bool arch_cpu_is_hotpluggable(int cpu)
 {
 	return !!cpu;
@@ -1175,18 +1149,13 @@ static DEVICE_ATTR_WO(rescan);
 static int __init s390_smp_init(void)
 {
 	struct device *dev_root;
-	int rc;
+	int rc = 0;
 
 	dev_root = bus_get_dev_root(&cpu_subsys);
 	if (dev_root) {
 		rc = device_create_file(dev_root, &dev_attr_rescan);
 		put_device(dev_root);
-		if (rc)
-			return rc;
 	}
-	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
-			       smp_cpu_online, smp_cpu_pre_down);
-	rc = rc <= 0 ? rc : 0;
 	return rc;
 }
 subsys_initcall(s390_smp_init);
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-05-13 14:01 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-13 14:01 [PATCH 0/4] s390: Idle time accounting improvements Heiko Carstens
2026-05-13 14:01 ` [PATCH 1/4] s390/timex: Move union tod_clock type to separate header Heiko Carstens
2026-05-13 14:01 ` [PATCH 2/4] s390/irq/idle: Use stcke instead of stckf for time stamps Heiko Carstens
2026-05-13 14:01 ` [PATCH 3/4] s390/idle: Provide arch specific kcpustat_field_idle()/kcpustat_field_iowait() Heiko Carstens
2026-05-13 14:01 ` [PATCH 4/4] s390/idle: Remove idle time and count sysfs files Heiko Carstens

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox