public inbox for linux-kernel@vger.kernel.org
From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Jeanson <mjeanson@efficios.com>,
	Jens Axboe <axboe@kernel.dk>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Peter Zijlstra <peterz@infradead.org>,
	"Paul E. McKenney" <paulmck@kernel.org>,
	Boqun Feng <boqun.feng@gmail.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <seanjc@google.com>,
	Wei Liu <wei.liu@kernel.org>, Dexuan Cui <decui@microsoft.com>,
	x86@kernel.org, Arnd Bergmann <arnd@arndb.de>,
	Heiko Carstens <hca@linux.ibm.com>,
	Christian Borntraeger <borntraeger@linux.ibm.com>,
	Sven Schnelle <svens@linux.ibm.com>,
	Huacai Chen <chenhuacai@kernel.org>,
	Paul Walmsley <paul.walmsley@sifive.com>,
	Palmer Dabbelt <palmer@dabbelt.com>
Subject: [patch V4 17/36] rseq: Expose lightweight statistics in debugfs
Date: Mon,  8 Sep 2025 23:31:59 +0200 (CEST)
Message-ID: <20250908212926.352693438@linutronix.de>
In-Reply-To: <20250908212737.353775467@linutronix.de>

Analyzing the call frequency without resorting to tracing is helpful for
evaluating this infrastructure. The overhead is minimal as it just
increments a per-CPU counter associated with each operation.

The debugfs readout provides a racy sum of all counters.
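
With debugfs mounted at /sys/kernel/debug, the counters show up in the
"rseq/stats" file created below. A readout looks roughly like this (the
numbers are illustrative only):

  # cat /sys/kernel/debug/rseq/stats
  exit:            1834552
  signal:              213
  slowp:              1047
  ids:                1260
  cs:                 1254
  clear:              1242
  fixup:                12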

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

---
 include/linux/rseq.h       |   16 ---------
 include/linux/rseq_entry.h |   49 +++++++++++++++++++++++++++
 init/Kconfig               |   12 ++++++
 kernel/rseq.c              |   79 +++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 133 insertions(+), 23 deletions(-)

--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -29,21 +29,6 @@ static inline void rseq_sched_switch_eve
 	}
 }
 
-static __always_inline void rseq_exit_to_user_mode(void)
-{
-	struct rseq_event *ev = &current->rseq.event;
-
-	if (IS_ENABLED(CONFIG_DEBUG_RSEQ))
-		WARN_ON_ONCE(ev->sched_switch);
-
-	/*
-	 * Ensure that event (especially user_irq) is cleared when the
-	 * interrupt did not result in a schedule and therefore the
-	 * rseq processing did not clear it.
-	 */
-	ev->events = 0;
-}
-
 /*
  * KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
  * which clears TIF_NOTIFY_RESUME. To avoid updating user space RSEQ in
@@ -92,7 +77,6 @@ static inline void rseq_sched_switch_eve
 static inline void rseq_virt_userspace_exit(void) { }
 static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { }
 static inline void rseq_execve(struct task_struct *t) { }
-static inline void rseq_exit_to_user_mode(void) { }
 #endif  /* !CONFIG_RSEQ */
 
 #ifdef CONFIG_DEBUG_RSEQ
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -2,6 +2,37 @@
 #ifndef _LINUX_RSEQ_ENTRY_H
 #define _LINUX_RSEQ_ENTRY_H
 
+/* Must be outside the CONFIG_RSEQ guard to resolve the stubs */
+#ifdef CONFIG_RSEQ_STATS
+#include <linux/percpu.h>
+
+struct rseq_stats {
+	unsigned long	exit;
+	unsigned long	signal;
+	unsigned long	slowpath;
+	unsigned long	ids;
+	unsigned long	cs;
+	unsigned long	clear;
+	unsigned long	fixup;
+};
+
+DECLARE_PER_CPU(struct rseq_stats, rseq_stats);
+
+/*
+ * Slow path has interrupts and preemption enabled, but the fast path
+ * runs with interrupts disabled so there is no point in having the
+ * preemption checks implied in __this_cpu_inc() for every operation.
+ */
+#ifdef RSEQ_BUILD_SLOW_PATH
+#define rseq_stat_inc(which)	this_cpu_inc((which))
+#else
+#define rseq_stat_inc(which)	raw_cpu_inc((which))
+#endif
+
+#else /* CONFIG_RSEQ_STATS */
+#define rseq_stat_inc(x)	do { } while (0)
+#endif /* !CONFIG_RSEQ_STATS */
+
 #ifdef CONFIG_RSEQ
 #include <linux/rseq.h>
 
@@ -39,8 +70,26 @@ static __always_inline void rseq_note_us
 		current->rseq.event.user_irq = true;
 }
 
+static __always_inline void rseq_exit_to_user_mode(void)
+{
+	struct rseq_event *ev = &current->rseq.event;
+
+	rseq_stat_inc(rseq_stats.exit);
+
+	if (IS_ENABLED(CONFIG_DEBUG_RSEQ))
+		WARN_ON_ONCE(ev->sched_switch);
+
+	/*
+	 * Ensure that event (especially user_irq) is cleared when the
+	 * interrupt did not result in a schedule and therefore the
+	 * rseq processing did not clear it.
+	 */
+	ev->events = 0;
+}
+
 #else /* CONFIG_RSEQ */
 static inline void rseq_note_user_irq_entry(void) { }
+static inline void rseq_exit_to_user_mode(void) { }
 #endif /* !CONFIG_RSEQ */
 
 #endif /* _LINUX_RSEQ_ENTRY_H */
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1883,6 +1883,18 @@ config RSEQ
 
 	  If unsure, say Y.
 
+config RSEQ_STATS
+	default n
+	bool "Enable lightweight statistics of restartable sequences" if EXPERT
+	depends on RSEQ && DEBUG_FS
+	help
+	  Enable lightweight counters which expose information about the
+	  frequency of RSEQ operations via debugfs. Mostly interesting for
+	  kernel debugging or performance analysis. While lightweight, it
+	  still adds code to the user/kernel mode transitions.
+
+	  If unsure, say N.
+
 config DEBUG_RSEQ
 	default n
 	bool "Enable debugging of rseq() system call" if EXPERT
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -67,12 +67,16 @@
  *   F1. <failure>
  */
 
+/* Required to select the proper per_cpu ops for rseq_stat_inc() */
+#define RSEQ_BUILD_SLOW_PATH
+
+#include <linux/debugfs.h>
+#include <linux/ratelimit.h>
+#include <linux/rseq_entry.h>
 #include <linux/sched.h>
-#include <linux/uaccess.h>
 #include <linux/syscalls.h>
-#include <linux/rseq.h>
+#include <linux/uaccess.h>
 #include <linux/types.h>
-#include <linux/ratelimit.h>
 #include <asm/ptrace.h>
 
 #define CREATE_TRACE_POINTS
@@ -108,6 +112,56 @@ void __rseq_trace_ip_fixup(unsigned long
 }
 #endif /* CONFIG_TRACEPOINTS */
 
+#ifdef CONFIG_RSEQ_STATS
+DEFINE_PER_CPU(struct rseq_stats, rseq_stats);
+
+static int rseq_debug_show(struct seq_file *m, void *p)
+{
+	struct rseq_stats stats = { };
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		stats.exit	+= data_race(per_cpu(rseq_stats.exit, cpu));
+		stats.signal	+= data_race(per_cpu(rseq_stats.signal, cpu));
+		stats.slowpath	+= data_race(per_cpu(rseq_stats.slowpath, cpu));
+		stats.ids	+= data_race(per_cpu(rseq_stats.ids, cpu));
+		stats.cs	+= data_race(per_cpu(rseq_stats.cs, cpu));
+		stats.clear	+= data_race(per_cpu(rseq_stats.clear, cpu));
+		stats.fixup	+= data_race(per_cpu(rseq_stats.fixup, cpu));
+	}
+
+	seq_printf(m, "exit:   %16lu\n", stats.exit);
+	seq_printf(m, "signal: %16lu\n", stats.signal);
+	seq_printf(m, "slowp:  %16lu\n", stats.slowpath);
+	seq_printf(m, "ids:    %16lu\n", stats.ids);
+	seq_printf(m, "cs:     %16lu\n", stats.cs);
+	seq_printf(m, "clear:  %16lu\n", stats.clear);
+	seq_printf(m, "fixup:  %16lu\n", stats.fixup);
+	return 0;
+}
+
+static int rseq_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, rseq_debug_show, inode->i_private);
+}
+
+static const struct file_operations dfs_ops = {
+	.open		= rseq_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init rseq_debugfs_init(void)
+{
+	struct dentry *root_dir = debugfs_create_dir("rseq", NULL);
+
+	debugfs_create_file("stats", 0444, root_dir, NULL, &dfs_ops);
+	return 0;
+}
+__initcall(rseq_debugfs_init);
+#endif /* CONFIG_RSEQ_STATS */
+
 #ifdef CONFIG_DEBUG_RSEQ
 static struct rseq *rseq_kernel_fields(struct task_struct *t)
 {
@@ -187,12 +241,13 @@ static int rseq_update_cpu_node_id(struc
 	u32 node_id = cpu_to_node(cpu_id);
 	u32 mm_cid = task_mm_cid(t);
 
-	/*
-	 * Validate read-only rseq fields.
-	 */
+	rseq_stat_inc(rseq_stats.ids);
+
+	/* Validate read-only rseq fields on debug kernels */
 	if (rseq_validate_ro_fields(t))
 		goto efault;
 	WARN_ON_ONCE((int) mm_cid < 0);
+
 	if (!user_write_access_begin(rseq, t->rseq.len))
 		goto efault;
 
@@ -403,6 +458,8 @@ static int rseq_ip_fixup(struct pt_regs
 	struct rseq_cs rseq_cs;
 	int ret;
 
+	rseq_stat_inc(rseq_stats.cs);
+
 	ret = rseq_get_rseq_cs(t, &rseq_cs);
 	if (ret)
 		return ret;
@@ -412,8 +469,10 @@ static int rseq_ip_fixup(struct pt_regs
 	 * If not nested over a rseq critical section, restart is useless.
 	 * Clear the rseq_cs pointer and return.
 	 */
-	if (!in_rseq_cs(ip, &rseq_cs))
+	if (!in_rseq_cs(ip, &rseq_cs)) {
+		rseq_stat_inc(rseq_stats.clear);
 		return clear_rseq_cs(t->rseq.usrptr);
+	}
 	ret = rseq_check_flags(t, rseq_cs.flags);
 	if (ret < 0)
 		return ret;
@@ -422,6 +481,7 @@ static int rseq_ip_fixup(struct pt_regs
 	ret = clear_rseq_cs(t->rseq.usrptr);
 	if (ret)
 		return ret;
+	rseq_stat_inc(rseq_stats.fixup);
 	trace_rseq_ip_fixup(ip, rseq_cs.start_ip, rseq_cs.post_commit_offset,
 			    rseq_cs.abort_ip);
 	instruction_pointer_set(regs, (unsigned long)rseq_cs.abort_ip);
@@ -462,6 +522,11 @@ void __rseq_handle_notify_resume(struct
 	if (unlikely(t->flags & PF_EXITING))
 		return;
 
+	if (ksig)
+		rseq_stat_inc(rseq_stats.signal);
+	else
+		rseq_stat_inc(rseq_stats.slowpath);
+
 	/*
 	 * Read and clear the event pending bit first. If the task
 	 * was not preempted or migrated or a signal is on the way,



Thread overview: 83+ messages
2025-09-08 21:31 [patch V4 00/36] rseq: Optimize exit to user space Thomas Gleixner
2025-09-08 21:31 ` [patch V4 01/36] rseq: Avoid pointless evaluation in __rseq_notify_resume() Thomas Gleixner
2025-09-08 21:31 ` [patch V4 02/36] rseq: Condense the inline stubs Thomas Gleixner
2025-09-08 21:31 ` [patch V4 03/36] rseq: Move algorithm comment to top Thomas Gleixner
2025-09-08 21:31 ` [patch V4 04/36] rseq: Remove the ksig argument from rseq_handle_notify_resume() Thomas Gleixner
2025-09-08 21:31 ` [patch V4 05/36] rseq: Simplify registration Thomas Gleixner
2025-09-08 21:31 ` [patch V4 06/36] rseq: Simplify the event notification Thomas Gleixner
2025-09-09 13:18   ` Mathieu Desnoyers
2025-09-08 21:31 ` [patch V4 07/36] rseq, virt: Retrigger RSEQ after vcpu_run() Thomas Gleixner
2025-09-09  0:00   ` Sean Christopherson
2025-09-09 12:10     ` Thomas Gleixner
2025-09-09 13:21   ` Mathieu Desnoyers
2025-09-08 21:31 ` [patch V4 08/36] rseq: Avoid CPU/MM CID updates when no event pending Thomas Gleixner
2025-09-09 13:25   ` Mathieu Desnoyers
2025-09-08 21:31 ` [patch V4 09/36] rseq: Introduce struct rseq_data Thomas Gleixner
2025-09-09 13:30   ` Mathieu Desnoyers
2025-09-12 20:44     ` Thomas Gleixner
2025-09-12 21:33       ` Mathieu Desnoyers
2025-09-08 21:31 ` [patch V4 10/36] entry: Cleanup header Thomas Gleixner
2025-09-08 21:31 ` [patch V4 11/36] entry: Remove syscall_enter_from_user_mode_prepare() Thomas Gleixner
2025-09-09 13:33   ` Mathieu Desnoyers
2025-09-08 21:31 ` [patch V4 12/36] entry: Inline irqentry_enter/exit_from/to_user_mode() Thomas Gleixner
2025-09-09 13:38   ` Mathieu Desnoyers
2025-09-09 14:10     ` Thomas Gleixner
2025-09-09 14:59       ` Mathieu Desnoyers
2025-09-08 21:31 ` [patch V4 13/36] sched: Move MM CID related functions to sched.h Thomas Gleixner
2025-09-08 21:31 ` [patch V4 14/36] rseq: Cache CPU ID and MM CID values Thomas Gleixner
2025-09-09 13:43   ` Mathieu Desnoyers
2025-09-09 14:13     ` Thomas Gleixner
2025-09-09 15:01       ` Mathieu Desnoyers
2025-09-08 21:31 ` [patch V4 15/36] rseq: Record interrupt from user space Thomas Gleixner
2025-09-09 13:53   ` Mathieu Desnoyers
2025-09-09 14:17     ` Thomas Gleixner
2025-09-09 15:05       ` Mathieu Desnoyers
2025-09-08 21:31 ` [patch V4 16/36] rseq: Provide tracepoint wrappers for inline code Thomas Gleixner
2025-09-08 21:31 ` Thomas Gleixner [this message]
2025-09-08 21:32 ` [patch V4 18/36] rseq: Provide static branch for runtime debugging Thomas Gleixner
2025-09-08 21:32 ` [patch V4 19/36] rseq: Provide and use rseq_update_user_cs() Thomas Gleixner
2025-09-09 15:11   ` Mathieu Desnoyers
2025-09-08 21:32 ` [patch V4 20/36] rseq: Replace the original debug implementation Thomas Gleixner
2025-09-08 21:32 ` [patch V4 21/36] rseq: Make exit debugging static branch based Thomas Gleixner
2025-09-08 21:32 ` [patch V4 22/36] rseq: Use static branch for syscall exit debug when GENERIC_IRQ_ENTRY=y Thomas Gleixner
2025-09-08 21:32 ` [patch V4 23/36] rseq: Provide and use rseq_set_ids() Thomas Gleixner
2025-09-11 13:40   ` Mathieu Desnoyers
2025-09-11 16:02     ` Thomas Gleixner
2025-09-11 17:13       ` Mathieu Desnoyers
2025-09-08 21:32 ` [patch V4 24/36] rseq: Separate the signal delivery path Thomas Gleixner
2025-09-08 21:32 ` [patch V4 25/36] rseq: Rework the TIF_NOTIFY handler Thomas Gleixner
2025-09-08 21:32 ` [patch V4 26/36] rseq: Optimize event setting Thomas Gleixner
2025-09-11 14:03   ` Mathieu Desnoyers
2025-09-11 16:06     ` Thomas Gleixner
2025-09-11 17:15       ` Mathieu Desnoyers
2025-09-12  6:58         ` Thomas Gleixner
2025-09-08 21:32 ` [patch V4 27/36] rseq: Implement fast path for exit to user Thomas Gleixner
2025-09-11 14:27   ` Mathieu Desnoyers
2025-09-11 16:08     ` Thomas Gleixner
2025-09-08 21:32 ` [patch V4 28/36] rseq: Switch to fast path processing on " Thomas Gleixner
2025-09-11 14:44   ` Mathieu Desnoyers
2025-09-11 14:45     ` Mathieu Desnoyers
2025-09-11 16:50       ` Thomas Gleixner
2025-09-11 16:47     ` Thomas Gleixner
2025-09-11 20:00       ` Mathieu Desnoyers
2025-09-12 14:22         ` Thomas Gleixner
2025-09-12 15:44           ` Mathieu Desnoyers
2025-09-08 21:32 ` [patch V4 29/36] entry: Split up exit_to_user_mode_prepare() Thomas Gleixner
2025-09-08 21:32 ` [patch V4 30/36] rseq: Split up rseq_exit_to_user_mode() Thomas Gleixner
2025-09-08 21:32 ` [patch V4 31/36] asm-generic: Provide generic TIF infrastructure Thomas Gleixner
2025-09-17  6:16   ` [tip: core/core] " tip-bot2 for Thomas Gleixner
2025-09-08 21:32 ` [patch V4 32/36] x86: Use generic TIF bits Thomas Gleixner
2025-09-17  6:16   ` [tip: core/core] " tip-bot2 for Thomas Gleixner
2025-09-08 21:32 ` [patch V4 33/36] s390: " Thomas Gleixner
2025-09-11  9:11   ` Sven Schnelle
2025-09-11 11:03   ` Heiko Carstens
2025-09-17  6:16   ` [tip: core/core] " tip-bot2 for Thomas Gleixner
2025-09-08 21:32 ` [patch V4 34/36] loongarch: " Thomas Gleixner
2025-09-17  6:16   ` [tip: core/core] " tip-bot2 for Thomas Gleixner
2025-09-08 21:32 ` [patch V4 35/36] riscv: " Thomas Gleixner
2025-09-17  6:16   ` [tip: core/core] " tip-bot2 for Thomas Gleixner
2025-09-08 21:32 ` [patch V4 36/36] rseq: Switch to TIF_RSEQ if supported Thomas Gleixner
2025-09-10 13:55 ` [patch V4 00/36] rseq: Optimize exit to user space Jens Axboe
2025-09-10 14:45   ` Michael Jeanson
2025-09-10 15:34     ` Jens Axboe
2025-09-10 14:54   ` Thomas Gleixner
