From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Jeanson <mjeanson@efficios.com>,
Jens Axboe <axboe@kernel.dk>,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Peter Zijlstra <peterz@infradead.org>,
"Paul E. McKenney" <paulmck@kernel.org>,
x86@kernel.org, Sean Christopherson <seanjc@google.com>,
Wei Liu <wei.liu@kernel.org>
Subject: [patch V6 25/31] rseq: Rework the TIF_NOTIFY handler
Date: Mon, 27 Oct 2025 09:45:12 +0100 (CET) [thread overview]
Message-ID: <20251027084307.517640811@linutronix.de> (raw)
In-Reply-To: 20251027084220.785525188@linutronix.de
Replace the whole logic with a new implementation, which is shared with
signal delivery and the upcoming exit fast path.
Contrary to the original implementation, this ignores invocations from
KVM/IO-uring, which invoke resume_user_mode_work() with the @regs argument
set to NULL.
The original implementation updated the CPU/Node/MM CID fields, but that
was just a side effect, which was addressing the problem that this
invocation cleared TIF_NOTIFY_RESUME, which in turn could cause an update
on return to user space to be lost.
This problem has been addressed differently, so that it's not longer
required to do that update before entering the guest.
That might be considered a user visible change, when the hosts thread TLS
memory is mapped into the guest, but as this was never intentionally
supported, this abuse of kernel internal implementation details is not
considered an ABI break.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
V3: Moved rseq_update_usr() to this one - Mathieu
Documented the KVM visible change - Sean
---
include/linux/rseq_entry.h | 29 +++++++++++++++++
kernel/rseq.c | 76 +++++++++++++++++++--------------------------
2 files changed, 62 insertions(+), 43 deletions(-)
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -368,6 +368,35 @@ bool rseq_set_ids_get_csaddr(struct task
return false;
}
+/*
+ * Update user space with new IDs and conditionally check whether the task
+ * is in a critical section.
+ */
+static rseq_inline bool rseq_update_usr(struct task_struct *t, struct pt_regs *regs,
+ struct rseq_ids *ids, u32 node_id)
+{
+ u64 csaddr;
+
+ if (!rseq_set_ids_get_csaddr(t, ids, node_id, &csaddr))
+ return false;
+
+ /*
+ * On architectures which utilize the generic entry code this
+ * allows to skip the critical section when the entry was not from
+ * a user space interrupt, unless debug mode is enabled.
+ */
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ if (!static_branch_unlikely(&rseq_debug_enabled)) {
+ if (likely(!t->rseq.event.user_irq))
+ return true;
+ }
+ }
+ if (likely(!csaddr))
+ return true;
+ /* Sigh, this really needs to do work */
+ return rseq_update_user_cs(t, regs, csaddr);
+}
+
static __always_inline void rseq_exit_to_user_mode(void)
{
struct rseq_event *ev = ¤t->rseq.event;
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -82,12 +82,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/rseq.h>
-#ifdef CONFIG_MEMBARRIER
-# define RSEQ_EVENT_GUARD irq
-#else
-# define RSEQ_EVENT_GUARD preempt
-#endif
-
DEFINE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
static inline void rseq_control_debug(bool on)
@@ -239,38 +233,15 @@ static bool rseq_handle_cs(struct task_s
return false;
}
-/*
- * This resume handler must always be executed between any of:
- * - preemption,
- * - signal delivery,
- * and return to user-space.
- *
- * This is how we can ensure that the entire rseq critical section
- * will issue the commit instruction only if executed atomically with
- * respect to other threads scheduled on the same CPU, and with respect
- * to signal handlers.
- */
-void __rseq_handle_notify_resume(struct pt_regs *regs)
+static void rseq_slowpath_update_usr(struct pt_regs *regs)
{
+ /* Preserve rseq state and user_irq state for exit to user */
+ const struct rseq_event evt_mask = { .has_rseq = true, .user_irq = true, };
struct task_struct *t = current;
struct rseq_ids ids;
u32 node_id;
bool event;
- /*
- * If invoked from hypervisors before entering the guest via
- * resume_user_mode_work(), then @regs is a NULL pointer.
- *
- * resume_user_mode_work() clears TIF_NOTIFY_RESUME and re-raises
- * it before returning from the ioctl() to user space when
- * rseq_event.sched_switch is set.
- *
- * So it's safe to ignore here instead of pointlessly updating it
- * in the vcpu_run() loop.
- */
- if (!regs)
- return;
-
if (unlikely(t->flags & PF_EXITING))
return;
@@ -294,26 +265,45 @@ void __rseq_handle_notify_resume(struct
* with the result handed in to allow the detection of
* inconsistencies.
*/
- scoped_guard(RSEQ_EVENT_GUARD) {
+ scoped_guard(irq) {
event = t->rseq.event.sched_switch;
- t->rseq.event.sched_switch = false;
+ t->rseq.event.all &= evt_mask.all;
ids.cpu_id = task_cpu(t);
ids.mm_cid = task_mm_cid(t);
}
- if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
+ if (!event)
return;
- if (!rseq_handle_cs(t, regs))
- goto error;
-
node_id = cpu_to_node(ids.cpu_id);
- if (!rseq_set_ids(t, &ids, node_id))
- goto error;
- return;
-error:
- force_sig(SIGSEGV);
+ if (unlikely(!rseq_update_usr(t, regs, &ids, node_id))) {
+ /*
+ * Clear the errors just in case this might survive magically, but
+ * leave the rest intact.
+ */
+ t->rseq.event.error = 0;
+ force_sig(SIGSEGV);
+ }
+}
+
+void __rseq_handle_notify_resume(struct pt_regs *regs)
+{
+ /*
+ * If invoked from hypervisors before entering the guest via
+ * resume_user_mode_work(), then @regs is a NULL pointer.
+ *
+ * resume_user_mode_work() clears TIF_NOTIFY_RESUME and re-raises
+ * it before returning from the ioctl() to user space when
+ * rseq_event.sched_switch is set.
+ *
+ * So it's safe to ignore here instead of pointlessly updating it
+ * in the vcpu_run() loop.
+ */
+ if (!regs)
+ return;
+
+ rseq_slowpath_update_usr(regs);
}
void __rseq_signal_deliver(int sig, struct pt_regs *regs)
next prev parent reply other threads:[~2025-10-27 8:45 UTC|newest]
Thread overview: 142+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-27 8:44 [patch V6 00/31] rseq: Optimize exit to user space Thomas Gleixner
2025-10-27 8:44 ` [patch V6 01/31] rseq: Avoid pointless evaluation in __rseq_notify_resume() Thomas Gleixner
2025-10-29 10:24 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 02/31] rseq: Condense the inline stubs Thomas Gleixner
2025-10-29 10:24 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 03/31] rseq: Move algorithm comment to top Thomas Gleixner
2025-10-29 10:24 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 04/31] rseq: Remove the ksig argument from rseq_handle_notify_resume() Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 05/31] rseq: Simplify registration Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 06/31] rseq: Simplify the event notification Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 07/31] rseq, virt: Retrigger RSEQ after vcpu_run() Thomas Gleixner
2025-10-28 15:08 ` Mathieu Desnoyers
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 08/31] rseq: Avoid CPU/MM CID updates when no event pending Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 09/31] rseq: Introduce struct rseq_data Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 10/31] entry: Cleanup header Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` [tip: core/rseq] entry: Clean up header tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 11/31] entry: Remove syscall_enter_from_user_mode_prepare() Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 12/31] entry: Inline irqentry_enter/exit_from/to_user_mode() Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 13/31] sched: Move MM CID related functions to sched.h Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 14/31] rseq: Cache CPU ID and MM CID values Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 15/31] rseq: Record interrupt from user space Thomas Gleixner
2025-10-28 15:26 ` Mathieu Desnoyers
2025-10-28 17:02 ` Thomas Gleixner
2025-10-28 17:53 ` Mathieu Desnoyers
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 16/31] rseq: Provide tracepoint wrappers for inline code Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 17/31] rseq: Expose lightweight statistics in debugfs Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 18/31] rseq: Provide static branch for runtime debugging Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:44 ` [patch V6 19/31] rseq: Provide and use rseq_update_user_cs() Thomas Gleixner
2025-10-28 15:40 ` Mathieu Desnoyers
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-10-29 16:04 ` [patch V6 19/31] " Steven Rostedt
2025-10-29 21:00 ` Thomas Gleixner
2025-10-29 21:53 ` Steven Rostedt
2025-11-03 14:47 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:45 ` [patch V6 20/31] rseq: Replace the original debug implementation Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-10-30 21:52 ` [patch V6 20/31] " Prakash Sangappa
2025-10-31 14:27 ` Thomas Gleixner
2025-11-03 14:47 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:45 ` [patch V6 21/31] rseq: Make exit debugging static branch based Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:45 ` [patch V6 22/31] rseq: Use static branch for syscall exit debug when GENERIC_IRQ_ENTRY=y Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:45 ` [patch V6 23/31] rseq: Provide and use rseq_set_ids() Thomas Gleixner
2025-10-28 15:47 ` Mathieu Desnoyers
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:45 ` [patch V6 24/31] rseq: Separate the signal delivery path Thomas Gleixner
2025-10-28 15:51 ` Mathieu Desnoyers
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:45 ` Thomas Gleixner [this message]
2025-10-29 10:23 ` [tip: core/rseq] rseq: Rework the TIF_NOTIFY handler tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:17 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:45 ` [patch V6 26/31] rseq: Optimize event setting Thomas Gleixner
2025-10-28 15:57 ` Mathieu Desnoyers
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:16 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:45 ` [patch V6 27/31] rseq: Implement fast path for exit to user Thomas Gleixner
2025-10-28 16:09 ` Mathieu Desnoyers
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-10-29 16:28 ` [patch V6 27/31] " Steven Rostedt
2025-11-03 14:47 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-04 8:16 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:45 ` [patch V6 28/31] rseq: Switch to fast path processing on " Thomas Gleixner
2025-10-28 16:14 ` Mathieu Desnoyers
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:16 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:45 ` [patch V6 29/31] entry: Split up exit_to_user_mode_prepare() Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:16 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:45 ` [patch V6 30/31] rseq: Split up rseq_exit_to_user_mode() Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:16 ` tip-bot2 for Thomas Gleixner
2025-10-27 8:45 ` [patch V6 31/31] rseq: Switch to TIF_RSEQ if supported Thomas Gleixner
2025-10-29 10:23 ` [tip: core/rseq] " tip-bot2 for Thomas Gleixner
2025-11-03 14:47 ` tip-bot2 for Thomas Gleixner
2025-11-04 8:16 ` tip-bot2 for Thomas Gleixner
2025-10-29 10:23 ` [patch V6 00/31] rseq: Optimize exit to user space Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251027084307.517640811@linutronix.de \
--to=tglx@linutronix.de \
--cc=axboe@kernel.dk \
--cc=linux-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=mjeanson@efficios.com \
--cc=paulmck@kernel.org \
--cc=peterz@infradead.org \
--cc=seanjc@google.com \
--cc=wei.liu@kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox