linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
To: Thomas Gleixner <tglx@linutronix.de>,
	LKML <linux-kernel@vger.kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>,
	Peter Zijlstra <peterz@infradead.org>,
	"Paul E. McKenney" <paulmck@kernel.org>,
	Boqun Feng <boqun.feng@gmail.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <seanjc@google.com>,
	Wei Liu <wei.liu@kernel.org>, Dexuan Cui <decui@microsoft.com>,
	x86@kernel.org, Arnd Bergmann <arnd@arndb.de>,
	Heiko Carstens <hca@linux.ibm.com>,
	Christian Borntraeger <borntraeger@linux.ibm.com>,
	Sven Schnelle <svens@linux.ibm.com>,
	Huacai Chen <chenhuacai@kernel.org>,
	Paul Walmsley <paul.walmsley@sifive.com>,
	Palmer Dabbelt <palmer@dabbelt.com>
Subject: Re: [patch V2 09/37] rseq: Introduce struct rseq_event
Date: Mon, 25 Aug 2025 14:11:37 -0400	[thread overview]
Message-ID: <9a94b9d4-708c-476a-bf7d-7deb1c14f1ac@efficios.com> (raw)
In-Reply-To: <20250823161653.843757955@linutronix.de>

On 2025-08-23 12:39, Thomas Gleixner wrote:
> In preparation for a major rewrite of this code, provide a data structure
> for event management.
> 
> Put the sched_switch event and a indicator for RSEQ on a task into it as a
> start. That uses a union, which allows to mask and clear the whole lot
> efficiently.
> 
> The indicators are explicitely not a bit field. Bit fields generate abysmal

explicitly

> code.
> 
> The boolean members are defined as u8 as that actually guarantees that it
> fits. There seem to be strange architecture ABIs which need more than 8bits
> for a boolean.
> 
> The has_rseq member is redudandant vs. task::rseq, but it turns out that

redundant

> boolean operations and quick checks on the union generate better code than
> fiddling with seperate entities and data types.

separate

> 
> This struct will be extended over time to carry more information.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>   include/linux/rseq.h       |   23 ++++++++++++-----------
>   include/linux/rseq_types.h |   30 ++++++++++++++++++++++++++++++
>   include/linux/sched.h      |    7 ++-----
>   kernel/rseq.c              |    6 ++++--
>   4 files changed, 48 insertions(+), 18 deletions(-)
> 
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -9,22 +9,22 @@ void __rseq_handle_notify_resume(struct
>   
>   static inline void rseq_handle_notify_resume(struct pt_regs *regs)
>   {
> -	if (current->rseq)
> +	if (current->rseq_event.has_rseq)
>   		__rseq_handle_notify_resume(NULL, regs);
>   }
>   
>   static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
>   {
> -	if (current->rseq) {
> -		current->rseq_event_pending = true;
> +	if (current->rseq_event.has_rseq) {
> +		current->rseq_event.sched_switch = true;
>   		__rseq_handle_notify_resume(ksig, regs);
>   	}
>   }
>   
>   static inline void rseq_sched_switch_event(struct task_struct *t)
>   {
> -	if (t->rseq) {
> -		t->rseq_event_pending = true;
> +	if (t->rseq_event.has_rseq) {
> +		t->rseq_event.sched_switch = true;
>   		set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
>   	}
>   }
> @@ -32,8 +32,9 @@ static inline void rseq_sched_switch_eve
>   static __always_inline void rseq_exit_to_user_mode(void)
>   {
>   	if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
> -		if (WARN_ON_ONCE(current->rseq && current->rseq_event_pending))
> -			current->rseq_event_pending = false;
> +		if (WARN_ON_ONCE(current->rseq_event.has_rseq &&
> +				 current->rseq_event.events))
> +			current->rseq_event.events = 0;
>   	}
>   }
>   
> @@ -49,7 +50,7 @@ static __always_inline void rseq_exit_to
>    */
>   static inline void rseq_virt_userspace_exit(void)
>   {
> -	if (current->rseq_event_pending)
> +	if (current->rseq_event.sched_switch)
>   		set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
>   }
>   
> @@ -63,12 +64,12 @@ static inline void rseq_fork(struct task
>   		t->rseq = NULL;
>   		t->rseq_len = 0;
>   		t->rseq_sig = 0;
> -		t->rseq_event_pending = false;
> +		t->rseq_event.all = 0;
>   	} else {
>   		t->rseq = current->rseq;
>   		t->rseq_len = current->rseq_len;
>   		t->rseq_sig = current->rseq_sig;
> -		t->rseq_event_pending = current->rseq_event_pending;
> +		t->rseq_event = current->rseq_event;
>   	}
>   }
>   
> @@ -77,7 +78,7 @@ static inline void rseq_execve(struct ta
>   	t->rseq = NULL;
>   	t->rseq_len = 0;
>   	t->rseq_sig = 0;
> -	t->rseq_event_pending = false;
> +	t->rseq_event.all = 0;
>   }
>   
>   #else /* CONFIG_RSEQ */
> --- /dev/null
> +++ b/include/linux/rseq_types.h
> @@ -0,0 +1,30 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _LINUX_RSEQ_TYPES_H
> +#define _LINUX_RSEQ_TYPES_H
> +
> +#include <linux/types.h>
> +
> +/*
> + * struct rseq_event - Storage for rseq related event management
> + * @all:		Compound to initialize and clear the data efficiently
> + * @events:		Compund to access events with a single load/store

Compound

> + * @sched_switch:	True if the task was scheduled out
> + * @has_rseq:		True if the task has a rseq pointer installed
> + */
> +struct rseq_event {
> +	union {
> +		u32				all;
> +		struct {
> +			union {
> +				u16		events;
> +				struct {
> +					u8	sched_switch;
> +				};

Is Alpha still supported, or can we assume bytewise loads/stores?

Are those events meant to each consume 1 byte (which limits us to 2
events for a 2-byte "events"/4-byte "all"), or is the plan to update
them with bitwise operations (OR to set, AND-NOT to clear)?

Thanks,

Mathieu

> +			};
> +
> +			u8			has_rseq;
> +		};
> +	};
> +};
> +
> +#endif
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -41,6 +41,7 @@
>   #include <linux/task_io_accounting.h>
>   #include <linux/posix-timers_types.h>
>   #include <linux/restart_block.h>
> +#include <linux/rseq_types.h>
>   #include <uapi/linux/rseq.h>
>   #include <linux/seqlock_types.h>
>   #include <linux/kcsan.h>
> @@ -1404,11 +1405,7 @@ struct task_struct {
>   	struct rseq __user		*rseq;
>   	u32				rseq_len;
>   	u32				rseq_sig;
> -	/*
> -	 * RmW on rseq_event_pending must be performed atomically
> -	 * with respect to preemption.
> -	 */
> -	bool				rseq_event_pending;
> +	struct rseq_event		rseq_event;
>   # ifdef CONFIG_DEBUG_RSEQ
>   	/*
>   	 * This is a place holder to save a copy of the rseq fields for
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -460,8 +460,8 @@ void __rseq_handle_notify_resume(struct
>   	 * inconsistencies.
>   	 */
>   	scoped_guard(RSEQ_EVENT_GUARD) {
> -		event = t->rseq_event_pending;
> -		t->rseq_event_pending = false;
> +		event = t->rseq_event.sched_switch;
> +		t->rseq_event.sched_switch = false;
>   	}
>   
>   	if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
> @@ -523,6 +523,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
>   		current->rseq = NULL;
>   		current->rseq_sig = 0;
>   		current->rseq_len = 0;
> +		current->rseq_event.all = 0;
>   		return 0;
>   	}
>   
> @@ -595,6 +596,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
>   	 * registered, ensure the cpu_id_start and cpu_id fields
>   	 * are updated before returning to user-space.
>   	 */
> +	current->rseq_event.has_rseq = true;
>   	rseq_sched_switch_event(current);
>   
>   	return 0;
> 


-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com

  reply	other threads:[~2025-08-25 18:11 UTC|newest]

Thread overview: 102+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-23 16:39 [patch V2 00/37] rseq: Optimize exit to user space Thomas Gleixner
2025-08-23 16:39 ` [patch V2 01/37] rseq: Avoid pointless evaluation in __rseq_notify_resume() Thomas Gleixner
2025-08-25 15:39   ` Mathieu Desnoyers
2025-08-23 16:39 ` [patch V2 02/37] rseq: Condense the inline stubs Thomas Gleixner
2025-08-25 15:40   ` Mathieu Desnoyers
2025-08-23 16:39 ` [patch V2 03/37] resq: Move algorithm comment to top Thomas Gleixner
2025-08-25 15:41   ` Mathieu Desnoyers
2025-08-23 16:39 ` [patch V2 04/37] rseq: Remove the ksig argument from rseq_handle_notify_resume() Thomas Gleixner
2025-08-25 15:43   ` Mathieu Desnoyers
2025-08-23 16:39 ` [patch V2 05/37] rseq: Simplify registration Thomas Gleixner
2025-08-25 15:44   ` Mathieu Desnoyers
2025-08-23 16:39 ` [patch V2 06/37] rseq: Simplify the event notification Thomas Gleixner
2025-08-25 17:36   ` Mathieu Desnoyers
2025-09-02 13:39     ` Thomas Gleixner
2025-09-04 17:19       ` Mathieu Desnoyers
2025-08-23 16:39 ` [patch V2 07/37] rseq, virt: Retrigger RSEQ after vcpu_run() Thomas Gleixner
2025-08-25 17:54   ` Mathieu Desnoyers
2025-08-25 20:24     ` Sean Christopherson
2025-09-02 15:37       ` Thomas Gleixner
2025-08-23 16:39 ` [patch V2 08/37] rseq: Avoid CPU/MM CID updates when no event pending Thomas Gleixner
2025-08-25 18:02   ` Mathieu Desnoyers
2025-09-02 13:41     ` Thomas Gleixner
2025-09-04 17:20       ` Mathieu Desnoyers
2025-08-23 16:39 ` [patch V2 09/37] rseq: Introduce struct rseq_event Thomas Gleixner
2025-08-25 18:11   ` Mathieu Desnoyers [this message]
2025-09-02 13:45     ` Thomas Gleixner
2025-08-23 16:39 ` [patch V2 10/37] entry: Cleanup header Thomas Gleixner
2025-08-25 18:13   ` Mathieu Desnoyers
2025-08-23 16:39 ` [patch V2 11/37] entry: Remove syscall_enter_from_user_mode_prepare() Thomas Gleixner
2025-08-23 16:39 ` [patch V2 12/37] entry: Inline irqentry_enter/exit_from/to_user_mode() Thomas Gleixner
2025-08-23 16:39 ` [patch V2 13/37] sched: Move MM CID related functions to sched.h Thomas Gleixner
2025-08-25 18:14   ` Mathieu Desnoyers
2025-08-23 16:39 ` [patch V2 14/37] rseq: Cache CPU ID and MM CID values Thomas Gleixner
2025-08-25 18:19   ` Mathieu Desnoyers
2025-09-02 13:48     ` Thomas Gleixner
2025-09-04 17:21       ` Mathieu Desnoyers
2025-08-23 16:39 ` [patch V2 15/37] rseq: Record interrupt from user space Thomas Gleixner
2025-08-25 18:29   ` Mathieu Desnoyers
2025-09-02 13:54     ` Thomas Gleixner
2025-08-23 16:39 ` [patch V2 16/37] rseq: Provide tracepoint wrappers for inline code Thomas Gleixner
2025-08-25 18:32   ` Mathieu Desnoyers
2025-08-23 16:39 ` [patch V2 17/37] rseq: Expose lightweight statistics in debugfs Thomas Gleixner
2025-08-25 18:34   ` Mathieu Desnoyers
2025-08-23 16:39 ` [patch V2 18/37] rseq: Provide static branch for runtime debugging Thomas Gleixner
2025-08-25 18:36   ` Mathieu Desnoyers
2025-08-25 20:30   ` Michael Jeanson
2025-09-02 13:56     ` Thomas Gleixner
2025-08-23 16:39 ` [patch V2 19/37] rseq: Provide and use rseq_update_user_cs() Thomas Gleixner
2025-08-25 19:16   ` Mathieu Desnoyers
2025-09-02 15:19     ` Thomas Gleixner
2025-08-23 16:39 ` [patch V2 20/37] rseq: Replace the debug crud Thomas Gleixner
2025-08-26 14:21   ` Mathieu Desnoyers
2025-08-23 16:39 ` [patch V2 21/37] rseq: Make exit debugging static branch based Thomas Gleixner
2025-08-26 14:23   ` Mathieu Desnoyers
2025-08-23 16:40 ` [patch V2 22/37] rseq: Use static branch for syscall exit debug when GENERIC_IRQ_ENTRY=y Thomas Gleixner
2025-08-26 14:28   ` Mathieu Desnoyers
2025-08-23 16:40 ` [patch V2 23/37] rseq: Provide and use rseq_set_uids() Thomas Gleixner
2025-08-26 14:52   ` Mathieu Desnoyers
2025-09-02 14:08     ` Thomas Gleixner
2025-09-02 16:33       ` Thomas Gleixner
2025-09-04 17:25         ` Mathieu Desnoyers
2025-08-23 16:40 ` [patch V2 24/37] rseq: Seperate the signal delivery path Thomas Gleixner
2025-08-26 15:08   ` Mathieu Desnoyers
2025-08-23 16:40 ` [patch V2 25/37] rseq: Rework the TIF_NOTIFY handler Thomas Gleixner
2025-08-26 15:12   ` Mathieu Desnoyers
2025-09-02 17:32     ` Thomas Gleixner
2025-09-04  9:52       ` Sean Christopherson
2025-09-04 10:53         ` Thomas Gleixner
2025-09-04 17:07           ` Mathieu Desnoyers
2025-08-23 16:40 ` [patch V2 26/37] rseq: Optimize event setting Thomas Gleixner
2025-08-26 15:26   ` Mathieu Desnoyers
2025-09-02 14:17     ` Thomas Gleixner
2025-08-23 16:40 ` [patch V2 27/37] rseq: Implement fast path for exit to user Thomas Gleixner
2025-08-26 15:33   ` Mathieu Desnoyers
2025-09-02 18:31     ` Thomas Gleixner
2025-08-23 16:40 ` [patch V2 28/37] rseq: Switch to fast path processing on " Thomas Gleixner
2025-08-26 15:40   ` Mathieu Desnoyers
2025-08-27 13:45     ` Mathieu Desnoyers
2025-09-02 18:36       ` Thomas Gleixner
2025-09-04 17:54         ` Mathieu Desnoyers
2025-09-04 21:31           ` Thomas Gleixner
2025-08-23 16:40 ` [patch V2 29/37] entry: Split up exit_to_user_mode_prepare() Thomas Gleixner
2025-08-26 15:41   ` Mathieu Desnoyers
2025-08-23 16:40 ` [patch V2 30/37] rseq: Split up rseq_exit_to_user_mode() Thomas Gleixner
2025-08-26 15:45   ` Mathieu Desnoyers
2025-08-23 16:40 ` [patch V2 31/37] asm-generic: Provide generic TIF infrastructure Thomas Gleixner
2025-08-23 20:37   ` Arnd Bergmann
2025-08-25 19:33   ` Mathieu Desnoyers
2025-08-23 16:40 ` [patch V2 32/37] x86: Use generic TIF bits Thomas Gleixner
2025-08-25 19:34   ` Mathieu Desnoyers
2025-08-23 16:40 ` [patch V2 33/37] s390: " Thomas Gleixner
2025-08-23 16:40 ` [patch V2 34/37] loongarch: " Thomas Gleixner
2025-08-23 16:40 ` [patch V2 35/37] riscv: " Thomas Gleixner
2025-08-23 16:40 ` [patch V2 36/37] rseq: Switch to TIF_RSEQ if supported Thomas Gleixner
2025-08-25 19:39   ` Mathieu Desnoyers
2025-08-25 20:02   ` Sean Christopherson
2025-09-02 11:03     ` Thomas Gleixner
2025-09-04 10:08       ` Sean Christopherson
2025-09-04 12:26         ` Thomas Gleixner
2025-08-23 16:40 ` [patch V2 37/37] entry/rseq: Optimize for TIF_RSEQ on exit Thomas Gleixner
2025-08-25 19:43   ` Mathieu Desnoyers
2025-08-25 15:10 ` [patch V2 00/37] rseq: Optimize exit to user space Mathieu Desnoyers

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9a94b9d4-708c-476a-bf7d-7deb1c14f1ac@efficios.com \
    --to=mathieu.desnoyers@efficios.com \
    --cc=arnd@arndb.de \
    --cc=axboe@kernel.dk \
    --cc=boqun.feng@gmail.com \
    --cc=borntraeger@linux.ibm.com \
    --cc=chenhuacai@kernel.org \
    --cc=decui@microsoft.com \
    --cc=hca@linux.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=palmer@dabbelt.com \
    --cc=paul.walmsley@sifive.com \
    --cc=paulmck@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=seanjc@google.com \
    --cc=svens@linux.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=wei.liu@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).