From: Thomas Gleixner <tglx@kernel.org>
To: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: LKML <linux-kernel@vger.kernel.org>,
	"Mathieu Desnoyers" <mathieu.desnoyers@efficios.com>,
	"Andrè Almeida" <andrealmeid@igalia.com>,
	"Carlos O'Donell" <carlos@redhat.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Florian Weimer" <fweimer@redhat.com>,
	"Rich Felker" <dalias@aerifal.cx>,
	"Torvald Riegel" <triegel@redhat.com>,
	"Darren Hart" <dvhart@infradead.org>,
	"Ingo Molnar" <mingo@kernel.org>,
	"Davidlohr Bueso" <dave@stgolabs.net>,
	"Arnd Bergmann" <arnd@arndb.de>,
	"Liam R . Howlett" <Liam.Howlett@oracle.com>,
	"Uros Bizjak" <ubizjak@gmail.com>,
	"Thomas Weißschuh" <linux@weissschuh.net>
Subject: Re: [PATCH 15/14] selftests: futex: Add tests for robust unlock within the critical section.
Date: Sat, 04 Apr 2026 22:13:28 +0200
Message-ID: <87mrzi5vrr.ffs@tglx>
In-Reply-To: <20260404093939.7XgeW_54@linutronix.de>

On Sat, Apr 04 2026 at 11:39, Sebastian Andrzej Siewior wrote:
> From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
>
> I took Thomas' initial test case from the cover letter and reworked it
> so that it uses ptrace() to single-step through the VDSO unlock
> operation. The test expects the lock to remain locked, with
> `list_op_pending' pointing somewhere, when entering the VDSO unlock
> path. Once execution steps into the critical section, it expects the
> kernel to perform the fixup, that is, to unlock the lock and clear
> `list_op_pending'.

No. The kernel clears list_op_pending when the unlock via CMPXCHG was
successful.

The unlock and clear combo happens in the syscall with the UNLOCK
modifier set.
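
IOW, seen from the test the contract is roughly the following (a sketch
only, reusing the frtu64() wrapper from the patch; the return value
convention for the contended case and the assert()s are my assumptions
for illustration):

	/* Arm the fixup before attempting the unlock */
	rhead->list_op_pending = (struct robust_list *)&test_mutex.__data.__list;

	ret = frtu64(lock, tid, (uint64_t *)&rhead->list_op_pending);
	if (ret == tid) {
		/* Uncontended: CMPXCHG succeeded, kernel cleared the pointer */
		assert(*lock == 0);
		assert(rhead->list_op_pending == NULL);
	} else {
		/*
		 * Contended: CMPXCHG failed, list_op_pending must stay
		 * intact. The unlock and clear combo happens later in
		 * the syscall with the UNLOCK modifier set.
		 */
		assert(ret == (tid | FUTEX_WAITERS));	/* assumed convention */
		assert(rhead->list_op_pending != NULL);
	}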

> +static char *vdso_dbg_file =
> +#ifdef __LP64__
> +	"vdso64.so.dbg";
> +#else
> +	"vdso32.so.dbg";
> +#endif

You wish. There is a zoo of names for the VDSOs and not all
architectures do the vdso64/32 scheme.
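
E.g. something like the probe below would at least avoid hard-coding
one scheme (a sketch; the name list is an assumption based on the arch
Makefiles and certainly not complete):

	static const char *find_vdso_dbg(void)
	{
		static const char * const names[] = {
			"vdso64.so.dbg",	/* x86-64, powerpc64, s390 */
			"vdso32.so.dbg",	/* 32-bit x86 and powerpc */
			"vdso.so.dbg",		/* arm, arm64, riscv */
		};
		unsigned int i;

		for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
			if (!access(names[i], R_OK))
				return names[i];
		}
		return NULL;
	}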

> +
> +static bool pc_is_within(struct user_regs_struct *regs, uint64_t start, uint64_t end)
> +{
> +	unsigned long pc;
> +
> +#if defined(__x86_64__)
> +	pc = regs->rip;
> +#elif defined(__riscv)
> +	pc = regs->pc;
> +# error Missing ptrace support

  #else
  # error not supported

> +#endif
> +	if (pc >= (long) start && pc < end)
> +		return true;
> +	return false;

        return pc >= start && pc < end;

perhaps?

> +static int is_vdso_mod(const char *mod)
> +{
> +	return !strncmp(mod, "[vdso: ", 7);
> +}
> +
> +static int module_callback(Dwfl_Module *mod, void **userdata, const char *name,
> +			   Dwarf_Addr start, void *arg)
> +{
> +	const char *sname;

Snip.

> +	if (dwfl_getmodules(dwfl, module_callback, NULL, 0) != 0)
> +		err(1, "dwfl_getmodules: %s", dwfl_errmsg(-1));
> +	dwfl_end(dwfl);
> +}

That's a lot of complicated code. Wouldn't it be simpler to have a
wrapper script which finds the vdso64/32.so.dbg for the system and gets
the offsets via objdump -t and hands them in as command line arguments?

Then you can use the simple function lookup from patch 14. No strong
preference though. Just a thought.
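
Roughly (a sketch; the search paths, the test binary name and its
options are made up for illustration):

	#!/bin/sh
	# Find the vDSO debug image for this system
	dbg=$(find /usr/lib/debug "/lib/modules/$(uname -r)" \
	      -name 'vdso*.so.dbg' 2>/dev/null | head -n1)
	[ -n "$dbg" ] || exit 4		# KSFT_SKIP

	# Pull the offsets out of the symbol table
	cs_start=$(objdump -t "$dbg" | \
		awk '$NF == "__futex_list64_try_unlock_cs_start" { print "0x" $1 }')
	cs_end=$(objdump -t "$dbg" | \
		awk '$NF == "__futex_list64_try_unlock_cs_end" { print "0x" $1 }')

	exec ./robust_list_cs --cs-start "$cs_start" --cs-end "$cs_end"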

> +
> +static pthread_mutex_t test_mutex;

Newline between the variable and the code please.
 
> +static int unlock_uncontended(struct __test_metadata *_metadata, bool is_32bit)
> +{
> +	struct robust_list_head *rhead;
> +	uint32_t *lock;
> +	pid_t lock_tid;
> +	int error = 1;
> +	pid_t tid;
> +	size_t sz;
> +
> +	syscall(SYS_get_robust_list, 0, &rhead, &sz);

Can fail.
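
A minimal check (the same applies to the call in trace_child() below):

	if (syscall(SYS_get_robust_list, 0, &rhead, &sz))
		err(1, "SYS_get_robust_list");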

> +	pthread_mutex_init(&test_mutex, NULL);
> +
> +	tid = gettid();
> +	lock = (uint32_t *)&test_mutex.__data.__lock;
> +	*lock = tid;
> +	/* Set the pending prior unlocking */
> +	rhead->list_op_pending = (struct robust_list *)&test_mutex.__data.__list.__next;
> +
> +	raise(SIGTRAP);
> +	if (is_32bit)
> +		lock_tid = frtu32(lock, tid, (uint32_t *)&rhead->list_op_pending);
> +	else
> +		lock_tid = frtu64(lock, tid, (uint64_t *)&rhead->list_op_pending);
> +
> +	if (lock_tid != tid)
> +		TH_LOG("Non contended unlock failed. Return: %08x expected %08x\n",
> +		       lock_tid, tid);
> +	else
> +		error = 0;
> +	return error;
> +}
> +
> +enum trace_state {
> +	STATE_WAIT = 0,
> +	STATE_ENTER_VDSO,
> +	STATE_IN_CS,
> +	STATE_OUT_CS,
> +	STATE_LEAVE_VDSO,
> +};
> +
> +static void trace_child(struct __test_metadata *_metadata, pid_t child, bool is_32bit)
> +{
> +	int state = STATE_WAIT;
> +	struct robust_list_head *rhead;
> +	size_t sz;
> +	bool do_end = false;

Variable ordering: reverse fir tree, please.
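
I.e. longest line first:

	struct robust_list_head *rhead;
	int state = STATE_WAIT;
	bool do_end = false;
	size_t sz;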

> +	syscall(SYS_get_robust_list, 0, &rhead, &sz);

Can fail.

> +	do {
> +		struct user_regs_struct regs;
> +		uint64_t lock_val;
> +		bool in_vdso;
> +		int wstatus;
> +		pid_t rpid;
> +
> +		rpid = waitpid(child, &wstatus, 0);
> +		if (rpid != child)
> +			errx(1, "waitpid");
> +		if (!do_end) {
> +			if (WSTOPSIG(wstatus) != SIGTRAP)
> +				errx(1, "NOT SIGTRAP");
> +		} else {
> +			if (!WIFEXITED(wstatus))
> +				errx(1, "Did not exit, but we are done");
> +			ASSERT_EQ(WEXITSTATUS(wstatus), 0);
> +			return;
> +		}
> +
> +		if (ptrace(PTRACE_GETREGS, child, 0, &regs) != 0)
> +			errx(1, "PTRACE_GETREGS");
> +
> +		if (is_32bit) {
> +			in_vdso = pc_is_within(&regs, (long)frtu32, frtu32_end);
> +		} else {
> +			in_vdso = pc_is_within(&regs, (long)frtu64, frtu64_end);
> +		}

No brackets required.
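
I.e. just the quoted code without the braces:

	if (is_32bit)
		in_vdso = pc_is_within(&regs, (long)frtu32, frtu32_end);
	else
		in_vdso = pc_is_within(&regs, (long)frtu64, frtu64_end);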

> +		if (in_vdso) {
> +			unsigned long rhead_val;
> +
> +			if (state == STATE_WAIT) {
> +				state = STATE_ENTER_VDSO;
> +
> +			} else {
> +				if (is_32bit) {
> +					if (pc_is_within(&regs, __futex_list32_try_unlock_cs_start,
> +							 __futex_list32_try_unlock_cs_end))
> +						state = STATE_IN_CS;
> +					else
> +						state = STATE_OUT_CS;
> +				} else {
> +					if (pc_is_within(&regs, __futex_list64_try_unlock_cs_start,
> +							 __futex_list64_try_unlock_cs_end))
> +						state = STATE_IN_CS;
> +					else
> +						state = STATE_OUT_CS;
> +				}
> +			}

See below.


> +			errno = 0;
> +			rhead_val = ptrace(PTRACE_PEEKDATA, child, &rhead->list_op_pending, 0);
> +			if (rhead_val == -1 && errno != 0)
> +				err(1, "PTRACE_PEEKDATA");
> +
> +			lock_val = ptrace(PTRACE_PEEKDATA, child, &test_mutex.__data.__lock, 0);
> +			if (lock_val == -1 && errno != 0)
> +				err(1, "PTRACE_PEEKDATA");
> +
> +			if (state == STATE_ENTER_VDSO) {
> +				/* Entering VDSO, it is supposed to be locked */
> +				ASSERT_NE(rhead_val, 0);
> +				ASSERT_EQ(lock_val, child);
> +
> +			} else if (state == STATE_IN_CS) {
> +				/*
> +				 * If the critical section has been entered then
> +				 * the kernel has to unlock and clear list_op_pending.

If the critical section has been entered then the cmpxchg succeeded for
the uncontended case and failed for the contended case. In the success
case the kernel must clear the pointer. In the failed case the kernel is
not allowed to touch it.

So the tests should validate both scenarios. You can differentiate
between the cases via lockval == child vs. lockval == (child |
FUTEX_WAITERS), or via a state flag.

Also in the success case it must reach the success label. In the failure
case it's not allowed to do so.

I think you can simplify the state handling as well.

	if (ptrace(PTRACE_GETREGS, child, 0, &regs) != 0)
		errx(1, "PTRACE_GETREGS");

	rhead_val = ptrace(....);
	if (test_32bit)
		rhead_val &= UINT_MAX;

	lock_val = ptrace(....);
	/* Get the expected value to see which variant is tested */
	lock_exp = ptrace(....);
	ASSERT_EQ(lock_exp & ~FUTEX_WAITERS, child);

	switch (state) {
	case STATE_WAIT:
		if (pc_is_within_function(regs, is_32bit))
			state = STATE_IN_FUNCTION;
		break;

	case STATE_IN_FUNCTION:
		ASSERT_NE(rhead_val, 0);
		ASSERT_EQ(lock_val, lock_exp);
		if (pc_is_within_cs(regs, is_32bit))
			state = STATE_IN_CS;
		break;

	case STATE_IN_CS:
		if (lock_exp & FUTEX_WAITERS) {
			ASSERT_EQ(lock_val, lock_exp);
			ASSERT_NE(rhead_val, 0);
			ASSERT_NE(pc_within_success(regs, is_32bit), true);
		} else {
			ASSERT_EQ(lock_val, 0);
			ASSERT_NE(rhead_val, 0);
			if (pc_within_success(...))
				state = STATE_IN_SUCCESS;
			else
				ASSERT_EQ(pc_within_cs(...), true);
		}
		break;
	...
	}

You get the idea.

> +
> +TEST(robust_unlock_64)
> +{
> +	pid_t child;
> +
> +	if (!frtu64)
> +		SKIP(return, "Missing __vdso_futex_robust_list64_try_unlock() in VDSO\n");

Which is valid when this is built as a 32-bit executable, so the test
should not be there at all.

Thanks,

        tglx
