public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH] futex: Introduce __vdso_robust_futex_unlock
@ 2026-03-11 18:54 Mathieu Desnoyers
  2026-03-11 20:11 ` Mathieu Desnoyers
                   ` (3 more replies)
  0 siblings, 4 replies; 32+ messages in thread
From: Mathieu Desnoyers @ 2026-03-11 18:54 UTC (permalink / raw)
  To: André Almeida
  Cc: linux-kernel, Mathieu Desnoyers, Carlos O'Donell,
	Sebastian Andrzej Siewior, Peter Zijlstra, Florian Weimer,
	Rich Felker, Torvald Riegel, Darren Hart, Thomas Gleixner,
	Ingo Molnar, Davidlohr Bueso, Arnd Bergmann, Liam R . Howlett

This vDSO unlocks the robust futex by exchanging the content of
*uaddr with 0, using store-release semantics. If the futex has
waiters, it sets bit 1 of *op_pending_addr; otherwise it clears
*op_pending_addr. Those operations are within a code region
known to the kernel, making them safe with respect to asynchronous
program termination either from thread context or from a nested
signal handler.

Expected use of this vDSO:

if ((__vdso_robust_futex_unlock((u32 *) &mutex->__data.__lock, &pd->robust_head.list_op_pending)
    & FUTEX_WAITERS) != 0)
        futex_wake((u32 *) &mutex->__data.__lock, 1, private);
WRITE_ONCE(pd->robust_head.list_op_pending, 0);

This fixes a long-standing data corruption race condition with robust
futexes, as pointed out here:

  "File corruption race condition in robust mutex unlocking"
  https://sourceware.org/bugzilla/show_bug.cgi?id=14485

Known limitation: this only takes care of non-PI futexes.

The approach taken by this vDSO is to extend the x86 vDSO exception
table to track the relevant ip range. The two kernel execution paths
impacted by this change are:

  1) Process exit
  2) Signal delivery

[ This patch is only lightly compile-tested, submitted for feedback. ]

Link: https://lore.kernel.org/lkml/20260220202620.139584-1-andrealmeid@igalia.com/
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: "André Almeida" <andrealmeid@igalia.com>
Cc: Carlos O'Donell <carlos@redhat.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Rich Felker <dalias@aerifal.cx>
Cc: Torvald Riegel <triegel@redhat.com>
Cc: Darren Hart <dvhart@infradead.org>
Cc: Thomas Gleixner <tglx@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Liam R . Howlett" <Liam.Howlett@oracle.com>
---
 arch/x86/entry/vdso/common/vfutex.c |  51 ++++++++++
 arch/x86/entry/vdso/extable.c       |  54 +++++++++-
 arch/x86/entry/vdso/extable.h       |  26 +++--
 arch/x86/entry/vdso/vdso64/Makefile |   1 +
 arch/x86/entry/vdso/vdso64/vfutex.c |   1 +
 arch/x86/entry/vdso/vdso64/vsgx.S   |   2 +-
 arch/x86/include/asm/vdso.h         |   3 +
 arch/x86/kernel/signal.c            |   4 +
 include/linux/futex.h               |   1 +
 include/vdso/futex.h                |  35 +++++++
 kernel/futex/core.c                 | 151 ++++++++++++++++++++++++----
 11 files changed, 296 insertions(+), 33 deletions(-)
 create mode 100644 arch/x86/entry/vdso/common/vfutex.c
 create mode 100644 arch/x86/entry/vdso/vdso64/vfutex.c
 create mode 100644 include/vdso/futex.h

diff --git a/arch/x86/entry/vdso/common/vfutex.c b/arch/x86/entry/vdso/common/vfutex.c
new file mode 100644
index 000000000000..fe730e0d3dfa
--- /dev/null
+++ b/arch/x86/entry/vdso/common/vfutex.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+#include <linux/types.h>
+#include <vdso/futex.h>
+#include "extable.h"
+
+#ifdef CONFIG_X86_64
+# define ASM_PTR_BIT_SET	"btsq "
+# define ASM_PTR_SET		"movq "
+#else
+# define ASM_PTR_BIT_SET	"btsl "
+# define ASM_PTR_SET		"movl "
+#endif
+
+u32 __vdso_robust_futex_unlock(u32 *uaddr, uintptr_t *op_pending_addr)
+{
+	u32 val = 0;
+
+	/*
+	 * Within the ip range identified by the futex exception table,
+	 * the register "eax" contains the value loaded by xchg. This is
+	 * expected by futex_vdso_exception() to check whether waiters
+	 * need to be woken up. This register state is transferred to
+	 * bit 1 (NEED_WAKEUP) of *op_pending_addr before the ip range
+	 * ends.
+	 */
+	asm volatile (	_ASM_VDSO_EXTABLE_FUTEX_HANDLE(1f, 3f)
+			/* Exchange uaddr (store-release). */
+			"xchg %[uaddr], %[val]\n\t"
+			"1:\n\t"
+			/* Test if FUTEX_WAITERS (0x80000000) is set. */
+			"test %[val], %[val]\n\t"
+			"js 2f\n\t"
+			/* Clear *op_pending_addr if there are no waiters. */
+			ASM_PTR_SET "$0, %[op_pending_addr]\n\t"
+			"jmp 3f\n\t"
+			"2:\n\t"
+			/* Set bit 1 (NEED_WAKEUP) in *op_pending_addr. */
+			ASM_PTR_BIT_SET "$1, %[op_pending_addr]\n\t"
+			"3:\n\t"
+			: [val] "+a" (val),
+			  [uaddr] "+m" (*uaddr)
+			: [op_pending_addr] "m" (*op_pending_addr)
+			: "memory");
+	return val;
+}
+
+u32 robust_futex_unlock(u32 *, uintptr_t *)
+	__attribute__((weak, alias("__vdso_robust_futex_unlock")));
diff --git a/arch/x86/entry/vdso/extable.c b/arch/x86/entry/vdso/extable.c
index afcf5b65beef..a668fc2c93dd 100644
--- a/arch/x86/entry/vdso/extable.c
+++ b/arch/x86/entry/vdso/extable.c
@@ -1,12 +1,26 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/err.h>
 #include <linux/mm.h>
+#include <linux/futex.h>
 #include <asm/current.h>
 #include <asm/traps.h>
 #include <asm/vdso.h>
 
+enum vdso_extable_entry_type {
+	VDSO_EXTABLE_ENTRY_FIXUP = 0,
+	VDSO_EXTABLE_ENTRY_FUTEX = 1,
+};
+
 struct vdso_exception_table_entry {
-	int insn, fixup;
+	int type;	/* enum vdso_extable_entry_type */
+	union {
+		struct {
+			int insn, fixup_insn;
+		} fixup;
+		struct {
+			int start, end;
+		} futex;
+	};
 };
 
 bool fixup_vdso_exception(struct pt_regs *regs, int trapnr,
@@ -33,8 +47,10 @@ bool fixup_vdso_exception(struct pt_regs *regs, int trapnr,
 	extable = image->extable;
 
 	for (i = 0; i < nr_entries; i++) {
-		if (regs->ip == base + extable[i].insn) {
-			regs->ip = base + extable[i].fixup;
+		if (extable[i].type != VDSO_EXTABLE_ENTRY_FIXUP)
+			continue;
+		if (regs->ip == base + extable[i].fixup.insn) {
+			regs->ip = base + extable[i].fixup.fixup_insn;
 			regs->di = trapnr;
 			regs->si = error_code;
 			regs->dx = fault_addr;
@@ -44,3 +60,35 @@ bool fixup_vdso_exception(struct pt_regs *regs, int trapnr,
 
 	return false;
 }
+
+void futex_vdso_exception(struct pt_regs *regs,
+			  bool *_in_futex_vdso,
+			  bool *_need_wakeup)
+{
+	const struct vdso_image *image = current->mm->context.vdso_image;
+	const struct vdso_exception_table_entry *extable;
+	bool in_futex_vdso = false, need_wakeup = false;
+	unsigned int nr_entries, i;
+	unsigned long base;
+
+	if (!current->mm->context.vdso)
+		goto end;
+
+	base = (unsigned long)current->mm->context.vdso + image->extable_base;
+	nr_entries = image->extable_len / (sizeof(*extable));
+	extable = image->extable;
+
+	for (i = 0; i < nr_entries; i++) {
+		if (extable[i].type != VDSO_EXTABLE_ENTRY_FUTEX)
+			continue;
+		if (regs->ip >= base + extable[i].futex.start &&
+		    regs->ip < base + extable[i].futex.end) {
+			in_futex_vdso = true;
+			if (regs->ax & FUTEX_WAITERS)
+				need_wakeup = true;
+		}
+	}
+end:
+	*_in_futex_vdso = in_futex_vdso;
+	*_need_wakeup = need_wakeup;
+}
diff --git a/arch/x86/entry/vdso/extable.h b/arch/x86/entry/vdso/extable.h
index baba612b832c..7251467ad210 100644
--- a/arch/x86/entry/vdso/extable.h
+++ b/arch/x86/entry/vdso/extable.h
@@ -8,20 +8,32 @@
  * exception table, not each individual entry.
  */
 #ifdef __ASSEMBLER__
-#define _ASM_VDSO_EXTABLE_HANDLE(from, to)	\
-	ASM_VDSO_EXTABLE_HANDLE from to
+#define _ASM_VDSO_EXTABLE_FIXUP_HANDLE(from, to)	\
+	ASM_VDSO_EXTABLE_FIXUP_HANDLE from to
 
-.macro ASM_VDSO_EXTABLE_HANDLE from:req to:req
+.macro ASM_VDSO_EXTABLE_FIXUP_HANDLE from:req to:req
 	.pushsection __ex_table, "a"
+	.long 0		/* type: fixup */
 	.long (\from) - __ex_table
 	.long (\to) - __ex_table
 	.popsection
 .endm
 #else
-#define _ASM_VDSO_EXTABLE_HANDLE(from, to)	\
-	".pushsection __ex_table, \"a\"\n"      \
-	".long (" #from ") - __ex_table\n"      \
-	".long (" #to ") - __ex_table\n"        \
+#define _ASM_VDSO_EXTABLE_FIXUP_HANDLE(from, to)	\
+	".pushsection __ex_table, \"a\"\n"      	\
+	".long 0\n"	/* type: fixup */		\
+	".long (" #from ") - __ex_table\n"      	\
+	".long (" #to ") - __ex_table\n"        	\
+	".popsection\n"
+
+/*
+ * Identify robust futex unlock critical section.
+ */
+#define _ASM_VDSO_EXTABLE_FUTEX_HANDLE(start, end)	\
+	".pushsection __ex_table, \"a\"\n"      	\
+	".long 1\n"	/* type: futex */		\
+	".long (" #start ") - __ex_table\n"      	\
+	".long (" #end ") - __ex_table\n"        	\
 	".popsection\n"
 #endif
 
diff --git a/arch/x86/entry/vdso/vdso64/Makefile b/arch/x86/entry/vdso/vdso64/Makefile
index bfffaf1aeecc..df53c2d0037d 100644
--- a/arch/x86/entry/vdso/vdso64/Makefile
+++ b/arch/x86/entry/vdso/vdso64/Makefile
@@ -10,6 +10,7 @@ vdsos-$(CONFIG_X86_X32_ABI)	+= x32
 # Files to link into the vDSO:
 vobjs-y				:= note.o vclock_gettime.o vgetcpu.o
 vobjs-y				+= vgetrandom.o vgetrandom-chacha.o
+vobjs-y				+= vfutex.o
 vobjs-$(CONFIG_X86_SGX)		+= vsgx.o
 
 # Compilation flags
diff --git a/arch/x86/entry/vdso/vdso64/vfutex.c b/arch/x86/entry/vdso/vdso64/vfutex.c
new file mode 100644
index 000000000000..940a6ee30026
--- /dev/null
+++ b/arch/x86/entry/vdso/vdso64/vfutex.c
@@ -0,0 +1 @@
+#include "common/vfutex.c"
diff --git a/arch/x86/entry/vdso/vdso64/vsgx.S b/arch/x86/entry/vdso/vdso64/vsgx.S
index 37a3d4c02366..0ea5a1ebd455 100644
--- a/arch/x86/entry/vdso/vdso64/vsgx.S
+++ b/arch/x86/entry/vdso/vdso64/vsgx.S
@@ -145,6 +145,6 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
 
 	.cfi_endproc
 
-_ASM_VDSO_EXTABLE_HANDLE(.Lenclu_eenter_eresume, .Lhandle_exception)
+_ASM_VDSO_EXTABLE_FIXUP_HANDLE(.Lenclu_eenter_eresume, .Lhandle_exception)
 
 SYM_FUNC_END(__vdso_sgx_enter_enclave)
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index e8afbe9faa5b..77e465fb373c 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -38,6 +38,9 @@ extern int map_vdso_once(const struct vdso_image *image, unsigned long addr);
 extern bool fixup_vdso_exception(struct pt_regs *regs, int trapnr,
 				 unsigned long error_code,
 				 unsigned long fault_addr);
+extern void futex_vdso_exception(struct pt_regs *regs,
+				 bool *in_futex_vdso,
+				 bool *need_wakeup);
 #endif /* __ASSEMBLER__ */
 
 #endif /* _ASM_X86_VDSO_H */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 2404233336ab..c2e4db89f16d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -28,6 +28,7 @@
 #include <linux/entry-common.h>
 #include <linux/syscalls.h>
 #include <linux/rseq.h>
+#include <linux/futex.h>
 
 #include <asm/processor.h>
 #include <asm/ucontext.h>
@@ -235,6 +236,9 @@ unsigned long get_sigframe_size(void)
 static int
 setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
 {
+	/* Handle futex robust list fixup. */
+	futex_signal_deliver(ksig, regs);
+
 	/* Perform fixup for the pre-signal frame. */
 	rseq_signal_deliver(ksig, regs);
 
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 9e9750f04980..6c274c79e176 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -81,6 +81,7 @@ void futex_exec_release(struct task_struct *tsk);
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	      u32 __user *uaddr2, u32 val2, u32 val3);
 int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4);
+void futex_signal_deliver(struct ksignal *ksig, struct pt_regs *regs);
 
 #ifdef CONFIG_FUTEX_PRIVATE_HASH
 int futex_hash_allocate_default(void);
diff --git a/include/vdso/futex.h b/include/vdso/futex.h
new file mode 100644
index 000000000000..1e949ac1ed85
--- /dev/null
+++ b/include/vdso/futex.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2026 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef _VDSO_FUTEX_H
+#define _VDSO_FUTEX_H
+
+#include <linux/types.h>
+
+/**
+ * __vdso_robust_futex_unlock - Architecture-specific vDSO implementation of robust futex unlock.
+ * @uaddr:		Lock address (points to a 32-bit unsigned integer type).
+ * @op_pending_addr:	Robust list operation pending address (points to a pointer type).
+ *
+ * This vDSO unlocks the robust futex by exchanging the content of
+ * *uaddr with 0 with a store-release semantic. If the futex has
+ * waiters, it sets bit 1 of *op_pending_addr, else it clears
+ * *op_pending_addr. Those operations are within a code region
+ * known by the kernel, making them safe with respect to asynchronous
+ * program termination either from thread context or from a nested
+ * signal handler.
+ *
+ * Expected use of this vDSO:
+ *
+ * if ((__vdso_robust_futex_unlock((u32 *) &mutex->__data.__lock, &pd->robust_head.list_op_pending)
+ *     & FUTEX_WAITERS) != 0)
+ *         futex_wake((u32 *) &mutex->__data.__lock, 1, private);
+ * WRITE_ONCE(pd->robust_head.list_op_pending, 0);
+ *
+ * Returns:	The old value present at *uaddr.
+ */
+extern u32 __vdso_robust_futex_unlock(u32 *uaddr, uintptr_t *op_pending_addr);
+
+#endif /* _VDSO_FUTEX_H */
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index cf7e610eac42..92c0f94c8077 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -48,6 +48,10 @@
 #include "futex.h"
 #include "../locking/rtmutex_common.h"
 
+#define FUTEX_UADDR_PI		(1UL << 0)
+#define FUTEX_UADDR_NEED_WAKEUP	(1UL << 1)
+#define FUTEX_UADDR_MASK	(~(FUTEX_UADDR_PI | FUTEX_UADDR_NEED_WAKEUP))
+
 /*
  * The base of the bucket array and its size are always used together
  * (after initialization only in futex_hash()), so ensure that they
@@ -1004,6 +1008,77 @@ void futex_unqueue_pi(struct futex_q *q)
 	q->pi_state = NULL;
 }
 
+/*
+ * Transfer the need wakeup state from vDSO stack to the
+ * FUTEX_UADDR_NEED_WAKEUP list_op_pending bit so it's observed if the
+ * program is terminated while executing the signal handler.
+ */
+static void signal_delivery_fixup_robust_list(struct task_struct *curr, struct pt_regs *regs)
+{
+	struct robust_list_head __user *head = curr->robust_list;
+	bool in_futex_vdso, need_wakeup;
+	unsigned long pending;
+
+	if (!head)
+		return;
+	futex_vdso_exception(regs, &in_futex_vdso, &need_wakeup);
+	if (!in_futex_vdso)
+		return;
+	if (need_wakeup) {
+		if (get_user(pending, (unsigned long __user *)&head->list_op_pending))
+			goto fault;
+		pending |= FUTEX_UADDR_NEED_WAKEUP;
+		if (put_user(pending, (unsigned long __user *)&head->list_op_pending))
+			goto fault;
+	} else {
+		if (put_user(0UL, (unsigned long __user *)&head->list_op_pending))
+			goto fault;
+	}
+	return;
+fault:
+	force_sig(SIGSEGV);
+}
+
+#ifdef CONFIG_COMPAT
+static void compat_signal_delivery_fixup_robust_list(struct task_struct *curr, struct pt_regs *regs)
+{
+	struct compat_robust_list_head __user *head = curr->compat_robust_list;
+	bool in_futex_vdso, need_wakeup;
+	unsigned int pending;
+
+	if (!head)
+		return;
+	futex_vdso_exception(regs, &in_futex_vdso, &need_wakeup);
+	if (!in_futex_vdso)
+		return;
+	if (need_wakeup) {
+		if (get_user(pending, (compat_uptr_t __user *)&head->list_op_pending))
+			goto fault;
+		pending |= FUTEX_UADDR_NEED_WAKEUP;
+		if (put_user(pending, (compat_uptr_t __user *)&head->list_op_pending))
+			goto fault;
+	} else {
+		if (put_user(0U, (compat_uptr_t __user *)&head->list_op_pending))
+			goto fault;
+	}
+	return;
+fault:
+	force_sig(SIGSEGV);
+}
+#endif
+
+void futex_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
+{
+	struct task_struct *tsk = current;
+
+	if (unlikely(tsk->robust_list))
+		signal_delivery_fixup_robust_list(tsk, regs);
+#ifdef CONFIG_COMPAT
+	if (unlikely(tsk->compat_robust_list))
+		compat_signal_delivery_fixup_robust_list(tsk, regs);
+#endif
+}
+
 /* Constants for the pending_op argument of handle_futex_death */
 #define HANDLE_DEATH_PENDING	true
 #define HANDLE_DEATH_LIST	false
@@ -1013,12 +1088,31 @@ void futex_unqueue_pi(struct futex_q *q)
  * dying task, and do notification if so:
  */
 static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
-			      bool pi, bool pending_op)
+			      bool pi, bool pending_op, bool need_wakeup)
 {
+	bool unlock_store_done = false;
 	u32 uval, nval, mval;
 	pid_t owner;
 	int err;
 
+	/*
+	 * Process dies after the store unlocking futex, before clearing
+	 * the pending ops. Wake up one waiter if needed. Prevent
+	 * storing to the futex after it was unlocked. Only handle
+	 * non-PI futex.
+	 */
+	if (pending_op && !pi) {
+		bool in_futex_vdso, vdso_need_wakeup;
+
+		futex_vdso_exception(task_pt_regs(curr), &in_futex_vdso, &vdso_need_wakeup);
+		if (need_wakeup || vdso_need_wakeup) {
+			futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1,
+				   FUTEX_BITSET_MATCH_ANY);
+		}
+		if (need_wakeup || in_futex_vdso)
+			return 0;
+	}
+
 	/* Futex address must be 32bit aligned */
 	if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
 		return -1;
@@ -1071,6 +1165,13 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
 		return 0;
 	}
 
+	/*
+	 * Terminated after the unlock store is done. Wake up waiters,
+	 * but do not change the lock state.
+	 */
+	if (unlock_store_done)
+		return 0;
+
 	if (owner != task_pid_vnr(curr))
 		return 0;
 
@@ -1128,19 +1229,23 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
 }
 
 /*
- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes, bit 1 signals
+ * need wakeup:
  */
 static inline int fetch_robust_entry(struct robust_list __user **entry,
 				     struct robust_list __user * __user *head,
-				     unsigned int *pi)
+				     unsigned int *pi,
+				     unsigned int *need_wakeup)
 {
 	unsigned long uentry;
 
 	if (get_user(uentry, (unsigned long __user *)head))
 		return -EFAULT;
 
-	*entry = (void __user *)(uentry & ~1UL);
-	*pi = uentry & 1;
+	*entry = (void __user *)(uentry & FUTEX_UADDR_MASK);
+	*pi = uentry & FUTEX_UADDR_PI;
+	if (need_wakeup)
+		*need_wakeup = uentry & FUTEX_UADDR_NEED_WAKEUP;
 
 	return 0;
 }
@@ -1155,7 +1260,7 @@ static void exit_robust_list(struct task_struct *curr)
 {
 	struct robust_list_head __user *head = curr->robust_list;
 	struct robust_list __user *entry, *next_entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip, need_wakeup;
 	unsigned int next_pi;
 	unsigned long futex_offset;
 	int rc;
@@ -1164,7 +1269,7 @@ static void exit_robust_list(struct task_struct *curr)
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
 	 */
-	if (fetch_robust_entry(&entry, &head->list.next, &pi))
+	if (fetch_robust_entry(&entry, &head->list.next, &pi, NULL))
 		return;
 	/*
 	 * Fetch the relative futex offset:
@@ -1175,7 +1280,7 @@ static void exit_robust_list(struct task_struct *curr)
 	 * Fetch any possibly pending lock-add first, and handle it
 	 * if it exists:
 	 */
-	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
+	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip, &need_wakeup))
 		return;
 
 	next_entry = NULL;	/* avoid warning with gcc */
@@ -1184,14 +1289,14 @@ static void exit_robust_list(struct task_struct *curr)
 		 * Fetch the next entry in the list before calling
 		 * handle_futex_death:
 		 */
-		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
+		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi, NULL);
 		/*
 		 * A pending lock might already be on the list, so
 		 * don't process it twice:
 		 */
 		if (entry != pending) {
 			if (handle_futex_death((void __user *)entry + futex_offset,
-						curr, pi, HANDLE_DEATH_LIST))
+						curr, pi, HANDLE_DEATH_LIST, false))
 				return;
 		}
 		if (rc)
@@ -1209,7 +1314,7 @@ static void exit_robust_list(struct task_struct *curr)
 
 	if (pending) {
 		handle_futex_death((void __user *)pending + futex_offset,
-				   curr, pip, HANDLE_DEATH_PENDING);
+				   curr, pip, HANDLE_DEATH_PENDING, need_wakeup);
 	}
 }
 
@@ -1224,17 +1329,20 @@ static void __user *futex_uaddr(struct robust_list __user *entry,
 }
 
 /*
- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes, bit 1 signals
+ * need wakeup:
  */
 static inline int
 compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
-		   compat_uptr_t __user *head, unsigned int *pi)
+		   compat_uptr_t __user *head, unsigned int *pi, unsigned int *need_wakeup)
 {
 	if (get_user(*uentry, head))
 		return -EFAULT;
 
-	*entry = compat_ptr((*uentry) & ~1);
-	*pi = (unsigned int)(*uentry) & 1;
+	*entry = compat_ptr((*uentry) & FUTEX_UADDR_MASK);
+	*pi = (unsigned int)(*uentry) & FUTEX_UADDR_PI;
+	if (need_wakeup)
+		*need_wakeup = (unsigned int)(*uentry) & FUTEX_UADDR_NEED_WAKEUP;
 
 	return 0;
 }
@@ -1249,7 +1357,7 @@ static void compat_exit_robust_list(struct task_struct *curr)
 {
 	struct compat_robust_list_head __user *head = curr->compat_robust_list;
 	struct robust_list __user *entry, *next_entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip, need_wakeup;
 	unsigned int next_pi;
 	compat_uptr_t uentry, next_uentry, upending;
 	compat_long_t futex_offset;
@@ -1259,7 +1367,7 @@ static void compat_exit_robust_list(struct task_struct *curr)
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
 	 */
-	if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
+	if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi, NULL))
 		return;
 	/*
 	 * Fetch the relative futex offset:
@@ -1271,7 +1379,7 @@ static void compat_exit_robust_list(struct task_struct *curr)
 	 * if it exists:
 	 */
 	if (compat_fetch_robust_entry(&upending, &pending,
-			       &head->list_op_pending, &pip))
+			       &head->list_op_pending, &pip, &need_wakeup))
 		return;
 
 	next_entry = NULL;	/* avoid warning with gcc */
@@ -1281,7 +1389,7 @@ static void compat_exit_robust_list(struct task_struct *curr)
 		 * handle_futex_death:
 		 */
 		rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
-			(compat_uptr_t __user *)&entry->next, &next_pi);
+			(compat_uptr_t __user *)&entry->next, &next_pi, NULL);
 		/*
 		 * A pending lock might already be on the list, so
 		 * dont process it twice:
@@ -1289,8 +1397,7 @@ static void compat_exit_robust_list(struct task_struct *curr)
 		if (entry != pending) {
 			void __user *uaddr = futex_uaddr(entry, futex_offset);
 
-			if (handle_futex_death(uaddr, curr, pi,
-					       HANDLE_DEATH_LIST))
+			if (handle_futex_death(uaddr, curr, pi, HANDLE_DEATH_LIST, false))
 				return;
 		}
 		if (rc)
@@ -1309,7 +1416,7 @@ static void compat_exit_robust_list(struct task_struct *curr)
 	if (pending) {
 		void __user *uaddr = futex_uaddr(pending, futex_offset);
 
-		handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
+		handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING, need_wakeup);
 	}
 }
 #endif
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2026-03-25 14:12 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-11 18:54 [RFC PATCH] futex: Introduce __vdso_robust_futex_unlock Mathieu Desnoyers
2026-03-11 20:11 ` Mathieu Desnoyers
2026-03-12  8:49 ` Florian Weimer
2026-03-12 13:13   ` Mathieu Desnoyers
2026-03-12 14:12     ` Florian Weimer
2026-03-12 14:14       ` André Almeida
2026-03-12 16:09         ` Mathieu Desnoyers
2026-03-12 13:46 ` André Almeida
2026-03-12 14:04   ` Mathieu Desnoyers
2026-03-12 18:40     ` Mathieu Desnoyers
2026-03-12 18:58       ` André Almeida
2026-03-12 19:10     ` Thomas Gleixner
2026-03-12 19:16       ` Mathieu Desnoyers
2026-03-13  8:20         ` Florian Weimer
2026-03-12 20:19   ` Thomas Gleixner
2026-03-12 21:28     ` Mathieu Desnoyers
2026-03-12 22:23 ` Thomas Gleixner
2026-03-12 22:52   ` Mathieu Desnoyers
2026-03-13 12:12     ` Sebastian Andrzej Siewior
2026-03-13 12:17       ` Mathieu Desnoyers
2026-03-13 13:29         ` Sebastian Andrzej Siewior
2026-03-13 13:35           ` Mathieu Desnoyers
2026-03-16 17:12     ` Thomas Gleixner
2026-03-16 19:36       ` Mathieu Desnoyers
2026-03-16 20:27         ` Thomas Gleixner
2026-03-16 21:01           ` Mathieu Desnoyers
2026-03-16 22:19             ` Thomas Gleixner
2026-03-16 22:30               ` Mathieu Desnoyers
2026-03-16 23:29                 ` Thomas Gleixner
2026-03-20 18:13                   ` Mathieu Desnoyers
2026-03-24 21:35                     ` Thomas Gleixner
2026-03-25 14:12                       ` Mathieu Desnoyers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox