public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@kernel.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: "Mathieu Desnoyers" <mathieu.desnoyers@efficios.com>,
	"Andrè Almeida" <andrealmeid@igalia.com>,
	"Sebastian Andrzej Siewior" <bigeasy@linutronix.de>,
	"Carlos O'Donell" <carlos@redhat.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Florian Weimer" <fweimer@redhat.com>,
	"Rich Felker" <dalias@aerifal.cx>,
	"Torvald Riegel" <triegel@redhat.com>,
	"Darren Hart" <dvhart@infradead.org>,
	"Ingo Molnar" <mingo@kernel.org>,
	"Davidlohr Bueso" <dave@stgolabs.net>,
	"Arnd Bergmann" <arnd@arndb.de>,
	"Liam R . Howlett" <Liam.Howlett@oracle.com>,
	"Uros Bizjak" <ubizjak@gmail.com>,
	"Thomas Weißschuh" <linux@weissschuh.net>
Subject: [patch V3 09/14] futex: Add robust futex unlock IP range
Date: Mon, 30 Mar 2026 14:02:46 +0200	[thread overview]
Message-ID: <20260330120117.674216656@kernel.org> (raw)
In-Reply-To: 20260330114212.927686587@kernel.org

There will be a VDSO function to unlock robust futexes in user space. The
unlock sequence is racy vs. clearing the list_pending_op pointer in the
task's robust list head. To plug this race the kernel needs to know the
instruction window. As the VDSO is per MM the addresses are stored in
mm_struct::futex.

Architectures which implement support for this have to update these
addresses when the VDSO is (re)mapped and indicate the pending op pointer
size which matches the IP range.

Arguably this could be resolved by chasing mm->context->vdso->image, but
that's architecture specific and requires touching quite a few cache
lines. Having it in mm::futex reduces the cache line impact and avoids
having yet another set of architecture specific functionality.

To support multi-size robust list applications (gaming) this provides two
ranges when COMPAT is enabled.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
V3: Make the number of ranges depend on COMPAT - Peter
V2: Store ranges in a struct with size information and allow up to two ranges.
---
 include/linux/futex.h       |   22 ++++++++++++++++++---
 include/linux/futex_types.h |   28 ++++++++++++++++++++++++++
 include/linux/mm_types.h    |    1 
 init/Kconfig                |    6 +++++
 kernel/futex/core.c         |   46 ++++++++++++++++++++++++++++++++++----------
 5 files changed, 90 insertions(+), 13 deletions(-)

--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -81,11 +81,9 @@ int futex_hash_prctl(unsigned long arg2,
 #ifdef CONFIG_FUTEX_PRIVATE_HASH
 int futex_hash_allocate_default(void);
 void futex_hash_free(struct mm_struct *mm);
-void futex_mm_init(struct mm_struct *mm);
 #else  /* CONFIG_FUTEX_PRIVATE_HASH */
 static inline int futex_hash_allocate_default(void) { return 0; }
 static inline int futex_hash_free(struct mm_struct *mm) { return 0; }
-static inline void futex_mm_init(struct mm_struct *mm) { }
 #endif /* !CONFIG_FUTEX_PRIVATE_HASH */
 
 #else  /* CONFIG_FUTEX */
@@ -104,7 +102,25 @@ static inline int futex_hash_prctl(unsig
 }
 static inline int futex_hash_allocate_default(void) { return 0; }
 static inline int futex_hash_free(struct mm_struct *mm) { return 0; }
-static inline void futex_mm_init(struct mm_struct *mm) { }
 #endif /* !CONFIG_FUTEX */
 
+#ifdef CONFIG_FUTEX_ROBUST_UNLOCK
+void futex_reset_cs_ranges(struct futex_mm_data *fd);
+
+static inline void futex_set_vdso_cs_range(struct futex_mm_data *fd, unsigned int idx,
+					   unsigned long vdso, unsigned long start,
+					   unsigned long end, bool sz32)
+{
+	fd->unlock.cs_ranges[idx].start_ip = vdso + start;
+	fd->unlock.cs_ranges[idx].len = end - start;
+	fd->unlock.cs_ranges[idx].pop_size32 = sz32;
+}
+#endif /* CONFIG_FUTEX_ROBUST_UNLOCK */
+
+#ifdef CONFIG_FUTEX	/* core.c defines futex_mm_init() whenever FUTEX=y; a stub here would clash */
+void futex_mm_init(struct mm_struct *mm);
+#else  /* CONFIG_FUTEX */
+static inline void futex_mm_init(struct mm_struct *mm) { }
+#endif /* !CONFIG_FUTEX */
+
 #endif /* _LINUX_FUTEX_H */
--- a/include/linux/futex_types.h
+++ b/include/linux/futex_types.h
@@ -54,12 +54,40 @@ struct futex_mm_phash {
 struct futex_mm_phash { };
 #endif /* !CONFIG_FUTEX_ROBUST_UNLOCK */
 
+#ifdef CONFIG_FUTEX_ROBUST_UNLOCK
+/**
+ * struct futex_unlock_cs_range - Range for the VDSO unlock critical section
+ * @start_ip:	The start IP of the robust futex unlock critical section (inclusive)
+ * @len:	The length of the robust futex unlock critical section
+ * @pop_size32:	Pending OP pointer size indicator. 0 == 64-bit, 1 == 32-bit
+ */
+struct futex_unlock_cs_range {
+	unsigned long	       start_ip;
+	unsigned int	       len;
+	unsigned int	       pop_size32;
+};
+
+#define FUTEX_ROBUST_MAX_CS_RANGES	(1 + IS_ENABLED(CONFIG_COMPAT))
+
+/**
+ * struct futex_unlock_cs_ranges - Futex unlock VDSO critical sections
+ * @cs_ranges:	Array of critical section ranges
+ */
+struct futex_unlock_cs_ranges {
+	struct futex_unlock_cs_range	cs_ranges[FUTEX_ROBUST_MAX_CS_RANGES];
+};
+#else  /* CONFIG_FUTEX_ROBUST_UNLOCK */
+struct futex_unlock_cs_ranges { };
+#endif /* !CONFIG_FUTEX_ROBUST_UNLOCK */
+
 /**
  * struct futex_mm_data - Futex related per MM data
  * @phash:	Futex private hash related data
+ * @unlock:	Futex unlock VDSO critical sections
  */
 struct futex_mm_data {
 	struct futex_mm_phash		phash;
+	struct futex_unlock_cs_ranges	unlock;
 };
 #else  /* CONFIG_FUTEX */
 struct futex_sched_data { };
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -22,6 +22,7 @@
 #include <linux/types.h>
 #include <linux/rseq_types.h>
 #include <linux/bitmap.h>
+#include <linux/futex_types.h>
 
 #include <asm/mmu.h>
 
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1822,6 +1822,12 @@ config FUTEX_MPOL
 	depends on FUTEX && NUMA
 	default y
 
+config HAVE_FUTEX_ROBUST_UNLOCK
+	bool
+
+config FUTEX_ROBUST_UNLOCK
+	def_bool FUTEX && HAVE_GENERIC_VDSO && GENERIC_IRQ_ENTRY && RSEQ && HAVE_FUTEX_ROBUST_UNLOCK
+
 config EPOLL
 	bool "Enable eventpoll support" if EXPERT
 	default y
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -1758,11 +1758,11 @@ static bool futex_ref_is_dead(struct fut
 	return atomic_long_read(&mm->futex.phash.atomic) == 0;
 }
 
-void futex_mm_init(struct mm_struct *mm)
+static void futex_hash_init_mm(struct futex_mm_data *fd)
 {
-	memset(&mm->futex, 0, sizeof(mm->futex));
-	mutex_init(&mm->futex.phash.lock);
-	mm->futex.phash.batches = get_state_synchronize_rcu();
+	memset(&fd->phash, 0, sizeof(fd->phash));
+	mutex_init(&fd->phash.lock);
+	fd->phash.batches = get_state_synchronize_rcu();
 }
 
 void futex_hash_free(struct mm_struct *mm)
@@ -1966,20 +1966,46 @@ static int futex_hash_get_slots(void)
 		return fph->hash_mask + 1;
 	return 0;
 }
+#else  /* CONFIG_FUTEX_PRIVATE_HASH */
+static inline int futex_hash_allocate(unsigned int hslots, unsigned int flags) { return -EINVAL; }
+static inline int futex_hash_get_slots(void) { return 0; }
+static inline void futex_hash_init_mm(struct futex_mm_data *fd) { }
+#endif /* !CONFIG_FUTEX_PRIVATE_HASH */
 
-#else
+#ifdef CONFIG_FUTEX_ROBUST_UNLOCK
+static void futex_invalidate_cs_ranges(struct futex_mm_data *fd)
+{
+	/*
+	 * Invalidate start_ip so that the quick check fails for ip >= start_ip
+	 * if VDSO is not mapped or the second slot is not available for compat
+	 * tasks as they use VDSO32 which does not provide the 64-bit pointer
+	 * variant.
+	 */
+	for (int i = 0; i < FUTEX_ROBUST_MAX_CS_RANGES; i++)
+		fd->unlock.cs_ranges[i].start_ip = ~0UL;
+}
 
-static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags)
+void futex_reset_cs_ranges(struct futex_mm_data *fd)
 {
-	return -EINVAL;
+	memset(fd->unlock.cs_ranges, 0, sizeof(fd->unlock.cs_ranges));
+	futex_invalidate_cs_ranges(fd);
 }
 
-static int futex_hash_get_slots(void)
+static void futex_robust_unlock_init_mm(struct futex_mm_data *fd)
 {
-	return 0;
+	/* mm_dup() preserves the range, mm_alloc() clears it */
+	if (!fd->unlock.cs_ranges[0].start_ip)
+		futex_invalidate_cs_ranges(fd);
 }
+#else  /* CONFIG_FUTEX_ROBUST_UNLOCK */
+static inline void futex_robust_unlock_init_mm(struct futex_mm_data *fd) { }
+#endif /* !CONFIG_FUTEX_ROBUST_UNLOCK */
 
-#endif
+void futex_mm_init(struct mm_struct *mm)
+{
+	futex_hash_init_mm(&mm->futex);
+	futex_robust_unlock_init_mm(&mm->futex);
+}
 
 int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
 {


  parent reply	other threads:[~2026-03-30 12:02 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-30 12:01 [patch V3 00/14] futex: Address the robust futex unlock race for real Thomas Gleixner
2026-03-30 12:02 ` [patch V3 01/14] futex: Move futex task related data into a struct Thomas Gleixner
2026-03-30 12:02 ` [patch V3 02/14] futex: Make futex_mm_init() void Thomas Gleixner
2026-03-30 12:02 ` [patch V3 03/14] futex: Move futex related mm_struct data into a struct Thomas Gleixner
2026-03-30 15:23   ` Alexander Kuleshov
2026-03-30 12:02 ` [patch V3 04/14] futex: Provide UABI defines for robust list entry modifiers Thomas Gleixner
2026-03-30 12:02 ` [patch V3 05/14] uaccess: Provide unsafe_atomic_store_release_user() Thomas Gleixner
2026-03-30 13:33   ` Mark Rutland
2026-03-30 12:02 ` [patch V3 06/14] x86: Select ARCH_MEMORY_ORDER_TOS Thomas Gleixner
2026-03-30 13:34   ` Mark Rutland
2026-03-30 19:48     ` Thomas Gleixner
2026-03-30 12:02 ` [patch V3 07/14] futex: Cleanup UAPI defines Thomas Gleixner
2026-03-30 12:02 ` [patch V3 08/14] futex: Add support for unlocking robust futexes Thomas Gleixner
2026-03-30 12:02 ` Thomas Gleixner [this message]
2026-03-30 12:02 ` [patch V3 10/14] futex: Provide infrastructure to plug the non contended robust futex unlock race Thomas Gleixner
2026-03-30 12:02 ` [patch V3 11/14] x86/vdso: Prepare for robust futex unlock support Thomas Gleixner
2026-03-30 12:03 ` [patch V3 12/14] x86/vdso: Implement __vdso_futex_robust_try_unlock() Thomas Gleixner
2026-03-30 12:03 ` [patch V3 13/14] Documentation: futex: Add a note about robust list race condition Thomas Gleixner
2026-03-30 12:03 ` [patch V3 14/14] selftests: futex: Add tests for robust release operations Thomas Gleixner
2026-03-30 13:45 ` [patch V3 00/14] futex: Address the robust futex unlock race for real Mark Rutland
2026-03-30 13:51   ` Peter Zijlstra
2026-03-30 19:36   ` Thomas Gleixner
2026-03-31 14:12     ` Mark Rutland
2026-03-31 12:59   ` André Almeida
2026-03-31 13:03     ` Sebastian Andrzej Siewior
2026-03-31 14:13     ` Mark Rutland
2026-03-31 15:22   ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260330120117.674216656@kernel.org \
    --to=tglx@kernel.org \
    --cc=Liam.Howlett@oracle.com \
    --cc=andrealmeid@igalia.com \
    --cc=arnd@arndb.de \
    --cc=bigeasy@linutronix.de \
    --cc=carlos@redhat.com \
    --cc=dalias@aerifal.cx \
    --cc=dave@stgolabs.net \
    --cc=dvhart@infradead.org \
    --cc=fweimer@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@weissschuh.net \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=triegel@redhat.com \
    --cc=ubizjak@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox