From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
To: linux-kernel@vger.kernel.org
Cc: "André Almeida" <andrealmeid@igalia.com>,
"Darren Hart" <dvhart@infradead.org>,
"Davidlohr Bueso" <dave@stgolabs.net>,
"Ingo Molnar" <mingo@redhat.com>,
"Juri Lelli" <juri.lelli@redhat.com>,
"Peter Zijlstra" <peterz@infradead.org>,
"Thomas Gleixner" <tglx@linutronix.de>,
"Valentin Schneider" <vschneid@redhat.com>,
"Waiman Long" <longman@redhat.com>
Subject: Re: [PATCH v10 00/21] futex: Add support task local hash maps, FUTEX2_NUMA and FUTEX2_MPOL
Date: Wed, 12 Mar 2025 16:18:48 +0100 [thread overview]
Message-ID: <20250312151848.RlB_XuHA@linutronix.de> (raw)
In-Reply-To: <20250312151634.2183278-1-bigeasy@linutronix.de>
On 2025-03-12 16:16:13 [+0100], To linux-kernel@vger.kernel.org wrote:
> The complete tree is at
> https://git.kernel.org/pub/scm/linux/kernel/git/bigeasy/staging.git/log/?h=futex_local_v10
> https://git.kernel.org/pub/scm/linux/kernel/git/bigeasy/staging.git futex_local_v10
>
> v9…v10: https://lore.kernel.org/all/20250225170914.289358-1-bigeasy@linutronix.de/
The exact diff vs peterz/locking/futex:
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 0cdd5882e89c1..19c37afa0432a 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -82,12 +82,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
int futex_hash_prctl(unsigned long arg2, unsigned long arg3);
-#ifdef CONFIG_BASE_SMALL
-static inline int futex_hash_allocate_default(void) { return 0; }
-static inline void futex_hash_free(struct mm_struct *mm) { }
-static inline void futex_mm_init(struct mm_struct *mm) { }
-#else /* !CONFIG_BASE_SMALL */
-
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
int futex_hash_allocate_default(void);
void futex_hash_free(struct mm_struct *mm);
@@ -97,7 +92,11 @@ static inline void futex_mm_init(struct mm_struct *mm)
mutex_init(&mm->futex_hash_lock);
}
-#endif /* CONFIG_BASE_SMALL */
+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
+static inline int futex_hash_allocate_default(void) { return 0; }
+static inline void futex_hash_free(struct mm_struct *mm) { }
+static inline void futex_mm_init(struct mm_struct *mm) { }
+#endif /* CONFIG_FUTEX_PRIVATE_HASH */
#else /* !CONFIG_FUTEX */
static inline void futex_init_task(struct task_struct *tsk) { }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9399ee7d40201..e0e8adbe66bdd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -938,7 +938,7 @@ struct mm_struct {
*/
seqcount_t mm_lock_seq;
#endif
-#if defined(CONFIG_FUTEX) && !defined(CONFIG_BASE_SMALL)
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
struct mutex futex_hash_lock;
struct futex_private_hash __rcu *futex_phash;
struct futex_private_hash *futex_phash_new;
diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h
index 6322d8c1c6b42..2fb2af6d98249 100644
--- a/include/linux/rcuref.h
+++ b/include/linux/rcuref.h
@@ -30,7 +30,11 @@ static inline void rcuref_init(rcuref_t *ref, unsigned int cnt)
* rcuref_read - Read the number of held reference counts of a rcuref
* @ref: Pointer to the reference count
*
- * Return: The number of held references (0 ... N)
+ * Return: The number of held references (0 ... N). The value 0 does not
+ * indicate that it is safe to schedule the object, protected by this reference
+ * counter, for deconstruction.
+ * If you want to know if the reference counter has been marked DEAD (as
+ * signaled by rcuref_put()) please use rcuref_is_dead().
*/
static inline unsigned int rcuref_read(rcuref_t *ref)
{
@@ -40,6 +44,22 @@ static inline unsigned int rcuref_read(rcuref_t *ref)
return c >= RCUREF_RELEASED ? 0 : c + 1;
}
+/**
+ * rcuref_is_dead - Check if the rcuref has been already marked dead
+ * @ref: Pointer to the reference count
+ *
+ * Return: True if the object has been marked DEAD. This signals that a previous
+ * invocation of rcuref_put() returned true on this reference counter meaning
+ * the protected object can safely be scheduled for deconstruction.
+ * Otherwise, returns false.
+ */
+static inline bool rcuref_is_dead(rcuref_t *ref)
+{
+ unsigned int c = atomic_read(&ref->refcnt);
+
+ return (c >= RCUREF_RELEASED) && (c < RCUREF_NOREF);
+}
+
extern __must_check bool rcuref_get_slowpath(rcuref_t *ref);
/**
diff --git a/init/Kconfig b/init/Kconfig
index a0ea04c177842..a4502a9077e03 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1683,6 +1683,16 @@ config FUTEX_PI
depends on FUTEX && RT_MUTEXES
default y
+config FUTEX_PRIVATE_HASH
+ bool
+ depends on FUTEX && !BASE_SMALL && MMU
+ default y
+
+config FUTEX_MPOL
+ bool
+ depends on FUTEX && NUMA
+ default y
+
config EPOLL
bool "Enable eventpoll support" if EXPERT
default y
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 976a487bf3ad5..65523f3cfe32e 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -136,7 +136,7 @@ static inline bool futex_key_is_private(union futex_key *key)
static struct futex_hash_bucket *
__futex_hash(union futex_key *key, struct futex_private_hash *fph);
-#ifndef CONFIG_BASE_SMALL
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
static struct futex_hash_bucket *
__futex_hash_private(union futex_key *key, struct futex_private_hash *fph)
{
@@ -196,12 +196,12 @@ static bool __futex_pivot_hash(struct mm_struct *mm,
{
struct futex_private_hash *fph;
- lockdep_assert_held(&mm->futex_hash_lock);
WARN_ON_ONCE(mm->futex_phash_new);
- fph = mm->futex_phash;
+ fph = rcu_dereference_protected(mm->futex_phash,
+ lockdep_is_held(&mm->futex_hash_lock));
if (fph) {
- if (rcuref_read(&fph->users) != 0) {
+ if (!rcuref_is_dead(&fph->users)) {
mm->futex_phash_new = new;
return false;
}
@@ -262,6 +262,10 @@ bool futex_private_hash_get(struct futex_private_hash *fph)
void futex_private_hash_put(struct futex_private_hash *fph)
{
+ /*
+ * Ignore the result; the DEAD state is picked up
+ * when rcuref_get() starts failing via rcuref_is_dead().
+ */
if (rcuref_put(&fph->users))
wake_up_var(fph->mm);
}
@@ -301,7 +305,7 @@ void futex_hash_put(struct futex_hash_bucket *hb)
futex_private_hash_put(fph);
}
-#else
+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
static inline struct futex_hash_bucket *
__futex_hash_private(union futex_key *key, struct futex_private_hash *fph)
@@ -314,8 +318,9 @@ struct futex_hash_bucket *futex_hash(union futex_key *key)
return __futex_hash(key, NULL);
}
-#endif /* CONFIG_BASE_SMALL */
+#endif /* CONFIG_FUTEX_PRIVATE_HASH */
+#ifdef CONFIG_FUTEX_MPOL
static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr)
{
struct vm_area_struct *vma = vma_lookup(mm, addr);
@@ -325,7 +330,7 @@ static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr)
if (!vma)
return FUTEX_NO_NODE;
- mpol = vma->vm_policy;
+ mpol = vma_policy(vma);
if (!mpol)
return FUTEX_NO_NODE;
@@ -373,6 +378,14 @@ static int futex_mpol(struct mm_struct *mm, unsigned long addr)
guard(mmap_read_lock)(mm);
return __futex_key_to_node(mm, addr);
}
+#else /* !CONFIG_FUTEX_MPOL */
+
+static int futex_mpol(struct mm_struct *mm, unsigned long addr)
+{
+ return FUTEX_NO_NODE;
+}
+
+#endif /* CONFIG_FUTEX_MPOL */
/**
* futex_hash - Return the hash bucket in the global hash
@@ -420,7 +433,6 @@ __futex_hash(union futex_key *key, struct futex_private_hash *fph)
return &futex_queues[node][hash & futex_hashmask];
}
-
/**
* futex_setup_timer - set up the sleeping hrtimer.
* @time: ptr to the given timeout value
@@ -932,9 +944,6 @@ int futex_unqueue(struct futex_q *q)
void futex_q_lockptr_lock(struct futex_q *q)
{
-#if 0
- struct futex_hash_bucket *hb;
-#endif
spinlock_t *lock_ptr;
/*
@@ -949,18 +958,6 @@ void futex_q_lockptr_lock(struct futex_q *q)
spin_unlock(lock_ptr);
goto retry;
}
-#if 0
- hb = container_of(lock_ptr, struct futex_hash_bucket, lock);
- /*
- * The caller needs to either hold a reference on the hash (to ensure
- * that the hash is not resized) _or_ be enqueued on the hash. This
- * ensures that futex_q::lock_ptr is updated while moved to the new
- * hash during resize.
- * Once the hash bucket is locked the resize operation, which might be
- * in progress, will block on the lock.
- */
- return hb;
-#endif
}
/*
@@ -1497,7 +1494,7 @@ void futex_exit_release(struct task_struct *tsk)
static void futex_hash_bucket_init(struct futex_hash_bucket *fhb,
struct futex_private_hash *fph)
{
-#ifndef CONFIG_BASE_SMALL
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
fhb->priv = fph;
#endif
atomic_set(&fhb->waiters, 0);
@@ -1505,21 +1502,30 @@ static void futex_hash_bucket_init(struct futex_hash_bucket *fhb,
spin_lock_init(&fhb->lock);
}
-#ifndef CONFIG_BASE_SMALL
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
void futex_hash_free(struct mm_struct *mm)
{
+ struct futex_private_hash *fph;
+
kvfree(mm->futex_phash_new);
- kvfree(mm->futex_phash);
+ fph = rcu_dereference_raw(mm->futex_phash);
+ if (fph) {
+ WARN_ON_ONCE(rcuref_read(&fph->users) > 1);
+ kvfree(fph);
+ }
}
static bool futex_pivot_pending(struct mm_struct *mm)
{
+ struct futex_private_hash *fph;
+
guard(rcu)();
if (!mm->futex_phash_new)
return false;
- return !rcuref_read(&mm->futex_phash->users);
+ fph = rcu_dereference(mm->futex_phash);
+ return !rcuref_read(&fph->users);
}
static bool futex_hash_less(struct futex_private_hash *a,
@@ -1560,7 +1566,7 @@ static int futex_hash_allocate(unsigned int hash_slots, bool custom)
*/
scoped_guard (rcu) {
fph = rcu_dereference(mm->futex_phash);
- if (fph && !mm->futex_phash->hash_mask) {
+ if (fph && !fph->hash_mask) {
if (custom)
return -EBUSY;
return 0;
@@ -1591,7 +1597,8 @@ static int futex_hash_allocate(unsigned int hash_slots, bool custom)
struct futex_private_hash *free __free(kvfree) = NULL;
struct futex_private_hash *cur, *new;
- cur = mm->futex_phash;
+ cur = rcu_dereference_protected(mm->futex_phash,
+ lockdep_is_held(&mm->futex_hash_lock));
new = mm->futex_phash_new;
mm->futex_phash_new = NULL;
@@ -1602,7 +1609,7 @@ static int futex_hash_allocate(unsigned int hash_slots, bool custom)
* allocated a replacement hash, drop the initial
* reference on the existing hash.
*/
- futex_private_hash_put(mm->futex_phash);
+ futex_private_hash_put(cur);
}
if (new) {
@@ -1683,7 +1690,7 @@ static int futex_hash_get_slots(void)
static int futex_hash_allocate(unsigned int hash_slots, bool custom)
{
- return 0;
+ return -EINVAL;
}
static int futex_hash_get_slots(void)
@@ -1723,6 +1730,7 @@ static int __init futex_init(void)
#else
hashsize = 256 * num_possible_cpus();
hashsize /= num_possible_nodes();
+ hashsize = max(4, hashsize);
hashsize = roundup_pow_of_two(hashsize);
#endif
futex_hashshift = ilog2(hashsize);
@@ -1740,12 +1748,15 @@ static int __init futex_init(void)
BUG_ON(!table);
for (i = 0; i < hashsize; i++)
- futex_hash_bucket_init(&table[i], 0);
+ futex_hash_bucket_init(&table[i], NULL);
futex_queues[n] = table;
}
futex_hashmask = hashsize - 1;
+ pr_info("futex hash table entries: %lu (%lu bytes on %d NUMA nodes, total %lu KiB, %s).\n",
+ hashsize, size, num_possible_nodes(), size * num_possible_nodes() / 1024,
+ order > MAX_PAGE_ORDER ? "vmalloc" : "linear");
return 0;
}
core_initcall(futex_init);
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 40f06523a3565..52e9c0c4b6c87 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -223,14 +223,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
extern struct futex_hash_bucket *futex_hash(union futex_key *key);
-#ifndef CONFIG_BASE_SMALL
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
extern void futex_hash_get(struct futex_hash_bucket *hb);
extern void futex_hash_put(struct futex_hash_bucket *hb);
extern struct futex_private_hash *futex_private_hash(void);
extern bool futex_private_hash_get(struct futex_private_hash *fph);
extern void futex_private_hash_put(struct futex_private_hash *fph);
-#else
+
+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
static inline void futex_hash_get(struct futex_hash_bucket *hb) { }
static inline void futex_hash_put(struct futex_hash_bucket *hb) { }
diff --git a/mm/nommu.c b/mm/nommu.c
index baa79abdaf037..d04e601a8f4d7 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -209,6 +209,11 @@ EXPORT_SYMBOL(vmalloc_noprof);
void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc_noprof);
+void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node)
+{
+ return vmalloc_huge_noprof(size, gfp_mask);
+}
+
/*
* vzalloc - allocate virtually contiguous memory with zero fill
*
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 39fe43183a64f..69247b46413ca 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3968,9 +3968,9 @@ EXPORT_SYMBOL_GPL(vmalloc_huge_noprof);
void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node)
{
- return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
- gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
- node, __builtin_return_address(0));
+ return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
+ gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
+ node, __builtin_return_address(0));
}
/**
Sebastian
next prev parent reply other threads:[~2025-03-12 15:18 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-12 15:16 [PATCH v10 00/21] futex: Add support task local hash maps, FUTEX2_NUMA and FUTEX2_MPOL Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 01/21] rcuref: Provide rcuref_is_dead() Sebastian Andrzej Siewior
2025-03-13 4:23 ` Joel Fernandes
2025-03-13 7:55 ` Sebastian Andrzej Siewior
2025-03-14 10:36 ` Peter Zijlstra
2025-03-12 15:16 ` [PATCH v10 02/21] futex: Move futex_queue() into futex_wait_setup() Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 03/21] futex: Pull futex_hash() out of futex_q_lock() Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 04/21] futex: Create hb scopes Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 05/21] futex: Create futex_hash() get/put class Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 06/21] futex: Create helper function to initialize a hash slot Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 07/21] futex: Add basic infrastructure for local task local hash Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 08/21] futex: Hash only the address for private futexes Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 09/21] futex: Allow automatic allocation of process wide futex hash Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 10/21] futex: Decrease the waiter count before the unlock operation Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 11/21] futex: Introduce futex_q_lockptr_lock() Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 12/21] futex: Acquire a hash reference in futex_wait_multiple_setup() Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 13/21] futex: Allow to re-allocate the private local hash Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 14/21] futex: Resize local futex hash table based on number of threads Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 15/21] futex: s/hb_p/fph/ Sebastian Andrzej Siewior
2025-03-14 12:36 ` Peter Zijlstra
2025-03-14 13:10 ` Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 16/21] futex: Remove superfluous state Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 17/21] futex: Untangle and naming Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 18/21] futex: Rework SET_SLOTS Sebastian Andrzej Siewior
2025-03-26 15:37 ` Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 19/21] mm: Add vmalloc_huge_node() Sebastian Andrzej Siewior
2025-03-12 22:02 ` Andrew Morton
2025-03-13 7:59 ` Sebastian Andrzej Siewior
2025-03-13 22:08 ` Andrew Morton
2025-03-14 9:59 ` Sebastian Andrzej Siewior
2025-03-14 10:34 ` Andrew Morton
2025-03-12 15:16 ` [PATCH v10 20/21] futex: Implement FUTEX2_NUMA Sebastian Andrzej Siewior
2025-03-25 19:52 ` Shrikanth Hegde
2025-03-25 22:52 ` Peter Zijlstra
2025-03-25 22:56 ` Peter Zijlstra
2025-03-26 12:57 ` Shrikanth Hegde
2025-03-26 13:37 ` Peter Zijlstra
2025-03-26 15:06 ` Shrikanth Hegde
2025-03-26 8:03 ` Sebastian Andrzej Siewior
2025-03-12 15:16 ` [PATCH v10 21/21] futex: Implement FUTEX2_MPOL Sebastian Andrzej Siewior
2025-03-12 15:18 ` Sebastian Andrzej Siewior [this message]
2025-03-14 10:42 ` [PATCH v10 00/21] futex: Add support task local hash maps, FUTEX2_NUMA and FUTEX2_MPOL Peter Zijlstra
2025-03-14 10:58 ` Peter Zijlstra
2025-03-14 11:28 ` Sebastian Andrzej Siewior
2025-03-14 11:41 ` Peter Zijlstra
2025-03-14 12:00 ` Sebastian Andrzej Siewior
2025-03-14 12:30 ` Peter Zijlstra
2025-03-14 13:30 ` Sebastian Andrzej Siewior
2025-03-14 14:18 ` Peter Zijlstra
2025-03-14 14:40 ` Paul E. McKenney
2025-03-18 13:24 ` Shrikanth Hegde
2025-03-18 16:12 ` Davidlohr Bueso
2025-03-25 19:04 ` Shrikanth Hegde
2025-03-26 9:31 ` Sebastian Andrzej Siewior
2025-03-26 12:54 ` Shrikanth Hegde
2025-03-26 14:01 ` Sebastian Andrzej Siewior
2025-03-26 8:49 ` Sebastian Andrzej Siewior
2025-04-07 16:15 ` Sebastian Andrzej Siewior
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250312151848.RlB_XuHA@linutronix.de \
--to=bigeasy@linutronix.de \
--cc=andrealmeid@igalia.com \
--cc=dave@stgolabs.net \
--cc=dvhart@infradead.org \
--cc=juri.lelli@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=longman@redhat.com \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=vschneid@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox