* [PATCH v2 1/8] closures: Improve closure_put_after_sub_checks
2025-10-12 21:24 [PATCH v2 0/8] bcachefs out-of-tree series Kent Overstreet
@ 2025-10-12 21:24 ` Kent Overstreet
2025-10-12 21:24 ` [PATCH v2 2/8] closures: closure_sub() uses cmpxchg Kent Overstreet
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Kent Overstreet @ 2025-10-12 21:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, Kent Overstreet
Add a guard bit for CLOSURE_DESTRUCTOR underflow, print the full
cl->remaining value, and rename variables for clarity.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
include/linux/closure.h | 2 +-
lib/closure.c | 28 +++++++++++++++-------------
2 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/include/linux/closure.h b/include/linux/closure.h
index 880fe85e35e9..a6fcc33fafce 100644
--- a/include/linux/closure.h
+++ b/include/linux/closure.h
@@ -135,7 +135,7 @@ enum closure_state {
};
#define CLOSURE_GUARD_MASK \
- ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1)
+ (((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1)|(CLOSURE_BITS_START >> 1))
#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1)
#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING)
diff --git a/lib/closure.c b/lib/closure.c
index 2bfe7d2a0048..5fafc8b0e99d 100644
--- a/lib/closure.c
+++ b/lib/closure.c
@@ -13,23 +13,25 @@
#include <linux/seq_file.h>
#include <linux/sched/debug.h>
-static inline void closure_put_after_sub_checks(int flags)
+static void closure_val_checks(struct closure *cl, unsigned new)
{
- int r = flags & CLOSURE_REMAINING_MASK;
-
- if (WARN(flags & CLOSURE_GUARD_MASK,
- "closure has guard bits set: %x (%u)",
- flags & CLOSURE_GUARD_MASK, (unsigned) __fls(r)))
- r &= ~CLOSURE_GUARD_MASK;
-
- WARN(!r && (flags & ~CLOSURE_DESTRUCTOR),
- "closure ref hit 0 with incorrect flags set: %x (%u)",
- flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags));
+ unsigned count = new & CLOSURE_REMAINING_MASK;
+
+ if (WARN(new & CLOSURE_GUARD_MASK,
+ "closure %ps has guard bits set: %x (%u)",
+ cl->fn,
+ new, (unsigned) __fls(new & CLOSURE_GUARD_MASK)))
+ new &= ~CLOSURE_GUARD_MASK;
+
+ WARN(!count && (new & ~CLOSURE_DESTRUCTOR),
+ "closure %ps ref hit 0 with incorrect flags set: %x (%u)",
+ cl->fn,
+ new, (unsigned) __fls(new));
}
static inline void closure_put_after_sub(struct closure *cl, int flags)
{
- closure_put_after_sub_checks(flags);
+ closure_val_checks(cl, flags);
if (!(flags & CLOSURE_REMAINING_MASK)) {
smp_acquire__after_ctrl_dep();
@@ -167,7 +169,7 @@ void __sched closure_return_sync(struct closure *cl)
unsigned flags = atomic_sub_return_release(1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR,
&cl->remaining);
- closure_put_after_sub_checks(flags);
+ closure_val_checks(cl, flags);
if (unlikely(flags & CLOSURE_REMAINING_MASK)) {
while (1) {
--
2.51.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v2 2/8] closures: closure_sub() uses cmpxchg
2025-10-12 21:24 [PATCH v2 0/8] bcachefs out-of-tree series Kent Overstreet
2025-10-12 21:24 ` [PATCH v2 1/8] closures: Improve closure_put_after_sub_checks Kent Overstreet
@ 2025-10-12 21:24 ` Kent Overstreet
2025-10-12 21:24 ` [PATCH v2 3/8] closures: CLOSURE_SLEEPING Kent Overstreet
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Kent Overstreet @ 2025-10-12 21:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, Kent Overstreet
Part one of fixing a race in __closure_sync_timeout().
lib/closure.c was initially designed with the assumption that refs only
hit 0 once, and when they hit 0 the thread that caused it to hit 0 now
owns it - which avoids many races without the need for cmpxchg.
But __closure_sync_timeout() broke this, because the timeout may elapse
and the thread doing the sync may race with the final put.
Switch to cmpxchg for manipulating cl->remaining; the next patch will
fix some issues with the other thread's closure_put() accessing
variables in struct closure after that final put.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
include/linux/closure.h | 9 ++++-
lib/closure.c | 79 ++++++++++++++++++++++-------------------
2 files changed, 50 insertions(+), 38 deletions(-)
diff --git a/include/linux/closure.h b/include/linux/closure.h
index a6fcc33fafce..f626044d6ca2 100644
--- a/include/linux/closure.h
+++ b/include/linux/closure.h
@@ -169,11 +169,18 @@ struct closure {
};
void closure_sub(struct closure *cl, int v);
-void closure_put(struct closure *cl);
void __closure_wake_up(struct closure_waitlist *list);
bool closure_wait(struct closure_waitlist *list, struct closure *cl);
void __closure_sync(struct closure *cl);
+/*
+ * closure_put - decrement a closure's refcount
+ */
+static inline void closure_put(struct closure *cl)
+{
+ closure_sub(cl, 1);
+}
+
static inline unsigned closure_nr_remaining(struct closure *cl)
{
return atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK;
diff --git a/lib/closure.c b/lib/closure.c
index 5fafc8b0e99d..21fadd12093c 100644
--- a/lib/closure.c
+++ b/lib/closure.c
@@ -13,14 +13,14 @@
#include <linux/seq_file.h>
#include <linux/sched/debug.h>
-static void closure_val_checks(struct closure *cl, unsigned new)
+static void closure_val_checks(struct closure *cl, unsigned new, int d)
{
unsigned count = new & CLOSURE_REMAINING_MASK;
if (WARN(new & CLOSURE_GUARD_MASK,
- "closure %ps has guard bits set: %x (%u)",
+ "closure %ps has guard bits set: %x (%u), delta %i",
cl->fn,
- new, (unsigned) __fls(new & CLOSURE_GUARD_MASK)))
+ new, (unsigned) __fls(new & CLOSURE_GUARD_MASK), d))
new &= ~CLOSURE_GUARD_MASK;
WARN(!count && (new & ~CLOSURE_DESTRUCTOR),
@@ -29,49 +29,54 @@ static void closure_val_checks(struct closure *cl, unsigned new)
new, (unsigned) __fls(new));
}
-static inline void closure_put_after_sub(struct closure *cl, int flags)
-{
- closure_val_checks(cl, flags);
+enum new_closure_state {
+ CLOSURE_normal_put,
+ CLOSURE_requeue,
+ CLOSURE_done,
+};
- if (!(flags & CLOSURE_REMAINING_MASK)) {
- smp_acquire__after_ctrl_dep();
+/* For clearing flags with the same atomic op as a put */
+void closure_sub(struct closure *cl, int v)
+{
+ enum new_closure_state s;
- cl->closure_get_happened = false;
+ int old = atomic_read(&cl->remaining), new;
+ do {
+ new = old - v;
- if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) {
- atomic_set(&cl->remaining,
- CLOSURE_REMAINING_INITIALIZER);
- closure_queue(cl);
+ if (new & CLOSURE_REMAINING_MASK) {
+ s = CLOSURE_normal_put;
} else {
- struct closure *parent = cl->parent;
- closure_fn *destructor = cl->fn;
+ if (cl->fn && !(new & CLOSURE_DESTRUCTOR)) {
+ s = CLOSURE_requeue;
+ new += CLOSURE_REMAINING_INITIALIZER;
+ } else
+ s = CLOSURE_done;
+ }
- closure_debug_destroy(cl);
+ closure_val_checks(cl, new, -v);
+ } while (!atomic_try_cmpxchg_release(&cl->remaining, &old, new));
- if (destructor)
- destructor(&cl->work);
+ if (s == CLOSURE_normal_put)
+ return;
- if (parent)
- closure_put(parent);
- }
- }
-}
+ if (s == CLOSURE_requeue) {
+ cl->closure_get_happened = false;
+ closure_queue(cl);
+ } else {
+ struct closure *parent = cl->parent;
+ closure_fn *destructor = cl->fn;
-/* For clearing flags with the same atomic op as a put */
-void closure_sub(struct closure *cl, int v)
-{
- closure_put_after_sub(cl, atomic_sub_return_release(v, &cl->remaining));
-}
-EXPORT_SYMBOL(closure_sub);
+ closure_debug_destroy(cl);
-/*
- * closure_put - decrement a closure's refcount
- */
-void closure_put(struct closure *cl)
-{
- closure_put_after_sub(cl, atomic_dec_return_release(&cl->remaining));
+ if (destructor)
+ destructor(&cl->work);
+
+ if (parent)
+ closure_put(parent);
+ }
}
-EXPORT_SYMBOL(closure_put);
+EXPORT_SYMBOL(closure_sub);
/*
* closure_wake_up - wake up all closures on a wait list, without memory barrier
@@ -169,7 +174,7 @@ void __sched closure_return_sync(struct closure *cl)
unsigned flags = atomic_sub_return_release(1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR,
&cl->remaining);
- closure_val_checks(cl, flags);
+ closure_val_checks(cl, flags, 1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR);
if (unlikely(flags & CLOSURE_REMAINING_MASK)) {
while (1) {
--
2.51.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v2 3/8] closures: CLOSURE_SLEEPING
2025-10-12 21:24 [PATCH v2 0/8] bcachefs out-of-tree series Kent Overstreet
2025-10-12 21:24 ` [PATCH v2 1/8] closures: Improve closure_put_after_sub_checks Kent Overstreet
2025-10-12 21:24 ` [PATCH v2 2/8] closures: closure_sub() uses cmpxchg Kent Overstreet
@ 2025-10-12 21:24 ` Kent Overstreet
2025-10-12 21:24 ` [PATCH v2 4/8] closures: kill closure.closure_get_happened Kent Overstreet
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Kent Overstreet @ 2025-10-12 21:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, Kent Overstreet
Part two of fixing __closure_sync_timeout().
Add a bit in cl->remaining (protected by cmpxchg()) for indicating
whether a closure has a sleeper that should be woken up after the final
put; we can now read the pointer to the waiting task_struct before the
final put
This elimating accesses to another thread's closure after the final put,
which are racy because it may be unwinding after the timeout elapses.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
include/linux/closure.h | 9 +--
lib/closure.c | 118 +++++++++++++++++++---------------------
2 files changed, 62 insertions(+), 65 deletions(-)
diff --git a/include/linux/closure.h b/include/linux/closure.h
index f626044d6ca2..d0195570514a 100644
--- a/include/linux/closure.h
+++ b/include/linux/closure.h
@@ -128,14 +128,15 @@ enum closure_state {
* annotate where references are being transferred.
*/
- CLOSURE_BITS_START = (1U << 26),
- CLOSURE_DESTRUCTOR = (1U << 26),
+ CLOSURE_BITS_START = (1U << 24),
+ CLOSURE_DESTRUCTOR = (1U << 24),
+ CLOSURE_SLEEPING = (1U << 26),
CLOSURE_WAITING = (1U << 28),
CLOSURE_RUNNING = (1U << 30),
};
#define CLOSURE_GUARD_MASK \
- (((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1)|(CLOSURE_BITS_START >> 1))
+ (((CLOSURE_DESTRUCTOR|CLOSURE_SLEEPING|CLOSURE_WAITING|CLOSURE_RUNNING) << 1)|(CLOSURE_BITS_START >> 1))
#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1)
#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING)
@@ -144,7 +145,7 @@ struct closure {
union {
struct {
struct workqueue_struct *wq;
- struct closure_syncer *s;
+ struct task_struct *sleeper;
struct llist_node list;
closure_fn *fn;
};
diff --git a/lib/closure.c b/lib/closure.c
index 21fadd12093c..c49d49916788 100644
--- a/lib/closure.c
+++ b/lib/closure.c
@@ -23,7 +23,7 @@ static void closure_val_checks(struct closure *cl, unsigned new, int d)
new, (unsigned) __fls(new & CLOSURE_GUARD_MASK), d))
new &= ~CLOSURE_GUARD_MASK;
- WARN(!count && (new & ~CLOSURE_DESTRUCTOR),
+ WARN(!count && (new & ~(CLOSURE_DESTRUCTOR|CLOSURE_SLEEPING)),
"closure %ps ref hit 0 with incorrect flags set: %x (%u)",
cl->fn,
new, (unsigned) __fls(new));
@@ -39,19 +39,27 @@ enum new_closure_state {
void closure_sub(struct closure *cl, int v)
{
enum new_closure_state s;
+ struct task_struct *sleeper;
- int old = atomic_read(&cl->remaining), new;
+ /* rcu_read_lock, atomic_read_acquire() are both for cl->sleeper: */
+ guard(rcu)();
+
+ int old = atomic_read_acquire(&cl->remaining), new;
do {
new = old - v;
if (new & CLOSURE_REMAINING_MASK) {
s = CLOSURE_normal_put;
} else {
- if (cl->fn && !(new & CLOSURE_DESTRUCTOR)) {
+ if ((cl->fn || (new & CLOSURE_SLEEPING)) &&
+ !(new & CLOSURE_DESTRUCTOR)) {
s = CLOSURE_requeue;
new += CLOSURE_REMAINING_INITIALIZER;
} else
s = CLOSURE_done;
+
+ sleeper = new & CLOSURE_SLEEPING ? cl->sleeper : NULL;
+ new &= ~CLOSURE_SLEEPING;
}
closure_val_checks(cl, new, -v);
@@ -60,6 +68,12 @@ void closure_sub(struct closure *cl, int v)
if (s == CLOSURE_normal_put)
return;
+ if (sleeper) {
+ smp_mb();
+ wake_up_process(sleeper);
+ return;
+ }
+
if (s == CLOSURE_requeue) {
cl->closure_get_happened = false;
closure_queue(cl);
@@ -114,41 +128,25 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
cl->closure_get_happened = true;
closure_set_waiting(cl, _RET_IP_);
- atomic_add(CLOSURE_WAITING + 1, &cl->remaining);
+ unsigned r = atomic_add_return(CLOSURE_WAITING + 1, &cl->remaining);
+ closure_val_checks(cl, r, CLOSURE_WAITING + 1);
+
llist_add(&cl->list, &waitlist->list);
return true;
}
EXPORT_SYMBOL(closure_wait);
-struct closure_syncer {
- struct task_struct *task;
- int done;
-};
-
-static CLOSURE_CALLBACK(closure_sync_fn)
-{
- struct closure *cl = container_of(ws, struct closure, work);
- struct closure_syncer *s = cl->s;
- struct task_struct *p;
-
- rcu_read_lock();
- p = READ_ONCE(s->task);
- s->done = 1;
- wake_up_process(p);
- rcu_read_unlock();
-}
-
void __sched __closure_sync(struct closure *cl)
{
- struct closure_syncer s = { .task = current };
-
- cl->s = &s;
- continue_at(cl, closure_sync_fn, NULL);
+ cl->sleeper = current;
+ closure_sub(cl,
+ CLOSURE_REMAINING_INITIALIZER -
+ CLOSURE_SLEEPING);
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (s.done)
+ if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING))
break;
schedule();
}
@@ -162,31 +160,25 @@ EXPORT_SYMBOL(__closure_sync);
* for outstanding get()s to finish) and returning once closure refcount is 0.
*
* Unlike closure_sync() this doesn't reinit the ref to 1; subsequent
- * closure_get_not_zero() calls waill fail.
+ * closure_get_not_zero() calls will fail.
*/
void __sched closure_return_sync(struct closure *cl)
{
- struct closure_syncer s = { .task = current };
-
- cl->s = &s;
- set_closure_fn(cl, closure_sync_fn, NULL);
-
- unsigned flags = atomic_sub_return_release(1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR,
- &cl->remaining);
+ cl->sleeper = current;
+ closure_sub(cl,
+ CLOSURE_REMAINING_INITIALIZER -
+ CLOSURE_DESTRUCTOR -
+ CLOSURE_SLEEPING);
- closure_val_checks(cl, flags, 1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR);
-
- if (unlikely(flags & CLOSURE_REMAINING_MASK)) {
- while (1) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (s.done)
- break;
- schedule();
- }
-
- __set_current_state(TASK_RUNNING);
+ while (1) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING))
+ break;
+ schedule();
}
+ __set_current_state(TASK_RUNNING);
+
if (cl->parent)
closure_put(cl->parent);
}
@@ -194,31 +186,35 @@ EXPORT_SYMBOL(closure_return_sync);
int __sched __closure_sync_timeout(struct closure *cl, unsigned long timeout)
{
- struct closure_syncer s = { .task = current };
int ret = 0;
- cl->s = &s;
- continue_at(cl, closure_sync_fn, NULL);
+ cl->sleeper = current;
+ closure_sub(cl,
+ CLOSURE_REMAINING_INITIALIZER -
+ CLOSURE_SLEEPING);
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (s.done)
- break;
+ /*
+ * Carefully undo the continue_at() - but only if it
+ * hasn't completed, i.e. the final closure_put() hasn't
+ * happened yet:
+ */
+ unsigned old = atomic_read(&cl->remaining), new;
+ if (!(old & CLOSURE_SLEEPING))
+ goto success;
+
if (!timeout) {
- /*
- * Carefully undo the continue_at() - but only if it
- * hasn't completed, i.e. the final closure_put() hasn't
- * happened yet:
- */
- unsigned old, new, v = atomic_read(&cl->remaining);
do {
- old = v;
- if (!old || (old & CLOSURE_RUNNING))
+ if (!(old & CLOSURE_SLEEPING))
goto success;
- new = old + CLOSURE_REMAINING_INITIALIZER;
- } while ((v = atomic_cmpxchg(&cl->remaining, old, new)) != old);
+ new = old + CLOSURE_REMAINING_INITIALIZER - CLOSURE_SLEEPING;
+ closure_val_checks(cl, new, CLOSURE_REMAINING_INITIALIZER - CLOSURE_SLEEPING);
+ } while (!atomic_try_cmpxchg(&cl->remaining, &old, new));
+
ret = -ETIME;
+ break;
}
timeout = schedule_timeout(timeout);
--
2.51.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v2 4/8] closures: kill closure.closure_get_happened
2025-10-12 21:24 [PATCH v2 0/8] bcachefs out-of-tree series Kent Overstreet
` (2 preceding siblings ...)
2025-10-12 21:24 ` [PATCH v2 3/8] closures: CLOSURE_SLEEPING Kent Overstreet
@ 2025-10-12 21:24 ` Kent Overstreet
2025-10-12 21:24 ` [PATCH v2 5/8] lib: Give closures, min_heap config opts names Kent Overstreet
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Kent Overstreet @ 2025-10-12 21:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, Kent Overstreet
With closure_put() now using cmpxchg, this is no longer needed.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/alloc_foreground.h | 2 +-
fs/bcachefs/fs-io-direct.c | 1 -
include/linux/closure.h | 15 ++-------------
lib/closure.c | 2 --
4 files changed, 3 insertions(+), 17 deletions(-)
diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
index 1b3fc8460096..5a6f0dfe3df5 100644
--- a/fs/bcachefs/alloc_foreground.h
+++ b/fs/bcachefs/alloc_foreground.h
@@ -311,7 +311,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *);
void __bch2_wait_on_allocator(struct bch_fs *, struct closure *);
static inline void bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
{
- if (cl->closure_get_happened)
+ if (closure_nr_remaining(cl) > 1)
__bch2_wait_on_allocator(c, cl);
}
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index 1f5154d9676b..e306eba734a1 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -117,7 +117,6 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
} else {
atomic_set(&dio->cl.remaining,
CLOSURE_REMAINING_INITIALIZER + 1);
- dio->cl.closure_get_happened = true;
}
dio->req = req;
diff --git a/include/linux/closure.h b/include/linux/closure.h
index d0195570514a..83a0dde389bc 100644
--- a/include/linux/closure.h
+++ b/include/linux/closure.h
@@ -155,7 +155,6 @@ struct closure {
struct closure *parent;
atomic_t remaining;
- bool closure_get_happened;
#ifdef CONFIG_DEBUG_CLOSURES
#define CLOSURE_MAGIC_DEAD 0xc054dead
@@ -195,11 +194,7 @@ static inline unsigned closure_nr_remaining(struct closure *cl)
*/
static inline void closure_sync(struct closure *cl)
{
-#ifdef CONFIG_DEBUG_CLOSURES
- BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened);
-#endif
-
- if (cl->closure_get_happened)
+ if (closure_nr_remaining(cl) > 1)
__closure_sync(cl);
}
@@ -207,10 +202,7 @@ int __closure_sync_timeout(struct closure *cl, unsigned long timeout);
static inline int closure_sync_timeout(struct closure *cl, unsigned long timeout)
{
-#ifdef CONFIG_DEBUG_CLOSURES
- BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened);
-#endif
- return cl->closure_get_happened
+ return closure_nr_remaining(cl) > 1
? __closure_sync_timeout(cl, timeout)
: 0;
}
@@ -283,8 +275,6 @@ static inline void closure_queue(struct closure *cl)
*/
static inline void closure_get(struct closure *cl)
{
- cl->closure_get_happened = true;
-
#ifdef CONFIG_DEBUG_CLOSURES
BUG_ON((atomic_inc_return(&cl->remaining) &
CLOSURE_REMAINING_MASK) <= 1);
@@ -322,7 +312,6 @@ static inline void closure_init(struct closure *cl, struct closure *parent)
closure_get(parent);
atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
- cl->closure_get_happened = false;
closure_debug_create(cl);
closure_set_ip(cl);
diff --git a/lib/closure.c b/lib/closure.c
index c49d49916788..f1b4a797c9db 100644
--- a/lib/closure.c
+++ b/lib/closure.c
@@ -75,7 +75,6 @@ void closure_sub(struct closure *cl, int v)
}
if (s == CLOSURE_requeue) {
- cl->closure_get_happened = false;
closure_queue(cl);
} else {
struct closure *parent = cl->parent;
@@ -126,7 +125,6 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
return false;
- cl->closure_get_happened = true;
closure_set_waiting(cl, _RET_IP_);
unsigned r = atomic_add_return(CLOSURE_WAITING + 1, &cl->remaining);
closure_val_checks(cl, r, CLOSURE_WAITING + 1);
--
2.51.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v2 5/8] lib: Give closures, min_heap config opts names
2025-10-12 21:24 [PATCH v2 0/8] bcachefs out-of-tree series Kent Overstreet
` (3 preceding siblings ...)
2025-10-12 21:24 ` [PATCH v2 4/8] closures: kill closure.closure_get_happened Kent Overstreet
@ 2025-10-12 21:24 ` Kent Overstreet
2025-10-12 21:24 ` [PATCH v2 6/8] lib: Give XOR_BLOCKS, RAID6_PQ " Kent Overstreet
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Kent Overstreet @ 2025-10-12 21:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, Kent Overstreet, Eric Biggers
Give these config options names so that they show up under the "Library
routines" kernel configuration menu, and can be enabled by
distributions.
These are needed for bcachefs to be built out of tree.
The closures and min_heap code originates from bcache and bcachefs.
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
lib/Kconfig | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/Kconfig b/lib/Kconfig
index c483951b624f..badcb5ca9efd 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -367,7 +367,7 @@ config ASSOCIATIVE_ARRAY
for more information.
config CLOSURES
- bool
+ bool "bcache/bcachefs async widgets"
config HAS_IOMEM
bool
@@ -646,4 +646,4 @@ config UNION_FIND
bool
config MIN_HEAP
- bool
+ bool "Generic minheap algorithm"
--
2.51.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v2 6/8] lib: Give XOR_BLOCKS, RAID6_PQ config opts names
2025-10-12 21:24 [PATCH v2 0/8] bcachefs out-of-tree series Kent Overstreet
` (4 preceding siblings ...)
2025-10-12 21:24 ` [PATCH v2 5/8] lib: Give closures, min_heap config opts names Kent Overstreet
@ 2025-10-12 21:24 ` Kent Overstreet
2025-10-12 21:24 ` [PATCH v2 7/8] lib: Give compression, checksum, crypto " Kent Overstreet
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Kent Overstreet @ 2025-10-12 21:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, Kent Overstreet, NeilBrown, Eric Biggers
Give these config options names so that they show up under the "Library
routines" kernel configuration menu, and can be enabled by
distributions.
These libraries are needed for bcachefs to be built out of tree.
Per Neal - "the proposed change seems appropriate in that it makes our
code more available and hence more useful. It has no apparent cost and
I can (now) see no better way to achieve a comparable outcome."
These libraries are both for RAID5/6.
Reviewed-By: NeilBrown <neil@brown.name>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
crypto/Kconfig | 2 +-
lib/Kconfig | 4 +++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 23bd98981ae8..da4f072abae0 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -3,7 +3,7 @@
# Generic algorithms support
#
config XOR_BLOCKS
- tristate
+ tristate "Accelerated block xor algorithm"
#
# async_tx api: hardware offloaded memory transfer/transform support
diff --git a/lib/Kconfig b/lib/Kconfig
index badcb5ca9efd..e831f4462453 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -9,7 +9,9 @@ config BINARY_PRINTF
menu "Library routines"
config RAID6_PQ
- tristate
+ tristate "Reed-solomon RAID5/6 algorithms"
+ help
+ Provides routines for block level reed-solomon, for RAID5/6
config RAID6_PQ_BENCHMARK
bool "Automatically choose fastest RAID6 PQ functions"
--
2.51.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v2 7/8] lib: Give compression, checksum, crypto config opts names
2025-10-12 21:24 [PATCH v2 0/8] bcachefs out-of-tree series Kent Overstreet
` (5 preceding siblings ...)
2025-10-12 21:24 ` [PATCH v2 6/8] lib: Give XOR_BLOCKS, RAID6_PQ " Kent Overstreet
@ 2025-10-12 21:24 ` Kent Overstreet
2025-10-12 21:24 ` [PATCH v2 8/8] generic-radix-tree: Overflow checking Kent Overstreet
2025-10-13 6:38 ` [PATCH v2 0/8] bcachefs out-of-tree series Christoph Hellwig
8 siblings, 0 replies; 10+ messages in thread
From: Kent Overstreet @ 2025-10-12 21:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, Kent Overstreet, Eric Biggers
Give these config options names so that they show up under the "Library
routines" kernel configuration menu, and can be enabled by
distributions.
These are needed for bcachefs to be built out of tree.
Most (but not all, notably CRC64) of these libraries have crypto API
wrappers which are already selectable under the "Cryptographic API"
configuration menu; however, Eric Biggers has been working to reduce
usage of the cryptographic API in favor of much more lightweight direct
APIs.
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
lib/Kconfig | 16 ++++++++--------
lib/crc/Kconfig | 4 ++--
lib/crypto/Kconfig | 4 ++--
3 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/lib/Kconfig b/lib/Kconfig
index e831f4462453..eb3b6b96b303 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -142,7 +142,7 @@ source "lib/crc/Kconfig"
source "lib/crypto/Kconfig"
config XXHASH
- tristate
+ tristate "XXHASH checksum algorithm"
config AUDIT_GENERIC
bool
@@ -176,10 +176,10 @@ config 842_DECOMPRESS
tristate
config ZLIB_INFLATE
- tristate
+ tristate "Zlib decompression"
config ZLIB_DEFLATE
- tristate
+ tristate "Zlib compression"
select BITREVERSE
config ZLIB_DFLTCC
@@ -196,13 +196,13 @@ config LZO_DECOMPRESS
tristate
config LZ4_COMPRESS
- tristate
+ tristate "LZ4 compression"
config LZ4HC_COMPRESS
- tristate
+ tristate "LZ4HC compression"
config LZ4_DECOMPRESS
- tristate
+ tristate "LZ4 decompression"
config ZSTD_COMMON
select XXHASH
@@ -210,11 +210,11 @@ config ZSTD_COMMON
config ZSTD_COMPRESS
select ZSTD_COMMON
- tristate
+ tristate "ZSTD compression"
config ZSTD_DECOMPRESS
select ZSTD_COMMON
- tristate
+ tristate "ZSTD decompression"
source "lib/xz/Kconfig"
diff --git a/lib/crc/Kconfig b/lib/crc/Kconfig
index 70e7a6016de3..3a3d6290211b 100644
--- a/lib/crc/Kconfig
+++ b/lib/crc/Kconfig
@@ -54,7 +54,7 @@ config CRC_T10DIF_ARCH
default y if X86
config CRC32
- tristate
+ tristate "CRC32 algorithm"
select BITREVERSE
help
The CRC32 library functions. Select this if your module uses any of
@@ -74,7 +74,7 @@ config CRC32_ARCH
default y if X86
config CRC64
- tristate
+ tristate "CRC64 algorithm"
help
The CRC64 library functions. Select this if your module uses any of
the functions from <linux/crc64.h>.
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 1e6b008f8fca..51e4e8d3b793 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -62,7 +62,7 @@ config CRYPTO_LIB_CHACHA_GENERIC
enabled, this implementation serves the users of CRYPTO_LIB_CHACHA.
config CRYPTO_LIB_CHACHA
- tristate
+ tristate "Chacha encryption algorithm"
help
Enable the ChaCha library interface. This interface may be fulfilled
by either the generic implementation or an arch-specific one, if one
@@ -125,7 +125,7 @@ config CRYPTO_LIB_POLY1305_GENERIC
enabled, this implementation serves the users of CRYPTO_LIB_POLY1305.
config CRYPTO_LIB_POLY1305
- tristate
+ tristate "Poly1305 authentication algorithm"
help
Enable the Poly1305 library interface. This interface may be fulfilled
by either the generic implementation or an arch-specific one, if one
--
2.51.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v2 8/8] generic-radix-tree: Overflow checking
2025-10-12 21:24 [PATCH v2 0/8] bcachefs out-of-tree series Kent Overstreet
` (6 preceding siblings ...)
2025-10-12 21:24 ` [PATCH v2 7/8] lib: Give compression, checksum, crypto " Kent Overstreet
@ 2025-10-12 21:24 ` Kent Overstreet
2025-10-13 6:38 ` [PATCH v2 0/8] bcachefs out-of-tree series Christoph Hellwig
8 siblings, 0 replies; 10+ messages in thread
From: Kent Overstreet @ 2025-10-12 21:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, Kent Overstreet
Internally, genradixes index based on the byte offset into the radix
tree - index * obj_size - which means accidental overflows are a real
concern.
This was noticed in bcachefs where we were using them to index inode
numbers, for hardlink checking/detection.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
include/linux/generic-radix-tree.h | 110 +++++++++++++++++------------
1 file changed, 65 insertions(+), 45 deletions(-)
diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h
index 5b51c3d582d6..52b012efe8f4 100644
--- a/include/linux/generic-radix-tree.h
+++ b/include/linux/generic-radix-tree.h
@@ -155,20 +155,27 @@ void __genradix_free(struct __genradix *);
*/
#define genradix_free(_radix) __genradix_free(&(_radix)->tree)
-static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
+static inline bool __idx_to_offset(size_t idx, size_t obj_size, size_t *offset)
{
+ /*
+ * XXX: check for overflow, we have a bug in multiple places where we
+ * assume idx fits in 64 bits (because it's an inode number; already a
+ * bug on 32 bit) - but we then multiply by the object size and we
+ * overflow
+ */
if (__builtin_constant_p(obj_size))
BUILD_BUG_ON(obj_size > GENRADIX_NODE_SIZE);
else
BUG_ON(obj_size > GENRADIX_NODE_SIZE);
if (!is_power_of_2(obj_size)) {
- size_t objs_per_page = GENRADIX_NODE_SIZE / obj_size;
+ size_t objs_per_page = GENRADIX_NODE_SIZE / obj_size, node, node_offset;
- return (idx / objs_per_page) * GENRADIX_NODE_SIZE +
- (idx % objs_per_page) * obj_size;
+ return check_mul_overflow(idx / objs_per_page, GENRADIX_NODE_SIZE, &node) ? true :
+ check_mul_overflow(idx % objs_per_page, obj_size, &node_offset) ? true :
+ check_add_overflow(node, node_offset, offset);
} else {
- return idx * obj_size;
+ return check_mul_overflow(idx, obj_size, offset);
}
}
@@ -179,8 +186,8 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
#define __genradix_page_remainder(_radix) \
(GENRADIX_NODE_SIZE % sizeof((_radix)->type[0]))
-#define __genradix_idx_to_offset(_radix, _idx) \
- __idx_to_offset(_idx, __genradix_obj_size(_radix))
+#define __genradix_idx_to_offset(_radix, _idx, _offset) \
+ __idx_to_offset(_idx, __genradix_obj_size(_radix), _offset)
static inline void *__genradix_ptr_inlined(struct __genradix *radix, size_t offset)
{
@@ -202,9 +209,13 @@ static inline void *__genradix_ptr_inlined(struct __genradix *radix, size_t offs
}
#define genradix_ptr_inlined(_radix, _idx) \
- (__genradix_cast(_radix) \
- __genradix_ptr_inlined(&(_radix)->tree, \
- __genradix_idx_to_offset(_radix, _idx)))
+({ \
+ size_t _offset; \
+ !__genradix_idx_to_offset(_radix, _idx, &_offset) \
+ ? __genradix_cast(_radix) \
+ __genradix_ptr_inlined(&(_radix)->tree, _offset) \
+ : NULL; \
+})
void *__genradix_ptr(struct __genradix *, size_t);
@@ -216,28 +227,39 @@ void *__genradix_ptr(struct __genradix *, size_t);
* Returns a pointer to entry at @_idx, or NULL if that entry does not exist.
*/
#define genradix_ptr(_radix, _idx) \
- (__genradix_cast(_radix) \
- __genradix_ptr(&(_radix)->tree, \
- __genradix_idx_to_offset(_radix, _idx)))
+({ \
+ size_t _offset; \
+ !__genradix_idx_to_offset(_radix, _idx, &_offset) \
+ ? __genradix_cast(_radix) \
+ __genradix_ptr(&(_radix)->tree, _offset) \
+ : NULL; \
+})
void *__genradix_ptr_alloc(struct __genradix *, size_t,
struct genradix_node **, gfp_t);
-#define genradix_ptr_alloc_inlined(_radix, _idx, _gfp) \
- (__genradix_cast(_radix) \
- (__genradix_ptr_inlined(&(_radix)->tree, \
- __genradix_idx_to_offset(_radix, _idx)) ?: \
- __genradix_ptr_alloc(&(_radix)->tree, \
- __genradix_idx_to_offset(_radix, _idx), \
- NULL, _gfp)))
-
#define genradix_ptr_alloc_preallocated_inlined(_radix, _idx, _new_node, _gfp)\
- (__genradix_cast(_radix) \
- (__genradix_ptr_inlined(&(_radix)->tree, \
- __genradix_idx_to_offset(_radix, _idx)) ?: \
- __genradix_ptr_alloc(&(_radix)->tree, \
- __genradix_idx_to_offset(_radix, _idx), \
- _new_node, _gfp)))
+({ \
+ size_t _offset; \
+ !__genradix_idx_to_offset(_radix, _idx, &_offset) \
+ ? __genradix_cast(_radix) \
+ (__genradix_ptr_inlined(&(_radix)->tree, _offset) ?: \
+ __genradix_ptr_alloc(&(_radix)->tree, _offset, \
+ _new_node, _gfp)) \
+ : NULL; \
+})
+
+#define genradix_ptr_alloc_inlined(_radix, _idx, _gfp) \
+ genradix_ptr_alloc_preallocated_inlined(_radix, _idx, NULL, _gfp)
+
+#define genradix_ptr_alloc_preallocated(_radix, _idx, _new_node, _gfp) \
+({ \
+ size_t _offset; \
+ !__genradix_idx_to_offset(_radix, _idx, &_offset) \
+ ? __genradix_cast(_radix) \
+ __genradix_ptr_alloc(&(_radix)->tree, _offset, _new_node, _gfp) \
+ : NULL; \
+})
/**
* genradix_ptr_alloc - get a pointer to a genradix entry, allocating it
@@ -248,17 +270,8 @@ void *__genradix_ptr_alloc(struct __genradix *, size_t,
*
* Returns a pointer to entry at @_idx, or NULL on allocation failure
*/
-#define genradix_ptr_alloc(_radix, _idx, _gfp) \
- (__genradix_cast(_radix) \
- __genradix_ptr_alloc(&(_radix)->tree, \
- __genradix_idx_to_offset(_radix, _idx), \
- NULL, _gfp))
-
-#define genradix_ptr_alloc_preallocated(_radix, _idx, _new_node, _gfp)\
- (__genradix_cast(_radix) \
- __genradix_ptr_alloc(&(_radix)->tree, \
- __genradix_idx_to_offset(_radix, _idx), \
- _new_node, _gfp))
+#define genradix_ptr_alloc(_radix, _idx, _gfp) \
+ genradix_ptr_alloc_preallocated(_radix, _idx, NULL, _gfp)
struct genradix_iter {
size_t offset;
@@ -271,10 +284,15 @@ struct genradix_iter {
* @_idx: index to start iterating from
*/
#define genradix_iter_init(_radix, _idx) \
- ((struct genradix_iter) { \
+({ \
+ size_t _offset; \
+ __genradix_idx_to_offset(_radix, _idx, &_offset); \
+ \
+ (struct genradix_iter) { \
.pos = (_idx), \
- .offset = __genradix_idx_to_offset((_radix), (_idx)),\
- })
+ .offset = _offset, \
+ }; \
+})
void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t);
@@ -394,9 +412,11 @@ int __genradix_prealloc(struct __genradix *, size_t, gfp_t);
* Returns 0 on success, -ENOMEM on failure
*/
#define genradix_prealloc(_radix, _nr, _gfp) \
- __genradix_prealloc(&(_radix)->tree, \
- __genradix_idx_to_offset(_radix, _nr + 1),\
- _gfp)
-
+({ \
+ size_t _offset; \
+ !__genradix_idx_to_offset(_radix, _nr + 1, &_offset) \
+ ? __genradix_prealloc(&(_radix)->tree, _offset, _gfp) \
+ : -ENOMEM; \
+})
#endif /* _LINUX_GENERIC_RADIX_TREE_H */
--
2.51.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH v2 0/8] bcachefs out-of-tree series
2025-10-12 21:24 [PATCH v2 0/8] bcachefs out-of-tree series Kent Overstreet
` (7 preceding siblings ...)
2025-10-12 21:24 ` [PATCH v2 8/8] generic-radix-tree: Overflow checking Kent Overstreet
@ 2025-10-13 6:38 ` Christoph Hellwig
8 siblings, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2025-10-13 6:38 UTC (permalink / raw)
To: Kent Overstreet; +Cc: akpm, linux-kernel, Greg Kroah-Hartman
Hi Kent,
just as explained last time, Kconfig adjustments are a no-go. If you
want to submit fixes to other code, do that, but don't mix these
already-NAKed bits in.
On Sun, Oct 12, 2025 at 05:24:03PM -0400, Kent Overstreet wrote:
> Now includes a genradix patch adding overflow checks that ought to make
> Kees happy, if he sees it. Otherwise - just commit message fixups.
>
> (I'm the author and listed maintainer of that code).
>
> Kent Overstreet (8):
> closures: Improve closure_put_after_sub_checks
> closures: closure_sub() uses cmpxchg
> closures: CLOSURE_SLEEPING
> closures: kill closure.closure_get_happened
> lib: Give closures, min_heap config opts names
> lib: Give XOR_BLOCKS, RAID6_PQ config opts names
> lib: Give compression, checksum, crypto config opts names
> generic-radix-tree: Overflow checking
>
> crypto/Kconfig | 2 +-
> fs/bcachefs/alloc_foreground.h | 2 +-
> fs/bcachefs/fs-io-direct.c | 1 -
> include/linux/closure.h | 33 +++--
> include/linux/generic-radix-tree.h | 110 +++++++++-------
> lib/Kconfig | 24 ++--
> lib/closure.c | 203 +++++++++++++++--------------
> lib/crc/Kconfig | 4 +-
> lib/crypto/Kconfig | 4 +-
> 9 files changed, 201 insertions(+), 182 deletions(-)
>
> --
> 2.51.0
>
>
---end quoted text---
^ permalink raw reply [flat|nested] 10+ messages in thread