From: Tal Zussman <tz2294@columbia.edu>
To: Jens Axboe <axboe@kernel.dk>,
"Matthew Wilcox (Oracle)" <willy@infradead.org>,
Christian Brauner <brauner@kernel.org>,
"Darrick J. Wong" <djwong@kernel.org>,
Carlos Maiolino <cem@kernel.org>,
Alexander Viro <viro@zeniv.linux.org.uk>, Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@infradead.org>,
linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-mm@kvack.org, Tal Zussman <tz2294@columbia.edu>
Subject: [PATCH RFC v4 1/3] block: add BIO_COMPLETE_IN_TASK for task-context completion
Date: Wed, 25 Mar 2026 14:43:00 -0400 [thread overview]
Message-ID: <20260325-blk-dontcache-v4-1-c4b56db43f64@columbia.edu> (raw)
In-Reply-To: <20260325-blk-dontcache-v4-0-c4b56db43f64@columbia.edu>
Some bio completion handlers need to run in task context but bio_endio()
can be called from IRQ context (e.g. buffer_head writeback). Add a
BIO_COMPLETE_IN_TASK flag that bio submitters can set to request
task-context completion of their bi_end_io callback.
When bio_endio() sees this flag and is running in non-task context, it
queues the bio to a per-cpu list and schedules a work item to call
bi_end_io() from task context. A CPU hotplug dead callback drains any
remaining bios from the departing CPU's batch.
This will be used to enable RWF_DONTCACHE for block devices, and could
be used for other subsystems like fscrypt that need task-context bio
completion.
Suggested-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Tal Zussman <tz2294@columbia.edu>
---
block/bio.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++-
include/linux/blk_types.h | 1 +
2 files changed, 84 insertions(+), 1 deletion(-)
diff --git a/block/bio.c b/block/bio.c
index 8203bb7455a9..69ee0d93041f 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -18,6 +18,7 @@
#include <linux/highmem.h>
#include <linux/blk-crypto.h>
#include <linux/xarray.h>
+#include <linux/local_lock.h>
#include <trace/events/block.h>
#include "blk.h"
@@ -1714,6 +1715,60 @@ void bio_check_pages_dirty(struct bio *bio)
}
EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
+struct bio_complete_batch {
+ local_lock_t lock;
+ struct bio_list list;
+ struct work_struct work;
+};
+
+static DEFINE_PER_CPU(struct bio_complete_batch, bio_complete_batch) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
+
+static void bio_complete_work_fn(struct work_struct *w)
+{
+ struct bio_complete_batch *batch;
+ struct bio_list list;
+
+again:
+ local_lock_irq(&bio_complete_batch.lock);
+ batch = this_cpu_ptr(&bio_complete_batch);
+ list = batch->list;
+ bio_list_init(&batch->list);
+ local_unlock_irq(&bio_complete_batch.lock);
+
+ while (!bio_list_empty(&list)) {
+ struct bio *bio = bio_list_pop(&list);
+ bio->bi_end_io(bio);
+ }
+
+ local_lock_irq(&bio_complete_batch.lock);
+ batch = this_cpu_ptr(&bio_complete_batch);
+ if (!bio_list_empty(&batch->list)) {
+ local_unlock_irq(&bio_complete_batch.lock);
+
+ if (!need_resched())
+ goto again;
+
+ schedule_work_on(smp_processor_id(), &batch->work);
+ return;
+ }
+ local_unlock_irq(&bio_complete_batch.lock);
+}
+
+static void bio_queue_completion(struct bio *bio)
+{
+ struct bio_complete_batch *batch;
+ unsigned long flags;
+
+ local_lock_irqsave(&bio_complete_batch.lock, flags);
+ batch = this_cpu_ptr(&bio_complete_batch);
+ bio_list_add(&batch->list, bio);
+ local_unlock_irqrestore(&bio_complete_batch.lock, flags);
+
+ schedule_work_on(smp_processor_id(), &batch->work);
+}
+
static inline bool bio_remaining_done(struct bio *bio)
{
/*
@@ -1788,7 +1843,9 @@ void bio_endio(struct bio *bio)
}
#endif
- if (bio->bi_end_io)
+ if (!in_task() && bio_flagged(bio, BIO_COMPLETE_IN_TASK))
+ bio_queue_completion(bio);
+ else if (bio->bi_end_io)
bio->bi_end_io(bio);
}
EXPORT_SYMBOL(bio_endio);
@@ -1974,6 +2031,21 @@ int bioset_init(struct bio_set *bs,
}
EXPORT_SYMBOL(bioset_init);
+/*
+ * Drain a dead CPU's deferred bio completions. The CPU is dead so no locking
+ * is needed -- no new bios will be queued to it.
+ */
+static int bio_complete_batch_cpu_dead(unsigned int cpu)
+{
+ struct bio_complete_batch *batch = per_cpu_ptr(&bio_complete_batch, cpu);
+ struct bio *bio;
+
+ while ((bio = bio_list_pop(&batch->list)))
+ bio->bi_end_io(bio);
+
+ return 0;
+}
+
static int __init init_bio(void)
{
int i;
@@ -1988,6 +2060,16 @@ static int __init init_bio(void)
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
}
+ for_each_possible_cpu(i) {
+ struct bio_complete_batch *batch =
+ per_cpu_ptr(&bio_complete_batch, i);
+
+ bio_list_init(&batch->list);
+ INIT_WORK(&batch->work, bio_complete_work_fn);
+ }
+
+ cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "block/bio:complete:dead",
+ NULL, bio_complete_batch_cpu_dead);
cpuhp_setup_state_multi(CPUHP_BIO_DEAD, "block/bio:dead", NULL,
bio_cpu_dead);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 8808ee76e73c..d49d97a050d0 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -322,6 +322,7 @@ enum {
BIO_REMAPPED,
BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */
BIO_EMULATES_ZONE_APPEND, /* bio emulates a zone append operation */
+ BIO_COMPLETE_IN_TASK, /* complete bi_end_io() in task context */
BIO_FLAG_LAST
};
--
2.39.5
next prev parent reply other threads:[~2026-03-25 19:38 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-25 18:42 [PATCH RFC v4 0/3] block: enable RWF_DONTCACHE for block devices Tal Zussman
2026-03-25 18:43 ` Tal Zussman [this message]
2026-03-25 19:54 ` [PATCH RFC v4 1/3] block: add BIO_COMPLETE_IN_TASK for task-context completion Matthew Wilcox
2026-03-25 20:14 ` Jens Axboe
2026-03-25 20:26 ` Dave Chinner
2026-03-25 20:39 ` Matthew Wilcox
2026-03-26 2:44 ` Dave Chinner
2026-03-25 21:03 ` Bart Van Assche
2026-03-26 3:18 ` Dave Chinner
2026-03-25 18:43 ` [PATCH RFC v4 2/3] iomap: use BIO_COMPLETE_IN_TASK for dropbehind writeback Tal Zussman
2026-03-25 20:21 ` Matthew Wilcox
2026-03-25 20:34 ` Dave Chinner
2026-03-25 18:43 ` [PATCH RFC v4 3/3] block: enable RWF_DONTCACHE for block devices Tal Zussman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260325-blk-dontcache-v4-1-c4b56db43f64@columbia.edu \
--to=tz2294@columbia.edu \
--cc=axboe@kernel.dk \
--cc=brauner@kernel.org \
--cc=cem@kernel.org \
--cc=djwong@kernel.org \
--cc=hch@infradead.org \
--cc=jack@suse.cz \
--cc=linux-block@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-xfs@vger.kernel.org \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox