* [PATCH v6 1/4] block: add task-context bio completion infrastructure
2026-05-14 21:51 [PATCH v6 0/4] block: enable RWF_DONTCACHE for block devices Tal Zussman
@ 2026-05-14 21:51 ` Tal Zussman
2026-05-15 2:38 ` Hillf Danton
2026-05-14 21:51 ` [PATCH v6 2/4] iomap: use BIO_COMPLETE_IN_TASK for dropbehind writeback Tal Zussman
` (2 subsequent siblings)
3 siblings, 1 reply; 6+ messages in thread
From: Tal Zussman @ 2026-05-14 21:51 UTC
To: Jens Axboe, Matthew Wilcox (Oracle), Christian Brauner,
Darrick J. Wong, Carlos Maiolino, Alexander Viro, Jan Kara,
Christoph Hellwig
Cc: Dave Chinner, Bart Van Assche, linux-block, linux-kernel,
linux-xfs, linux-fsdevel, linux-mm, Gao Xiang, Tal Zussman
Some bio completion handlers need to run from preemptible task context,
but bio_endio() may be called from IRQ context (e.g., buffer_head
writeback). Callers need a way to ensure their callback eventually runs
from a sleepable context. Add infrastructure for that, in two forms:
1. BIO_COMPLETE_IN_TASK, a bio flag the submitter sets when it knows
in advance that its callback needs task context (e.g., dropbehind
writeback). bio_endio() sees the flag and offloads completion to a
worker automatically.
2. bio_complete_in_task(), a helper that completion callbacks can
invoke from within bi_end_io() when the deferral decision is
dynamic (e.g., fs error reporting).
Both share a per-CPU batch list drained by a delayed work item on a
WQ_PERCPU workqueue. Producers push the bio onto the local CPU's batch
and schedule the work item, which then dispatches each bio's bi_end_io()
from task context. The delayed work item uses a 1-jiffy delay to allow
batches of completions to accumulate before processing.
Both methods are gated on bio_in_atomic(), which returns true in any
context where a sleeping bi_end_io() is unsafe, including
non-preemptible task context. This logic is copied from commit
c99fab6e80b7 ("erofs: fix atomic context detection when
!CONFIG_DEBUG_LOCK_ALLOC").
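For illustration, a minimal sketch of both forms. my_submit(), my_end_io(),
and my_handle_completion() are hypothetical names, not part of this patch:

	/* Dynamic form: the decision is made inside the completion handler. */
	static void my_end_io(struct bio *bio)
	{
		/*
		 * Bounce to the worker if the current context cannot sleep;
		 * bi_end_io() is then re-invoked from task context, where
		 * bio_complete_in_task() returns false and we fall through.
		 */
		if (bio_complete_in_task(bio))
			return;

		my_handle_completion(bio);	/* may sleep */
		bio_put(bio);
	}

	/* Static form: the submitter knows in advance the handler must sleep. */
	static void my_submit(struct block_device *bdev, struct folio *folio,
			      sector_t sector)
	{
		struct bio *bio = bio_alloc(bdev, 1, REQ_OP_WRITE, GFP_NOIO);

		bio->bi_iter.bi_sector = sector;
		bio->bi_end_io = my_end_io;
		bio_set_flag(bio, BIO_COMPLETE_IN_TASK);
		bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
		submit_bio(bio);
	}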
Two CPU hotplug callbacks drain remaining bios from a departing CPU's
batch while preserving the per-CPU behavior. The CPUHP_AP_ONLINE_DYN
teardown callback disables the per-CPU delayed work while the CPU is
still online, preventing it from later running on an unbound worker. The
CPUHP_BP_PREPARE_DYN teardown callback then drains any bios left in the
batch, including ones added between the work item being disabled and the
CPU going offline.
Link: https://lore.kernel.org/all/20260409160243.1008358-1-hch@lst.de/
Suggested-by: Matthew Wilcox <willy@infradead.org>
Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tal Zussman <tz2294@columbia.edu>
---
block/bio.c | 147 +++++++++++++++++++++++++++++++++++++++++++++-
include/linux/bio.h | 32 ++++++++++
include/linux/blk_types.h | 1 +
3 files changed, 179 insertions(+), 1 deletion(-)
diff --git a/block/bio.c b/block/bio.c
index b8972dba68a0..6864ee737400 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -19,6 +19,7 @@
#include <linux/blk-crypto.h>
#include <linux/xarray.h>
#include <linux/kmemleak.h>
+#include <linux/local_lock.h>
#include <trace/events/block.h>
#include "blk.h"
@@ -1717,6 +1718,79 @@ void bio_check_pages_dirty(struct bio *bio)
}
EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
+/*
+ * Infrastructure for deferring bio completions to task context via a per-CPU
+ * workqueue. Triggered either by the BIO_COMPLETE_IN_TASK bio flag (static
+ * decision at submit time) or by calling bio_complete_in_task() from
+ * bi_end_io() (dynamic decision at completion time).
+ */
+
+struct bio_complete_batch {
+ local_lock_t lock;
+ struct bio_list list;
+ struct delayed_work work;
+ int cpu;
+};
+
+static DEFINE_PER_CPU(struct bio_complete_batch, bio_complete_batch) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
+static struct workqueue_struct *bio_complete_wq;
+
+static void bio_complete_work_fn(struct work_struct *w)
+{
+ struct delayed_work *dw = to_delayed_work(w);
+ struct bio_complete_batch *batch =
+ container_of(dw, struct bio_complete_batch, work);
+
+ while (1) {
+ struct bio_list list;
+ struct bio *bio;
+
+ local_lock_irq(&bio_complete_batch.lock);
+ list = batch->list;
+ bio_list_init(&batch->list);
+ local_unlock_irq(&bio_complete_batch.lock);
+
+ if (bio_list_empty(&list))
+ break;
+
+ while ((bio = bio_list_pop(&list)))
+ bio->bi_end_io(bio);
+
+ if (need_resched()) {
+ bool is_empty;
+
+ local_lock_irq(&bio_complete_batch.lock);
+ is_empty = bio_list_empty(&batch->list);
+ local_unlock_irq(&bio_complete_batch.lock);
+ if (!is_empty)
+ mod_delayed_work_on(batch->cpu,
+ bio_complete_wq,
+ &batch->work, 0);
+ break;
+ }
+ }
+}
+
+void __bio_complete_in_task(struct bio *bio)
+{
+ struct bio_complete_batch *batch;
+ unsigned long flags;
+ bool was_empty;
+
+ local_lock_irqsave(&bio_complete_batch.lock, flags);
+ batch = this_cpu_ptr(&bio_complete_batch);
+ was_empty = bio_list_empty(&batch->list);
+ bio_list_add(&batch->list, bio);
+ local_unlock_irqrestore(&bio_complete_batch.lock, flags);
+
+ if (was_empty)
+ mod_delayed_work_on(batch->cpu, bio_complete_wq,
+ &batch->work, 1);
+}
+EXPORT_SYMBOL_GPL(__bio_complete_in_task);
+
static inline bool bio_remaining_done(struct bio *bio)
{
/*
@@ -1791,7 +1865,9 @@ void bio_endio(struct bio *bio)
}
#endif
- if (bio->bi_end_io)
+ if (bio_flagged(bio, BIO_COMPLETE_IN_TASK) && bio_in_atomic())
+ __bio_complete_in_task(bio);
+ else if (bio->bi_end_io)
bio->bi_end_io(bio);
}
EXPORT_SYMBOL(bio_endio);
@@ -1977,6 +2053,51 @@ int bioset_init(struct bio_set *bs,
}
EXPORT_SYMBOL(bioset_init);
+static int bio_complete_batch_cpu_online(unsigned int cpu)
+{
+ enable_delayed_work(&per_cpu(bio_complete_batch, cpu).work);
+ return 0;
+}
+
+/*
+ * Disable this CPU's delayed work so that it cannot run on an unbound worker
+ * after the CPU is offlined.
+ */
+static int bio_complete_batch_cpu_down_prep(unsigned int cpu)
+{
+ disable_delayed_work_sync(&per_cpu(bio_complete_batch, cpu).work);
+ return 0;
+}
+
+/*
+ * Drain a dead CPU's deferred bio completions. The CPU is dead and the worker
+ * is canceled so no locking is needed.
+ */
+static int bio_complete_batch_cpu_dead(unsigned int cpu)
+{
+ struct bio_complete_batch *batch =
+ per_cpu_ptr(&bio_complete_batch, cpu);
+ struct bio *bio;
+
+ while ((bio = bio_list_pop(&batch->list)))
+ bio->bi_end_io(bio);
+
+ return 0;
+}
+
+static void __init bio_complete_batch_init(int cpu)
+{
+ struct bio_complete_batch *batch =
+ per_cpu_ptr(&bio_complete_batch, cpu);
+
+ bio_list_init(&batch->list);
+ INIT_DELAYED_WORK(&batch->work, bio_complete_work_fn);
+ batch->cpu = cpu;
+
+ if (!cpu_online(cpu))
+ disable_delayed_work_sync(&batch->work);
+}
+
static int __init init_bio(void)
{
int i;
@@ -1991,6 +2112,30 @@ static int __init init_bio(void)
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
}
+ for_each_possible_cpu(i)
+ bio_complete_batch_init(i);
+
+ bio_complete_wq = alloc_workqueue("bio_complete",
+ WQ_MEM_RECLAIM | WQ_PERCPU, 0);
+ if (!bio_complete_wq)
+ panic("bio: can't allocate bio_complete workqueue\n");
+
+ /*
+ * bio task-context completion draining on hot-unplugged CPUs:
+ *
+ * 1. Stop the per-CPU delayed work while the CPU is still online, so
+ * that it cannot run on an unbound worker later.
+ * 2. Drain leftover bios added between worker disabling and CPU
+ * offlining.
+ */
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "block/bio:complete:online",
+ bio_complete_batch_cpu_online,
+ bio_complete_batch_cpu_down_prep);
+ cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
+ "block/bio:complete:dead",
+ NULL, bio_complete_batch_cpu_dead);
+
cpuhp_setup_state_multi(CPUHP_BIO_DEAD, "block/bio:dead", NULL,
bio_cpu_dead);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 97d747320b35..c0214d6c28d6 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -369,6 +369,38 @@ static inline struct bio *bio_alloc(struct block_device *bdev,
void submit_bio(struct bio *bio);
+/**
+ * bio_in_atomic - check if the current context is unsafe for bio completion
+ *
+ * Return: %true in atomic contexts (e.g. hard/soft IRQ, preempt-disabled);
+ * %false when a bio can be safely completed in the current context.
+ */
+static inline bool bio_in_atomic(void)
+{
+ if (IS_ENABLED(CONFIG_PREEMPTION) && rcu_preempt_depth())
+ return true;
+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT))
+ return true;
+ return !preemptible();
+}
+
+void __bio_complete_in_task(struct bio *bio);
+
+/**
+ * bio_complete_in_task - ensure a bio is completed in preemptible task context
+ * @bio: bio to complete
+ *
+ * If the current context cannot sleep, offload the bio completion to a
+ * worker thread and return %true. Otherwise, return %false and do nothing.
+ */
+static inline bool bio_complete_in_task(struct bio *bio)
+{
+ if (!bio_in_atomic())
+ return false;
+ __bio_complete_in_task(bio);
+ return true;
+}
+
extern void bio_endio(struct bio *);
static inline void bio_io_error(struct bio *bio)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 8808ee76e73c..d49d97a050d0 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -322,6 +322,7 @@ enum {
BIO_REMAPPED,
BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */
BIO_EMULATES_ZONE_APPEND, /* bio emulates a zone append operation */
+ BIO_COMPLETE_IN_TASK, /* complete bi_end_io() in task context */
BIO_FLAG_LAST
};
--
2.39.5
* Re: [PATCH v6 1/4] block: add task-context bio completion infrastructure
2026-05-14 21:51 ` [PATCH v6 1/4] block: add task-context bio completion infrastructure Tal Zussman
@ 2026-05-15 2:38 ` Hillf Danton
0 siblings, 0 replies; 6+ messages in thread
From: Hillf Danton @ 2026-05-15 2:38 UTC
To: Tal Zussman
Cc: Matthew Wilcox (Oracle), Christoph Hellwig, linux-block,
linux-kernel
On Thu, 14 May 2026 17:51:14 -0400 Tal Zussman wrote:
> +
> +static void bio_complete_work_fn(struct work_struct *w)
> +{
> + struct delayed_work *dw = to_delayed_work(w);
> + struct bio_complete_batch *batch =
> + container_of(dw, struct bio_complete_batch, work);
> +
> + while (1) {
> + struct bio_list list;
> + struct bio *bio;
> +
> + local_lock_irq(&bio_complete_batch.lock);
> + list = batch->list;
> + bio_list_init(&batch->list);
> + local_unlock_irq(&bio_complete_batch.lock);
> +
> + if (bio_list_empty(&list))
> + break;
> +
> + while ((bio = bio_list_pop(&list)))
> + bio->bi_end_io(bio);
> +
> + if (need_resched()) {
> + bool is_empty;
> +
Checking for resched is not needed, as the workqueue worker can be
preempted while processing bios. Given the batch and the delayed work, I
suspect it makes sense to complete more than one batch, i.e. the bios
accumulated within a jiffy.
> + local_lock_irq(&bio_complete_batch.lock);
> + is_empty = bio_list_empty(&batch->list);
> + local_unlock_irq(&bio_complete_batch.lock);
> + if (!is_empty)
> + mod_delayed_work_on(batch->cpu,
> + bio_complete_wq,
> + &batch->work, 0);
> + break;
> + }
> + }
> +}
> +
* [PATCH v6 2/4] iomap: use BIO_COMPLETE_IN_TASK for dropbehind writeback
2026-05-14 21:51 [PATCH v6 0/4] block: enable RWF_DONTCACHE for block devices Tal Zussman
2026-05-14 21:51 ` [PATCH v6 1/4] block: add task-context bio completion infrastructure Tal Zussman
@ 2026-05-14 21:51 ` Tal Zussman
2026-05-14 21:51 ` [PATCH v6 3/4] buffer: add dropbehind writeback support Tal Zussman
2026-05-14 21:51 ` [PATCH v6 4/4] block: enable RWF_DONTCACHE for block devices Tal Zussman
3 siblings, 0 replies; 6+ messages in thread
From: Tal Zussman @ 2026-05-14 21:51 UTC
To: Jens Axboe, Matthew Wilcox (Oracle), Christian Brauner,
Darrick J. Wong, Carlos Maiolino, Alexander Viro, Jan Kara,
Christoph Hellwig
Cc: Dave Chinner, Bart Van Assche, linux-block, linux-kernel,
linux-xfs, linux-fsdevel, linux-mm, Gao Xiang, Tal Zussman
Set BIO_COMPLETE_IN_TASK on iomap writeback bios when a dropbehind folio
is added. This ensures that bi_end_io() runs in task context, where
folio_end_dropbehind() can safely invalidate folios.
With the bio layer now handling task-context deferral generically,
IOMAP_IOEND_DONTCACHE is no longer needed, as XFS no longer needs to
route DONTCACHE ioends through its completion workqueue. Remove the flag
and its NOMERGE entry.
Without the NOMERGE entry, regular I/O that gets merged into the same
ioend as a dropbehind folio will also have its completion deferred to
task context.
Signed-off-by: Tal Zussman <tz2294@columbia.edu>
---
fs/iomap/ioend.c | 5 +++--
fs/xfs/xfs_aops.c | 4 ----
include/linux/iomap.h | 5 +----
3 files changed, 4 insertions(+), 10 deletions(-)
diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c
index acf3cf98b23a..892dbfc77ae9 100644
--- a/fs/iomap/ioend.c
+++ b/fs/iomap/ioend.c
@@ -237,8 +237,6 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio,
if (wpc->iomap.flags & IOMAP_F_SHARED)
ioend_flags |= IOMAP_IOEND_SHARED;
- if (folio_test_dropbehind(folio))
- ioend_flags |= IOMAP_IOEND_DONTCACHE;
if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY))
ioend_flags |= IOMAP_IOEND_BOUNDARY;
@@ -255,6 +253,9 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio,
if (!bio_add_folio(&ioend->io_bio, folio, map_len, poff))
goto new_ioend;
+ if (folio_test_dropbehind(folio))
+ bio_set_flag(&ioend->io_bio, BIO_COMPLETE_IN_TASK);
+
/*
* Clamp io_offset and io_size to the incore EOF so that ondisk
* file size updates in the ioend completion are byte-accurate.
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index f279055fcea0..0dcf78beae8a 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -511,10 +511,6 @@ xfs_ioend_needs_wq_completion(
if (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED))
return true;
- /* Page cache invalidation cannot be done in irq context. */
- if (ioend->io_flags & IOMAP_IOEND_DONTCACHE)
- return true;
-
return false;
}
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 2c5685adf3a9..fef04e01116f 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -399,16 +399,13 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
#define IOMAP_IOEND_BOUNDARY (1U << 2)
/* is direct I/O */
#define IOMAP_IOEND_DIRECT (1U << 3)
-/* is DONTCACHE I/O */
-#define IOMAP_IOEND_DONTCACHE (1U << 4)
/*
* Flags that if set on either ioend prevent the merge of two ioends.
* (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way)
*/
#define IOMAP_IOEND_NOMERGE_FLAGS \
- (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT | \
- IOMAP_IOEND_DONTCACHE)
+ (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT)
/*
* Structure for writeback I/O completions.
--
2.39.5
* [PATCH v6 3/4] buffer: add dropbehind writeback support
2026-05-14 21:51 [PATCH v6 0/4] block: enable RWF_DONTCACHE for block devices Tal Zussman
2026-05-14 21:51 ` [PATCH v6 1/4] block: add task-context bio completion infrastructure Tal Zussman
2026-05-14 21:51 ` [PATCH v6 2/4] iomap: use BIO_COMPLETE_IN_TASK for dropbehind writeback Tal Zussman
@ 2026-05-14 21:51 ` Tal Zussman
2026-05-14 21:51 ` [PATCH v6 4/4] block: enable RWF_DONTCACHE for block devices Tal Zussman
3 siblings, 0 replies; 6+ messages in thread
From: Tal Zussman @ 2026-05-14 21:51 UTC
To: Jens Axboe, Matthew Wilcox (Oracle), Christian Brauner,
Darrick J. Wong, Carlos Maiolino, Alexander Viro, Jan Kara,
Christoph Hellwig
Cc: Dave Chinner, Bart Van Assche, linux-block, linux-kernel,
linux-xfs, linux-fsdevel, linux-mm, Gao Xiang, Tal Zussman
Add block_write_begin_iocb(), which threads the kiocb through to
__filemap_get_folio() so that buffer_head-based I/O can use DONTCACHE
behavior. When the iocb has IOCB_DONTCACHE set, FGP_DONTCACHE is
passed to mark the folio for dropbehind. The existing
block_write_begin() is preserved as a wrapper that passes a NULL iocb.
Set BIO_COMPLETE_IN_TASK in submit_bh_wbc() when the folio has
dropbehind set, so that buffer_head writeback completions get deferred
to task context.
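A buffer_head-based filesystem would opt in by threading its iocb through;
a minimal sketch, with myfs_write_begin() and myfs_get_block() as
hypothetical names:

	static int myfs_write_begin(const struct kiocb *iocb,
				    struct address_space *mapping, loff_t pos,
				    unsigned len, struct folio **foliop,
				    void **fsdata)
	{
		return block_write_begin_iocb(iocb, mapping, pos, len, foliop,
					      myfs_get_block);
	}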
Signed-off-by: Tal Zussman <tz2294@columbia.edu>
---
fs/buffer.c | 19 +++++++++++++++++--
include/linux/buffer_head.h | 3 +++
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index b0b3792b1496..d0abaf44d782 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2138,14 +2138,19 @@ EXPORT_SYMBOL(block_commit_write);
*
* The filesystem needs to handle block truncation upon failure.
*/
-int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
+int block_write_begin_iocb(const struct kiocb *iocb,
+ struct address_space *mapping, loff_t pos, unsigned len,
struct folio **foliop, get_block_t *get_block)
{
pgoff_t index = pos >> PAGE_SHIFT;
+ fgf_t fgp_flags = FGP_WRITEBEGIN;
struct folio *folio;
int status;
- folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ if (iocb && iocb->ki_flags & IOCB_DONTCACHE)
+ fgp_flags |= FGP_DONTCACHE;
+
+ folio = __filemap_get_folio(mapping, index, fgp_flags,
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
return PTR_ERR(folio);
@@ -2160,6 +2165,13 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
*foliop = folio;
return status;
}
+
+int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
+ struct folio **foliop, get_block_t *get_block)
+{
+ return block_write_begin_iocb(NULL, mapping, pos, len, foliop,
+ get_block);
+}
EXPORT_SYMBOL(block_write_begin);
int block_write_end(loff_t pos, unsigned len, unsigned copied,
@@ -2715,6 +2727,9 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
bio = bio_alloc(bh->b_bdev, 1, opf, GFP_NOIO);
+ if (folio_test_dropbehind(bh->b_folio))
+ bio_set_flag(bio, BIO_COMPLETE_IN_TASK);
+
if (IS_ENABLED(CONFIG_FS_ENCRYPTION))
buffer_set_crypto_ctx(bio, bh, GFP_NOIO);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index e4939e33b4b5..4ce50882d621 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -260,6 +260,9 @@ int block_read_full_folio(struct folio *, get_block_t *);
bool block_is_partially_uptodate(struct folio *, size_t from, size_t count);
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
struct folio **foliop, get_block_t *get_block);
+int block_write_begin_iocb(const struct kiocb *iocb,
+ struct address_space *mapping, loff_t pos, unsigned len,
+ struct folio **foliop, get_block_t *get_block);
int __block_write_begin(struct folio *folio, loff_t pos, unsigned len,
get_block_t *get_block);
int block_write_end(loff_t pos, unsigned len, unsigned copied, struct folio *);
--
2.39.5
* [PATCH v6 4/4] block: enable RWF_DONTCACHE for block devices
2026-05-14 21:51 [PATCH v6 0/4] block: enable RWF_DONTCACHE for block devices Tal Zussman
` (2 preceding siblings ...)
2026-05-14 21:51 ` [PATCH v6 3/4] buffer: add dropbehind writeback support Tal Zussman
@ 2026-05-14 21:51 ` Tal Zussman
3 siblings, 0 replies; 6+ messages in thread
From: Tal Zussman @ 2026-05-14 21:51 UTC
To: Jens Axboe, Matthew Wilcox (Oracle), Christian Brauner,
Darrick J. Wong, Carlos Maiolino, Alexander Viro, Jan Kara,
Christoph Hellwig
Cc: Dave Chinner, Bart Van Assche, linux-block, linux-kernel,
linux-xfs, linux-fsdevel, linux-mm, Gao Xiang, Tal Zussman
Block device buffered reads and writes already pass through
filemap_read() and iomap_file_buffered_write() respectively, both of
which handle IOCB_DONTCACHE. Enable RWF_DONTCACHE for block device files
by setting FOP_DONTCACHE in def_blk_fops.
For CONFIG_BUFFER_HEAD=y paths, use block_write_begin_iocb() in
blkdev_write_begin() to thread the kiocb through so that buffer_head
writeback gets dropbehind support.
CONFIG_BUFFER_HEAD=n paths are handled by the previously added iomap
BIO_COMPLETE_IN_TASK support.
This support is useful for databases that operate on raw block devices,
among other userspace applications.
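From userspace, the behavior is requested per I/O. A minimal sketch,
assuming RWF_DONTCACHE is visible via the uapi headers (older libcs may
need to pull the definition from <linux/fs.h>):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <sys/uio.h>

	/* Buffered write whose folios are dropped once writeback completes. */
	static ssize_t write_dontcache(int fd, void *buf, size_t len, off_t off)
	{
		struct iovec iov = { .iov_base = buf, .iov_len = len };

		return pwritev2(fd, &iov, 1, off, RWF_DONTCACHE);
	}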
Signed-off-by: Tal Zussman <tz2294@columbia.edu>
---
block/fops.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/block/fops.c b/block/fops.c
index bb6642b45937..31b073181d87 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -504,7 +504,8 @@ static int blkdev_write_begin(const struct kiocb *iocb,
unsigned len, struct folio **foliop,
void **fsdata)
{
- return block_write_begin(mapping, pos, len, foliop, blkdev_get_block);
+ return block_write_begin_iocb(iocb, mapping, pos, len, foliop,
+ blkdev_get_block);
}
static int blkdev_write_end(const struct kiocb *iocb,
@@ -966,7 +967,7 @@ const struct file_operations def_blk_fops = {
.splice_write = iter_file_splice_write,
.fallocate = blkdev_fallocate,
.uring_cmd = blkdev_uring_cmd,
- .fop_flags = FOP_BUFFER_RASYNC,
+ .fop_flags = FOP_BUFFER_RASYNC | FOP_DONTCACHE,
};
static __init int blkdev_init(void)
--
2.39.5