* [PATCH] loop: stop using vfs_iter_{read,write} for buffered I/O
@ 2025-04-09 13:09 Christoph Hellwig
2025-04-09 13:44 ` Ming Lei
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Christoph Hellwig @ 2025-04-09 13:09 UTC (permalink / raw)
To: axboe; +Cc: djwong, linux-block, ming.lei
vfs_iter_{read,write} always perform direct I/O when the file has the
O_DIRECT flag set, which breaks disabling direct I/O using the
LOOP_SET_STATUS / LOOP_SET_STATUS64 ioctls.
This was recently reported as a regression, but as far as I can tell
it was only uncovered by better checking for block sizes and has been
around since direct I/O support was added.
Fix this by using the existing aio code that calls the raw read/write
iter methods instead. Note that despite the comments there is no need
for block drivers to ever call flush_dcache_page themselves, and the
call is a left-over from prehistoric times.
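For background, the synchronous iter helpers build their kiocb straight
from the file, which is why the O_DIRECT flag leaks through.  A
simplified sketch of that initialization (assumed from
include/linux/fs.h; the exact layout varies by kernel version):

static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
{
	*kiocb = (struct kiocb) {
		.ki_filp   = filp,
		/* seeded from the file: an O_DIRECT file always carries
		 * IOCB_DIRECT into vfs_iter_{read,write} */
		.ki_flags  = filp->f_iocb_flags,
		.ki_ioprio = get_current_ioprio(),
	};
}

Building the kiocb explicitly and calling ->read_iter / ->write_iter
directly, as the loop aio path already does, avoids inheriting those
flags.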
Fixes: ab1cb278bc70 ("block: loop: introduce ioctl command of LOOP_SET_DIRECT_IO")
Reported-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/block/loop.c | 112 +++++++------------------------------------
1 file changed, 17 insertions(+), 95 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 674527d770dc..0e925f1642cc 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -211,72 +211,6 @@ static void loop_set_size(struct loop_device *lo, loff_t size)
kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
}
-static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
-{
- struct iov_iter i;
- ssize_t bw;
-
- iov_iter_bvec(&i, ITER_SOURCE, bvec, 1, bvec->bv_len);
-
- bw = vfs_iter_write(file, &i, ppos, 0);
-
- if (likely(bw == bvec->bv_len))
- return 0;
-
- printk_ratelimited(KERN_ERR
- "loop: Write error at byte offset %llu, length %i.\n",
- (unsigned long long)*ppos, bvec->bv_len);
- if (bw >= 0)
- bw = -EIO;
- return bw;
-}
-
-static int lo_write_simple(struct loop_device *lo, struct request *rq,
- loff_t pos)
-{
- struct bio_vec bvec;
- struct req_iterator iter;
- int ret = 0;
-
- rq_for_each_segment(bvec, rq, iter) {
- ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos);
- if (ret < 0)
- break;
- cond_resched();
- }
-
- return ret;
-}
-
-static int lo_read_simple(struct loop_device *lo, struct request *rq,
- loff_t pos)
-{
- struct bio_vec bvec;
- struct req_iterator iter;
- struct iov_iter i;
- ssize_t len;
-
- rq_for_each_segment(bvec, rq, iter) {
- iov_iter_bvec(&i, ITER_DEST, &bvec, 1, bvec.bv_len);
- len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
- if (len < 0)
- return len;
-
- flush_dcache_page(bvec.bv_page);
-
- if (len != bvec.bv_len) {
- struct bio *bio;
-
- __rq_for_each_bio(bio, rq)
- zero_fill_bio(bio);
- break;
- }
- cond_resched();
- }
-
- return 0;
-}
-
static void loop_clear_limits(struct loop_device *lo, int mode)
{
struct queue_limits lim = queue_limits_start_update(lo->lo_queue);
@@ -342,7 +276,7 @@ static void lo_complete_rq(struct request *rq)
struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
blk_status_t ret = BLK_STS_OK;
- if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
+ if (cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
req_op(rq) != REQ_OP_READ) {
if (cmd->ret < 0)
ret = errno_to_blk_status(cmd->ret);
@@ -358,14 +292,13 @@ static void lo_complete_rq(struct request *rq)
cmd->ret = 0;
blk_mq_requeue_request(rq, true);
} else {
- if (cmd->use_aio) {
- struct bio *bio = rq->bio;
+ struct bio *bio = rq->bio;
- while (bio) {
- zero_fill_bio(bio);
- bio = bio->bi_next;
- }
+ while (bio) {
+ zero_fill_bio(bio);
+ bio = bio->bi_next;
}
+
ret = BLK_STS_IOERR;
end_io:
blk_mq_end_request(rq, ret);
@@ -445,9 +378,14 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
cmd->iocb.ki_pos = pos;
cmd->iocb.ki_filp = file;
- cmd->iocb.ki_complete = lo_rw_aio_complete;
- cmd->iocb.ki_flags = IOCB_DIRECT;
cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+ if (cmd->use_aio) {
+ cmd->iocb.ki_complete = lo_rw_aio_complete;
+ cmd->iocb.ki_flags = IOCB_DIRECT;
+ } else {
+ cmd->iocb.ki_complete = NULL;
+ cmd->iocb.ki_flags = 0;
+ }
if (rw == ITER_SOURCE)
ret = file->f_op->write_iter(&cmd->iocb, &iter);
@@ -458,7 +396,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
if (ret != -EIOCBQUEUED)
lo_rw_aio_complete(&cmd->iocb, ret);
- return 0;
+ return -EIOCBQUEUED;
}
static int do_req_filebacked(struct loop_device *lo, struct request *rq)
@@ -466,15 +404,6 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
loff_t pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset;
- /*
- * lo_write_simple and lo_read_simple should have been covered
- * by io submit style function like lo_rw_aio(), one blocker
- * is that lo_read_simple() need to call flush_dcache_page after
- * the page is written from kernel, and it isn't easy to handle
- * this in io submit style function which submits all segments
- * of the req at one time. And direct read IO doesn't need to
- * run flush_dcache_page().
- */
switch (req_op(rq)) {
case REQ_OP_FLUSH:
return lo_req_flush(lo, rq);
@@ -490,15 +419,9 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
case REQ_OP_DISCARD:
return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE);
case REQ_OP_WRITE:
- if (cmd->use_aio)
- return lo_rw_aio(lo, cmd, pos, ITER_SOURCE);
- else
- return lo_write_simple(lo, rq, pos);
+ return lo_rw_aio(lo, cmd, pos, ITER_SOURCE);
case REQ_OP_READ:
- if (cmd->use_aio)
- return lo_rw_aio(lo, cmd, pos, ITER_DEST);
- else
- return lo_read_simple(lo, rq, pos);
+ return lo_rw_aio(lo, cmd, pos, ITER_DEST);
default:
WARN_ON_ONCE(1);
return -EIO;
@@ -1921,7 +1844,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
struct loop_device *lo = rq->q->queuedata;
int ret = 0;
struct mem_cgroup *old_memcg = NULL;
- const bool use_aio = cmd->use_aio;
if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
ret = -EIO;
@@ -1951,7 +1873,7 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
}
failed:
/* complete non-aio request */
- if (!use_aio || ret) {
+ if (ret != -EIOCBQUEUED) {
if (ret == -EOPNOTSUPP)
cmd->ret = ret;
else
--
2.47.2
* Re: [PATCH] loop: stop using vfs_iter_{read,write} for buffered I/O
2025-04-09 13:09 [PATCH] loop: stop using vfs_iter_{read,write} for buffered I/O Christoph Hellwig
@ 2025-04-09 13:44 ` Ming Lei
2025-04-10 7:34 ` Christoph Hellwig
2025-04-15 3:41 ` Ming Lei
2025-04-16 0:59 ` Jens Axboe
2 siblings, 1 reply; 8+ messages in thread
From: Ming Lei @ 2025-04-09 13:44 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: axboe, djwong, linux-block
On Wed, Apr 09, 2025 at 03:09:40PM +0200, Christoph Hellwig wrote:
> vfs_iter_{read,write} always perform direct I/O when the file has the
> O_DIRECT flag set, which breaks disabling direct I/O using the
> LOOP_SET_STATUS / LOOP_SET_STATUS64 ioctls.
So dio is disabled automatically because lo_offset is changed in
LOOP_SET_STATUS, but the backing file is still opened with O_DIRECT,
and then dio fails?
But Darrick reports it is caused by changing the sector size, not by
LOOP_SET_STATUS.
>
> This was recently reported as a regression, but as far as I can tell
> it was only uncovered by better checking for block sizes and has been
> around since direct I/O support was added.
What is the 1st real bad commit for this regression? I think it is useful
for backporting. Or is it a new test case?
>
> Fix this by using the existing aio code that calls the raw read/write
> iter methods instead. Note that despite the comments there is no need
> for block drivers to ever call flush_dcache_page themselves, and the
> call is a left-over from prehistoric times.
>
> Fixes: ab1cb278bc70 ("block: loop: introduce ioctl command of LOOP_SET_DIRECT_IO")
Why is the issue related to ioctl(LOOP_SET_DIRECT_IO)?
Thanks,
Ming
* Re: [PATCH] loop: stop using vfs_iter_{read,write} for buffered I/O
2025-04-09 13:44 ` Ming Lei
@ 2025-04-10 7:34 ` Christoph Hellwig
2025-04-10 22:07 ` Darrick J. Wong
2025-04-15 3:33 ` Ming Lei
0 siblings, 2 replies; 8+ messages in thread
From: Christoph Hellwig @ 2025-04-10 7:34 UTC (permalink / raw)
To: Ming Lei; +Cc: Christoph Hellwig, axboe, djwong, linux-block
On Wed, Apr 09, 2025 at 09:44:41PM +0800, Ming Lei wrote:
> On Wed, Apr 09, 2025 at 03:09:40PM +0200, Christoph Hellwig wrote:
> > vfs_iter_{read,write} always perform direct I/O when the file has the
> > O_DIRECT flag set, which breaks disabling direct I/O using the
> > LOOP_SET_STATUS / LOOP_SET_STATUS64 ioctls.
>
> So dio is disabled automatically because lo_offset is changed in
> LOOP_SET_STATUS, but the backing file is still opened with O_DIRECT,
> and then dio fails?
>
> But Darrick reports it is caused by changing the sector size, not by
> LOOP_SET_STATUS.
LOOP_SET_STATUS changes the direct I/O flag.
This is the minimal reproducer; dev needs to be a 4k LBA size device:
dev=/dev/nvme0n1
mkfs.xfs -f $dev
mount $dev /mnt
truncate -s 30g /mnt/a
losetup --direct-io=on -f --show /mnt/a
losetup --direct-io=off /dev/loop0
losetup --sector-size 2048 /dev/loop0
mkfs.xfs /dev/loop0
mkfs then fails with an I/O error.
(I plan to wire up something like this for blktests)
> > This was recently reported as a regression, but as far as I can tell
> > it was only uncovered by better checking for block sizes and has been
> > around since direct I/O support was added.
>
> What is the 1st real bad commit for this regression? I think it is useful
> for backporting. Or is it a new test case?
Not entirely sure, maybe Darrick can fill in.
>
> >
> > Fix this by using the existing aio code that calls the raw read/write
> > iter methods instead. Note that despite the comments there is no need
> > for block drivers to ever call flush_dcache_page themselves, and the
> > call is a left-over from prehistoric times.
> >
> > Fixes: ab1cb278bc70 ("block: loop: introduce ioctl command of LOOP_SET_DIRECT_IO")
>
> Why is the issue related to ioctl(LOOP_SET_DIRECT_IO)?
>
>
> Thanks,
> Ming
---end quoted text---
* Re: [PATCH] loop: stop using vfs_iter_{read,write} for buffered I/O
2025-04-10 7:34 ` Christoph Hellwig
@ 2025-04-10 22:07 ` Darrick J. Wong
2025-04-15 3:33 ` Ming Lei
1 sibling, 0 replies; 8+ messages in thread
From: Darrick J. Wong @ 2025-04-10 22:07 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Ming Lei, axboe, linux-block
On Thu, Apr 10, 2025 at 09:34:39AM +0200, Christoph Hellwig wrote:
> On Wed, Apr 09, 2025 at 09:44:41PM +0800, Ming Lei wrote:
> > On Wed, Apr 09, 2025 at 03:09:40PM +0200, Christoph Hellwig wrote:
> > > vfs_iter_{read,write} always perform direct I/O when the file has the
> > > O_DIRECT flag set, which breaks disabling direct I/O using the
> > > LOOP_SET_STATUS / LOOP_SET_STATUS64 ioctls.
> >
> > So dio is disabled automatically because lo_offset is changed in
> > LOOP_SET_STATUS, but the backing file is still opened with O_DIRECT,
> > and then dio fails?
> >
> > But Darrick reports it is caused by changing the sector size, not by
> > LOOP_SET_STATUS.
>
> LOOP_SET_STATUS changes the direct I/O flag.
>
> This is the minimal reproducer; dev needs to be a 4k LBA size device:
>
> dev=/dev/nvme0n1
>
> mkfs.xfs -f $dev
> mount $dev /mnt
>
> truncate -s 30g /mnt/a
> losetup --direct-io=on -f --show /mnt/a
> losetup --direct-io=off /dev/loop0
> losetup --sector-size 2048 /dev/loop0
> mkfs.xfs /dev/loop0
>
> mkfs then fails with an I/O error.
>
> (I plan to wire up something like this for blktests)
Please cc me so I can pick through the test. :)
> > > This was recently reported as a regression, but as far as I can tell
> > > it was only uncovered by better checking for block sizes and has been
> > > around since direct I/O support was added.
> >
> > What is the 1st real bad commit for this regression? I think it is useful
> > for backporting. Or is it a new test case?
>
> Not entirely sure, maybe Darrick can fill in.
It was a surprisingly hard struggle to get losetup on RHEL8 and 9 to
cooperate. It doesn't look like 5.4 or 5.15 are affected. Regrettably
it's going to be harder to test 6.1 and 6.6 because I don't have kernels
at the ready, and grubby is a PITA to deal with.
--D
> >
> > >
> > > Fix this by using the existing aio code that calls the raw read/write
> > > iter methods instead. Note that despite the comments there is no need
> > > for block drivers to ever call flush_dcache_page themselves, and the
> > > call is a left-over from prehistoric times.
> > >
> > > Fixes: ab1cb278bc70 ("block: loop: introduce ioctl command of LOOP_SET_DIRECT_IO")
> >
> > Why is the issue related to ioctl(LOOP_SET_DIRECT_IO)?
> >
> >
> > Thanks,
> > Ming
> ---end quoted text---
* Re: [PATCH] loop: stop using vfs_iter_{read,write} for buffered I/O
2025-04-10 7:34 ` Christoph Hellwig
2025-04-10 22:07 ` Darrick J. Wong
@ 2025-04-15 3:33 ` Ming Lei
1 sibling, 0 replies; 8+ messages in thread
From: Ming Lei @ 2025-04-15 3:33 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: axboe, djwong, linux-block
On Thu, Apr 10, 2025 at 09:34:39AM +0200, Christoph Hellwig wrote:
> On Wed, Apr 09, 2025 at 09:44:41PM +0800, Ming Lei wrote:
> > On Wed, Apr 09, 2025 at 03:09:40PM +0200, Christoph Hellwig wrote:
> > > vfs_iter_{read,write} always perform direct I/O when the file has the
> > > O_DIRECT flag set, which breaks disabling direct I/O using the
> > > LOOP_SET_STATUS / LOOP_SET_STATUS64 ioctls.
> >
> > So dio is disabled automatically because lo_offset is changed in
> > LOOP_SET_STATUS, but backing file is still opened with O_DIRECT,
> > then dio fails?
> >
> > But Darrick reports it is caused by changing sector size, instead of
> > LOOP_SET_STATUS.
>
> LOOP_SET_STATUS changes the direct I/O flag.
That isn't true in the following test case.
>
> This is the minimal reproducer, dev needs to be a 4k lba size device:
>
> dev=/dev/nvme0n1
>
> mkfs.xfs -f $dev
> mount $dev /mnt
>
> truncate -s 30g /mnt/a
> losetup --direct-io=on -f --show /mnt/a
> losetup --direct-io=off /dev/loop0
The direct I/O flag is changed by LOOP_SET_DIRECT_IO instead of
LOOP_SET_STATUS.
losetup forgets that the backing file is opened with O_DIRECT, and
util-linux needs a fix too, such as calling F_GETFL/F_SETFL to clear
O_DIRECT.
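A minimal userspace sketch of that fcntl dance (illustrative only,
assumed names, not actual util-linux code):

#include <fcntl.h>

/* Drop O_DIRECT from an already-open backing file descriptor. */
static int clear_o_direct(int fd)
{
	int flags = fcntl(fd, F_GETFL);

	if (flags < 0)
		return -1;
	return fcntl(fd, F_SETFL, flags & ~O_DIRECT);
}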
I guess it is hard to figure out a simple kernel fix that clears
IOCB_DIRECT in file->f_iocb_flags for backporting.
Thanks,
Ming
* Re: [PATCH] loop: stop using vfs_iter_{read,write} for buffered I/O
2025-04-09 13:09 [PATCH] loop: stop using vfs_iter_{read,write} for buffered I/O Christoph Hellwig
2025-04-09 13:44 ` Ming Lei
@ 2025-04-15 3:41 ` Ming Lei
2025-04-16 0:47 ` Darrick J. Wong
2025-04-16 0:59 ` Jens Axboe
2 siblings, 1 reply; 8+ messages in thread
From: Ming Lei @ 2025-04-15 3:41 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: axboe, djwong, linux-block
On Wed, Apr 09, 2025 at 03:09:40PM +0200, Christoph Hellwig wrote:
> vfs_iter_{read,write} always perform direct I/O when the file has the
> O_DIRECT flag set, which breaks disabling direct I/O using the
> LOOP_SET_STATUS / LOOP_SET_STATUS64 ioctls.
>
> This was recently reported as a regression, but as far as I can tell
> it was only uncovered by better checking for block sizes and has been
> around since direct I/O support was added.
>
> Fix this by using the existing aio code that calls the raw read/write
> iter methods instead. Note that despite the comments there is no need
> for block drivers to ever call flush_dcache_page themselves, and the
> call is a left-over from prehistoric times.
>
> Fixes: ab1cb278bc70 ("block: loop: introduce ioctl command of LOOP_SET_DIRECT_IO")
> Reported-by: Darrick J. Wong <djwong@kernel.org>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Looks fine,
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Thanks,
Ming
* Re: [PATCH] loop: stop using vfs_iter_{read,write} for buffered I/O
2025-04-15 3:41 ` Ming Lei
@ 2025-04-16 0:47 ` Darrick J. Wong
0 siblings, 0 replies; 8+ messages in thread
From: Darrick J. Wong @ 2025-04-16 0:47 UTC (permalink / raw)
To: Ming Lei; +Cc: Christoph Hellwig, axboe, linux-block
On Tue, Apr 15, 2025 at 11:41:00AM +0800, Ming Lei wrote:
> On Wed, Apr 09, 2025 at 03:09:40PM +0200, Christoph Hellwig wrote:
> > vfs_iter_{read,write} always perform direct I/O when the file has the
> > O_DIRECT flag set, which breaks disabling direct I/O using the
> > LOOP_SET_STATUS / LOOP_SET_STATUS64 ioctls.
> >
> > This was recently reported as a regression, but as far as I can tell
> > it was only uncovered by better checking for block sizes and has been
> > around since direct I/O support was added.
> >
> > Fix this by using the existing aio code that calls the raw read/write
> > iter methods instead. Note that despite the comments there is no need
> > for block drivers to ever call flush_dcache_page themselves, and the
> > call is a left-over from prehistoric times.
> >
> > Fixes: ab1cb278bc70 ("block: loop: introduce ioctl command of LOOP_SET_DIRECT_IO")
> > Reported-by: Darrick J. Wong <djwong@kernel.org>
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
>
> Looks fine,
>
> Reviewed-by: Ming Lei <ming.lei@redhat.com>
This seems to resolve the problem, thank you!
Tested-by: "Darrick J. Wong" <djwong@kernel.org>
--D
>
>
> Thanks,
> Ming
>
* Re: [PATCH] loop: stop using vfs_iter_{read,write} for buffered I/O
2025-04-09 13:09 [PATCH] loop: stop using vfs_iter_{read,write} for buffered I/O Christoph Hellwig
2025-04-09 13:44 ` Ming Lei
2025-04-15 3:41 ` Ming Lei
@ 2025-04-16 0:59 ` Jens Axboe
2 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2025-04-16 0:59 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: djwong, linux-block, ming.lei
On Wed, 09 Apr 2025 15:09:40 +0200, Christoph Hellwig wrote:
> vfs_iter_{read,write} always perform direct I/O when the file has the
> O_DIRECT flag set, which breaks disabling direct I/O using the
> LOOP_SET_STATUS / LOOP_SET_STATUS64 ioctls.
>
> This was recently reported as a regression, but as far as I can tell
> it was only uncovered by better checking for block sizes and has been
> around since direct I/O support was added.
>
> [...]
Applied, thanks!
[1/1] loop: stop using vfs_iter_{read,write} for buffered I/O
commit: f2fed441c69b9237760840a45a004730ff324faf
Best regards,
--
Jens Axboe