* [PATCH AUTOSEL 6.11 237/244] iomap: fix iomap_dio_zero() for fs bs > system page size
[not found] <20240925113641.1297102-1-sashal@kernel.org>
@ 2024-09-25 11:27 ` Sasha Levin
2024-09-25 13:01 ` Dave Chinner
2024-09-25 11:27 ` [PATCH AUTOSEL 6.11 242/244] iomap: handle a post-direct I/O invalidate race in iomap_write_delalloc_release Sasha Levin
1 sibling, 1 reply; 5+ messages in thread
From: Sasha Levin @ 2024-09-25 11:27 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Pankaj Raghav, Hannes Reinecke, Darrick J . Wong, Dave Chinner,
Daniel Gomez, Christian Brauner, Sasha Levin, linux-xfs,
linux-fsdevel
From: Pankaj Raghav <p.raghav@samsung.com>
[ Upstream commit 10553a91652d995274da63fc317470f703765081 ]
iomap_dio_zero() will pad a fs block with zeroes if the direct IO size
< fs block size. iomap_dio_zero() has an implicit assumption that fs block
size < page_size. This is true for most filesystems at the moment.
If the block size > page size, this will send the contents of the page
next to zero page(as len > PAGE_SIZE) to the underlying block device,
causing FS corruption.
iomap is a generic infrastructure and it should not make any assumptions
about the fs block size and the page size of the system.
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Link: https://lore.kernel.org/r/20240822135018.1931258-7-kernel@pankajraghav.com
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Daniel Gomez <da.gomez@samsung.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/iomap/buffered-io.c | 4 ++--
fs/iomap/direct-io.c | 45 ++++++++++++++++++++++++++++++++++++------
2 files changed, 41 insertions(+), 8 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index f420c53d86acc..d745f718bcde8 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -2007,10 +2007,10 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
}
EXPORT_SYMBOL_GPL(iomap_writepages);
-static int __init iomap_init(void)
+static int __init iomap_buffered_init(void)
{
return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
offsetof(struct iomap_ioend, io_bio),
BIOSET_NEED_BVECS);
}
-fs_initcall(iomap_init);
+fs_initcall(iomap_buffered_init);
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index f3b43d223a46e..c02b266bba525 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -11,6 +11,7 @@
#include <linux/iomap.h>
#include <linux/backing-dev.h>
#include <linux/uio.h>
+#include <linux/set_memory.h>
#include <linux/task_io_accounting_ops.h>
#include "trace.h"
@@ -27,6 +28,13 @@
#define IOMAP_DIO_WRITE (1U << 30)
#define IOMAP_DIO_DIRTY (1U << 31)
+/*
+ * Used for sub block zeroing in iomap_dio_zero()
+ */
+#define IOMAP_ZERO_PAGE_SIZE (SZ_64K)
+#define IOMAP_ZERO_PAGE_ORDER (get_order(IOMAP_ZERO_PAGE_SIZE))
+static struct page *zero_page;
+
struct iomap_dio {
struct kiocb *iocb;
const struct iomap_dio_ops *dops;
@@ -232,13 +240,20 @@ void iomap_dio_bio_end_io(struct bio *bio)
}
EXPORT_SYMBOL_GPL(iomap_dio_bio_end_io);
-static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
+static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
loff_t pos, unsigned len)
{
struct inode *inode = file_inode(dio->iocb->ki_filp);
- struct page *page = ZERO_PAGE(0);
struct bio *bio;
+ if (!len)
+ return 0;
+ /*
+ * Max block size supported is 64k
+ */
+ if (WARN_ON_ONCE(len > IOMAP_ZERO_PAGE_SIZE))
+ return -EINVAL;
+
bio = iomap_dio_alloc_bio(iter, dio, 1, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
GFP_KERNEL);
@@ -246,8 +261,9 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
- __bio_add_page(bio, page, len, 0);
+ __bio_add_page(bio, zero_page, len, 0);
iomap_dio_submit_bio(iter, dio, bio, pos);
+ return 0;
}
/*
@@ -356,8 +372,10 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
if (need_zeroout) {
/* zero out from the start of the block to the write offset */
pad = pos & (fs_block_size - 1);
- if (pad)
- iomap_dio_zero(iter, dio, pos - pad, pad);
+
+ ret = iomap_dio_zero(iter, dio, pos - pad, pad);
+ if (ret)
+ goto out;
}
/*
@@ -431,7 +449,8 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
/* zero out from the end of the write to the end of the block */
pad = pos & (fs_block_size - 1);
if (pad)
- iomap_dio_zero(iter, dio, pos, fs_block_size - pad);
+ ret = iomap_dio_zero(iter, dio, pos,
+ fs_block_size - pad);
}
out:
/* Undo iter limitation to current extent */
@@ -753,3 +772,17 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
return iomap_dio_complete(dio);
}
EXPORT_SYMBOL_GPL(iomap_dio_rw);
+
+static int __init iomap_dio_init(void)
+{
+ zero_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
+ IOMAP_ZERO_PAGE_ORDER);
+
+ if (!zero_page)
+ return -ENOMEM;
+
+ set_memory_ro((unsigned long)page_address(zero_page),
+ 1U << IOMAP_ZERO_PAGE_ORDER);
+ return 0;
+}
+fs_initcall(iomap_dio_init);
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH AUTOSEL 6.11 242/244] iomap: handle a post-direct I/O invalidate race in iomap_write_delalloc_release
[not found] <20240925113641.1297102-1-sashal@kernel.org>
2024-09-25 11:27 ` [PATCH AUTOSEL 6.11 237/244] iomap: fix iomap_dio_zero() for fs bs > system page size Sasha Levin
@ 2024-09-25 11:27 ` Sasha Levin
1 sibling, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2024-09-25 11:27 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Christoph Hellwig, Darrick J . Wong, Christian Brauner,
Sasha Levin, linux-xfs, linux-fsdevel
From: Christoph Hellwig <hch@lst.de>
[ Upstream commit 7a9d43eace888a0ee6095035997bb138425844d3 ]
When direct I/O completions invalidates the page cache it holds neither the
i_rwsem nor the invalidate_lock so it can be racing with
iomap_write_delalloc_release. If the search for the end of the region that
contains data returns the start offset we hit such a race and just need to
look for the end of the newly created hole instead.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20240910043949.3481298-2-hch@lst.de
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/iomap/buffered-io.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index d745f718bcde8..ca928cc9be49a 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1241,7 +1241,15 @@ static int iomap_write_delalloc_release(struct inode *inode,
error = data_end;
goto out_unlock;
}
- WARN_ON_ONCE(data_end <= start_byte);
+
+ /*
+ * If we race with post-direct I/O invalidation of the page cache,
+ * there might be no data left at start_byte.
+ */
+ if (data_end == start_byte)
+ continue;
+
+ WARN_ON_ONCE(data_end < start_byte);
WARN_ON_ONCE(data_end > scan_end_byte);
error = iomap_write_delalloc_scan(inode, &punch_start_byte,
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH AUTOSEL 6.11 237/244] iomap: fix iomap_dio_zero() for fs bs > system page size
2024-09-25 11:27 ` [PATCH AUTOSEL 6.11 237/244] iomap: fix iomap_dio_zero() for fs bs > system page size Sasha Levin
@ 2024-09-25 13:01 ` Dave Chinner
2024-09-26 7:58 ` Pankaj Raghav (Samsung)
2024-10-06 0:30 ` Sasha Levin
0 siblings, 2 replies; 5+ messages in thread
From: Dave Chinner @ 2024-09-25 13:01 UTC (permalink / raw)
To: Sasha Levin
Cc: linux-kernel, stable, Pankaj Raghav, Hannes Reinecke,
Darrick J . Wong, Dave Chinner, Daniel Gomez, Christian Brauner,
linux-xfs, linux-fsdevel
On Wed, Sep 25, 2024 at 07:27:38AM -0400, Sasha Levin wrote:
> From: Pankaj Raghav <p.raghav@samsung.com>
>
> [ Upstream commit 10553a91652d995274da63fc317470f703765081 ]
>
> iomap_dio_zero() will pad a fs block with zeroes if the direct IO size
> < fs block size. iomap_dio_zero() has an implicit assumption that fs block
> size < page_size. This is true for most filesystems at the moment.
>
> If the block size > page size, this will send the contents of the page
> next to zero page(as len > PAGE_SIZE) to the underlying block device,
> causing FS corruption.
>
> iomap is a generic infrastructure and it should not make any assumptions
> about the fs block size and the page size of the system.
Please drop this. It is for support of new functionality that was
just merged and has no relevance to older kernels. It is not a bug
fix.
And ....
> +
> + set_memory_ro((unsigned long)page_address(zero_page),
> + 1U << IOMAP_ZERO_PAGE_ORDER);
.... this will cause stable kernel regressions.
It was removed later in the merge because it is unnecessary and
causes boot failures on (at least) some Power architectures.
-Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: Re: [PATCH AUTOSEL 6.11 237/244] iomap: fix iomap_dio_zero() for fs bs > system page size
2024-09-25 13:01 ` Dave Chinner
@ 2024-09-26 7:58 ` Pankaj Raghav (Samsung)
2024-10-06 0:30 ` Sasha Levin
1 sibling, 0 replies; 5+ messages in thread
From: Pankaj Raghav (Samsung) @ 2024-09-26 7:58 UTC (permalink / raw)
To: Dave Chinner
Cc: Sasha Levin, linux-kernel, stable, Pankaj Raghav, Hannes Reinecke,
Darrick J . Wong, Dave Chinner, Daniel Gomez, Christian Brauner,
linux-xfs, linux-fsdevel
On Wed, Sep 25, 2024 at 11:01:46PM +1000, Dave Chinner wrote:
> On Wed, Sep 25, 2024 at 07:27:38AM -0400, Sasha Levin wrote:
> > From: Pankaj Raghav <p.raghav@samsung.com>
> >
> > [ Upstream commit 10553a91652d995274da63fc317470f703765081 ]
> >
> > iomap_dio_zero() will pad a fs block with zeroes if the direct IO size
> > < fs block size. iomap_dio_zero() has an implicit assumption that fs block
> > size < page_size. This is true for most filesystems at the moment.
> >
> > If the block size > page size, this will send the contents of the page
> > next to zero page(as len > PAGE_SIZE) to the underlying block device,
> > causing FS corruption.
> >
> > iomap is a generic infrastructure and it should not make any assumptions
> > about the fs block size and the page size of the system.
>
> Please drop this. It is for support of new functionality that was
> just merged and has no relevance to older kernels. It is not a bug
> fix.
>
I did not have any fixes by tag for this reason. So please drop this commit
from the queue.
> And ....
>
> > +
> > + set_memory_ro((unsigned long)page_address(zero_page),
> > + 1U << IOMAP_ZERO_PAGE_ORDER);
>
> .... this will cause stable kernel regressions.
>
> It was removed later in the merge because it is unnecessary and
> causes boot failures on (at least) some Power architectures.
>
> -Dave.
> --
> Dave Chinner
> david@fromorbit.com
--
Pankaj Raghav
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH AUTOSEL 6.11 237/244] iomap: fix iomap_dio_zero() for fs bs > system page size
2024-09-25 13:01 ` Dave Chinner
2024-09-26 7:58 ` Pankaj Raghav (Samsung)
@ 2024-10-06 0:30 ` Sasha Levin
1 sibling, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2024-10-06 0:30 UTC (permalink / raw)
To: Dave Chinner
Cc: linux-kernel, stable, Pankaj Raghav, Hannes Reinecke,
Darrick J . Wong, Dave Chinner, Daniel Gomez, Christian Brauner,
linux-xfs, linux-fsdevel
On Wed, Sep 25, 2024 at 11:01:46PM +1000, Dave Chinner wrote:
>On Wed, Sep 25, 2024 at 07:27:38AM -0400, Sasha Levin wrote:
>> From: Pankaj Raghav <p.raghav@samsung.com>
>>
>> [ Upstream commit 10553a91652d995274da63fc317470f703765081 ]
>>
>> iomap_dio_zero() will pad a fs block with zeroes if the direct IO size
>> < fs block size. iomap_dio_zero() has an implicit assumption that fs block
>> size < page_size. This is true for most filesystems at the moment.
>>
>> If the block size > page size, this will send the contents of the page
>> next to zero page(as len > PAGE_SIZE) to the underlying block device,
>> causing FS corruption.
>>
>> iomap is a generic infrastructure and it should not make any assumptions
>> about the fs block size and the page size of the system.
>
>Please drop this. It is for support of new functionality that was
>just merged and has no relevance to older kernels. It is not a bug
>fix.
>
>And ....
>
>> +
>> + set_memory_ro((unsigned long)page_address(zero_page),
>> + 1U << IOMAP_ZERO_PAGE_ORDER);
>
>.... this will cause stable kernel regressions.
>
>It was removed later in the merge because it is unnecessary and
>causes boot failures on (at least) some Power architectures.
Dropped, and sorry about sending this one out twice!
--
Thanks,
Sasha
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-10-06 0:30 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20240925113641.1297102-1-sashal@kernel.org>
2024-09-25 11:27 ` [PATCH AUTOSEL 6.11 237/244] iomap: fix iomap_dio_zero() for fs bs > system page size Sasha Levin
2024-09-25 13:01 ` Dave Chinner
2024-09-26 7:58 ` Pankaj Raghav (Samsung)
2024-10-06 0:30 ` Sasha Levin
2024-09-25 11:27 ` [PATCH AUTOSEL 6.11 242/244] iomap: handle a post-direct I/O invalidate race in iomap_write_delalloc_release Sasha Levin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox