* [PATCH 1/8] iomap: Pass struct iomap to iomap_alloc_ioend()
2021-08-11 2:46 [PATCH 0/8] iomap writethrough for O_SYNC writes Matthew Wilcox (Oracle)
@ 2021-08-11 2:46 ` Matthew Wilcox (Oracle)
2021-08-11 2:46 ` [PATCH 2/8] iomap: Remove iomap_writepage_ctx from iomap_can_add_to_ioend() Matthew Wilcox (Oracle)
` (6 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2021-08-11 2:46 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel; +Cc: Matthew Wilcox (Oracle)
iomap_alloc_ioend() does not need the rest of iomap_writepage_ctx;
only the iomap contained in it.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/iomap/buffered-io.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index c1c8cd41ea81..3bf65daf55fc 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1190,15 +1190,15 @@ iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend,
return 0;
}
-static struct iomap_ioend *
-iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
- loff_t offset, sector_t sector, struct writeback_control *wbc)
+static struct iomap_ioend *iomap_alloc_ioend(struct inode *inode,
+ struct iomap *iomap, loff_t offset, sector_t sector,
+ struct writeback_control *wbc)
{
struct iomap_ioend *ioend;
struct bio *bio;
bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &iomap_ioend_bioset);
- bio_set_dev(bio, wpc->iomap.bdev);
+ bio_set_dev(bio, iomap->bdev);
bio->bi_iter.bi_sector = sector;
bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
bio->bi_write_hint = inode->i_write_hint;
@@ -1206,8 +1206,8 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
ioend = container_of(bio, struct iomap_ioend, io_inline_bio);
INIT_LIST_HEAD(&ioend->io_list);
- ioend->io_type = wpc->iomap.type;
- ioend->io_flags = wpc->iomap.flags;
+ ioend->io_type = iomap->type;
+ ioend->io_flags = iomap->flags;
ioend->io_inode = inode;
ioend->io_size = 0;
ioend->io_offset = offset;
@@ -1264,14 +1264,16 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
struct iomap_page *iop, struct iomap_writepage_ctx *wpc,
struct writeback_control *wbc, struct list_head *iolist)
{
- sector_t sector = iomap_sector(&wpc->iomap, offset);
+ struct iomap *iomap = &wpc->iomap;
+ sector_t sector = iomap_sector(iomap, offset);
unsigned len = i_blocksize(inode);
unsigned poff = offset & (PAGE_SIZE - 1);
if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, offset, sector)) {
if (wpc->ioend)
list_add(&wpc->ioend->io_list, iolist);
- wpc->ioend = iomap_alloc_ioend(inode, wpc, offset, sector, wbc);
+ wpc->ioend = iomap_alloc_ioend(inode, iomap, offset, sector,
+ wbc);
}
if (bio_add_page(wpc->ioend->io_bio, page, len, poff) != len) {
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 2/8] iomap: Remove iomap_writepage_ctx from iomap_can_add_to_ioend()
2021-08-11 2:46 [PATCH 0/8] iomap writethrough for O_SYNC writes Matthew Wilcox (Oracle)
2021-08-11 2:46 ` [PATCH 1/8] iomap: Pass struct iomap to iomap_alloc_ioend() Matthew Wilcox (Oracle)
@ 2021-08-11 2:46 ` Matthew Wilcox (Oracle)
2021-08-11 2:46 ` [PATCH 3/8] iomap: Do not pass iomap_writepage_ctx to iomap_add_to_ioend() Matthew Wilcox (Oracle)
` (5 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2021-08-11 2:46 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel; +Cc: Matthew Wilcox (Oracle)
In preparation for using this function without an iomap_writepage_ctx,
pass in the iomap and ioend. Also simplify iomap_add_to_ioend() by
using the iomap & ioend directly.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/iomap/buffered-io.c | 34 +++++++++++++++++-----------------
1 file changed, 17 insertions(+), 17 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 3bf65daf55fc..5ff3369718a1 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1239,18 +1239,17 @@ iomap_chain_bio(struct bio *prev)
return new;
}
-static bool
-iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
- sector_t sector)
+static bool iomap_can_add_to_ioend(struct iomap *iomap,
+ struct iomap_ioend *ioend, loff_t offset, sector_t sector)
{
- if ((wpc->iomap.flags & IOMAP_F_SHARED) !=
- (wpc->ioend->io_flags & IOMAP_F_SHARED))
+ if ((iomap->flags & IOMAP_F_SHARED) !=
+ (ioend->io_flags & IOMAP_F_SHARED))
return false;
- if (wpc->iomap.type != wpc->ioend->io_type)
+ if (iomap->type != ioend->io_type)
return false;
- if (offset != wpc->ioend->io_offset + wpc->ioend->io_size)
+ if (offset != ioend->io_offset + ioend->io_size)
return false;
- if (sector != bio_end_sector(wpc->ioend->io_bio))
+ if (sector != bio_end_sector(ioend->io_bio))
return false;
return true;
}
@@ -1265,25 +1264,26 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
struct writeback_control *wbc, struct list_head *iolist)
{
struct iomap *iomap = &wpc->iomap;
+ struct iomap_ioend *ioend = wpc->ioend;
sector_t sector = iomap_sector(iomap, offset);
unsigned len = i_blocksize(inode);
unsigned poff = offset & (PAGE_SIZE - 1);
- if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, offset, sector)) {
- if (wpc->ioend)
- list_add(&wpc->ioend->io_list, iolist);
- wpc->ioend = iomap_alloc_ioend(inode, iomap, offset, sector,
- wbc);
+ if (!ioend || !iomap_can_add_to_ioend(iomap, ioend, offset, sector)) {
+ if (ioend)
+ list_add(&ioend->io_list, iolist);
+ ioend = iomap_alloc_ioend(inode, iomap, offset, sector, wbc);
+ wpc->ioend = ioend;
}
- if (bio_add_page(wpc->ioend->io_bio, page, len, poff) != len) {
- wpc->ioend->io_bio = iomap_chain_bio(wpc->ioend->io_bio);
- __bio_add_page(wpc->ioend->io_bio, page, len, poff);
+ if (bio_add_page(ioend->io_bio, page, len, poff) != len) {
+ ioend->io_bio = iomap_chain_bio(ioend->io_bio);
+ __bio_add_page(ioend->io_bio, page, len, poff);
}
if (iop)
atomic_add(len, &iop->write_bytes_pending);
- wpc->ioend->io_size += len;
+ ioend->io_size += len;
wbc_account_cgroup_owner(wbc, page, len);
}
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 3/8] iomap: Do not pass iomap_writepage_ctx to iomap_add_to_ioend()
2021-08-11 2:46 [PATCH 0/8] iomap writethrough for O_SYNC writes Matthew Wilcox (Oracle)
2021-08-11 2:46 ` [PATCH 1/8] iomap: Pass struct iomap to iomap_alloc_ioend() Matthew Wilcox (Oracle)
2021-08-11 2:46 ` [PATCH 2/8] iomap: Remove iomap_writepage_ctx from iomap_can_add_to_ioend() Matthew Wilcox (Oracle)
@ 2021-08-11 2:46 ` Matthew Wilcox (Oracle)
2021-08-11 2:46 ` [PATCH 4/8] iomap: Accept a NULL iomap_writepage_ctx in iomap_submit_ioend() Matthew Wilcox (Oracle)
` (4 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2021-08-11 2:46 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel; +Cc: Matthew Wilcox (Oracle)
In preparation for calling iomap_add_to_ioend() without a writepage_ctx
available, pass in the iomap and the (current) ioend, and return the
current ioend.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/iomap/buffered-io.c | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 5ff3369718a1..d92943332c6c 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1258,22 +1258,19 @@ static bool iomap_can_add_to_ioend(struct iomap *iomap,
* Test to see if we have an existing ioend structure that we could append to
* first; otherwise finish off the current ioend and start another.
*/
-static void
-iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
- struct iomap_page *iop, struct iomap_writepage_ctx *wpc,
+static struct iomap_ioend *iomap_add_to_ioend(struct inode *inode,
+ loff_t pos, struct page *page, struct iomap_page *iop,
+ struct iomap *iomap, struct iomap_ioend *ioend,
struct writeback_control *wbc, struct list_head *iolist)
{
- struct iomap *iomap = &wpc->iomap;
- struct iomap_ioend *ioend = wpc->ioend;
- sector_t sector = iomap_sector(iomap, offset);
+ sector_t sector = iomap_sector(iomap, pos);
unsigned len = i_blocksize(inode);
- unsigned poff = offset & (PAGE_SIZE - 1);
+ unsigned poff = offset_in_page(pos);
- if (!ioend || !iomap_can_add_to_ioend(iomap, ioend, offset, sector)) {
+ if (!ioend || !iomap_can_add_to_ioend(iomap, ioend, pos, sector)) {
if (ioend)
list_add(&ioend->io_list, iolist);
- ioend = iomap_alloc_ioend(inode, iomap, offset, sector, wbc);
- wpc->ioend = ioend;
+ ioend = iomap_alloc_ioend(inode, iomap, pos, sector, wbc);
}
if (bio_add_page(ioend->io_bio, page, len, poff) != len) {
@@ -1285,6 +1282,7 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
atomic_add(len, &iop->write_bytes_pending);
ioend->io_size += len;
wbc_account_cgroup_owner(wbc, page, len);
+ return ioend;
}
/*
@@ -1335,8 +1333,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
continue;
if (wpc->iomap.type == IOMAP_HOLE)
continue;
- iomap_add_to_ioend(inode, file_offset, page, iop, wpc, wbc,
- &submit_list);
+ wpc->ioend = iomap_add_to_ioend(inode, file_offset, page, iop,
+ &wpc->iomap, wpc->ioend, wbc, &submit_list);
count++;
}
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 4/8] iomap: Accept a NULL iomap_writepage_ctx in iomap_submit_ioend()
2021-08-11 2:46 [PATCH 0/8] iomap writethrough for O_SYNC writes Matthew Wilcox (Oracle)
` (2 preceding siblings ...)
2021-08-11 2:46 ` [PATCH 3/8] iomap: Do not pass iomap_writepage_ctx to iomap_add_to_ioend() Matthew Wilcox (Oracle)
@ 2021-08-11 2:46 ` Matthew Wilcox (Oracle)
2021-08-11 2:46 ` [PATCH 5/8] iomap: Pass iomap_write_ctx to iomap_write_actor() Matthew Wilcox (Oracle)
` (3 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2021-08-11 2:46 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel; +Cc: Matthew Wilcox (Oracle)
Prepare for I/O without an iomap_writepage_ctx by accepting a NULL
wpc.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/iomap/buffered-io.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index d92943332c6c..ad9d8fe97f2e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1172,7 +1172,7 @@ iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend,
ioend->io_bio->bi_private = ioend;
ioend->io_bio->bi_end_io = iomap_writepage_end_bio;
- if (wpc->ops->prepare_ioend)
+ if (wpc && wpc->ops->prepare_ioend)
error = wpc->ops->prepare_ioend(ioend, error);
if (error) {
/*
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 5/8] iomap: Pass iomap_write_ctx to iomap_write_actor()
2021-08-11 2:46 [PATCH 0/8] iomap writethrough for O_SYNC writes Matthew Wilcox (Oracle)
` (3 preceding siblings ...)
2021-08-11 2:46 ` [PATCH 4/8] iomap: Accept a NULL iomap_writepage_ctx in iomap_submit_ioend() Matthew Wilcox (Oracle)
@ 2021-08-11 2:46 ` Matthew Wilcox (Oracle)
2021-08-11 2:46 ` [PATCH 6/8] iomap: Allow a NULL writeback_control argument to iomap_alloc_ioend() Matthew Wilcox (Oracle)
` (2 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2021-08-11 2:46 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel; +Cc: Matthew Wilcox (Oracle)
We need to pass a little more information to iomap_write_actor(),
so define our own little struct for it.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/iomap/buffered-io.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index ad9d8fe97f2e..a74da66e64a7 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -514,6 +514,13 @@ enum {
IOMAP_WRITE_F_UNSHARE = (1 << 0),
};
+struct iomap_write_ctx {
+ struct iov_iter *iter;
+ struct iomap_ioend *ioend;
+ struct list_head iolist;
+ bool write_through;
+};
+
static void
iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
{
@@ -734,7 +741,8 @@ static loff_t
iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
struct iomap *iomap, struct iomap *srcmap)
{
- struct iov_iter *i = data;
+ struct iomap_write_ctx *iwc = data;
+ struct iov_iter *i = iwc->iter;
long status = 0;
ssize_t written = 0;
@@ -804,12 +812,17 @@ ssize_t
iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops)
{
+ struct iomap_write_ctx iwc = {
+ .iter = iter,
+ .iolist = LIST_HEAD_INIT(iwc.iolist),
+ .write_through = iocb->ki_flags & IOCB_SYNC,
+ };
struct inode *inode = iocb->ki_filp->f_mapping->host;
loff_t pos = iocb->ki_pos, ret = 0, written = 0;
while (iov_iter_count(iter)) {
ret = iomap_apply(inode, pos, iov_iter_count(iter),
- IOMAP_WRITE, ops, iter, iomap_write_actor);
+ IOMAP_WRITE, ops, &iwc, iomap_write_actor);
if (ret <= 0)
break;
pos += ret;
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 6/8] iomap: Allow a NULL writeback_control argument to iomap_alloc_ioend()
2021-08-11 2:46 [PATCH 0/8] iomap writethrough for O_SYNC writes Matthew Wilcox (Oracle)
` (4 preceding siblings ...)
2021-08-11 2:46 ` [PATCH 5/8] iomap: Pass iomap_write_ctx to iomap_write_actor() Matthew Wilcox (Oracle)
@ 2021-08-11 2:46 ` Matthew Wilcox (Oracle)
2021-08-11 2:46 ` [PATCH 7/8] iomap: Pass a length to iomap_add_to_ioend() Matthew Wilcox (Oracle)
2021-08-11 2:46 ` [PATCH 8/8] iomap: Add writethrough for O_SYNC Matthew Wilcox (Oracle)
7 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2021-08-11 2:46 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel; +Cc: Matthew Wilcox (Oracle)
When we're doing writethrough, we don't have a writeback_control to
pass in.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/iomap/buffered-io.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index a74da66e64a7..2b89c43aedd7 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1213,9 +1213,13 @@ static struct iomap_ioend *iomap_alloc_ioend(struct inode *inode,
bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &iomap_ioend_bioset);
bio_set_dev(bio, iomap->bdev);
bio->bi_iter.bi_sector = sector;
- bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+ bio->bi_opf = REQ_OP_WRITE;
bio->bi_write_hint = inode->i_write_hint;
- wbc_init_bio(wbc, bio);
+
+ if (wbc) {
+ bio->bi_opf |= wbc_to_write_flags(wbc);
+ wbc_init_bio(wbc, bio);
+ }
ioend = container_of(bio, struct iomap_ioend, io_inline_bio);
INIT_LIST_HEAD(&ioend->io_list);
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 7/8] iomap: Pass a length to iomap_add_to_ioend()
2021-08-11 2:46 [PATCH 0/8] iomap writethrough for O_SYNC writes Matthew Wilcox (Oracle)
` (5 preceding siblings ...)
2021-08-11 2:46 ` [PATCH 6/8] iomap: Allow a NULL writeback_control argument to iomap_alloc_ioend() Matthew Wilcox (Oracle)
@ 2021-08-11 2:46 ` Matthew Wilcox (Oracle)
2021-08-11 2:46 ` [PATCH 8/8] iomap: Add writethrough for O_SYNC Matthew Wilcox (Oracle)
7 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2021-08-11 2:46 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel; +Cc: Matthew Wilcox (Oracle)
Allow the caller to specify how much of the page to add to the ioend
instead of assuming a single sector. Somebody should probably enhance
iomap_writepage_map() to make one call per extent instead of one per
block.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/iomap/buffered-io.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 2b89c43aedd7..eb068e21d3bb 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1276,12 +1276,12 @@ static bool iomap_can_add_to_ioend(struct iomap *iomap,
* first; otherwise finish off the current ioend and start another.
*/
static struct iomap_ioend *iomap_add_to_ioend(struct inode *inode,
- loff_t pos, struct page *page, struct iomap_page *iop,
- struct iomap *iomap, struct iomap_ioend *ioend,
- struct writeback_control *wbc, struct list_head *iolist)
+ loff_t pos, size_t len, struct page *page,
+ struct iomap_page *iop, struct iomap *iomap,
+ struct iomap_ioend *ioend, struct writeback_control *wbc,
+ struct list_head *iolist)
{
sector_t sector = iomap_sector(iomap, pos);
- unsigned len = i_blocksize(inode);
unsigned poff = offset_in_page(pos);
if (!ioend || !iomap_can_add_to_ioend(iomap, ioend, pos, sector)) {
@@ -1350,8 +1350,9 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
continue;
if (wpc->iomap.type == IOMAP_HOLE)
continue;
- wpc->ioend = iomap_add_to_ioend(inode, file_offset, page, iop,
- &wpc->iomap, wpc->ioend, wbc, &submit_list);
+ wpc->ioend = iomap_add_to_ioend(inode, file_offset, len, page,
+ iop, &wpc->iomap, wpc->ioend, wbc,
+ &submit_list);
count++;
}
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 8/8] iomap: Add writethrough for O_SYNC
2021-08-11 2:46 [PATCH 0/8] iomap writethrough for O_SYNC writes Matthew Wilcox (Oracle)
` (6 preceding siblings ...)
2021-08-11 2:46 ` [PATCH 7/8] iomap: Pass a length to iomap_add_to_ioend() Matthew Wilcox (Oracle)
@ 2021-08-11 2:46 ` Matthew Wilcox (Oracle)
2021-08-12 13:16 ` Christoph Hellwig
7 siblings, 1 reply; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2021-08-11 2:46 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel; +Cc: Matthew Wilcox (Oracle)
For O_SYNC writes, if the filesystem has already allocated blocks for
the range, we can avoid marking the page as dirty and skip straight to
marking the page as writeback.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/iomap/buffered-io.c | 78 +++++++++++++++++++++++++++++++++++-------
1 file changed, 66 insertions(+), 12 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index eb068e21d3bb..93b889338172 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -657,8 +657,45 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
return status;
}
-static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
- size_t copied, struct page *page)
+/* Rearrange file so we don't need this forward declaration */
+static struct iomap_ioend *iomap_add_to_ioend(struct inode *inode,
+ loff_t pos, size_t len, struct page *page,
+ struct iomap_page *iop, struct iomap *iomap,
+ struct iomap_ioend *ioend, struct writeback_control *wbc,
+ struct list_head *iolist);
+
+/* Returns true if we can skip dirtying the page */
+static bool iomap_write_through(struct iomap_write_ctx *iwc,
+ struct iomap *iomap, struct inode *inode, struct page *page,
+ loff_t pos, size_t len)
+{
+ unsigned int blksize = i_blocksize(inode);
+
+ if (!iwc || !iwc->write_through)
+ return false;
+ if (PageDirty(page))
+ return true;
+ if (PageWriteback(page))
+ return false;
+
+ /* Can't allocate blocks here because we don't have ->prepare_ioend */
+	if ((iomap->type != IOMAP_MAPPED && iomap->type != IOMAP_UNWRITTEN) ||
+	    (iomap->flags & IOMAP_F_SHARED))
+ return false;
+
+	len = round_up(pos + len, blksize);
+ pos = round_down(pos, blksize);
+ len -= pos;
+ iwc->ioend = iomap_add_to_ioend(inode, pos, len, page,
+ iomap_page_create(inode, page), iomap, iwc->ioend, NULL,
+ &iwc->iolist);
+ set_page_writeback(page);
+ return true;
+}
+
+static size_t __iomap_write_end(struct iomap_write_ctx *iwc,
+ struct iomap *iomap, struct inode *inode, loff_t pos,
+ size_t len, size_t copied, struct page *page)
{
flush_dcache_page(page);
@@ -676,7 +713,8 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
if (unlikely(copied < len && !PageUptodate(page)))
return 0;
iomap_set_range_uptodate(page, offset_in_page(pos), len);
- __set_page_dirty_nobuffers(page);
+ if (!iomap_write_through(iwc, iomap, inode, page, pos, len))
+ __set_page_dirty_nobuffers(page);
return copied;
}
@@ -698,9 +736,9 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
}
/* Returns the number of bytes copied. May be 0. Cannot be an errno. */
-static size_t iomap_write_end(struct inode *inode, loff_t pos, size_t len,
- size_t copied, struct page *page, struct iomap *iomap,
- struct iomap *srcmap)
+static size_t iomap_write_end(struct iomap_write_ctx *iwc, struct inode *inode,
+ loff_t pos, size_t len, size_t copied, struct page *page,
+ struct iomap *iomap, struct iomap *srcmap)
{
const struct iomap_page_ops *page_ops = iomap->page_ops;
loff_t old_size = inode->i_size;
@@ -712,7 +750,8 @@ static size_t iomap_write_end(struct inode *inode, loff_t pos, size_t len,
ret = block_write_end(NULL, inode->i_mapping, pos, len, copied,
page, NULL);
} else {
- ret = __iomap_write_end(inode, pos, len, copied, page);
+ ret = __iomap_write_end(iwc, iomap, inode, pos, len, copied,
+ page);
}
/*
@@ -780,8 +819,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
copied = copy_page_from_iter_atomic(page, offset, bytes, i);
- status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
- srcmap);
+ status = iomap_write_end(iwc, inode, pos, bytes, copied, page,
+ iomap, srcmap);
if (unlikely(copied != status))
iov_iter_revert(i, copied - status);
@@ -808,6 +847,10 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
return written ? written : status;
}
+/* Also rearrange */
+static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc,
+ struct iomap_ioend *ioend, int error);
+
ssize_t
iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops)
@@ -817,6 +860,7 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter,
.iolist = LIST_HEAD_INIT(iwc.iolist),
.write_through = iocb->ki_flags & IOCB_SYNC,
};
+ struct iomap_ioend *ioend, *next;
struct inode *inode = iocb->ki_filp->f_mapping->host;
loff_t pos = iocb->ki_pos, ret = 0, written = 0;
@@ -829,6 +873,15 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter,
written += ret;
}
+ if (ret > 0)
+ ret = 0;
+
+ list_for_each_entry_safe(ioend, next, &iwc.iolist, io_list) {
+ list_del_init(&ioend->io_list);
+ ret = iomap_submit_ioend(NULL, ioend, ret);
+ }
+ if (iwc.ioend)
+ ret = iomap_submit_ioend(NULL, iwc.ioend, ret);
return written ? written : ret;
}
EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
@@ -857,8 +910,8 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
if (unlikely(status))
return status;
- status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
- srcmap);
+ status = iomap_write_end(NULL, inode, pos, bytes, bytes, page,
+ iomap, srcmap);
if (WARN_ON_ONCE(status == 0))
return -EIO;
@@ -908,7 +961,8 @@ static s64 iomap_zero(struct inode *inode, loff_t pos, u64 length,
zero_user(page, offset, bytes);
mark_page_accessed(page);
- return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
+ return iomap_write_end(NULL, inode, pos, bytes, bytes, page, iomap,
+ srcmap);
}
static loff_t iomap_zero_range_actor(struct inode *inode, loff_t pos,
--
2.30.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 8/8] iomap: Add writethrough for O_SYNC
2021-08-11 2:46 ` [PATCH 8/8] iomap: Add writethrough for O_SYNC Matthew Wilcox (Oracle)
@ 2021-08-12 13:16 ` Christoph Hellwig
2021-08-12 13:28 ` Matthew Wilcox
0 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2021-08-12 13:16 UTC (permalink / raw)
To: Matthew Wilcox (Oracle); +Cc: linux-xfs, linux-fsdevel
On Wed, Aug 11, 2021 at 03:46:47AM +0100, Matthew Wilcox (Oracle) wrote:
> For O_SYNC writes, if the filesystem has already allocated blocks for
> the range, we can avoid marking the page as dirty and skip straight to
> marking the page as writeback.
So this just optimizes O_SYNC overwrites. How common are those for
buffered I/O? I know databases use them a lot with direct I/O, but for
buffered I/O this seems like an odd I/O pattern.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 8/8] iomap: Add writethrough for O_SYNC
2021-08-12 13:16 ` Christoph Hellwig
@ 2021-08-12 13:28 ` Matthew Wilcox
0 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox @ 2021-08-12 13:28 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel
On Thu, Aug 12, 2021 at 02:16:03PM +0100, Christoph Hellwig wrote:
> On Wed, Aug 11, 2021 at 03:46:47AM +0100, Matthew Wilcox (Oracle) wrote:
> > For O_SYNC writes, if the filesystem has already allocated blocks for
> > the range, we can avoid marking the page as dirty and skip straight to
> > marking the page as writeback.
>
> So this just optimizes O_SYNC overwrites. How common are those for
> buffered I/O? I know databases use them a lot with direct I/O, but for
> buffered I/O this seems like an odd I/O pattern.
As the comment says:
+ /* Can't allocate blocks here because we don't have ->prepare_ioend */
Give me a way to allocate blocks and it can do better! I didn't realise
this was going to be a problem when I embarked on this, but attempting
to do IO to wild addresses made me realise that most appending O_SYNC
writes are IOMAP_DELALLOC and so don't have allocated blocks.
And it's not just overwrites. If you open(O_SYNC|O_TRUNC) and then
write ten bytes at a time, the first write to each block will cause us
to fall back to writeback pages, but subsequent writes to a block will
writethrough.
^ permalink raw reply [flat|nested] 11+ messages in thread