From: Andreas Gruenbacher <agruenba@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH] fs: Move mark_inode_dirty out of __generic_write_end
Date: Mon, 24 Jun 2019 20:22:43 +0200 [thread overview]
Message-ID: <20190624182243.22447-1-agruenba@redhat.com> (raw)
In-Reply-To: <20190624065408.GA3565@lst.de>
On Mon, 24 Jun 2019 at 08:55, Christoph Hellwig <hch@lst.de> wrote:
> At least for xfs we don't need the mark_inode_dirty at all. Can you
> solve your gfs2 requirements on top of something like the patch below?
> Note that in general it seems like you should try to only update the
> on-disk inode size in writeback completion anyway, otherwise you can
> have a stale i_size update before the data was actually written.
>
>
> diff --git a/fs/iomap.c b/fs/iomap.c
> index c98107a6bf81..fcf2cbd39114 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -785,6 +785,7 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
> unsigned copied, struct page *page, struct iomap *iomap)
> {
> const struct iomap_page_ops *page_ops = iomap->page_ops;
> + loff_t old_size = inode->i_size;
> int ret;
>
> if (iomap->type == IOMAP_INLINE) {
> @@ -796,7 +797,12 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
> ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
> }
>
> - __generic_write_end(inode, pos, ret, page);
> + if (pos + ret > inode->i_size)
> + i_size_write(inode, pos + ret);
> + unlock_page(page);
> +
> + if (old_size < pos)
> + pagecache_isize_extended(inode, old_size, pos);
> if (page_ops && page_ops->page_done)
> page_ops->page_done(inode, pos, copied, page, iomap);
> put_page(page);
That would work, but I don't much like how this leaves us with a vfs
function that updates i_size without bothering to dirty the inode.
How about we instead move the __generic_write_end call into the page_done
callback and leave any special handling to the filesystem code where
needed? The patch below seems to work for gfs2.
Thanks,
Andreas
---
fs/gfs2/bmap.c | 42 ++++++++++++++++++++++++++++++++++++------
fs/gfs2/incore.h | 1 +
fs/iomap.c | 5 +++--
3 files changed, 40 insertions(+), 8 deletions(-)
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 93ea1d529aa3..7569770e6871 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -991,10 +991,13 @@ static void gfs2_write_unlock(struct inode *inode)
static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
unsigned len, struct iomap *iomap)
{
- unsigned int blockmask = i_blocksize(inode) - 1;
+ struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- unsigned int blocks;
+ unsigned int blockmask, blocks;
+ if (!(gfs2_is_stuffed(ip) || gfs2_is_jdata(ip)))
+ return 0;
+ blockmask = i_blocksize(inode) - 1;
blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
}
@@ -1005,10 +1008,33 @@ static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
+ loff_t old_size;
+
+ if (!page)
+ goto out;
- if (page && !gfs2_is_stuffed(ip))
+ /*
+ * Avoid calling __generic_write_end here to prevent mark_inode_dirty
+ * from being called for each page: it's relatively expensive on gfs2,
+ * so we defer that to gfs2_iomap_end.
+ */
+ old_size = inode->i_size;
+ if (pos + copied > old_size) {
+ i_size_write(inode, pos + copied);
+ set_bit(GIF_SIZE_CHANGED, &ip->i_flags);
+ }
+
+ unlock_page(page);
+
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
+
+ if (gfs2_is_jdata(ip) && !gfs2_is_stuffed(ip))
gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
- gfs2_trans_end(sdp);
+
+out:
+ if (current->journal_info)
+ gfs2_trans_end(sdp);
}
static const struct iomap_page_ops gfs2_iomap_page_ops = {
@@ -1106,8 +1132,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
gfs2_trans_end(sdp);
}
- if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
- iomap->page_ops = &gfs2_iomap_page_ops;
+ iomap->page_ops = &gfs2_iomap_page_ops;
return 0;
out_trans_end:
@@ -1160,6 +1185,11 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
goto out;
+ if (test_bit(GIF_SIZE_CHANGED, &ip->i_flags)) {
+ clear_bit(GIF_SIZE_CHANGED, &ip->i_flags);
+ mark_inode_dirty(inode);
+ }
+
if (!gfs2_is_stuffed(ip))
gfs2_ordered_add_inode(ip);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c9af93ac6c73..9f620807b396 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -396,6 +396,7 @@ enum {
GIF_ORDERED = 4,
GIF_FREE_VFS_INODE = 5,
GIF_GLOP_PENDING = 6,
+ GIF_SIZE_CHANGED = 7,
};
struct gfs2_inode {
diff --git a/fs/iomap.c b/fs/iomap.c
index 12654c2e78f8..b5c761827966 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -788,9 +788,10 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
}
- __generic_write_end(inode, pos, ret, page);
if (page_ops && page_ops->page_done)
- page_ops->page_done(inode, pos, copied, page, iomap);
+ page_ops->page_done(inode, pos, ret, page, iomap);
+ else
+ __generic_write_end(inode, pos, ret, page);
put_page(page);
if (ret < len)
--
2.20.1
next prev parent reply other threads:[~2019-06-24 18:22 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-18 14:47 [Cluster-devel] [PATCH] fs: Move mark_inode_dirty out of __generic_write_end Andreas Gruenbacher
2019-06-19 16:01 ` Jan Kara
2019-06-20 4:47 ` Dave Chinner
2019-06-24 6:54 ` Christoph Hellwig
2019-06-24 18:22 ` Andreas Gruenbacher [this message]
2019-06-25 9:57 ` Christoph Hellwig
2019-06-25 10:50 ` Christoph Hellwig
2019-06-25 18:13 ` Andreas Gruenbacher
2019-06-26 6:03 ` Christoph Hellwig
2019-06-26 12:07 ` Andreas Gruenbacher
2019-06-25 15:00 ` Andreas Gruenbacher
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190624182243.22447-1-agruenba@redhat.com \
--to=agruenba@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).