cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: Andreas Gruenbacher <agruenba@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH] fs: Move mark_inode_dirty out of __generic_write_end
Date: Mon, 24 Jun 2019 20:22:43 +0200	[thread overview]
Message-ID: <20190624182243.22447-1-agruenba@redhat.com> (raw)
In-Reply-To: <20190624065408.GA3565@lst.de>

On Mon, 24 Jun 2019 at 08:55, Christoph Hellwig <hch@lst.de> wrote:
> At least for xfs we don't need the mark_inode_dirty at all.  Can you
> solve your gfs2 requirements on top of something like the patch below?
> Note that in general it seems like you should try to only update the
> on-disk inode size in writeback completion anyway, otherwise you can
> have a stale i_size update before the data was actually written.
>
>
> diff --git a/fs/iomap.c b/fs/iomap.c
> index c98107a6bf81..fcf2cbd39114 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -785,6 +785,7 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
>                 unsigned copied, struct page *page, struct iomap *iomap)
>  {
>         const struct iomap_page_ops *page_ops = iomap->page_ops;
> +       loff_t old_size = inode->i_size;
>         int ret;
>
>         if (iomap->type == IOMAP_INLINE) {
> @@ -796,7 +797,12 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
>                 ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
>         }
>
> -       __generic_write_end(inode, pos, ret, page);
> +       if (pos + ret > inode->i_size)
> +               i_size_write(inode, pos + ret);
> +       unlock_page(page);
> +
> +       if (old_size < pos)
> +               pagecache_isize_extended(inode, old_size, pos);
>         if (page_ops && page_ops->page_done)
>                 page_ops->page_done(inode, pos, copied, page, iomap);
>         put_page(page);

That would work, but I don't much like how this leaves us with a vfs
function that updates i_size without bothering to dirty the inode.

How about we instead move the __generic_write_end call into the page_done
callback and leave any special handling to the filesystem code where
needed?  The patch below seems to work for gfs2.

Thanks,
Andreas

---
 fs/gfs2/bmap.c   | 42 ++++++++++++++++++++++++++++++++++++------
 fs/gfs2/incore.h |  1 +
 fs/iomap.c       |  5 +++--
 3 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 93ea1d529aa3..7569770e6871 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -991,10 +991,13 @@ static void gfs2_write_unlock(struct inode *inode)
 static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
 				   unsigned len, struct iomap *iomap)
 {
-	unsigned int blockmask = i_blocksize(inode) - 1;
+	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	unsigned int blocks;
+	unsigned int blockmask, blocks;
 
+	if (!(gfs2_is_stuffed(ip) || gfs2_is_jdata(ip)))
+		return 0;
+	blockmask = i_blocksize(inode) - 1;
 	blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
 	return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
 }
@@ -1005,10 +1008,33 @@ static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	loff_t old_size;
+
+	if (!page)
+		goto out;
 
-	if (page && !gfs2_is_stuffed(ip))
+	/*
+	 * Avoid calling __generic_write_end here to prevent mark_inode_dirty
+	 * from being called for each page: it's relatively expensive on gfs2,
+	 * so we defer that to gfs2_iomap_end.
+	 */
+	old_size = inode->i_size;
+	if (pos + copied > old_size) {
+		i_size_write(inode, pos + copied);
+		set_bit(GIF_SIZE_CHANGED, &ip->i_flags);
+	}
+
+	unlock_page(page);
+
+	if (old_size < pos)
+		pagecache_isize_extended(inode, old_size, pos);
+
+	if (gfs2_is_jdata(ip) && !gfs2_is_stuffed(ip))
 		gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
-	gfs2_trans_end(sdp);
+
+out:
+	if (current->journal_info)
+		gfs2_trans_end(sdp);
 }
 
 static const struct iomap_page_ops gfs2_iomap_page_ops = {
@@ -1106,8 +1132,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 		gfs2_trans_end(sdp);
 	}
 
-	if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
-		iomap->page_ops = &gfs2_iomap_page_ops;
+	iomap->page_ops = &gfs2_iomap_page_ops;
 	return 0;
 
 out_trans_end:
@@ -1160,6 +1185,11 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
 	if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
 		goto out;
 
+	if (test_bit(GIF_SIZE_CHANGED, &ip->i_flags)) {
+		clear_bit(GIF_SIZE_CHANGED, &ip->i_flags);
+		mark_inode_dirty(inode);
+	}
+
 	if (!gfs2_is_stuffed(ip))
 		gfs2_ordered_add_inode(ip);
 
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c9af93ac6c73..9f620807b396 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -396,6 +396,7 @@ enum {
 	GIF_ORDERED		= 4,
 	GIF_FREE_VFS_INODE      = 5,
 	GIF_GLOP_PENDING	= 6,
+	GIF_SIZE_CHANGED	= 7,
 };
 
 struct gfs2_inode {
diff --git a/fs/iomap.c b/fs/iomap.c
index 12654c2e78f8..b5c761827966 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -788,9 +788,10 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 		ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
 	}
 
-	__generic_write_end(inode, pos, ret, page);
 	if (page_ops && page_ops->page_done)
-		page_ops->page_done(inode, pos, copied, page, iomap);
+		page_ops->page_done(inode, pos, ret, page, iomap);
+	else
+		__generic_write_end(inode, pos, ret, page);
 	put_page(page);
 
 	if (ret < len)
-- 
2.20.1



  reply	other threads:[~2019-06-24 18:22 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-18 14:47 [Cluster-devel] [PATCH] fs: Move mark_inode_dirty out of __generic_write_end Andreas Gruenbacher
2019-06-19 16:01 ` Jan Kara
2019-06-20  4:47 ` Dave Chinner
2019-06-24  6:54 ` Christoph Hellwig
2019-06-24 18:22   ` Andreas Gruenbacher [this message]
2019-06-25  9:57     ` Christoph Hellwig
2019-06-25 10:50       ` Christoph Hellwig
2019-06-25 18:13         ` Andreas Gruenbacher
2019-06-26  6:03           ` Christoph Hellwig
2019-06-26 12:07             ` Andreas Gruenbacher
2019-06-25 15:00       ` Andreas Gruenbacher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190624182243.22447-1-agruenba@redhat.com \
    --to=agruenba@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).