linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: stable@vger.kernel.org
Cc: linux-xfs@vger.kernel.org, Eryu Guan <eguan@redhat.com>,
	"Darrick J . Wong" <darrick.wong@oracle.com>
Subject: [PATCH 05/16] xfs: update i_size after unwritten conversion in dio completion
Date: Thu, 19 Oct 2017 16:22:48 +0200	[thread overview]
Message-ID: <20171019142259.20082-6-hch@lst.de> (raw)
In-Reply-To: <20171019142259.20082-1-hch@lst.de>

From: Eryu Guan <eguan@redhat.com>

commit ee70daaba82d70766d0723b743d9fdeb3b06102a upstream.

Since commit d531d91d6990 ("xfs: always use unwritten extents for
direct I/O writes"), we have been allocating unwritten extents for all
direct writes to allow appending aio in XFS.

But for dio writes that could extend the file size, we update the
in-core inode size first and only then convert the unwritten extents
to real allocations at dio completion time in xfs_dio_write_end_io().
Thus, if the direct writer also takes a shared iolock, a racing direct
read could see the new i_size, find the still-unwritten extents, and
read zeros instead of the actual data.

Fix it by updating the in-core inode size after the unwritten extent
conversion. To do this, introduce a new boolean argument to
xfs_iomap_write_unwritten() that tells it whether to update the
in-core i_size.

Suggested-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Eryu Guan <eguan@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
[hch: backported to the old direct I/O code before Linux 4.10]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c  | 25 +++++++++++++++++--------
 fs/xfs/xfs_iomap.c |  7 +++++--
 fs/xfs/xfs_iomap.h |  2 +-
 fs/xfs/xfs_pnfs.c  |  2 +-
 4 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index d23889e0bedc..2b9d7c5800ee 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -335,7 +335,8 @@ xfs_end_io(
 		error = xfs_reflink_end_cow(ip, offset, size);
 		break;
 	case XFS_IO_UNWRITTEN:
-		error = xfs_iomap_write_unwritten(ip, offset, size);
+		/* writeback should never update isize */
+		error = xfs_iomap_write_unwritten(ip, offset, size, false);
 		break;
 	default:
 		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
@@ -1532,6 +1533,21 @@ xfs_end_io_direct_write(
 		return 0;
 	}
 
+	if (flags & XFS_DIO_FLAG_COW)
+		error = xfs_reflink_end_cow(ip, offset, size);
+
+	/*
+	 * Unwritten conversion updates the in-core isize after extent
+	 * conversion but before updating the on-disk size. Updating isize any
+	 * earlier allows a racing dio read to find unwritten extents before
+	 * they are converted.
+	 */
+	if (flags & XFS_DIO_FLAG_UNWRITTEN) {
+		trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
+
+		return xfs_iomap_write_unwritten(ip, offset, size, true);
+	}
+
 	/*
 	 * We need to update the in-core inode size here so that we don't end up
 	 * with the on-disk inode size being outside the in-core inode size. We
@@ -1548,13 +1564,6 @@ xfs_end_io_direct_write(
 		i_size_write(inode, offset + size);
 	spin_unlock(&ip->i_flags_lock);
 
-	if (flags & XFS_DIO_FLAG_COW)
-		error = xfs_reflink_end_cow(ip, offset, size);
-	if (flags & XFS_DIO_FLAG_UNWRITTEN) {
-		trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
-
-		error = xfs_iomap_write_unwritten(ip, offset, size);
-	}
 	if (flags & XFS_DIO_FLAG_APPEND) {
 		trace_xfs_end_io_direct_write_append(ip, offset, size);
 
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 65740d1cbd92..f286f63c430c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -836,7 +836,8 @@ int
 xfs_iomap_write_unwritten(
 	xfs_inode_t	*ip,
 	xfs_off_t	offset,
-	xfs_off_t	count)
+	xfs_off_t	count,
+	bool		update_isize)
 {
 	xfs_mount_t	*mp = ip->i_mount;
 	xfs_fileoff_t	offset_fsb;
@@ -847,6 +848,7 @@ xfs_iomap_write_unwritten(
 	xfs_trans_t	*tp;
 	xfs_bmbt_irec_t imap;
 	struct xfs_defer_ops dfops;
+	struct inode	*inode = VFS_I(ip);
 	xfs_fsize_t	i_size;
 	uint		resblks;
 	int		error;
@@ -906,7 +908,8 @@ xfs_iomap_write_unwritten(
 		i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
 		if (i_size > offset + count)
 			i_size = offset + count;
-
+		if (update_isize && i_size > i_size_read(inode))
+			i_size_write(inode, i_size);
 		i_size = xfs_new_eof(ip, i_size);
 		if (i_size) {
 			ip->i_d.di_size = i_size;
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 6d45cf01fcff..d71703af5c76 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 			struct xfs_bmbt_irec *, int);
 int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
 			struct xfs_bmbt_irec *);
-int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
+int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
 		struct xfs_bmbt_irec *);
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 93a7aafa56d6..cecd37569ddb 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -279,7 +279,7 @@ xfs_fs_commit_blocks(
 					(end - 1) >> PAGE_SHIFT);
 		WARN_ON_ONCE(error);
 
-		error = xfs_iomap_write_unwritten(ip, start, length);
+		error = xfs_iomap_write_unwritten(ip, start, length, false);
 		if (error)
 			goto out_drop_iolock;
 	}
-- 
2.14.2


  parent reply	other threads:[~2017-10-19 14:23 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-19 14:22 4.9-stable updates for XFS Christoph Hellwig
2017-10-19 14:22 ` [PATCH 01/16] xfs: don't unconditionally clear the reflink flag on zero-block files Christoph Hellwig
2017-10-19 14:22 ` [PATCH 02/16] xfs: evict CoW fork extents when performing finsert/fcollapse Christoph Hellwig
2017-10-19 14:22 ` [PATCH 03/16] fs/xfs: Use %pS printk format for direct addresses Christoph Hellwig
2017-10-19 14:22 ` [PATCH 04/16] xfs: report zeroed or not correctly in xfs_zero_range() Christoph Hellwig
2017-10-19 14:22 ` Christoph Hellwig [this message]
2017-10-19 14:22 ` [PATCH 06/16] xfs: perag initialization should only touch m_ag_max_usable for AG 0 Christoph Hellwig
2017-10-19 14:22 ` [PATCH 07/16] xfs: Capture state of the right inode in xfs_iflush_done Christoph Hellwig
2017-10-19 14:22 ` [PATCH 08/16] xfs: always swap the cow forks when swapping extents Christoph Hellwig
2017-10-19 14:22 ` [PATCH 09/16] xfs: handle racy AIO in xfs_reflink_end_cow Christoph Hellwig
2017-10-19 14:22 ` [PATCH 10/16] xfs: Don't log uninitialised fields in inode structures Christoph Hellwig
2017-10-19 14:22 ` [PATCH 11/16] xfs: move more RT specific code under CONFIG_XFS_RT Christoph Hellwig
2017-10-19 14:22 ` [PATCH 12/16] xfs: don't change inode mode if ACL update fails Christoph Hellwig
2017-10-19 14:22 ` [PATCH 13/16] xfs: reinit btree pointer on attr tree inactivation walk Christoph Hellwig
2017-10-19 14:22 ` [PATCH 14/16] xfs: handle error if xfs_btree_get_bufs fails Christoph Hellwig
2017-10-19 14:22 ` [PATCH 15/16] xfs: cancel dirty pages on invalidation Christoph Hellwig
2017-10-19 14:22 ` [PATCH 16/16] xfs: trim writepage mapping to within eof Christoph Hellwig
2017-10-24 12:54 ` 4.9-stable updates for XFS Greg KH

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171019142259.20082-6-hch@lst.de \
    --to=hch@lst.de \
    --cc=darrick.wong@oracle.com \
    --cc=eguan@redhat.com \
    --cc=linux-xfs@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).