linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: stable@vger.kernel.org
Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
Subject: [PATCH 14/19] xfs: fix COW writeback race
Date: Thu,  2 Feb 2017 08:56:06 +0100	[thread overview]
Message-ID: <1486022171-8076-15-git-send-email-hch@lst.de> (raw)
In-Reply-To: <1486022171-8076-1-git-send-email-hch@lst.de>

commit d2b3964a0780d2d2994eba57f950d6c9fe489ed8 upstream.

Due to the way how xfs_iomap_write_allocate tries to convert the whole
found extents from delalloc to real space we can run into a race
condition with multiple threads doing writes to this same extent.
For the non-COW case that is harmless as the only thing that can happen
is that we call xfs_bmapi_write on an extent that has already been
converted to a real allocation.  For COW writes where we move the extent
from the COW to the data fork after I/O completion the race is, however,
not quite as harmless.  In the worst case we are now calling
xfs_bmapi_write on a region that contains hole in the COW work, which
will trip up an assert in debug builds or lead to file system corruption
in non-debug builds.  This seems to be reproducible with workloads of
small O_DSYNC write, although so far I've not managed to come up with
a with an isolated reproducer.

The fix for the issue is relatively simple:  tell xfs_bmapi_write
that we are only asked to convert delayed allocations and skip holes
in that case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 44 ++++++++++++++++++++++++++++++++------------
 fs/xfs/libxfs/xfs_bmap.h |  6 +++++-
 fs/xfs/xfs_iomap.c       |  2 +-
 3 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 52dc5c1..fbb60d3 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4607,8 +4607,6 @@ xfs_bmapi_write(
 	int			n;		/* current extent index */
 	xfs_fileoff_t		obno;		/* old block number (offset) */
 	int			whichfork;	/* data or attr fork */
-	char			inhole;		/* current location is hole in file */
-	char			wasdelay;	/* old extent was delayed */
 
 #ifdef DEBUG
 	xfs_fileoff_t		orig_bno;	/* original block number value */
@@ -4694,22 +4692,44 @@ xfs_bmapi_write(
 	bma.firstblock = firstblock;
 
 	while (bno < end && n < *nmap) {
-		inhole = eof || bma.got.br_startoff > bno;
-		wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
+		bool			need_alloc = false, wasdelay = false;
 
-		/*
-		 * Make sure we only reflink into a hole.
-		 */
-		if (flags & XFS_BMAPI_REMAP)
-			ASSERT(inhole);
-		if (flags & XFS_BMAPI_COWFORK)
-			ASSERT(!inhole);
+		/* in hole or beyoned EOF? */
+		if (eof || bma.got.br_startoff > bno) {
+			if (flags & XFS_BMAPI_DELALLOC) {
+				/*
+				 * For the COW fork we can reasonably get a
+				 * request for converting an extent that races
+				 * with other threads already having converted
+				 * part of it, as there converting COW to
+				 * regular blocks is not protected using the
+				 * IOLOCK.
+				 */
+				ASSERT(flags & XFS_BMAPI_COWFORK);
+				if (!(flags & XFS_BMAPI_COWFORK)) {
+					error = -EIO;
+					goto error0;
+				}
+
+				if (eof || bno >= end)
+					break;
+			} else {
+				need_alloc = true;
+			}
+		} else {
+			/*
+			 * Make sure we only reflink into a hole.
+			 */
+			ASSERT(!(flags & XFS_BMAPI_REMAP));
+			if (isnullstartblock(bma.got.br_startblock))
+				wasdelay = true;
+		}
 
 		/*
 		 * First, deal with the hole before the allocated space
 		 * that we found, if any.
 		 */
-		if (inhole || wasdelay) {
+		if (need_alloc || wasdelay) {
 			bma.eof = eof;
 			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
 			bma.wasdel = wasdelay;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index d6d175a..e7d40b3 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -110,6 +110,9 @@ struct xfs_extent_free_item
 /* Map something in the CoW fork. */
 #define XFS_BMAPI_COWFORK	0x200
 
+/* Only convert delalloc space, don't allocate entirely new extents */
+#define XFS_BMAPI_DELALLOC	0x400
+
 #define XFS_BMAPI_FLAGS \
 	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
 	{ XFS_BMAPI_METADATA,	"METADATA" }, \
@@ -120,7 +123,8 @@ struct xfs_extent_free_item
 	{ XFS_BMAPI_CONVERT,	"CONVERT" }, \
 	{ XFS_BMAPI_ZERO,	"ZERO" }, \
 	{ XFS_BMAPI_REMAP,	"REMAP" }, \
-	{ XFS_BMAPI_COWFORK,	"COWFORK" }
+	{ XFS_BMAPI_COWFORK,	"COWFORK" }, \
+	{ XFS_BMAPI_DELALLOC,	"DELALLOC" }
 
 
 static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 15a83813..cdc6bdd 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -681,7 +681,7 @@ xfs_iomap_write_allocate(
 	xfs_trans_t	*tp;
 	int		nimaps;
 	int		error = 0;
-	int		flags = 0;
+	int		flags = XFS_BMAPI_DELALLOC;
 	int		nres;
 
 	if (whichfork == XFS_COW_FORK)
-- 
2.1.4


  parent reply	other threads:[~2017-02-02  7:57 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-02  7:55 4.9-stable updates for XFS Christoph Hellwig
2017-02-02  7:55 ` [PATCH 01/19] xfs: bump up reserved blocks in xfs_alloc_set_aside Christoph Hellwig
2017-02-02  7:55 ` [PATCH 02/19] xfs: fix bogus minleft manipulations Christoph Hellwig
2017-02-02  7:55 ` [PATCH 03/19] xfs: adjust allocation length in xfs_alloc_space_available Christoph Hellwig
2017-02-02  7:55 ` [PATCH 04/19] xfs: don't rely on ->total " Christoph Hellwig
2017-02-02  7:55 ` [PATCH 05/19] xfs: don't print warnings when xfs_log_force fails Christoph Hellwig
2017-02-02  7:55 ` [PATCH 06/19] xfs: make the ASSERT() condition likely Christoph Hellwig
2017-02-02  7:55 ` [PATCH 07/19] xfs: sanity check directory inode di_size Christoph Hellwig
2017-02-02  7:56 ` [PATCH 08/19] xfs: add missing include dependencies to xfs_dir2.h Christoph Hellwig
2017-02-02  7:56 ` [PATCH 09/19] xfs: replace xfs_mode_to_ftype table with switch statement Christoph Hellwig
2017-02-02  7:56 ` [PATCH 10/19] xfs: sanity check inode mode when creating new dentry Christoph Hellwig
2017-02-02  7:56 ` [PATCH 11/19] xfs: sanity check inode di_mode Christoph Hellwig
2017-02-02  7:56 ` [PATCH 12/19] xfs: don't wrap ID in xfs_dq_get_next_id Christoph Hellwig
2017-02-02  7:56 ` [PATCH 13/19] xfs: fix xfs_mode_to_ftype() prototype Christoph Hellwig
2017-02-02  7:56 ` Christoph Hellwig [this message]
2017-02-02  7:56 ` [PATCH 15/19] xfs: verify dirblocklog correctly Christoph Hellwig
2017-02-02  7:56 ` [PATCH 16/19] xfs: remove racy hasattr check from attr ops Christoph Hellwig
2017-02-02  7:56 ` [PATCH 17/19] xfs: extsize hints are not unlikely in xfs_bmap_btalloc Christoph Hellwig
2017-02-02  7:56 ` [PATCH 18/19] xfs: clear _XBF_PAGES from buffers when readahead page Christoph Hellwig
2017-02-02  7:56 ` [PATCH 19/19] xfs: fix bmv_count confusion w/ shared extents Christoph Hellwig
2017-02-02 10:18 ` 4.9-stable updates for XFS Greg KH
2017-02-02 16:00 ` Eric Sandeen
2017-02-02 17:34   ` Eric Sandeen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1486022171-8076-15-git-send-email-hch@lst.de \
    --to=hch@lst.de \
    --cc=darrick.wong@oracle.com \
    --cc=linux-xfs@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).