All of lore.kernel.org
 help / color / mirror / Atom feed
From: Brian Foster <bfoster@redhat.com>
To: Christoph Hellwig <hch@lst.de>
Cc: linux-xfs@vger.kernel.org, darrick.wong@oracle.com
Subject: Re: [PATCH v3] xfs: fix COW writeback race
Date: Fri, 20 Jan 2017 06:57:11 -0500	[thread overview]
Message-ID: <20170120115711.GA7220@bfoster.bfoster> (raw)
In-Reply-To: <1484837472-14368-1-git-send-email-hch@lst.de>

On Thu, Jan 19, 2017 at 03:51:12PM +0100, Christoph Hellwig wrote:
> Due to the way how xfs_iomap_write_allocate tries to convert the whole
> found extents from delalloc to real space we can run into a race
> condition with multiple threads doing writes to this same extent.
> For the non-COW case that is harmless as the only thing that can happen
> is that we call xfs_bmapi_write on an extent that has already been
> converted to a real allocation.  For COW writes where we move the extent
> from the COW to the data fork after I/O completion the race is, however,
> not quite as harmless.  In the worst case we are now calling
> xfs_bmapi_write on a region that contains hole in the COW work, which
> will trip up an assert in debug builds or lead to file system corruption
> in non-debug builds.  This seems to be reproducible with workloads of
> small O_DSYNC write, although so far I've not managed to come up with
> a with an isolated reproducer.
> 
> The fix for the issue is relatively simple:  tell xfs_bmapi_write
> that we are only asked to convert delayed allocations and skip holes
> in that case.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/libxfs/xfs_bmap.c | 44 ++++++++++++++++++++++++++++++++------------
>  fs/xfs/libxfs/xfs_bmap.h |  6 +++++-
>  fs/xfs/xfs_iomap.c       |  2 +-
>  3 files changed, 38 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index 44773c9..ab82dd4 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -4514,8 +4514,6 @@ xfs_bmapi_write(
>  	int			n;		/* current extent index */
>  	xfs_fileoff_t		obno;		/* old block number (offset) */
>  	int			whichfork;	/* data or attr fork */
> -	char			inhole;		/* current location is hole in file */
> -	char			wasdelay;	/* old extent was delayed */
>  
>  #ifdef DEBUG
>  	xfs_fileoff_t		orig_bno;	/* original block number value */
> @@ -4603,22 +4601,44 @@ xfs_bmapi_write(
>  	bma.firstblock = firstblock;
>  
>  	while (bno < end && n < *nmap) {
> -		inhole = eof || bma.got.br_startoff > bno;
> -		wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
> +		bool			need_alloc = false, wasdelay = false;
>  
> -		/*
> -		 * Make sure we only reflink into a hole.
> -		 */
> -		if (flags & XFS_BMAPI_REMAP)
> -			ASSERT(inhole);
> -		if (flags & XFS_BMAPI_COWFORK)
> -			ASSERT(!inhole);
> +		/* in hole or beyoned EOF? */

Typo:			      beyond

Otherwise looks good:

Reviewed-by: Brian Foster <bfoster@redhat.com>

> +		if (eof || bma.got.br_startoff > bno) {
> +			if (flags & XFS_BMAPI_DELALLOC) {
> +				/*
> +				 * For the COW fork we can reasonably get a
> +				 * request for converting an extent that races
> +				 * with other threads already having converted
> +				 * part of it, as there converting COW to
> +				 * regular blocks is not protected using the
> +				 * IOLOCK.
> +				 */
> +				ASSERT(flags & XFS_BMAPI_COWFORK);
> +				if (!(flags & XFS_BMAPI_COWFORK)) {
> +					error = -EIO;
> +					goto error0;
> +				}
> +
> +				if (eof || bno >= end)
> +					break;
> +			} else {
> +				need_alloc = true;
> +			}
> +		} else {
> +			/*
> +			 * Make sure we only reflink into a hole.
> +			 */
> +			ASSERT(!(flags & XFS_BMAPI_REMAP));
> +			if (isnullstartblock(bma.got.br_startblock))
> +				wasdelay = true;
> +		}
>  
>  		/*
>  		 * First, deal with the hole before the allocated space
>  		 * that we found, if any.
>  		 */
> -		if (inhole || wasdelay) {
> +		if (need_alloc || wasdelay) {
>  			bma.eof = eof;
>  			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
>  			bma.wasdel = wasdelay;
> diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
> index cecd094..cdef87d 100644
> --- a/fs/xfs/libxfs/xfs_bmap.h
> +++ b/fs/xfs/libxfs/xfs_bmap.h
> @@ -110,6 +110,9 @@ struct xfs_extent_free_item
>  /* Map something in the CoW fork. */
>  #define XFS_BMAPI_COWFORK	0x200
>  
> +/* Only convert delalloc space, don't allocate entirely new extents */
> +#define XFS_BMAPI_DELALLOC	0x400
> +
>  #define XFS_BMAPI_FLAGS \
>  	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
>  	{ XFS_BMAPI_METADATA,	"METADATA" }, \
> @@ -120,7 +123,8 @@ struct xfs_extent_free_item
>  	{ XFS_BMAPI_CONVERT,	"CONVERT" }, \
>  	{ XFS_BMAPI_ZERO,	"ZERO" }, \
>  	{ XFS_BMAPI_REMAP,	"REMAP" }, \
> -	{ XFS_BMAPI_COWFORK,	"COWFORK" }
> +	{ XFS_BMAPI_COWFORK,	"COWFORK" }, \
> +	{ XFS_BMAPI_DELALLOC,	"DELALLOC" }
>  
>  
>  static inline int xfs_bmapi_aflag(int w)
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index ca137b7..7ee8629 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -795,7 +795,7 @@ xfs_iomap_write_allocate(
>  	xfs_trans_t	*tp;
>  	int		nimaps;
>  	int		error = 0;
> -	int		flags = 0;
> +	int		flags = XFS_BMAPI_DELALLOC;
>  	int		nres;
>  
>  	if (whichfork == XFS_COW_FORK)
> -- 
> 2.1.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

  reply	other threads:[~2017-01-20 11:57 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-19 14:51 [PATCH v3] xfs: fix COW writeback race Christoph Hellwig
2017-01-20 11:57 ` Brian Foster [this message]
2017-01-20 16:25 ` Darrick J. Wong
2017-01-23 12:18   ` Christoph Hellwig
2017-01-23 18:56     ` Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170120115711.GA7220@bfoster.bfoster \
    --to=bfoster@redhat.com \
    --cc=darrick.wong@oracle.com \
    --cc=hch@lst.de \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.