linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: Brian Foster <bfoster@redhat.com>
Cc: linux-xfs@vger.kernel.org
Subject: Re: [PATCH 9/9] xfs: relog dirty buffers during swapext bmbt owner change
Date: Fri, 25 Aug 2017 09:53:59 -0700	[thread overview]
Message-ID: <20170825165359.GR4796@magnolia> (raw)
In-Reply-To: <20170825150557.43010-10-bfoster@redhat.com>

On Fri, Aug 25, 2017 at 11:05:57AM -0400, Brian Foster wrote:
> The owner change bmbt scan that occurs during extent swap operations
> does not handle ordered buffer failures. Buffers that cannot be
> marked ordered must be physically logged so previously dirty ranges
> of the buffer can be relogged in the transaction.
> 
> Since the bmbt scan may need to process and potentially log a large
> number of blocks, we can't expect to complete this operation in a
> single transaction. Update extent swap to use a permanent
> transaction with enough log reservation to physically log a buffer.
> Update the bmbt scan to physically log any buffers that cannot be
> ordered and to terminate the scan with -EAGAIN. On -EAGAIN, the
> caller rolls the transaction and restarts the scan. Finally, update
> the bmbt scan helper function to skip bmbt blocks that already match
> the expected owner so they are not reprocessed after scan restarts.
> 
> Signed-off-by: Brian Foster <bfoster@redhat.com>

Looks ok, I think...
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

> ---
>  fs/xfs/libxfs/xfs_btree.c | 26 ++++++++++++++-------
>  fs/xfs/xfs_bmap_util.c    | 57 ++++++++++++++++++++++++++++++++++++++---------
>  2 files changed, 65 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
> index d06b04d..c466a23 100644
> --- a/fs/xfs/libxfs/xfs_btree.c
> +++ b/fs/xfs/libxfs/xfs_btree.c
> @@ -4452,10 +4452,15 @@ xfs_btree_block_change_owner(
>  
>  	/* modify the owner */
>  	block = xfs_btree_get_block(cur, level, &bp);
> -	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
> +	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
> +		if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
> +			return 0;
>  		block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
> -	else
> +	} else {
> +		if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
> +			return 0;
>  		block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
> +	}
>  
>  	/*
>  	 * If the block is a root block hosted in an inode, we might not have a
> @@ -4464,14 +4469,19 @@ xfs_btree_block_change_owner(
>  	 * block is formatted into the on-disk inode fork. We still change it,
>  	 * though, so everything is consistent in memory.
>  	 */
> -	if (bp) {
> -		if (cur->bc_tp)
> -			xfs_trans_ordered_buf(cur->bc_tp, bp);
> -		else
> -			xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
> -	} else {
> +	if (!bp) {
>  		ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
>  		ASSERT(level == cur->bc_nlevels - 1);
> +		return 0;
> +	}
> +
> +	if (cur->bc_tp) {
> +		if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
> +			xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
> +			return -EAGAIN;
> +		}
> +	} else {
> +		xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
>  	}
>  
>  	return 0;
> diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> index ee8fb9a..3e9b7a4 100644
> --- a/fs/xfs/xfs_bmap_util.c
> +++ b/fs/xfs/xfs_bmap_util.c
> @@ -1929,6 +1929,48 @@ xfs_swap_extent_forks(
>  	return 0;
>  }
>  
> +/*
> + * Fix up the owners of the bmbt blocks to refer to the current inode. The
> + * change owner scan attempts to order all modified buffers in the current
> + * transaction. In the event of ordered buffer failure, the offending buffer is
> + * physically logged as a fallback and the scan returns -EAGAIN. We must roll
> + * the transaction in this case to replenish the fallback log reservation and
> + * restart the scan. This process repeats until the scan completes.
> + */
> +static int
> +xfs_swap_change_owner(
> +	struct xfs_trans	**tpp,
> +	struct xfs_inode	*ip,
> +	struct xfs_inode	*tmpip)
> +{
> +	int			error;
> +	struct xfs_trans	*tp = *tpp;
> +
> +	do {
> +		error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino,
> +					      NULL);
> +		/* success or fatal error */
> +		if (error != -EAGAIN)
> +			break;
> +
> +		error = xfs_trans_roll(tpp, NULL);
> +		if (error)
> +			break;
> +		tp = *tpp;
> +
> +		/*
> +		 * Redirty both inodes so they can relog and keep the log tail
> +		 * moving forward.
> +		 */
> +		xfs_trans_ijoin(tp, ip, 0);
> +		xfs_trans_ijoin(tp, tmpip, 0);
> +		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
> +		xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE);
> +	} while (true);
> +
> +	return error;
> +}
> +
>  int
>  xfs_swap_extents(
>  	struct xfs_inode	*ip,	/* target inode */
> @@ -1943,7 +1985,7 @@ xfs_swap_extents(
>  	int			lock_flags;
>  	struct xfs_ifork	*cowfp;
>  	uint64_t		f;
> -	int			resblks;
> +	int			resblks = 0;
>  
>  	/*
>  	 * Lock the inodes against other IO, page faults and truncate to
> @@ -1991,11 +2033,8 @@ xfs_swap_extents(
>  			  XFS_SWAP_RMAP_SPACE_RES(mp,
>  				XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK),
>  				XFS_DATA_FORK);
> -		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
> -				0, 0, &tp);
> -	} else
> -		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0,
> -				0, 0, &tp);
> +	}
> +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
>  	if (error)
>  		goto out_unlock;
>  
> @@ -2087,14 +2126,12 @@ xfs_swap_extents(
>  	 * inode number of the current inode.
>  	 */
>  	if (src_log_flags & XFS_ILOG_DOWNER) {
> -		error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK,
> -					      ip->i_ino, NULL);
> +		error = xfs_swap_change_owner(&tp, ip, tip);
>  		if (error)
>  			goto out_trans_cancel;
>  	}
>  	if (target_log_flags & XFS_ILOG_DOWNER) {
> -		error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK,
> -					      tip->i_ino, NULL);
> +		error = xfs_swap_change_owner(&tp, tip, ip);
>  		if (error)
>  			goto out_trans_cancel;
>  	}
> -- 
> 2.9.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

  reply	other threads:[~2017-08-25 16:54 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-25 15:05 [PATCH 0/9] xfs: refactor ordered buffer logging code Brian Foster
2017-08-25 15:05 ` [PATCH 1/9] xfs: open-code xfs_buf_item_dirty() Brian Foster
2017-08-25 15:26   ` Darrick J. Wong
2017-08-28  9:20   ` Christoph Hellwig
2017-08-25 15:05 ` [PATCH 2/9] xfs: remove unnecessary dirty bli format check for ordered bufs Brian Foster
2017-08-25 15:51   ` Darrick J. Wong
2017-08-28  9:25   ` Christoph Hellwig
2017-08-28 10:51     ` Brian Foster
2017-08-25 15:05 ` [PATCH 3/9] xfs: ordered buffer log items are never formatted Brian Foster
2017-08-25 15:26   ` Darrick J. Wong
2017-08-28  9:26   ` Christoph Hellwig
2017-08-25 15:05 ` [PATCH 4/9] xfs: refactor buffer logging into buffer dirtying helper Brian Foster
2017-08-28  9:28   ` Christoph Hellwig
2017-08-25 15:05 ` [PATCH 5/9] xfs: don't log dirty ranges for ordered buffers Brian Foster
2017-08-25 15:51   ` Darrick J. Wong
2017-08-28  9:29   ` Christoph Hellwig
2017-08-25 15:05 ` [PATCH 6/9] xfs: skip bmbt block ino validation during owner change Brian Foster
2017-08-25 15:35   ` Darrick J. Wong
2017-08-25 18:11     ` Brian Foster
2017-08-28  9:44   ` Christoph Hellwig
2017-08-25 15:05 ` [PATCH 7/9] xfs: move bmbt owner change to last step of extent swap Brian Foster
2017-08-25 15:57   ` Darrick J. Wong
2017-08-28  9:46   ` Christoph Hellwig
2017-08-25 15:05 ` [PATCH 8/9] xfs: disallow marking previously dirty buffers as ordered Brian Foster
2017-08-25 16:50   ` Darrick J. Wong
2017-08-28  9:34   ` Christoph Hellwig
2017-08-25 15:05 ` [PATCH 9/9] xfs: relog dirty buffers during swapext bmbt owner change Brian Foster
2017-08-25 16:53   ` Darrick J. Wong [this message]
2017-08-28  9:51   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170825165359.GR4796@magnolia \
    --to=darrick.wong@oracle.com \
    --cc=bfoster@redhat.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).