linux-api.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: John Garry <john.g.garry@oracle.com>
Cc: brauner@kernel.org, hch@lst.de, viro@zeniv.linux.org.uk,
	jack@suse.cz, cem@kernel.org, linux-fsdevel@vger.kernel.org,
	dchinner@redhat.com, linux-xfs@vger.kernel.org,
	linux-kernel@vger.kernel.org, ojaswin@linux.ibm.com,
	ritesh.list@gmail.com, martin.petersen@oracle.com,
	linux-ext4@vger.kernel.org, linux-block@vger.kernel.org,
	catherine.hoang@oracle.com, linux-api@vger.kernel.org
Subject: Re: [PATCH v8 05/15] xfs: ignore HW which cannot atomic write a single block
Date: Tue, 22 Apr 2025 17:38:23 -0700	[thread overview]
Message-ID: <20250423003823.GW25675@frogsfrogsfrogs> (raw)
In-Reply-To: <20250422122739.2230121-6-john.g.garry@oracle.com>

On Tue, Apr 22, 2025 at 12:27:29PM +0000, John Garry wrote:
> Currently only HW which can write at least 1x block is supported.
> 
> For supporting atomic writes > 1x block, a CoW-based method will also be
> used and this will not be restricted to using HW which can write >= 1x
> block.
> 
> However for deciding if HW-based atomic writes can be used, we need to
> start adding checks for write length < HW min, which complicates the code.
> Indeed, a statx field similar to unit_max_opt should also be added for this
> minimum, which is undesirable.
> 
> HW which can only write > 1x blocks would be uncommon and quite weird, so
> let's just not support it.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  fs/xfs/xfs_inode.h | 17 ++++++++---------
>  fs/xfs/xfs_mount.c | 14 ++++++++++++++
>  fs/xfs/xfs_mount.h |  4 ++++
>  3 files changed, 26 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index cff643cd03fc..725cd7c16a6e 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -355,20 +355,19 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip)
>  #define xfs_inode_buftarg(ip) \
>  	(XFS_IS_REALTIME_INODE(ip) ? \
>  		(ip)->i_mount->m_rtdev_targp : (ip)->i_mount->m_ddev_targp)
> +/*
> + * Return max atomic write unit for a given inode.
> + */
> +#define xfs_inode_hw_atomicwrite_max(ip) \
> +	(XFS_IS_REALTIME_INODE(ip) ? \
> +		(ip)->i_mount->m_rt_awu_hw_max : \
> +		(ip)->i_mount->m_dd_awu_hw_max)
>  
>  static inline bool
>  xfs_inode_can_hw_atomicwrite(
>  	struct xfs_inode	*ip)
>  {
> -	struct xfs_mount	*mp = ip->i_mount;
> -	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
> -
> -	if (mp->m_sb.sb_blocksize < target->bt_bdev_awu_min)
> -		return false;
> -	if (mp->m_sb.sb_blocksize > target->bt_bdev_awu_max)
> -		return false;
> -
> -	return true;
> +	return xfs_inode_hw_atomicwrite_max(ip);
>  }
>  
>  /*
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index 00b53f479ece..ee68c026e6cd 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -1082,6 +1082,20 @@ xfs_mountfs(
>  		xfs_zone_gc_start(mp);
>  	}
>  
> +	/*
> +	 * Set atomic write unit max for mp. Ignore devices which cannot
> +	 * atomically write a single block, as they would be uncommon and more
> +	 * difficult to support.
> +	 */
> +	if (mp->m_ddev_targp->bt_bdev_awu_min <= mp->m_sb.sb_blocksize &&
> +	    mp->m_ddev_targp->bt_bdev_awu_max >= mp->m_sb.sb_blocksize)
> +		mp->m_dd_awu_hw_max = mp->m_ddev_targp->bt_bdev_awu_max;

If we don't want to use the device's atomic write capabilities due to
fsblock alignment problems, why not just zero out bt_bdev_awu_min/max?
That would cut down on the number of "awu" variables around the
codebase.

/*
 * Turn off hardware atomic writes for a buftarg whose device cannot
 * atomically write exactly one fsblock.  Most devices set the min_awu to
 * the LBA size, but the spec allows for a functionality gap, so the
 * filesystem block size may fall outside the advertised [min, max] range.
 * When it does, both limits are zeroed; otherwise they are left untouched.
 */
static void
xfs_buftarg_reconcile_awu(
	struct xfs_buftarg	*btp)
{
	struct xfs_mount	*mount = btp->bt_mount;

	/* A single fsblock fits the device's atomic write range: keep it. */
	if (btp->bt_bdev_awu_min <= mount->m_sb.sb_blocksize &&
	    btp->bt_bdev_awu_max >= mount->m_sb.sb_blocksize)
		return;

	btp->bt_bdev_awu_max = 0;
	btp->bt_bdev_awu_min = 0;
}

	/*
	 * Reconcile the ddev target unconditionally; the rtdev target pointer
	 * may be NULL, so only reconcile it when it is set.
	 */
	xfs_buftarg_reconcile_awu(mp->m_ddev_targp);
	if (mp->m_rtdev_targp)
		xfs_buftarg_reconcile_awu(mp->m_rtdev_targp);

Hrm?

--D

> +
> +	if (mp->m_rtdev_targp &&
> +	    mp->m_rtdev_targp->bt_bdev_awu_min <= mp->m_sb.sb_blocksize &&
> +	    mp->m_rtdev_targp->bt_bdev_awu_max >= mp->m_sb.sb_blocksize)
> +		mp->m_rt_awu_hw_max = mp->m_rtdev_targp->bt_bdev_awu_max;
> +
>  	return 0;
>  
>   out_agresv:
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index e5192c12e7ac..2819e160f0e9 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -231,6 +231,10 @@ typedef struct xfs_mount {
>  	unsigned int		m_max_open_zones;
>  	unsigned int		m_zonegc_low_space;
>  
> +	/* ddev and rtdev HW max atomic write size */
> +	unsigned int		m_dd_awu_hw_max;
> +	unsigned int		m_rt_awu_hw_max;
> +
>  	/*
>  	 * Bitsets of per-fs metadata that have been checked and/or are sick.
>  	 * Callers must hold m_sb_lock to access these two fields.
> -- 
> 2.31.1
> 
> 

  reply	other threads:[~2025-04-23  0:38 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-22 12:27 [PATCH v8 00/15] large atomic writes for xfs John Garry
2025-04-22 12:27 ` [PATCH v8 01/15] fs: add atomic write unit max opt to statx John Garry
2025-04-22 12:27 ` [PATCH v8 02/15] xfs: add helpers to compute log item overhead John Garry
2025-04-22 12:27 ` [PATCH v8 03/15] xfs: add helpers to compute transaction reservation for finishing intent items John Garry
2025-04-22 12:27 ` [PATCH v8 04/15] xfs: rename xfs_inode_can_atomicwrite() -> xfs_inode_can_hw_atomicwrite() John Garry
2025-04-22 12:27 ` [PATCH v8 05/15] xfs: ignore HW which cannot atomic write a single block John Garry
2025-04-23  0:38   ` Darrick J. Wong [this message]
2025-04-23  7:15     ` John Garry
2025-04-23  8:10       ` Christoph Hellwig
2025-04-23  8:28         ` John Garry
2025-04-23  8:33           ` Christoph Hellwig
2025-04-23 15:12             ` Darrick J. Wong
2025-04-23 15:46               ` Christoph Hellwig
2025-04-22 12:27 ` [PATCH v8 06/15] xfs: allow block allocator to take an alignment hint John Garry
2025-04-22 12:27 ` [PATCH v8 07/15] xfs: refactor xfs_reflink_end_cow_extent() John Garry
2025-04-22 12:27 ` [PATCH v8 08/15] xfs: refine atomic write size check in xfs_file_write_iter() John Garry
2025-04-22 12:27 ` [PATCH v8 09/15] xfs: add xfs_atomic_write_cow_iomap_begin() John Garry
2025-04-23  8:12   ` Christoph Hellwig
2025-04-22 12:27 ` [PATCH v8 10/15] xfs: add large atomic writes checks in xfs_direct_write_iomap_begin() John Garry
2025-04-23  8:16   ` Christoph Hellwig
2025-04-23  8:18     ` John Garry
2025-04-22 12:27 ` [PATCH v8 11/15] xfs: commit CoW-based atomic writes atomically John Garry
2025-04-23  8:23   ` Christoph Hellwig
2025-04-23 14:58     ` Darrick J. Wong
2025-04-23 15:53       ` Christoph Hellwig
2025-04-23 15:58         ` Darrick J. Wong
2025-04-23 16:38           ` John Garry
2025-04-22 12:27 ` [PATCH v8 12/15] xfs: add xfs_file_dio_write_atomic() John Garry
2025-04-23  8:25   ` Christoph Hellwig
2025-04-23  8:29     ` John Garry
2025-04-22 12:27 ` [PATCH v8 13/15] xfs: add xfs_compute_atomic_write_unit_max() John Garry
2025-04-23  8:26   ` Christoph Hellwig
2025-04-22 12:27 ` [PATCH v8 14/15] xfs: update atomic write limits John Garry
2025-04-23  8:27   ` Christoph Hellwig
2025-04-22 12:27 ` [PATCH v8 15/15] xfs: allow sysadmins to specify a maximum atomic write limit at mount time John Garry
2025-04-23  8:32   ` Christoph Hellwig
2025-04-23 15:01     ` Darrick J. Wong
2025-04-23 15:21       ` Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250423003823.GW25675@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=brauner@kernel.org \
    --cc=catherine.hoang@oracle.com \
    --cc=cem@kernel.org \
    --cc=dchinner@redhat.com \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=john.g.garry@oracle.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=ojaswin@linux.ibm.com \
    --cc=ritesh.list@gmail.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).