linux-api.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: John Garry <john.g.garry@oracle.com>
Cc: brauner@kernel.org, hch@lst.de, viro@zeniv.linux.org.uk,
	jack@suse.cz, cem@kernel.org, linux-fsdevel@vger.kernel.org,
	dchinner@redhat.com, linux-xfs@vger.kernel.org,
	linux-kernel@vger.kernel.org, ojaswin@linux.ibm.com,
	ritesh.list@gmail.com, martin.petersen@oracle.com,
	linux-ext4@vger.kernel.org, linux-block@vger.kernel.org,
	catherine.hoang@oracle.com, linux-api@vger.kernel.org
Subject: [PATCH v10.1 05/15] xfs: ignore HW which cannot atomic write a single block
Date: Fri, 2 May 2025 13:13:20 -0700	[thread overview]
Message-ID: <20250502201320.GV25675@frogsfrogsfrogs> (raw)
In-Reply-To: <20250501165733.1025207-6-john.g.garry@oracle.com>

From: Darrick J. Wong <djwong@kernel.org>

Currently only HW which can write at least 1x block is supported.

For supporting atomic writes > 1x block, a CoW-based method will also be
used and this will not be resticted to using HW which can write >= 1x
block.

However for deciding if HW-based atomic writes can be used, we need to
start adding checks for write length < HW min, which complicates the
code.  Indeed, a statx field similar to unit_max_opt should also be
added for this minimum, which is undesirable.

HW which can only write > 1x blocks would be uncommon and quite weird,
so let's just not support it.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
v10.1: rename xfs_getsize_buftarg and rebase on previous changes to
xfs_getsize_buftarg
---
 fs/xfs/xfs_buf.h   |    4 ++--
 fs/xfs/xfs_inode.h |   14 ++------------
 fs/xfs/xfs_buf.c   |   44 ++++++++++++++++++++++++++++++++++++--------
 fs/xfs/xfs_super.c |    6 +++---
 4 files changed, 43 insertions(+), 25 deletions(-)

diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 132210705602b4..7759fe35d93ea7 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -112,7 +112,7 @@ struct xfs_buftarg {
 	struct percpu_counter	bt_readahead_count;
 	struct ratelimit_state	bt_ioerror_rl;
 
-	/* Atomic write unit values */
+	/* Atomic write unit values, bytes */
 	unsigned int		bt_bdev_awu_min;
 	unsigned int		bt_bdev_awu_max;
 
@@ -374,7 +374,7 @@ struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp,
 extern void xfs_free_buftarg(struct xfs_buftarg *);
 extern void xfs_buftarg_wait(struct xfs_buftarg *);
 extern void xfs_buftarg_drain(struct xfs_buftarg *);
-extern int xfs_setsize_buftarg(struct xfs_buftarg *, unsigned int);
+extern int xfs_configure_buftarg(struct xfs_buftarg *, unsigned int);
 
 #define xfs_getsize_buftarg(buftarg)	block_size((buftarg)->bt_bdev)
 #define xfs_readonly_buftarg(buftarg)	bdev_read_only((buftarg)->bt_bdev)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index bdbbff0d8d9920..d7e2b902ef5c97 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -356,19 +356,9 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip)
 	(XFS_IS_REALTIME_INODE(ip) ? \
 		(ip)->i_mount->m_rtdev_targp : (ip)->i_mount->m_ddev_targp)
 
-static inline bool
-xfs_inode_can_hw_atomic_write(
-	struct xfs_inode	*ip)
+static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip)
 {
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
-
-	if (mp->m_sb.sb_blocksize < target->bt_bdev_awu_min)
-		return false;
-	if (mp->m_sb.sb_blocksize > target->bt_bdev_awu_max)
-		return false;
-
-	return true;
+	return xfs_inode_buftarg(ip)->bt_bdev_awu_max > 0;
 }
 
 /*
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 292891d6ff69ac..770dc4ca79e4c4 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1714,13 +1714,45 @@ xfs_free_buftarg(
 	kfree(btp);
 }
 
+/*
+ * Configure this buffer target for hardware-assisted atomic writes if the
+ * underlying block device supports is congruent with the filesystem geometry.
+ */
+static inline void
+xfs_configure_buftarg_atomic_writes(
+	struct xfs_buftarg	*btp)
+{
+	struct xfs_mount	*mp = btp->bt_mount;
+	unsigned int		min_bytes, max_bytes;
+
+	min_bytes = bdev_atomic_write_unit_min_bytes(btp->bt_bdev);
+	max_bytes = bdev_atomic_write_unit_max_bytes(btp->bt_bdev);
+
+	/*
+	 * Ignore atomic write geometry that is nonsense or doesn't even cover
+	 * a single fsblock.
+	 */
+	if (min_bytes > max_bytes ||
+	    min_bytes > mp->m_sb.sb_blocksize ||
+	    max_bytes < mp->m_sb.sb_blocksize) {
+		min_bytes = 0;
+		max_bytes = 0;
+	}
+
+	btp->bt_bdev_awu_min = min_bytes;
+	btp->bt_bdev_awu_max = max_bytes;
+}
+
+/* Configure a buffer target that abstracts a block device. */
 int
-xfs_setsize_buftarg(
+xfs_configure_buftarg(
 	struct xfs_buftarg	*btp,
 	unsigned int		sectorsize)
 {
 	int			error;
 
+	ASSERT(btp->bt_bdev != NULL);
+
 	/* Set up metadata sector size info */
 	btp->bt_meta_sectorsize = sectorsize;
 	btp->bt_meta_sectormask = sectorsize - 1;
@@ -1733,6 +1765,9 @@ xfs_setsize_buftarg(
 		return -EINVAL;
 	}
 
+	if (bdev_can_atomic_write(btp->bt_bdev))
+		xfs_configure_buftarg_atomic_writes(btp);
+
 	return 0;
 }
 
@@ -1795,13 +1830,6 @@ xfs_alloc_buftarg(
 	btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off,
 					    mp, ops);
 
-	if (bdev_can_atomic_write(btp->bt_bdev)) {
-		btp->bt_bdev_awu_min = bdev_atomic_write_unit_min_bytes(
-						btp->bt_bdev);
-		btp->bt_bdev_awu_max = bdev_atomic_write_unit_max_bytes(
-						btp->bt_bdev);
-	}
-
 	/*
 	 * When allocating the buftargs we have not yet read the super block and
 	 * thus don't know the file system sector size yet.
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 83de3ac39ae53b..ed23e6ffe644b6 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -557,7 +557,7 @@ xfs_setup_devices(
 {
 	int			error;
 
-	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
+	error = xfs_configure_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
 	if (error)
 		return error;
 
@@ -566,7 +566,7 @@ xfs_setup_devices(
 
 		if (xfs_has_sector(mp))
 			log_sector_size = mp->m_sb.sb_logsectsize;
-		error = xfs_setsize_buftarg(mp->m_logdev_targp,
+		error = xfs_configure_buftarg(mp->m_logdev_targp,
 					    log_sector_size);
 		if (error)
 			return error;
@@ -580,7 +580,7 @@ xfs_setup_devices(
 		}
 		mp->m_rtdev_targp = mp->m_ddev_targp;
 	} else if (mp->m_rtname) {
-		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
+		error = xfs_configure_buftarg(mp->m_rtdev_targp,
 					    mp->m_sb.sb_sectsize);
 		if (error)
 			return error;

  reply	other threads:[~2025-05-02 20:13 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-01 16:57 [PATCH v10 00/15] large atomic writes for xfs John Garry
2025-05-01 16:57 ` [PATCH v10 01/15] fs: add atomic write unit max opt to statx John Garry
2025-05-02 20:12   ` [PATCH v10.1 1.1/15] xfs: only call xfs_setsize_buftarg once per buffer target Darrick J. Wong
2025-05-01 16:57 ` [PATCH v10 02/15] xfs: add helpers to compute log item overhead John Garry
2025-05-01 16:57 ` [PATCH v10 03/15] xfs: add helpers to compute transaction reservation for finishing intent items John Garry
2025-05-01 16:57 ` [PATCH v10 04/15] xfs: rename xfs_inode_can_atomicwrite() -> xfs_inode_can_hw_atomic_write() John Garry
2025-05-01 16:57 ` [PATCH v10 05/15] xfs: ignore HW which cannot atomic write a single block John Garry
2025-05-02 20:13   ` Darrick J. Wong [this message]
2025-05-01 16:57 ` [PATCH v10 06/15] xfs: allow block allocator to take an alignment hint John Garry
2025-05-01 16:57 ` [PATCH v10 07/15] xfs: refactor xfs_reflink_end_cow_extent() John Garry
2025-05-01 16:57 ` [PATCH v10 08/15] xfs: refine atomic write size check in xfs_file_write_iter() John Garry
2025-05-01 16:57 ` [PATCH v10 09/15] xfs: add xfs_atomic_write_cow_iomap_begin() John Garry
2025-05-01 16:57 ` [PATCH v10 10/15] xfs: add large atomic writes checks in xfs_direct_write_iomap_begin() John Garry
2025-05-01 16:57 ` [PATCH v10 11/15] xfs: commit CoW-based atomic writes atomically John Garry
2025-05-01 16:57 ` [PATCH v10 12/15] xfs: add xfs_file_dio_write_atomic() John Garry
2025-05-01 16:57 ` [PATCH v10 13/15] xfs: add xfs_calc_atomic_write_unit_max() John Garry
2025-05-05  5:22   ` Darrick J. Wong
2025-05-01 16:57 ` [PATCH v10 14/15] xfs: update atomic write limits John Garry
2025-05-01 16:57 ` [PATCH v10 15/15] xfs: allow sysadmins to specify a maximum atomic write limit at mount time John Garry
2025-05-01 19:52 ` [PATCH 16/15] xfs: only call xfs_setsize_buftarg once per buffer target Darrick J. Wong
2025-05-02  6:57   ` Christoph Hellwig
2025-05-02 16:44     ` Darrick J. Wong
2025-05-01 19:53 ` [PATCH 17/15] xfs: move buftarg atomic write geometry config to setsize_buftarg Darrick J. Wong
2025-05-02  6:58   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250502201320.GV25675@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=brauner@kernel.org \
    --cc=catherine.hoang@oracle.com \
    --cc=cem@kernel.org \
    --cc=dchinner@redhat.com \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=john.g.garry@oracle.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=ojaswin@linux.ibm.com \
    --cc=ritesh.list@gmail.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).