From: John Garry <john.g.garry@oracle.com>
To: brauner@kernel.org, djwong@kernel.org, cem@kernel.org
Cc: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-kernel@vger.kernel.org, ojaswin@linux.ibm.com,
ritesh.list@gmail.com, martin.petersen@oracle.com, tytso@mit.edu,
linux-ext4@vger.kernel.org, John Garry <john.g.garry@oracle.com>
Subject: [PATCH v3 08/12] xfs: Iomap SW-based atomic write support
Date: Thu, 27 Feb 2025 18:08:09 +0000 [thread overview]
Message-ID: <20250227180813.1553404-9-john.g.garry@oracle.com> (raw)
In-Reply-To: <20250227180813.1553404-1-john.g.garry@oracle.com>
In cases of an atomic write occurs for misaligned or discontiguous disk
blocks, we will use a CoW-based method to issue the atomic write.
So, for that case, return -EAGAIN to request that the write be issued in
CoW atomic write mode. The dio write path should detect this, similar to
how misaligned regular DIO writes are handled.
For normal HW-based mode, when the range which we are atomic writing to
covers a shared data extent, try to allocate a new CoW fork. However, if
we find that what we allocated does not meet atomic write requirements
in terms of length and alignment, then fallback on the CoW-based mode
for the atomic write.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
fs/xfs/xfs_iomap.c | 143 ++++++++++++++++++++++++++++++++++++++++++++-
fs/xfs/xfs_iomap.h | 1 +
2 files changed, 142 insertions(+), 2 deletions(-)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index edfc038bf728..573108cbdc5c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -795,6 +795,23 @@ imap_spans_range(
return true;
}
+static bool
+xfs_bmap_valid_for_atomic_write(
+ struct xfs_bmbt_irec *imap,
+ xfs_fileoff_t offset_fsb,
+ xfs_fileoff_t end_fsb)
+{
+ /* Misaligned start block wrt size */
+ if (!IS_ALIGNED(imap->br_startblock, imap->br_blockcount))
+ return false;
+
+ /* Discontiguous or mixed extents */
+ if (!imap_spans_range(imap, offset_fsb, end_fsb))
+ return false;
+
+ return true;
+}
+
static int
xfs_direct_write_iomap_begin(
struct inode *inode,
@@ -809,10 +826,13 @@ xfs_direct_write_iomap_begin(
struct xfs_bmbt_irec imap, cmap;
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length);
+ xfs_fileoff_t orig_end_fsb = end_fsb;
+ bool atomic_hw = flags & IOMAP_ATOMIC_HW;
int nimaps = 1, error = 0;
unsigned int reflink_flags = 0;
bool shared = false;
u16 iomap_flags = 0;
+ bool needs_alloc;
unsigned int lockmode;
u64 seq;
@@ -871,13 +891,37 @@ xfs_direct_write_iomap_begin(
&lockmode, reflink_flags);
if (error)
goto out_unlock;
- if (shared)
+ if (shared) {
+ if (atomic_hw &&
+ !xfs_bmap_valid_for_atomic_write(&cmap,
+ offset_fsb, end_fsb)) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
goto out_found_cow;
+ }
end_fsb = imap.br_startoff + imap.br_blockcount;
length = XFS_FSB_TO_B(mp, end_fsb) - offset;
}
- if (imap_needs_alloc(inode, flags, &imap, nimaps))
+ needs_alloc = imap_needs_alloc(inode, flags, &imap, nimaps);
+
+ if (atomic_hw) {
+ error = -EAGAIN;
+ /*
+ * Use CoW method for when we need to alloc > 1 block,
+ * otherwise we might allocate less than what we need here and
+ * have multiple mappings.
+ */
+ if (needs_alloc && orig_end_fsb - offset_fsb > 1)
+ goto out_unlock;
+
+ if (!xfs_bmap_valid_for_atomic_write(&imap, offset_fsb,
+ orig_end_fsb))
+ goto out_unlock;
+ }
+
+ if (needs_alloc)
goto allocate_blocks;
/*
@@ -965,6 +1009,101 @@ const struct iomap_ops xfs_direct_write_iomap_ops = {
.iomap_begin = xfs_direct_write_iomap_begin,
};
+static int
+xfs_atomic_write_sw_iomap_begin(
+ struct inode *inode,
+ loff_t offset,
+ loff_t length,
+ unsigned flags,
+ struct iomap *iomap,
+ struct iomap *srcmap)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_bmbt_irec imap, cmap;
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length);
+ int nimaps = 1, error;
+ unsigned int reflink_flags;
+ bool shared = false;
+ u16 iomap_flags = 0;
+ unsigned int lockmode = XFS_ILOCK_EXCL;
+ u64 seq;
+
+ if (xfs_is_shutdown(mp))
+ return -EIO;
+
+ reflink_flags = XFS_REFLINK_CONVERT | XFS_REFLINK_ATOMIC_SW;
+
+ /*
+ * Set IOMAP_F_DIRTY similar to xfs_atomic_write_iomap_begin()
+ */
+ if (offset + length > i_size_read(inode))
+ iomap_flags |= IOMAP_F_DIRTY;
+
+ error = xfs_ilock_for_iomap(ip, flags, &lockmode);
+ if (error)
+ return error;
+
+ error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+ &nimaps, 0);
+ if (error)
+ goto out_unlock;
+
+ error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
+ &lockmode, reflink_flags);
+ /*
+ * Don't check @shared. For atomic writes, we should error when
+ * we don't get a COW mapping
+ */
+ if (error)
+ goto out_unlock;
+
+ end_fsb = imap.br_startoff + imap.br_blockcount;
+
+ length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
+ trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
+ if (imap.br_startblock != HOLESTARTBLOCK) {
+ seq = xfs_iomap_inode_sequence(ip, 0);
+ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
+ if (error)
+ goto out_unlock;
+ }
+ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
+ xfs_iunlock(ip, lockmode);
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);
+
+out_unlock:
+ if (lockmode)
+ xfs_iunlock(ip, lockmode);
+ return error;
+}
+
+static int
+xfs_atomic_write_iomap_begin(
+ struct inode *inode,
+ loff_t offset,
+ loff_t length,
+ unsigned flags,
+ struct iomap *iomap,
+ struct iomap *srcmap)
+{
+ ASSERT(flags & IOMAP_WRITE);
+ ASSERT(flags & IOMAP_DIRECT);
+
+ if (flags & IOMAP_ATOMIC_SW)
+ return xfs_atomic_write_sw_iomap_begin(inode, offset, length,
+ flags, iomap, srcmap);
+
+ ASSERT(flags & IOMAP_ATOMIC_HW);
+ return xfs_direct_write_iomap_begin(inode, offset, length, flags,
+ iomap, srcmap);
+}
+
+const struct iomap_ops xfs_atomic_write_iomap_ops = {
+ .iomap_begin = xfs_atomic_write_iomap_begin,
+};
+
static int
xfs_dax_write_iomap_end(
struct inode *inode,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 8347268af727..b7fbbc909943 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -53,5 +53,6 @@ extern const struct iomap_ops xfs_read_iomap_ops;
extern const struct iomap_ops xfs_seek_iomap_ops;
extern const struct iomap_ops xfs_xattr_iomap_ops;
extern const struct iomap_ops xfs_dax_write_iomap_ops;
+extern const struct iomap_ops xfs_atomic_write_iomap_ops;
#endif /* __XFS_IOMAP_H__*/
--
2.31.1
next prev parent reply other threads:[~2025-02-27 18:08 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-27 18:08 [PATCH v3 00/12] large atomic writes for xfs with CoW John Garry
2025-02-27 18:08 ` [PATCH v3 01/12] xfs: Pass flags to xfs_reflink_allocate_cow() John Garry
2025-02-28 1:19 ` Darrick J. Wong
2025-02-27 18:08 ` [PATCH v3 02/12] iomap: Rename IOMAP_ATOMIC -> IOMAP_ATOMIC_HW John Garry
2025-02-27 18:08 ` [PATCH v3 03/12] xfs: Switch atomic write size check in xfs_file_write_iter() John Garry
2025-02-27 18:08 ` [PATCH v3 04/12] xfs: Refactor xfs_reflink_end_cow_extent() John Garry
2025-02-27 18:08 ` [PATCH v3 05/12] iomap: Support SW-based atomic writes John Garry
2025-02-28 1:08 ` Darrick J. Wong
2025-02-27 18:08 ` [PATCH v3 06/12] iomap: Lift blocksize restriction on " John Garry
2025-02-27 18:08 ` [PATCH v3 07/12] xfs: Reflink CoW-based atomic write support John Garry
2025-02-27 18:08 ` John Garry [this message]
2025-02-28 1:19 ` [PATCH v3 08/12] xfs: Iomap SW-based " Darrick J. Wong
2025-03-01 19:44 ` kernel test robot
2025-02-27 18:08 ` [PATCH v3 09/12] xfs: Add xfs_file_dio_write_atomic() John Garry
2025-02-28 1:19 ` Darrick J. Wong
2025-02-28 7:45 ` John Garry
2025-02-28 15:39 ` Darrick J. Wong
2025-03-03 13:45 ` John Garry
2025-02-27 18:08 ` [PATCH v3 10/12] xfs: Commit CoW-based atomic writes atomically John Garry
2025-02-28 1:13 ` Darrick J. Wong
2025-02-27 18:08 ` [PATCH v3 11/12] xfs: Update atomic write max size John Garry
2025-02-27 18:08 ` [PATCH v3 12/12] xfs: Allow block allocator to take an alignment hint John Garry
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250227180813.1553404-9-john.g.garry@oracle.com \
--to=john.g.garry@oracle.com \
--cc=brauner@kernel.org \
--cc=cem@kernel.org \
--cc=djwong@kernel.org \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-xfs@vger.kernel.org \
--cc=martin.petersen@oracle.com \
--cc=ojaswin@linux.ibm.com \
--cc=ritesh.list@gmail.com \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox