public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: John Garry <john.g.garry@oracle.com>
Cc: brauner@kernel.org, cem@kernel.org, dchinner@redhat.com,
	hch@lst.de, linux-xfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	ojaswin@linux.ibm.com, ritesh.list@gmail.com,
	martin.petersen@oracle.com
Subject: Re: [PATCH RFC 03/10] iomap: Support CoW-based atomic writes
Date: Wed, 5 Feb 2025 12:11:07 -0800	[thread overview]
Message-ID: <20250205201107.GA21808@frogsfrogsfrogs> (raw)
In-Reply-To: <20250204120127.2396727-4-john.g.garry@oracle.com>

On Tue, Feb 04, 2025 at 12:01:20PM +0000, John Garry wrote:
> Currently atomic write support requires dedicated HW support. This imposes
> a restriction on the filesystem that disk blocks need to be aligned and
> contiguously mapped to FS blocks to issue atomic writes.
> 
> XFS has no method to guarantee FS block alignment. As such, atomic writes
> are currently limited to 1x FS block there.
> 
> To allow deal with the scenario that we are issuing an atomic write over
> misaligned or discontiguous data blocks larger atomic writes - and raise
> the atomic write limit - support a CoW-based software emulated atomic
> write mode.
> 
> For this special mode, the FS will reserve blocks for that data to be
> written and then atomically map that data in once the data has been
> commited to disk.
> 
> It is the responsibility of the FS to detect discontiguous atomic writes
> and switch to IOMAP_DIO_ATOMIC_COW mode and retry the write.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  fs/iomap/direct-io.c  | 23 ++++++++++++++++-------
>  include/linux/iomap.h |  9 +++++++++
>  2 files changed, 25 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 3dd883dd77d2..e63b5096bcd8 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -271,7 +271,7 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
>   * clearing the WRITE_THROUGH flag in the dio request.
>   */
>  static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
> -		const struct iomap *iomap, bool use_fua, bool atomic)
> +		const struct iomap *iomap, bool use_fua, bool atomic_bio)
>  {
>  	blk_opf_t opflags = REQ_SYNC | REQ_IDLE;
>  
> @@ -283,7 +283,7 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
>  		opflags |= REQ_FUA;
>  	else
>  		dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
> -	if (atomic)
> +	if (atomic_bio)
>  		opflags |= REQ_ATOMIC;
>  
>  	return opflags;
> @@ -301,13 +301,19 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
>  	blk_opf_t bio_opf;
>  	struct bio *bio;
>  	bool need_zeroout = false;
> +	bool atomic_bio = false;
>  	bool use_fua = false;
>  	int nr_pages, ret = 0;
>  	size_t copied = 0;
>  	size_t orig_count;
>  
> -	if (atomic && length != iter->len)
> -		return -EINVAL;
> +	if (atomic) {
> +		if (!(iomap->flags & IOMAP_F_ATOMIC_COW)) {
> +			if (length != iter->len)
> +				return -EINVAL;
> +			atomic_bio = true;
> +		}
> +	}
>  
>  	if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1) ||
>  	    !bdev_iter_is_aligned(iomap->bdev, dio->submit.iter))
> @@ -318,7 +324,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
>  		need_zeroout = true;
>  	}
>  
> -	if (iomap->flags & IOMAP_F_SHARED)
> +	if (iomap->flags & (IOMAP_F_SHARED | IOMAP_F_ATOMIC_COW))
>  		dio->flags |= IOMAP_DIO_COW;
>  
>  	if (iomap->flags & IOMAP_F_NEW) {
> @@ -383,7 +389,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
>  			goto out;
>  	}
>  
> -	bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic);
> +	bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic_bio);
>  
>  	nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
>  	do {
> @@ -416,7 +422,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
>  		}
>  
>  		n = bio->bi_iter.bi_size;
> -		if (WARN_ON_ONCE(atomic && n != length)) {
> +		if (WARN_ON_ONCE(atomic_bio && n != length)) {
>  			/*
>  			 * This bio should have covered the complete length,
>  			 * which it doesn't, so error. We may need to zero out
> @@ -639,6 +645,9 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  		if (iocb->ki_flags & IOCB_DIO_CALLER_COMP)
>  			dio->flags |= IOMAP_DIO_CALLER_COMP;
>  
> +		if (dio_flags & IOMAP_DIO_ATOMIC_COW)
> +			iomi.flags |= IOMAP_ATOMIC_COW;
> +
>  		if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
>  			ret = -EAGAIN;
>  			if (iomi.pos >= dio->i_size ||
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 75bf54e76f3b..0a0b6798f517 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -56,6 +56,8 @@ struct vm_fault;
>   *
>   * IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must
>   * never be merged with the mapping before it.
> + *
> + * IOMAP_F_ATOMIC_COW indicates that we require atomic CoW end IO handling.

It more indicates that the filesystem is using copy on write to handle
an untorn write, and will provide the ioend support necessary to commit
the remapping atomically, right?

>   */
>  #define IOMAP_F_NEW		(1U << 0)
>  #define IOMAP_F_DIRTY		(1U << 1)
> @@ -68,6 +70,7 @@ struct vm_fault;
>  #endif /* CONFIG_BUFFER_HEAD */
>  #define IOMAP_F_XATTR		(1U << 5)
>  #define IOMAP_F_BOUNDARY	(1U << 6)
> +#define IOMAP_F_ATOMIC_COW	(1U << 7)
>  
>  /*
>   * Flags set by the core iomap code during operations:
> @@ -183,6 +186,7 @@ struct iomap_folio_ops {
>  #define IOMAP_DAX		0
>  #endif /* CONFIG_FS_DAX */
>  #define IOMAP_ATOMIC		(1 << 9)
> +#define IOMAP_ATOMIC_COW	(1 << 10)

What does IOMAP_ATOMIC_COW do?  There's no description for it (or for
IOMAP_ATOMIC).  Can you have IOMAP_ATOMIC and IOMAP_ATOMIC_COW both set?
Or are they mutually exclusive?

I'm guessing from the code that ATOMIC_COW requires ATOMIC to be set,
but I wonder why because there's no documentation update in the header
files or in Documentation/filesystems/iomap/.

--D

>  struct iomap_ops {
>  	/*
> @@ -434,6 +438,11 @@ struct iomap_dio_ops {
>   */
>  #define IOMAP_DIO_PARTIAL		(1 << 2)
>  
> +/*
> + * Use CoW-based software emulated atomic write.
> + */
> +#define IOMAP_DIO_ATOMIC_COW		(1 << 3)
> +
>  ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
>  		unsigned int dio_flags, void *private, size_t done_before);
> -- 
> 2.31.1
> 
> 

  reply	other threads:[~2025-02-05 20:11 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-04 12:01 [PATCH RFC 00/10] large atomic writes for xfs with CoW John Garry
2025-02-04 12:01 ` [PATCH RFC 01/10] xfs: Switch atomic write size check in xfs_file_write_iter() John Garry
2025-02-04 12:01 ` [PATCH RFC 02/10] xfs: Refactor xfs_reflink_end_cow_extent() John Garry
2025-02-05 19:50   ` Darrick J. Wong
2025-02-06 10:35     ` John Garry
2025-02-06 21:38       ` Darrick J. Wong
2025-02-04 12:01 ` [PATCH RFC 03/10] iomap: Support CoW-based atomic writes John Garry
2025-02-05 20:11   ` Darrick J. Wong [this message]
2025-02-06 11:21     ` John Garry
2025-02-06 21:40       ` Darrick J. Wong
2025-02-04 12:01 ` [PATCH RFC 04/10] xfs: Make xfs_find_trim_cow_extent() public John Garry
2025-02-04 12:01 ` [PATCH RFC 05/10] xfs: Reflink CoW-based atomic write support John Garry
2025-02-04 12:01 ` [PATCH RFC 06/10] xfs: iomap " John Garry
2025-02-05 20:05   ` Darrick J. Wong
2025-02-06 11:10     ` John Garry
2025-02-06 21:44       ` Darrick J. Wong
2025-02-07 11:48         ` John Garry
2025-02-04 12:01 ` [PATCH RFC 07/10] xfs: Add xfs_file_dio_write_atomic() John Garry
2025-02-05 19:55   ` Darrick J. Wong
2025-02-06 10:43     ` John Garry
2025-02-10 16:59   ` John Garry
2025-02-04 12:01 ` [PATCH RFC 08/10] xfs: Commit CoW-based atomic writes atomically John Garry
2025-02-05 19:47   ` Darrick J. Wong
2025-02-06 10:27     ` John Garry
2025-02-06 21:50       ` Darrick J. Wong
2025-02-07 11:52         ` John Garry
2025-02-04 12:01 ` [PATCH RFC 09/10] xfs: Update atomic write max size John Garry
2025-02-05 19:41   ` Darrick J. Wong
2025-02-06  9:15     ` John Garry
2025-02-06 21:54       ` Darrick J. Wong
2025-02-07 11:53         ` John Garry
2025-02-04 12:01 ` [PATCH RFC 10/10] xfs: Allow block allocator to take an alignment hint John Garry
2025-02-05 19:20   ` Darrick J. Wong
2025-02-06  8:10     ` John Garry
2025-02-06 21:54       ` Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250205201107.GA21808@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=brauner@kernel.org \
    --cc=cem@kernel.org \
    --cc=dchinner@redhat.com \
    --cc=hch@lst.de \
    --cc=john.g.garry@oracle.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=ojaswin@linux.ibm.com \
    --cc=ritesh.list@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox