linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: John Garry <john.g.garry@oracle.com>
Cc: hch@lst.de, viro@zeniv.linux.org.uk, brauner@kernel.org,
	dchinner@redhat.com, jack@suse.cz, chandan.babu@oracle.com,
	martin.petersen@oracle.com, linux-kernel@vger.kernel.org,
	linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	tytso@mit.edu, jbongio@google.com, ojaswin@linux.ibm.com
Subject: Re: [PATCH 1/6] fs: iomap: Atomic write support
Date: Fri, 2 Feb 2024 09:25:13 -0800	[thread overview]
Message-ID: <20240202172513.GZ6226@frogsfrogsfrogs> (raw)
In-Reply-To: <20240124142645.9334-2-john.g.garry@oracle.com>

On Wed, Jan 24, 2024 at 02:26:40PM +0000, John Garry wrote:
> Add flag IOMAP_ATOMIC_WRITE to indicate to the FS that an atomic write
> bio is being created and all the rules there need to be followed.
> 
> It is the task of the FS iomap iter callbacks to ensure that the mapping
> created adheres to those rules, like size is power-of-2, is at a
> naturally-aligned offset, etc. However, checking for a single iovec, i.e.
> iter type is ubuf, is done in __iomap_dio_rw().
> 
> A write should only produce a single bio, so error when it doesn't.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  fs/iomap/direct-io.c  | 21 ++++++++++++++++++++-
>  fs/iomap/trace.h      |  3 ++-
>  include/linux/iomap.h |  1 +
>  3 files changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index bcd3f8cf5ea4..25736d01b857 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -275,10 +275,12 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
>  static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
>  		struct iomap_dio *dio)
>  {
> +	bool atomic_write = iter->flags & IOMAP_ATOMIC;
>  	const struct iomap *iomap = &iter->iomap;
>  	struct inode *inode = iter->inode;
>  	unsigned int fs_block_size = i_blocksize(inode), pad;
>  	loff_t length = iomap_length(iter);
> +	const size_t iter_len = iter->len;
>  	loff_t pos = iter->pos;
>  	blk_opf_t bio_opf;
>  	struct bio *bio;
> @@ -381,6 +383,9 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
>  					  GFP_KERNEL);
>  		bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
>  		bio->bi_ioprio = dio->iocb->ki_ioprio;
> +		if (atomic_write)
> +			bio->bi_opf |= REQ_ATOMIC;

This really ought to be in iomap_dio_bio_opflags.  Unless you can't pass
REQ_ATOMIC to bio_alloc*, in which case there ought to be a comment
about why.

Also, what's the meaning of REQ_OP_READ | REQ_ATOMIC?  Does that
actually work?  I don't know what that means, and "block: Add REQ_ATOMIC
flag" says that's not a valid combination.  I'll complain about this
more below.

> +
>  		bio->bi_private = dio;
>  		bio->bi_end_io = iomap_dio_bio_end_io;
>  
> @@ -397,6 +402,12 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
>  		}
>  
>  		n = bio->bi_iter.bi_size;
> +		if (atomic_write && n != iter_len) {

s/iter_len/orig_len/ ?

> +			/* This bio should have covered the complete length */
> +			ret = -EINVAL;
> +			bio_put(bio);
> +			goto out;
> +		}
>  		if (dio->flags & IOMAP_DIO_WRITE) {
>  			task_io_account_write(n);
>  		} else {
> @@ -554,12 +565,17 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  	struct blk_plug plug;
>  	struct iomap_dio *dio;
>  	loff_t ret = 0;
> +	bool is_read = iov_iter_rw(iter) == READ;
> +	bool atomic_write = (iocb->ki_flags & IOCB_ATOMIC) && !is_read;

Hrmm.  So if the caller passes in an IOCB_ATOMIC iocb with a READ iter,
we'll silently drop IOCB_ATOMIC and do the read anyway?  That seems like
a nonsense combination, but is that ok for some reason?

>  	trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before);
>  
>  	if (!iomi.len)
>  		return NULL;
>  
> +	if (atomic_write && !iter_is_ubuf(iter))
> +		return ERR_PTR(-EINVAL);

Does !iter_is_ubuf actually happen?  Why don't we support any of the
other ITER_ types?  Is it because hardware doesn't want vectored
buffers?

I really wish there was more commenting on /why/ we do things here:

	if (iocb->ki_flags & IOCB_ATOMIC) {
		/* atomic reads do not make sense */
		if (iov_iter_rw(iter) == READ)
			return ERR_PTR(-EINVAL);

		/*
		 * block layer doesn't want to handle handle vectors of
		 * buffers when performing an atomic write i guess?
		 */
		if (!iter_is_ubuf(iter))
			return ERR_PTR(-EINVAL);

		iomi.flags |= IOMAP_ATOMIC;
	}

> +
>  	dio = kmalloc(sizeof(*dio), GFP_KERNEL);
>  	if (!dio)
>  		return ERR_PTR(-ENOMEM);
> @@ -579,7 +595,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  	if (iocb->ki_flags & IOCB_NOWAIT)
>  		iomi.flags |= IOMAP_NOWAIT;
>  
> -	if (iov_iter_rw(iter) == READ) {
> +	if (is_read) {
>  		/* reads can always complete inline */
>  		dio->flags |= IOMAP_DIO_INLINE_COMP;
>  
> @@ -605,6 +621,9 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  		if (iocb->ki_flags & IOCB_DIO_CALLER_COMP)
>  			dio->flags |= IOMAP_DIO_CALLER_COMP;
>  
> +		if (atomic_write)
> +			iomi.flags |= IOMAP_ATOMIC;
> +
>  		if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
>  			ret = -EAGAIN;
>  			if (iomi.pos >= dio->i_size ||
> diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
> index c16fd55f5595..c95576420bca 100644
> --- a/fs/iomap/trace.h
> +++ b/fs/iomap/trace.h
> @@ -98,7 +98,8 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
>  	{ IOMAP_REPORT,		"REPORT" }, \
>  	{ IOMAP_FAULT,		"FAULT" }, \
>  	{ IOMAP_DIRECT,		"DIRECT" }, \
> -	{ IOMAP_NOWAIT,		"NOWAIT" }
> +	{ IOMAP_NOWAIT,		"NOWAIT" }, \
> +	{ IOMAP_ATOMIC,		"ATOMIC" }
>  
>  #define IOMAP_F_FLAGS_STRINGS \
>  	{ IOMAP_F_NEW,		"NEW" }, \
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 96dd0acbba44..9eac704a0d6f 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -178,6 +178,7 @@ struct iomap_folio_ops {
>  #else
>  #define IOMAP_DAX		0
>  #endif /* CONFIG_FS_DAX */
> +#define IOMAP_ATOMIC		(1 << 9)
>  
>  struct iomap_ops {
>  	/*
> -- 
> 2.31.1
> 
> 

  reply	other threads:[~2024-02-02 17:25 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-24 14:26 [PATCH 0/6] block atomic writes for XFS John Garry
2024-01-24 14:26 ` [PATCH 1/6] fs: iomap: Atomic write support John Garry
2024-02-02 17:25   ` Darrick J. Wong [this message]
2024-02-05 11:29     ` John Garry
2024-02-13  6:55       ` Christoph Hellwig
2024-02-13  8:20         ` John Garry
2024-02-15 11:08           ` John Garry
2024-02-13 18:08       ` Darrick J. Wong
2024-02-05 15:20   ` Pankaj Raghav (Samsung)
2024-02-05 15:41     ` John Garry
2024-01-24 14:26 ` [PATCH 2/6] fs: Add FS_XFLAG_ATOMICWRITES flag John Garry
2024-02-02 17:57   ` Darrick J. Wong
2024-02-05 12:58     ` John Garry
2024-02-13  6:56       ` Christoph Hellwig
2024-02-13 17:08       ` Darrick J. Wong
2024-01-24 14:26 ` [PATCH 3/6] fs: xfs: Support FS_XFLAG_ATOMICWRITES for rtvol John Garry
2024-02-02 17:52   ` Darrick J. Wong
2024-02-03  7:40     ` Ojaswin Mujoo
2024-02-05 12:51     ` John Garry
2024-02-13 17:22       ` Darrick J. Wong
2024-02-14 12:19         ` John Garry
2024-01-24 14:26 ` [PATCH 4/6] fs: xfs: Support atomic write for statx John Garry
2024-02-02 18:05   ` Darrick J. Wong
2024-02-05 13:10     ` John Garry
2024-02-13 17:37       ` Darrick J. Wong
2024-02-14 12:26         ` John Garry
2024-02-09  7:00   ` Ojaswin Mujoo
2024-02-09 17:30     ` John Garry
2024-02-12 11:48       ` Ojaswin Mujoo
2024-02-12 12:05       ` Ojaswin Mujoo
2024-01-24 14:26 ` [PATCH RFC 5/6] fs: xfs: iomap atomic write support John Garry
2024-02-02 18:47   ` Darrick J. Wong
2024-02-05 13:36     ` John Garry
2024-02-06  1:15       ` Dave Chinner
2024-02-06  9:53         ` John Garry
2024-02-07  0:06           ` Dave Chinner
2024-02-07 14:13             ` John Garry
2024-02-09  1:40               ` Dave Chinner
2024-02-09 12:47                 ` John Garry
2024-02-13 23:41                   ` Dave Chinner
2024-02-14 11:06                     ` John Garry
2024-02-14 23:03                       ` Dave Chinner
2024-02-15  9:53                         ` John Garry
2024-02-13 17:50       ` Darrick J. Wong
2024-02-14 12:13         ` John Garry
2024-01-24 14:26 ` [PATCH 6/6] fs: xfs: Set FMODE_CAN_ATOMIC_WRITE for FS_XFLAG_ATOMICWRITES set John Garry
2024-02-02 18:06   ` Darrick J. Wong
2024-02-05 10:26     ` John Garry
2024-02-13 17:59       ` Darrick J. Wong
2024-02-14 12:36         ` John Garry
2024-02-21 17:00           ` Darrick J. Wong
2024-02-21 17:38             ` John Garry
2024-02-24  4:18               ` Darrick J. Wong
2024-02-09  7:14 ` [PATCH 0/6] block atomic writes for XFS Ojaswin Mujoo
2024-02-09  9:22   ` John Garry
2024-02-12 12:06     ` Ojaswin Mujoo
2024-02-13  7:22 ` Christoph Hellwig
2024-02-13 17:55   ` Darrick J. Wong
2024-02-14  7:45     ` Christoph Hellwig
2024-02-21 16:56       ` Darrick J. Wong
2024-02-23  6:57         ` Christoph Hellwig
2024-02-13 23:50   ` Dave Chinner
2024-02-14  7:38     ` Christoph Hellwig
2024-02-13  7:45 ` Ritesh Harjani
2024-02-13  8:41   ` John Garry
2024-02-13  9:10     ` Ritesh Harjani
2024-02-13 22:49     ` Dave Chinner
2024-02-14 10:10       ` John Garry

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240202172513.GZ6226@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=brauner@kernel.org \
    --cc=chandan.babu@oracle.com \
    --cc=dchinner@redhat.com \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=jbongio@google.com \
    --cc=john.g.garry@oracle.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=ojaswin@linux.ibm.com \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).