From: "Darrick J. Wong" <djwong@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Christian Brauner <brauner@kernel.org>,
Carlos Maiolino <cem@kernel.org>,
linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH 07/10] iomap: optionally use ioends for direct I/O
Date: Thu, 19 Dec 2024 10:25:03 -0800 [thread overview]
Message-ID: <20241219182503.GG6156@frogsfrogsfrogs> (raw)
In-Reply-To: <20241219173954.22546-8-hch@lst.de>
On Thu, Dec 19, 2024 at 05:39:12PM +0000, Christoph Hellwig wrote:
> struct iomap_ioend currently tracks outstanding buffered writes and has
> some really nice code in core iomap and XFS to merge contiguous I/Os
> and defer them to userspace for completion in a very efficient way.
>
> For zoned writes we'll also need a per-bio user context completion to
> record the written blocks, and the infrastructure for that would look
> basically like the ioend handling for buffered I/O.
>
> So instead of reinventing the wheel, reuse the existing infrastructure.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/iomap/direct-io.c | 49 +++++++++++++++++++++++++++++++++++++++++--
> fs/iomap/internal.h | 1 +
> fs/iomap/ioend.c | 2 ++
> include/linux/iomap.h | 4 +++-
> 4 files changed, 53 insertions(+), 3 deletions(-)
>
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index ed658eb09a1a..dd521f4edf55 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -1,7 +1,7 @@
> // SPDX-License-Identifier: GPL-2.0
> /*
> * Copyright (C) 2010 Red Hat, Inc.
> - * Copyright (c) 2016-2021 Christoph Hellwig.
> + * Copyright (c) 2016-2024 Christoph Hellwig.
> */
> #include <linux/module.h>
> #include <linux/compiler.h>
> @@ -12,6 +12,7 @@
> #include <linux/backing-dev.h>
> #include <linux/uio.h>
> #include <linux/task_io_accounting_ops.h>
> +#include "internal.h"
> #include "trace.h"
>
> #include "../internal.h"
> @@ -20,6 +21,7 @@
> * Private flags for iomap_dio, must not overlap with the public ones in
> * iomap.h:
> */
> +#define IOMAP_DIO_NO_INVALIDATE (1U << 25)
> #define IOMAP_DIO_CALLER_COMP (1U << 26)
> #define IOMAP_DIO_INLINE_COMP (1U << 27)
> #define IOMAP_DIO_WRITE_THROUGH (1U << 28)
> @@ -119,7 +121,8 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
> * ->end_io() when necessary, otherwise a racing buffer read would cache
> * zeros from unwritten extents.
> */
> - if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE))
> + if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE) &&
> + !(dio->flags & IOMAP_DIO_NO_INVALIDATE))
> kiocb_invalidate_post_direct_write(iocb, dio->size);
>
> inode_dio_end(file_inode(iocb->ki_filp));
> @@ -221,6 +224,7 @@ static void iomap_dio_done(struct iomap_dio *dio)
> }
> }
>
> +
> void iomap_dio_bio_end_io(struct bio *bio)
> {
> struct iomap_dio *dio = bio->bi_private;
> @@ -241,6 +245,47 @@ void iomap_dio_bio_end_io(struct bio *bio)
> }
> EXPORT_SYMBOL_GPL(iomap_dio_bio_end_io);
>
> +u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend)
> +{
> + struct iomap_dio *dio = ioend->io_bio.bi_private;
> + bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
> + u32 vec_count = ioend->io_bio.bi_vcnt;
> +
> + if (ioend->io_error)
> + iomap_dio_set_error(dio, ioend->io_error);
> +
> + if (atomic_dec_and_test(&dio->ref)) {
> + /*
> + * Try to avoid another context switch for the completion given
> + * that we are already called from the ioend completion
> + * workqueue, but never invalidate pages from this thread to
> + * avoid deadlocks with buffered I/O completions. Tough luck if
> + * yoy hit the tiny race with someone dirtying the range now
you
> + * betweem this check and the actual completion.
between
With those fixed,
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
--D
> + */
> + if (!dio->iocb->ki_filp->f_mapping->nrpages) {
> + dio->flags |= IOMAP_DIO_INLINE_COMP;
> + dio->flags |= IOMAP_DIO_NO_INVALIDATE;
> + }
> + dio->flags &= ~IOMAP_DIO_CALLER_COMP;
> + iomap_dio_done(dio);
> + }
> +
> + if (should_dirty) {
> + bio_check_pages_dirty(&ioend->io_bio);
> + } else {
> + bio_release_pages(&ioend->io_bio, false);
> + bio_put(&ioend->io_bio);
> + }
> +
> + /*
> + * Return the number of bvecs completed as even direct I/O completions
> + * do significant per-folio work and we'll still want to give up the
> + * CPU after a lot of completions.
> + */
> + return vec_count;
> +}
> +
> static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
> loff_t pos, unsigned len)
> {
> diff --git a/fs/iomap/internal.h b/fs/iomap/internal.h
> index 36d5c56e073e..f6992a3bf66a 100644
> --- a/fs/iomap/internal.h
> +++ b/fs/iomap/internal.h
> @@ -5,5 +5,6 @@
> #define IOEND_BATCH_SIZE 4096
>
> u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend);
> +u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend);
>
> #endif /* _IOMAP_INTERNAL_H */
> diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c
> index b4f6dd9e319a..158fa685d81f 100644
> --- a/fs/iomap/ioend.c
> +++ b/fs/iomap/ioend.c
> @@ -41,6 +41,8 @@ static u32 iomap_finish_ioend(struct iomap_ioend *ioend, int error)
>
> if (!atomic_dec_and_test(&ioend->io_remaining))
> return 0;
> + if (ioend->io_flags & IOMAP_IOEND_DIRECT)
> + return iomap_finish_ioend_direct(ioend);
> return iomap_finish_ioend_buffered(ioend);
> }
>
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 0d221fbe0eb3..1ef4c44fa36f 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -343,13 +343,15 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
> #define IOMAP_IOEND_UNWRITTEN (1U << 1)
> /* don't merge into previous ioend */
> #define IOMAP_IOEND_BOUNDARY (1U << 2)
> +/* is direct I/O */
> +#define IOMAP_IOEND_DIRECT (1U << 3)
>
> /*
> * Flags that if set on either ioend prevent the merge of two ioends.
> * (IOMAP_IOEND_BOUNDARY also prevents merged, but only one-way)
> */
> #define IOMAP_IOEND_NOMERGE_FLAGS \
> - (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN)
> + (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT)
>
> /*
> * Structure for writeback I/O completions.
> --
> 2.45.2
>
>
next prev parent reply other threads:[~2024-12-19 18:25 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-19 17:39 iomap patches for zoned XFS v1 Christoph Hellwig
2024-12-19 17:39 ` [PATCH 01/10] iomap: allow the file system to submit the writeback bios Christoph Hellwig
2024-12-19 17:54 ` Darrick J. Wong
2024-12-19 17:39 ` [PATCH 02/10] iomap: simplify io_flags and io_type in struct iomap_ioend Christoph Hellwig
2024-12-19 17:56 ` Darrick J. Wong
2024-12-19 17:39 ` [PATCH 03/10] iomap: add a IOMAP_F_ANON_WRITE flag Christoph Hellwig
2024-12-19 18:02 ` Darrick J. Wong
2024-12-19 18:24 ` Christoph Hellwig
2024-12-19 17:39 ` [PATCH 04/10] iomap: split bios to zone append limits in the submission handlers Christoph Hellwig
2024-12-19 18:17 ` Darrick J. Wong
2024-12-19 18:19 ` Christoph Hellwig
2024-12-19 17:39 ` [PATCH 05/10] iomap: move common ioend code to ioend.c Christoph Hellwig
2024-12-19 18:20 ` Darrick J. Wong
2024-12-19 17:39 ` [PATCH 06/10] iomap: factor out a iomap_dio_done helper Christoph Hellwig
2024-12-19 18:22 ` Darrick J. Wong
2024-12-19 17:39 ` [PATCH 07/10] iomap: optionally use ioends for direct I/O Christoph Hellwig
2024-12-19 18:25 ` Darrick J. Wong [this message]
2024-12-19 17:39 ` [PATCH 08/10] iomap: pass private data to iomap_page_mkwrite Christoph Hellwig
2024-12-19 17:39 ` [PATCH 09/10] iomap: pass private data to iomap_zero_range Christoph Hellwig
2024-12-19 17:39 ` [PATCH 10/10] iomap: pass private data to iomap_truncate_page Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241219182503.GG6156@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=brauner@kernel.org \
--cc=cem@kernel.org \
--cc=hch@lst.de \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox