From: "Darrick J. Wong" <djwong@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Carlos Maiolino <cem@kernel.org>,
Hans Holmberg <hans.holmberg@wdc.com>,
linux-xfs@vger.kernel.org
Subject: Re: [PATCH 28/43] xfs: implement direct writes to zoned RT devices
Date: Fri, 13 Dec 2024 14:39:37 -0800 [thread overview]
Message-ID: <20241213223937.GR6678@frogsfrogsfrogs> (raw)
In-Reply-To: <20241211085636.1380516-29-hch@lst.de>
On Wed, Dec 11, 2024 at 09:54:53AM +0100, Christoph Hellwig wrote:
> Direct writes to zoned RT devices are extremely simple. After taking the
> block reservation before acquiring the iolock, the iomap direct I/O calls
> into ->iomap_begin which will return a "fake" iomap for the entire
> requested range. The actual block allocation is then done from the
> submit_io handler using code shared with the buffered I/O path.
>
> The iomap_dio_ops set the bio_set to the (iomap) ioend one and initialize
> the embedded ioend, which allows reusing the existing ioend based buffered
> I/O completion path.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Yeah that is a lot simpler. :)
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
--D
> ---
> fs/xfs/xfs_aops.c | 6 ++--
> fs/xfs/xfs_aops.h | 3 +-
> fs/xfs/xfs_file.c | 80 +++++++++++++++++++++++++++++++++++++++++-----
> fs/xfs/xfs_iomap.c | 54 +++++++++++++++++++++++++++++++
> fs/xfs/xfs_iomap.h | 1 +
> 5 files changed, 133 insertions(+), 11 deletions(-)
>
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 67392413216b..a3ca14e811fd 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -137,7 +137,9 @@ xfs_end_ioend(
> else if (ioend->io_flags & IOMAP_IOEND_UNWRITTEN)
> error = xfs_iomap_write_unwritten(ip, offset, size, false);
>
> - if (!error && xfs_ioend_is_append(ioend))
> + if (!error &&
> + !(ioend->io_flags & IOMAP_IOEND_DIRECT) &&
> + xfs_ioend_is_append(ioend))
> error = xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
> done:
> iomap_finish_ioends(ioend, error);
> @@ -182,7 +184,7 @@ xfs_end_io(
> }
> }
>
> -static void
> +void
> xfs_end_bio(
> struct bio *bio)
> {
> diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
> index e0bd68419764..5a7a0f1a0b49 100644
> --- a/fs/xfs/xfs_aops.h
> +++ b/fs/xfs/xfs_aops.h
> @@ -9,6 +9,7 @@
> extern const struct address_space_operations xfs_address_space_operations;
> extern const struct address_space_operations xfs_dax_aops;
>
> -int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
> +int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
> +void xfs_end_bio(struct bio *bio);
>
> #endif /* __XFS_AOPS_H__ */
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 195cf60a81b0..1b39000b7c62 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -25,6 +25,7 @@
> #include "xfs_iomap.h"
> #include "xfs_reflink.h"
> #include "xfs_file.h"
> +#include "xfs_aops.h"
> #include "xfs_zone_alloc.h"
>
> #include <linux/dax.h>
> @@ -548,6 +549,9 @@ xfs_dio_write_end_io(
> loff_t offset = iocb->ki_pos;
> unsigned int nofs_flag;
>
> + ASSERT(!xfs_is_zoned_inode(ip) ||
> + !(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));
> +
> trace_xfs_end_io_direct_write(ip, offset, size);
>
> if (xfs_is_shutdown(ip->i_mount))
> @@ -627,14 +631,51 @@ static const struct iomap_dio_ops xfs_dio_write_ops = {
> .end_io = xfs_dio_write_end_io,
> };
>
> +static void
> +xfs_dio_zoned_submit_io(
> + const struct iomap_iter *iter,
> + struct bio *bio,
> + loff_t file_offset)
> +{
> + struct xfs_mount *mp = XFS_I(iter->inode)->i_mount;
> + struct xfs_zone_alloc_ctx *ac = iter->private;
> + xfs_filblks_t count_fsb;
> + struct iomap_ioend *ioend;
> +
> + count_fsb = XFS_B_TO_FSB(mp, bio->bi_iter.bi_size);
> + if (count_fsb > ac->reserved_blocks) {
> + xfs_err(mp,
> +"allocation (%lld) larger than reservation (%lld).",
> + count_fsb, ac->reserved_blocks);
> + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
> + bio_io_error(bio);
> + return;
> + }
> + ac->reserved_blocks -= count_fsb;
> +
> + bio->bi_end_io = xfs_end_bio;
> + ioend = iomap_init_ioend(iter->inode, bio, file_offset,
> + IOMAP_IOEND_DIRECT);
> + xfs_zone_alloc_and_submit(ioend, &ac->open_zone);
> +}
> +
> +static const struct iomap_dio_ops xfs_dio_zoned_write_ops = {
> + .bio_set = &iomap_ioend_bioset,
> + .submit_io = xfs_dio_zoned_submit_io,
> + .end_io = xfs_dio_write_end_io,
> +};
> +
> /*
> - * Handle block aligned direct I/O writes
> + * Handle block aligned direct I/O writes.
> */
> static noinline ssize_t
> xfs_file_dio_write_aligned(
> struct xfs_inode *ip,
> struct kiocb *iocb,
> - struct iov_iter *from)
> + struct iov_iter *from,
> + const struct iomap_ops *ops,
> + const struct iomap_dio_ops *dops,
> + struct xfs_zone_alloc_ctx *ac)
> {
> unsigned int iolock = XFS_IOLOCK_SHARED;
> ssize_t ret;
> @@ -642,7 +683,7 @@ xfs_file_dio_write_aligned(
> ret = xfs_ilock_iocb_for_write(iocb, &iolock);
> if (ret)
> return ret;
> - ret = xfs_file_write_checks(iocb, from, &iolock, NULL);
> + ret = xfs_file_write_checks(iocb, from, &iolock, ac);
> if (ret)
> goto out_unlock;
>
> @@ -656,11 +697,31 @@ xfs_file_dio_write_aligned(
> iolock = XFS_IOLOCK_SHARED;
> }
> trace_xfs_file_direct_write(iocb, from);
> - ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
> - &xfs_dio_write_ops, 0, NULL, 0);
> + ret = iomap_dio_rw(iocb, from, ops, dops, 0, ac, 0);
> out_unlock:
> - if (iolock)
> - xfs_iunlock(ip, iolock);
> + xfs_iunlock(ip, iolock);
> + return ret;
> +}
> +
> +/*
> + * Handle block aligned direct I/O writes to zoned devices.
> + */
> +static noinline ssize_t
> +xfs_file_dio_write_zoned(
> + struct xfs_inode *ip,
> + struct kiocb *iocb,
> + struct iov_iter *from)
> +{
> + struct xfs_zone_alloc_ctx ac = { };
> + ssize_t ret;
> +
> + ret = xfs_zoned_write_space_reserve(ip, iocb, from, 0, &ac);
> + if (ret < 0)
> + return ret;
> + ret = xfs_file_dio_write_aligned(ip, iocb, from,
> + &xfs_zoned_direct_write_iomap_ops,
> + &xfs_dio_zoned_write_ops, &ac);
> + xfs_zoned_space_unreserve(ip, &ac);
> return ret;
> }
>
> @@ -777,7 +838,10 @@ xfs_file_dio_write(
> (xfs_is_always_cow_inode(ip) &&
> (iov_iter_alignment(from) & ip->i_mount->m_blockmask)))
> return xfs_file_dio_write_unaligned(ip, iocb, from);
> - return xfs_file_dio_write_aligned(ip, iocb, from);
> + if (xfs_is_zoned_inode(ip))
> + return xfs_file_dio_write_zoned(ip, iocb, from);
> + return xfs_file_dio_write_aligned(ip, iocb, from,
> + &xfs_direct_write_iomap_ops, &xfs_dio_write_ops, NULL);
> }
>
> static noinline ssize_t
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 402b253ce3a2..9626632883d0 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -965,6 +965,60 @@ const struct iomap_ops xfs_direct_write_iomap_ops = {
> .iomap_begin = xfs_direct_write_iomap_begin,
> };
>
> +#ifdef CONFIG_XFS_RT
> +/*
> + * This is really simple. The space has already been reserved before taking the
> + * IOLOCK, the actual block allocation is done just before submitting the bio
> + * and only recorded in the extent map on I/O completion.
> + */
> +static int
> +xfs_zoned_direct_write_iomap_begin(
> + struct inode *inode,
> + loff_t offset,
> + loff_t length,
> + unsigned flags,
> + struct iomap *iomap,
> + struct iomap *srcmap)
> +{
> + struct xfs_inode *ip = XFS_I(inode);
> + int error;
> +
> + ASSERT(!(flags & IOMAP_OVERWRITE_ONLY));
> +
> + /*
> + * Needs to be pushed down into the allocator so that only writes into
> + * a single zone can be supported.
> + */
> + if (flags & IOMAP_NOWAIT)
> + return -EAGAIN;
> +
> + /*
> + * Ensure the extent list is in memory so that we don't have to read it
> + * from the I/O completion handler.
> + */
> + if (xfs_need_iread_extents(&ip->i_df)) {
> + xfs_ilock(ip, XFS_ILOCK_EXCL);
> + error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> + if (error)
> + return error;
> + }
> +
> + iomap->type = IOMAP_MAPPED;
> + iomap->flags = IOMAP_F_DIRTY;
> + iomap->bdev = ip->i_mount->m_rtdev_targp->bt_bdev;
> + iomap->offset = offset;
> + iomap->length = length;
> + iomap->flags = IOMAP_F_ZONE_APPEND;
> + iomap->addr = 0;
> + return 0;
> +}
> +
> +const struct iomap_ops xfs_zoned_direct_write_iomap_ops = {
> + .iomap_begin = xfs_zoned_direct_write_iomap_begin,
> +};
> +#endif /* CONFIG_XFS_RT */
> +
> static int
> xfs_dax_write_iomap_end(
> struct inode *inode,
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index bc8a00cad854..d330c4a581b1 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -51,6 +51,7 @@ xfs_aligned_fsb_count(
>
> extern const struct iomap_ops xfs_buffered_write_iomap_ops;
> extern const struct iomap_ops xfs_direct_write_iomap_ops;
> +extern const struct iomap_ops xfs_zoned_direct_write_iomap_ops;
> extern const struct iomap_ops xfs_read_iomap_ops;
> extern const struct iomap_ops xfs_seek_iomap_ops;
> extern const struct iomap_ops xfs_xattr_iomap_ops;
> --
> 2.45.2
>
>
next prev parent reply other threads:[~2024-12-13 22:39 UTC|newest]
Thread overview: 143+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-11 8:54 RFC: support for zoned devices Christoph Hellwig
2024-12-11 8:54 ` [PATCH 01/43] xfs: constify feature checks Christoph Hellwig
2024-12-12 20:44 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 02/43] xfs: factor out a xfs_rt_check_size helper Christoph Hellwig
2024-12-12 21:11 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 03/43] xfs: add a rtg_blocks helper Christoph Hellwig
2024-12-12 21:12 ` Darrick J. Wong
2024-12-13 5:00 ` Christoph Hellwig
2024-12-15 18:10 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 04/43] xfs: move xfs_bmapi_reserve_delalloc to xfs_iomap.c Christoph Hellwig
2024-12-12 21:18 ` Darrick J. Wong
2024-12-13 5:04 ` Christoph Hellwig
2024-12-15 18:13 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 05/43] xfs: don't take m_sb_lock in xfs_fs_statfs Christoph Hellwig
2024-12-12 21:42 ` Darrick J. Wong
2024-12-13 5:06 ` Christoph Hellwig
2024-12-15 18:16 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 06/43] xfs: refactor xfs_fs_statfs Christoph Hellwig
2024-12-12 21:24 ` Darrick J. Wong
2024-12-13 5:08 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 07/43] xfs: cleanup xfs_vn_getattr Christoph Hellwig
2024-12-12 21:24 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 08/43] xfs: report the correct dio alignment for COW inodes Christoph Hellwig
2024-12-12 21:29 ` Darrick J. Wong
2024-12-13 5:09 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 09/43] xfs: generalize the freespace and reserved blocks handling Christoph Hellwig
2024-12-12 21:37 ` Darrick J. Wong
2024-12-13 5:11 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 10/43] xfs: preserve RT reservations across remounts Christoph Hellwig
2024-12-12 21:38 ` Darrick J. Wong
2024-12-13 9:15 ` Hans Holmberg
2024-12-15 18:42 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 11/43] xfs: skip always_cow inodes in xfs_reflink_trim_around_shared Christoph Hellwig
2024-12-12 21:38 ` Darrick J. Wong
2024-12-13 5:12 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 12/43] xfs: refine the unaligned check for always COW inodes in xfs_file_dio_write Christoph Hellwig
2024-12-12 21:44 ` Darrick J. Wong
2024-12-13 5:14 ` Christoph Hellwig
2024-12-13 23:14 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 13/43] xfs: support XFS_BMAPI_REMAP in xfs_bmap_del_extent_delay Christoph Hellwig
2024-12-12 21:47 ` [PATCH 13/43] xfs: support XFS_BMAPI_REMAP in xfs_bmap_del_extent_delayOM Darrick J. Wong
2024-12-13 5:14 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 14/43] xfs: add a xfs_rtrmap_first_unwritten_rgbno helper Christoph Hellwig
2024-12-12 21:48 ` Darrick J. Wong
2024-12-13 5:16 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 15/43] xfs: define the zoned on-disk format Christoph Hellwig
2024-12-12 22:02 ` Darrick J. Wong
2024-12-13 5:22 ` Christoph Hellwig
2024-12-13 17:09 ` Darrick J. Wong
2024-12-15 5:20 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 16/43] xfs: allow internal RT devices for zoned mode Christoph Hellwig
2024-12-12 22:06 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 17/43] xfs: don't allow growfs of the data device with internal RT device Christoph Hellwig
2024-12-12 22:07 ` Darrick J. Wong
2024-12-13 5:22 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 18/43] xfs: export zoned geometry via XFS_FSOP_GEOM Christoph Hellwig
2024-12-12 22:09 ` Darrick J. Wong
2024-12-13 5:23 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 19/43] xfs: disable sb_frextents for zoned file systems Christoph Hellwig
2024-12-12 22:26 ` Darrick J. Wong
2024-12-13 5:29 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 20/43] xfs: disable FITRIM for zoned RT devices Christoph Hellwig
2024-12-12 22:13 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 21/43] xfs: don't call xfs_can_free_eofblocks from ->release for zoned inodes Christoph Hellwig
2024-12-12 22:15 ` Darrick J. Wong
2024-12-13 5:28 ` Christoph Hellwig
2024-12-13 17:13 ` Darrick J. Wong
2024-12-13 17:18 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 22/43] xfs: skip zoned RT inodes in xfs_inodegc_want_queue_rt_file Christoph Hellwig
2024-12-12 22:15 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 23/43] xfs: parse and validate hardware zone information Christoph Hellwig
2024-12-13 17:31 ` Darrick J. Wong
2024-12-15 5:24 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 24/43] xfs: add the zoned space allocator Christoph Hellwig
2024-12-13 18:33 ` Darrick J. Wong
2024-12-15 5:27 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 25/43] xfs: add support for zoned space reservations Christoph Hellwig
2024-12-13 21:01 ` Darrick J. Wong
2024-12-15 5:31 ` Christoph Hellwig
2024-12-17 16:59 ` Darrick J. Wong
2024-12-19 5:50 ` Christoph Hellwig
2024-12-19 16:00 ` Darrick J. Wong
2024-12-19 17:36 ` Christoph Hellwig
2024-12-19 17:37 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 26/43] xfs: implement zoned garbage collection Christoph Hellwig
2024-12-13 22:18 ` Darrick J. Wong
2024-12-15 5:57 ` Christoph Hellwig
2024-12-17 1:27 ` Darrick J. Wong
2024-12-17 4:06 ` Christoph Hellwig
2024-12-17 17:42 ` Darrick J. Wong
2024-12-18 7:13 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 27/43] xfs: implement buffered writes to zoned RT devices Christoph Hellwig
2024-12-13 22:37 ` Darrick J. Wong
2024-12-15 6:12 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 28/43] xfs: implement direct " Christoph Hellwig
2024-12-13 22:39 ` Darrick J. Wong [this message]
2024-12-11 8:54 ` [PATCH 29/43] xfs: wire up zoned block freeing in xfs_rtextent_free_finish_item Christoph Hellwig
2024-12-13 22:40 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 30/43] xfs: hide reserved RT blocks from statfs Christoph Hellwig
2024-12-13 22:43 ` Darrick J. Wong
2024-12-15 6:03 ` Christoph Hellwig
2024-12-11 8:54 ` [PATCH 31/43] xfs: support growfs on zoned file systems Christoph Hellwig
2024-12-13 22:45 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 32/43] xfs: allow COW forks on zoned file systems in xchk_bmap Christoph Hellwig
2024-12-13 22:47 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 33/43] xfs: support xchk_xref_is_used_rt_space on zoned file systems Christoph Hellwig
2024-12-13 22:49 ` Darrick J. Wong
2024-12-15 6:13 ` Christoph Hellwig
2024-12-17 17:02 ` Darrick J. Wong
2024-12-11 8:54 ` [PATCH 34/43] xfs: support xrep_require_rtext_inuse " Christoph Hellwig
2024-12-13 22:49 ` Darrick J. Wong
2024-12-11 8:55 ` [PATCH 35/43] xfs: enable fsmap reporting for internal RT devices Christoph Hellwig
2024-12-13 23:11 ` Darrick J. Wong
2024-12-15 6:26 ` Christoph Hellwig
2024-12-17 17:06 ` Darrick J. Wong
2024-12-11 8:55 ` [PATCH 36/43] xfs: disable reflink for zoned file systems Christoph Hellwig
2024-12-13 23:12 ` Darrick J. Wong
2024-12-15 6:26 ` Christoph Hellwig
2024-12-17 17:10 ` Darrick J. Wong
2024-12-18 7:09 ` Christoph Hellwig
2024-12-18 18:16 ` Darrick J. Wong
2024-12-11 8:55 ` [PATCH 37/43] xfs: disable rt quotas " Christoph Hellwig
2024-12-13 23:05 ` Darrick J. Wong
2024-12-15 6:21 ` Christoph Hellwig
2024-12-11 8:55 ` [PATCH 38/43] xfs: enable the zoned RT device feature Christoph Hellwig
2024-12-13 22:52 ` Darrick J. Wong
2024-12-15 6:15 ` Christoph Hellwig
2024-12-11 8:55 ` [PATCH 39/43] xfs: support zone gaps Christoph Hellwig
2024-12-13 22:55 ` Darrick J. Wong
2024-12-11 8:55 ` [PATCH 40/43] xfs: add a max_open_zones mount option Christoph Hellwig
2024-12-13 22:57 ` Darrick J. Wong
2024-12-15 6:16 ` Christoph Hellwig
2024-12-17 17:12 ` Darrick J. Wong
2024-12-11 8:55 ` [PATCH 41/43] xfs: support write life time based data placement Christoph Hellwig
2024-12-13 23:00 ` Darrick J. Wong
2024-12-15 6:19 ` Christoph Hellwig
2024-12-17 17:14 ` Darrick J. Wong
2024-12-18 7:10 ` Christoph Hellwig
2024-12-18 18:19 ` Darrick J. Wong
2024-12-11 8:55 ` [PATCH 42/43] xfs: wire up the show_stats super operation Christoph Hellwig
2024-12-13 23:01 ` Darrick J. Wong
2024-12-11 8:55 ` [PATCH 43/43] xfs: export zone stats in /proc/*/mountstats Christoph Hellwig
2024-12-13 23:04 ` Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241213223937.GR6678@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=cem@kernel.org \
--cc=hans.holmberg@wdc.com \
--cc=hch@lst.de \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox