From: "Darrick J. Wong" <djwong@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Kanchan Joshi <joshi.k@samsung.com>,
"Martin K . Petersen" <martin.petersen@oracle.com>,
Johannes Thumshirn <Johannes.Thumshirn@wdc.com>,
Qu Wenruo <wqu@suse.com>, Goldwyn Rodrigues <rgoldwyn@suse.com>,
linux-block@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-xfs@vger.kernel.org
Subject: Re: [PATCH 6/7] xfs: support T10 protection information
Date: Mon, 3 Feb 2025 14:21:29 -0800 [thread overview]
Message-ID: <20250203222129.GC134507@frogsfrogsfrogs> (raw)
In-Reply-To: <20250203094322.1809766-7-hch@lst.de>
On Mon, Feb 03, 2025 at 10:43:10AM +0100, Christoph Hellwig wrote:
> Add support for generating / verifying protection information in the
> file system. This is done by hooking into the bio submission in
> iomap and then using the generic PI helpers. Compared to just using
> the block layer auto PI this extends the protection envelope and also
> prepares for eventually passing through PI from userspace at least
> for direct I/O.
>
> Right now this is still pretty hacky, e.g. the single PI buffer can
> get pretty gigantic and has no mempool backing it. The deferring of
> I/O completions is done unconditionally instead only when needed,
> and we assume the device can actually handle these huge segments.
> The latter should be fixed by doing proper splitting based on metadata
> limits in the block layer, but the rest needs to be addressed here.
Seems reasonable to me modulo the XXX parts. :)
--D
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/xfs/Makefile | 1 +
> fs/xfs/xfs_aops.c | 29 +++++++++++++++--
> fs/xfs/xfs_aops.h | 1 +
> fs/xfs/xfs_data_csum.c | 73 ++++++++++++++++++++++++++++++++++++++++++
> fs/xfs/xfs_data_csum.h | 7 ++++
> fs/xfs/xfs_file.c | 27 +++++++++++++++-
> fs/xfs/xfs_inode.h | 6 ++--
> 7 files changed, 136 insertions(+), 8 deletions(-)
> create mode 100644 fs/xfs/xfs_data_csum.c
> create mode 100644 fs/xfs/xfs_data_csum.h
>
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index 7afa51e41427..aa8749d640e7 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -73,6 +73,7 @@ xfs-y += xfs_aops.o \
> xfs_bmap_util.o \
> xfs_bio_io.o \
> xfs_buf.o \
> + xfs_data_csum.o \
> xfs_dahash_test.o \
> xfs_dir2_readdir.o \
> xfs_discard.o \
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 3e42a684cce1..291f5d4dbce6 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -19,6 +19,7 @@
> #include "xfs_reflink.h"
> #include "xfs_errortag.h"
> #include "xfs_error.h"
> +#include "xfs_data_csum.h"
>
> struct xfs_writepage_ctx {
> struct iomap_writepage_ctx ctx;
> @@ -122,6 +123,11 @@ xfs_end_ioend(
> goto done;
> }
>
> + if (bio_op(&ioend->io_bio) == REQ_OP_READ) {
> + error = xfs_data_csum_verify(ioend);
> + goto done;
> + }
> +
> /*
> * Success: commit the COW or unwritten blocks if needed.
> */
> @@ -175,7 +181,7 @@ xfs_end_io(
> }
> }
>
> -STATIC void
> +void
> xfs_end_bio(
> struct bio *bio)
> {
> @@ -417,6 +423,8 @@ xfs_submit_ioend(
>
> memalloc_nofs_restore(nofs_flag);
>
> + xfs_data_csum_generate(&ioend->io_bio);
> +
> /* send ioends that might require a transaction to the completion wq */
> if (xfs_ioend_is_append(ioend) ||
> (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED)))
> @@ -517,19 +525,34 @@ xfs_vm_bmap(
> return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
> }
>
> +static void xfs_buffered_read_submit_io(struct inode *inode,
> + struct bio *bio, loff_t file_offset)
> +{
> + xfs_data_csum_alloc(bio);
> + iomap_init_ioend(inode, bio, file_offset, 0);
> + bio->bi_end_io = xfs_end_bio;
> + submit_bio(bio);
> +}
> +
> +static const struct iomap_read_folio_ops xfs_iomap_read_ops = {
> + .bio_set = &iomap_ioend_bioset,
> + .submit_io = xfs_buffered_read_submit_io,
> +};
> +
> STATIC int
> xfs_vm_read_folio(
> struct file *unused,
> struct folio *folio)
> {
> - return iomap_read_folio(folio, &xfs_read_iomap_ops, NULL);
> + return iomap_read_folio(folio, &xfs_read_iomap_ops,
> + &xfs_iomap_read_ops);
> }
>
> STATIC void
> xfs_vm_readahead(
> struct readahead_control *rac)
> {
> - iomap_readahead(rac, &xfs_read_iomap_ops, NULL);
> + iomap_readahead(rac, &xfs_read_iomap_ops, &xfs_iomap_read_ops);
> }
>
> static int
> diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
> index e0bd68419764..efed1ab59dbf 100644
> --- a/fs/xfs/xfs_aops.h
> +++ b/fs/xfs/xfs_aops.h
> @@ -10,5 +10,6 @@ extern const struct address_space_operations xfs_address_space_operations;
> extern const struct address_space_operations xfs_dax_aops;
>
> int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
> +void xfs_end_bio(struct bio *bio);
>
> #endif /* __XFS_AOPS_H__ */
> diff --git a/fs/xfs/xfs_data_csum.c b/fs/xfs/xfs_data_csum.c
> new file mode 100644
> index 000000000000..d9d3620654b1
> --- /dev/null
> +++ b/fs/xfs/xfs_data_csum.c
> @@ -0,0 +1,73 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2022-2025 Christoph Hellwig.
> + */
> +#include "xfs.h"
> +#include "xfs_format.h"
> +#include "xfs_shared.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_inode.h"
> +#include "xfs_cksum.h"
> +#include "xfs_data_csum.h"
> +#include <linux/iomap.h>
> +#include <linux/blk-integrity.h>
> +#include <linux/bio-integrity.h>
> +
> +void *
> +xfs_data_csum_alloc(
> + struct bio *bio)
> +{
> + struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
> + struct bio_integrity_payload *bip;
> + unsigned int buf_size;
> + void *buf;
> +
> + if (!bi)
> + return NULL;
> +
> + buf_size = bio_integrity_bytes(bi, bio_sectors(bio));
> + /* XXX: this needs proper mempools */
> + /* XXX: needs (partial) zeroing if tuple_size > csum_size */
> + buf = kmalloc(buf_size, GFP_NOFS | __GFP_NOFAIL);
> + bip = bio_integrity_alloc(bio, GFP_NOFS | __GFP_NOFAIL, 1);
> + if (!bio_integrity_add_page(bio, virt_to_page(buf), buf_size,
> + offset_in_page(buf)))
> + WARN_ON_ONCE(1);
> +
> + if (bi->csum_type) {
> + if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
> + bip->bip_flags |= BIP_IP_CHECKSUM;
> + bip->bip_flags |= BIP_CHECK_GUARD;
> + }
> + if (bi->flags & BLK_INTEGRITY_REF_TAG)
> + bip->bip_flags |= BIP_CHECK_REFTAG;
> + bip_set_seed(bip, bio->bi_iter.bi_sector);
> +
> + return buf;
> +}
> +
> +void
> +xfs_data_csum_generate(
> + struct bio *bio)
> +{
> + struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
> +
> + if (!bi || !bi->csum_type)
> + return;
> +
> + xfs_data_csum_alloc(bio);
> + blk_integrity_generate(bio);
> +}
> +
> +int
> +xfs_data_csum_verify(
> + struct iomap_ioend *ioend)
> +{
> + struct bio *bio = &ioend->io_bio;
> + struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
> +
> + if (!bi || !bi->csum_type)
> + return 0;
> + return blk_integrity_verify_all(bio, ioend->io_sector);
> +}
> diff --git a/fs/xfs/xfs_data_csum.h b/fs/xfs/xfs_data_csum.h
> new file mode 100644
> index 000000000000..f32215e8f46c
> --- /dev/null
> +++ b/fs/xfs/xfs_data_csum.h
> @@ -0,0 +1,7 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +struct iomap_ioend;
> +
> +void *xfs_data_csum_alloc(struct bio *bio);
> +void xfs_data_csum_generate(struct bio *bio);
> +int xfs_data_csum_verify(struct iomap_ioend *ioend);
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index f7a7d89c345e..0d64c54017f0 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -25,6 +25,8 @@
> #include "xfs_iomap.h"
> #include "xfs_reflink.h"
> #include "xfs_file.h"
> +#include "xfs_data_csum.h"
> +#include "xfs_aops.h"
>
> #include <linux/dax.h>
> #include <linux/falloc.h>
> @@ -227,6 +229,20 @@ xfs_ilock_iocb_for_write(
> return 0;
> }
>
> +static void xfs_dio_read_submit_io(const struct iomap_iter *iter,
> + struct bio *bio, loff_t file_offset)
> +{
> + xfs_data_csum_alloc(bio);
> + iomap_init_ioend(iter->inode, bio, file_offset, IOMAP_IOEND_DIRECT);
> + bio->bi_end_io = xfs_end_bio;
> + submit_bio(bio);
> +}
> +
> +static const struct iomap_dio_ops xfs_dio_read_ops = {
> + .bio_set = &iomap_ioend_bioset,
> + .submit_io = xfs_dio_read_submit_io,
> +};
> +
> STATIC ssize_t
> xfs_file_dio_read(
> struct kiocb *iocb,
> @@ -245,7 +261,8 @@ xfs_file_dio_read(
> ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
> if (ret)
> return ret;
> - ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, NULL, 0);
> + ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, &xfs_dio_read_ops, 0,
> + NULL, 0);
> xfs_iunlock(ip, XFS_IOLOCK_SHARED);
>
> return ret;
> @@ -578,8 +595,16 @@ xfs_dio_write_end_io(
> return error;
> }
>
> +static void xfs_dio_write_submit_io(const struct iomap_iter *iter,
> + struct bio *bio, loff_t file_offset)
> +{
> + xfs_data_csum_generate(bio);
> + submit_bio(bio);
> +}
> +
> static const struct iomap_dio_ops xfs_dio_write_ops = {
> .end_io = xfs_dio_write_end_io,
> + .submit_io = xfs_dio_write_submit_io,
> };
>
> /*
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index c08093a65352..ff346bbe454c 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -609,10 +609,8 @@ int xfs_break_layouts(struct inode *inode, uint *iolock,
>
> static inline void xfs_update_stable_writes(struct xfs_inode *ip)
> {
> - if (bdev_stable_writes(xfs_inode_buftarg(ip)->bt_bdev))
> - mapping_set_stable_writes(VFS_I(ip)->i_mapping);
> - else
> - mapping_clear_stable_writes(VFS_I(ip)->i_mapping);
> + /* XXX: unconditional for now */
> + mapping_set_stable_writes(VFS_I(ip)->i_mapping);
> }
>
> /*
> --
> 2.45.2
>
>
next prev parent reply other threads:[~2025-02-03 22:21 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-03 9:43 PI and data checksumming for XFS Christoph Hellwig
2025-02-03 9:43 ` [PATCH 1/7] block: support integrity generation and verification from file systems Christoph Hellwig
2025-02-03 19:47 ` Martin K. Petersen
2025-04-21 2:30 ` Anuj gupta
2025-02-03 9:43 ` [PATCH 2/7] iomap: introduce iomap_read_folio_ops Christoph Hellwig
2025-02-03 9:43 ` [PATCH 3/7] iomap: add bioset in iomap_read_folio_ops for filesystems to use own bioset Christoph Hellwig
2025-02-03 22:23 ` Darrick J. Wong
2025-02-04 4:58 ` Christoph Hellwig
2025-03-13 13:53 ` Matthew Wilcox
2025-03-14 16:53 ` Darrick J. Wong
2025-03-17 5:52 ` Christoph Hellwig
2025-02-03 9:43 ` [PATCH 4/7] iomap: support ioends for reads Christoph Hellwig
2025-02-03 22:24 ` Darrick J. Wong
2025-02-03 9:43 ` [PATCH 5/7] iomap: limit buffered I/O size to 128M Christoph Hellwig
2025-02-03 22:22 ` Darrick J. Wong
2025-02-03 9:43 ` [PATCH 6/7] xfs: support T10 protection information Christoph Hellwig
2025-02-03 22:21 ` Darrick J. Wong [this message]
2025-02-03 9:43 ` [PATCH 7/7] xfs: implement block-metadata based data checksums Christoph Hellwig
2025-02-03 22:20 ` Darrick J. Wong
2025-02-04 5:00 ` Christoph Hellwig
2025-02-04 18:36 ` Darrick J. Wong
2025-02-06 6:05 ` Christoph Hellwig
2025-02-03 19:51 ` PI and data checksumming for XFS Martin K. Petersen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250203222129.GC134507@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=Johannes.Thumshirn@wdc.com \
--cc=hch@lst.de \
--cc=joshi.k@samsung.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-xfs@vger.kernel.org \
--cc=martin.petersen@oracle.com \
--cc=rgoldwyn@suse.com \
--cc=wqu@suse.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).