linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: Andrey Albershteyn <aalbersh@redhat.com>
Cc: fsverity@lists.linux.dev, linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org, david@fromorbit.com,
	ebiggers@kernel.org, hch@lst.de,
	Andrey Albershteyn <aalbersh@kernel.org>
Subject: Re: [PATCH RFC 13/29] iomap: integrate fs-verity verification into iomap's read path
Date: Tue, 29 Jul 2025 16:21:52 -0700	[thread overview]
Message-ID: <20250729232152.GP2672049@frogsfrogsfrogs> (raw)
In-Reply-To: <20250728-fsverity-v1-13-9e5443af0e34@kernel.org>

On Mon, Jul 28, 2025 at 10:30:17PM +0200, Andrey Albershteyn wrote:
> From: Andrey Albershteyn <aalbersh@redhat.com>
> 
> This patch adds fs-verity verification into iomap's read path. After
> BIO's io operation is complete the data are verified against
> fs-verity's Merkle tree. Verification work is done in a separate
> workqueue.
> 
> The read path ioend iomap_read_ioend are stored side by side with
> BIOs if FS_VERITY is enabled.
> 
> [djwong: fix doc warning]
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> Signed-off-by: Andrey Albershteyn <aalbersh@kernel.org>
> ---
>  fs/iomap/buffered-io.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++--
>  fs/iomap/ioend.c       |  41 +++++++++++++-
>  include/linux/iomap.h  |  13 +++++
>  3 files changed, 198 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index e959a206cba9..87c974e543e0 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -6,6 +6,7 @@
>  #include <linux/module.h>
>  #include <linux/compiler.h>
>  #include <linux/fs.h>
> +#include <linux/fsverity.h>
>  #include <linux/iomap.h>
>  #include <linux/pagemap.h>
>  #include <linux/uio.h>
> @@ -363,6 +364,116 @@ static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter,
>  		pos >= i_size_read(iter->inode);
>  }
>  
> +#ifdef CONFIG_FS_VERITY
> +int iomap_init_fsverity(struct super_block *sb, unsigned int wq_flags,
> +			int max_active)
> +{
> +	int ret;
> +
> +	if (!iomap_fsverity_bioset) {
> +		ret = iomap_fsverity_init_bioset();
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return fsverity_init_wq(sb, wq_flags, max_active);
> +}
> +EXPORT_SYMBOL_GPL(iomap_init_fsverity);
> +
> +static void
> +iomap_read_fsverify_end_io_work(struct work_struct *work)
> +{
> +	struct iomap_fsverity_bio *fbio =
> +		container_of(work, struct iomap_fsverity_bio, work);
> +
> +	fsverity_verify_bio(&fbio->bio);
> +	iomap_read_end_io(&fbio->bio);
> +}
> +
> +static void
> +iomap_read_fsverity_end_io(struct bio *bio)
> +{
> +	struct iomap_fsverity_bio *fbio =
> +		container_of(bio, struct iomap_fsverity_bio, bio);
> +
> +	INIT_WORK(&fbio->work, iomap_read_fsverify_end_io_work);
> +	queue_work(bio->bi_private, &fbio->work);
> +}
> +
> +static struct bio *
> +iomap_fsverity_read_bio_alloc(struct inode *inode, struct block_device *bdev,
> +			    int nr_vecs, gfp_t gfp)
> +{
> +	struct bio *bio;
> +
> +	bio = bio_alloc_bioset(bdev, nr_vecs, REQ_OP_READ, gfp,
> +			iomap_fsverity_bioset);
> +	if (bio) {
> +		bio->bi_private = inode->i_sb->s_verity_wq;
> +		bio->bi_end_io = iomap_read_fsverity_end_io;
> +	}
> +	return bio;
> +}
> +
> +/*
> + * True if tree is not aligned with fs block/folio size and we need zero tail
> + * part of the folio
> + */
> +static bool
> +iomap_fsverity_tree_end_align(struct iomap_iter *iter, struct folio *folio,
> +		loff_t pos, size_t plen)
> +{
> +	int error;
> +	u8 log_blocksize;
> +	u64 tree_size, tree_mask, last_block_tree, last_block_pos;
> +
> +	/* Not a Merkle tree */
> +	if (!(iter->iomap.flags & IOMAP_F_BEYOND_EOF))
> +		return false;
> +
> +	if (plen == folio_size(folio))
> +		return false;
> +
> +	if (iter->inode->i_blkbits == folio_shift(folio))
> +		return false;
> +
> +	error = fsverity_merkle_tree_geometry(iter->inode, &log_blocksize, NULL,
> +			&tree_size);
> +	if (error)
> +		return false;
> +
> +	/*
> +	 * We are beyond EOF reading Merkle tree. Therefore, it has highest
> +	 * offset. Mask pos with a tree size to get a position whare are we in
> +	 * the tree. Then, compare index of a last tree block and the index of
> +	 * current pos block.
> +	 */
> +	last_block_tree = (tree_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
> +	tree_mask = (1 << fls64(tree_size)) - 1;
> +	last_block_pos = ((pos & tree_mask) >> PAGE_SHIFT) + 1;
> +
> +	return last_block_tree == last_block_pos;
> +}
> +#else
> +# define iomap_fsverity_read_bio_alloc(...)	(NULL)
> +# define iomap_fsverity_tree_end_align(...)	(false)
> +#endif /* CONFIG_FS_VERITY */
> +
> +static struct bio *iomap_read_bio_alloc(struct inode *inode,
> +		const struct iomap *iomap, int nr_vecs, gfp_t gfp)
> +{
> +	struct bio *bio;
> +	struct block_device *bdev = iomap->bdev;
> +
> +	if (fsverity_active(inode) && !(iomap->flags & IOMAP_F_BEYOND_EOF))
> +		return iomap_fsverity_read_bio_alloc(inode, bdev, nr_vecs, gfp);
> +
> +	bio = bio_alloc(bdev, nr_vecs, REQ_OP_READ, gfp);
> +	if (bio)
> +		bio->bi_end_io = iomap_read_end_io;
> +	return bio;
> +}
> +
>  static int iomap_readpage_iter(struct iomap_iter *iter,
>  		struct iomap_readpage_ctx *ctx)
>  {
> @@ -375,6 +486,10 @@ static int iomap_readpage_iter(struct iomap_iter *iter,
>  	sector_t sector;
>  	int ret;
>  
> +	/* Fail reads from broken fsverity files immediately. */
> +	if (IS_VERITY(iter->inode) && !fsverity_active(iter->inode))
> +		return -EIO;
> +
>  	if (iomap->type == IOMAP_INLINE) {
>  		ret = iomap_read_inline_data(iter, folio);
>  		if (ret)
> @@ -391,6 +506,11 @@ static int iomap_readpage_iter(struct iomap_iter *iter,
>  	if (iomap_block_needs_zeroing(iter, pos) &&
>  	    !(iomap->flags & IOMAP_F_BEYOND_EOF)) {
>  		folio_zero_range(folio, poff, plen);
> +		if (fsverity_active(iter->inode) &&
> +		    !fsverity_verify_blocks(folio, plen, poff)) {
> +			return -EIO;
> +		}
> +
>  		iomap_set_range_uptodate(folio, poff, plen);
>  		goto done;
>  	}
> @@ -408,32 +528,51 @@ static int iomap_readpage_iter(struct iomap_iter *iter,
>  	    !bio_add_folio(ctx->bio, folio, plen, poff)) {
>  		gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL);
>  		gfp_t orig_gfp = gfp;
> -		unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE);
>  
>  		if (ctx->bio)
>  			submit_bio(ctx->bio);
>  
>  		if (ctx->rac) /* same as readahead_gfp_mask */
>  			gfp |= __GFP_NORETRY | __GFP_NOWARN;
> -		ctx->bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs),
> -				     REQ_OP_READ, gfp);
> +
> +		ctx->bio = iomap_read_bio_alloc(iter->inode, iomap,
> +				bio_max_segs(DIV_ROUND_UP(length, PAGE_SIZE)),
> +				gfp);
> +
>  		/*
>  		 * If the bio_alloc fails, try it again for a single page to
>  		 * avoid having to deal with partial page reads.  This emulates
>  		 * what do_mpage_read_folio does.
>  		 */
>  		if (!ctx->bio) {
> -			ctx->bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ,
> -					     orig_gfp);
> +			ctx->bio = iomap_read_bio_alloc(iter->inode,
> +					iomap, 1, orig_gfp);
>  		}
>  		if (ctx->rac)
>  			ctx->bio->bi_opf |= REQ_RAHEAD;
>  		ctx->bio->bi_iter.bi_sector = sector;
> -		ctx->bio->bi_end_io = iomap_read_end_io;
>  		bio_add_folio_nofail(ctx->bio, folio, plen, poff);
>  	}
>  
>  done:
> +	/*
> +	 * For post EOF region, zero part of the folio which won't be read. This
> +	 * happens at the end of the region. So far, the only user is
> +	 * fs-verity which stores continuous data region.

Is it ever the case that the zeroed region actually has merkle tree
content on disk?  Or if this region truly was never written by the
fsverity construction code, then why would it access the unwritten
region later?

Or am I misunderstanding something here?

(Probably...)

--D

> +	 * We also check the fs block size as plen could be smaller than the
> +	 * block size. If we zero part of the block and mark the whole block
> +	 * (iomap_set_range_uptodate() works with fsblocks) as uptodate the
> +	 * iomap_finish_folio_read() will toggle the uptodate bit when the folio
> +	 * is read.
> +	 */
> +	if (iomap_fsverity_tree_end_align(iter, folio, pos, plen)) {
> +		folio_zero_range(folio, poff + plen,
> +				folio_size(folio) - (poff + plen));
> +		iomap_set_range_uptodate(folio, poff + plen,
> +				folio_size(folio) - (poff + plen));
> +	}
> +
>  	/*
>  	 * Move the caller beyond our range so that it keeps making progress.
>  	 * For that, we have to include any leading non-uptodate ranges, but
> diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c
> index 18894ebba6db..400751d82313 100644
> --- a/fs/iomap/ioend.c
> +++ b/fs/iomap/ioend.c
> @@ -6,6 +6,8 @@
>  #include <linux/list_sort.h>
>  #include "internal.h"
>  
> +#define IOMAP_POOL_SIZE		(4 * (PAGE_SIZE / SECTOR_SIZE))
> +
>  struct bio_set iomap_ioend_bioset;
>  EXPORT_SYMBOL_GPL(iomap_ioend_bioset);
>  
> @@ -207,9 +209,46 @@ struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend,
>  }
>  EXPORT_SYMBOL_GPL(iomap_split_ioend);
>  
> +#ifdef CONFIG_FS_VERITY
> +struct bio_set *iomap_fsverity_bioset;
> +EXPORT_SYMBOL_GPL(iomap_fsverity_bioset);
> +int iomap_fsverity_init_bioset(void)
> +{
> +	struct bio_set *bs, *old;
> +	int error;
> +
> +	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
> +	if (!bs)
> +		return -ENOMEM;
> +
> +	error = bioset_init(bs, IOMAP_POOL_SIZE,
> +			    offsetof(struct iomap_fsverity_bio, bio),
> +			    BIOSET_NEED_BVECS);
> +	if (error) {
> +		kfree(bs);
> +		return error;
> +	}
> +
> +	/*
> +	 * This has to be atomic as readaheads can race to create the
> +	 * bioset.  If someone set the pointer before us, we drop ours.
> +	 */
> +	old = cmpxchg(&iomap_fsverity_bioset, NULL, bs);
> +	if (old) {
> +		bioset_exit(bs);
> +		kfree(bs);
> +	}
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(iomap_fsverity_init_bioset);
> +#else
> +# define iomap_fsverity_init_bioset(...)	(-EOPNOTSUPP)
> +#endif
> +
>  static int __init iomap_ioend_init(void)
>  {
> -	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
> +	return bioset_init(&iomap_ioend_bioset, IOMAP_POOL_SIZE,
>  			   offsetof(struct iomap_ioend, io_bio),
>  			   BIOSET_NEED_BVECS);
>  }
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 73288f28543f..f845876ad8d2 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -339,6 +339,19 @@ static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter)
>  		iter->srcmap.type == IOMAP_MAPPED;
>  }
>  
> +#ifdef CONFIG_FS_VERITY
> +extern struct bio_set *iomap_fsverity_bioset;
> +
> +struct iomap_fsverity_bio {
> +	struct work_struct	work;
> +	struct bio		bio;
> +};
> +
> +int iomap_init_fsverity(struct super_block *sb, unsigned int wq_flags,
> +			int max_active);
> +int iomap_fsverity_init_bioset(void);
> +#endif
> +
>  ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
>  		const struct iomap_ops *ops, void *private);
>  int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
> 
> -- 
> 2.50.0
> 
> 

  reply	other threads:[~2025-07-29 23:21 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-28 20:30 [PATCH RFC 00/29] fs-verity support for XFS with post EOF merkle tree Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 01/29] iomap: add iomap_writepages_unbound() to write beyond EOF Andrey Albershteyn
2025-07-29 22:07   ` Darrick J. Wong
2025-07-31 15:04     ` Andrey Albershteyn
2025-07-31 18:43   ` Joanne Koong
2025-08-04 11:34     ` Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 02/29] iomap: introduce iomap_read/write_region interface Andrey Albershteyn
2025-07-29 22:22   ` Darrick J. Wong
2025-07-31 15:51     ` Andrey Albershteyn
2025-08-11 11:43     ` Christoph Hellwig
2025-07-28 20:30 ` [PATCH RFC 03/29] fs: add FS_XFLAG_VERITY for verity files Andrey Albershteyn
2025-07-29  9:53   ` Amir Goldstein
2025-07-29 10:35     ` Andrey Albershteyn
2025-07-29 12:06       ` Amir Goldstein
2025-08-12  7:51   ` Christoph Hellwig
2025-07-28 20:30 ` [PATCH RFC 04/29] fsverity: add per-sb workqueue for post read processing Andrey Albershteyn
2025-08-11 11:45   ` Christoph Hellwig
2025-08-11 17:51     ` Tejun Heo
2025-08-12  7:43       ` Christoph Hellwig
2025-08-12 19:52         ` Tejun Heo
2025-07-28 20:30 ` [PATCH RFC 05/29] fsverity: add tracepoints Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 06/29] fsverity: report validation errors back to the filesystem Andrey Albershteyn
2025-08-11 11:46   ` Christoph Hellwig
2025-08-11 15:31     ` Darrick J. Wong
2025-08-12  7:34       ` Christoph Hellwig
2025-08-12  7:56         ` Christoph Hellwig
2025-07-28 20:30 ` [PATCH RFC 07/29] fsverity: pass super_block to fsverity_enqueue_verify_work Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 08/29] ext4: use a per-superblock fsverity workqueue Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 09/29] f2fs: " Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 10/29] btrfs: " Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 11/29] fsverity: remove system-wide workqueue Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 12/29] fsverity: expose merkle tree geometry to callers Andrey Albershteyn
2025-08-11 11:48   ` Christoph Hellwig
2025-08-11 15:38     ` Darrick J. Wong
2025-08-11 19:06       ` Andrey Albershteyn
2025-08-12  7:42       ` Christoph Hellwig
2025-08-12 19:09         ` Darrick J. Wong
2025-07-28 20:30 ` [PATCH RFC 13/29] iomap: integrate fs-verity verification into iomap's read path Andrey Albershteyn
2025-07-29 23:21   ` Darrick J. Wong [this message]
2025-07-31 11:34     ` Andrey Albershteyn
2025-07-31 14:52       ` Darrick J. Wong
2025-07-31 15:01         ` Andrey Albershteyn
2025-07-31 15:08           ` Darrick J. Wong
2025-07-28 20:30 ` [PATCH RFC 14/29] xfs: add attribute type for fs-verity Andrey Albershteyn
2025-08-11 11:50   ` Christoph Hellwig
2025-08-11 19:00     ` Andrey Albershteyn
2025-08-12  7:44       ` Christoph Hellwig
2025-08-12 17:11         ` Andrey Albershteyn
2025-08-12 19:12           ` Darrick J. Wong
2025-07-28 20:30 ` [PATCH RFC 15/29] xfs: add fs-verity ro-compat flag Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 16/29] xfs: add inode on-disk VERITY flag Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 17/29] xfs: initialize fs-verity on file open and cleanup on inode destruction Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 18/29] xfs: don't allow to enable DAX on fs-verity sealed inode Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 19/29] xfs: disable direct read path for fs-verity files Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 20/29] xfs: disable preallocations for fsverity Merkle tree writes Andrey Albershteyn
2025-07-29 22:27   ` Darrick J. Wong
2025-07-31 11:42     ` Andrey Albershteyn
2025-07-31 14:49       ` Darrick J. Wong
2025-07-28 20:30 ` [PATCH RFC 21/29] xfs: add writeback and iomap reading of Merkel tree pages Andrey Albershteyn
2025-07-29 22:33   ` Darrick J. Wong
2025-07-28 20:30 ` [PATCH RFC 22/29] xfs: add fs-verity support Andrey Albershteyn
2025-07-29 23:05   ` Darrick J. Wong
2025-07-31 14:50     ` Andrey Albershteyn
2025-07-31 15:07       ` Darrick J. Wong
2025-07-28 20:30 ` [PATCH RFC 23/29] xfs: add fs-verity ioctls Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 24/29] xfs: advertise fs-verity being available on filesystem Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 25/29] xfs: check and repair the verity inode flag state Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 26/29] xfs: fix scrub trace with null pointer in quotacheck Andrey Albershteyn
2025-07-29 15:28   ` Darrick J. Wong
2025-07-31 14:54     ` Andrey Albershteyn
2025-07-31 16:03       ` Carlos Maiolino
2025-07-28 20:30 ` [PATCH RFC 27/29] xfs: report verity failures through the health system Andrey Albershteyn
2025-07-28 20:30 ` [PATCH RFC 28/29] xfs: add fsverity traces Andrey Albershteyn
2025-07-29 23:06   ` Darrick J. Wong
2025-07-28 20:30 ` [PATCH RFC 29/29] xfs: enable ro-compat fs-verity flag Andrey Albershteyn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250729232152.GP2672049@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=aalbersh@kernel.org \
    --cc=aalbersh@redhat.com \
    --cc=david@fromorbit.com \
    --cc=ebiggers@kernel.org \
    --cc=fsverity@lists.linux.dev \
    --cc=hch@lst.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).