public inbox for linux-fsdevel@vger.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>,
	Christian Brauner <brauner@kernel.org>,
	Carlos Maiolino <cem@kernel.org>, Qu Wenruo <wqu@suse.com>,
	Al Viro <viro@zeniv.linux.org.uk>,
	linux-block@vger.kernel.org, linux-xfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH 03/14] iov_iter: extract a iov_iter_extract_bvecs helper from bio code
Date: Thu, 22 Jan 2026 09:47:03 -0800	[thread overview]
Message-ID: <20260122174703.GX5945@frogsfrogsfrogs> (raw)
In-Reply-To: <20260119074425.4005867-4-hch@lst.de>

On Mon, Jan 19, 2026 at 08:44:10AM +0100, Christoph Hellwig wrote:
> Massage __bio_iov_iter_get_pages so that it doesn't need the bio, and
> move it to lib/iov_iter.c so that it can be used by block code for
> other things than filling a bio and by other subsystems like netfs.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  block/bio.c         | 120 +++++++-------------------------------------
>  include/linux/uio.h |   3 ++
>  lib/iov_iter.c      |  98 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 119 insertions(+), 102 deletions(-)
> 
> diff --git a/block/bio.c b/block/bio.c
> index 46ff33f4de04..12cd3c5f6d6d 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -1172,102 +1172,6 @@ void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter)
>  	bio_set_flag(bio, BIO_CLONED);
>  }
>  
> -static unsigned int get_contig_folio_len(struct page **pages,
> -					 unsigned int *num_pages, size_t left,
> -					 size_t offset)
> -{
> -	struct folio *folio = page_folio(pages[0]);
> -	size_t contig_sz = min_t(size_t, PAGE_SIZE - offset, left);
> -	unsigned int max_pages, i;
> -	size_t folio_offset, len;
> -
> -	folio_offset = PAGE_SIZE * folio_page_idx(folio, pages[0]) + offset;
> -	len = min(folio_size(folio) - folio_offset, left);
> -
> -	/*
> -	 * We might COW a single page in the middle of a large folio, so we have
> -	 * to check that all pages belong to the same folio.
> -	 */
> -	left -= contig_sz;
> -	max_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
> -	for (i = 1; i < max_pages; i++) {
> -		size_t next = min_t(size_t, PAGE_SIZE, left);
> -
> -		if (page_folio(pages[i]) != folio ||
> -		    pages[i] != pages[i - 1] + 1)
> -			break;
> -		contig_sz += next;
> -		left -= next;
> -	}
> -
> -	*num_pages = i;
> -	return contig_sz;
> -}
> -
> -#define PAGE_PTRS_PER_BVEC     (sizeof(struct bio_vec) / sizeof(struct page *))
> -
> -/**
> - * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
> - * @bio: bio to add pages to
> - * @iter: iov iterator describing the region to be mapped
> - *
> - * Extracts pages from *iter and appends them to @bio's bvec array.  The pages
> - * will have to be cleaned up in the way indicated by the BIO_PAGE_PINNED flag.
> - * For a multi-segment *iter, this function only adds pages from the next
> - * non-empty segment of the iov iterator.
> - */
> -static ssize_t __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
> -{
> -	iov_iter_extraction_t extraction_flags = 0;
> -	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
> -	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
> -	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
> -	struct page **pages = (struct page **)bv;

Huh.  We type-abuse an array of bio_vec's as an array of struct page
pointers??

As a straight hoist the patch looks correct but I'm confused about this.

--D

> -	ssize_t size;
> -	unsigned int i = 0;
> -	size_t offset, left, len;
> -
> -	/*
> -	 * Move page array up in the allocated memory for the bio vecs as far as
> -	 * possible so that we can start filling biovecs from the beginning
> -	 * without overwriting the temporary page array.
> -	 */
> -	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
> -	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
> -
> -	if (bio->bi_bdev && blk_queue_pci_p2pdma(bio->bi_bdev->bd_disk->queue))
> -		extraction_flags |= ITER_ALLOW_P2PDMA;
> -
> -	size = iov_iter_extract_pages(iter, &pages,
> -				      UINT_MAX - bio->bi_iter.bi_size,
> -				      nr_pages, extraction_flags, &offset);
> -	if (unlikely(size <= 0))
> -		return size ? size : -EFAULT;
> -
> -	nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE);
> -	for (left = size; left > 0; left -= len) {
> -		unsigned int nr_to_add;
> -
> -		if (bio->bi_vcnt > 0) {
> -			struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
> -
> -			if (!zone_device_pages_have_same_pgmap(prev->bv_page,
> -					pages[i]))
> -				break;
> -		}
> -
> -		len = get_contig_folio_len(&pages[i], &nr_to_add, left, offset);
> -		__bio_add_page(bio, pages[i], len, offset);
> -		i += nr_to_add;
> -		offset = 0;
> -	}
> -
> -	iov_iter_revert(iter, left);
> -	while (i < nr_pages)
> -		bio_release_page(bio, pages[i++]);
> -	return size - left;
> -}
> -
>  /*
>   * Aligns the bio size to the len_align_mask, releasing excessive bio vecs that
>   * __bio_iov_iter_get_pages may have inserted, and reverts the trimmed length
> @@ -1325,7 +1229,7 @@ static int bio_iov_iter_align_down(struct bio *bio, struct iov_iter *iter,
>  int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter,
>  			   unsigned len_align_mask)
>  {
> -	ssize_t ret;
> +	iov_iter_extraction_t flags = 0;
>  
>  	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
>  		return -EIO;
> @@ -1338,14 +1242,26 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter,
>  
>  	if (iov_iter_extract_will_pin(iter))
>  		bio_set_flag(bio, BIO_PAGE_PINNED);
> +	if (bio->bi_bdev && blk_queue_pci_p2pdma(bio->bi_bdev->bd_disk->queue))
> +		flags |= ITER_ALLOW_P2PDMA;
>  
>  	do {
> -		ret = __bio_iov_iter_get_pages(bio, iter);
> -	} while (ret > 0 && iov_iter_count(iter) && !bio_full(bio, 0));
> +		ssize_t ret;
> +
> +		ret = iov_iter_extract_bvecs(iter, bio->bi_io_vec,
> +				UINT_MAX - bio->bi_iter.bi_size, &bio->bi_vcnt,
> +				bio->bi_max_vecs, flags);
> +		if (ret <= 0) {
> +			if (!bio->bi_vcnt)
> +				return ret;
> +			break;
> +		}
> +		bio->bi_iter.bi_size += ret;
> +	} while (iov_iter_count(iter) && !bio_full(bio, 0));
>  
> -	if (bio->bi_vcnt)
> -		return bio_iov_iter_align_down(bio, iter, len_align_mask);
> -	return ret;
> +	if (is_pci_p2pdma_page(bio->bi_io_vec->bv_page))
> +		bio->bi_opf |= REQ_NOMERGE;
> +	return bio_iov_iter_align_down(bio, iter, len_align_mask);
>  }
>  
>  static void submit_bio_wait_endio(struct bio *bio)
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 5b127043a151..a9bc5b3067e3 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -389,6 +389,9 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages,
>  			       size_t maxsize, unsigned int maxpages,
>  			       iov_iter_extraction_t extraction_flags,
>  			       size_t *offset0);
> +ssize_t iov_iter_extract_bvecs(struct iov_iter *iter, struct bio_vec *bv,
> +		size_t max_size, unsigned short *nr_vecs,
> +		unsigned short max_vecs, iov_iter_extraction_t extraction_flags);
>  
>  /**
>   * iov_iter_extract_will_pin - Indicate how pages from the iterator will be retained
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 896760bad455..545250507f08 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -1845,3 +1845,101 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i,
>  	return -EFAULT;
>  }
>  EXPORT_SYMBOL_GPL(iov_iter_extract_pages);
> +
> +static unsigned int get_contig_folio_len(struct page **pages,
> +		unsigned int *num_pages, size_t left, size_t offset)
> +{
> +	struct folio *folio = page_folio(pages[0]);
> +	size_t contig_sz = min_t(size_t, PAGE_SIZE - offset, left);
> +	unsigned int max_pages, i;
> +	size_t folio_offset, len;
> +
> +	folio_offset = PAGE_SIZE * folio_page_idx(folio, pages[0]) + offset;
> +	len = min(folio_size(folio) - folio_offset, left);
> +
> +	/*
> +	 * We might COW a single page in the middle of a large folio, so we have
> +	 * to check that all pages belong to the same folio.
> +	 */
> +	left -= contig_sz;
> +	max_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
> +	for (i = 1; i < max_pages; i++) {
> +		size_t next = min_t(size_t, PAGE_SIZE, left);
> +
> +		if (page_folio(pages[i]) != folio ||
> +		    pages[i] != pages[i - 1] + 1)
> +			break;
> +		contig_sz += next;
> +		left -= next;
> +	}
> +
> +	*num_pages = i;
> +	return contig_sz;
> +}
> +
> +#define PAGE_PTRS_PER_BVEC     (sizeof(struct bio_vec) / sizeof(struct page *))
> +
> +/**
> + * iov_iter_extract_bvecs - Extract bvecs from an iterator
> + * @iter:	the iterator to extract from
> + * @bv:		bvec return array
> + * @max_size:	maximum size to extract from @iter
> + * @nr_vecs:	number of vectors in @bv (on input and output)
> + * @max_vecs:	maximum vectors in @bv, including those filled before calling
> + * @extraction_flags: flags to qualify request
> + *
> + * Like iov_iter_extract_pages(), but returns physically contiguous ranges
> + * contained in a single folio as a single bvec instead of multiple entries.
> + *
> + * Returns the number of bytes extracted when successful, or a negative errno.
> + * If @nr_vecs was non-zero on entry, the number of successfully extracted bytes
> + * can be 0.
> + */
> +ssize_t iov_iter_extract_bvecs(struct iov_iter *iter, struct bio_vec *bv,
> +		size_t max_size, unsigned short *nr_vecs,
> +		unsigned short max_vecs, iov_iter_extraction_t extraction_flags)
> +{
> +	unsigned short entries_left = max_vecs - *nr_vecs;
> +	unsigned short nr_pages, i = 0;
> +	size_t left, offset, len;
> +	struct page **pages;
> +	ssize_t size;
> +
> +	/*
> +	 * Move page array up in the allocated memory for the bio vecs as far as
> +	 * possible so that we can start filling biovecs from the beginning
> +	 * without overwriting the temporary page array.
> +	 */
> +	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
> +	pages = (struct page **)(bv + *nr_vecs) +
> +		entries_left * (PAGE_PTRS_PER_BVEC - 1);
> +
> +	size = iov_iter_extract_pages(iter, &pages, max_size, entries_left,
> +			extraction_flags, &offset);
> +	if (unlikely(size <= 0))
> +		return size ? size : -EFAULT;
> +
> +	nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE);
> +	for (left = size; left > 0; left -= len) {
> +		unsigned int nr_to_add;
> +
> +		if (*nr_vecs > 0 &&
> +		    !zone_device_pages_have_same_pgmap(bv[*nr_vecs - 1].bv_page,
> +				pages[i]))
> +			break;
> +
> +		len = get_contig_folio_len(&pages[i], &nr_to_add, left, offset);
> +		bvec_set_page(&bv[*nr_vecs], pages[i], len, offset);
> +		i += nr_to_add;
> +		(*nr_vecs)++;
> +		offset = 0;
> +	}
> +
> +	iov_iter_revert(iter, left);
> +	if (iov_iter_extract_will_pin(iter)) {
> +		while (i < nr_pages)
> +			unpin_user_page(pages[i++]);
> +	}
> +	return size - left;
> +}
> +EXPORT_SYMBOL_GPL(iov_iter_extract_bvecs);
> -- 
> 2.47.3
> 
> 

  reply	other threads:[~2026-01-22 17:47 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20260123121444epcas5p4e729259011e031a28be8379ea3b9b749@epcas5p4.samsung.com>
2026-01-19  7:44 ` bounce buffer direct I/O when stable pages are required v2 Christoph Hellwig
2026-01-19  7:44   ` [PATCH 01/14] block: refactor get_contig_folio_len Christoph Hellwig
2026-01-22 11:00     ` Johannes Thumshirn
2026-01-22 17:54     ` Darrick J. Wong
2026-01-23  8:32     ` Damien Le Moal
2026-01-23  8:35       ` Christoph Hellwig
2026-01-23  8:44         ` Damien Le Moal
2026-01-23  8:45     ` Damien Le Moal
2026-01-23 12:14     ` Anuj Gupta
2026-01-19  7:44   ` [PATCH 02/14] block: open code bio_add_page and fix handling of mismatching P2P ranges Christoph Hellwig
2026-01-22 11:04     ` Johannes Thumshirn
2026-01-22 17:59     ` Darrick J. Wong
2026-01-23  5:43       ` Christoph Hellwig
2026-01-23  7:05         ` Darrick J. Wong
2026-01-23  8:35     ` Damien Le Moal
2026-01-23 12:15     ` Anuj Gupta
2026-01-19  7:44   ` [PATCH 03/14] iov_iter: extract a iov_iter_extract_bvecs helper from bio code Christoph Hellwig
2026-01-22 17:47     ` Darrick J. Wong [this message]
2026-01-23  5:44       ` Christoph Hellwig
2026-01-23  7:09         ` Darrick J. Wong
2026-01-23  7:14           ` Christoph Hellwig
2026-01-23 11:37     ` David Howells
2026-01-23 13:58       ` Christoph Hellwig
2026-01-23 14:57         ` David Howells
2026-01-26 17:36           ` Matthew Wilcox
2026-01-27  5:13             ` Christoph Hellwig
2026-01-27  5:44               ` Matthew Wilcox
2026-01-27  5:47                 ` Christoph Hellwig
2026-02-03  8:20           ` Askar Safin
2026-02-03 10:28           ` Askar Safin
2026-02-03 16:32             ` Christoph Hellwig
2026-01-19  7:44   ` [PATCH 04/14] block: remove bio_release_page Christoph Hellwig
2026-01-22 11:14     ` Johannes Thumshirn
2026-01-22 17:26     ` Darrick J. Wong
2026-01-23  8:43     ` Damien Le Moal
2026-01-23 12:17     ` Anuj Gupta
2026-01-19  7:44   ` [PATCH 05/14] block: add helpers to bounce buffer an iov_iter into bios Christoph Hellwig
2026-01-22 13:05     ` Johannes Thumshirn
2026-01-22 17:25     ` Darrick J. Wong
2026-01-23  5:51       ` Christoph Hellwig
2026-01-23  7:11         ` Darrick J. Wong
2026-01-23  7:16           ` Christoph Hellwig
2026-01-23  8:52     ` Damien Le Moal
2026-01-23 12:20     ` Anuj Gupta
2026-01-19  7:44   ` [PATCH 06/14] iomap: fix submission side handling of completion side errors Christoph Hellwig
2026-01-19 17:40     ` Darrick J. Wong
2026-01-23  8:54     ` Damien Le Moal
2026-01-19  7:44   ` [PATCH 07/14] iomap: simplify iomap_dio_bio_iter Christoph Hellwig
2026-01-19 17:43     ` Darrick J. Wong
2026-01-23  8:55     ` Damien Le Moal
2026-01-19  7:44   ` [PATCH 08/14] iomap: split out the per-bio logic from iomap_dio_bio_iter Christoph Hellwig
2026-01-23  8:57     ` Damien Le Moal
2026-01-19  7:44   ` [PATCH 09/14] iomap: share code between iomap_dio_bio_end_io and iomap_finish_ioend_direct Christoph Hellwig
2026-01-23  8:58     ` Damien Le Moal
2026-01-19  7:44   ` [PATCH 10/14] iomap: free the bio before completing the dio Christoph Hellwig
2026-01-19 17:43     ` Darrick J. Wong
2026-01-23  8:59     ` Damien Le Moal
2026-01-19  7:44   ` [PATCH 11/14] iomap: rename IOMAP_DIO_DIRTY to IOMAP_DIO_USER_BACKED Christoph Hellwig
2026-01-23  9:00     ` Damien Le Moal
2026-01-19  7:44   ` [PATCH 12/14] iomap: support ioends for direct reads Christoph Hellwig
2026-01-23  9:02     ` Damien Le Moal
2026-01-19  7:44   ` [PATCH 13/14] iomap: add a flag to bounce buffer direct I/O Christoph Hellwig
2026-01-23  9:05     ` Damien Le Moal
2026-01-19  7:44   ` [PATCH 14/14] xfs: use bounce buffering direct I/O when the device requires stable pages Christoph Hellwig
2026-01-19 17:45     ` Darrick J. Wong
2026-01-23  9:08     ` Damien Le Moal
2026-01-23 12:10   ` bounce buffer direct I/O when stable pages are required v2 Anuj Gupta
2026-01-23 14:01     ` Christoph Hellwig
2026-01-23 14:09     ` Keith Busch
2026-01-23 12:24   ` Christian Brauner
2026-01-23 14:10     ` block or iomap tree, was: " Christoph Hellwig
2026-01-27 10:31       ` Christian Brauner
2026-01-27 12:50         ` Christoph Hellwig
2026-01-14  7:40 bounce buffer direct I/O when stable pages are required Christoph Hellwig
2026-01-14  7:41 ` [PATCH 03/14] iov_iter: extract a iov_iter_extract_bvecs helper from bio code Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260122174703.GX5945@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=brauner@kernel.org \
    --cc=cem@kernel.org \
    --cc=hch@lst.de \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=wqu@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox