From: David Howells <dhowells@redhat.com>
To: Matthew Wilcox <willy@infradead.org>,
	Christoph Hellwig <hch@infradead.org>,
	Jens Axboe <axboe@kernel.dk>, Leon Romanovsky <leon@kernel.org>
Cc: David Howells <dhowells@redhat.com>,
	Christian Brauner <christian@brauner.io>,
	Paulo Alcantara <pc@manguebit.com>,
	netfs@lists.linux.dev, linux-afs@lists.infradead.org,
	linux-cifs@vger.kernel.org, linux-nfs@vger.kernel.org,
	ceph-devel@vger.kernel.org, v9fs@lists.linux.dev,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	Paulo Alcantara <pc@manguebit.org>
Subject: [RFC PATCH 05/17] netfs: Add some tools for managing bvecq chains
Date: Wed,  4 Mar 2026 14:03:12 +0000
Message-ID: <20260304140328.112636-6-dhowells@redhat.com>
In-Reply-To: <20260304140328.112636-1-dhowells@redhat.com>

Provide a selection of tools for managing bvec queue chains (a usage
sketch follows the list).  This includes:

 (1) Allocation, prepopulation, expansion, shortening and refcounting of
     bvecqs and bvecq chains.

     This can be used to do things like creating an encryption buffer in
     cifs or a directory content buffer in afs.  The memory segments will
     be appropriately disposed of according to the flags on the bvecq.

 (2) Management of a bvecq chain as a rolling buffer, including tracking
     of positions within it.

 (3) Loading folios, slicing chains and clearing content.
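
To illustrate the intended calling pattern, here is a minimal sketch of
a hypothetical caller (the preamble slot count and used_size are
invented for the example):

	struct bvecq *buf;
	int ret;

	/* Build a 1MiB buffer, reserving two preamble slots to be
	 * filled in later (e.g. with protocol headers).
	 */
	buf = netfs_alloc_bvecq_buffer(SZ_1M, 2, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* ... fill the buffer, then trim it down to the size actually
	 * used, keeping the two preamble slots ...
	 */
	ret = netfs_shorten_bvecq_buffer(buf, 2, used_size);

	netfs_free_bvecq_buffer(buf);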

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Paulo Alcantara <pc@manguebit.org>
cc: Matthew Wilcox <willy@infradead.org>
cc: Christoph Hellwig <hch@infradead.org>
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
---
 fs/netfs/Makefile            |   1 +
 fs/netfs/bvecq.c             | 634 +++++++++++++++++++++++++++++++++++
 fs/netfs/internal.h          |  87 +++++
 fs/netfs/stats.c             |   4 +-
 include/linux/netfs.h        |  24 ++
 include/trace/events/netfs.h |  24 ++
 6 files changed, 773 insertions(+), 1 deletion(-)
 create mode 100644 fs/netfs/bvecq.c

diff --git a/fs/netfs/Makefile b/fs/netfs/Makefile
index b43188d64bd8..e1f12ecb5abf 100644
--- a/fs/netfs/Makefile
+++ b/fs/netfs/Makefile
@@ -3,6 +3,7 @@
 netfs-y := \
 	buffered_read.o \
 	buffered_write.o \
+	bvecq.o \
 	direct_read.o \
 	direct_write.o \
 	iterator.o \
diff --git a/fs/netfs/bvecq.c b/fs/netfs/bvecq.c
new file mode 100644
index 000000000000..e223beb6661b
--- /dev/null
+++ b/fs/netfs/bvecq.c
@@ -0,0 +1,634 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Buffering helpers for bvec queues
+ *
+ * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include "internal.h"
+
+void dump_bvecq(const struct bvecq *bq)
+{
+	int b = 0;
+
+	for (; bq; bq = bq->next, b++) {
+		int skipz = 0;
+
+		pr_notice("BQ[%u] %u/%u fp=%llx\n", b, bq->nr_segs, bq->max_segs, bq->fpos);
+		for (int s = 0; s < bq->nr_segs; s++) {
+			const struct bio_vec *bv = &bq->bv[s];
+
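+			/* Collapse a run of empty slots into a single "..." line. */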
+			if (!bv->bv_page && !bv->bv_len && skipz < 2) {
+				skipz = 1;
+				continue;
+			}
+			if (skipz == 1)
+				pr_notice("BQ[%u:00-%02u] ...\n", b, s - 1);
+			skipz = 2;
+			pr_notice("BQ[%u:%02u] %10lx %04x %04x %u\n",
+				  b, s,
+				  bv->bv_page ? page_to_pfn(bv->bv_page) : 0,
+				  bv->bv_offset, bv->bv_len,
+				  bv->bv_page ? page_count(bv->bv_page) : 0);
+		}
+	}
+}
+
+/*
+ * Allocate a single bvecq chain element and initialise the header.
+ */
+struct bvecq *netfs_alloc_one_bvecq(size_t nr_slots, gfp_t gfp)
+{
+	struct bvecq *bq;
+	const size_t max_size = 512;
+	const size_t max_segs = (max_size - sizeof(*bq)) / sizeof(bq->__bv[0]);
+	size_t part = umin(nr_slots, max_segs);
+	size_t size = roundup_pow_of_two(struct_size(bq, __bv, part));
+
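+	/* The requested size was rounded up, so the usable slot count is
+	 * recomputed below from the space actually obtained.
+	 */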
+	bq = kmalloc(size, gfp);
+	if (bq) {
+		*bq = (struct bvecq) {
+			.ref		= REFCOUNT_INIT(1),
+			.bv		= bq->__bv,
+			.inline_bv	= true,
+			.max_segs	= (size - sizeof(*bq)) / sizeof(bq->__bv[0]),
+		};
+		netfs_stat(&netfs_n_bvecq);
+	}
+	return bq;
+}
+
+/**
+ * netfs_alloc_bvecq - Allocate an unpopulated bvec queue
+ * @nr_slots: Number of slots to allocate
+ * @gfp: The allocation constraints.
+ *
+ * Allocate a chain of bvecq buffers providing at least the requested
+ * cumulative number of slots.
+ */
+struct bvecq *netfs_alloc_bvecq(size_t nr_slots, gfp_t gfp)
+{
+	struct bvecq *head = NULL, *tail = NULL;
+
+	_enter("%zu", nr_slots);
+
+	for (;;) {
+		struct bvecq *bq;
+
+		bq = netfs_alloc_one_bvecq(nr_slots, gfp);
+		if (!bq)
+			goto oom;
+
+		if (tail) {
+			tail->next = bq;
+			bq->prev = tail;
+		} else {
+			head = bq;
+		}
+		tail = bq;
+		if (tail->max_segs >= nr_slots)
+			break;
+		nr_slots -= tail->max_segs;
+	}
+
+	return head;
+oom:
+	netfs_free_bvecq_buffer(head);
+	return NULL;
+}
+EXPORT_SYMBOL(netfs_alloc_bvecq);
+
+/**
+ * netfs_alloc_bvecq_buffer - Allocate buffer space into a bvec queue
+ * @size: Target size of the buffer (can be 0 for an empty buffer)
+ * @pre_slots: Number of preamble slots to set aside
+ * @gfp: The allocation constraints.
+ */
+struct bvecq *netfs_alloc_bvecq_buffer(size_t size, unsigned int pre_slots, gfp_t gfp)
+{
+	struct bvecq *head = NULL, *tail = NULL, *p = NULL;
+	size_t count = DIV_ROUND_UP(size, PAGE_SIZE);
+
+	_enter("%zx,%zx,%u", size, count, pre_slots);
+
+	do {
+		struct page **pages;
+		int want, got;
+
+		p = netfs_alloc_one_bvecq(umin(count, 32 - 3), gfp);
+		if (!p)
+			goto oom;
+
+		p->free = true;
+
+		if (tail) {
+			tail->next = p;
+			p->prev = tail;
+		} else {
+			head = p;
+		}
+		tail = p;
+		if (!count)
+			break;
+
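+		/* Stage the page pointers in the unused tail of the bv[]
+		 * array; the bvec writes below trail the staged pointers, so
+		 * nothing is overwritten before it is read.
+		 */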
+		pages = (struct page **)&p->bv[p->max_segs];
+		pages -= p->max_segs - pre_slots;
+
+		want = umin(count, p->max_segs - pre_slots);
+		got = alloc_pages_bulk(gfp, want, pages);
+		if (got < want) {
+			for (int i = 0; i < got; i++)
+				__free_page(pages[i]);
+			goto oom;
+		}
+
+		/* The preamble slots are left for the caller to fill in;
+		 * clear them so that freeing doesn't see garbage.
+		 */
+		memset(&tail->bv[0], 0, pre_slots * sizeof(tail->bv[0]));
+		tail->nr_segs = pre_slots + got;
+		for (int i = 0; i < got; i++) {
+			int j = pre_slots + i;
+
+			set_page_count(pages[i], 1);
+			bvec_set_page(&tail->bv[j], pages[i], PAGE_SIZE, 0);
+		}
+
+		count -= got;
+		pre_slots = 0;
+	} while (count > 0);
+
+	return head;
+oom:
+	netfs_free_bvecq_buffer(head);
+	return NULL;
+}
+EXPORT_SYMBOL(netfs_alloc_bvecq_buffer);
+
+/**
+ * netfs_expand_bvecq_buffer - Expand the buffer space in a bvec queue
+ * @_buffer: Pointer to the bvec queue to add to (may point to a NULL; updated).
+ * @_cur_size: Current size of the buffer (updated).
+ * @size: Target size of the buffer.
+ * @gfp: The allocation constraints.
+ */
+int netfs_expand_bvecq_buffer(struct bvecq **_buffer, size_t *_cur_size, ssize_t size, gfp_t gfp)
+{
+	struct bvecq *tail = *_buffer, *p;
+	const size_t max_segs = 32;
+
+	size = round_up(size, PAGE_SIZE);
+	if (*_cur_size >= size)
+		return 0;
+
+	if (tail)
+		while (tail->next)
+			tail = tail->next;
+
+	do {
+		struct page *page;
+		int order = 0;
+
+		if (!tail || bvecq_is_full(tail)) {
+			p = netfs_alloc_one_bvecq(max_segs, gfp);
+			if (!p)
+				return -ENOMEM;
+			if (tail) {
+				tail->next = p;
+				p->prev = tail;
+			} else {
+				*_buffer = p;
+			}
+			tail = p;
+		}
+
+		if (size - *_cur_size > PAGE_SIZE)
+			order = umin(ilog2(size - *_cur_size) - PAGE_SHIFT,
+				     MAX_PAGECACHE_ORDER);
+
+		page = alloc_pages(gfp | __GFP_COMP, order);
+		if (!page && order > 0)
+			page = alloc_pages(gfp | __GFP_COMP, 0);
+		if (!page)
+			return -ENOMEM;
+
+		bvec_set_page(&tail->bv[tail->nr_segs++], page, PAGE_SIZE << order, 0);
+		*_cur_size += PAGE_SIZE << order;
+	} while (*_cur_size < size);
+
+	return 0;
+}
+EXPORT_SYMBOL(netfs_expand_bvecq_buffer);
+
+static void netfs_bvecq_free_seg(struct bvecq *bq, unsigned int seg)
+{
+	if (!bq->free ||
+	    !bq->bv[seg].bv_page)
+		return;
+
+	if (bq->unpin)
+		unpin_user_page(bq->bv[seg].bv_page);
+	else
+		put_page(bq->bv[seg].bv_page); /* May be a compound page */
+}
+
+/**
+ * netfs_free_bvecq_buffer - Free a bvec queue
+ * @bq: The start of the bvec queue to free
+ *
+ * Free up a chain of bvecqs and the pages they point to.
+ */
+void netfs_free_bvecq_buffer(struct bvecq *bq)
+{
+	struct bvecq *next;
+
+	for (; bq; bq = next) {
+		for (int seg = 0; seg < bq->nr_segs; seg++)
+			netfs_bvecq_free_seg(bq, seg);
+		next = bq->next;
+		netfs_stat_d(&netfs_n_bvecq);
+		kfree(bq);
+	}
+}
+EXPORT_SYMBOL(netfs_free_bvecq_buffer);
+
+/**
+ * netfs_put_bvecq - Put a bvec queue
+ * @bq: The start of the bvec queue to put
+ *
+ * Put the ref(s) on the nodes in a bvec queue, freeing up the nodes and the
+ * page fragments they point to as the refcounts become zero.
+ */
+void netfs_put_bvecq(struct bvecq *bq)
+{
+	struct bvecq *next;
+
+	for (; bq; bq = next) {
+		if (!refcount_dec_and_test(&bq->ref))
+			break;
+		for (int seg = 0; seg < bq->nr_segs; seg++)
+			netfs_bvecq_free_seg(bq, seg);
+		next = bq->next;
+		netfs_stat_d(&netfs_n_bvecq);
+		kfree(bq);
+	}
+}
+EXPORT_SYMBOL(netfs_put_bvecq);
+
+/**
+ * netfs_shorten_bvecq_buffer - Shorten a bvec queue buffer
+ * @bq: The start of the buffer to shorten
+ * @seg: The slot to start from
+ * @size: The size to retain
+ *
+ * Shorten the content of a bvec queue down to the minimum number of segments,
+ * starting at the specified segment, to retain the specified size.  An error
+ * will be reported if the bvec queue is undersized.
+ */
+int netfs_shorten_bvecq_buffer(struct bvecq *bq, unsigned int seg, size_t size)
+{
+	ssize_t retain = size;
+
+	/* Skip through the segments we want to keep. */
+	for (; bq; bq = bq->next) {
+		for (; seg < bq->nr_segs; seg++) {
+			retain -= bq->bv[seg].bv_len;
+			if (retain <= 0)
+				goto found;
+		}
+		seg = 0;
+	}
+	if (WARN_ON_ONCE(retain > 0))
+		return -EMSGSIZE;
+	return 0;
+
+found:
+	/* Shorten the entry to be retained and clean the rest of this bvecq. */
+	bq->bv[seg].bv_len += retain;
+	seg++;
+	for (int i = seg; i < bq->nr_segs; i++)
+		netfs_bvecq_free_seg(bq, i);
+	bq->nr_segs = seg;
+
+	/* Free the queue tail. */
+	netfs_free_bvecq_buffer(bq->next);
+	bq->next = NULL;
+	return 0;
+}
+
+/*
+ * Initialise a rolling buffer.  We allocate an empty bvecq so that the
+ * pointers can be driven independently by the producer and the consumer.
+ */
+int bvecq_buffer_init(struct bvecq_pos *pos, unsigned int rreq_id)
+{
+	struct bvecq *bq;
+
+	bq = netfs_alloc_bvecq(14, GFP_NOFS);
+	if (!bq)
+		return -ENOMEM;
+
+	pos->bvecq  = bq; /* Comes with a ref. */
+	pos->slot   = 0;
+	pos->offset = 0;
+	return 0;
+}
+
+/*
+ * Add a new segment onto the rolling buffer, either because the previous one
+ * is full or because we have a discontiguity to contend with.
+ */
+int bvecq_buffer_make_space(struct bvecq_pos *pos)
+{
+	struct bvecq *bq, *head = pos->bvecq;
+
+	bq = netfs_alloc_bvecq(14, GFP_NOFS);
+	if (!bq)
+		return -ENOMEM;
+	bq->prev = head;
+
+	pos->bvecq = netfs_get_bvecq(bq);
+	pos->slot = 0;
+	pos->offset = 0;
+
+	/* Make sure the initialisation is stored before the next pointer.
+	 *
+	 * [!] NOTE: After we set head->next, the consumer is at liberty to
+	 * immediately delete the old head.
+	 */
+	smp_store_release(&head->next, bq);
+	netfs_put_bvecq(head);
+	return 0;
+}
+
+/*
+ * Advance a bvecq position by the given amount of data.
+ */
+void bvecq_pos_advance(struct bvecq_pos *pos, size_t amount)
+{
+	struct bvecq *bq = pos->bvecq;
+	unsigned int slot = pos->slot;
+	size_t offset = pos->offset;
+
+	if (slot >= bq->nr_segs) {
+		if (!bq->next)
+			return;
+		bq = bq->next;
+		slot = 0;
+	}
+
+	while (amount) {
+		const struct bio_vec *bv = &bq->bv[slot];
+		size_t part = umin(bv->bv_len - offset, amount);
+
+		amount -= part;
+		offset += part;
+		if (offset < bv->bv_len)
+			break;
+		offset = 0;
+		slot++;
+		if (slot >= bq->nr_segs) {
+			if (!bq->next)
+				break;
+			bq = bq->next;
+			slot = 0;
+		}
+	}
+
+	pos->slot   = slot;
+	pos->offset = offset;
+	bvecq_pos_move(pos, bq);
+}
+
+/*
+ * Clear memory fragments pointed to by a bvec queue, advancing the position.
+ */
+ssize_t bvecq_zero(struct bvecq_pos *pos, size_t amount)
+{
+	struct bvecq *bq = pos->bvecq;
+	unsigned int slot = pos->slot;
+	ssize_t cleared = 0;
+	size_t offset = pos->offset;
+
+	if (WARN_ON_ONCE(!bq))
+		return 0;
+
+	if (slot >= bq->nr_segs) {
+		bq = bq->next;
+		if (WARN_ON_ONCE(!bq))
+			return 0;
+		slot = 0;
+	}
+
+	do {
+		const struct bio_vec *bv = &bq->bv[slot];
+
+		if (offset < bv->bv_len) {
+			size_t part = umin(amount - cleared, bv->bv_len - offset);
+
+			memzero_page(bv->bv_page, bv->bv_offset + offset, part);
+
+			offset += part;
+			cleared += part;
+		}
+
+		if (offset >= bv->bv_len) {
+			offset = 0;
+			slot++;
+			if (slot >= bq->nr_segs) {
+				if (!bq->next)
+					break;
+				bq = bq->next;
+				slot = 0;
+			}
+		}
+	} while (cleared < amount);
+
+	bvecq_pos_move(pos, bq);
+	pos->slot = slot;
+	pos->offset = offset;
+	return cleared;
+}
+
+/*
+ * Determine the size and number of segments that can be obtained from the
+ * next slice of the bvec queue, up to the maximum size and segment count
+ * specified.  The position cursor is updated to the end of the slice.
+ */
+size_t bvecq_slice(struct bvecq_pos *pos, size_t max_size,
+		   unsigned int max_segs, unsigned int *_nr_segs)
+{
+	struct bvecq *bq;
+	unsigned int slot = pos->slot, nsegs = 0;
+	size_t size = 0;
+	size_t offset = pos->offset;
+
+	bq = pos->bvecq;
+	for (;;) {
+		for (; slot < bq->nr_segs; slot++) {
+			const struct bio_vec *bvec = &bq->bv[slot];
+
+			if (offset < bvec->bv_len && bvec->bv_page) {
+				size_t part = umin(bvec->bv_len - offset, max_size);
+
+				size += part;
+				offset += part;
+				max_size -= part;
+				nsegs++;
+				if (!max_size || nsegs >= max_segs)
+					goto out;
+			}
+			offset = 0;
+		}
+
+		/* pos->bvecq isn't allowed to go NULL as the queue may get
+		 * extended and we would lose our place.
+		 */
+		if (!bq->next)
+			break;
+		slot = 0;
+		bq = bq->next;
+	}
+
+out:
+	*_nr_segs = nsegs;
+	if (slot == bq->nr_segs && bq->next) {
+		bq = bq->next;
+		slot = 0;
+		offset = 0;
+	}
+	bvecq_pos_move(pos, bq);
+	pos->slot = slot;
+	pos->offset = offset;
+	return size;
+}
+
+/*
+ * Extract page fragments from a bvec queue position into another bvecq, which
+ * we allocate.  The position is advanced.
+ */
+ssize_t bvecq_extract(struct bvecq_pos *pos, size_t amount,
+		      unsigned int max_segs, struct bvecq **to)
+{
+	struct bvecq_pos tmp_pos;
+	struct bvecq *src, *dst = NULL;
+	unsigned int slot = pos->slot, nsegs;
+	ssize_t extracted = 0;
+	size_t offset = pos->offset;
+
+	*to = NULL;
+	if (!max_segs)
+		max_segs = UINT_MAX;
+
+	bvecq_pos_attach(&tmp_pos, pos);
+	amount = bvecq_slice(&tmp_pos, amount, max_segs, &nsegs);
+	bvecq_pos_detach(&tmp_pos);
+	if (nsegs == 0)
+		return -EIO;
+
+	dst = netfs_alloc_bvecq(nsegs, GFP_KERNEL);
+	if (!dst)
+		return -ENOMEM;
+	*to = dst;
+
+	/* Transcribe the segments */
+	src = pos->bvecq;
+	for (;;) {
+		for (; slot < src->nr_segs; slot++) {
+			const struct bio_vec *sv = &src->bv[slot];
+			struct bio_vec *dv = &dst->bv[dst->nr_segs];
+
+			_debug("EXTR sl=%x off=%zx am=%zx p=%lx",
+			       slot, offset, amount, page_to_pfn(sv->bv_page));
+
+			if (offset < sv->bv_len && sv->bv_page) {
+				size_t part = umin(sv->bv_len - offset, amount);
+
+				bvec_set_page(dv, sv->bv_page, part,
+					      sv->bv_offset + offset);
+				extracted += part;
+				amount -= part;
+				offset += part;
+				trace_netfs_bv_slot(dst, dst->nr_segs);
+				dst->nr_segs++;
+				if (amount == 0)
+					goto out;
+				/* bvecq_slice() already capped amount to what
+				 * fits in the segments counted, so exhausting
+				 * amount is the loop's termination condition.
+				 */
+				if (bvecq_is_full(dst))
+					dst = dst->next;
+			}
+			offset = 0;
+		}
+
+		/* pos->bvecq isn't allowed to go NULL as the queue may get
+		 * extended and we would lose our place.
+		 */
+		if (!src->next)
+			break;
+		slot = 0;
+		src = src->next;
+	}
+
+out:
+	if (slot == src->nr_segs && src->next) {
+		src = src->next;
+		slot = 0;
+		offset = 0;
+	}
+	bvecq_pos_move(pos, src);
+	pos->slot = slot;
+	pos->offset = offset;
+	return extracted;
+}
+
+/*
+ * Decant part of the list of folios to be read onto a bvecq.  The position
+ * must be pre-seeded with a bvecq object.
+ */
+ssize_t bvecq_load_from_ra(struct bvecq_pos *pos,
+			   struct readahead_control *ractl,
+			   struct folio_batch *put_batch)
+{
+	struct folio **folios;
+	struct bvecq *bq = pos->bvecq;
+	unsigned int space;
+	ssize_t loaded = 0;
+	int nr;
+
+	if (bvecq_is_full(bq)) {
+		bq = netfs_alloc_bvecq(14, GFP_NOFS);
+		if (!bq)
+			return -ENOMEM;
+		bq->prev = pos->bvecq;
+	}
+
+	space = bq->max_segs - bq->nr_segs;
+
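+	/* Stage the folio pointers in the unused tail of the bv[] array. */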
+	folios = (struct folio **)(bq->bv + bq->max_segs);
+	folios -= space;
+
+	nr = __readahead_batch(ractl, (struct page **)folios, space);
+
+	_enter("%u/%u %u/%u", bq->nr_segs, bq->max_segs, nr, space);
+
+	if (!nr) {
+		if (bq != pos->bvecq)
+			netfs_put_bvecq(bq);
+		return 0;
+	}
+
+	bq->fpos = folio_pos(folios[0]);
+
+	for (int i = 0; i < nr; i++) {
+		struct folio *folio = folios[i];
+		size_t len = folio_size(folio);
+
+		loaded += len;
+		bvec_set_folio(&bq->bv[bq->nr_segs + i], folio, len, 0);
+
+		trace_netfs_folio(folio, netfs_folio_trace_read);
+		if (!folio_batch_add(put_batch, folio))
+			folio_batch_release(put_batch);
+	}
+
+	/* Update the counter after setting the slots. */
+	smp_store_release(&bq->nr_segs, bq->nr_segs + nr);
+
+	if (bq != pos->bvecq) {
+		/* Write the next pointer after initialisation. */
+		smp_store_release(&pos->bvecq->next, bq);
+		bvecq_pos_move(pos, bq);
+	}
+	return loaded;
+}
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index d436e20d3418..89ebeb49e969 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -33,6 +33,92 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
 void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
 			 loff_t pos, size_t copied);
 
+/*
+ * bvecq.c
+ */
+struct bvecq *netfs_alloc_one_bvecq(size_t nr_slots, gfp_t gfp);
+int bvecq_buffer_init(struct bvecq_pos *pos, unsigned int rreq_id);
+int bvecq_buffer_make_space(struct bvecq_pos *pos);
+void bvecq_pos_advance(struct bvecq_pos *pos, size_t amount);
+ssize_t bvecq_zero(struct bvecq_pos *pos, size_t amount);
+size_t bvecq_slice(struct bvecq_pos *pos, size_t max_size,
+		   unsigned int max_segs, unsigned int *_nr_segs);
+ssize_t bvecq_extract(struct bvecq_pos *pos, size_t amount,
+		      unsigned int max_segs, struct bvecq **to);
+ssize_t bvecq_load_from_ra(struct bvecq_pos *pos,
+			   struct readahead_control *ractl,
+			   struct folio_batch *put_batch);
+
+static inline void bvecq_pos_attach(struct bvecq_pos *pos, const struct bvecq_pos *at)
+{
+	*pos = *at;
+	netfs_get_bvecq(pos->bvecq);
+}
+
+static inline void bvecq_pos_detach(struct bvecq_pos *pos)
+{
+	netfs_put_bvecq(pos->bvecq);
+	pos->bvecq = NULL;
+	pos->slot = 0;
+	pos->offset = 0;
+}
+
+static inline void bvecq_pos_transfer(struct bvecq_pos *pos, struct bvecq_pos *from)
+{
+	*pos = *from;
+	from->bvecq = NULL;
+	from->slot = 0;
+	from->offset = 0;
+}
+
+static inline void bvecq_pos_move(struct bvecq_pos *pos, struct bvecq *to)
+{
+	struct bvecq *old = pos->bvecq;
+
+	if (old != to) {
+		pos->bvecq = netfs_get_bvecq(to);
+		netfs_put_bvecq(old);
+	}
+}
+
+static inline bool bvecq_buffer_step(struct bvecq_pos *pos)
+{
+	struct bvecq *bq = pos->bvecq;
+
+	pos->slot++;
+	pos->offset = 0;
+	if (pos->slot <= bq->nr_segs)
+		return true;
+	if (!bq->next)
+		return false;
+	bvecq_pos_move(pos, bq->next);
+	pos->slot = 0;
+	return true;
+}
+
+static inline struct bvecq *bvecq_buffer_delete_spent(struct bvecq_pos *pos)
+{
+	struct bvecq *spent = pos->bvecq;
+	/* Read the contents of the queue segment after the pointer to it. */
+	struct bvecq *next = smp_load_acquire(&spent->next);
+
+	if (!next)
+		return NULL;
+	next->prev = NULL;
+	spent->next = NULL;
+	netfs_put_bvecq(spent);
+	pos->bvecq = next; /* We take spent's ref */
+	pos->slot = 0;
+	pos->offset = 0;
+	return next;
+}
+
+static inline bool bvecq_is_full(const struct bvecq *bvecq)
+{
+	return bvecq->nr_segs >= bvecq->max_segs;
+}
+
 /*
  * main.c
  */
@@ -166,6 +252,7 @@ extern atomic_t netfs_n_wh_retry_write_subreq;
 extern atomic_t netfs_n_wb_lock_skip;
 extern atomic_t netfs_n_wb_lock_wait;
 extern atomic_t netfs_n_folioq;
+extern atomic_t netfs_n_bvecq;
 
 int netfs_stats_show(struct seq_file *m, void *v);
 
diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c
index ab6b916addc4..84c2a4bcc762 100644
--- a/fs/netfs/stats.c
+++ b/fs/netfs/stats.c
@@ -48,6 +48,7 @@ atomic_t netfs_n_wh_retry_write_subreq;
 atomic_t netfs_n_wb_lock_skip;
 atomic_t netfs_n_wb_lock_wait;
 atomic_t netfs_n_folioq;
+atomic_t netfs_n_bvecq;
 
 int netfs_stats_show(struct seq_file *m, void *v)
 {
@@ -90,9 +91,10 @@ int netfs_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&netfs_n_rh_retry_read_subreq),
 		   atomic_read(&netfs_n_wh_retry_write_req),
 		   atomic_read(&netfs_n_wh_retry_write_subreq));
-	seq_printf(m, "Objs   : rr=%u sr=%u foq=%u wsc=%u\n",
+	seq_printf(m, "Objs   : rr=%u sr=%u bq=%u foq=%u wsc=%u\n",
 		   atomic_read(&netfs_n_rh_rreq),
 		   atomic_read(&netfs_n_rh_sreq),
+		   atomic_read(&netfs_n_bvecq),
 		   atomic_read(&netfs_n_folioq),
 		   atomic_read(&netfs_n_wh_wstream_conflict));
 	seq_printf(m, "WbLock : skip=%u wait=%u\n",
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 72ee7d210a74..f360b25ceb31 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -17,12 +17,14 @@
 #include <linux/workqueue.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
+#include <linux/bvec.h>
 #include <linux/uio.h>
 #include <linux/rolling_buffer.h>
 
 enum netfs_sreq_ref_trace;
 typedef struct mempool mempool_t;
 struct folio_queue;
+struct bvecq;
 
 /**
  * folio_start_private_2 - Start an fscache write on a folio.  [DEPRECATED]
@@ -40,6 +42,16 @@ static inline void folio_start_private_2(struct folio *folio)
 	folio_set_private_2(folio);
 }
 
+/*
+ * Position in a bio_vec queue.  The bvecq_pos holds a ref on the queue
+ * segment it points to.
+ */
+struct bvecq_pos {
+	struct bvecq		*bvecq;		/* The current bvecq segment */
+	unsigned int		offset;		/* The offset within the starting slot */
+	u16			slot;		/* The starting slot */
+};
+
 enum netfs_io_source {
 	NETFS_SOURCE_UNKNOWN,
 	NETFS_FILL_WITH_ZEROES,
@@ -462,6 +474,12 @@ int netfs_alloc_folioq_buffer(struct address_space *mapping,
 			      struct folio_queue **_buffer,
 			      size_t *_cur_size, ssize_t size, gfp_t gfp);
 void netfs_free_folioq_buffer(struct folio_queue *fq);
+void dump_bvecq(const struct bvecq *bq);
+struct bvecq *netfs_alloc_bvecq(size_t nr_slots, gfp_t gfp);
+struct bvecq *netfs_alloc_bvecq_buffer(size_t size, unsigned int pre_slots, gfp_t gfp);
+void netfs_free_bvecq_buffer(struct bvecq *bq);
+void netfs_put_bvecq(struct bvecq *bq);
+int netfs_shorten_bvecq_buffer(struct bvecq *bq, unsigned int seg, size_t size);
 
 /**
  * netfs_inode - Get the netfs inode context from the inode
@@ -552,4 +570,10 @@ static inline void netfs_wait_for_outstanding_io(struct inode *inode)
 	wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
 }
 
+static inline struct bvecq *netfs_get_bvecq(struct bvecq *bq)
+{
+	refcount_inc(&bq->ref);
+	return bq;
+}
+
 #endif /* _LINUX_NETFS_H */
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 2d366be46a1c..2523adc3ad85 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -778,6 +778,30 @@ TRACE_EVENT(netfs_folioq,
 		      __print_symbolic(__entry->trace, netfs_folioq_traces))
 	    );
 
+TRACE_EVENT(netfs_bv_slot,
+	    TP_PROTO(const struct bvecq *bq, int slot),
+
+	    TP_ARGS(bq, slot),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned long,		pfn)
+		    __field(unsigned int,		offset)
+		    __field(unsigned int,		len)
+		    __field(unsigned int,		slot)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->slot = slot;
+		    __entry->pfn = page_to_pfn(bq->bv[slot].bv_page);
+		    __entry->offset = bq->bv[slot].bv_offset;
+		    __entry->len = bq->bv[slot].bv_len;
+			   ),
+
+	    TP_printk("bq[%x] p=%lx %x-%x",
+		      __entry->slot,
+		      __entry->pfn, __entry->offset, __entry->offset + __entry->len)
+	    );
+
 #undef EM
 #undef E_
 #endif /* _TRACE_NETFS_H */


Thread overview: 32+ messages
2026-03-04 14:03 [RFC PATCH 00/17] netfs: [WIP] Keep track of folios in a segmented bio_vec[] chain David Howells
2026-03-04 14:03 ` [RFC PATCH 01/17] netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict sequence David Howells
2026-03-04 14:03 ` [RFC PATCH 02/17] vfs: Implement a FIEMAP callback David Howells
2026-03-04 14:06   ` Christoph Hellwig
2026-03-04 14:21     ` David Howells
2026-03-04 14:25       ` Christoph Hellwig
2026-03-04 14:34         ` David Howells
2026-03-04 14:03 ` [RFC PATCH 03/17] iov_iter: Add a segmented queue of bio_vec[] David Howells
2026-03-04 14:03 ` [RFC PATCH 04/17] Add a function to kmap one page of a multipage bio_vec David Howells
2026-03-04 14:03 ` David Howells [this message]
2026-03-04 14:03 ` [RFC PATCH 06/17] afs: Use a bvecq to hold dir content rather than folioq David Howells
2026-03-04 14:03 ` [RFC PATCH 07/17] netfs: Add a function to extract from an iter into a bvecq David Howells
2026-03-04 14:03 ` [RFC PATCH 08/17] cifs: Use a bvecq for buffering instead of a folioq David Howells
2026-03-04 14:03 ` [RFC PATCH 09/17] cifs: Support ITER_BVECQ in smb_extract_iter_to_rdma() David Howells
2026-03-04 14:03 ` [RFC PATCH 10/17] netfs: Switch to using bvecq rather than folio_queue and rolling_buffer David Howells
2026-03-04 14:03 ` [RFC PATCH 11/17] cifs: Remove support for ITER_KVEC/BVEC/FOLIOQ from smb_extract_iter_to_rdma() David Howells
2026-03-04 14:03 ` [RFC PATCH 12/17] netfs: Remove netfs_alloc/free_folioq_buffer() David Howells
2026-03-04 14:03 ` [RFC PATCH 13/17] netfs: Remove netfs_extract_user_iter() David Howells
2026-03-04 14:03 ` [RFC PATCH 14/17] iov_iter: Remove ITER_FOLIOQ David Howells
2026-03-04 14:03 ` [RFC PATCH 15/17] netfs: Remove folio_queue and rolling_buffer David Howells
2026-03-04 14:03 ` [RFC PATCH 16/17] netfs: Check for too much data being read David Howells
2026-03-04 14:03 ` [RFC PATCH 17/17] netfs: Combine prepare and issue ops and grab the buffers on request David Howells
2026-03-04 14:39   ` Christoph Hellwig
2026-03-04 14:51     ` David Howells
2026-03-04 15:01       ` Christoph Hellwig
2026-03-23 18:37   ` ChenXiaoSong
2026-03-23 20:14     ` David Howells
2026-03-23 22:44     ` Paulo Alcantara
2026-03-24  1:03       ` ChenXiaoSong
2026-03-24  7:16         ` David Howells
2026-03-24  7:38           ` ChenXiaoSong
2026-03-24  7:53             ` David Howells
