public inbox for linux-nfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Chuck Lever <cel@kernel.org>
To: NeilBrown <neilb@ownmail.net>, Jeff Layton <jlayton@kernel.org>,
	Olga Kornievskaia <okorniev@redhat.com>,
	Dai Ngo <dai.ngo@oracle.com>, Tom Talpey <tom@talpey.com>
Cc: <linux-nfs@vger.kernel.org>, Chuck Lever <chuck.lever@oracle.com>
Subject: [PATCH v2 4/8] sunrpc: add per-transport page recycling pool
Date: Tue, 10 Feb 2026 11:20:21 -0500	[thread overview]
Message-ID: <20260210162025.2356389-5-cel@kernel.org> (raw)
In-Reply-To: <20260210162025.2356389-1-cel@kernel.org>

From: Chuck Lever <chuck.lever@oracle.com>

RPC server transports allocate pages for receiving incoming data on
every request. Under high load, this repeated allocation and freeing
creates unnecessary overhead in the page allocator hot path.

Introduce svc_page_pool, a lock-free page recycling mechanism that
enables efficient page reuse between receive operations. A follow-up
commit wires this into the TCP transport's receive path; svcrdma's
RDMA Read path might also make use of this mechanism some day.

The pool uses llist for lock-free producer-consumer handoff: worker
threads returning pages after RPC processing act as producers, while
receiver threads allocating pages for incoming data act as
consumers. Pages are linked via page->pcp_llist, which is safe
because these pages are owned exclusively by the transport.

Each pool tracks its NUMA node affinity, allowing page allocations
to target the same node as the transport's receiver thread. Provide
svc_pool_node() to enable transports to determine the NUMA node
associated with a service pool for NUMA-aware resource allocation.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h      |   1 +
 include/linux/sunrpc/svc_xprt.h |  32 +++++++
 net/sunrpc/svc.c                |  13 +++
 net/sunrpc/svc_xprt.c           | 151 ++++++++++++++++++++++++++++++++
 4 files changed, 197 insertions(+)

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 5506d20857c3..f4efe60f4dad 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -457,6 +457,7 @@ void		   svc_wake_up(struct svc_serv *);
 void		   svc_reserve(struct svc_rqst *rqstp, int space);
 void		   svc_pool_wake_idle_thread(struct svc_pool *pool);
 struct svc_pool   *svc_pool_for_cpu(struct svc_serv *serv);
+int		   svc_pool_node(struct svc_pool *pool);
 char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
 const char *	   svc_proc_name(const struct svc_rqst *rqstp);
 int		   svc_encode_result_payload(struct svc_rqst *rqstp,
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index da2a2531e110..e60c2936b1ce 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -9,9 +9,33 @@
 #define SUNRPC_SVC_XPRT_H
 
 #include <linux/sunrpc/svc.h>
+#include <linux/llist.h>
 
 struct module;
 
+/**
+ * struct svc_page_pool - per-transport page recycling pool
+ * @pp_pages: lock-free list of recycled pages
+ * @pp_count: number of pages currently in pool; advisory — concurrent
+ *	producers can each pass the capacity check in
+ *	svc_page_pool_put(), so it may transiently exceed @pp_max
+ * @pp_numa_node: NUMA node for page allocations
+ * @pp_max: maximum pages to retain in pool
+ *
+ * Lock-free page recycling between producers (svc threads returning
+ * pages) and a single consumer (the thread allocating pages for
+ * receives). Uses llist for efficient producer-consumer handoff
+ * without spinlocks; producers need no mutual serialization because
+ * llist_add() is multi-producer safe.
+ *
+ * Callers must serialize calls to svc_page_pool_get(); multiple
+ * concurrent consumers are not supported.
+ * Allocate with svc_page_pool_alloc(); free with svc_page_pool_free().
+ */
+struct svc_page_pool {
+	struct llist_head	pp_pages;
+	atomic_t		pp_count;
+	int			pp_numa_node;
+	unsigned int		pp_max;
+};
+
 struct svc_xprt_ops {
 	struct svc_xprt	*(*xpo_create)(struct svc_serv *,
 				       struct net *net,
@@ -187,6 +211,14 @@ void	svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *xprt);
 void	svc_age_temp_xprts_now(struct svc_serv *, struct sockaddr *);
 void	svc_xprt_deferred_close(struct svc_xprt *xprt);
 
+/* Page pool helpers */
+struct svc_page_pool *svc_page_pool_alloc(int numa_node, unsigned int max);
+void	svc_page_pool_free(struct svc_page_pool *pool);
+void	svc_page_pool_put(struct svc_page_pool *pool, struct page *page);
+void	svc_page_pool_put_bulk(struct svc_page_pool *pool,
+			       struct page **pages, unsigned int count);
+struct page *svc_page_pool_get(struct svc_page_pool *pool);
+
 static inline void svc_xprt_get(struct svc_xprt *xprt)
 {
 	kref_get(&xprt->xpt_ref);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4704dce7284e..6b350cb7d539 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -418,6 +418,19 @@ struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv)
 	return &serv->sv_pools[pidx % serv->sv_nrpools];
 }
 
+/**
+ * svc_pool_node - Return the NUMA node affinity of a service pool
+ * @pool: the service pool
+ *
+ * Return value:
+ *   The NUMA node the pool is associated with, or the local node
+ *   if no explicit mapping exists
+ */
+int svc_pool_node(struct svc_pool *pool)
+{
+	return svc_pool_map_get_node(pool->sp_id);
+}
+/* Exported so transports built as separate modules (e.g. rpcrdma's
+ * svcrdma, named as a prospective user in the commit message) can
+ * perform NUMA-aware allocation; without this a modular caller of
+ * the svc.h declaration fails to link.
+ */
+EXPORT_SYMBOL_GPL(svc_pool_node);
+
 static int svc_rpcb_setup(struct svc_serv *serv, struct net *net)
 {
 	int err;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 6973184ff667..fe31cf6a9c5d 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1497,4 +1497,155 @@ int svc_pool_stats_open(struct svc_info *info, struct file *file)
 }
 EXPORT_SYMBOL(svc_pool_stats_open);
 
+/* Map a page to the llist_node embedded in it. Using page->pcp_llist
+ * is safe here because pooled pages are owned exclusively by the
+ * transport (see svc_page_pool_alloc() below).
+ */
+static struct llist_node *svc_page_to_llist(struct page *page)
+{
+	return &page->pcp_llist;
+}
+
+/* Inverse of svc_page_to_llist(): recover the page from its node. */
+static struct page *svc_llist_to_page(struct llist_node *node)
+{
+	return container_of(node, struct page, pcp_llist);
+}
+
+/**
+ * svc_page_pool_alloc - Allocate a page pool
+ * @numa_node: NUMA node for page allocations
+ * @max: maximum pages to retain in pool
+ *
+ * Pages in an svc_page_pool are linked via page->pcp_llist, which is
+ * safe since these pages are owned exclusively by the transport.
+ *
+ * The caller must free the pool with svc_page_pool_free() when
+ * the transport is destroyed.
+ *
+ * Returns a new page pool, or NULL on allocation failure.
+ */
+struct svc_page_pool *svc_page_pool_alloc(int numa_node, unsigned int max)
+{
+	struct svc_page_pool *pool;
+
+	/* Place the pool descriptor itself on the receiver's node */
+	pool = kmalloc_node(sizeof(*pool), GFP_KERNEL, numa_node);
+	if (!pool)
+		return NULL;
+
+	init_llist_head(&pool->pp_pages);
+	atomic_set(&pool->pp_count, 0);
+	pool->pp_numa_node = numa_node;
+	pool->pp_max = max;
+	return pool;
+}
+/* Declared in svc_xprt.h for use by transports; export it like
+ * svc_page_pool_put_bulk() so modular transports can allocate pools.
+ */
+EXPORT_SYMBOL_GPL(svc_page_pool_alloc);
+
+/**
+ * svc_page_pool_free - Free a page pool and all pages in it
+ * @pool: pool to free (may be NULL)
+ *
+ * Must not race with svc_page_pool_get()/_put(); the transport is
+ * expected to have quiesced all producers and the consumer first.
+ */
+void svc_page_pool_free(struct svc_page_pool *pool)
+{
+	struct llist_node *node, *next;
+
+	if (!pool)
+		return;
+
+	/* Detach the entire list with one xchg instead of paying a
+	 * cmpxchg per page via llist_del_first(); at teardown there
+	 * are no other users, so walking it unlocked is safe.
+	 */
+	node = llist_del_all(&pool->pp_pages);
+	while (node) {
+		next = node->next;
+		put_page(svc_llist_to_page(node));
+		node = next;
+	}
+	kfree(pool);
+}
+EXPORT_SYMBOL_GPL(svc_page_pool_free);
+
+/**
+ * svc_page_pool_put - Return a page to the pool
+ * @pool: pool to return page to (may be NULL)
+ * @page: page to return (may be NULL)
+ *
+ * Transfers ownership of @page to the pool. The caller's reference
+ * is consumed: either the pool retains the page, or put_page() is
+ * called if @pool is NULL or full.
+ *
+ * @pp_max is an advisory limit: concurrent producers can each pass
+ * the capacity check and briefly push the pool slightly past it.
+ */
+void svc_page_pool_put(struct svc_page_pool *pool, struct page *page)
+{
+	if (!page)
+		return;
+	/* Compare in the signed domain: pp_count is an atomic_t, and
+	 * letting it promote to unsigned against pp_max would turn a
+	 * negative count into a huge value.
+	 */
+	if (!pool || atomic_read(&pool->pp_count) >= (int)pool->pp_max) {
+		put_page(page);
+		return;
+	}
+	/* Bump the count before publishing the node. If the add came
+	 * first, a concurrent svc_page_pool_get() could pop the node
+	 * and atomic_dec() before our atomic_inc() lands, driving
+	 * pp_count transiently negative and defeating the capacity
+	 * check above.
+	 */
+	atomic_inc(&pool->pp_count);
+	llist_add(svc_page_to_llist(page), &pool->pp_pages);
+}
+EXPORT_SYMBOL_GPL(svc_page_pool_put);
+
+/**
+ * svc_page_pool_put_bulk - Return multiple pages to the pool
+ * @pool: pool to return pages to (may be NULL)
+ * @pages: array of pages to return
+ * @count: number of pages in @pages array
+ *
+ * Batch version of svc_page_pool_put() that reduces atomic operations
+ * when returning many pages at once. Transfers ownership of all pages
+ * in @pages to the pool. Uses release_pages() for efficient bulk
+ * freeing when the pool is full.
+ *
+ * Unlike svc_page_pool_put(), this function does not handle NULL
+ * entries in @pages. All @count entries must be valid page pointers.
+ */
+void svc_page_pool_put_bulk(struct svc_page_pool *pool,
+			    struct page **pages, unsigned int count)
+{
+	struct llist_node *head, *last, *node;
+	unsigned int i, to_add;
+	int cur;
+
+	if (!count)
+		return;
+	if (!pool) {
+		release_pages(pages, count);
+		return;
+	}
+
+	/* pp_count may have overshot pp_max (the limit is advisory
+	 * under concurrent producers). Compute the remaining room in
+	 * signed arithmetic: the unsigned "pp_max - pp_count"
+	 * expression underflows to a huge value when the pool is
+	 * over-full, which would admit every page in @pages.
+	 */
+	cur = atomic_read(&pool->pp_count);
+	if (cur >= (int)pool->pp_max) {
+		release_pages(pages, count);
+		return;
+	}
+	to_add = min_t(unsigned int, count, pool->pp_max - cur);
+
+	/* Chain the pages privately, then publish with one atomic
+	 * llist operation; pages[0] becomes the tail of the chain.
+	 */
+	head = NULL;
+	last = NULL;
+	for (i = 0; i < to_add; i++) {
+		node = svc_page_to_llist(pages[i]);
+		node->next = head;
+		head = node;
+		if (!last)
+			last = node;
+	}
+	/* Count before publishing, for the same reason as
+	 * svc_page_pool_put(): a consumer must not be able to
+	 * decrement pp_count below zero.
+	 */
+	atomic_add(to_add, &pool->pp_count);
+	llist_add_batch(head, last, &pool->pp_pages);
+
+	/* Free overflow pages that didn't fit in the pool */
+	if (to_add < count)
+		release_pages(pages + to_add, count - to_add);
+}
+EXPORT_SYMBOL_GPL(svc_page_pool_put_bulk);
+
+/**
+ * svc_page_pool_get - Get a page from the pool
+ * @pool: pool to take from (may be NULL)
+ *
+ * Returns a recycled page with one reference, or NULL if @pool is
+ * NULL or empty. The caller owns the returned page and must either
+ * return it via svc_page_pool_put() or release it with put_page().
+ *
+ * Caller must serialize; concurrent calls for the same pool are
+ * not supported.
+ */
+struct page *svc_page_pool_get(struct svc_page_pool *pool)
+{
+	struct llist_node *node;
+
+	if (!pool)
+		return NULL;
+	node = llist_del_first(&pool->pp_pages);
+	if (!node)
+		return NULL;
+	/* Decrement only after a successful removal so pp_count never
+	 * understates the pool's population.
+	 */
+	atomic_dec(&pool->pp_count);
+	return svc_llist_to_page(node);
+}
+/* Exported for consistency with svc_page_pool_put_bulk(): all five
+ * helpers are declared in svc_xprt.h for transport use.
+ */
+EXPORT_SYMBOL_GPL(svc_page_pool_get);
+
 /*----------------------------------------------------------------------------*/
-- 
2.52.0


  parent reply	other threads:[~2026-02-10 16:20 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-10 16:20 [PATCH v2 0/8] sunrpc: Reduce lock contention for NFSD TCP sockets Chuck Lever
2026-02-10 16:20 ` [PATCH v2 1/8] sunrpc: Add XPT flags missing from SVC_XPRT_FLAG_LIST Chuck Lever
2026-02-10 16:20 ` [PATCH v2 2/8] net: datagram: bypass usercopy checks for kernel iterators Chuck Lever
2026-02-10 16:20 ` [PATCH v2 3/8] sunrpc: split svc_data_ready into protocol-specific callbacks Chuck Lever
2026-02-10 16:20 ` Chuck Lever [this message]
2026-02-10 16:20 ` [PATCH v2 5/8] sunrpc: add dedicated TCP receiver thread Chuck Lever
2026-02-10 16:20 ` [PATCH v2 6/8] sunrpc: implement flat combining for TCP socket sends Chuck Lever
2026-02-10 16:20 ` [PATCH v2 7/8] sunrpc: unify fore and backchannel server TCP send paths Chuck Lever
2026-02-10 16:20 ` [PATCH v2 8/8] sunrpc: Set explicit TCP socket buffer sizes for NFSD Chuck Lever

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260210162025.2356389-5-cel@kernel.org \
    --to=cel@kernel.org \
    --cc=chuck.lever@oracle.com \
    --cc=dai.ngo@oracle.com \
    --cc=jlayton@kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=neilb@ownmail.net \
    --cc=okorniev@redhat.com \
    --cc=tom@talpey.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox