[RFC PATCH 12/15] svcrdma: Add per-recv_ctxt chunk context cache

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Chuck Lever <cel@kernel.org>
To: NeilBrown <neilb@ownmail.net>, Jeff Layton <jlayton@kernel.org>,
	Olga Kornievskaia <okorniev@redhat.com>,
	Dai Ngo <dai.ngo@oracle.com>, Tom Talpey <tom@talpey.com>
Cc: <linux-nfs@vger.kernel.org>, <linux-rdma@vger.kernel.org>,
	Chuck Lever <chuck.lever@oracle.com>
Subject: [RFC PATCH 12/15] svcrdma: Add per-recv_ctxt chunk context cache
Date: Tue, 10 Feb 2026 11:32:19 -0500	[thread overview]
Message-ID: <20260210163222.2356793-13-cel@kernel.org> (raw)
In-Reply-To: <20260210163222.2356793-1-cel@kernel.org>

From: Chuck Lever <chuck.lever@oracle.com>

Parsed chunk list (PCL) processing currently allocates a new
svc_rdma_chunk structure via kmalloc for each chunk in every
incoming RPC. These allocations add overhead to the receive path.

Introduce a per-recv_ctxt single-entry cache. Over 99% of RPC Calls
that specify RPC/RDMA chunks provide only a single chunk, so a
single cached chunk handles the common case. Chunks with up to
SVC_RDMA_CHUNK_SEGMAX (4) segments are eligible for caching; larger
chunks fall back to dynamic allocation.

Using per-recv_ctxt caching instead of a per-transport pool avoids
the need for locking or atomic operations, since a recv_ctxt is
used by only one thread at a time.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_rdma.h         |  2 +
 include/linux/sunrpc/svc_rdma_pcl.h     | 12 +++++-
 net/sunrpc/xprtrdma/svc_rdma_pcl.c      | 55 +++++++++++++++++++++----
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 10 +++--
 4 files changed, 67 insertions(+), 12 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 8e78f958fa46..2164504093fd 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -204,6 +204,8 @@ struct svc_rdma_chunk_ctxt {
 
 struct svc_rdma_recv_ctxt {
 	struct llist_node	rc_node;
+	struct svcxprt_rdma	*rc_rdma;
+	struct svc_rdma_chunk	*rc_chunk_cache;
 	struct ib_recv_wr	rc_recv_wr;
 	struct ib_cqe		rc_cqe;
 	struct rpc_rdma_cid	rc_cid;
diff --git a/include/linux/sunrpc/svc_rdma_pcl.h b/include/linux/sunrpc/svc_rdma_pcl.h
index 7516ad0fae80..e23803b19e66 100644
--- a/include/linux/sunrpc/svc_rdma_pcl.h
+++ b/include/linux/sunrpc/svc_rdma_pcl.h
@@ -22,6 +22,7 @@ struct svc_rdma_chunk {
 	u32			ch_payload_length;
 
 	u32			ch_segcount;
+	u32			ch_segmax;
 	struct svc_rdma_segment	ch_segments[];
 };
 
@@ -114,7 +115,16 @@ pcl_chunk_end_offset(const struct svc_rdma_chunk *chunk)
 
 struct svc_rdma_recv_ctxt;
 
-extern void pcl_free(struct svc_rdma_pcl *pcl);
+/*
+ * Cached chunks have capacity for this many segments.
+ * Typical clients can register up to 120KB per segment, so 4
+ * segments covers most NFS I/O operations. Larger chunks fall
+ * back to kmalloc.
+ */
+#define SVC_RDMA_CHUNK_SEGMAX		4
+
+extern void pcl_free(struct svc_rdma_recv_ctxt *rctxt,
+		     struct svc_rdma_pcl *pcl);
 extern bool pcl_alloc_call(struct svc_rdma_recv_ctxt *rctxt, __be32 *p);
 extern bool pcl_alloc_read(struct svc_rdma_recv_ctxt *rctxt, __be32 *p);
 extern bool pcl_alloc_write(struct svc_rdma_recv_ctxt *rctxt,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_pcl.c b/net/sunrpc/xprtrdma/svc_rdma_pcl.c
index b63cfeaa2923..079af7c633fd 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_pcl.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_pcl.c
@@ -9,30 +9,71 @@
 #include "xprt_rdma.h"
 #include <trace/events/rpcrdma.h>
 
+static struct svc_rdma_chunk *rctxt_chunk_get(struct svc_rdma_recv_ctxt *rctxt)
+{
+	struct svc_rdma_chunk *chunk = rctxt->rc_chunk_cache;
+
+	if (chunk)
+		rctxt->rc_chunk_cache = NULL;
+	return chunk;
+}
+
+static void rctxt_chunk_put(struct svc_rdma_recv_ctxt *rctxt,
+			    struct svc_rdma_chunk *chunk)
+{
+	if (rctxt->rc_chunk_cache) {
+		kfree(chunk);
+		return;
+	}
+	rctxt->rc_chunk_cache = chunk;
+}
+
+static void rctxt_chunk_free(struct svc_rdma_recv_ctxt *rctxt,
+			     struct svc_rdma_chunk *chunk)
+{
+	if (chunk->ch_segmax == SVC_RDMA_CHUNK_SEGMAX)
+		rctxt_chunk_put(rctxt, chunk);
+	else
+		kfree(chunk);
+}
+
 /**
  * pcl_free - Release all memory associated with a parsed chunk list
+ * @rctxt: receive context containing @pcl
  * @pcl: parsed chunk list
  *
  */
-void pcl_free(struct svc_rdma_pcl *pcl)
+void pcl_free(struct svc_rdma_recv_ctxt *rctxt, struct svc_rdma_pcl *pcl)
 {
 	while (!list_empty(&pcl->cl_chunks)) {
 		struct svc_rdma_chunk *chunk;
 
 		chunk = pcl_first_chunk(pcl);
 		list_del(&chunk->ch_list);
-		kfree(chunk);
+		rctxt_chunk_free(rctxt, chunk);
 	}
 }
 
-static struct svc_rdma_chunk *pcl_alloc_chunk(u32 segcount, u32 position)
+static struct svc_rdma_chunk *pcl_alloc_chunk(struct svc_rdma_recv_ctxt *rctxt,
+					      u32 segcount, u32 position)
 {
+	struct ib_device *device = rctxt->rc_rdma->sc_cm_id->device;
 	struct svc_rdma_chunk *chunk;
 
-	chunk = kmalloc(struct_size(chunk, ch_segments, segcount), GFP_KERNEL);
+	if (segcount <= SVC_RDMA_CHUNK_SEGMAX) {
+		chunk = rctxt_chunk_get(rctxt);
+		if (chunk)
+			goto out;
+		segcount = SVC_RDMA_CHUNK_SEGMAX;
+	}
+
+	chunk = kmalloc_node(struct_size(chunk, ch_segments, segcount),
+			     GFP_KERNEL, ibdev_to_node(device));
 	if (!chunk)
 		return NULL;
+	chunk->ch_segmax = segcount;
 
+out:
 	chunk->ch_position = position;
 	chunk->ch_length = 0;
 	chunk->ch_payload_length = 0;
@@ -117,7 +158,7 @@ bool pcl_alloc_call(struct svc_rdma_recv_ctxt *rctxt, __be32 *p)
 			continue;
 
 		if (pcl_is_empty(pcl)) {
-			chunk = pcl_alloc_chunk(segcount, position);
+			chunk = pcl_alloc_chunk(rctxt, segcount, position);
 			if (!chunk)
 				return false;
 			pcl_insert_position(pcl, chunk);
@@ -172,7 +213,7 @@ bool pcl_alloc_read(struct svc_rdma_recv_ctxt *rctxt, __be32 *p)
 
 		chunk = pcl_lookup_position(pcl, position);
 		if (!chunk) {
-			chunk = pcl_alloc_chunk(segcount, position);
+			chunk = pcl_alloc_chunk(rctxt, segcount, position);
 			if (!chunk)
 				return false;
 			pcl_insert_position(pcl, chunk);
@@ -210,7 +251,7 @@ bool pcl_alloc_write(struct svc_rdma_recv_ctxt *rctxt,
 		p++;	/* skip the list discriminator */
 		segcount = be32_to_cpup(p++);
 
-		chunk = pcl_alloc_chunk(segcount, 0);
+		chunk = pcl_alloc_chunk(rctxt, segcount, 0);
 		if (!chunk)
 			return false;
 		list_add_tail(&chunk->ch_list, &pcl->cl_chunks);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 333b9468a15b..b48ef78c79c2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -122,6 +122,7 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
 			    GFP_KERNEL, ibdev_to_node(device));
 	if (!ctxt)
 		goto fail0;
+	ctxt->rc_rdma = rdma;
 	ctxt->rc_maxpages = pages;
 	buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL,
 			      ibdev_to_node(device));
@@ -161,6 +162,7 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
 static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
 				       struct svc_rdma_recv_ctxt *ctxt)
 {
+	kfree(ctxt->rc_chunk_cache);
 	ib_dma_unmap_single(rdma->sc_cm_id->device, ctxt->rc_recv_sge.addr,
 			    ctxt->rc_recv_sge.length, DMA_FROM_DEVICE);
 	kfree(ctxt->rc_recv_buf);
@@ -219,10 +221,10 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
 	 */
 	release_pages(ctxt->rc_pages, ctxt->rc_page_count);
 
-	pcl_free(&ctxt->rc_call_pcl);
-	pcl_free(&ctxt->rc_read_pcl);
-	pcl_free(&ctxt->rc_write_pcl);
-	pcl_free(&ctxt->rc_reply_pcl);
+	pcl_free(ctxt, &ctxt->rc_call_pcl);
+	pcl_free(ctxt, &ctxt->rc_read_pcl);
+	pcl_free(ctxt, &ctxt->rc_write_pcl);
+	pcl_free(ctxt, &ctxt->rc_reply_pcl);
 
 	llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
 }
-- 
2.52.0

next prev parent reply	other threads:[~2026-02-10 16:32 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-10 16:32 [RFC PATCH 00/15] svcrdma performance scalability enhancements Chuck Lever
2026-02-10 16:32 ` [RFC PATCH 01/15] svcrdma: Add fair queuing for Send Queue access Chuck Lever
2026-02-10 16:32 ` [RFC PATCH 02/15] svcrdma: Clean up use of rdma->sc_pd->device in Receive paths Chuck Lever
2026-02-10 16:32 ` [RFC PATCH 03/15] svcrdma: Clean up use of rdma->sc_pd->device Chuck Lever
2026-02-10 16:32 ` [RFC PATCH 04/15] svcrdma: Add Write chunk WRs to the RPC's Send WR chain Chuck Lever
2026-02-10 16:32 ` [RFC PATCH 05/15] svcrdma: Factor out WR chain linking into helper Chuck Lever
2026-02-10 16:32 ` [RFC PATCH 06/15] svcrdma: Reduce false sharing in struct svcxprt_rdma Chuck Lever
2026-02-10 16:32 ` [RFC PATCH 07/15] svcrdma: Use lock-free list for Receive Queue tracking Chuck Lever
2026-02-10 16:32 ` [RFC PATCH 08/15] svcrdma: Convert Read completion queue to use lock-free list Chuck Lever
2026-02-10 16:32 ` [RFC PATCH 09/15] svcrdma: Release write chunk resources without re-queuing Chuck Lever
2026-02-10 16:32 ` [RFC PATCH 10/15] svcrdma: Use per-transport kthread for send context release Chuck Lever
2026-02-10 16:32 ` [RFC PATCH 11/15] svcrdma: Use watermark-based Receive Queue replenishment Chuck Lever
2026-02-10 16:32 ` Chuck Lever [this message]
2026-02-10 16:32 ` [RFC PATCH 13/15] svcrdma: clear XPT_DATA on sc_read_complete_q consumption Chuck Lever
2026-02-10 16:32 ` [RFC PATCH 14/15] svcrdma: retry when receive queues drain transiently Chuck Lever
2026-02-10 16:32 ` [RFC PATCH 15/15] svcrdma: clear XPT_DATA on sc_rq_dto_q consumption Chuck Lever

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:8e78f958fa4 dfblob:2164504093f dfblob:7516ad0fae8
dfblob:e23803b19e6 dfblob:b63cfeaa292 dfblob:079af7c633f
dfblob:333b9468a15 dfblob:b48ef78c79c )
 OR (
bs:"[RFC PATCH 12/15] svcrdma: Add per-recv_ctxt chunk context cache" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260210163222.2356793-13-cel@kernel.org \
    --to=cel@kernel.org \
    --cc=chuck.lever@oracle.com \
    --cc=dai.ngo@oracle.com \
    --cc=jlayton@kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=neilb@ownmail.net \
    --cc=okorniev@redhat.com \
    --cc=tom@talpey.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.