linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Chuck Lever <chuck.lever@oracle.com>
To: bfields@fieldses.org
Cc: linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org
Subject: [PATCH v1 11/19] svcrdma: Allocate recv_ctxt's on CPU handling Receives
Date: Mon, 07 May 2018 15:27:48 -0400	[thread overview]
Message-ID: <20180507192748.4608.81602.stgit@klimt.1015granger.net> (raw)
In-Reply-To: <20180507192126.4608.63295.stgit@klimt.1015granger.net>

There is a significant latency penalty when processing an ingress
Receive if the Receive buffer resides in memory that is not on the
same NUMA node as the the CPU handling completions for a CQ.

The system administrator and the device driver determine which CPU
handles completions. This CPU does not change during life of the CQ.
Further the Upper Layer does not have any visibility of which CPU it
is.

Allocating Receive buffers in the Receive completion handler
guarantees that Receive buffers are allocated on the preferred NUMA
node for that CQ.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_rdma.h         |    1 +
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c |   52 +++++++++++++++++++++----------
 2 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 01baabf..27cf59c 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -151,6 +151,7 @@ struct svc_rdma_recv_ctxt {
 	struct ib_sge		rc_recv_sge;
 	void			*rc_recv_buf;
 	struct xdr_buf		rc_arg;
+	bool			rc_temp;
 	u32			rc_byte_len;
 	unsigned int		rc_page_count;
 	unsigned int		rc_hdr_count;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index d4ccd1c..0445e75 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -144,6 +144,7 @@
 	ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
 	ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
 	ctxt->rc_recv_buf = buffer;
+	ctxt->rc_temp = false;
 	return ctxt;
 
 fail2:
@@ -154,6 +155,15 @@
 	return NULL;
 }
 
+static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
+				       struct svc_rdma_recv_ctxt *ctxt)
+{
+	ib_dma_unmap_single(rdma->sc_pd->device, ctxt->rc_recv_sge.addr,
+			    ctxt->rc_recv_sge.length, DMA_FROM_DEVICE);
+	kfree(ctxt->rc_recv_buf);
+	kfree(ctxt);
+}
+
 /**
  * svc_rdma_recv_ctxts_destroy - Release all recv_ctxt's for an xprt
  * @rdma: svcxprt_rdma being torn down
@@ -165,12 +175,7 @@ void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
 
 	while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) {
 		list_del(&ctxt->rc_list);
-		ib_dma_unmap_single(rdma->sc_pd->device,
-				    ctxt->rc_recv_sge.addr,
-				    ctxt->rc_recv_sge.length,
-				    DMA_FROM_DEVICE);
-		kfree(ctxt->rc_recv_buf);
-		kfree(ctxt);
+		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 	}
 }
 
@@ -212,21 +217,21 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
 
 	for (i = 0; i < ctxt->rc_page_count; i++)
 		put_page(ctxt->rc_pages[i]);
-	spin_lock(&rdma->sc_recv_lock);
-	list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
-	spin_unlock(&rdma->sc_recv_lock);
+
+	if (!ctxt->rc_temp) {
+		spin_lock(&rdma->sc_recv_lock);
+		list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
+		spin_unlock(&rdma->sc_recv_lock);
+	} else
+		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 }
 
-static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
+static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
+				struct svc_rdma_recv_ctxt *ctxt)
 {
-	struct svc_rdma_recv_ctxt *ctxt;
 	struct ib_recv_wr *bad_recv_wr;
 	int ret;
 
-	ctxt = svc_rdma_recv_ctxt_get(rdma);
-	if (!ctxt)
-		return -ENOMEM;
-
 	svc_xprt_get(&rdma->sc_xprt);
 	ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, &bad_recv_wr);
 	trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret);
@@ -240,6 +245,16 @@ static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
 	return ret;
 }
 
+static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
+{
+	struct svc_rdma_recv_ctxt *ctxt;
+
+	ctxt = svc_rdma_recv_ctxt_get(rdma);
+	if (!ctxt)
+		return -ENOMEM;
+	return __svc_rdma_post_recv(rdma, ctxt);
+}
+
 /**
  * svc_rdma_post_recvs - Post initial set of Recv WRs
  * @rdma: fresh svcxprt_rdma
@@ -248,11 +263,16 @@ static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
  */
 bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
 {
+	struct svc_rdma_recv_ctxt *ctxt;
 	unsigned int i;
 	int ret;
 
 	for (i = 0; i < rdma->sc_max_requests; i++) {
-		ret = svc_rdma_post_recv(rdma);
+		ctxt = svc_rdma_recv_ctxt_get(rdma);
+		if (!ctxt)
+			return -ENOMEM;
+		ctxt->rc_temp = true;
+		ret = __svc_rdma_post_recv(rdma, ctxt);
 		if (ret) {
 			pr_err("svcrdma: failure posting recv buffers: %d\n",
 			       ret);


  parent reply	other threads:[~2018-05-07 19:27 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-07 19:26 [PATCH v1 00/19] NFS/RDMA server for-next Chuck Lever
2018-05-07 19:26 ` [PATCH v1 01/19] svcrdma: Add proper SPDX tags for NetApp-contributed source Chuck Lever
2018-05-09 20:23   ` J. Bruce Fields
2018-05-09 20:42     ` Chuck Lever
2018-05-15 14:52       ` Doug Ledford
2018-05-07 19:27 ` [PATCH v1 02/19] svcrdma: Use passed-in net namespace when creating RDMA listener Chuck Lever
2018-05-07 19:27 ` [PATCH v1 03/19] xprtrdma: Prepare RPC/RDMA includes for server-side trace points Chuck Lever
2018-05-07 19:27 ` [PATCH v1 04/19] svcrdma: Trace key RPC/RDMA protocol events Chuck Lever
2018-05-07 19:27 ` [PATCH v1 05/19] svcrdma: Trace key RDMA API events Chuck Lever
2018-05-07 19:27 ` [PATCH v1 06/19] svcrdma: Introduce svc_rdma_recv_ctxt Chuck Lever
2018-05-09 20:48   ` J. Bruce Fields
2018-05-09 21:02     ` Chuck Lever
2018-05-07 19:27 ` [PATCH v1 07/19] svcrdma: Remove sc_rq_depth Chuck Lever
2018-05-07 19:27 ` [PATCH v1 08/19] svcrdma: Simplify svc_rdma_recv_ctxt_put Chuck Lever
2018-05-07 19:27 ` [PATCH v1 09/19] svcrdma: Preserve Receive buffer until svc_rdma_sendto Chuck Lever
2018-05-09 21:03   ` J. Bruce Fields
2018-05-07 19:27 ` [PATCH v1 10/19] svcrdma: Persistently allocate and DMA-map Receive buffers Chuck Lever
2018-05-09 21:18   ` J. Bruce Fields
2018-05-09 21:31     ` Chuck Lever
2018-05-09 21:37       ` Bruce Fields
2018-05-07 19:27 ` Chuck Lever [this message]
2018-05-07 19:27 ` [PATCH v1 12/19] svcrdma: Refactor svc_rdma_dma_map_buf Chuck Lever
2018-05-07 19:27 ` [PATCH v1 13/19] svcrdma: Clean up Send SGE accounting Chuck Lever
2018-05-07 19:28 ` [PATCH v1 14/19] svcrdma: Introduce svc_rdma_send_ctxt Chuck Lever
2018-05-07 19:28 ` [PATCH v1 15/19] svcrdma: Don't overrun the SGE array in svc_rdma_send_ctxt Chuck Lever
2018-05-07 19:28 ` [PATCH v1 16/19] svcrdma: Remove post_send_wr Chuck Lever
2018-05-07 19:28 ` [PATCH v1 17/19] svcrdma: Simplify svc_rdma_send() Chuck Lever
2018-05-07 19:28 ` [PATCH v1 18/19] svcrdma: Persistently allocate and DMA-map Send buffers Chuck Lever
2018-05-07 19:28 ` [PATCH v1 19/19] svcrdma: Remove unused svc_rdma_op_ctxt Chuck Lever

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180507192748.4608.81602.stgit@klimt.1015granger.net \
    --to=chuck.lever@oracle.com \
    --cc=bfields@fieldses.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).