From: Chuck Lever <chuck.lever@oracle.com>
To: anna.schumaker@netapp.com
Cc: linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org
Subject: [PATCH 9/9] xprtrdma: Allocate rpcrdma_reps during Receive completion
Date: Mon, 05 Mar 2018 15:13:39 -0500
Message-ID: <20180305201339.10904.10673.stgit@manet.1015granger.net>
In-Reply-To: <20180305200825.10904.40829.stgit@manet.1015granger.net>

Receive completion for a CQ runs on one CPU core only. Ensure that
Receive buffers are allocated by the same CPU core that handles
Receive completions. This guarantees that a transport's Receive
buffers are placed on the NUMA node local to the device, no matter
where the transport was created.
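
For illustration only, here is a minimal sketch of the replenish
logic this patch moves into the Receive completion path. The helper
name rpcrdma_reps_replenish and its surrounding context are assumed
for the example; the patch itself open-codes the equivalent loop in
rpcrdma_reply_handler:

	/* Sketch: runs in the Receive completion handler, which is
	 * bound to the CPU core servicing the CQ, so reps allocated
	 * here land on the NUMA node local to the device.
	 */
	static void rpcrdma_reps_replenish(struct rpcrdma_xprt *r_xprt)
	{
		struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
		int needed;

		/* One rep per forward-channel request, plus two per
		 * backchannel request, minus what is already allocated.
		 */
		needed = buf->rb_max_requests +
			 (buf->rb_bc_srv_max_requests << 1);
		needed -= buf->rb_reps;

		while (needed-- > 0)
			if (rpcrdma_create_rep(r_xprt, false))
				break;	/* stop on allocation failure */
	}

Because the completion handler runs on the core that services the CQ,
ordinary GFP allocations made from this context naturally come from
that core's local NUMA node, which is the point of deferring rep
allocation until Receive completion.
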
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
net/sunrpc/xprtrdma/backchannel.c | 21 ---------------------
net/sunrpc/xprtrdma/rpc_rdma.c | 8 ++++++++
net/sunrpc/xprtrdma/verbs.c | 35 ++++++++++++++++++++++++++---------
net/sunrpc/xprtrdma/xprt_rdma.h | 4 +++-
4 files changed, 37 insertions(+), 31 deletions(-)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 4034788..6b21fb8 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -71,23 +71,6 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
return -ENOMEM;
}
-/* Allocate and add receive buffers to the rpcrdma_buffer's
- * existing list of rep's. These are released when the
- * transport is destroyed.
- */
-static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
- unsigned int count)
-{
- int rc = 0;
-
- while (count--) {
- rc = rpcrdma_create_rep(r_xprt);
- if (rc)
- break;
- }
- return rc;
-}
-
/**
* xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
* @xprt: transport associated with these backchannel resources
@@ -116,10 +99,6 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
if (rc)
goto out_free;
- rc = rpcrdma_bc_setup_reps(r_xprt, reqs);
- if (rc)
- goto out_free;
-
rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs);
if (rc)
goto out_free;
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index e8adad3..d15aa27 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1331,8 +1331,16 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
u32 credits;
+ int total;
__be32 *p;
+	total = buf->rb_max_requests + (buf->rb_bc_srv_max_requests << 1);
+	total -= buf->rb_reps;
+	if (total > 0)
+		while (total--)
+			if (rpcrdma_create_rep(r_xprt, false))
+				break;
+
if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 6a7a5a2..af74953 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1095,11 +1095,12 @@ struct rpcrdma_req *
/**
* rpcrdma_create_rep - Allocate an rpcrdma_rep object
* @r_xprt: controlling transport
+ * @temp: destroy rep upon release
*
* Returns 0 on success or a negative errno on failure.
*/
int
-rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
+rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
{
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
@@ -1127,9 +1128,11 @@ struct rpcrdma_req *
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
+ rep->rr_temp = temp;
spin_lock(&buf->rb_lock);
list_add(&rep->rr_list, &buf->rb_recv_bufs);
+ ++buf->rb_reps;
spin_unlock(&buf->rb_lock);
return 0;
@@ -1179,11 +1182,9 @@ struct rpcrdma_req *
}
INIT_LIST_HEAD(&buf->rb_recv_bufs);
- for (i = 0; i <= buf->rb_max_requests; i++) {
- rc = rpcrdma_create_rep(r_xprt);
- if (rc)
- goto out;
- }
+ rc = rpcrdma_create_rep(r_xprt, true);
+ if (rc)
+ goto out;
rc = rpcrdma_sendctxs_create(r_xprt);
if (rc)
@@ -1220,8 +1221,14 @@ struct rpcrdma_req *
static void
rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
{
+ struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
+
rpcrdma_free_regbuf(rep->rr_rdmabuf);
kfree(rep);
+
+ spin_lock(&buf->rb_lock);
+ --buf->rb_reps;
+ spin_unlock(&buf->rb_lock);
}
void
@@ -1417,12 +1424,17 @@ struct rpcrdma_req *
spin_lock(&buffers->rb_lock);
buffers->rb_send_count--;
- list_add_tail(&req->rl_list, &buffers->rb_send_bufs);
+ list_add(&req->rl_list, &buffers->rb_send_bufs);
if (rep) {
buffers->rb_recv_count--;
- list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
+ if (!rep->rr_temp) {
+ list_add(&rep->rr_list, &buffers->rb_recv_bufs);
+ rep = NULL;
+ }
}
spin_unlock(&buffers->rb_lock);
+ if (rep)
+ rpcrdma_destroy_rep(rep);
}
/*
@@ -1450,8 +1462,13 @@ struct rpcrdma_req *
spin_lock(&buffers->rb_lock);
buffers->rb_recv_count--;
- list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
+ if (!rep->rr_temp) {
+ list_add(&rep->rr_list, &buffers->rb_recv_bufs);
+ rep = NULL;
+ }
spin_unlock(&buffers->rb_lock);
+ if (rep)
+ rpcrdma_destroy_rep(rep);
}
/**
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index b35d80b..5f069c7 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -196,6 +196,7 @@ struct rpcrdma_rep {
__be32 rr_proc;
int rr_wc_flags;
u32 rr_inv_rkey;
+ bool rr_temp;
struct rpcrdma_regbuf *rr_rdmabuf;
struct rpcrdma_xprt *rr_rxprt;
struct work_struct rr_work;
@@ -401,6 +402,7 @@ struct rpcrdma_buffer {
struct list_head rb_recv_bufs;
u32 rb_max_requests;
u32 rb_credits; /* most recent credit grant */
+ unsigned int rb_reps;
u32 rb_bc_srv_max_requests;
spinlock_t rb_reqslock; /* protect rb_allreqs */
@@ -563,7 +565,7 @@ int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
*/
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *);
-int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
+int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);