From: Chuck Lever <chuck.lever@oracle.com>
To: linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org
Subject: [PATCH RFC 08/12] xprtrdma: Wake RPCs directly in rpcrdma_wc_send path
Date: Tue, 28 May 2019 14:21:32 -0400 [thread overview]
Message-ID: <20190528182132.19012.55642.stgit@manet.1015granger.net> (raw)
In-Reply-To: <20190528181018.19012.61210.stgit@manet.1015granger.net>
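Eliminate a context switch in the path that handles RPC wake-ups
when a Receive completion has to wait for a Send completion.
The RPCRDMA_REQ_F_TX_RESOURCES wait bit is replaced by a kref in
struct rpcrdma_req: the Send path takes an extra reference when
pages are DMA-mapped for the Send, and whichever of the Send
completion and the reply handler drops the final reference calls
rpcrdma_complete_rqst() directly instead of sleeping on the bit.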
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
net/sunrpc/xprtrdma/rpc_rdma.c | 61 +++++++++++++++------------------------
net/sunrpc/xprtrdma/transport.c | 9 +++++-
net/sunrpc/xprtrdma/verbs.c | 3 +-
net/sunrpc/xprtrdma/xprt_rdma.h | 12 ++------
4 files changed, 35 insertions(+), 50 deletions(-)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index ea39f74..6de90d4 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -511,6 +511,16 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
return 0;
}
+static void rpcrdma_sendctx_done(struct kref *kref)
+{
+ struct rpcrdma_req *req =
+ container_of(kref, struct rpcrdma_req, rl_kref);
+ struct rpcrdma_rep *rep = req->rl_reply;
+
+ rpcrdma_complete_rqst(rep);
+ rep->rr_rxprt->rx_stats.reply_waits_for_send++;
+}
+
/**
* rpcrdma_sendctx_unmap - DMA-unmap Send buffer
* @sc: sendctx containing SGEs to unmap
@@ -520,6 +530,9 @@ void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
{
struct ib_sge *sge;
+ if (!sc->sc_unmap_count)
+ return;
+
/* The first two SGEs contain the transport header and
* the inline buffer. These are always left mapped so
* they can be cheaply re-used.
@@ -529,9 +542,7 @@ void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length,
DMA_TO_DEVICE);
- if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES,
- &sc->sc_req->rl_flags))
- wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
+ kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
}
/* Prepare an SGE for the RPC-over-RDMA transport header.
@@ -666,7 +677,7 @@ static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt,
out:
sc->sc_wr.num_sge += sge_no;
if (sc->sc_unmap_count)
- __set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
+ kref_get(&req->rl_kref);
return true;
out_regbuf:
@@ -708,7 +719,7 @@ static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt,
req->rl_sendctx->sc_wr.num_sge = 0;
req->rl_sendctx->sc_unmap_count = 0;
req->rl_sendctx->sc_req = req;
- __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
+ kref_init(&req->rl_kref);
ret = -EIO;
if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
@@ -1268,36 +1279,12 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
goto out;
}
-/* Ensure that any DMA mapped pages associated with
- * the Send of the RPC Call have been unmapped before
- * allowing the RPC to complete. This protects argument
- * memory not controlled by the RPC client from being
- * re-used before we're done with it.
- */
-static void rpcrdma_release_tx(struct rpcrdma_xprt *r_xprt,
- struct rpcrdma_req *req)
+static void rpcrdma_reply_done(struct kref *kref)
{
- if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
- r_xprt->rx_stats.reply_waits_for_send++;
- out_of_line_wait_on_bit(&req->rl_flags,
- RPCRDMA_REQ_F_TX_RESOURCES,
- bit_wait,
- TASK_UNINTERRUPTIBLE);
- }
-}
+ struct rpcrdma_req *req =
+ container_of(kref, struct rpcrdma_req, rl_kref);
-/**
- * rpcrdma_release_rqst - Release hardware resources
- * @r_xprt: controlling transport instance
- * @req: request with resources to release
- *
- */
-void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
-{
- if (!list_empty(&req->rl_registered))
- frwr_unmap_sync(r_xprt, req);
-
- rpcrdma_release_tx(r_xprt, req);
+ rpcrdma_complete_rqst(req->rl_reply);
}
/**
@@ -1367,13 +1354,11 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
frwr_reminv(rep, &req->rl_registered);
- if (!list_empty(&req->rl_registered)) {
+ if (!list_empty(&req->rl_registered))
frwr_unmap_async(r_xprt, req);
/* LocalInv completion will complete the RPC */
- } else {
- rpcrdma_release_tx(r_xprt, req);
- rpcrdma_complete_rqst(rep);
- }
+ else
+ kref_put(&req->rl_kref, rpcrdma_reply_done);
return;
out_badversion:
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index f84375d..5b1d5d7 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -618,8 +618,15 @@ static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- rpcrdma_release_rqst(r_xprt, req);
trace_xprtrdma_op_free(task, req);
+
+ if (!list_empty(&req->rl_registered))
+ frwr_unmap_sync(r_xprt, req);
+
+ /* XXX: We should wait for the Send completion here,
+ * although it's very likely it's already fired in
+ * this case.
+ */
}
/**
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 0be455b..729266e 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1463,8 +1463,7 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
int rc;
- if (!ep->rep_send_count ||
- test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
+ if (!ep->rep_send_count || kref_read(&req->rl_kref) > 1) {
send_wr->send_flags |= IB_SEND_SIGNALED;
ep->rep_send_count = ep->rep_send_batch;
} else {
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index e465221..5475f0d 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -44,7 +44,8 @@
#include <linux/wait.h> /* wait_queue_head_t, etc */
#include <linux/spinlock.h> /* spinlock_t, etc */
-#include <linux/atomic.h> /* atomic_t, etc */
+#include <linux/atomic.h> /* atomic_t, etc */
+#include <linux/kref.h> /* struct kref */
#include <linux/workqueue.h> /* struct work_struct */
#include <rdma/rdma_cm.h> /* RDMA connection api */
@@ -329,17 +330,12 @@ struct rpcrdma_req {
struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */
struct list_head rl_all;
- unsigned long rl_flags;
+ struct kref rl_kref;
struct list_head rl_registered; /* registered segments */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
};
-/* rl_flags */
-enum {
- RPCRDMA_REQ_F_TX_RESOURCES,
-};
-
static inline struct rpcrdma_req *
rpcr_to_rdmar(const struct rpc_rqst *rqst)
{
@@ -584,8 +580,6 @@ int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
-void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
- struct rpcrdma_req *req);
static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
{
next prev parent reply other threads:[~2019-05-28 18:21 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-05-28 18:20 [PATCH RFC 00/12] for-5.3 NFS/RDMA patches for review Chuck Lever
2019-05-28 18:20 ` [PATCH RFC 01/12] xprtrdma: Fix use-after-free in rpcrdma_post_recvs Chuck Lever
2019-05-28 18:21 ` [PATCH RFC 02/12] xprtrdma: Replace use of xdr_stream_pos in rpcrdma_marshal_req Chuck Lever
2019-05-28 18:21 ` [PATCH RFC 03/12] xprtrdma: Fix occasional transport deadlock Chuck Lever
2019-05-28 18:21 ` [PATCH RFC 04/12] xprtrdma: Remove the RPCRDMA_REQ_F_PENDING flag Chuck Lever
2019-05-28 18:21 ` [PATCH RFC 05/12] xprtrdma: Remove fr_state Chuck Lever
2019-05-30 14:05 ` Anna Schumaker
2019-05-31 13:36 ` Chuck Lever
2019-05-28 18:21 ` [PATCH RFC 06/12] xprtrdma: Add mechanism to place MRs back on the free list Chuck Lever
2019-05-28 18:21 ` [PATCH RFC 07/12] xprtrdma: Reduce context switching due to Local Invalidation Chuck Lever
2019-05-28 18:21 ` Chuck Lever [this message]
2019-05-28 18:21 ` [PATCH RFC 09/12] xprtrdma: Simplify rpcrdma_rep_create Chuck Lever
2019-05-28 18:21 ` [PATCH RFC 10/12] xprtrdma: Streamline rpcrdma_post_recvs Chuck Lever
2019-05-28 18:21 ` [PATCH RFC 11/12] xprtrdma: Refactor chunk encoding Chuck Lever
2019-05-28 18:21 ` [PATCH RFC 12/12] xprtrdma: Remove rpcrdma_req::rl_buffer Chuck Lever
2019-05-29 6:40 ` [PATCH RFC 00/12] for-5.3 NFS/RDMA patches for review Christoph Hellwig
2019-05-29 14:35 ` Chuck Lever
2019-05-31 14:32 ` Dennis Dalessandro
2019-05-31 14:34 ` Chuck Lever
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190528182132.19012.55642.stgit@manet.1015granger.net \
--to=chuck.lever@oracle.com \
--cc=linux-nfs@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).