public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
From: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: [PATCH v2 03/13] xprtrdma: Pass only the list of registered MRs to ro_unmap_sync
Date: Thu, 08 Jun 2017 11:52:04 -0400	[thread overview]
Message-ID: <20170608155204.18945.50477.stgit@manet.1015granger.net> (raw)
In-Reply-To: <20170608154339.18945.5500.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>

There are rare cases where an rpcrdma_req can be re-used (via
rpcrdma_buffer_put) while the RPC reply handler is still running.
This is due to a signal firing at just the wrong instant.

Since commit 9d6b04097882 ("xprtrdma: Place registered MWs on a
per-req list"), rpcrdma_mws are self-contained; i.e., they fully
describe an MR and scatterlist, and no part of that information is
stored in struct rpcrdma_req.

As part of closing the above race window, pass only the req's list
of registered MRs to ro_unmap_sync, rather than the rpcrdma_req
itself.

Some extra transport header sanity checking is removed. Since the
client depends on its own recollection of what memory had been
registered, there doesn't seem to be a way to abuse this change.

Moreover, the check was not terribly effective. If the client had
sent Read chunks, rl_registered is not empty, so the "list_empty"
test is false in both of the removed cases, even though those cases
are actually looking for Write or Reply chunks.

BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=305
Fixes: 68791649a725 ('xprtrdma: Invalidate in the RPC reply ... ')
Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/fmr_ops.c   |   16 +++++++++-------
 net/sunrpc/xprtrdma/frwr_ops.c  |   19 +++++++++----------
 net/sunrpc/xprtrdma/rpc_rdma.c  |   16 +++++++---------
 net/sunrpc/xprtrdma/xprt_rdma.h |    2 +-
 4 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 21f3cd5..5556ed9 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -255,24 +255,26 @@ enum {
  * Sleeps until it is safe for the host CPU to access the
  * previously mapped memory regions.
  *
- * Caller ensures that req->rl_registered is not empty.
+ * Caller ensures that @mws is not empty before the call. This
+ * function empties the list.
  */
 static void
-fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 {
 	struct rpcrdma_mw *mw, *tmp;
 	LIST_HEAD(unmap_list);
 	int rc;
 
-	dprintk("RPC:       %s: req %p\n", __func__, req);
-
 	/* ORDER: Invalidate all of the req's MRs first
 	 *
 	 * ib_unmap_fmr() is slow, so use a single call instead
 	 * of one call per mapped FMR.
 	 */
-	list_for_each_entry(mw, &req->rl_registered, mw_list)
+	list_for_each_entry(mw, mws, mw_list) {
+		dprintk("RPC:       %s: unmapping fmr %p\n",
+			__func__, &mw->fmr);
 		list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
+	}
 	r_xprt->rx_stats.local_inv_needed++;
 	rc = ib_unmap_fmr(&unmap_list);
 	if (rc)
@@ -281,7 +283,7 @@ enum {
 	/* ORDER: Now DMA unmap all of the req's MRs, and return
 	 * them to the free MW list.
 	 */
-	list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
+	list_for_each_entry_safe(mw, tmp, mws, mw_list) {
 		list_del_init(&mw->mw_list);
 		list_del_init(&mw->fmr.fm_mr->list);
 		ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
@@ -294,7 +296,7 @@ enum {
 out_reset:
 	pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
 
-	list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
+	list_for_each_entry_safe(mw, tmp, mws, mw_list) {
 		list_del_init(&mw->mw_list);
 		list_del_init(&mw->fmr.fm_mr->list);
 		fmr_op_recover_mr(mw);
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 31290cb..97f9f85 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -458,10 +458,11 @@
  * Sleeps until it is safe for the host CPU to access the
  * previously mapped memory regions.
  *
- * Caller ensures that req->rl_registered is not empty.
+ * Caller ensures that @mws is not empty before the call. This
+ * function empties the list.
  */
 static void
-frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 {
 	struct ib_send_wr *first, **prev, *last, *bad_wr;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@@ -469,9 +470,7 @@
 	struct rpcrdma_mw *mw;
 	int count, rc;
 
-	dprintk("RPC:       %s: req %p\n", __func__, req);
-
-	/* ORDER: Invalidate all of the req's MRs first
+	/* ORDER: Invalidate all of the MRs first
 	 *
 	 * Chain the LOCAL_INV Work Requests and post them with
 	 * a single ib_post_send() call.
@@ -479,7 +478,7 @@
 	f = NULL;
 	count = 0;
 	prev = &first;
-	list_for_each_entry(mw, &req->rl_registered, mw_list) {
+	list_for_each_entry(mw, mws, mw_list) {
 		mw->frmr.fr_state = FRMR_IS_INVALID;
 
 		if (mw->mw_flags & RPCRDMA_MW_F_RI)
@@ -528,12 +527,12 @@
 
 	wait_for_completion(&f->fr_linv_done);
 
-	/* ORDER: Now DMA unmap all of the req's MRs, and return
+	/* ORDER: Now DMA unmap all of the MRs, and return
 	 * them to the free MW list.
 	 */
 unmap:
-	while (!list_empty(&req->rl_registered)) {
-		mw = rpcrdma_pop_mw(&req->rl_registered);
+	while (!list_empty(mws)) {
+		mw = rpcrdma_pop_mw(mws);
 		dprintk("RPC:       %s: DMA unmapping frmr %p\n",
 			__func__, &mw->frmr);
 		ib_dma_unmap_sg(ia->ri_device,
@@ -549,7 +548,7 @@
 	/* Find and reset the MRs in the LOCAL_INV WRs that did not
 	 * get posted. This is synchronous, and slow.
 	 */
-	list_for_each_entry(mw, &req->rl_registered, mw_list) {
+	list_for_each_entry(mw, mws, mw_list) {
 		f = &mw->frmr;
 		if (mw->mw_handle == bad_wr->ex.invalidate_rkey) {
 			__frwr_reset_mr(ia, mw);
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 2356a63..c88132d 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -995,6 +995,7 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 	__be32 *iptr;
 	int rdmalen, status, rmerr;
 	unsigned long cwnd;
+	struct list_head mws;
 
 	dprintk("RPC:       %s: incoming rep %p\n", __func__, rep);
 
@@ -1024,7 +1025,8 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 	/* Sanity checking has passed. We are now committed
 	 * to complete this transaction.
 	 */
-	rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
+	list_replace_init(&req->rl_registered, &mws);
+	rpcrdma_mark_remote_invalidation(&mws, rep);
 	list_del_init(&rqst->rq_list);
 	req->rl_reply = rep;
 	spin_unlock_bh(&xprt->transport_lock);
@@ -1042,12 +1044,9 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 	case rdma_msg:
 		/* never expect read chunks */
 		/* never expect reply chunks (two ways to check) */
-		/* never expect write chunks without having offered RDMA */
 		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
 		    (headerp->rm_body.rm_chunks[1] == xdr_zero &&
-		     headerp->rm_body.rm_chunks[2] != xdr_zero) ||
-		    (headerp->rm_body.rm_chunks[1] != xdr_zero &&
-		     list_empty(&req->rl_registered)))
+		     headerp->rm_body.rm_chunks[2] != xdr_zero))
 			goto badheader;
 		if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
 			/* count any expected write chunks in read reply */
@@ -1084,8 +1083,7 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 		/* never expect read or write chunks, always reply chunks */
 		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
 		    headerp->rm_body.rm_chunks[1] != xdr_zero ||
-		    headerp->rm_body.rm_chunks[2] != xdr_one ||
-		    list_empty(&req->rl_registered))
+		    headerp->rm_body.rm_chunks[2] != xdr_one)
 			goto badheader;
 		iptr = (__be32 *)((unsigned char *)headerp +
 							RPCRDMA_HDRLEN_MIN);
@@ -1118,8 +1116,8 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 	 * control: waking the next RPC waits until this RPC has
 	 * relinquished all its Send Queue entries.
 	 */
-	if (!list_empty(&req->rl_registered))
-		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
+	if (!list_empty(&mws))
+		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, &mws);
 
 	spin_lock_bh(&xprt->transport_lock);
 	cwnd = xprt->cwnd;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 2e02733..1c23117 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -467,7 +467,7 @@ struct rpcrdma_memreg_ops {
 				  struct rpcrdma_mr_seg *, int, bool,
 				  struct rpcrdma_mw **);
 	void		(*ro_unmap_sync)(struct rpcrdma_xprt *,
-					 struct rpcrdma_req *);
+					 struct list_head *);
 	void		(*ro_unmap_safe)(struct rpcrdma_xprt *,
 					 struct rpcrdma_req *, bool);
 	void		(*ro_recover_mr)(struct rpcrdma_mw *);

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2017-06-08 15:52 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-08 15:51 [PATCH v2 00/13] NFS/RDMA client-side patches proposed for v4.13 Chuck Lever
     [not found] ` <20170608154339.18945.5500.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2017-06-08 15:51   ` [PATCH v2 01/13] xprtrdma: On invalidation failure, remove MWs from rl_registered Chuck Lever
2017-06-08 15:51   ` [PATCH v2 02/13] xprtrdma: Pre-mark remotely invalidated MRs Chuck Lever
2017-06-08 15:52   ` Chuck Lever [this message]
2017-06-08 15:52   ` [PATCH v2 04/13] xprtrdma: Rename rpcrdma_req::rl_free Chuck Lever
     [not found]     ` <20170608155212.18945.37327.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2017-06-09 18:58       ` Anna Schumaker
     [not found]         ` <7ee9b1fd-9628-2767-444e-90c6736d63f9-ZwjVKphTwtPQT0dZR+AlfA@public.gmane.org>
2017-06-09 19:03           ` Chuck Lever
     [not found]             ` <5332A4D9-ADC4-4DA4-A20E-ACC6019F83BF-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2017-06-09 19:12               ` Chuck Lever
     [not found]                 ` <C0C5BD8B-9D97-48F4-A0AD-C7FC17DA9C00-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2017-06-09 19:15                   ` Anna Schumaker
2017-06-08 15:52   ` [PATCH v2 05/13] xprtrdma: Fix client lock-up after application signal fires Chuck Lever
2017-06-08 15:52   ` [PATCH v2 06/13] xprtrdma: Fix FRWR invalidation error recovery Chuck Lever
2017-06-08 15:52   ` [PATCH v2 07/13] xprtrdma: Don't defer MR recovery if ro_map fails Chuck Lever
2017-06-08 15:52   ` [PATCH v2 08/13] NFSv4.1: Handle EXCHGID4_FLAG_CONFIRMED_R during NFSv4.1 migration Chuck Lever
2017-06-08 15:52   ` [PATCH v2 09/13] NFSv4.1: Use seqid returned by EXCHANGE_ID after state migration Chuck Lever
2017-06-08 15:53   ` [PATCH v2 10/13] xprtrdma: Demote "connect" log messages Chuck Lever
2017-06-08 15:53   ` [PATCH v2 11/13] xprtrdma: FMR does not need list_del_init() Chuck Lever
2017-06-08 15:53   ` [PATCH v2 12/13] xprtrdma: Replace PAGE_MASK with offset_in_page() Chuck Lever
2017-06-08 15:53   ` [PATCH v2 13/13] xprtrdma: Fix documenting comments in frwr_ops.c Chuck Lever

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170608155204.18945.50477.stgit@manet.1015granger.net \
    --to=chuck.lever-qhclzuegtsvqt0dzr+alfa@public.gmane.org \
    --cc=linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox