public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
From: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: [PATCH v1 2/7] xprtrdma: Handle stale connection rejection
Date: Thu, 26 Jan 2017 12:55:47 -0500	[thread overview]
Message-ID: <20170126175547.5794.86451.stgit@manet.1015granger.net> (raw)
In-Reply-To: <20170126174806.5794.14678.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>

A server rejects a connection attempt with STALE_CONNECTION when a
client attempts to connect to a working remote service, but uses a
QPN and GUID that corresponds to an old connection that was
abandoned. This might occur after a client crashes and restarts.

Fix rpcrdma_conn_upcall() to distinguish between a normal rejection
and rejection of stale connection parameters.

As an additional clean-up, remove the code that retries the
connection attempt with different ORD/IRD values. Code audit of
other ULP initiators shows no similar special case handling of
initiator_depth or responder_resources.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/verbs.c |   65 +++++++++++++------------------------------
 1 file changed, 20 insertions(+), 45 deletions(-)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 61d16c3..45db2b4 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -54,6 +54,7 @@
 #include <linux/sunrpc/svc_rdma.h>
 #include <asm/bitops.h>
 #include <linux/module.h> /* try_module_get()/module_put() */
+#include <rdma/ib_cm.h>
 
 #include "xprt_rdma.h"
 
@@ -279,7 +280,14 @@
 		connstate = -ENETDOWN;
 		goto connected;
 	case RDMA_CM_EVENT_REJECTED:
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+		pr_info("rpcrdma: connection to %pIS:%u on %s rejected: %s\n",
+			sap, rpc_get_port(sap), ia->ri_device->name,
+			rdma_reject_msg(id, event->status));
+#endif
 		connstate = -ECONNREFUSED;
+		if (event->status == IB_CM_REJ_STALE_CONN)
+			connstate = -EAGAIN;
 		goto connected;
 	case RDMA_CM_EVENT_DISCONNECTED:
 		connstate = -ECONNABORTED;
@@ -643,20 +651,20 @@ static void rpcrdma_destroy_id(struct rdma_cm_id *id)
 int
 rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 {
+	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
+						   rx_ia);
 	struct rdma_cm_id *id, *old;
+	unsigned int extras;
 	int rc = 0;
-	int retry_count = 0;
 
 	if (ep->rep_connected != 0) {
-		struct rpcrdma_xprt *xprt;
 retry:
 		dprintk("RPC:       %s: reconnecting...\n", __func__);
 
 		rpcrdma_ep_disconnect(ep, ia);
 
-		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
-		id = rpcrdma_create_id(xprt, ia,
-				(struct sockaddr *)&xprt->rx_data.addr);
+		id = rpcrdma_create_id(r_xprt, ia,
+				(struct sockaddr *)&r_xprt->rx_data.addr);
 		if (IS_ERR(id)) {
 			rc = -EHOSTUNREACH;
 			goto out;
@@ -711,51 +719,18 @@ static void rpcrdma_destroy_id(struct rdma_cm_id *id)
 	}
 
 	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
-
-	/*
-	 * Check state. A non-peer reject indicates no listener
-	 * (ECONNREFUSED), which may be a transient state. All
-	 * others indicate a transport condition which has already
-	 * undergone a best-effort.
-	 */
-	if (ep->rep_connected == -ECONNREFUSED &&
-	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
-		dprintk("RPC:       %s: non-peer_reject, retry\n", __func__);
-		goto retry;
-	}
 	if (ep->rep_connected <= 0) {
-		/* Sometimes, the only way to reliably connect to remote
-		 * CMs is to use same nonzero values for ORD and IRD. */
-		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
-		    (ep->rep_remote_cma.responder_resources == 0 ||
-		     ep->rep_remote_cma.initiator_depth !=
-				ep->rep_remote_cma.responder_resources)) {
-			if (ep->rep_remote_cma.responder_resources == 0)
-				ep->rep_remote_cma.responder_resources = 1;
-			ep->rep_remote_cma.initiator_depth =
-				ep->rep_remote_cma.responder_resources;
+		if (ep->rep_connected == -EAGAIN)
 			goto retry;
-		}
 		rc = ep->rep_connected;
-	} else {
-		struct rpcrdma_xprt *r_xprt;
-		unsigned int extras;
-
-		dprintk("RPC:       %s: connected\n", __func__);
-
-		r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
-		extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
-
-		if (extras) {
-			rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
-			if (rc) {
-				pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
-					__func__, rc);
-				rc = 0;
-			}
-		}
+		goto out;
 	}
 
+	dprintk("RPC:       %s: connected\n", __func__);
+	extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
+	if (extras)
+		rpcrdma_ep_post_extra_recv(r_xprt, extras);
+
 out:
 	if (rc)
 		ep->rep_connected = rc;

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2017-01-26 17:55 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-26 17:55 [PATCH v1 0/7] NFS/RDMA client-side patches for 4.11 Chuck Lever
     [not found] ` <20170126174806.5794.14678.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2017-01-26 17:55   ` [PATCH v1 1/7] xprtrdma: Properly recover FRWRs with in-flight FASTREG WRs Chuck Lever
2017-01-26 17:55   ` Chuck Lever [this message]
2017-01-26 17:55   ` [PATCH v1 3/7] xprtrdma: Refactor management of mw_list field Chuck Lever
2017-01-26 17:56   ` [PATCH v1 4/7] sunrpc: Allow xprt->ops->timer method to sleep Chuck Lever
2017-01-26 17:56   ` [PATCH v1 5/7] sunrpc: Enable calls to rpc_call_null_helper() from other modules Chuck Lever
     [not found]     ` <20170126175611.5794.20844.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2017-01-27 22:07       ` Anna Schumaker
     [not found]         ` <edab4aee-158d-0d5d-6ef2-9a44e63a929c-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-01-27 22:10           ` Chuck Lever
2017-01-26 17:56   ` [PATCH v1 6/7] xprtrdma: Detect unreachable NFS/RDMA servers more reliably Chuck Lever
2017-01-26 17:56   ` [PATCH v1 7/7] sunrpc: Allow keepalive ping on a credit-full transport Chuck Lever
     [not found]     ` <20170126175628.5794.56813.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2017-01-30 20:29       ` Anna Schumaker
     [not found]         ` <5268b237-2c33-2058-d4b2-798afe021243-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-01-30 20:32           ` Chuck Lever
     [not found]             ` <7694B03F-328F-472A-8A41-F3D6DCF0125E-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2017-01-30 21:36               ` Anna Schumaker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170126175547.5794.86451.stgit@manet.1015granger.net \
    --to=chuck.lever-qhclzuegtsvqt0dzr+alfa@public.gmane.org \
    --cc=linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox