Linux NFS development
 help / color / mirror / Atom feed
* [PATCH] svcrdma: wake sq waiters when the transport closes
@ 2026-05-22 18:13 Chuck Lever
  0 siblings, 0 replies; only message in thread
From: Chuck Lever @ 2026-05-22 18:13 UTC (permalink / raw)
  To: NeilBrown, Jeff Layton, Olga Kornievskaia, Dai Ngo, Tom Talpey
  Cc: linux-nfs, Chuck Lever

From: Chuck Lever <chuck.lever@oracle.com>

Threads parked in svc_rdma_sq_wait() on sc_sq_ticket_wait or
sc_send_wait can hang indefinitely in TASK_UNINTERRUPTIBLE state
across transport teardown, pinning svc_xprt references and
blocking svc_rdma_free().

The close path sets XPT_CLOSE before invoking xpo_detach and both
wait_event predicates include an XPT_CLOSE term, but the
predicates are re-evaluated only on wakeup. sc_sq_ticket_wait has
no completion-driven wake path; it is advanced solely by the
chained ticket handoff inside svc_rdma_sq_wait() itself. Without
an explicit wake at close, parked threads never observe
XPT_CLOSE, hold their svc_xprt_get reference forever, and
svc_rdma_free() blocks on xpt_ref dropping to zero.

Two close entry points reach this transport. Local teardown runs
svc_rdma_detach() from svc_handle_xprt() -> svc_delete_xprt() ->
xpo_detach() on a worker thread. A remote disconnect arrives at
svc_rdma_cma_handler(), which calls svc_xprt_deferred_close():
that sets XPT_CLOSE and enqueues the transport but does not
access either RDMA waitqueue, so a worker already parked in
svc_rdma_sq_wait() never re-evaluates its predicate. With every
worker parked on this transport, no thread is available to run
the local teardown either, and the wake site there is
unreachable.

Introduce svc_rdma_xprt_deferred_close(), a thin svcrdma wrapper
that calls svc_xprt_deferred_close() and then wakes both
sc_sq_ticket_wait and sc_send_wait. Convert the svcrdma producers
that called svc_xprt_deferred_close() directly:
svc_rdma_cma_handler(), qp_event_handler(),
svc_rdma_post_send_err(), svc_rdma_wc_send(), the sendto drop
path, the rw completion error paths, and the recvfrom flush and
read-list error paths.

Wake both waitqueues from svc_rdma_detach() as well. The
synchronous svc_xprt_close() path (backchannel ENOTCONN, device
removal via svc_rdma_xprt_done) reaches detach without flowing
through svc_xprt_deferred_close() and therefore does not invoke
the new helper.

Fixes: ccc89b9d1ed2 ("svcrdma: Add fair queuing for Send Queue access")
Assisted-by: kres (claude-opus-4-7)
Signed-off-by: Chris Mason <clm@meta.com>
[ cel: add svc_rdma_xprt_deferred_close() to complete the fix ]
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_rdma.h          |  1 +
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  |  4 ++--
 net/sunrpc/xprtrdma/svc_rdma_rw.c        |  6 ++---
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    |  6 ++---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 30 ++++++++++++++++++++++--
 5 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 4ba39f07371d..5aadb47b3b0e 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -328,6 +328,7 @@ extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
 				   unsigned int length);
 
 /* svc_rdma_transport.c */
+extern void svc_rdma_xprt_deferred_close(struct svcxprt_rdma *rdma);
 extern struct svc_xprt_class svc_rdma_class;
 #ifdef CONFIG_SUNRPC_BACKCHANNEL
 extern struct svc_xprt_class svc_rdma_bc_class;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 19503a12d0a2..fe9bf0371b6e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -383,7 +383,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 		trace_svcrdma_wc_recv_err(wc, &ctxt->rc_cid);
 dropped:
 	svc_rdma_recv_ctxt_put(rdma, ctxt);
-	svc_xprt_deferred_close(&rdma->sc_xprt);
+	svc_rdma_xprt_deferred_close(rdma);
 }
 
 /**
@@ -1010,7 +1010,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		if (ret == -EINVAL)
 			svc_rdma_send_error(rdma_xprt, ctxt, ret);
 		svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
-		svc_xprt_deferred_close(xprt);
+		svc_rdma_xprt_deferred_close(rdma_xprt);
 		return ret;
 	}
 	return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index cca8ec973de4..c535e6de9654 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -304,7 +304,7 @@ static void svc_rdma_reply_done(struct ib_cq *cq, struct ib_wc *wc)
 		trace_svcrdma_wc_reply_err(wc, &cc->cc_cid);
 	}
 
-	svc_xprt_deferred_close(&rdma->sc_xprt);
+	svc_rdma_xprt_deferred_close(rdma);
 }
 
 /**
@@ -336,7 +336,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
 	 * some of the outgoing RPC message. Signal the loss
 	 * to the client by closing the connection.
 	 */
-	svc_xprt_deferred_close(&rdma->sc_xprt);
+	svc_rdma_xprt_deferred_close(rdma);
 }
 
 /**
@@ -381,7 +381,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
 	 */
 	svc_rdma_cc_release(rdma, cc, DMA_FROM_DEVICE);
 	svc_rdma_recv_ctxt_put(rdma, ctxt);
-	svc_xprt_deferred_close(&rdma->sc_xprt);
+	svc_rdma_xprt_deferred_close(rdma);
 }
 
 /*
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index eceefd21bec8..7f6d17bf8c1f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -438,7 +438,7 @@ int svc_rdma_post_send_err(struct svcxprt_rdma *rdma,
 			   int sqecount, int ret)
 {
 	trace_svcrdma_sq_post_err(rdma, cid, ret);
-	svc_xprt_deferred_close(&rdma->sc_xprt);
+	svc_rdma_xprt_deferred_close(rdma);
 
 	/* If even one WR was posted, a Send completion will
 	 * return the reserved SQ slots.
@@ -480,7 +480,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 	else
 		trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid);
 	svc_rdma_send_ctxt_put(rdma, ctxt);
-	svc_xprt_deferred_close(&rdma->sc_xprt);
+	svc_rdma_xprt_deferred_close(rdma);
 }
 
 /**
@@ -1201,7 +1201,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	svc_rdma_send_ctxt_put(rdma, sctxt);
 drop_connection:
 	trace_svcrdma_send_err(rqstp, ret);
-	svc_xprt_deferred_close(&rdma->sc_xprt);
+	svc_rdma_xprt_deferred_close(rdma);
 	return -ENOTCONN;
 }
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index f99cd6177504..7ca71741106b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -98,10 +98,27 @@ struct svc_xprt_class svc_rdma_class = {
 	.xcl_ident = XPRT_TRANSPORT_RDMA,
 };
 
+/**
+ * svc_rdma_xprt_deferred_close - Close an RDMA transport (deferred)
+ * @rdma: transport to close
+ */
+void svc_rdma_xprt_deferred_close(struct svcxprt_rdma *rdma)
+{
+	svc_xprt_deferred_close(&rdma->sc_xprt);
+
+	/* Release parked sc_sq_ticket_wait and sc_send_wait waiters.
+	 * Once XPT_CLOSE is observed each returns -ENOTCONN.
+	 */
+	wake_up_all(&rdma->sc_sq_ticket_wait);
+	wake_up_all(&rdma->sc_send_wait);
+}
+
 /* QP event handler */
 static void qp_event_handler(struct ib_event *event, void *context)
 {
 	struct svc_xprt *xprt = context;
+	struct svcxprt_rdma *rdma =
+		container_of(xprt, struct svcxprt_rdma, sc_xprt);
 
 	trace_svcrdma_qp_error(event, (struct sockaddr *)&xprt->xpt_remote);
 	switch (event->event) {
@@ -119,7 +136,7 @@ static void qp_event_handler(struct ib_event *event, void *context)
 	case IB_EVENT_QP_ACCESS_ERR:
 	case IB_EVENT_DEVICE_FATAL:
 	default:
-		svc_xprt_deferred_close(xprt);
+		svc_rdma_xprt_deferred_close(rdma);
 		break;
 	}
 }
@@ -341,7 +358,7 @@ static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id,
 		svc_xprt_enqueue(xprt);
 		break;
 	case RDMA_CM_EVENT_DISCONNECTED:
-		svc_xprt_deferred_close(xprt);
+		svc_rdma_xprt_deferred_close(rdma);
 		break;
 	default:
 		break;
@@ -598,6 +615,15 @@ static void svc_rdma_detach(struct svc_xprt *xprt)
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
 
 	rdma_disconnect(rdma->sc_cm_id);
+
+	/*
+	 * Most close paths go through svc_rdma_xprt_deferred_close(),
+	 * which wakes the SQ waitqueues. svc_xprt_close() reaches
+	 * detach without that helper, so wake any threads parked in
+	 * svc_rdma_sq_wait() here as well.
+	 */
+	wake_up_all(&rdma->sc_sq_ticket_wait);
+	wake_up_all(&rdma->sc_send_wait);
 }
 
 /**
-- 
2.54.0


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2026-05-22 18:14 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-22 18:13 [PATCH] svcrdma: wake sq waiters when the transport closes Chuck Lever

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox