From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Chuck Lever <chuck.lever@oracle.com>,
Anna Schumaker <Anna.Schumaker@Netapp.com>,
Sasha Levin <sashal@kernel.org>,
linux-nfs@vger.kernel.org, netdev@vger.kernel.org
Subject: [PATCH AUTOSEL 4.20 304/304] xprtrdma: Don't wake pending tasks until disconnect is done
Date: Mon, 28 Jan 2019 10:43:41 -0500 [thread overview]
Message-ID: <20190128154341.47195-304-sashal@kernel.org> (raw)
In-Reply-To: <20190128154341.47195-1-sashal@kernel.org>
From: Chuck Lever <chuck.lever@oracle.com>
[ Upstream commit 0c0829bcf51aef713806e49b8ea2bac7962f54e2 ]
Transport disconnect processing wakes up pending tasks at
various points.
Suppose an RPC Reply is being processed. The RPC task that the Reply
belongs to is waiting on the pending queue. If a disconnect wake-up
happens before reply processing is done, that reply, even if it is
good, is thrown away, and the RPC has to be sent again.
This window apparently does not exist for socket transports because
a lock is held while a reply is being received, and that lock prevents
the wake-up call until after reply processing is done.
To resolve this, all RPC replies being processed on an RPC-over-RDMA
transport have to complete before pending tasks are awoken due to a
transport disconnect.
Callers that already hold the transport write lock may invoke
->ops->close directly. Others use a generic helper that schedules
a close when the write lock can be taken safely.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
net/sunrpc/xprtrdma/backchannel.c | 13 +++++++------
net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 8 +++++---
net/sunrpc/xprtrdma/transport.c | 17 ++++++++++-------
net/sunrpc/xprtrdma/verbs.c | 1 -
net/sunrpc/xprtrdma/xprt_rdma.h | 1 +
5 files changed, 23 insertions(+), 17 deletions(-)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index e5b367a3e517..88fde80b9347 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -193,14 +193,15 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
*/
int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
{
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+ struct rpc_xprt *xprt = rqst->rq_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
int rc;
- if (!xprt_connected(rqst->rq_xprt))
- goto drop_connection;
+ if (!xprt_connected(xprt))
+ return -ENOTCONN;
- if (!xprt_request_get_cong(rqst->rq_xprt, rqst))
+ if (!xprt_request_get_cong(xprt, rqst))
return -EBADSLT;
rc = rpcrdma_bc_marshal_reply(rqst);
@@ -216,7 +217,7 @@ failed_marshal:
if (rc != -ENOTCONN)
return rc;
drop_connection:
- xprt_disconnect_done(rqst->rq_xprt);
+ xprt_rdma_close(xprt);
return -ENOTCONN;
}
@@ -339,7 +340,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
out_overflow:
pr_warn("RPC/RDMA backchannel overflow\n");
- xprt_disconnect_done(xprt);
+ xprt_force_disconnect(xprt);
/* This receive buffer gets reposted automatically
* when the connection is re-established.
*/
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index f3c147d70286..b908f2ca08fd 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -200,11 +200,10 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
svc_rdma_send_ctxt_put(rdma, ctxt);
goto drop_connection;
}
- return rc;
+ return 0;
drop_connection:
dprintk("svcrdma: failed to send bc call\n");
- xprt_disconnect_done(xprt);
return -ENOTCONN;
}
@@ -225,8 +224,11 @@ xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
ret = -ENOTCONN;
rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
- if (!test_bit(XPT_DEAD, &sxprt->xpt_flags))
+ if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) {
ret = rpcrdma_bc_send_request(rdma, rqst);
+ if (ret == -ENOTCONN)
+ svc_close_xprt(sxprt);
+ }
mutex_unlock(&sxprt->xpt_mutex);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index ae2a83828953..e7683d3b1e6c 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -437,8 +437,7 @@ out1:
* Caller holds @xprt's send lock to prevent activity on this
* transport while the connection is torn down.
*/
-static void
-xprt_rdma_close(struct rpc_xprt *xprt)
+void xprt_rdma_close(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
@@ -449,13 +448,13 @@ xprt_rdma_close(struct rpc_xprt *xprt)
if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
xprt_clear_connected(xprt);
rpcrdma_ia_remove(ia);
- return;
+ goto out;
}
+
if (ep->rep_connected == -ENODEV)
return;
if (ep->rep_connected > 0)
xprt->reestablish_timeout = 0;
- xprt_disconnect_done(xprt);
rpcrdma_ep_disconnect(ep, ia);
/* Prepare @xprt for the next connection by reinitializing
@@ -463,6 +462,10 @@ xprt_rdma_close(struct rpc_xprt *xprt)
*/
r_xprt->rx_buf.rb_credits = 1;
xprt->cwnd = RPC_CWNDSHIFT;
+
+out:
+ ++xprt->connect_cookie;
+ xprt_disconnect_done(xprt);
}
/**
@@ -713,7 +716,7 @@ xprt_rdma_send_request(struct rpc_rqst *rqst)
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
if (!xprt_connected(xprt))
- goto drop_connection;
+ return -ENOTCONN;
if (!xprt_request_get_cong(xprt, rqst))
return -EBADSLT;
@@ -745,8 +748,8 @@ failed_marshal:
if (rc != -ENOTCONN)
return rc;
drop_connection:
- xprt_disconnect_done(xprt);
- return -ENOTCONN; /* implies disconnect */
+ xprt_rdma_close(xprt);
+ return -ENOTCONN;
}
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index b9bc7f9f6bb9..919fddec0197 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -316,7 +316,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
ep->rep_connected = -EAGAIN;
goto disconnected;
case RDMA_CM_EVENT_DISCONNECTED:
- ++xprt->connect_cookie;
ep->rep_connected = -ECONNABORTED;
disconnected:
xprt_force_disconnect(xprt);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index a13ccb643ce0..0af75b1405f8 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -653,6 +653,7 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
extern unsigned int xprt_rdma_max_inline_read;
void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
+void xprt_rdma_close(struct rpc_xprt *xprt);
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);
--
2.19.1
prev parent reply other threads:[~2019-01-28 15:59 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20190128154341.47195-1-sashal@kernel.org>
2019-01-28 15:38 ` [PATCH AUTOSEL 4.20 015/304] wil6210: fix reset flow for Talyn-mb Sasha Levin
2019-01-28 15:38 ` [PATCH AUTOSEL 4.20 016/304] wil6210: fix memory leak in wil_find_tx_bcast_2 Sasha Levin
2019-01-28 15:38 ` [PATCH AUTOSEL 4.20 017/304] ath10k: assign 'n_cipher_suites' for WCN3990 Sasha Levin
2019-01-28 15:38 ` [PATCH AUTOSEL 4.20 018/304] ath9k: dynack: use authentication messages for 'late' ack Sasha Levin
2019-01-28 15:39 ` [PATCH AUTOSEL 4.20 030/304] bpftool: Improve handling of ENOENT on map dumps Sasha Levin
2019-01-28 15:39 ` [PATCH AUTOSEL 4.20 036/304] ptp: check gettime64 return code in PTP_SYS_OFFSET ioctl Sasha Levin
2019-01-28 15:39 ` [PATCH AUTOSEL 4.20 038/304] dpaa2-ptp: defer probe when portal allocation failed Sasha Levin
2019-01-28 15:39 ` [PATCH AUTOSEL 4.20 039/304] sctp: Fix SKB list traversal in sctp_intl_store_ordered() Sasha Levin
2019-01-28 15:39 ` [PATCH AUTOSEL 4.20 040/304] sctp: Fix SKB list traversal in sctp_intl_store_reasm() Sasha Levin
2019-01-28 15:39 ` [PATCH AUTOSEL 4.20 041/304] iwlwifi: fw: do not set sgi bits for HE connection Sasha Levin
2019-01-28 15:39 ` [PATCH AUTOSEL 4.20 048/304] i40e: suppress bogus error message Sasha Levin
2019-01-28 15:39 ` [PATCH AUTOSEL 4.20 049/304] i40e: prevent overlapping tx_timeout recover Sasha Levin
2019-01-28 15:39 ` [PATCH AUTOSEL 4.20 052/304] usbnet: smsc95xx: fix rx packet alignment Sasha Levin
2019-01-28 15:39 ` [PATCH AUTOSEL 4.20 057/304] bpf: libbpf: retry map creation without the name Sasha Levin
2019-01-28 15:39 ` [PATCH AUTOSEL 4.20 058/304] net/mlx5: EQ, Use the right place to store/read IRQ affinity hint Sasha Levin
2019-01-28 15:39 ` [PATCH AUTOSEL 4.20 076/304] ptp: Fix pass zero to ERR_PTR() in ptp_clock_register Sasha Levin
2019-01-28 15:40 ` [PATCH AUTOSEL 4.20 092/304] net: aquantia: return 'err' if set MPI_DEINIT state fails Sasha Levin
2019-01-28 15:40 ` [PATCH AUTOSEL 4.20 104/304] mt76x0: dfs: fix IBI_R11 configuration on non-radar channels Sasha Levin
2019-01-28 15:40 ` [PATCH AUTOSEL 4.20 105/304] mt76x0: use band parameter for LC calibration Sasha Levin
2019-01-28 15:40 ` [PATCH AUTOSEL 4.20 109/304] nfp: add locking around representor changes Sasha Levin
2019-01-28 15:40 ` [PATCH AUTOSEL 4.20 127/304] mac80211: fix deauth TX when we disconnect Sasha Levin
2019-01-28 15:40 ` [PATCH AUTOSEL 4.20 130/304] tipc: fix node keep alive interval calculation Sasha Levin
2019-01-28 15:41 ` [PATCH AUTOSEL 4.20 169/304] mlxsw: spectrum_acl: Limit priority value Sasha Levin
2019-01-28 15:41 ` [PATCH AUTOSEL 4.20 172/304] selftests/bpf: use __bpf_constant_htons in test_prog.c Sasha Levin
2019-01-28 15:41 ` [PATCH AUTOSEL 4.20 179/304] ipv6: Fix handling of LLA with VRF and sockets bound to VRF Sasha Levin
2019-01-28 15:41 ` [PATCH AUTOSEL 4.20 180/304] tools: bpftool: fix -Wmissing declaration warnings Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 203/304] bnxt_en: Disable MSIX before re-reserving NQs/CMPL rings Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 207/304] mac80211: Properly handle SKB with radiotap only Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 216/304] net: hns3: fix error handling in the hns3_get_vector_ring_chain Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 217/304] net: hns3: fix incomplete uninitialization of IRQ in the hns3_nic_uninit_vector_data() Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 223/304] mac80211: fix radiotap vendor presence bitmap handling Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 224/304] xfrm6_tunnel: Fix spi check in __xfrm6_tunnel_alloc_spi Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 225/304] mlxsw: spectrum: Properly cleanup LAG uppers when removing port from LAG Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 229/304] cw1200: Fix concurrency use-after-free bugs in cw1200_hw_scan() Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 230/304] net: hns3: add max vector number check for pf Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 231/304] net: hns3: fix the descriptor index when get rss type Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 232/304] net: hns3: don't restore rules when flow director is disabled Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 234/304] iwlwifi: mvm: fix setting HE ppe FW config Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 236/304] mlx5: update timecounter at least twice per counter overflow Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 241/304] ath10k: fix kernel panic due to use after free Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 242/304] ath10k: fix tx_stats memory leak Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 247/304] i40e: define proper net_device::neigh_priv_len Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 248/304] ice: Do not enable NAPI on q_vectors that have no rings Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 249/304] igb: Fix an issue that PME is not enabled during runtime suspend Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 252/304] bpf: sk_msg, zap ingress queue on psock down Sasha Levin
2019-01-28 15:42 ` [PATCH AUTOSEL 4.20 254/304] bpf: sk_msg, fix socket data_ready events Sasha Levin
2019-01-28 15:43 ` [PATCH AUTOSEL 4.20 272/304] niu: fix missing checks of niu_pci_eeprom_read Sasha Levin
2019-01-28 15:43 ` [PATCH AUTOSEL 4.20 283/304] isdn: hisax: hfc_pci: Fix a possible concurrency use-after-free bug in HFCPCI_l1hw() Sasha Levin
2019-01-28 15:43 ` [PATCH AUTOSEL 4.20 285/304] fsl/fman: Use GFP_ATOMIC in {memac,tgec}_add_hash_mac_address() Sasha Levin
2019-01-28 15:43 ` [PATCH AUTOSEL 4.20 293/304] bpf: fix check_map_access smin_value test when pointer contains offset Sasha Levin
2019-01-28 15:43 ` Sasha Levin [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190128154341.47195-304-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=Anna.Schumaker@Netapp.com \
--cc=chuck.lever@oracle.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-nfs@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).