From: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: [PATCH v1 07/14] xprtrdma: Introduce an FRMR recovery workqueue
Date: Mon, 04 May 2015 13:57:49 -0400 [thread overview]
Message-ID: <20150504175749.3483.56570.stgit@manet.1015granger.net> (raw)
In-Reply-To: <20150504174626.3483.97639.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
After a transport disconnect, FRMRs can be left in an undetermined
state. In particular, the MR's rkey is no good.
Currently, FRMRs are fixed up by the transport connect worker, but
that can race with ->ro_unmap if an RPC happens to exit while the
transport connect worker is running.
A better way of dealing with broken FRMRs is to detect them before
they are re-used by ->ro_map. Such FRMRs are either already invalid
or are owned by the sending RPC, and thus no race with ->ro_unmap
is possible.
Introduce a mechanism for handing broken FRMRs to a workqueue to be
reset in a context that is appropriate for allocating resources
(i.e., an ib_alloc_fast_reg_mr() API call).
This mechanism is not yet used, but will be in subsequent patches.
Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
net/sunrpc/xprtrdma/frwr_ops.c | 71 ++++++++++++++++++++++++++++++++++++++-
net/sunrpc/xprtrdma/transport.c | 11 +++++-
net/sunrpc/xprtrdma/xprt_rdma.h | 5 +++
3 files changed, 84 insertions(+), 3 deletions(-)
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 66a85fa..a06d9a3 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -17,6 +17,74 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
+static struct workqueue_struct *frwr_recovery_wq;
+
+#define FRWR_RECOVERY_WQ_FLAGS (WQ_UNBOUND | WQ_MEM_RECLAIM)
+
+int
+frwr_alloc_recovery_wq(void)
+{
+ frwr_recovery_wq = alloc_workqueue("frwr_recovery",
+ FRWR_RECOVERY_WQ_FLAGS, 0);
+ return !frwr_recovery_wq ? -ENOMEM : 0;
+}
+
+void
+frwr_destroy_recovery_wq(void)
+{
+ struct workqueue_struct *wq;
+
+ if (!frwr_recovery_wq)
+ return;
+
+ wq = frwr_recovery_wq;
+ frwr_recovery_wq = NULL;
+ destroy_workqueue(wq);
+}
+
+/* Deferred reset of a single FRMR. Generate a fresh rkey by
+ * replacing the MR.
+ *
+ * There's no recovery if this fails. The FRMR is abandoned, but
+ * remains in rb_all. It will be cleaned up when the transport is
+ * destroyed.
+ */
+static void
+__frwr_recovery_worker(struct work_struct *work)
+{
+ struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw,
+ r.frmr.fr_work);
+ struct rpcrdma_xprt *r_xprt = r->r.frmr.fr_xprt;
+ unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
+ struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+
+ if (ib_dereg_mr(r->r.frmr.fr_mr))
+ goto out_fail;
+
+ r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(pd, depth);
+ if (IS_ERR(r->r.frmr.fr_mr))
+ goto out_fail;
+
+ dprintk("RPC: %s: recovered FRMR %p\n", __func__, r);
+ r->r.frmr.fr_state = FRMR_IS_INVALID;
+ rpcrdma_put_mw(r_xprt, r);
+ return;
+
+out_fail:
+ pr_warn("RPC: %s: FRMR %p unrecovered\n",
+ __func__, r);
+}
+
+/* A broken MR was discovered in a context that can't sleep.
+ * Defer recovery to the recovery worker.
+ */
+static void
+__frwr_queue_recovery(struct rpcrdma_mw *r)
+{
+ INIT_WORK(&r->r.frmr.fr_work, __frwr_recovery_worker);
+ queue_work(frwr_recovery_wq, &r->r.frmr.fr_work);
+}
+
static int
__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
unsigned int depth)
@@ -128,7 +196,7 @@ frwr_sendcompletion(struct ib_wc *wc)
/* WARNING: Only wr_id and status are reliable at this point */
r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
- dprintk("RPC: %s: frmr %p (stale), status %d\n",
+ pr_warn("RPC: %s: frmr %p flushed, status %d\n",
__func__, r, wc->status);
r->r.frmr.fr_state = FRMR_IS_STALE;
}
@@ -165,6 +233,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
list_add(&r->mw_list, &buf->rb_mws);
list_add(&r->mw_all, &buf->rb_all);
r->mw_sendcompletion = frwr_sendcompletion;
+ r->r.frmr.fr_xprt = r_xprt;
}
return 0;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index ed70551..f1fa6a7 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -757,17 +757,24 @@ static void __exit xprt_rdma_cleanup(void)
if (rc)
dprintk("RPC: %s: xprt_unregister returned %i\n",
__func__, rc);
+
+ frwr_destroy_recovery_wq();
}
static int __init xprt_rdma_init(void)
{
int rc;
- rc = xprt_register_transport(&xprt_rdma);
-
+ rc = frwr_alloc_recovery_wq();
if (rc)
return rc;
+ rc = xprt_register_transport(&xprt_rdma);
+ if (rc) {
+ frwr_destroy_recovery_wq();
+ return rc;
+ }
+
dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
dprintk("Defaults:\n");
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 7de424e..98227d6 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -204,6 +204,8 @@ struct rpcrdma_frmr {
struct ib_fast_reg_page_list *fr_pgl;
struct ib_mr *fr_mr;
enum rpcrdma_frmr_state fr_state;
+ struct work_struct fr_work;
+ struct rpcrdma_xprt *fr_xprt;
};
struct rpcrdma_mw {
@@ -429,6 +431,9 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
+int frwr_alloc_recovery_wq(void);
+void frwr_destroy_recovery_wq(void);
+
/*
* Wrappers for chunk registration, shared by read/write chunk code.
*/
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2015-05-04 17:57 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-05-04 17:56 [PATCH v1 00/14] client NFS/RDMA patches for 4.2 Chuck Lever
[not found] ` <20150504174626.3483.97639.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-04 17:56 ` [PATCH v1 01/14] xprtrdma: Transport fault injection Chuck Lever
[not found] ` <20150504175651.3483.35554.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-05 13:49 ` Anna Schumaker
[not found] ` <5548CA82.9060903-ZwjVKphTwtPQT0dZR+AlfA@public.gmane.org>
2015-05-05 13:53 ` Chuck Lever
[not found] ` <06C797AA-4E45-49DA-8CA8-2B8D22B818DB-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2015-05-05 14:44 ` Anna Schumaker
[not found] ` <5548D741.7090803-ZwjVKphTwtPQT0dZR+AlfA@public.gmane.org>
2015-05-05 15:15 ` Chuck Lever
[not found] ` <2098B4A5-48C7-4458-BAC6-10F64359C405-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2015-05-05 15:16 ` Anna Schumaker
2015-05-05 15:10 ` Steve Wise
2015-05-04 17:57 ` [PATCH v1 02/14] xprtrdma: Warn when there are orphaned IB objects Chuck Lever
[not found] ` <20150504175700.3483.57728.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-06 11:37 ` Devesh Sharma
[not found] ` <CANjDDBicLDb7K=RZB5Gqv+oACaAG8DANJX3ML971jnXd_5KEuQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-05-06 13:24 ` Chuck Lever
[not found] ` <963F9850-38D0-4434-88E8-14BC42F74499-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2015-05-06 14:05 ` Sagi Grimberg
2015-05-06 14:22 ` Devesh Sharma
[not found] ` <CANjDDBg3Ey0DEM3n9PY9VLW+chF0=AJPghYOopo1a9vL3CKzKg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-05-06 16:48 ` Jason Gunthorpe
[not found] ` <20150506164817.GC11331-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2015-05-07 7:53 ` Devesh Sharma
2015-05-04 17:57 ` [PATCH v1 03/14] xprtrdma: Replace rpcrdma_rep::rr_buffer with rr_rxprt Chuck Lever
[not found] ` <20150504175711.3483.17222.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-07 9:38 ` Sagi Grimberg
[not found] ` <554B328B.9040804-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-05-07 13:25 ` Chuck Lever
2015-05-04 17:57 ` [PATCH v1 04/14] xprtrdma: Use ib_device pointer safely Chuck Lever
[not found] ` <20150504175720.3483.80356.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-07 10:00 ` Sagi Grimberg
[not found] ` <554B37CF.2070206-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-05-07 13:39 ` Chuck Lever
[not found] ` <E1ADA91B-45DA-46B6-A114-E2600613969A-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2015-05-07 13:56 ` Sagi Grimberg
[not found] ` <554B6F2A.6000608-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-05-07 14:12 ` Chuck Lever
[not found] ` <BDE22240-DC37-4C54-B71E-D88EF54D3119-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2015-05-07 15:11 ` Sagi Grimberg
[not found] ` <554B80B7.8090900-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-05-11 15:22 ` Chuck Lever
2015-05-11 18:26 ` Hefty, Sean
[not found] ` <1828884A29C6694DAF28B7E6B8A82373A8FCE01F-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2015-05-11 18:57 ` Chuck Lever
2015-05-12 10:01 ` Sagi Grimberg
2015-05-04 17:57 ` [PATCH v1 05/14] xprtrdma: Introduce helpers for allocating MWs Chuck Lever
[not found] ` <20150504175730.3483.51996.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-07 10:16 ` Sagi Grimberg
2015-05-04 17:57 ` [PATCH v1 06/14] xprtrdma: Acquire FMRs in rpcrdma_fmr_register_external() Chuck Lever
[not found] ` <20150504175739.3483.46010.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-07 10:15 ` Sagi Grimberg
2015-05-04 17:57 ` Chuck Lever [this message]
[not found] ` <20150504175749.3483.56570.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-07 10:37 ` [PATCH v1 07/14] xprtrdma: Introduce an FRMR recovery workqueue Devesh Sharma
2015-05-04 17:57 ` [PATCH v1 08/14] xprtrdma: Acquire MRs in rpcrdma_register_external() Chuck Lever
[not found] ` <20150504175758.3483.44890.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-07 10:31 ` Sagi Grimberg
[not found] ` <554B3EEB.7070302-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-05-08 15:24 ` Devesh Sharma
[not found] ` <CANjDDBiGLcaAofGwz6OGEXUUE_b2rcZepv0ebvTc-XNVEBq5Mw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-05-08 15:40 ` Chuck Lever
[not found] ` <6FBAAAF3-3E70-418F-A887-C022525D6C4F-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2015-05-10 10:17 ` Sagi Grimberg
2015-05-04 17:58 ` [PATCH v1 09/14] xprtrdma: Remove unused LOCAL_INV recovery logic Chuck Lever
[not found] ` <20150504175808.3483.57643.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-07 10:35 ` Sagi Grimberg
[not found] ` <554B3FFA.5020101-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-05-08 15:31 ` Devesh Sharma
2015-05-04 17:58 ` [PATCH v1 10/14] xprtrdma: Remove ->ro_reset Chuck Lever
[not found] ` <20150504175818.3483.22408.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-07 10:36 ` Sagi Grimberg
[not found] ` <554B402F.3000604-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-05-08 15:33 ` Devesh Sharma
2015-05-04 17:58 ` [PATCH v1 11/14] xprtrdma: Remove rpcrdma_ia::ri_memreg_strategy Chuck Lever
[not found] ` <20150504175827.3483.62904.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-07 10:36 ` Sagi Grimberg
[not found] ` <554B404B.9090301-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-05-08 15:34 ` Devesh Sharma
2015-05-04 17:58 ` [PATCH v1 12/14] xprtrdma: Split rb_lock Chuck Lever
[not found] ` <20150504175837.3483.28838.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-07 10:37 ` Sagi Grimberg
2015-05-04 17:58 ` [PATCH v1 13/14] xprtrdma: Stack relief in fmr_op_map() Chuck Lever
[not found] ` <20150504175846.3483.32959.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-07 10:50 ` Sagi Grimberg
[not found] ` <554B436B.5040108-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-05-08 15:36 ` Devesh Sharma
2015-05-04 17:58 ` [PATCH v1 14/14] xprtrmda: Reduce per-transport MR allocation Chuck Lever
[not found] ` <20150504175856.3483.57373.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-05-07 11:00 ` Sagi Grimberg
[not found] ` <554B45E7.4040902-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-05-08 15:53 ` Devesh Sharma
2015-05-05 15:17 ` [PATCH v1 00/14] client NFS/RDMA patches for 4.2 Steve Wise
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150504175749.3483.56570.stgit@manet.1015granger.net \
--to=chuck.lever-qhclzuegtsvqt0dzr+alfa@public.gmane.org \
--cc=linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox