From: Shirley Ma <shirley.ma-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
To: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>,
linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: Re: [PATCH v1 10/13] xprtrdma: Release FRMR segment buffers during LOCAL_INV completion
Date: Tue, 24 Jun 2014 22:17:38 -0700 [thread overview]
Message-ID: <53AA5B72.3010200@oracle.com> (raw)
In-Reply-To: <20140623224023.1634.67233.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
Would it be possible to delay rpcrdma_buffer_put() until the LOCAL_INV request's send completion? That is, remove rpcrdma_buffer_put() from xprt_rdma_free() and add a callback that runs after the LOCAL_INV has completed?
Shirley
On 06/23/2014 03:40 PM, Chuck Lever wrote:
> FRMR uses a LOCAL_INV Work Request, which is asynchronous, to
> deregister segment buffers. Other registration strategies use
> synchronous deregistration mechanisms (like ib_unmap_fmr()).
>
> For a synchronous deregistration mechanism, it makes sense for
> xprt_rdma_free() to put segment buffers back into the buffer pool
> immediately once rpcrdma_deregister_external() returns.
>
> This is currently also what FRMR is doing. It is releasing segment
> buffers just after the LOCAL_INV WR is posted.
>
> But segment buffers need to be put back after the LOCAL_INV WR
> _completes_ (or flushes). Otherwise, rpcrdma_buffer_get() can then
> assign these segment buffers to another RPC task while they are
> still "in use" by the hardware.
>
> The result of re-using an FRMR too quickly is that its rkey
> no longer matches the rkey that was registered with the provider.
> This results in FAST_REG_MR or LOCAL_INV Work Requests completing
> with IB_WC_MW_BIND_ERR, and the FRMR, and thus the transport,
> becomes unusable.
>
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
> net/sunrpc/xprtrdma/verbs.c | 44 +++++++++++++++++++++++++++++++++++----
> net/sunrpc/xprtrdma/xprt_rdma.h | 2 ++
> 2 files changed, 42 insertions(+), 4 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index f24f0bf..52f57f7 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -62,6 +62,8 @@
> #endif
>
> static void rpcrdma_decrement_frmr_rkey(struct rpcrdma_mw *);
> +static void rpcrdma_get_mw(struct rpcrdma_mw *);
> +static void rpcrdma_put_mw(struct rpcrdma_mw *);
>
> /*
> * internal functions
> @@ -167,6 +169,7 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc)
> if (fastreg)
> rpcrdma_decrement_frmr_rkey(mw);
> }
> + rpcrdma_put_mw(mw);
> }
>
> static int
> @@ -1034,7 +1037,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
> len += cdata->padding;
> switch (ia->ri_memreg_strategy) {
> case RPCRDMA_FRMR:
> - len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
> + len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
> sizeof(struct rpcrdma_mw);
> break;
> case RPCRDMA_MTHCAFMR:
> @@ -1076,7 +1079,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
> r = (struct rpcrdma_mw *)p;
> switch (ia->ri_memreg_strategy) {
> case RPCRDMA_FRMR:
> - for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
> + for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
> r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
> ia->ri_max_frmr_depth);
> if (IS_ERR(r->r.frmr.fr_mr)) {
> @@ -1252,12 +1255,36 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
> }
>
> static void
> -rpcrdma_put_mw_locked(struct rpcrdma_mw *mw)
> +rpcrdma_free_mw(struct kref *kref)
> {
> + struct rpcrdma_mw *mw = container_of(kref, struct rpcrdma_mw, mw_ref);
> list_add_tail(&mw->mw_list, &mw->mw_pool->rb_mws);
> }
>
> static void
> +rpcrdma_put_mw_locked(struct rpcrdma_mw *mw)
> +{
> + kref_put(&mw->mw_ref, rpcrdma_free_mw);
> +}
> +
> +static void
> +rpcrdma_get_mw(struct rpcrdma_mw *mw)
> +{
> + kref_get(&mw->mw_ref);
> +}
> +
> +static void
> +rpcrdma_put_mw(struct rpcrdma_mw *mw)
> +{
> + struct rpcrdma_buffer *buffers = mw->mw_pool;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&buffers->rb_lock, flags);
> + rpcrdma_put_mw_locked(mw);
> + spin_unlock_irqrestore(&buffers->rb_lock, flags);
> +}
> +
> +static void
> rpcrdma_buffer_put_mw(struct rpcrdma_mw **mw)
> {
> rpcrdma_put_mw_locked(*mw);
> @@ -1304,6 +1331,7 @@ rpcrdma_buffer_get_mws(struct rpcrdma_req *req, struct rpcrdma_buffer *buffers)
> r = list_entry(buffers->rb_mws.next,
> struct rpcrdma_mw, mw_list);
> list_del(&r->mw_list);
> + kref_init(&r->mw_ref);
> r->mw_pool = buffers;
> req->rl_segments[i].mr_chunk.rl_mw = r;
> }
> @@ -1583,6 +1611,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
> dprintk("RPC: %s: Using frmr %p to map %d segments\n",
> __func__, seg1->mr_chunk.rl_mw, i);
>
> + rpcrdma_get_mw(seg1->mr_chunk.rl_mw);
> if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.fr_state == FRMR_IS_VALID)) {
> dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
> __func__,
> @@ -1595,6 +1624,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
> invalidate_wr.send_flags = IB_SEND_SIGNALED;
> invalidate_wr.ex.invalidate_rkey =
> seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
> + rpcrdma_get_mw(seg1->mr_chunk.rl_mw);
> DECR_CQCOUNT(&r_xprt->rx_ep);
> post_wr = &invalidate_wr;
> } else
> @@ -1638,6 +1668,9 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
> *nsegs = i;
> return 0;
> out_err:
> + rpcrdma_put_mw(seg1->mr_chunk.rl_mw);
> + if (post_wr == &invalidate_wr)
> + rpcrdma_put_mw(seg1->mr_chunk.rl_mw);
> while (i--)
> rpcrdma_unmap_one(ia, --seg);
> return rc;
> @@ -1653,6 +1686,7 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
>
> while (seg1->mr_nsegs--)
> rpcrdma_unmap_one(ia, seg++);
> + rpcrdma_get_mw(seg1->mr_chunk.rl_mw);
>
> memset(&invalidate_wr, 0, sizeof invalidate_wr);
> invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
> @@ -1664,9 +1698,11 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
> read_lock(&ia->ri_qplock);
> rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
> read_unlock(&ia->ri_qplock);
> - if (rc)
> + if (rc) {
> + rpcrdma_put_mw(seg1->mr_chunk.rl_mw);
> dprintk("RPC: %s: failed ib_post_send for invalidate,"
> " status %i\n", __func__, rc);
> + }
> return rc;
> }
>
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index b81e5b5..7a140fe 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -44,6 +44,7 @@
> #include <linux/spinlock.h> /* spinlock_t, etc */
> #include <linux/atomic.h> /* atomic_t, etc */
> #include <linux/workqueue.h> /* struct work_struct */
> +#include <linux/kref.h>
>
> #include <rdma/rdma_cm.h> /* RDMA connection api */
> #include <rdma/ib_verbs.h> /* RDMA verbs api */
> @@ -176,6 +177,7 @@ struct rpcrdma_mw {
> } r;
> struct list_head mw_list;
> struct rpcrdma_buffer *mw_pool;
> + struct kref mw_ref;
> };
>
> #define RPCRDMA_BIT_FASTREG (0)
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2014-06-25 5:17 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-06-23 22:39 [PATCH v1 00/13] NFS/RDMA patches for 3.17 Chuck Lever
[not found] ` <20140623223201.1634.83888.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2014-06-23 22:39 ` [PATCH v1 01/13] xprtrdma: Fix panic in rpcrdma_register_frmr_external() Chuck Lever
[not found] ` <20140623223909.1634.33362.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2014-06-24 14:37 ` Or Gerlitz
2014-06-23 22:39 ` [PATCH v1 02/13] xprtrdma: Protect ->qp during FRMR deregistration Chuck Lever
2014-06-23 22:39 ` [PATCH v1 03/13] xprtrdma: Limit data payload size for ALLPHYSICAL Chuck Lever
2014-06-23 22:39 ` [PATCH v1 04/13] xprtrdma: Update rkeys after transport reconnect Chuck Lever
2014-06-23 22:39 ` [PATCH v1 05/13] xprtrdma: Don't drain CQs on transport disconnect Chuck Lever
[not found] ` <20140623223942.1634.89063.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2014-07-02 19:06 ` Devesh Sharma
[not found] ` <EE7902D3F51F404C82415C4803930ACD3FE0C540-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
2014-07-02 19:28 ` Steve Wise
[not found] ` <53B45D7B.4020705-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2014-07-02 19:40 ` Chuck Lever
[not found] ` <C9B761DF-7960-4346-949E-17A9BDD357DB-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2014-07-02 19:46 ` Steve Wise
2014-07-02 19:48 ` Devesh Sharma
[not found] ` <EE7902D3F51F404C82415C4803930ACD3FE0C594-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
2014-07-02 19:59 ` Chuck Lever
[not found] ` <8D65ABF9-DC2B-4906-BBDE-60F03FCEF990-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2014-07-03 5:33 ` Devesh Sharma
2014-07-02 19:42 ` Devesh Sharma
[not found] ` <EE7902D3F51F404C82415C4803930ACD3FE0C57A-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
2014-07-02 19:50 ` Steve Wise
2014-07-02 19:53 ` Devesh Sharma
[not found] ` <EE7902D3F51F404C82415C4803930ACD3FE0C5AE-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
2014-07-02 19:56 ` Steve Wise
2014-07-02 19:57 ` Devesh Sharma
2014-07-02 19:56 ` Devesh Sharma
2014-06-23 22:39 ` [PATCH v1 06/13] xprtrdma: Unclutter struct rpcrdma_mr_seg Chuck Lever
2014-06-23 22:39 ` [PATCH v1 07/13] xprtrdma: Encode Work Request opcode in wc->wr_id Chuck Lever
2014-06-23 22:40 ` [PATCH v1 08/13] xprtrdma: Back off rkey when FAST_REG_MR fails Chuck Lever
[not found] ` <20140623224007.1634.55636.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2014-06-24 15:47 ` Anna Schumaker
[not found] ` <53A99DA6.90808-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2014-06-24 16:26 ` Chuck Lever
2014-06-23 22:40 ` [PATCH v1 09/13] xprtrdma: Refactor rpcrdma_buffer_put() Chuck Lever
2014-06-23 22:40 ` [PATCH v1 10/13] xprtrdma: Release FRMR segment buffers during LOCAL_INV completion Chuck Lever
[not found] ` <20140623224023.1634.67233.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2014-06-25 5:17 ` Shirley Ma [this message]
[not found] ` <53AA5B72.3010200-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2014-06-25 14:32 ` Chuck Lever
[not found] ` <89930B1D-AE3B-48AD-922C-6FCA754D2B01-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2014-06-25 16:14 ` Shirley Ma
2014-06-23 22:40 ` [PATCH v1 11/13] xprtrdma: Clean up rpcrdma_ep_disconnect() Chuck Lever
2014-06-23 22:40 ` [PATCH v1 12/13] xprtrdma: Remove RPCRDMA_PERSISTENT_REGISTRATION macro Chuck Lever
2014-06-23 22:40 ` [PATCH v1 13/13] xprtrdma: Handle additional connection events Chuck Lever
[not found] ` <20140623224048.1634.23972.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2014-06-24 15:58 ` Anna Schumaker
2014-06-24 14:35 ` [PATCH v1 00/13] NFS/RDMA patches for 3.17 Or Gerlitz
[not found] ` <CAJZOPZ+ix6tPDHXbVrSnVzofHSbzqOoyTBvzkEo-GJpOYOaPFA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-06-24 17:07 ` Chuck Lever
2014-06-25 22:47 ` Steve Wise
2014-06-27 16:17 ` Shirley Ma
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=53AA5B72.3010200@oracle.com \
--to=shirley.ma-qhclzuegtsvqt0dzr+alfa@public.gmane.org \
--cc=chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org \
--cc=linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox