From: Sagi Grimberg <sagi@grimberg.me>
To: Chuck Lever <chuck.lever@oracle.com>,
linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org
Subject: Re: [PATCH v2 16/18] xprtrdma: Add ro_unmap_safe memreg method
Date: Tue, 26 Apr 2016 23:26:43 +0300
Message-ID: <571FCF03.4060406@grimberg.me>
In-Reply-To: <20160425192259.3566.58807.stgit@manet.1015granger.net>
On 25/04/16 22:22, Chuck Lever wrote:
> There needs to be a safe method of releasing registered memory
> resources when an RPC terminates. Safe can mean a number of things:
>
> + Doesn't have to sleep
>
> + Doesn't rely on having a QP in RTS
>
> ro_unmap_safe will be that safe method. It can be used in cases
> where synchronous memory invalidation could deadlock, or would
> require an active QP.
>
> The important case is fencing an RPC's memory regions after it is
> signaled (^C) and before it exits. If this is not done, there is a
> window where the server can write an RPC reply into memory that the
> client has released and re-used for some other purpose.
>
> Note that this is a full solution for FRWR, but FMR and physical
> still have some gaps where a particularly bad server can wreak
> some havoc on the client. These gaps are not made worse by this
> patch and are expected to be exceptionally rare and timing-based.
> They are noted in documenting comments.
>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
> net/sunrpc/xprtrdma/fmr_ops.c | 105 +++++++++++++++++++++++++++++++++---
> net/sunrpc/xprtrdma/frwr_ops.c | 27 +++++++++
> net/sunrpc/xprtrdma/physical_ops.c | 20 +++++++
> net/sunrpc/xprtrdma/rpc_rdma.c | 5 --
> net/sunrpc/xprtrdma/transport.c | 9 +--
> net/sunrpc/xprtrdma/xprt_rdma.h | 3 +
> 6 files changed, 150 insertions(+), 19 deletions(-)
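Context for readers of the archive: the transport.c hunk that consumes
the new method is not quoted in this message (only the diffstat above
shows it). Going by the patch description, the call site would be
shaped roughly like the hedged sketch below. The helper name is
hypothetical; rx_ia, ri_ops, and rl_nchunks are existing names from
xprt_rdma.h that the quoted hunks themselves use.

	/* Sketch only, not the actual transport.c hunk: release all
	 * MRs registered for an RPC, choosing the synchronous path
	 * only when the caller is allowed to sleep.
	 */
	static void
	rpcrdma_release_req_mrs(struct rpcrdma_xprt *r_xprt,
				struct rpcrdma_req *req, bool can_sleep)
	{
		struct rpcrdma_ia *ia = &r_xprt->rx_ia;

		if (req->rl_nchunks)
			ia->ri_ops->ro_unmap_safe(r_xprt, req, can_sleep);
	}
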
>
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index 9d50f3a..a658dcf 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -35,6 +35,64 @@
> /* Maximum scatter/gather per FMR */
> #define RPCRDMA_MAX_FMR_SGES (64)
>
> +static struct workqueue_struct *fmr_recovery_wq;
> +
> +#define FMR_RECOVERY_WQ_FLAGS (WQ_UNBOUND)
> +
> +int
> +fmr_alloc_recovery_wq(void)
> +{
> + fmr_recovery_wq = alloc_workqueue("fmr_recovery",
> + FMR_RECOVERY_WQ_FLAGS, 0);
> + return !fmr_recovery_wq ? -ENOMEM : 0;
> +}
> +
> +void
> +fmr_destroy_recovery_wq(void)
> +{
> + struct workqueue_struct *wq;
> +
> + if (!fmr_recovery_wq)
> + return;
> +
> + wq = fmr_recovery_wq;
> + fmr_recovery_wq = NULL;
> + destroy_workqueue(wq);
> +}
> +
> +static int
> +__fmr_unmap(struct rpcrdma_mw *mw)
> +{
> + LIST_HEAD(l);
> +
> + list_add(&mw->fmr.fmr->list, &l);
> + return ib_unmap_fmr(&l);
> +}
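
ib_unmap_fmr() takes a list head, so several FMRs can be invalidated
with a single verb call; the helper above wraps the single-MR case.
For illustration only, a batched variant might look like this (the
function name is hypothetical and not part of the patch; mw_list and
fmr.fmr->list are the fields the patch itself uses):

	/* Hypothetical batched variant: invalidate every FMR on the
	 * caller's list of rpcrdma_mw structures with one
	 * ib_unmap_fmr() call.
	 */
	static int
	__fmr_unmap_list(struct list_head *mws)
	{
		struct rpcrdma_mw *mw;
		LIST_HEAD(l);

		list_for_each_entry(mw, mws, mw_list)
			list_add_tail(&mw->fmr.fmr->list, &l);
		return ib_unmap_fmr(&l);
	}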
> +
> +/* Deferred reset of a single FMR. Generate a fresh rkey by
> + * replacing the MR. There's no recovery if this fails.
> + */
> +static void
> +__fmr_recovery_worker(struct work_struct *work)
> +{
> + struct rpcrdma_mw *mw = container_of(work, struct rpcrdma_mw,
> + mw_work);
> + struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
> +
> + __fmr_unmap(mw);
> + rpcrdma_put_mw(r_xprt, mw);
> + return;
> +}
> +
> +/* A broken MR was discovered in a context that can't sleep.
> + * Defer recovery to the recovery worker.
> + */
> +static void
> +__fmr_queue_recovery(struct rpcrdma_mw *mw)
> +{
> + INIT_WORK(&mw->mw_work, __fmr_recovery_worker);
> + queue_work(fmr_recovery_wq, &mw->mw_work);
> +}
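
The alloc/destroy pair above presumably gets wired into the
transport's module lifetime; that hunk is not quoted here, so this is
only a sketch of the expected wiring (function names illustrative):

	/* Sketch: create the recovery workqueue at module load so
	 * __fmr_queue_recovery() always has a live queue to target,
	 * and tear it down at unload.
	 */
	static int xprt_rdma_module_init_sketch(void)
	{
		int rc;

		rc = fmr_alloc_recovery_wq();
		if (rc)
			return rc;
		/* ... register the RDMA transport, etc. ... */
		return 0;
	}

	static void xprt_rdma_module_exit_sketch(void)
	{
		fmr_destroy_recovery_wq();
	}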
> +
> static int
> fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
> struct rpcrdma_create_data_internal *cdata)
> @@ -92,6 +150,7 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
> if (IS_ERR(r->fmr.fmr))
> goto out_fmr_err;
>
> + r->mw_xprt = r_xprt;
> list_add(&r->mw_list, &buf->rb_mws);
> list_add(&r->mw_all, &buf->rb_all);
> }
> @@ -107,15 +166,6 @@ out:
> return rc;
> }
>
> -static int
> -__fmr_unmap(struct rpcrdma_mw *r)
> -{
> - LIST_HEAD(l);
> -
> - list_add(&r->fmr.fmr->list, &l);
> - return ib_unmap_fmr(&l);
> -}
> -
> /* Use the ib_map_phys_fmr() verb to register a memory region
> * for remote access via RDMA READ or RDMA WRITE.
> */
> @@ -242,6 +292,42 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
> req->rl_nchunks = 0;
> }
>
> +/* Use a slow, safe mechanism to invalidate all memory regions
> + * that were registered for "req".
> + *
> + * In the asynchronous case, DMA unmapping occurs first here
> + * because the rpcrdma_mr_seg is released immediately after this
> + call. Its contents won't be available in __fmr_dma_unmap later.
> + * FIXME.
> + */
> +static void
> +fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> + bool sync)
> +{
> + struct rpcrdma_mr_seg *seg;
> + struct rpcrdma_mw *mw;
> + unsigned int i;
> +
> + for (i = 0; req->rl_nchunks; req->rl_nchunks--) {
> + seg = &req->rl_segments[i];
> + mw = seg->rl_mw;
> +
> + if (sync) {
> + /* ORDER: invalidate the MR before DMA-unmapping its pages */
> + __fmr_unmap(mw);
> + __fmr_dma_unmap(r_xprt, seg);
> + rpcrdma_put_mw(r_xprt, mw);
> + } else {
> + __fmr_dma_unmap(r_xprt, seg);
> + __fmr_queue_recovery(mw);
> + }
> +
> + i += seg->mr_nsegs;
> + seg->mr_nsegs = 0;
> + seg->rl_mw = NULL;
> + }
> +}
> +
> /* Use the ib_unmap_fmr() verb to prevent further remote
> * access via RDMA READ or RDMA WRITE.
> */
> @@ -295,6 +381,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
> const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
> .ro_map = fmr_op_map,
> .ro_unmap_sync = fmr_op_unmap_sync,
> + .ro_unmap_safe = fmr_op_unmap_safe,
> .ro_unmap = fmr_op_unmap,
> .ro_open = fmr_op_open,
> .ro_maxpages = fmr_op_maxpages,
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 1251a1d..79ba323 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -614,6 +614,32 @@ reset_mrs:
> goto unmap;
> }
>
> +/* Use a slow, safe mechanism to invalidate all memory regions
> + * that were registered for "req".
> + */
> +static void
> +frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> + bool sync)
> +{
> + struct rpcrdma_mr_seg *seg;
> + struct rpcrdma_mw *mw;
> + unsigned int i;
> +
> + for (i = 0; req->rl_nchunks; req->rl_nchunks--) {
> + seg = &req->rl_segments[i];
> + mw = seg->rl_mw;
> +
> + if (sync)
> + __frwr_reset_and_unmap(r_xprt, mw);
> + else
> + __frwr_queue_recovery(mw);
> +
> + i += seg->mr_nsegs;
> + seg->mr_nsegs = 0;
> + seg->rl_mw = NULL;
> + }
> +}
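
Note the contrast with the FMR version: there is no separate DMA-unmap
step here, because the FRWR helpers introduced earlier in this series
do the whole job. Based on patch 13/18 (not quoted in this message),
the sync-side helper is assumed to look roughly like the sketch below;
error handling is elided, and the fr_sg/fr_nents/fr_dir field names
come from patches 10 and 11 of the series.

	/* Assumed shape of __frwr_reset_and_unmap() from 13/18: get a
	 * fresh rkey, release the pages, then return the MR to the
	 * free list. Invalidation and DMA unmapping happen in one
	 * place, so no ordering comment is needed in the caller.
	 */
	static void
	__frwr_reset_and_unmap(struct rpcrdma_xprt *r_xprt,
			       struct rpcrdma_mw *mw)
	{
		struct rpcrdma_ia *ia = &r_xprt->rx_ia;
		struct rpcrdma_frmr *f = &mw->frmr;

		__frwr_reset_mr(ia, mw);
		ib_dma_unmap_sg(ia->ri_device, f->fr_sg,
				f->fr_nents, f->fr_dir);
		rpcrdma_put_mw(r_xprt, mw);
	}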
> +
> /* Post a LOCAL_INV Work Request to prevent further remote access
> * via RDMA READ or RDMA WRITE.
> */
> @@ -675,6 +701,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)
> const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
> .ro_map = frwr_op_map,
> .ro_unmap_sync = frwr_op_unmap_sync,
> + .ro_unmap_safe = frwr_op_unmap_safe,
> .ro_unmap = frwr_op_unmap,
> .ro_open = frwr_op_open,
> .ro_maxpages = frwr_op_maxpages,
> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
> index 2dc6ec2..95ef3a7 100644
> --- a/net/sunrpc/xprtrdma/physical_ops.c
> +++ b/net/sunrpc/xprtrdma/physical_ops.c
> @@ -97,6 +97,25 @@ physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
> rpcrdma_unmap_one(device, &req->rl_segments[i++]);
> }
>
> +/* Use a slow, safe mechanism to invalidate all memory regions
> + * that were registered for "req".
> + *
> + * For physical memory registration, there is no good way to
> + * fence a single MR that has been advertised to the server. The
> + * client has already handed the server an R_key that cannot be
> + * invalidated and is shared by all MRs on this connection.
> + * Tearing down the PD might be the only safe choice, but it's
> + * not clear that a freshly acquired DMA R_key would be different
> + * than the one used by the PD that was just destroyed.
> + * FIXME.
> + */
> +static void
> +physical_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> + bool sync)
> +{
> + physical_op_unmap_sync(r_xprt, req);
> +}
> +
So physical has no async mode?
Is there a device that makes you resort to physical memreg?
It's an awful lot of maintenance on what looks to be an esoteric (at
best) code path.
The rest looks fine to me.
Thread overview: 50+ messages
2016-04-25 19:20 [PATCH v2 00/18] NFS/RDMA client patches for v4.7 Chuck Lever
2016-04-25 19:20 ` [PATCH v2 01/18] sunrpc: Advertise maximum backchannel payload size Chuck Lever
2016-04-25 19:21 ` [PATCH v2 02/18] xprtrdma: Bound the inline threshold values Chuck Lever
2016-04-25 19:21 ` [PATCH v2 03/18] xprtrdma: Limit number of RDMA segments in RPC-over-RDMA headers Chuck Lever
2016-04-26 19:43 ` Sagi Grimberg
2016-04-25 19:21 ` [PATCH v2 04/18] xprtrdma: Prevent inline overflow Chuck Lever
2016-04-26 19:55 ` Sagi Grimberg
2016-04-26 20:04 ` Chuck Lever
2016-04-26 20:42 ` Sagi Grimberg
2016-04-26 20:56 ` Chuck Lever
2016-04-25 19:21 ` [PATCH v2 05/18] xprtrdma: Avoid using Write list for small NFS READ requests Chuck Lever
2016-04-26 19:56 ` Sagi Grimberg
2016-04-25 19:21 ` [PATCH v2 06/18] xprtrdma: Update comments in rpcrdma_marshal_req() Chuck Lever
2016-04-26 19:57 ` Sagi Grimberg
2016-04-25 19:21 ` [PATCH v2 07/18] xprtrdma: Allow Read list and Reply chunk simultaneously Chuck Lever
2016-04-26 20:04 ` Sagi Grimberg
2016-04-25 19:21 ` [PATCH v2 08/18] xprtrdma: Remove rpcrdma_create_chunks() Chuck Lever
2016-04-26 20:04 ` Sagi Grimberg
2016-04-25 19:22 ` [PATCH v2 09/18] xprtrdma: Use core ib_drain_qp() API Chuck Lever
2016-04-26 20:07 ` Sagi Grimberg
2016-04-25 19:22 ` [PATCH v2 10/18] xprtrdma: Rename rpcrdma_frwr::sg and sg_nents Chuck Lever
2016-04-26 20:08 ` Sagi Grimberg
2016-04-25 19:22 ` [PATCH v2 11/18] xprtrdma: Save I/O direction in struct rpcrdma_frwr Chuck Lever
2016-04-26 20:12 ` Sagi Grimberg
2016-04-26 20:14 ` Chuck Lever
2016-04-25 19:22 ` [PATCH v2 12/18] xprtrdma: Reset MRs in frwr_op_unmap_sync() Chuck Lever
2016-04-26 20:13 ` Sagi Grimberg
2016-04-25 19:22 ` [PATCH v2 13/18] xprtrdma: Refactor the FRWR recovery worker Chuck Lever
2016-04-26 20:16 ` Sagi Grimberg
2016-04-26 20:30 ` Chuck Lever
2016-04-26 20:33 ` Sagi Grimberg
2016-04-25 19:22 ` [PATCH v2 14/18] xprtrdma: Move fr_xprt and fr_worker to struct rpcrdma_mw Chuck Lever
2016-04-26 20:18 ` Sagi Grimberg
2016-04-25 19:22 ` [PATCH v2 15/18] xprtrdma: Refactor __fmr_dma_unmap() Chuck Lever
2016-04-26 20:21 ` Sagi Grimberg
2016-04-25 19:22 ` [PATCH v2 16/18] xprtrdma: Add ro_unmap_safe memreg method Chuck Lever
2016-04-26 20:26 ` Sagi Grimberg [this message]
2016-04-26 20:44 ` Chuck Lever
2016-04-27 15:59 ` Removing NFS/RDMA client support for PHYSICAL memory registration Chuck Lever
2016-04-28 10:59 ` Sagi Grimberg
2016-04-25 19:23 ` [PATCH v2 17/18] xprtrdma: Remove ro_unmap() from all registration modes Chuck Lever
2016-04-26 20:29 ` Sagi Grimberg
2016-04-26 20:46 ` Chuck Lever
2016-04-26 20:50 ` Sagi Grimberg
2016-04-25 19:23 ` [PATCH v2 18/18] xprtrdma: Faster server reboot recovery Chuck Lever
2016-04-26 20:31 ` Sagi Grimberg
2016-04-26 14:13 ` [PATCH v2 00/18] NFS/RDMA client patches for v4.7 Steve Wise
2016-04-26 14:57 ` Chuck Lever
2016-04-26 16:45 ` Steve Wise
2016-04-26 17:15 ` Chuck Lever