linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sagi Grimberg <sagi@grimberg.me>
To: Chuck Lever <chuck.lever@oracle.com>,
	linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org
Subject: Re: [PATCH v2 16/18] xprtrdma: Add ro_unmap_safe memreg method
Date: Tue, 26 Apr 2016 23:26:43 +0300	[thread overview]
Message-ID: <571FCF03.4060406@grimberg.me> (raw)
In-Reply-To: <20160425192259.3566.58807.stgit@manet.1015granger.net>



On 25/04/16 22:22, Chuck Lever wrote:
> There needs to be a safe method of releasing registered memory
> resources when an RPC terminates. Safe can mean a number of things:
>
> + Doesn't have to sleep
>
> + Doesn't rely on having a QP in RTS
>
> ro_unmap_safe will be that safe method. It can be used in cases
> where synchronous memory invalidation can deadlock, or needs to have
> an active QP.
>
> The important case is fencing an RPC's memory regions after it is
> signaled (^C) and before it exits. If this is not done, there is a
> window where the server can write an RPC reply into memory that the
> client has released and re-used for some other purpose.
>
> Note that this is a full solution for FRWR, but FMR and physical
> still have some gaps where a particularly bad server can wreak
> some havoc on the client. These gaps are not made worse by this
> patch and are expected to be exceptionally rare and timing-based.
> They are noted in documenting comments.
>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>   net/sunrpc/xprtrdma/fmr_ops.c      |  105 +++++++++++++++++++++++++++++++++---
>   net/sunrpc/xprtrdma/frwr_ops.c     |   27 +++++++++
>   net/sunrpc/xprtrdma/physical_ops.c |   20 +++++++
>   net/sunrpc/xprtrdma/rpc_rdma.c     |    5 --
>   net/sunrpc/xprtrdma/transport.c    |    9 +--
>   net/sunrpc/xprtrdma/xprt_rdma.h    |    3 +
>   6 files changed, 150 insertions(+), 19 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index 9d50f3a..a658dcf 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -35,6 +35,64 @@
>   /* Maximum scatter/gather per FMR */
>   #define RPCRDMA_MAX_FMR_SGES	(64)
>
> +static struct workqueue_struct *fmr_recovery_wq;
> +
> +#define FMR_RECOVERY_WQ_FLAGS		(WQ_UNBOUND)
> +
> +int
> +fmr_alloc_recovery_wq(void)
> +{
> +	fmr_recovery_wq = alloc_workqueue("fmr_recovery", WQ_UNBOUND, 0);
> +	return !fmr_recovery_wq ? -ENOMEM : 0;
> +}
> +
> +void
> +fmr_destroy_recovery_wq(void)
> +{
> +	struct workqueue_struct *wq;
> +
> +	if (!fmr_recovery_wq)
> +		return;
> +
> +	wq = fmr_recovery_wq;
> +	fmr_recovery_wq = NULL;
> +	destroy_workqueue(wq);
> +}
> +
> +static int
> +__fmr_unmap(struct rpcrdma_mw *mw)
> +{
> +	LIST_HEAD(l);
> +
> +	list_add(&mw->fmr.fmr->list, &l);
> +	return ib_unmap_fmr(&l);
> +}
> +
> +/* Deferred reset of a single FMR. Generate a fresh rkey by
> + * replacing the MR. There's no recovery if this fails.
> + */
> +static void
> +__fmr_recovery_worker(struct work_struct *work)
> +{
> +	struct rpcrdma_mw *mw = container_of(work, struct rpcrdma_mw,
> +					    mw_work);
> +	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
> +
> +	__fmr_unmap(mw);
> +	rpcrdma_put_mw(r_xprt, mw);
> +	return;
> +}
> +
> +/* A broken MR was discovered in a context that can't sleep.
> + * Defer recovery to the recovery worker.
> + */
> +static void
> +__fmr_queue_recovery(struct rpcrdma_mw *mw)
> +{
> +	INIT_WORK(&mw->mw_work, __fmr_recovery_worker);
> +	queue_work(fmr_recovery_wq, &mw->mw_work);
> +}
> +
>   static int
>   fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
>   	    struct rpcrdma_create_data_internal *cdata)
> @@ -92,6 +150,7 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
>   		if (IS_ERR(r->fmr.fmr))
>   			goto out_fmr_err;
>
> +		r->mw_xprt = r_xprt;
>   		list_add(&r->mw_list, &buf->rb_mws);
>   		list_add(&r->mw_all, &buf->rb_all);
>   	}
> @@ -107,15 +166,6 @@ out:
>   	return rc;
>   }
>
> -static int
> -__fmr_unmap(struct rpcrdma_mw *r)
> -{
> -	LIST_HEAD(l);
> -
> -	list_add(&r->fmr.fmr->list, &l);
> -	return ib_unmap_fmr(&l);
> -}
> -
>   /* Use the ib_map_phys_fmr() verb to register a memory region
>    * for remote access via RDMA READ or RDMA WRITE.
>    */
> @@ -242,6 +292,42 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
>   	req->rl_nchunks = 0;
>   }
>
> +/* Use a slow, safe mechanism to invalidate all memory regions
> + * that were registered for "req".
> + *
> + * In the asynchronous case, DMA unmapping occurs first here
> + * because the rpcrdma_mr_seg is released immediately after this
> + * call. Its contents won't be available in __fmr_dma_unmap later.
> + * FIXME.
> + */
> +static void
> +fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> +		  bool sync)
> +{
> +	struct rpcrdma_mr_seg *seg;
> +	struct rpcrdma_mw *mw;
> +	unsigned int i;
> +
> +	for (i = 0; req->rl_nchunks; req->rl_nchunks--) {
> +		seg = &req->rl_segments[i];
> +		mw = seg->rl_mw;
> +
> +		if (sync) {
> +			/* ORDER */
> +			__fmr_unmap(mw);
> +			__fmr_dma_unmap(r_xprt, seg);
> +			rpcrdma_put_mw(r_xprt, mw);
> +		} else {
> +			__fmr_dma_unmap(r_xprt, seg);
> +			__fmr_queue_recovery(mw);
> +		}
> +
> +		i += seg->mr_nsegs;
> +		seg->mr_nsegs = 0;
> +		seg->rl_mw = NULL;
> +	}
> +}
> +
>   /* Use the ib_unmap_fmr() verb to prevent further remote
>    * access via RDMA READ or RDMA WRITE.
>    */
> @@ -295,6 +381,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
>   const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>   	.ro_map				= fmr_op_map,
>   	.ro_unmap_sync			= fmr_op_unmap_sync,
> +	.ro_unmap_safe			= fmr_op_unmap_safe,
>   	.ro_unmap			= fmr_op_unmap,
>   	.ro_open			= fmr_op_open,
>   	.ro_maxpages			= fmr_op_maxpages,
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 1251a1d..79ba323 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -614,6 +614,32 @@ reset_mrs:
>   	goto unmap;
>   }
>
> +/* Use a slow, safe mechanism to invalidate all memory regions
> + * that were registered for "req".
> + */
> +static void
> +frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> +		   bool sync)
> +{
> +	struct rpcrdma_mr_seg *seg;
> +	struct rpcrdma_mw *mw;
> +	unsigned int i;
> +
> +	for (i = 0; req->rl_nchunks; req->rl_nchunks--) {
> +		seg = &req->rl_segments[i];
> +		mw = seg->rl_mw;
> +
> +		if (sync)
> +			__frwr_reset_and_unmap(r_xprt, mw);
> +		else
> +			__frwr_queue_recovery(mw);
> +
> +		i += seg->mr_nsegs;
> +		seg->mr_nsegs = 0;
> +		seg->rl_mw = NULL;
> +	}
> +}
> +
>   /* Post a LOCAL_INV Work Request to prevent further remote access
>    * via RDMA READ or RDMA WRITE.
>    */
> @@ -675,6 +701,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)
>   const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>   	.ro_map				= frwr_op_map,
>   	.ro_unmap_sync			= frwr_op_unmap_sync,
> +	.ro_unmap_safe			= frwr_op_unmap_safe,
>   	.ro_unmap			= frwr_op_unmap,
>   	.ro_open			= frwr_op_open,
>   	.ro_maxpages			= frwr_op_maxpages,
> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
> index 2dc6ec2..95ef3a7 100644
> --- a/net/sunrpc/xprtrdma/physical_ops.c
> +++ b/net/sunrpc/xprtrdma/physical_ops.c
> @@ -97,6 +97,25 @@ physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
>   		rpcrdma_unmap_one(device, &req->rl_segments[i++]);
>   }
>
> +/* Use a slow, safe mechanism to invalidate all memory regions
> + * that were registered for "req".
> + *
> + * For physical memory registration, there is no good way to
> + * fence a single MR that has been advertised to the server. The
> + * client has already handed the server an R_key that cannot be
> + * invalidated and is shared by all MRs on this connection.
> + * Tearing down the PD might be the only safe choice, but it's
> + * not clear that a freshly acquired DMA R_key would be different
> + * than the one used by the PD that was just destroyed.
> + * FIXME.
> + */
> +static void
> +physical_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> +		       bool sync)
> +{
> +	physical_op_unmap_sync(r_xprt, req);
> +}
> +

So physical has no async mode?

Is there a device that makes you resort to physical memreg?
It's an awful lot of maintenance on what looks to be an esoteric (at
best) code path.

The rest looks fine to me.

  reply	other threads:[~2016-04-26 20:26 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-25 19:20 [PATCH v2 00/18] NFS/RDMA client patches for v4.7 Chuck Lever
2016-04-25 19:20 ` [PATCH v2 01/18] sunrpc: Advertise maximum backchannel payload size Chuck Lever
2016-04-25 19:21 ` [PATCH v2 02/18] xprtrdma: Bound the inline threshold values Chuck Lever
2016-04-25 19:21 ` [PATCH v2 03/18] xprtrdma: Limit number of RDMA segments in RPC-over-RDMA headers Chuck Lever
2016-04-26 19:43   ` Sagi Grimberg
2016-04-25 19:21 ` [PATCH v2 04/18] xprtrdma: Prevent inline overflow Chuck Lever
2016-04-26 19:55   ` Sagi Grimberg
2016-04-26 20:04     ` Chuck Lever
2016-04-26 20:42       ` Sagi Grimberg
2016-04-26 20:56         ` Chuck Lever
2016-04-25 19:21 ` [PATCH v2 05/18] xprtrdma: Avoid using Write list for small NFS READ requests Chuck Lever
2016-04-26 19:56   ` Sagi Grimberg
2016-04-25 19:21 ` [PATCH v2 06/18] xprtrdma: Update comments in rpcrdma_marshal_req() Chuck Lever
2016-04-26 19:57   ` Sagi Grimberg
2016-04-25 19:21 ` [PATCH v2 07/18] xprtrdma: Allow Read list and Reply chunk simultaneously Chuck Lever
2016-04-26 20:04   ` Sagi Grimberg
2016-04-25 19:21 ` [PATCH v2 08/18] xprtrdma: Remove rpcrdma_create_chunks() Chuck Lever
2016-04-26 20:04   ` Sagi Grimberg
2016-04-25 19:22 ` [PATCH v2 09/18] xprtrdma: Use core ib_drain_qp() API Chuck Lever
2016-04-26 20:07   ` Sagi Grimberg
2016-04-25 19:22 ` [PATCH v2 10/18] xprtrdma: Rename rpcrdma_frwr::sg and sg_nents Chuck Lever
2016-04-26 20:08   ` Sagi Grimberg
2016-04-25 19:22 ` [PATCH v2 11/18] xprtrdma: Save I/O direction in struct rpcrdma_frwr Chuck Lever
2016-04-26 20:12   ` Sagi Grimberg
2016-04-26 20:14     ` Chuck Lever
2016-04-25 19:22 ` [PATCH v2 12/18] xprtrdma: Reset MRs in frwr_op_unmap_sync() Chuck Lever
2016-04-26 20:13   ` Sagi Grimberg
2016-04-25 19:22 ` [PATCH v2 13/18] xprtrdma: Refactor the FRWR recovery worker Chuck Lever
2016-04-26 20:16   ` Sagi Grimberg
2016-04-26 20:30     ` Chuck Lever
2016-04-26 20:33       ` Sagi Grimberg
2016-04-25 19:22 ` [PATCH v2 14/18] xprtrdma: Move fr_xprt and fr_worker to struct rpcrdma_mw Chuck Lever
2016-04-26 20:18   ` Sagi Grimberg
2016-04-25 19:22 ` [PATCH v2 15/18] xprtrdma: Refactor __fmr_dma_unmap() Chuck Lever
2016-04-26 20:21   ` Sagi Grimberg
2016-04-25 19:22 ` [PATCH v2 16/18] xprtrdma: Add ro_unmap_safe memreg method Chuck Lever
2016-04-26 20:26   ` Sagi Grimberg [this message]
2016-04-26 20:44     ` Chuck Lever
2016-04-27 15:59       ` Removing NFS/RDMA client support for PHYSICAL memory registration Chuck Lever
2016-04-28 10:59         ` Sagi Grimberg
2016-04-25 19:23 ` [PATCH v2 17/18] xprtrdma: Remove ro_unmap() from all registration modes Chuck Lever
2016-04-26 20:29   ` Sagi Grimberg
2016-04-26 20:46     ` Chuck Lever
2016-04-26 20:50       ` Sagi Grimberg
2016-04-25 19:23 ` [PATCH v2 18/18] xprtrdma: Faster server reboot recovery Chuck Lever
2016-04-26 20:31   ` Sagi Grimberg
2016-04-26 14:13 ` [PATCH v2 00/18] NFS/RDMA client patches for v4.7 Steve Wise
2016-04-26 14:57   ` Chuck Lever
2016-04-26 16:45     ` Steve Wise
2016-04-26 17:15       ` Chuck Lever

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=571FCF03.4060406@grimberg.me \
    --to=sagi@grimberg.me \
    --cc=chuck.lever@oracle.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).