From: Tom Talpey <tom@talpey.com>
To: Chuck Lever <chuck.lever@oracle.com>,
linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org
Subject: Re: [PATCH v1 6/8] xprtrdma: Add class for RDMA backwards direction transport
Date: Mon, 23 Nov 2015 19:49:57 -0500 [thread overview]
Message-ID: <5653B435.2040509@talpey.com> (raw)
In-Reply-To: <20151123222102.13040.13340.stgit@klimt.1015granger.net>
On 11/23/2015 5:21 PM, Chuck Lever wrote:
> To support the server-side of an NFSv4.1 backchannel on RDMA
> connections, add a transport class for backwards direction
> operation.
So, what's special here is that it re-uses an existing forward
channel's connection? If not, it would seem unnecessary to
define a new type/transport semantic. Say this?
>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
> include/linux/sunrpc/xprt.h | 1
> net/sunrpc/xprt.c | 1
> net/sunrpc/xprtrdma/svc_rdma_transport.c | 14 +-
> net/sunrpc/xprtrdma/transport.c | 243 ++++++++++++++++++++++++++++++
> net/sunrpc/xprtrdma/xprt_rdma.h | 2
> 5 files changed, 256 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
> index 69ef5b3..7637ccd 100644
> --- a/include/linux/sunrpc/xprt.h
> +++ b/include/linux/sunrpc/xprt.h
> @@ -85,6 +85,7 @@ struct rpc_rqst {
> __u32 * rq_buffer; /* XDR encode buffer */
> size_t rq_callsize,
> rq_rcvsize;
> + void * rq_privdata; /* xprt-specific per-rqst data */
> size_t rq_xmit_bytes_sent; /* total bytes sent */
> size_t rq_reply_bytes_recvd; /* total reply bytes */
> /* received */
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index 2e98f4a..37edea6 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -1425,3 +1425,4 @@ void xprt_put(struct rpc_xprt *xprt)
> if (atomic_dec_and_test(&xprt->count))
> xprt_destroy(xprt);
> }
> +EXPORT_SYMBOL_GPL(xprt_put);
> diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> index 58ec362..3768a7f 100644
> --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
> +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> @@ -1182,12 +1182,14 @@ static void __svc_rdma_free(struct work_struct *work)
> {
> struct svcxprt_rdma *rdma =
> container_of(work, struct svcxprt_rdma, sc_work);
> - dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
> + struct svc_xprt *xprt = &rdma->sc_xprt;
> +
> + dprintk("svcrdma: %s(%p)\n", __func__, rdma);
>
> /* We should only be called from kref_put */
> - if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0)
> + if (atomic_read(&xprt->xpt_ref.refcount) != 0)
> pr_err("svcrdma: sc_xprt still in use? (%d)\n",
> - atomic_read(&rdma->sc_xprt.xpt_ref.refcount));
> + atomic_read(&xprt->xpt_ref.refcount));
>
> /*
> * Destroy queued, but not processed read completions. Note
> @@ -1222,6 +1224,12 @@ static void __svc_rdma_free(struct work_struct *work)
> pr_err("svcrdma: dma still in use? (%d)\n",
> atomic_read(&rdma->sc_dma_used));
>
> + /* Final put of backchannel client transport */
> + if (xprt->xpt_bc_xprt) {
> + xprt_put(xprt->xpt_bc_xprt);
> + xprt->xpt_bc_xprt = NULL;
> + }
> +
> /* De-allocate fastreg mr */
> rdma_dealloc_frmr_q(rdma);
>
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index 8c545f7..fda7488 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -51,6 +51,7 @@
> #include <linux/slab.h>
> #include <linux/seq_file.h>
> #include <linux/sunrpc/addr.h>
> +#include <linux/sunrpc/svc_rdma.h>
>
> #include "xprt_rdma.h"
>
> @@ -148,7 +149,10 @@ static struct ctl_table sunrpc_table[] = {
> #define RPCRDMA_MAX_REEST_TO (30U * HZ)
> #define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
>
> -static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
> +static struct rpc_xprt_ops xprt_rdma_procs;
> +#if defined(CONFIG_SUNRPC_BACKCHANNEL)
> +static struct rpc_xprt_ops xprt_rdma_bc_procs;
> +#endif
>
> static void
> xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
> @@ -499,7 +503,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
> if (req == NULL)
> return NULL;
>
> - flags = GFP_NOIO | __GFP_NOWARN;
> + flags = RPCRDMA_DEF_GFP;
> if (RPC_IS_SWAPPER(task))
> flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
>
> @@ -684,6 +688,199 @@ xprt_rdma_disable_swap(struct rpc_xprt *xprt)
> {
> }
>
> +#if defined(CONFIG_SUNRPC_BACKCHANNEL)
> +
> +/* Server-side transport endpoint wants a whole page for its send
> + * buffer. The client RPC code constructs the RPC header in this
> + * buffer before it invokes ->send_request.
> + */
> +static void *
> +xprt_rdma_bc_allocate(struct rpc_task *task, size_t size)
> +{
> + struct rpc_rqst *rqst = task->tk_rqstp;
> + struct svc_rdma_op_ctxt *ctxt;
> + struct svcxprt_rdma *rdma;
> + struct svc_xprt *sxprt;
> + struct page *page;
> +
> + if (size > PAGE_SIZE) {
> + WARN_ONCE(1, "failed to handle buffer allocation (size %zu)\n",
> + size);
> + return NULL;
> + }
> +
> + page = alloc_page(RPCRDMA_DEF_GFP);
> + if (!page)
> + return NULL;
> +
> + sxprt = rqst->rq_xprt->bc_xprt;
> + rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
> + ctxt = svc_rdma_get_context_gfp(rdma, RPCRDMA_DEF_GFP);
> + if (!ctxt) {
> + put_page(page);
> + return NULL;
> + }
> +
> + rqst->rq_privdata = ctxt;
> + ctxt->pages[0] = page;
> + ctxt->count = 1;
> + return page_address(page);
> +}
> +
> +static void
> +xprt_rdma_bc_free(void *buffer)
> +{
> + /* No-op: ctxt and page have already been freed. */
> +}
> +
> +static int
> +rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
> +{
> + struct rpc_xprt *xprt = rqst->rq_xprt;
> + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
> + struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer;
> + struct svc_rdma_op_ctxt *ctxt;
> + int rc;
> +
> + /* Space in the send buffer for an RPC/RDMA header is reserved
> + * via xprt->tsh_size */
> + headerp->rm_xid = rqst->rq_xid;
> + headerp->rm_vers = rpcrdma_version;
> + headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
> + headerp->rm_type = rdma_msg;
> + headerp->rm_body.rm_chunks[0] = xdr_zero;
> + headerp->rm_body.rm_chunks[1] = xdr_zero;
> + headerp->rm_body.rm_chunks[2] = xdr_zero;
> +
> + pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
> +
> + ctxt = (struct svc_rdma_op_ctxt *)rqst->rq_privdata;
> + rc = svc_rdma_bc_post_send(rdma, ctxt, &rqst->rq_snd_buf);
> + if (rc)
> + goto drop_connection;
> + return rc;
> +
> +drop_connection:
> + pr_info("Failed to send backwards request\n");
This is going to be a very strange message to see out of context.
What's a "backwards request"?
> + svc_rdma_put_context(ctxt, 1);
> + xprt_disconnect_done(xprt);
> + return -ENOTCONN;
> +}
> +
> +/* Take an RPC request and sent it on the passive end of a
> + * transport connection.
> + */
Typo: "sent" should be "send".
> +static int
> +xprt_rdma_bc_send_request(struct rpc_task *task)
> +{
> + struct rpc_rqst *rqst = task->tk_rqstp;
> + struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
> + struct svcxprt_rdma *rdma;
> + u32 len;
> +
> + pr_info("%s: sending request with xid: %08x\n",
> + __func__, be32_to_cpu(rqst->rq_xid));
> +
> + if (!mutex_trylock(&sxprt->xpt_mutex)) {
> + rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
> + if (!mutex_trylock(&sxprt->xpt_mutex))
> + return -EAGAIN;
> + rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
> + }
> +
> + len = -ENOTCONN;
> + rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
> + if (!test_bit(XPT_DEAD, &sxprt->xpt_flags))
> + len = rpcrdma_bc_send_request(rdma, rqst);
> +
> + mutex_unlock(&sxprt->xpt_mutex);
> +
> + if (len < 0)
> + return len;
> + return 0;
> +}
> +
> +static void
> +xprt_rdma_bc_close(struct rpc_xprt *xprt)
> +{
> + pr_info("RPC: %s: xprt %p\n", __func__, xprt);
> +}
> +
> +static void
> +xprt_rdma_bc_put(struct rpc_xprt *xprt)
> +{
> + pr_info("RPC: %s: xprt %p\n", __func__, xprt);
> +
> + xprt_free(xprt);
> + module_put(THIS_MODULE);
> +}
> +
> +/* It shouldn't matter if the number of backchannel session slots
> + * doesn't match the number of RPC/RDMA credits. That just means
> + * one or the other will have extra slots that aren't used.
> + */
> +static struct rpc_xprt *
> +xprt_setup_rdma_bc(struct xprt_create *args)
> +{
> + struct rpc_xprt *xprt;
> + struct rpcrdma_xprt *new_xprt;
> +
> + if (args->addrlen > sizeof(xprt->addr)) {
> + dprintk("RPC: %s: address too large\n", __func__);
> + return ERR_PTR(-EBADF);
> + }
> +
> + xprt = xprt_alloc(args->net, sizeof(*new_xprt),
> + RPCRDMA_MAX_BC_REQUESTS,
> + RPCRDMA_MAX_BC_REQUESTS);
> + if (xprt == NULL) {
> + dprintk("RPC: %s: couldn't allocate rpc_xprt\n",
> + __func__);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + xprt->timeout = &xprt_rdma_default_timeout;
> + xprt_set_bound(xprt);
> + xprt_set_connected(xprt);
> + xprt->bind_timeout = RPCRDMA_BIND_TO;
> + xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
> + xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
> +
> + xprt->prot = XPRT_TRANSPORT_BC_RDMA;
> + xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32);
> + xprt->ops = &xprt_rdma_bc_procs;
> +
> + memcpy(&xprt->addr, args->dstaddr, args->addrlen);
> + xprt->addrlen = args->addrlen;
> + xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr);
> + xprt->resvport = 0;
> +
> + xprt->max_payload = xprt_rdma_max_inline_read;
> +
> + new_xprt = rpcx_to_rdmax(xprt);
> + new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs;
> +
> + xprt_get(xprt);
> + args->bc_xprt->xpt_bc_xprt = xprt;
> + xprt->bc_xprt = args->bc_xprt;
> +
> + if (!try_module_get(THIS_MODULE))
> + goto out_fail;
> +
> + /* Final put for backchannel xprt is in __svc_rdma_free */
> + xprt_get(xprt);
> + return xprt;
> +
> +out_fail:
> + xprt_rdma_free_addresses(xprt);
> + args->bc_xprt->xpt_bc_xprt = NULL;
> + xprt_put(xprt);
> + xprt_free(xprt);
> + return ERR_PTR(-EINVAL);
> +}
> +
> +#endif /* CONFIG_SUNRPC_BACKCHANNEL */
> +
> /*
> * Plumbing for rpc transport switch and kernel module
> */
> @@ -722,6 +919,32 @@ static struct xprt_class xprt_rdma = {
> .setup = xprt_setup_rdma,
> };
>
> +#if defined(CONFIG_SUNRPC_BACKCHANNEL)
> +
> +static struct rpc_xprt_ops xprt_rdma_bc_procs = {
> + .reserve_xprt = xprt_reserve_xprt_cong,
> + .release_xprt = xprt_release_xprt_cong,
> + .alloc_slot = xprt_alloc_slot,
> + .release_request = xprt_release_rqst_cong,
> + .buf_alloc = xprt_rdma_bc_allocate,
> + .buf_free = xprt_rdma_bc_free,
> + .send_request = xprt_rdma_bc_send_request,
> + .set_retrans_timeout = xprt_set_retrans_timeout_def,
> + .close = xprt_rdma_bc_close,
> + .destroy = xprt_rdma_bc_put,
> + .print_stats = xprt_rdma_print_stats
> +};
> +
> +static struct xprt_class xprt_rdma_bc = {
> + .list = LIST_HEAD_INIT(xprt_rdma_bc.list),
> + .name = "rdma backchannel",
> + .owner = THIS_MODULE,
> + .ident = XPRT_TRANSPORT_BC_RDMA,
> + .setup = xprt_setup_rdma_bc,
> +};
> +
> +#endif /* CONFIG_SUNRPC_BACKCHANNEL */
> +
> void xprt_rdma_cleanup(void)
> {
> int rc;
> @@ -740,6 +963,13 @@ void xprt_rdma_cleanup(void)
>
> rpcrdma_destroy_wq();
> frwr_destroy_recovery_wq();
> +
> +#if defined(CONFIG_SUNRPC_BACKCHANNEL)
> + rc = xprt_unregister_transport(&xprt_rdma_bc);
> + if (rc)
> + dprintk("RPC: %s: xprt_unregister(bc) returned %i\n",
> + __func__, rc);
> +#endif
> }
>
> int xprt_rdma_init(void)
> @@ -763,6 +993,15 @@ int xprt_rdma_init(void)
> return rc;
> }
>
> +#if defined(CONFIG_SUNRPC_BACKCHANNEL)
> + rc = xprt_register_transport(&xprt_rdma_bc);
> + if (rc) {
> + xprt_unregister_transport(&xprt_rdma);
> + frwr_destroy_recovery_wq();
> + return rc;
> + }
> +#endif
> +
> dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
>
> dprintk("Defaults:\n");
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index 9aeff2b..485027e 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -148,6 +148,8 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
> return (struct rpcrdma_msg *)rb->rg_base;
> }
>
> +#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
> +
> /*
> * struct rpcrdma_rep -- this structure encapsulates state required to recv
> * and complete a reply, asychronously. It needs several pieces of
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
next prev parent reply other threads:[~2015-11-24 0:56 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-11-23 22:20 [PATCH v1 0/8] NFS/RDMA server patches for 4.5 Chuck Lever
2015-11-23 22:20 ` [PATCH v1 1/8] svcrdma: Do not send XDR roundup bytes for a write chunk Chuck Lever
2015-11-23 22:20 ` [PATCH v1 2/8] svcrdma: Define maximum number of backchannel requests Chuck Lever
2015-11-24 0:39 ` Tom Talpey
2015-11-24 1:09 ` Chuck Lever
2015-11-24 1:19 ` Tom Talpey
2015-11-24 1:36 ` Trond Myklebust
2015-11-24 1:36 ` Chuck Lever
2015-11-24 1:47 ` Tom Talpey
2015-11-23 22:20 ` [PATCH v1 3/8] svcrdma: Add svc_rdma_get_context() API that is allowed to fail Chuck Lever
2015-11-24 6:55 ` Christoph Hellwig
2015-11-24 14:24 ` Chuck Lever
2015-11-24 20:02 ` Christoph Hellwig
2015-11-24 20:06 ` Chuck Lever
2015-12-04 15:29 ` Chuck Lever
2015-11-23 22:20 ` [PATCH v1 4/8] svcrdma: Add infrastructure to send backwards direction RPC/RDMA calls Chuck Lever
2015-11-23 22:20 ` [PATCH v1 5/8] svcrdma: Add infrastructure to receive backwards direction RPC/RDMA replies Chuck Lever
2015-11-24 0:44 ` Tom Talpey
2015-11-24 1:47 ` Chuck Lever
2015-11-24 2:02 ` Tom Talpey
2015-11-23 22:21 ` [PATCH v1 6/8] xprtrdma: Add class for RDMA backwards direction transport Chuck Lever
2015-11-24 0:49 ` Tom Talpey [this message]
2015-11-23 22:21 ` [PATCH v1 7/8] svcrdma: No need to count WRs in svc_rdma_send() Chuck Lever
2015-11-23 22:21 ` [PATCH v1 8/8] svcrdma: Remove svc_rdma_fastreg_mr::access_flags field Chuck Lever
2015-11-24 0:52 ` Tom Talpey
2015-11-24 0:53 ` Chuck Lever
2015-11-24 6:39 ` Christoph Hellwig
2015-11-24 14:08 ` Chuck Lever
2015-11-24 16:03 ` Christoph Hellwig
2015-11-24 16:04 ` Chuck Lever
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5653B435.2040509@talpey.com \
--to=tom@talpey.com \
--cc=chuck.lever@oracle.com \
--cc=linux-nfs@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).