All of lore.kernel.org
 help / color / mirror / Atom feed
From: "J. Bruce Fields" <bfields@fieldses.org>
To: Steve Wise <swise@opengridcomputing.com>
Cc: linux-nfs@vger.kernel.org, linux-rdma@vger.kernel.org,
	tom@opengridcomputing.com
Subject: Re: [PATCH V2 RFC 1/3] svcrdma: Transport and header file changes
Date: Tue, 6 May 2014 15:21:57 -0400	[thread overview]
Message-ID: <20140506192157.GJ18281@fieldses.org> (raw)
In-Reply-To: <20140506174626.18208.95519.stgit@build.ogc.int>

On Tue, May 06, 2014 at 12:46:27PM -0500, Steve Wise wrote:
> From: Tom Tucker <tom@opengridcomputing.com>
> 
> Change poll logic to grab up to 6 completions at a time.
> 
> RDMA write and send completions no longer deal with fastreg objects.
> 
> Set SVCRDMA_DEVCAP_FAST_REG and allocate a dma_mr based on the device
> capabilities.
> 
> Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> ---
> 
>  include/linux/sunrpc/svc_rdma.h          |    3 -
>  net/sunrpc/xprtrdma/svc_rdma_transport.c |   62 +++++++++++++++++-------------
>  2 files changed, 37 insertions(+), 28 deletions(-)
> 
> diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
> index 0b8e3e6..5cf99a0 100644
> --- a/include/linux/sunrpc/svc_rdma.h
> +++ b/include/linux/sunrpc/svc_rdma.h
> @@ -115,14 +115,13 @@ struct svc_rdma_fastreg_mr {
>  	struct list_head frmr_list;
>  };
>  struct svc_rdma_req_map {
> -	struct svc_rdma_fastreg_mr *frmr;
>  	unsigned long count;
>  	union {
>  		struct kvec sge[RPCSVC_MAXPAGES];
>  		struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
> +		unsigned long lkey[RPCSVC_MAXPAGES];
>  	};
>  };
> -#define RDMACTXT_F_FAST_UNREG	1
>  #define RDMACTXT_F_LAST_CTXT	2
>  
>  #define	SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */
> diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> index 25688fa..2c5b201 100644
> --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
> +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> @@ -1,4 +1,5 @@
>  /*
> + * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
>   * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
>   *
>   * This software is available to you under a choice of one of two
> @@ -160,7 +161,6 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
>  		schedule_timeout_uninterruptible(msecs_to_jiffies(500));
>  	}
>  	map->count = 0;
> -	map->frmr = NULL;
>  	return map;
>  }
>  
> @@ -336,22 +336,21 @@ static void process_context(struct svcxprt_rdma *xprt,
>  
>  	switch (ctxt->wr_op) {
>  	case IB_WR_SEND:
> -		if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
> -			svc_rdma_put_frmr(xprt, ctxt->frmr);
> +		BUG_ON(ctxt->frmr);
>  		svc_rdma_put_context(ctxt, 1);
>  		break;
>  
>  	case IB_WR_RDMA_WRITE:
> +		BUG_ON(ctxt->frmr);
>  		svc_rdma_put_context(ctxt, 0);
>  		break;
>  
>  	case IB_WR_RDMA_READ:
>  	case IB_WR_RDMA_READ_WITH_INV:
> +		svc_rdma_put_frmr(xprt, ctxt->frmr);
>  		if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
>  			struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
>  			BUG_ON(!read_hdr);
> -			if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
> -				svc_rdma_put_frmr(xprt, ctxt->frmr);
>  			spin_lock_bh(&xprt->sc_rq_dto_lock);
>  			set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
>  			list_add_tail(&read_hdr->dto_q,
> @@ -363,6 +362,7 @@ static void process_context(struct svcxprt_rdma *xprt,
>  		break;
>  
>  	default:
> +		BUG_ON(1);
>  		printk(KERN_ERR "svcrdma: unexpected completion type, "
>  		       "opcode=%d\n",
>  		       ctxt->wr_op);

Note the printk's unreachable now.  Should some of these BUG_ON()'s be
WARN_ON()'s?

> @@ -378,29 +378,42 @@ static void process_context(struct svcxprt_rdma *xprt,
>  static void sq_cq_reap(struct svcxprt_rdma *xprt)
>  {
>  	struct svc_rdma_op_ctxt *ctxt = NULL;
> -	struct ib_wc wc;
> +	struct ib_wc wc_a[6];
> +	struct ib_wc *wc;
>  	struct ib_cq *cq = xprt->sc_sq_cq;
>  	int ret;

May want to keep an eye on the stack usage here?

--b.

>  
> +	memset(wc_a, 0, sizeof(wc_a));
> +
>  	if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
>  		return;
>  
>  	ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
>  	atomic_inc(&rdma_stat_sq_poll);
> -	while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
> -		if (wc.status != IB_WC_SUCCESS)
> -			/* Close the transport */
> -			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
> +	while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
> +		int i;
>  
> -		/* Decrement used SQ WR count */
> -		atomic_dec(&xprt->sc_sq_count);
> -		wake_up(&xprt->sc_send_wait);
> +		for (i = 0; i < ret; i++) {
> +			wc = &wc_a[i];
> +			if (wc->status != IB_WC_SUCCESS) {
> +				dprintk("svcrdma: sq wc err status %d\n",
> +					wc->status);
>  
> -		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
> -		if (ctxt)
> -			process_context(xprt, ctxt);
> +				/* Close the transport */
> +				set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
> +			}
>  
> -		svc_xprt_put(&xprt->sc_xprt);
> +			/* Decrement used SQ WR count */
> +			atomic_dec(&xprt->sc_sq_count);
> +			wake_up(&xprt->sc_send_wait);
> +
> +			ctxt = (struct svc_rdma_op_ctxt *)
> +				(unsigned long)wc->wr_id;
> +			if (ctxt)
> +				process_context(xprt, ctxt);
> +
> +			svc_xprt_put(&xprt->sc_xprt);
> +		}
>  	}
>  
>  	if (ctxt)
> @@ -993,7 +1006,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
>  			need_dma_mr = 0;
>  		break;
>  	case RDMA_TRANSPORT_IB:
> -		if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
> +		if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
> +			need_dma_mr = 1;
> +			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
> +		} else if (!(devattr.device_cap_flags &
> +			     IB_DEVICE_LOCAL_DMA_LKEY)) {
>  			need_dma_mr = 1;
>  			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
>  		} else
> @@ -1190,14 +1207,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
>  		container_of(xprt, struct svcxprt_rdma, sc_xprt);
>  
>  	/*
> -	 * If there are fewer SQ WR available than required to send a
> -	 * simple response, return false.
> -	 */
> -	if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3))
> -		return 0;
> -
> -	/*
> -	 * ...or there are already waiters on the SQ,
> +	 * If there are already waiters on the SQ,
>  	 * return false.
>  	 */
>  	if (waitqueue_active(&rdma->sc_send_wait))
> 

WARNING: multiple messages have this Message-ID (diff)
From: "J. Bruce Fields" <bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>
To: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	tom-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org
Subject: Re: [PATCH V2 RFC 1/3] svcrdma: Transport and header file changes
Date: Tue, 6 May 2014 15:21:57 -0400	[thread overview]
Message-ID: <20140506192157.GJ18281@fieldses.org> (raw)
In-Reply-To: <20140506174626.18208.95519.stgit-T4OLL4TyM9aNDNWfRnPdfg@public.gmane.org>

On Tue, May 06, 2014 at 12:46:27PM -0500, Steve Wise wrote:
> From: Tom Tucker <tom-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
> 
> Change poll logic to grab up to 6 completions at a time.
> 
> RDMA write and send completions no longer deal with fastreg objects.
> 
> Set SVCRDMA_DEVCAP_FAST_REG and allocate a dma_mr based on the device
> capabilities.
> 
> Signed-off-by: Tom Tucker <tom-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
> Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
> ---
> 
>  include/linux/sunrpc/svc_rdma.h          |    3 -
>  net/sunrpc/xprtrdma/svc_rdma_transport.c |   62 +++++++++++++++++-------------
>  2 files changed, 37 insertions(+), 28 deletions(-)
> 
> diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
> index 0b8e3e6..5cf99a0 100644
> --- a/include/linux/sunrpc/svc_rdma.h
> +++ b/include/linux/sunrpc/svc_rdma.h
> @@ -115,14 +115,13 @@ struct svc_rdma_fastreg_mr {
>  	struct list_head frmr_list;
>  };
>  struct svc_rdma_req_map {
> -	struct svc_rdma_fastreg_mr *frmr;
>  	unsigned long count;
>  	union {
>  		struct kvec sge[RPCSVC_MAXPAGES];
>  		struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
> +		unsigned long lkey[RPCSVC_MAXPAGES];
>  	};
>  };
> -#define RDMACTXT_F_FAST_UNREG	1
>  #define RDMACTXT_F_LAST_CTXT	2
>  
>  #define	SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */
> diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> index 25688fa..2c5b201 100644
> --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
> +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> @@ -1,4 +1,5 @@
>  /*
> + * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
>   * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
>   *
>   * This software is available to you under a choice of one of two
> @@ -160,7 +161,6 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
>  		schedule_timeout_uninterruptible(msecs_to_jiffies(500));
>  	}
>  	map->count = 0;
> -	map->frmr = NULL;
>  	return map;
>  }
>  
> @@ -336,22 +336,21 @@ static void process_context(struct svcxprt_rdma *xprt,
>  
>  	switch (ctxt->wr_op) {
>  	case IB_WR_SEND:
> -		if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
> -			svc_rdma_put_frmr(xprt, ctxt->frmr);
> +		BUG_ON(ctxt->frmr);
>  		svc_rdma_put_context(ctxt, 1);
>  		break;
>  
>  	case IB_WR_RDMA_WRITE:
> +		BUG_ON(ctxt->frmr);
>  		svc_rdma_put_context(ctxt, 0);
>  		break;
>  
>  	case IB_WR_RDMA_READ:
>  	case IB_WR_RDMA_READ_WITH_INV:
> +		svc_rdma_put_frmr(xprt, ctxt->frmr);
>  		if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
>  			struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
>  			BUG_ON(!read_hdr);
> -			if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
> -				svc_rdma_put_frmr(xprt, ctxt->frmr);
>  			spin_lock_bh(&xprt->sc_rq_dto_lock);
>  			set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
>  			list_add_tail(&read_hdr->dto_q,
> @@ -363,6 +362,7 @@ static void process_context(struct svcxprt_rdma *xprt,
>  		break;
>  
>  	default:
> +		BUG_ON(1);
>  		printk(KERN_ERR "svcrdma: unexpected completion type, "
>  		       "opcode=%d\n",
>  		       ctxt->wr_op);

Note the printk's unreachable now.  Should some of these BUG_ON()'s be
WARN_ON()'s?

> @@ -378,29 +378,42 @@ static void process_context(struct svcxprt_rdma *xprt,
>  static void sq_cq_reap(struct svcxprt_rdma *xprt)
>  {
>  	struct svc_rdma_op_ctxt *ctxt = NULL;
> -	struct ib_wc wc;
> +	struct ib_wc wc_a[6];
> +	struct ib_wc *wc;
>  	struct ib_cq *cq = xprt->sc_sq_cq;
>  	int ret;

May want to keep an eye on the stack usage here?

--b.

>  
> +	memset(wc_a, 0, sizeof(wc_a));
> +
>  	if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
>  		return;
>  
>  	ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
>  	atomic_inc(&rdma_stat_sq_poll);
> -	while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
> -		if (wc.status != IB_WC_SUCCESS)
> -			/* Close the transport */
> -			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
> +	while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
> +		int i;
>  
> -		/* Decrement used SQ WR count */
> -		atomic_dec(&xprt->sc_sq_count);
> -		wake_up(&xprt->sc_send_wait);
> +		for (i = 0; i < ret; i++) {
> +			wc = &wc_a[i];
> +			if (wc->status != IB_WC_SUCCESS) {
> +				dprintk("svcrdma: sq wc err status %d\n",
> +					wc->status);
>  
> -		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
> -		if (ctxt)
> -			process_context(xprt, ctxt);
> +				/* Close the transport */
> +				set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
> +			}
>  
> -		svc_xprt_put(&xprt->sc_xprt);
> +			/* Decrement used SQ WR count */
> +			atomic_dec(&xprt->sc_sq_count);
> +			wake_up(&xprt->sc_send_wait);
> +
> +			ctxt = (struct svc_rdma_op_ctxt *)
> +				(unsigned long)wc->wr_id;
> +			if (ctxt)
> +				process_context(xprt, ctxt);
> +
> +			svc_xprt_put(&xprt->sc_xprt);
> +		}
>  	}
>  
>  	if (ctxt)
> @@ -993,7 +1006,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
>  			need_dma_mr = 0;
>  		break;
>  	case RDMA_TRANSPORT_IB:
> -		if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
> +		if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
> +			need_dma_mr = 1;
> +			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
> +		} else if (!(devattr.device_cap_flags &
> +			     IB_DEVICE_LOCAL_DMA_LKEY)) {
>  			need_dma_mr = 1;
>  			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
>  		} else
> @@ -1190,14 +1207,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
>  		container_of(xprt, struct svcxprt_rdma, sc_xprt);
>  
>  	/*
> -	 * If there are fewer SQ WR available than required to send a
> -	 * simple response, return false.
> -	 */
> -	if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3))
> -		return 0;
> -
> -	/*
> -	 * ...or there are already waiters on the SQ,
> +	 * If there are already waiters on the SQ,
>  	 * return false.
>  	 */
>  	if (waitqueue_active(&rdma->sc_send_wait))
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  reply	other threads:[~2014-05-06 19:21 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-06 17:46 [PATCH V2 RFC 0/3] svcrdma: refactor marshalling logic Steve Wise
2014-05-06 17:46 ` Steve Wise
2014-05-06 17:46 ` [PATCH V2 RFC 1/3] svcrdma: Transport and header file changes Steve Wise
2014-05-06 17:46   ` Steve Wise
2014-05-06 19:21   ` J. Bruce Fields [this message]
2014-05-06 19:21     ` J. Bruce Fields
2014-05-06 21:02     ` Steve Wise
2014-05-06 21:02       ` Steve Wise
2014-05-06 21:13       ` J. Bruce Fields
2014-05-06 21:13         ` J. Bruce Fields
2014-05-06 17:46 ` [PATCH V2 RFC 2/3] svcrdma: Recvfrom changes Steve Wise
2014-05-06 17:46   ` Steve Wise
2014-05-13 18:22   ` Chuck Lever
2014-05-13 18:22     ` Chuck Lever
2014-05-13 20:37     ` Steve Wise
2014-05-13 20:37       ` Steve Wise
2014-05-13 21:44       ` Chuck Lever
2014-05-13 21:44         ` Chuck Lever
2014-05-14 14:26         ` Steve Wise
2014-05-14 14:26           ` Steve Wise
2014-05-14 14:39           ` Chuck Lever
2014-05-14 14:39             ` Chuck Lever
2014-05-14 18:11             ` Steve Wise
2014-05-14 18:11               ` Steve Wise
2014-05-14 18:21               ` Chuck Lever
2014-05-14 18:21                 ` Chuck Lever
2014-05-14 18:24                 ` Steve Wise
2014-05-14 18:24                   ` Steve Wise
2014-05-06 17:46 ` [PATCH V2 RFC 3/3] svcrdma: Sendto changes Steve Wise
2014-05-06 17:46   ` Steve Wise
2014-05-06 19:27 ` [PATCH V2 RFC 0/3] svcrdma: refactor marshalling logic J. Bruce Fields
2014-05-06 19:27   ` J. Bruce Fields
2014-05-06 21:09   ` Steve Wise
2014-05-06 21:09     ` Steve Wise
2014-05-19 19:07     ` Devesh Sharma
2014-05-19 19:07       ` Devesh Sharma
2014-05-19 19:14       ` Steve Wise
2014-05-19 19:14         ` Steve Wise
2014-05-20  5:42         ` Devesh Sharma
2014-05-20  5:42           ` Devesh Sharma

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140506192157.GJ18281@fieldses.org \
    --to=bfields@fieldses.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=swise@opengridcomputing.com \
    --cc=tom@opengridcomputing.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.