public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
From: Yann Droneaud <ydroneaud-RlY5vtjFyJ3QT0dZR+AlfA@public.gmane.org>
To: Yuval Shaia <yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: Re: [PATCH] ib_ipoib: Scatter-Gather support in connected mode
Date: Wed, 28 Jan 2015 12:36:27 +0100	[thread overview]
Message-ID: <1422444987.3133.81.camel@opteya.com> (raw)
In-Reply-To: <1422357682-8934-1-git-send-email-yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>

Hi,

Le mardi 27 janvier 2015 à 03:21 -0800, Yuval Shaia a écrit :
> With this fix Scatter-Gather will be supported also in connected mode
> 

Please explain the issue with current kernel and the advantage
of the proposed fix in the commit message. Perhaps benchmarks
against various HCAs would be welcome.

> Signed-off-by: Yuval Shaia <yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>  drivers/infiniband/ulp/ipoib/ipoib.h      |    8 +--
>  drivers/infiniband/ulp/ipoib/ipoib_cm.c   |  107 +++++++++++++++++++++++------
>  drivers/infiniband/ulp/ipoib/ipoib_ib.c   |    3 +-
>  drivers/infiniband/ulp/ipoib/ipoib_main.c |    3 +-
>  4 files changed, 91 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> index d7562be..fb42834 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> @@ -170,11 +170,6 @@ struct ipoib_tx_buf {
>  	u64		mapping[MAX_SKB_FRAGS + 1];
>  };
>  
> -struct ipoib_cm_tx_buf {
> -	struct sk_buff *skb;
> -	u64		mapping;
> -};
> -
>  struct ib_cm_id;
>  
>  struct ipoib_cm_data {
> @@ -233,7 +228,7 @@ struct ipoib_cm_tx {
>  	struct net_device   *dev;
>  	struct ipoib_neigh  *neigh;
>  	struct ipoib_path   *path;
> -	struct ipoib_cm_tx_buf *tx_ring;
> +	struct ipoib_tx_buf *tx_ring;
>  	unsigned	     tx_head;
>  	unsigned	     tx_tail;
>  	unsigned long	     flags;
> @@ -496,6 +491,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
>  
>  void ipoib_mcast_dev_down(struct net_device *dev);
>  void ipoib_mcast_dev_flush(struct net_device *dev);
> +int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req);
>  
>  #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
>  struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev);
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
> index 933efce..056e4d2 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
> @@ -88,6 +88,31 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
>  		ib_dma_unmap_page(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
>  }
>  
> +static void ipoib_cm_dma_unmap_tx(struct ipoib_dev_priv *priv,
> +				  struct ipoib_tx_buf *tx_req)
> +{
> +	struct sk_buff *skb;
> +	int i, offs;
> +
> +	skb = tx_req->skb;
> +	if (skb_shinfo(skb)->nr_frags) {
> +		offs = 0;
> +		if (skb_headlen(skb)) {
> +			ib_dma_unmap_single(priv->ca, tx_req->mapping[0],
> +					    skb_headlen(skb), DMA_TO_DEVICE);
> +			offs = 1;
> +		}
> +		for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
> +			const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
> +
> +			ib_dma_unmap_single(priv->ca, tx_req->mapping[i + offs],
> +					    skb_frag_size(frag), DMA_TO_DEVICE);
> +		}
> +	} else
> +		ib_dma_unmap_single(priv->ca, tx_req->mapping[0], skb->len,
> +				    DMA_TO_DEVICE);
> +}
> +
>  static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
> @@ -707,11 +732,39 @@ static inline int post_send(struct ipoib_dev_priv *priv,
>  	return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
>  }
>  
> +static inline int post_send_sg(struct ipoib_dev_priv *priv,
> +			       struct ipoib_cm_tx *tx,
> +			       unsigned int wr_id,
> +			       struct sk_buff *skb,
> +			       u64 mapping[MAX_SKB_FRAGS + 1])
> +{
> +	struct ib_send_wr *bad_wr;
> +	int i, off;
> +	skb_frag_t *frags = skb_shinfo(skb)->frags;
> +	int nr_frags = skb_shinfo(skb)->nr_frags;
> +
> +	if (skb_headlen(skb)) {
> +		priv->tx_sge[0].addr = mapping[0];
> +		priv->tx_sge[0].length = skb_headlen(skb);
> +		off = 1;
> +	} else
> +		off = 0;
> +
> +	for (i = 0; i < nr_frags; ++i) {
> +		priv->tx_sge[i + off].addr = mapping[i + off];
> +		priv->tx_sge[i + off].length = frags[i].size;
> +	}
> +	priv->tx_wr.num_sge = nr_frags + off;
> +	priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM;
> +
> +	return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
> +}
> +
>  void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
> -	struct ipoib_cm_tx_buf *tx_req;
> -	u64 addr;
> +	struct ipoib_tx_buf *tx_req;
> +	u64 addr = 0;
>  	int rc;
>  
>  	if (unlikely(skb->len > tx->mtu)) {
> @@ -735,24 +788,37 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
>  	 */
>  	tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
>  	tx_req->skb = skb;
> -	addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
> -	if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
> -		++dev->stats.tx_errors;
> -		dev_kfree_skb_any(skb);
> -		return;
> -	}
>  
> -	tx_req->mapping = addr;
> +	if (skb_shinfo(skb)->nr_frags) {
> +		if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
> +			++dev->stats.tx_errors;
> +			dev_kfree_skb_any(skb);
> +			return;
> +		}
> +		rc = post_send_sg(priv, tx, tx->tx_head &
> +				  (ipoib_sendq_size - 1),
> +				  skb, tx_req->mapping);
> +	} else {
> +		addr = ib_dma_map_single(priv->ca, skb->data, skb->len,
> +					 DMA_TO_DEVICE);
> +		if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
> +			++dev->stats.tx_errors;
> +			dev_kfree_skb_any(skb);
> +			return;
> +		}
> +
> +		tx_req->mapping[0] = addr;
>  
> -	skb_orphan(skb);
> -	skb_dst_drop(skb);
> +		skb_orphan(skb);
> +		skb_dst_drop(skb);
>  
> -	rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
> -		       addr, skb->len);
> +		rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
> +			       addr, skb->len);
> +	}
>  	if (unlikely(rc)) {
>  		ipoib_warn(priv, "post_send failed, error %d\n", rc);
>  		++dev->stats.tx_errors;
> -		ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
> +		ipoib_cm_dma_unmap_tx(priv, tx_req);
>  		dev_kfree_skb_any(skb);
>  	} else {
>  		dev->trans_start = jiffies;
> @@ -777,7 +843,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
>  	struct ipoib_cm_tx *tx = wc->qp->qp_context;
>  	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
> -	struct ipoib_cm_tx_buf *tx_req;
> +	struct ipoib_tx_buf *tx_req;
>  	unsigned long flags;
>  
>  	ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
> @@ -791,7 +857,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
>  
>  	tx_req = &tx->tx_ring[wr_id];
>  
> -	ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
> +	ipoib_cm_dma_unmap_tx(priv, tx_req);
>  
>  	/* FIXME: is this right? Shouldn't we only increment on success? */
>  	++dev->stats.tx_packets;
> @@ -1036,6 +1102,9 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
>  
>  	struct ib_qp *tx_qp;
>  
> +	if (dev->features & NETIF_F_SG)
> +		attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
> +
>  	tx_qp = ib_create_qp(priv->pd, &attr);
>  	if (PTR_ERR(tx_qp) == -EINVAL) {
>  		ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n",
> @@ -1170,7 +1239,7 @@ err_tx:
>  static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
> -	struct ipoib_cm_tx_buf *tx_req;
> +	struct ipoib_tx_buf *tx_req;
>  	unsigned long begin;
>  
>  	ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
> @@ -1197,8 +1266,7 @@ timeout:
>  
>  	while ((int) p->tx_tail - (int) p->tx_head < 0) {
>  		tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
> -		ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
> -				    DMA_TO_DEVICE);
> +		ipoib_cm_dma_unmap_tx(priv, tx_req);
>  		dev_kfree_skb_any(tx_req->skb);
>  		++p->tx_tail;
>  		netif_tx_lock_bh(p->dev);
> @@ -1455,7 +1523,6 @@ static void ipoib_cm_stale_task(struct work_struct *work)
>  	spin_unlock_irq(&priv->lock);
>  }
>  
> -

No need to remove this line.

>  static ssize_t show_mode(struct device *d, struct device_attribute *attr,
>  			 char *buf)
>  {
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> index 72626c3..9e11447 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> @@ -312,8 +312,7 @@ repost:
>  			   "for buf %d\n", wr_id);
>  }
>  
> -static int ipoib_dma_map_tx(struct ib_device *ca,
> -			    struct ipoib_tx_buf *tx_req)
> +int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req)
>  {
>  	struct sk_buff *skb = tx_req->skb;
>  	u64 *mapping = tx_req->mapping;
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> index 58b5aa3..f9314c6 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> @@ -190,7 +190,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
>  
>  	if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
> -		features &= ~(NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO);
> +		features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
>  
>  	return features;
>  }
> @@ -233,7 +233,6 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
>  			   "will cause multicast packet drops\n");
>  		netdev_update_features(dev);
>  		rtnl_unlock();
> -		priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;

Sure about this removal ? I don't see it related to scatter-gather.

>  
>  		ipoib_flush_paths(dev);
>  		rtnl_lock();

Regards.

-- 
Yann Droneaud
OPTEYA


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2015-01-28 11:36 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-27 11:21 [PATCH] ib_ipoib: Scatter-Gather support in connected mode Yuval Shaia
     [not found] ` <1422357682-8934-1-git-send-email-yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2015-01-28 11:36   ` Yann Droneaud [this message]
     [not found]     ` <1422444987.3133.81.camel-RlY5vtjFyJ3QT0dZR+AlfA@public.gmane.org>
2015-02-01  7:09       ` Yuval Shaia
     [not found]         ` <20150315151616.GA3546@yuval-lab>
2015-03-22  9:21           ` Yuval Shaia
2015-03-23 17:17             ` Jason Gunthorpe
     [not found]               ` <20150323171749.GA3580-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2015-04-01 17:17                 ` ira.weiny
     [not found]                   ` <20150401171708.GA21266-W4f6Xiosr+yv7QzWx2u06xL4W9x8LtSr@public.gmane.org>
2015-04-01 20:08                     ` Yuval Shaia
2015-05-05 12:23                       ` Yuval Shaia
2015-04-01 20:33                 ` Yuval Shaia
2015-05-07 14:45                   ` Doug Ledford

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1422444987.3133.81.camel@opteya.com \
    --to=ydroneaud-rly5vtjfyj3qt0dzr+alfa@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox