* [PATCH net-next 3/3] mlx4_en: Saving mem access on data path
@ 2012-02-23 13:34 Yevgeny Petrilin
  2012-02-23 19:47 ` David Miller
  0 siblings, 1 reply; 5+ messages in thread
From: Yevgeny Petrilin @ 2012-02-23 13:34 UTC (permalink / raw)
  To: davem; +Cc: netdev, yevgenyp


Cache pdev->dev locally and use dma_map instead of pci_map.
There are multiple map/unmap operations on the data path;
caching the pointer saves redundant pointer accesses on each of them.
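
For reference, the change in condensed form (a sketch assembled from the
hunks below, with the buffer arguments abbreviated; not the complete
functions).  pci_map_single() is just a compat wrapper that ends up calling
dma_map_single(&pdev->dev, ...), so the mapping behaviour itself is unchanged:

    /* before: re-derive the struct device on every map/unmap call */
    struct mlx4_en_dev *mdev = priv->mdev;
    dma = pci_map_single(mdev->pdev, buf, size, PCI_DMA_FROMDEVICE);

    /* after: the pointer is cached once in mlx4_en_init_netdev() ... */
    priv->ddev = &mdev->pdev->dev;
    /* ... and the data path uses it directly */
    dma = dma_map_single(priv->ddev, buf, size, PCI_DMA_FROMDEVICE);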

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |    1 +
 drivers/net/ethernet/mellanox/mlx4/en_rx.c     |   14 +++++---------
 drivers/net/ethernet/mellanox/mlx4/en_tx.c     |   13 ++++++-------
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |    1 +
 4 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 9fe4f94..31b455a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1062,6 +1062,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	memset(priv, 0, sizeof(struct mlx4_en_priv));
 	priv->dev = dev;
 	priv->mdev = mdev;
+	priv->ddev = &mdev->pdev->dev;
 	priv->prof = prof;
 	priv->port = port;
 	priv->port_up = false;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index d703ef2..c881712 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -48,7 +48,6 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
 			      struct mlx4_en_rx_alloc *ring_alloc,
 			      int i)
 {
-	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
 	struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i];
 	struct page *page;
@@ -72,7 +71,7 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
 		skb_frags[i].offset = page_alloc->offset;
 		page_alloc->offset += frag_info->frag_stride;
 	}
-	dma = pci_map_single(mdev->pdev, page_address(skb_frags[i].page) +
+	dma = dma_map_single(priv->ddev, page_address(skb_frags[i].page) +
 			     skb_frags[i].offset, frag_info->frag_size,
 			     PCI_DMA_FROMDEVICE);
 	rx_desc->data[i].addr = cpu_to_be64(dma);
@@ -186,7 +185,6 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
 				 struct mlx4_en_rx_ring *ring,
 				 int index)
 {
-	struct mlx4_en_dev *mdev = priv->mdev;
 	struct page_frag *skb_frags;
 	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride);
 	dma_addr_t dma;
@@ -198,7 +196,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
 		dma = be64_to_cpu(rx_desc->data[nr].addr);
 
 		en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma);
-		pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
+		dma_unmap_single(priv->ddev, dma, skb_frags[nr].size,
 				 PCI_DMA_FROMDEVICE);
 		put_page(skb_frags[nr].page);
 	}
@@ -412,7 +410,6 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
 				    int length)
 {
 	struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags;
-	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_frag_info *frag_info;
 	int nr;
 	dma_addr_t dma;
@@ -435,7 +432,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
 			goto fail;
 
 		/* Unmap buffer */
-		pci_unmap_single(mdev->pdev, dma, skb_frag_size(&skb_frags_rx[nr]),
+		dma_unmap_single(priv->ddev, dma, skb_frag_size(&skb_frags_rx[nr]),
 				 PCI_DMA_FROMDEVICE);
 	}
 	/* Adjust size of last fragment to match actual length */
@@ -461,7 +458,6 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
 				      struct mlx4_en_rx_alloc *page_alloc,
 				      unsigned int length)
 {
-	struct mlx4_en_dev *mdev = priv->mdev;
 	struct sk_buff *skb;
 	void *va;
 	int used_frags;
@@ -483,10 +479,10 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
 		/* We are copying all relevant data to the skb - temporarily
 		 * synch buffers for the copy */
 		dma = be64_to_cpu(rx_desc->data[0].addr);
-		dma_sync_single_for_cpu(&mdev->pdev->dev, dma, length,
+		dma_sync_single_for_cpu(priv->ddev, dma, length,
 					DMA_FROM_DEVICE);
 		skb_copy_to_linear_data(skb, va, length);
-		dma_sync_single_for_device(&mdev->pdev->dev, dma, length,
+		dma_sync_single_for_device(priv->ddev, dma, length,
 					   DMA_FROM_DEVICE);
 		skb->tail += length;
 	} else {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index e452bba..829d0cf 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -198,7 +198,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 				struct mlx4_en_tx_ring *ring,
 				int index, u8 owner)
 {
-	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
 	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
 	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
@@ -214,7 +213,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 	if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
 		if (!tx_info->inl) {
 			if (tx_info->linear) {
-				pci_unmap_single(mdev->pdev,
+				dma_unmap_single(priv->ddev,
 					(dma_addr_t) be64_to_cpu(data->addr),
 					 be32_to_cpu(data->byte_count),
 					 PCI_DMA_TODEVICE);
@@ -223,7 +222,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 
 			for (i = 0; i < frags; i++) {
 				frag = &skb_shinfo(skb)->frags[i];
-				pci_unmap_page(mdev->pdev,
+				dma_unmap_page(priv->ddev,
 					(dma_addr_t) be64_to_cpu(data[i].addr),
 					skb_frag_size(frag), PCI_DMA_TODEVICE);
 			}
@@ -241,7 +240,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 			}
 
 			if (tx_info->linear) {
-				pci_unmap_single(mdev->pdev,
+				dma_unmap_single(priv->ddev,
 					(dma_addr_t) be64_to_cpu(data->addr),
 					 be32_to_cpu(data->byte_count),
 					 PCI_DMA_TODEVICE);
@@ -253,7 +252,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 				if ((void *) data >= end)
 					data = ring->buf;
 				frag = &skb_shinfo(skb)->frags[i];
-				pci_unmap_page(mdev->pdev,
+				dma_unmap_page(priv->ddev,
 					(dma_addr_t) be64_to_cpu(data->addr),
 					 skb_frag_size(frag), PCI_DMA_TODEVICE);
 				++data;
@@ -745,7 +744,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 		/* Map fragments */
 		for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
 			frag = &skb_shinfo(skb)->frags[i];
-			dma = skb_frag_dma_map(&mdev->dev->pdev->dev, frag,
+			dma = skb_frag_dma_map(priv->ddev, frag,
 					       0, skb_frag_size(frag),
 					       DMA_TO_DEVICE);
 			data->addr = cpu_to_be64(dma);
@@ -757,7 +756,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		/* Map linear part */
 		if (tx_info->linear) {
-			dma = pci_map_single(mdev->dev->pdev, skb->data + lso_header_size,
+			dma = dma_map_single(priv->ddev, skb->data + lso_header_size,
 					     skb_headlen(skb) - lso_header_size, PCI_DMA_TODEVICE);
 			data->addr = cpu_to_be64(dma);
 			data->lkey = cpu_to_be32(mdev->mr.key);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 174dc38..135200e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -482,6 +482,7 @@ struct mlx4_en_priv {
 	struct mlx4_en_stat_out_mbox hw_stats;
 	int vids[128];
 	bool wol;
+	struct device *ddev;
 };
 
 enum mlx4_en_wol {
-- 
1.5.4.3


* Re: [PATCH net-next 3/3] mlx4_en: Saving mem access on data path
  2012-02-23 13:34 [PATCH net-next 3/3] mlx4_en: Saving mem access on data path Yevgeny Petrilin
@ 2012-02-23 19:47 ` David Miller
  2012-02-24 19:57   ` Yevgeny Petrilin
  0 siblings, 1 reply; 5+ messages in thread
From: David Miller @ 2012-02-23 19:47 UTC (permalink / raw)
  To: yevgenyp; +Cc: netdev

From: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Date: Thu, 23 Feb 2012 15:34:44 +0200

> 
> Cache pdev->dev locally and use dma_map instead of pci_map.
> There are multiple map/unmap operations on the data path;
> caching the pointer saves redundant pointer accesses on each of them.
> 
> Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>

I doubt you can measure any improvement from this at all; all of the
real cost is going to be in programming the hardware (both the
networking chip and potentially any IOMMU hardware to set up the DMA
mappings).  A pointer dereference in one of your data structures will
be unnoticeable.

When you make changes like this I want you to justify them with real
data and facts.

I think this entire patch set was in very poor taste and judgment.


* RE: [PATCH net-next 3/3] mlx4_en: Saving mem access on data path
  2012-02-23 19:47 ` David Miller
@ 2012-02-24 19:57   ` Yevgeny Petrilin
  2012-03-01 13:03     ` Or Gerlitz
  0 siblings, 1 reply; 5+ messages in thread
From: Yevgeny Petrilin @ 2012-02-24 19:57 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org

> >
> > Cache pdev->dev locally and use dma_map instead of pci_map.
> > There are multiple map/unmap operations on the data path;
> > caching the pointer saves redundant pointer accesses on each of them.
> >
> > Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
> 
> I doubt you can measure any improvement from this at all; all of the
> real cost is going to be in programming the hardware (both the
> networking chip and potentially any IOMMU hardware to set up the DMA
> mappings).  A pointer dereference in one of your data structures will be unnoticeable.

We actually did measure improvements with these changes.
These call sites showed up as hot spots when we did kernel profiling on some benchmarks.
Making the changes helped us reduce CPU utilization by several percent, and in some cases it was the
difference between being CPU bound and reaching wire speed.
One scenario that improved with this change was measuring packet rate for
small (64-byte) packets.
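
To make the saving concrete, here is the fragment-mapping call in
mlx4_en_xmit() before and after the patch (a condensed sketch of the
en_tx.c hunk, with the surrounding loop elided).  The old code walks
mdev->dev->pdev on every fragment just to reach the struct device, while
the new code reads the cached priv->ddev directly:

    /* before: two dependent pointer loads per fragment to find the device */
    dma = skb_frag_dma_map(&mdev->dev->pdev->dev, frag, 0,
                           skb_frag_size(frag), DMA_TO_DEVICE);

    /* after: priv->ddev was cached once at init time */
    dma = skb_frag_dma_map(priv->ddev, frag, 0,
                           skb_frag_size(frag), DMA_TO_DEVICE);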

> 
> When you make changes like this I want you to justify them with real
> data and facts.

I agree, the explanation should have been part of the patch description.

> I think this entire patch set was in very poor taste and judgment.
 
Dave,
Thank you for the comprehensive review; please allow me to kindly disagree with the last statement.

The patch set is a result of performance work we did, and those changes helped us to achieve
better results without hurting us in other areas.
I do understand your concern regarding patch 2/3, although we did test it in various scenarios without
running into the issues you described, and there are other vendors doing the same.

I still think that patches 1 and 3 are good for our device;
I'll be happy to make changes to them if you find it necessary, and resubmit.

Thanks,
Yevgeny 


* Re: [PATCH net-next 3/3] mlx4_en: Saving mem access on data path
  2012-02-24 19:57   ` Yevgeny Petrilin
@ 2012-03-01 13:03     ` Or Gerlitz
  2012-03-01 20:47       ` David Miller
  0 siblings, 1 reply; 5+ messages in thread
From: Or Gerlitz @ 2012-03-01 13:03 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org, Yevgeny Petrilin

On Fri, Feb 24, 2012 at 9:57 PM, Yevgeny Petrilin <yevgenyp@mellanox.com> wrote:
> The patch set is a result of performance work we did, and those changes helped us
> to achieve better results without hurting us in other areas.


Hi Dave,

Following Yevgeny's clarification, can this patch (3/3) go to
net-next? As for patch 1/3, we understand that the first thing to do
in that area is to add BQL support to mlx4_en, which makes sense.


Or.


* Re: [PATCH net-next 3/3] mlx4_en: Saving mem access on data path
  2012-03-01 13:03     ` Or Gerlitz
@ 2012-03-01 20:47       ` David Miller
  0 siblings, 0 replies; 5+ messages in thread
From: David Miller @ 2012-03-01 20:47 UTC (permalink / raw)
  To: or.gerlitz; +Cc: netdev, yevgenyp

From: Or Gerlitz <or.gerlitz@gmail.com>
Date: Thu, 1 Mar 2012 15:03:02 +0200

> Following Yevgeny's clarification, can this patch (3/3) go to
> net-next?

Sure, please resubmit it.

