public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: David Gibson <dwg@au1.ibm.com>
To: Scott Feldman <scott.feldman@intel.com>
Cc: linux-kernel@vger.kernel.org, Anton Blanchard <anton@samba.org>,
	Nancy Milliner <milliner@us.ibm.com>,
	Herman Dierks <hdierks@us.ibm.com>,
	Ricardo Gonzalez <ricardoz@us.ibm.com>
Subject: e1000 performance hack for ppc64 (Power4)
Date: Thu, 12 Jun 2003 13:32:04 +1000	[thread overview]
Message-ID: <20030612033204.GJ9900@zax> (raw)

Hi Scott,

Peculiarities in the PCI bridges on Power4 based ppc64 machines mean
that unaligned DMAs are horribly slow.  This hits us hard on gigabit
transfers, since the packets (starting from the MAC header) are
usually only 2-byte aligned.

The patch below addresses this by copying and realigning packets into
nicely 2k aligned buffers.  As well as fixing the alignment, this
minimises the number of TCE lookups, and because we allocate the
buffers with pci_alloc_consistent(), we avoid setting up and tearing
down the TCE mappings for each packet.

It's definitely a ppc64 specific hack, but I've tried to minimise the
patch's adverse impact on the rest of the code: It should cause no
change in behaviour or performance when the realignment is disabled,
which is true by default on everything except ppc64.  I've also
attempted to minimise the impact on the readability of the rest of the
code.

It would be very helpful if you could include this patch in the
mainline driver.

diff -urN /scratch/anton/export/drivers/net/e1000/e1000.h linux-congo/drivers/net/e1000/e1000.h
--- /scratch/anton/export/drivers/net/e1000/e1000.h	2003-05-30 01:28:06.000000000 +1000
+++ linux-congo/drivers/net/e1000/e1000.h	2003-06-12 12:39:10.000000000 +1000
@@ -163,6 +163,43 @@
 #define E1000_TX_DESC(R, i)		E1000_GET_DESC(R, i, e1000_tx_desc)
 #define E1000_CONTEXT_DESC(R, i)	E1000_GET_DESC(R, i, e1000_context_desc)
 
+#ifdef CONFIG_PPC64
+/* We have a POWER4 specific performance hack to deal with the
+ * slowness of transferring unaligned frames over the PCI bus */
+#define E1000_REALIGN_DATA_HACK	1
+#else
+#define E1000_REALIGN_DATA_HACK	0
+#endif
+
+#define E1000_REALIGN_BUFFER_SIZE	2048 /* importantly, >1514 */
+#define E1000_REALIGN_TARGET_ALIGNMENT	E1000_REALIGN_BUFFER_SIZE
+
+#if E1000_REALIGN_DATA_HACK
+/* We want each buffer to lie within one page, to minimise TCE
+ * lookups */
+#if (PAGE_SIZE % E1000_REALIGN_BUFFER_SIZE)
+#warning E1000_REALIGN_BUFFER_SIZE is not a factor of page size.
+#endif
+
+struct e1000_realign_ring {
+	unsigned char *vaddr;
+	dma_addr_t dma_handle;
+	size_t size;
+};
+
+#define E1000_REALIGN_BUFFER_OFFSET(i)	((i)*E1000_REALIGN_BUFFER_SIZE)
+#define E1000_REALIGN_BUFFER(a,i) ((a)->realign_ring.vaddr ? \
+	(a)->realign_ring.vaddr + E1000_REALIGN_BUFFER_OFFSET(i) : \
+	NULL)
+#define E1000_REALIGN_BUFFER_DMA(a,i) ((a)->realign_ring.dma_handle \
+	+ E1000_REALIGN_BUFFER_OFFSET(i))
+
+#else /* ! E1000_REALIGN_DATA_HACK */
+#define E1000_REALIGN_BUFFER(a,i)	NULL
+#define E1000_REALIGN_BUFFER_DMA(a,i)	0
+#endif /* ! E1000_REALIGN_DATA_HACK */
+
+
 /* board specific private data structure */
 
 struct e1000_adapter {
@@ -186,6 +223,9 @@
 
 	/* TX */
 	struct e1000_desc_ring tx_ring;
+#if E1000_REALIGN_DATA_HACK
+	struct e1000_realign_ring realign_ring;
+#endif /* E1000_REALIGN_DATA_HACK */
 	uint32_t txd_cmd;
 	uint32_t tx_int_delay;
 	uint32_t tx_abs_int_delay;
diff -urN /scratch/anton/export/drivers/net/e1000/e1000_main.c linux-congo/drivers/net/e1000/e1000_main.c
--- /scratch/anton/export/drivers/net/e1000/e1000_main.c	2003-05-30 01:23:39.000000000 +1000
+++ linux-congo/drivers/net/e1000/e1000_main.c	2003-06-12 12:52:58.000000000 +1000
@@ -702,6 +702,23 @@
 	return 0;
 }
 
+static inline void
+e1000_setup_realign_ring(struct e1000_adapter *adapter)
+{
+#if E1000_REALIGN_DATA_HACK
+	struct e1000_desc_ring *txdr = &adapter->tx_ring;
+	struct e1000_realign_ring *rr = &adapter->realign_ring;
+
+	rr->size = txdr->count * E1000_REALIGN_BUFFER_SIZE;
+	rr->vaddr = pci_alloc_consistent(adapter->pdev, rr->size,
+					 &rr->dma_handle);
+
+	if (! rr->vaddr)
+		printk(KERN_WARNING "%s: could not allocate realignment buffers.  Expect poor performance on Power4 hardware\n",
+		       adapter->netdev->name);
+#endif /* E1000_REALIGN_DATA_HACK */
+}
+
 /**
  * e1000_setup_tx_resources - allocate Tx resources (Descriptors)
  * @adapter: board private structure
@@ -735,6 +752,8 @@
 	}
 	memset(txdr->desc, 0, txdr->size);
 
+	e1000_setup_realign_ring(adapter);
+
 	txdr->next_to_use = 0;
 	txdr->next_to_clean = 0;
 
@@ -951,6 +970,19 @@
 	E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
 }
 
+static inline void
+e1000_free_realign_ring(struct e1000_adapter *adapter)
+{
+#if E1000_REALIGN_DATA_HACK
+	struct e1000_realign_ring *rr = &adapter->realign_ring;
+
+	if (rr->vaddr)
+		pci_free_consistent(adapter->pdev, rr->size,
+				    rr->vaddr, rr->dma_handle);
+	rr->vaddr = NULL;
+#endif /* E1000_REALIGN_DATA_HACK */
+}
+
 /**
  * e1000_free_tx_resources - Free Tx Resources
  * @adapter: board private structure
@@ -972,6 +1004,8 @@
 	                    adapter->tx_ring.desc, adapter->tx_ring.dma);
 
 	adapter->tx_ring.desc = NULL;
+
+	e1000_free_realign_ring(adapter);
 }
 
 /**
@@ -990,11 +1024,11 @@
 
 	for(i = 0; i < adapter->tx_ring.count; i++) {
 		if(adapter->tx_ring.buffer_info[i].skb) {
-
-			pci_unmap_page(pdev,
-			               adapter->tx_ring.buffer_info[i].dma,
-			               adapter->tx_ring.buffer_info[i].length,
-			               PCI_DMA_TODEVICE);
+			if(adapter->tx_ring.buffer_info[i].dma)
+				pci_unmap_page(pdev,
+					       adapter->tx_ring.buffer_info[i].dma,
+					       adapter->tx_ring.buffer_info[i].length,
+					       PCI_DMA_TODEVICE);
 
 			dev_kfree_skb(adapter->tx_ring.buffer_info[i].skb);
 
@@ -1491,6 +1525,21 @@
 #define E1000_MAX_DATA_PER_TXD	(1<<E1000_MAX_TXD_PWR)
 
 static inline int
+e1000_realign_data(struct e1000_adapter *adapter, unsigned char *data,
+		     int size, int i)
+{
+	unsigned char *buf = E1000_REALIGN_BUFFER(adapter, i);
+
+	if(buf && (size <= E1000_REALIGN_BUFFER_SIZE)
+	   && ((unsigned long)(data) % E1000_REALIGN_TARGET_ALIGNMENT)) {
+
+		memcpy(buf, data, size);
+		return 1;
+	}
+	return 0;
+}
+
+static inline int
 e1000_tx_map(struct e1000_adapter *adapter, struct sk_buff *skb,
 	unsigned int first)
 {
@@ -1515,11 +1564,13 @@
 			size -= 4;
 #endif
 		tx_ring->buffer_info[i].length = size;
-		tx_ring->buffer_info[i].dma =
-			pci_map_single(adapter->pdev,
-				skb->data + offset,
-				size,
-				PCI_DMA_TODEVICE);
+		if (e1000_realign_data(adapter, skb->data+offset, size, i))
+			tx_ring->buffer_info[i].dma = 0;
+		else
+			tx_ring->buffer_info[i].dma =
+				pci_map_single(adapter->pdev,
+					       skb->data + offset, size,
+					       PCI_DMA_TODEVICE);
 		tx_ring->buffer_info[i].time_stamp = jiffies;
 
 		len -= size;
@@ -1593,7 +1644,13 @@
 
 	while(count--) {
 		tx_desc = E1000_TX_DESC(*tx_ring, i);
-		tx_desc->buffer_addr = cpu_to_le64(tx_ring->buffer_info[i].dma);
+		if (E1000_REALIGN_DATA_HACK
+		    && !tx_ring->buffer_info[i].dma)
+			tx_desc->buffer_addr =
+				cpu_to_le64(E1000_REALIGN_BUFFER_DMA(adapter, i));
+		else
+			tx_desc->buffer_addr =
+				cpu_to_le64(tx_ring->buffer_info[i].dma);
 		tx_desc->lower.data =
 			cpu_to_le32(txd_lower | tx_ring->buffer_info[i].length);
 		tx_desc->upper.data = cpu_to_le32(txd_upper);


-- 
David Gibson			| For every complex problem there is a
david@gibson.dropbear.id.au	| solution which is simple, neat and
				| wrong.
http://www.ozlabs.org/people/dgibson

             reply	other threads:[~2003-06-12  3:17 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-06-12  3:32 David Gibson [this message]
  -- strict thread matches above, loose matches on Subject: below --
2003-06-13  1:16 e1000 performance hack for ppc64 (Power4) Feldman, Scott
2003-06-13 23:15 ` Anton Blanchard
2003-06-13 15:17 Herman Dierks
2003-06-13 16:21 ` Dave Hansen
2003-06-13 22:38   ` Anton Blanchard
2003-06-13 22:46     ` David S. Miller
2003-06-13 23:18       ` Anton Blanchard
2003-06-14  1:52         ` Lincoln Dale
2003-06-14  5:41           ` David S. Miller
2003-06-14  5:52             ` Lincoln Dale
2003-06-14  6:08               ` David S. Miller
2003-06-14  6:14                 ` David S. Miller
2003-06-14  6:27                   ` William Lee Irwin III
2003-06-14 17:08                   ` Greg KH
2003-06-14 17:19                     ` Greg KH
2003-06-14 17:21                     ` Riley Williams
2003-06-15  3:01                     ` David S. Miller
2003-06-14  5:16       ` Nivedita Singhvi
2003-06-14  5:36         ` David S. Miller
2003-06-13 17:03 Herman Dierks
2003-06-13 22:13 Feldman, Scott
2003-06-13 23:52 Feldman, Scott
2003-06-13 23:52 ` David S. Miller
2003-06-14  0:55   ` Anton Blanchard
2003-06-14  1:34     ` David S. Miller
2003-06-14  0:03 ` Anton Blanchard
2003-06-15 14:32 Herman Dierks
2003-06-15 14:40 Herman Dierks
2003-06-15 14:44 ` David S. Miller
2003-06-16 16:17 ` Nivedita Singhvi
2003-06-16 18:21 Feldman, Scott
2003-06-16 18:30 ` Dave Hansen
2003-06-16 18:56 Feldman, Scott

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20030612033204.GJ9900@zax \
    --to=dwg@au1.ibm.com \
    --cc=anton@samba.org \
    --cc=hdierks@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=milliner@us.ibm.com \
    --cc=ricardoz@us.ibm.com \
    --cc=scott.feldman@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox