netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/7] sky2: tx performance improvement
@ 2009-08-19  1:17 Stephen Hemminger
  2009-08-19  1:17 ` [PATCH 1/7] sky2: use upper/lower 32 bits Stephen Hemminger
                   ` (7 more replies)
  0 siblings, 8 replies; 10+ messages in thread
From: Stephen Hemminger @ 2009-08-19  1:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Bunch of patches for net-next (2.6.32) which boost transmit performance.
-- 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/7] sky2: use upper/lower 32 bits
  2009-08-19  1:17 [PATCH 0/7] sky2: tx performance improvement Stephen Hemminger
@ 2009-08-19  1:17 ` Stephen Hemminger
  2009-08-19  1:17 ` [PATCH 2/7] sky2: transmit ring 64 bit conservation Stephen Hemminger
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2009-08-19  1:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

[-- Attachment #1: sky2-upper32.patch --]
[-- Type: text/plain, Size: 1948 bytes --]

Use the existing macros to show where DMA address is being broken
apart. This is cosmetic only.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

--- a/drivers/net/sky2.c	2009-08-18 09:30:17.292586888 -0700
+++ b/drivers/net/sky2.c	2009-08-18 09:32:29.143195273 -0700
@@ -984,12 +984,12 @@ static void sky2_qset(struct sky2_hw *hw
  * hardware and driver list elements
  */
 static void sky2_prefetch_init(struct sky2_hw *hw, u32 qaddr,
-				      u64 addr, u32 last)
+			       dma_addr_t addr, u32 last)
 {
 	sky2_write32(hw, Y2_QADDR(qaddr, PREF_UNIT_CTRL), PREF_UNIT_RST_SET);
 	sky2_write32(hw, Y2_QADDR(qaddr, PREF_UNIT_CTRL), PREF_UNIT_RST_CLR);
-	sky2_write32(hw, Y2_QADDR(qaddr, PREF_UNIT_ADDR_HI), addr >> 32);
-	sky2_write32(hw, Y2_QADDR(qaddr, PREF_UNIT_ADDR_LO), (u32) addr);
+	sky2_write32(hw, Y2_QADDR(qaddr, PREF_UNIT_ADDR_HI), upper_32_bits(addr));
+	sky2_write32(hw, Y2_QADDR(qaddr, PREF_UNIT_ADDR_LO), lower_32_bits(addr));
 	sky2_write16(hw, Y2_QADDR(qaddr, PREF_UNIT_LAST_IDX), last);
 	sky2_write32(hw, Y2_QADDR(qaddr, PREF_UNIT_CTRL), PREF_UNIT_OP_ON);
 
@@ -1057,7 +1057,7 @@ static void sky2_rx_add(struct sky2_port
 	}
 
 	le = sky2_next_rx(sky2);
-	le->addr = cpu_to_le32((u32) map);
+	le->addr = cpu_to_le32(lower_32_bits(map));
 	le->length = cpu_to_le16(len);
 	le->opcode = op | HW_OWNER;
 }
@@ -1662,7 +1662,7 @@ static int sky2_xmit_frame(struct sk_buf
 	}
 
 	le = get_tx_le(sky2, &slot);
-	le->addr = cpu_to_le32((u32) mapping);
+	le->addr = cpu_to_le32(lower_32_bits(mapping));
 	le->length = cpu_to_le16(len);
 	le->ctrl = ctrl;
 	le->opcode = mss ? (OP_LARGESEND | HW_OWNER) : (OP_PACKET | HW_OWNER);
@@ -1689,7 +1689,7 @@ static int sky2_xmit_frame(struct sk_buf
 		}
 
 		le = get_tx_le(sky2, &slot);
-		le->addr = cpu_to_le32((u32) mapping);
+		le->addr = cpu_to_le32(lower_32_bits(mapping));
 		le->length = cpu_to_le16(frag->size);
 		le->ctrl = ctrl;
 		le->opcode = OP_BUFFER | HW_OWNER;

-- 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 2/7] sky2: transmit ring 64 bit conservation
  2009-08-19  1:17 [PATCH 0/7] sky2: tx performance improvement Stephen Hemminger
  2009-08-19  1:17 ` [PATCH 1/7] sky2: use upper/lower 32 bits Stephen Hemminger
@ 2009-08-19  1:17 ` Stephen Hemminger
  2009-08-19  1:17 ` [PATCH 3/7] sky2: simplify list element error Stephen Hemminger
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2009-08-19  1:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

[-- Attachment #1: sky2-64.patch --]
[-- Type: text/plain, Size: 2091 bytes --]

This patch saves elements on the transmit ring by only updating the upper
32 bits of the 64 bit address when they change. With many workloads skb's
are located in the same region, so it saves space.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>


--- a/drivers/net/sky2.c	2009-08-18 09:32:29.143195273 -0700
+++ b/drivers/net/sky2.c	2009-08-18 09:33:15.490426779 -0700
@@ -1016,6 +1016,7 @@ static void tx_init(struct sky2_port *sk
 	le = get_tx_le(sky2, &sky2->tx_prod);
 	le->addr = 0;
 	le->opcode = OP_ADDR64 | HW_OWNER;
+	sky2->tx_last_upper = 0;
 }
 
 static inline struct tx_ring_info *tx_le_re(struct sky2_port *sky2,
@@ -1573,8 +1574,9 @@ static int sky2_xmit_frame(struct sk_buf
 	struct sky2_tx_le *le = NULL;
 	struct tx_ring_info *re;
 	unsigned i, len;
-	u16 slot;
 	dma_addr_t mapping;
+	u32 upper;
+	u16 slot;
 	u16 mss;
 	u8 ctrl;
 
@@ -1593,9 +1595,11 @@ static int sky2_xmit_frame(struct sk_buf
 		       dev->name, slot, skb->len);
 
 	/* Send high bits if needed */
-	if (sizeof(dma_addr_t) > sizeof(u32)) {
+	upper = upper_32_bits(mapping);
+	if (upper != sky2->tx_last_upper) {
 		le = get_tx_le(sky2, &slot);
-		le->addr = cpu_to_le32(upper_32_bits(mapping));
+		le->addr = cpu_to_le32(upper);
+		sky2->tx_last_upper = upper;
 		le->opcode = OP_ADDR64 | HW_OWNER;
 	}
 
@@ -1681,10 +1685,11 @@ static int sky2_xmit_frame(struct sk_buf
 		if (pci_dma_mapping_error(hw->pdev, mapping))
 			goto mapping_unwind;
 
-		if (sizeof(dma_addr_t) > sizeof(u32)) {
+		upper = upper_32_bits(mapping);
+		if (upper != sky2->tx_last_upper) {
 			le = get_tx_le(sky2, &slot);
-			le->addr = cpu_to_le32(upper_32_bits(mapping));
-			le->ctrl = 0;
+			le->addr = cpu_to_le32(upper);
+			sky2->tx_last_upper = upper;
 			le->opcode = OP_ADDR64 | HW_OWNER;
 		}
 
--- a/drivers/net/sky2.h	2009-08-18 09:29:59.224176503 -0700
+++ b/drivers/net/sky2.h	2009-08-18 09:33:15.491426771 -0700
@@ -2017,6 +2017,7 @@ struct sky2_port {
 
 	u16		     tx_pending;
 	u16		     tx_last_mss;
+	u32		     tx_last_upper;
 	u32		     tx_tcpsum;
 
 	struct rx_ring_info  *rx_ring ____cacheline_aligned_in_smp;

-- 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 3/7] sky2: simplify list element error
  2009-08-19  1:17 [PATCH 0/7] sky2: tx performance improvement Stephen Hemminger
  2009-08-19  1:17 ` [PATCH 1/7] sky2: use upper/lower 32 bits Stephen Hemminger
  2009-08-19  1:17 ` [PATCH 2/7] sky2: transmit ring 64 bit conservation Stephen Hemminger
@ 2009-08-19  1:17 ` Stephen Hemminger
  2009-08-19  1:17 ` [PATCH 4/7] sky2: dynamic size transmit ring Stephen Hemminger
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2009-08-19  1:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

[-- Attachment #1: sky2-le-error.patch --]
[-- Type: text/plain, Size: 2017 bytes --]

The code for list element errors (which should only happen on hardware
errors) should be cleaner and safer. This gets rid of the unused ring_size
argument, which makes the next patch easier.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>


--- a/drivers/net/sky2.c	2009-08-18 09:33:15.490426779 -0700
+++ b/drivers/net/sky2.c	2009-08-18 09:34:30.664368374 -0700
@@ -2661,19 +2661,15 @@ static void sky2_mac_intr(struct sky2_hw
 }
 
 /* This should never happen it is a bug. */
-static void sky2_le_error(struct sky2_hw *hw, unsigned port,
-			  u16 q, unsigned ring_size)
+static void sky2_le_error(struct sky2_hw *hw, unsigned port, u16 q)
 {
 	struct net_device *dev = hw->dev[port];
-	struct sky2_port *sky2 = netdev_priv(dev);
-	unsigned idx;
-	const u64 *le = (q == Q_R1 || q == Q_R2)
-		? (u64 *) sky2->rx_le : (u64 *) sky2->tx_le;
-
-	idx = sky2_read16(hw, Y2_QADDR(q, PREF_UNIT_GET_IDX));
-	printk(KERN_ERR PFX "%s: descriptor error q=%#x get=%u [%llx] put=%u\n",
-	       dev->name, (unsigned) q, idx, (unsigned long long) le[idx],
-	       (unsigned) sky2_read16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX)));
+	u16 idx = sky2_read16(hw, Y2_QADDR(q, PREF_UNIT_GET_IDX));
+
+	dev_err(&hw->pdev->dev, PFX
+		"%s: descriptor error q=%#x get=%u put=%u\n",
+		dev->name, (unsigned) q, (unsigned) idx,
+		(unsigned) sky2_read16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX)));
 
 	sky2_write32(hw, Q_ADDR(q, Q_CSR), BMU_CLR_IRQ_CHK);
 }
@@ -2759,16 +2755,16 @@ static void sky2_err_intr(struct sky2_hw
 		sky2_mac_intr(hw, 1);
 
 	if (status & Y2_IS_CHK_RX1)
-		sky2_le_error(hw, 0, Q_R1, RX_LE_SIZE);
+		sky2_le_error(hw, 0, Q_R1);
 
 	if (status & Y2_IS_CHK_RX2)
-		sky2_le_error(hw, 1, Q_R2, RX_LE_SIZE);
+		sky2_le_error(hw, 1, Q_R2);
 
 	if (status & Y2_IS_CHK_TXA1)
-		sky2_le_error(hw, 0, Q_XA1, TX_RING_SIZE);
+		sky2_le_error(hw, 0, Q_XA1);
 
 	if (status & Y2_IS_CHK_TXA2)
-		sky2_le_error(hw, 1, Q_XA2, TX_RING_SIZE);
+		sky2_le_error(hw, 1, Q_XA2);
 }
 
 static int sky2_poll(struct napi_struct *napi, int work_limit)

-- 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 4/7] sky2: dynamic size transmit ring
  2009-08-19  1:17 [PATCH 0/7] sky2: tx performance improvement Stephen Hemminger
                   ` (2 preceding siblings ...)
  2009-08-19  1:17 ` [PATCH 3/7] sky2: simplify list element error Stephen Hemminger
@ 2009-08-19  1:17 ` Stephen Hemminger
  2009-08-19  1:17 ` [PATCH 5/7] sky2: optimize transmit completion Stephen Hemminger
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2009-08-19  1:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

[-- Attachment #1: sky2-tx-size.patch --]
[-- Type: text/plain, Size: 6748 bytes --]

Allocate and size transmit ring based on parameters. Saves excess
space and allows configuring larger rings for testing.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>


--- a/drivers/net/sky2.c	2009-08-18 09:34:30.664368374 -0700
+++ b/drivers/net/sky2.c	2009-08-18 09:34:35.094550982 -0700
@@ -64,10 +64,12 @@
 #define RX_MAX_PENDING		(RX_LE_SIZE/6 - 2)
 #define RX_DEF_PENDING		RX_MAX_PENDING
 
-#define TX_RING_SIZE		512
-#define TX_DEF_PENDING		128
-#define MAX_SKB_TX_LE		(4 + (sizeof(dma_addr_t)/sizeof(u32))*MAX_SKB_FRAGS)
+/* This is the worst case number of transmit list elements for a single skb:
+   VLAN + TSO + CKSUM + Data + skb_frags * DMA */
+#define MAX_SKB_TX_LE	(4 + (sizeof(dma_addr_t)/sizeof(u32))*MAX_SKB_FRAGS)
 #define TX_MIN_PENDING		(MAX_SKB_TX_LE+1)
+#define TX_MAX_PENDING		4096
+#define TX_DEF_PENDING		127
 
 #define STATUS_RING_SIZE	2048	/* 2 ports * (TX + 2*RX) */
 #define STATUS_LE_BYTES		(STATUS_RING_SIZE*sizeof(struct sky2_status_le))
@@ -1000,7 +1002,7 @@ static inline struct sky2_tx_le *get_tx_
 {
 	struct sky2_tx_le *le = sky2->tx_le + *slot;
 
-	*slot = RING_NEXT(*slot, TX_RING_SIZE);
+	*slot = RING_NEXT(*slot, sky2->tx_ring_size);
 	le->ctrl = 0;
 	return le;
 }
@@ -1433,13 +1435,13 @@ static int sky2_up(struct net_device *de
 
 	/* must be power of 2 */
 	sky2->tx_le = pci_alloc_consistent(hw->pdev,
-					   TX_RING_SIZE *
+					   sky2->tx_ring_size *
 					   sizeof(struct sky2_tx_le),
 					   &sky2->tx_le_map);
 	if (!sky2->tx_le)
 		goto err_out;
 
-	sky2->tx_ring = kcalloc(TX_RING_SIZE, sizeof(struct tx_ring_info),
+	sky2->tx_ring = kcalloc(sky2->tx_ring_size, sizeof(struct tx_ring_info),
 				GFP_KERNEL);
 	if (!sky2->tx_ring)
 		goto err_out;
@@ -1491,7 +1493,7 @@ static int sky2_up(struct net_device *de
 		sky2_write16(hw, Q_ADDR(txqaddr[port], Q_AL), ECU_TXFF_LEV);
 
 	sky2_prefetch_init(hw, txqaddr[port], sky2->tx_le_map,
-			   TX_RING_SIZE - 1);
+			   sky2->tx_ring_size - 1);
 
 #ifdef SKY2_VLAN_TAG_USED
 	sky2_set_vlan_mode(hw, port, sky2->vlgrp != NULL);
@@ -1520,7 +1522,7 @@ err_out:
 	}
 	if (sky2->tx_le) {
 		pci_free_consistent(hw->pdev,
-				    TX_RING_SIZE * sizeof(struct sky2_tx_le),
+				    sky2->tx_ring_size * sizeof(struct sky2_tx_le),
 				    sky2->tx_le, sky2->tx_le_map);
 		sky2->tx_le = NULL;
 	}
@@ -1533,15 +1535,15 @@ err_out:
 }
 
 /* Modular subtraction in ring */
-static inline int tx_dist(unsigned tail, unsigned head)
+static inline int tx_inuse(const struct sky2_port *sky2)
 {
-	return (head - tail) & (TX_RING_SIZE - 1);
+	return (sky2->tx_prod - sky2->tx_cons) & (sky2->tx_ring_size - 1);
 }
 
 /* Number of list elements available for next tx */
 static inline int tx_avail(const struct sky2_port *sky2)
 {
-	return sky2->tx_pending - tx_dist(sky2->tx_cons, sky2->tx_prod);
+	return sky2->tx_pending - tx_inuse(sky2);
 }
 
 /* Estimate of number of transmit list elements required */
@@ -1717,7 +1719,7 @@ static int sky2_xmit_frame(struct sk_buf
 	return NETDEV_TX_OK;
 
 mapping_unwind:
-	for (i = sky2->tx_prod; i != slot; i = RING_NEXT(i, TX_RING_SIZE)) {
+	for (i = sky2->tx_prod; i != slot; i = RING_NEXT(i, sky2->tx_ring_size)) {
 		le = sky2->tx_le + i;
 		re = sky2->tx_ring + i;
 
@@ -1760,10 +1762,10 @@ static void sky2_tx_complete(struct sky2
 	struct pci_dev *pdev = sky2->hw->pdev;
 	unsigned idx;
 
-	BUG_ON(done >= TX_RING_SIZE);
+	BUG_ON(done >= sky2->tx_ring_size);
 
 	for (idx = sky2->tx_cons; idx != done;
-	     idx = RING_NEXT(idx, TX_RING_SIZE)) {
+	     idx = RING_NEXT(idx, sky2->tx_ring_size)) {
 		struct sky2_tx_le *le = sky2->tx_le + idx;
 		struct tx_ring_info *re = sky2->tx_ring + idx;
 
@@ -1799,7 +1801,7 @@ static void sky2_tx_complete(struct sky2
 			else
 				dev_kfree_skb_any(skb);
 
-			sky2->tx_next = RING_NEXT(idx, TX_RING_SIZE);
+			sky2->tx_next = RING_NEXT(idx, sky2->tx_ring_size);
 		}
 	}
 
@@ -1907,7 +1909,7 @@ static int sky2_down(struct net_device *
 	kfree(sky2->rx_ring);
 
 	pci_free_consistent(hw->pdev,
-			    TX_RING_SIZE * sizeof(struct sky2_tx_le),
+			    sky2->tx_ring_size * sizeof(struct sky2_tx_le),
 			    sky2->tx_le, sky2->tx_le_map);
 	kfree(sky2->tx_ring);
 
@@ -2517,7 +2519,6 @@ static int sky2_status_intr(struct sky2_
 
 		case OP_TXINDEXLE:
 			/* TX index reports status for both ports */
-			BUILD_BUG_ON(TX_RING_SIZE > 0x1000);
 			sky2_tx_done(hw->dev[0], status & 0xfff);
 			if (hw->dev[1])
 				sky2_tx_done(hw->dev[1],
@@ -3669,7 +3670,7 @@ static int sky2_set_coalesce(struct net_
 	    ecmd->rx_coalesce_usecs_irq > tmax)
 		return -EINVAL;
 
-	if (ecmd->tx_max_coalesced_frames >= TX_RING_SIZE-1)
+	if (ecmd->tx_max_coalesced_frames >= sky2->tx_ring_size-1)
 		return -EINVAL;
 	if (ecmd->rx_max_coalesced_frames > RX_MAX_PENDING)
 		return -EINVAL;
@@ -3713,7 +3714,7 @@ static void sky2_get_ringparam(struct ne
 	ering->rx_max_pending = RX_MAX_PENDING;
 	ering->rx_mini_max_pending = 0;
 	ering->rx_jumbo_max_pending = 0;
-	ering->tx_max_pending = TX_RING_SIZE - 1;
+	ering->tx_max_pending = TX_MAX_PENDING;
 
 	ering->rx_pending = sky2->rx_pending;
 	ering->rx_mini_pending = 0;
@@ -3728,14 +3729,15 @@ static int sky2_set_ringparam(struct net
 
 	if (ering->rx_pending > RX_MAX_PENDING ||
 	    ering->rx_pending < 8 ||
-	    ering->tx_pending < MAX_SKB_TX_LE ||
-	    ering->tx_pending > TX_RING_SIZE - 1)
+	    ering->tx_pending < TX_MIN_PENDING ||
+	    ering->tx_pending > TX_MAX_PENDING)
 		return -EINVAL;
 
 	sky2_detach(dev);
 
 	sky2->rx_pending = ering->rx_pending;
 	sky2->tx_pending = ering->tx_pending;
+	sky2->tx_ring_size = roundup_pow_of_two(sky2->tx_pending+1);
 
 	return sky2_reattach(dev);
 }
@@ -4105,8 +4107,8 @@ static int sky2_debug_show(struct seq_fi
 
 	/* Dump contents of tx ring */
 	sop = 1;
-	for (idx = sky2->tx_next; idx != sky2->tx_prod && idx < TX_RING_SIZE;
-	     idx = RING_NEXT(idx, TX_RING_SIZE)) {
+	for (idx = sky2->tx_next; idx != sky2->tx_prod && idx < sky2->tx_ring_size;
+	     idx = RING_NEXT(idx, sky2->tx_ring_size)) {
 		const struct sky2_tx_le *le = sky2->tx_le + idx;
 		u32 a = le32_to_cpu(le->addr);
 
@@ -4315,7 +4317,9 @@ static __devinit struct net_device *sky2
 	sky2->wol = wol;
 
 	spin_lock_init(&sky2->phy_lock);
+
 	sky2->tx_pending = TX_DEF_PENDING;
+	sky2->tx_ring_size = roundup_pow_of_two(TX_DEF_PENDING+1);
 	sky2->rx_pending = RX_DEF_PENDING;
 
 	hw->dev[port] = dev;
--- a/drivers/net/sky2.h	2009-08-18 09:33:15.491426771 -0700
+++ b/drivers/net/sky2.h	2009-08-18 09:34:35.095551088 -0700
@@ -2011,6 +2011,7 @@ struct sky2_port {
 
 	struct tx_ring_info  *tx_ring;
 	struct sky2_tx_le    *tx_le;
+	u16		     tx_ring_size;
 	u16		     tx_cons;		/* next le to check */
 	u16		     tx_prod;		/* next le to use */
 	u16		     tx_next;		/* debug only */

-- 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 5/7] sky2: optimize transmit completion
  2009-08-19  1:17 [PATCH 0/7] sky2: tx performance improvement Stephen Hemminger
                   ` (3 preceding siblings ...)
  2009-08-19  1:17 ` [PATCH 4/7] sky2: dynamic size transmit ring Stephen Hemminger
@ 2009-08-19  1:17 ` Stephen Hemminger
  2009-08-19  1:17 ` [PATCH 6/7] sky2: no recycling Stephen Hemminger
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2009-08-19  1:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

[-- Attachment #1: sky2-tx-re.patch --]
[-- Type: text/plain, Size: 5100 bytes --]

Don't reference the list element in the hardware transmit ring on transmit
completion. The list element is updated by hardware, therefore
it causes a cache miss. Do the bookkeeping in the software structure instead.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

--- a/drivers/net/sky2.c	2009-08-18 10:50:35.096988776 -0700
+++ b/drivers/net/sky2.c	2009-08-18 11:27:05.272209365 -0700
@@ -1001,8 +1001,11 @@ static void sky2_prefetch_init(struct sk
 static inline struct sky2_tx_le *get_tx_le(struct sky2_port *sky2, u16 *slot)
 {
 	struct sky2_tx_le *le = sky2->tx_le + *slot;
+	struct tx_ring_info *re = sky2->tx_ring + *slot;
 
 	*slot = RING_NEXT(*slot, sky2->tx_ring_size);
+	re->flags = 0;
+	re->skb = NULL;
 	le->ctrl = 0;
 	return le;
 }
@@ -1021,12 +1024,6 @@ static void tx_init(struct sky2_port *sk
 	sky2->tx_last_upper = 0;
 }
 
-static inline struct tx_ring_info *tx_le_re(struct sky2_port *sky2,
-					    struct sky2_tx_le *le)
-{
-	return sky2->tx_ring + (le - sky2->tx_le);
-}
-
 /* Update chip's next pointer */
 static inline void sky2_put_idx(struct sky2_hw *hw, unsigned q, u16 idx)
 {
@@ -1563,6 +1560,19 @@ static unsigned tx_le_req(const struct s
 	return count;
 }
 
+static void sky2_tx_unmap(struct pci_dev *pdev,
+			  const struct tx_ring_info *re)
+{
+	if (re->flags & TX_MAP_SINGLE)
+		pci_unmap_single(pdev, pci_unmap_addr(re, mapaddr),
+				 pci_unmap_len(re, maplen),
+				 PCI_DMA_TODEVICE);
+	else if (re->flags & TX_MAP_PAGE)
+		pci_unmap_page(pdev, pci_unmap_addr(re, mapaddr),
+			       pci_unmap_len(re, maplen),
+			       PCI_DMA_TODEVICE);
+}
+
 /*
  * Put one packet in ring for transmit.
  * A single packet can generate multiple list elements, and
@@ -1667,16 +1677,17 @@ static int sky2_xmit_frame(struct sk_buf
 		}
 	}
 
+	re = sky2->tx_ring + slot;
+	re->flags = TX_MAP_SINGLE;
+	pci_unmap_addr_set(re, mapaddr, mapping);
+	pci_unmap_len_set(re, maplen, len);
+
 	le = get_tx_le(sky2, &slot);
 	le->addr = cpu_to_le32(lower_32_bits(mapping));
 	le->length = cpu_to_le16(len);
 	le->ctrl = ctrl;
 	le->opcode = mss ? (OP_LARGESEND | HW_OWNER) : (OP_PACKET | HW_OWNER);
 
-	re = tx_le_re(sky2, le);
-	re->skb = skb;
-	pci_unmap_addr_set(re, mapaddr, mapping);
-	pci_unmap_len_set(re, maplen, len);
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -1695,18 +1706,19 @@ static int sky2_xmit_frame(struct sk_buf
 			le->opcode = OP_ADDR64 | HW_OWNER;
 		}
 
+		re = sky2->tx_ring + slot;
+		re->flags = TX_MAP_PAGE;
+		pci_unmap_addr_set(re, mapaddr, mapping);
+		pci_unmap_len_set(re, maplen, frag->size);
+
 		le = get_tx_le(sky2, &slot);
 		le->addr = cpu_to_le32(lower_32_bits(mapping));
 		le->length = cpu_to_le16(frag->size);
 		le->ctrl = ctrl;
 		le->opcode = OP_BUFFER | HW_OWNER;
-
-		re = tx_le_re(sky2, le);
-		re->skb = skb;
-		pci_unmap_addr_set(re, mapaddr, mapping);
-		pci_unmap_len_set(re, maplen, frag->size);
 	}
 
+	re->skb = skb;
 	le->ctrl |= EOP;
 
 	sky2->tx_prod = slot;
@@ -1720,23 +1732,9 @@ static int sky2_xmit_frame(struct sk_buf
 
 mapping_unwind:
 	for (i = sky2->tx_prod; i != slot; i = RING_NEXT(i, sky2->tx_ring_size)) {
-		le = sky2->tx_le + i;
 		re = sky2->tx_ring + i;
 
-		switch(le->opcode & ~HW_OWNER) {
-		case OP_LARGESEND:
-		case OP_PACKET:
-			pci_unmap_single(hw->pdev,
-					 pci_unmap_addr(re, mapaddr),
-					 pci_unmap_len(re, maplen),
-					 PCI_DMA_TODEVICE);
-			break;
-		case OP_BUFFER:
-			pci_unmap_page(hw->pdev, pci_unmap_addr(re, mapaddr),
-				       pci_unmap_len(re, maplen),
-				       PCI_DMA_TODEVICE);
-			break;
-		}
+		sky2_tx_unmap(hw->pdev, re);
 	}
 
 mapping_error:
@@ -1759,34 +1757,18 @@ mapping_error:
 static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
 {
 	struct net_device *dev = sky2->netdev;
-	struct pci_dev *pdev = sky2->hw->pdev;
 	unsigned idx;
 
 	BUG_ON(done >= sky2->tx_ring_size);
 
 	for (idx = sky2->tx_cons; idx != done;
 	     idx = RING_NEXT(idx, sky2->tx_ring_size)) {
-		struct sky2_tx_le *le = sky2->tx_le + idx;
 		struct tx_ring_info *re = sky2->tx_ring + idx;
+		struct sk_buff *skb = re->skb;
 
-		switch(le->opcode & ~HW_OWNER) {
-		case OP_LARGESEND:
-		case OP_PACKET:
-			pci_unmap_single(pdev,
-					 pci_unmap_addr(re, mapaddr),
-					 pci_unmap_len(re, maplen),
-					 PCI_DMA_TODEVICE);
-			break;
-		case OP_BUFFER:
-			pci_unmap_page(pdev, pci_unmap_addr(re, mapaddr),
-				       pci_unmap_len(re, maplen),
-				       PCI_DMA_TODEVICE);
-			break;
-		}
-
-		if (le->ctrl & EOP) {
-			struct sk_buff *skb = re->skb;
+		sky2_tx_unmap(sky2->hw->pdev, re);
 
+		if (skb) {
 			if (unlikely(netif_msg_tx_done(sky2)))
 				printk(KERN_DEBUG "%s: tx done %u\n",
 				       dev->name, idx);
--- a/drivers/net/sky2.h	2009-08-18 10:50:35.110991954 -0700
+++ b/drivers/net/sky2.h	2009-08-18 11:09:06.181966221 -0700
@@ -1984,6 +1984,9 @@ struct sky2_status_le {
 
 struct tx_ring_info {
 	struct sk_buff	*skb;
+	unsigned long flags;
+#define TX_MAP_SINGLE   0x0001
+#define TX_MAP_PAGE     000002
 	DECLARE_PCI_UNMAP_ADDR(mapaddr);
 	DECLARE_PCI_UNMAP_LEN(maplen);
 };

-- 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 6/7] sky2: no recycling
  2009-08-19  1:17 [PATCH 0/7] sky2: tx performance improvement Stephen Hemminger
                   ` (4 preceding siblings ...)
  2009-08-19  1:17 ` [PATCH 5/7] sky2: optimize transmit completion Stephen Hemminger
@ 2009-08-19  1:17 ` Stephen Hemminger
  2009-08-19 19:58   ` Stephen Hemminger
  2009-08-19  1:17 ` [PATCH 7/7] sky2: version 1.25 Stephen Hemminger
  2009-08-19  3:28 ` [PATCH 0/7] sky2: tx performance improvement David Miller
  7 siblings, 1 reply; 10+ messages in thread
From: Stephen Hemminger @ 2009-08-19  1:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

[-- Attachment #1: sky2-no-recycle.patch --]
[-- Type: text/plain, Size: 2058 bytes --]

Recycling turns out to be a bad idea!  For most use cases, the
packet can not be reused: TCP packets are cloned. Even for the ideal
case of forwarding, it hurts performance because of CPU ping/pong.
On a multi-core system forwarding of 64 byte packets is much worse:
recycling = 24% forwarded vs no recycling = 42% forwarded

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>


--- a/drivers/net/sky2.c	2009-08-18 17:50:31.591887510 -0700
+++ b/drivers/net/sky2.c	2009-08-18 17:53:04.506927814 -0700
@@ -1177,7 +1177,6 @@ static void sky2_rx_clean(struct sky2_po
 			re->skb = NULL;
 		}
 	}
-	skb_queue_purge(&sky2->rx_recycle);
 }
 
 /* Basic MII support */
@@ -1269,10 +1268,8 @@ static struct sk_buff *sky2_rx_alloc(str
 	struct sk_buff *skb;
 	int i;
 
-	skb = __skb_dequeue(&sky2->rx_recycle);
-	if (!skb)
-		skb = netdev_alloc_skb(sky2->netdev, sky2->rx_data_size
-				       + sky2_rx_pad(sky2->hw));
+	skb = netdev_alloc_skb(sky2->netdev,
+			       sky2->rx_data_size + sky2_rx_pad(sky2->hw));
 	if (!skb)
 		goto nomem;
 
@@ -1364,8 +1361,6 @@ static int sky2_rx_start(struct sky2_por
 
 	sky2->rx_data_size = size;
 
-	skb_queue_head_init(&sky2->rx_recycle);
-
 	/* Fill Rx ring */
 	for (i = 0; i < sky2->rx_pending; i++) {
 		re = sky2->rx_ring + i;
@@ -1776,12 +1771,7 @@ static void sky2_tx_complete(struct sky2
 			dev->stats.tx_packets++;
 			dev->stats.tx_bytes += skb->len;
 
-			if (skb_queue_len(&sky2->rx_recycle) < sky2->rx_pending
-			    && skb_recycle_check(skb, sky2->rx_data_size
-						 + sky2_rx_pad(sky2->hw)))
-				__skb_queue_head(&sky2->rx_recycle, skb);
-			else
-				dev_kfree_skb_any(skb);
+			dev_kfree_skb_any(skb);
 
 			sky2->tx_next = RING_NEXT(idx, sky2->tx_ring_size);
 		}
--- a/drivers/net/sky2.h	2009-08-18 17:50:34.706665948 -0700
+++ b/drivers/net/sky2.h	2009-08-18 17:51:02.596652122 -0700
@@ -2032,7 +2032,6 @@ struct sky2_port {
 	u16		     rx_pending;
 	u16		     rx_data_size;
 	u16		     rx_nfrags;
-	struct sk_buff_head  rx_recycle;
 
 #ifdef SKY2_VLAN_TAG_USED
 	u16		     rx_tag;

-- 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 7/7] sky2: version 1.25
  2009-08-19  1:17 [PATCH 0/7] sky2: tx performance improvement Stephen Hemminger
                   ` (5 preceding siblings ...)
  2009-08-19  1:17 ` [PATCH 6/7] sky2: no recycling Stephen Hemminger
@ 2009-08-19  1:17 ` Stephen Hemminger
  2009-08-19  3:28 ` [PATCH 0/7] sky2: tx performance improvement David Miller
  7 siblings, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2009-08-19  1:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

[-- Attachment #1: sky2-1.25.patch --]
[-- Type: text/plain, Size: 343 bytes --]

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

--- a/drivers/net/sky2.c	2009-08-18 17:53:33.772652966 -0700
+++ b/drivers/net/sky2.c	2009-08-18 17:53:42.177674500 -0700
@@ -50,7 +50,7 @@
 #include "sky2.h"
 
 #define DRV_NAME		"sky2"
-#define DRV_VERSION		"1.24"
+#define DRV_VERSION		"1.25"
 #define PFX			DRV_NAME " "
 
 /*

-- 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 0/7] sky2: tx performance improvement
  2009-08-19  1:17 [PATCH 0/7] sky2: tx performance improvement Stephen Hemminger
                   ` (6 preceding siblings ...)
  2009-08-19  1:17 ` [PATCH 7/7] sky2: version 1.25 Stephen Hemminger
@ 2009-08-19  3:28 ` David Miller
  7 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2009-08-19  3:28 UTC (permalink / raw)
  To: shemminger; +Cc: netdev

From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 18 Aug 2009 18:17:04 -0700

> Bunch of patches for net-next (2.6.32) which boost transmit performance.

This series looks good, applied to net-next-2.6

Thanks!

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 6/7] sky2: no recycling
  2009-08-19  1:17 ` [PATCH 6/7] sky2: no recycling Stephen Hemminger
@ 2009-08-19 19:58   ` Stephen Hemminger
  0 siblings, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2009-08-19 19:58 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev

On Tue, 18 Aug 2009 18:17:10 -0700
Stephen Hemminger <shemminger@vyatta.com> wrote:

> Recycling turns out to be a bad idea!  For most use cases, the
> packet can not be reused: TCP packets are cloned. Even for the ideal
> case of forwarding, it hurts performance because of CPU ping/pong.
> On a multi-core system forwarding of 64 byte packets is worse
> much worse: recycling = 24% forwarded vs no recycling = 42% forwarded

Minor correction on numbers.  The actual data for unidirectional
forwarding is 30% vs 24%. Previous data had hardware flow control enabled
which skewed the numbers.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2009-08-19 19:58 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-08-19  1:17 [PATCH 0/7] sky2: tx performance improvement Stephen Hemminger
2009-08-19  1:17 ` [PATCH 1/7] sky2: use upper/lower 32 bits Stephen Hemminger
2009-08-19  1:17 ` [PATCH 2/7] sky2: transmit ring 64 bit conservation Stephen Hemminger
2009-08-19  1:17 ` [PATCH 3/7] sky2: simplify list element error Stephen Hemminger
2009-08-19  1:17 ` [PATCH 4/7] sky2: dynamic size transmit ring Stephen Hemminger
2009-08-19  1:17 ` [PATCH 5/7] sky2: optimize transmit completion Stephen Hemminger
2009-08-19  1:17 ` [PATCH 6/7] sky2: no recycling Stephen Hemminger
2009-08-19 19:58   ` Stephen Hemminger
2009-08-19  1:17 ` [PATCH 7/7] sky2: version 1.25 Stephen Hemminger
2009-08-19  3:28 ` [PATCH 0/7] sky2: tx performance improvement David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).