public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
* [RFT] sky2: transmit complete alternative
  2006-08-21 22:04       ` Jon Wikne
@ 2006-08-22 22:38         ` Stephen Hemminger
  2006-08-23 10:06           ` Jon Wikne
  0 siblings, 1 reply; 4+ messages in thread
From: Stephen Hemminger @ 2006-08-22 22:38 UTC (permalink / raw)
  To: Jon Wikne; +Cc: netdev, Daniel Drake

Does the following get rid of the hang?
--------
Recode the transmit completion handling to avoid races between the hardware
status report mechanism and the interrupt handler. Rather than relying on
the index value in the status ring, read the chip register and cleanup
all completed transmits. 

Reduce the transmit lock window smaller to allow more parallelism.

--- sky2.orig/drivers/net/sky2.c	2006-08-22 13:45:17.000000000 -0700
+++ sky2/drivers/net/sky2.c	2006-08-22 14:01:52.000000000 -0700
@@ -135,6 +135,7 @@
 static const unsigned txqaddr[] = { Q_XA1, Q_XA2 };
 static const unsigned rxqaddr[] = { Q_R1, Q_R2 };
 static const u32 portirq_msk[] = { Y2_IS_PORT_1, Y2_IS_PORT_2 };
+static const u16 report_idx[] = { STAT_TXA1_RIDX, STAT_TXA2_RIDX };
 
 /* This driver supports yukon2 chipset only */
 static const char *yukon2_name[] = {
@@ -1189,7 +1190,6 @@
 	struct sky2_tx_le *le = NULL;
 	struct tx_ring_info *re;
 	unsigned i, len;
-	int avail;
 	dma_addr_t mapping;
 	u32 addr64;
 	u16 mss;
@@ -1332,12 +1332,8 @@
 	re->idx = sky2->tx_prod;
 	le->ctrl |= EOP;
 
-	avail = tx_avail(sky2);
-	if (mss != 0 || avail < TX_MIN_PENDING) {
- 		le->ctrl |= FRC_STAT;
-		if (avail <= MAX_SKB_TX_LE)
-			netif_stop_queue(dev);
-	}
+	if (tx_avail(sky2) <= MAX_SKB_TX_LE)
+		netif_stop_queue(dev);
 
 	sky2_put_idx(hw, txqaddr[sky2->port], sky2->tx_prod);
 
@@ -1361,12 +1357,10 @@
 	u16 nxt, put;
 	unsigned i;
 
-	BUG_ON(done >= TX_RING_SIZE);
-
-	if (unlikely(netif_msg_tx_done(sky2)))
-		printk(KERN_DEBUG "%s: tx done, up to %u\n",
-		       dev->name, done);
+	if (done == sky2->tx_cons)
+		return;
 
+	BUG_ON(done >= TX_RING_SIZE);
 	for (put = sky2->tx_cons; put != done; put = nxt) {
 		struct tx_ring_info *re = sky2->tx_ring + put;
 		struct sk_buff *skb = re->skb;
@@ -1391,20 +1385,26 @@
 				       PCI_DMA_TODEVICE);
 		}
 
+		if (unlikely(netif_msg_tx_done(sky2)))
+			printk(KERN_DEBUG "%s: tx done, slot %u\n",
+			       dev->name, put);
+
 		dev_kfree_skb(skb);
 	}
 
+	spin_lock(&sky2->tx_lock);
 	sky2->tx_cons = put;
 	if (tx_avail(sky2) > MAX_SKB_TX_LE + 4)
 		netif_wake_queue(dev);
+	spin_unlock(&sky2->tx_lock);
 }
 
 /* Cleanup all untransmitted buffers, assume transmitter not running */
 static void sky2_tx_clean(struct sky2_port *sky2)
 {
-	spin_lock_bh(&sky2->tx_lock);
+	local_bh_disable();
 	sky2_tx_complete(sky2, sky2->tx_prod);
-	spin_unlock_bh(&sky2->tx_lock);
+	local_bh_enable();
 }
 
 /* Network shutdown */
@@ -1732,7 +1732,7 @@
 	if (netif_msg_timer(sky2))
 		printk(KERN_ERR PFX "%s: tx timeout\n", dev->name);
 
-	report = sky2_read16(hw, sky2->port == 0 ? STAT_TXA1_RIDX : STAT_TXA2_RIDX);
+	report = sky2_read16(hw, report_idx[sky2->port]);
 	done = sky2_read16(hw, Q_ADDR(txq, Q_DONE));
 
 	printk(KERN_DEBUG PFX "%s: transmit ring %u .. %u report=%u done=%u\n",
@@ -1747,9 +1747,7 @@
 	} else if (report != sky2->tx_cons) {
 		printk(KERN_INFO PFX "status report lost?\n");
 
-		spin_lock_bh(&sky2->tx_lock);
 		sky2_tx_complete(sky2, report);
-		spin_unlock_bh(&sky2->tx_lock);
 	} else {
 		printk(KERN_INFO PFX "hardware hung? flushing\n");
 
@@ -1919,15 +1917,14 @@
 	goto resubmit;
 }
 
-/* Transmit complete */
-static inline void sky2_tx_done(struct net_device *dev, u16 last)
+/* Transmit completion handling */
+static void sky2_tx(struct sky2_hw *hw, unsigned port)
 {
-	struct sky2_port *sky2 = netdev_priv(dev);
+	struct net_device *dev = hw->dev[port];
 
-	if (netif_running(dev)) {
-		spin_lock(&sky2->tx_lock);
-		sky2_tx_complete(sky2, last);
-		spin_unlock(&sky2->tx_lock);
+	if (dev && netif_running(dev)) {
+		u16 last = sky2_read16(hw, report_idx[port]);
+		sky2_tx_complete(netdev_priv(dev), last);
 	}
 }
 
@@ -1939,6 +1936,10 @@
 	unsigned buf_write[2] = { 0, 0 };
 	u16 hwidx = sky2_read16(hw, STAT_PUT_IDX);
 
+	sky2_tx(hw, 0);
+	if (hw->ports > 1)
+		sky2_tx(hw, 1);
+
 	rmb();
 
 	while (hw->st_idx != hwidx) {
@@ -2004,13 +2005,7 @@
 			break;
 
 		case OP_TXINDEXLE:
-			/* TX index reports status for both ports */
-			BUILD_BUG_ON(TX_RING_SIZE > 0x1000);
-			sky2_tx_done(hw->dev[0], status & 0xfff);
-			if (hw->dev[1])
-				sky2_tx_done(hw->dev[1],
-				     ((status >> 24) & 0xff)
-					     | (u16)(length & 0xf) << 8);
+			/* TX completion handled above */
 			break;
 
 		default:

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFT] sky2: transmit complete alternative
  2006-08-22 22:38         ` [RFT] sky2: transmit complete alternative Stephen Hemminger
@ 2006-08-23 10:06           ` Jon Wikne
  0 siblings, 0 replies; 4+ messages in thread
From: Jon Wikne @ 2006-08-23 10:06 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, Daniel Drake

Stephen Hemminger wrote:
> Does the following get rid of the hang?
> --------
> Recode the transmit completion handling to avoid races between the hardware
> status report mechanism and the interrupt handler. Rather than relying on
> the index value in the status ring, read the chip register and cleanup
> all completed transmits. 
> 
> Reduce the transmit lock window smaller to allow more parallelism.

[ patch ]

I'm afraid not. :-(

1) The number of bytes before the hang occurs seems to have _decreased_
    rather than the opposite. This observation is, however, a question
    of statistics, and I have better such with the previous version.

2) Previously, the sequence /sbin/ifdown eth0 - /sbin/ifup eth0 made
    the driver recover. Now, the latter of these commands hangs the
    whole system. The recovery is now /sbin/ifdown eth0 - rmmod sky2
    - modprobe sky2


-- Jon


^ permalink raw reply	[flat|nested] 4+ messages in thread

* re: [RFT] sky2: transmit complete alternative
@ 2006-08-30 23:22 Andrew Hall
       [not found] ` <20060830163636.78fa1aed@localhost.localdomain>
  0 siblings, 1 reply; 4+ messages in thread
From: Andrew Hall @ 2006-08-30 23:22 UTC (permalink / raw)
  To: netdev; +Cc: shemminger

Has there been any updates on a fix for the sky2 tx timeout issues?

I have been tracking this problem for a while and consistently see this
issue when the interface is under heavy load. I'm using 2.6.17.11 but have
tried .15, .16, .17 and .18rcX all with the same issue - a large amount of
(HTTP generated test workload) traffic will cause the driver to stop 
responding, report an error (as below)
and the machine to "hang" for about 20 seconds. As others have mentioned, a
down/up of the interface will recover until it locks up again. The dmesg
output for me is usually something like:

sky2 eth1: tx timeout
sky2 eth1: transmit ring 397 .. 356 report=399 done 399
sky2 status report lost?

I have also tried the new transmit code that was supplied earlier in this
thread without improvement of this situation.

Any help appreciated.,


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFT] sky2: transmit complete alternative
       [not found] ` <20060830163636.78fa1aed@localhost.localdomain>
@ 2006-08-31  6:18   ` Andrew Hall
  0 siblings, 0 replies; 4+ messages in thread
From: Andrew Hall @ 2006-08-31  6:18 UTC (permalink / raw)
  To: netdev



> On Thu, 31 Aug 2006 09:22:05 +1000
> "Andrew Hall" <andrew.hall@bluereefnetworks.com> wrote:
>
>> Has there been any updates on a fix for the sky2 tx timeout issues?
>>
>> I have been tracking this problem for a while and consistently see this
>> issue when the interface is under heavy load. I'm using 2.6.17.11 but 
>> have
>> tried .15, .16, .17 and .18rcX all with the same issue - a large amount 
>> of
>> (HTTP generated test workload) traffic will cause the driver to stop
>> responding, report an error (as below)
>> and the machine to "hang" for about 20 seconds. As others have mentioned, 
>> a
>> down/up of the interface will recover until it locks up again. The dmesg
>> output for me is usually something like:
>>
>> sky2 eth1: tx timeout
>> sky2 eth1: transmit ring 397 .. 356 report=399 done 399
>> sky2 status report lost?
>>
>> I have also tried the new transmit code that was supplied earlier in this
>> thread without improvement of this situation.
>>
>> Any help appreciated.,
>>
>
> Are you running ipv6?  there was just a fix where ipv6 was sending
> the hardware bad data?
>
> -- 
> Stephen Hemminger <shemminger@osdl.org>

No, IPv4.. looks like this is a different issue.. Can I supply any data to 
help solve this issue?


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2006-08-31  6:18 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-08-30 23:22 [RFT] sky2: transmit complete alternative Andrew Hall
     [not found] ` <20060830163636.78fa1aed@localhost.localdomain>
2006-08-31  6:18   ` Andrew Hall
  -- strict thread matches above, loose matches on Subject: below --
2006-08-21 12:41 sky2 driver - large files upload problem Jon Wikne
2006-08-21 13:18 ` Daniel Drake
2006-08-21 14:21   ` Jon Wikne
2006-08-21 16:51     ` Stephen Hemminger
2006-08-21 22:04       ` Jon Wikne
2006-08-22 22:38         ` [RFT] sky2: transmit complete alternative Stephen Hemminger
2006-08-23 10:06           ` Jon Wikne

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox