netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] [RFT] 2.6.4 - epic100 napi
@ 2004-03-20 14:21 Francois Romieu
  2004-03-21 18:24 ` Jeff Garzik
  2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
  0 siblings, 2 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-20 14:21 UTC (permalink / raw)
  To: netdev, Jeff Garzik

People are welcome to report how the following patch behaves on their
hardware. It does not seem too bad here, but it is probably still a bit rough.
A split version of the patch will follow tomorrow. This one is definitely
aimed at brave and/or bored testers.

The driver lacks ethtool support. Badly. :o/

--- linux-2.6.4/drivers/net/epic100.c	2004-03-20 14:52:08.000000000 +0100
+++ linux-2.6.4/drivers/net/epic100.c	2004-03-20 14:52:13.000000000 +0100
@@ -96,9 +96,9 @@ static int rx_copybreak;
    Making the Tx ring too large decreases the effectiveness of channel
    bonding and packet priority.
    There are no ill effects from too-large receive rings. */
-#define TX_RING_SIZE	16
+#define TX_RING_SIZE	256
 #define TX_QUEUE_LEN	10		/* Limit ring entries actually used.  */
-#define RX_RING_SIZE	32
+#define RX_RING_SIZE	256
 #define TX_TOTAL_SIZE	TX_RING_SIZE*sizeof(struct epic_tx_desc)
 #define RX_TOTAL_SIZE	RX_RING_SIZE*sizeof(struct epic_rx_desc)
 
@@ -292,6 +292,10 @@ enum CommandBits {
 	StopTxDMA=0x20, StopRxDMA=0x40, RestartTx=0x80,
 };
 
+#define EpicNapiEvent	(TxEmpty | TxDone | \
+			 RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
+#define EpicNormalEvent	(0x0000ffffUL & ~EpicNapiEvent)
+
 static u16 media2miictl[16] = {
 	0, 0x0C00, 0x0C00, 0x2000,  0x0100, 0x2100, 0, 0,
 	0, 0, 0, 0,  0, 0, 0, 0 };
@@ -330,9 +334,11 @@ struct epic_private {
 
 	/* Ring pointers. */
 	spinlock_t lock;				/* Group with Tx control cache line. */
+	spinlock_t napi_lock;
 	unsigned int cur_tx, dirty_tx;
 
 	unsigned int cur_rx, dirty_rx;
+	u32 irq_mask;
 	unsigned int rx_buf_sz;				/* Based on MTU+slack. */
 
 	struct pci_dev *pci_dev;			/* PCI bus location. */
@@ -359,7 +365,8 @@ static void epic_timer(unsigned long dat
 static void epic_tx_timeout(struct net_device *dev);
 static void epic_init_ring(struct net_device *dev);
 static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev);
-static int epic_rx(struct net_device *dev);
+static int epic_rx(struct net_device *dev, int budget);
+static int epic_poll(struct net_device *dev, int *budget);
 static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static struct ethtool_ops netdev_ethtool_ops;
@@ -378,7 +385,7 @@ static int __devinit epic_init_one (stru
 	int irq;
 	struct net_device *dev;
 	struct epic_private *ep;
-	int i, option = 0, duplex = 0;
+	int i, ret, option = 0, duplex = 0;
 	void *ring_space;
 	dma_addr_t ring_dma;
 
@@ -392,29 +399,33 @@ static int __devinit epic_init_one (stru
 	
 	card_idx++;
 	
-	i = pci_enable_device(pdev);
-	if (i)
-		return i;
+	ret = pci_enable_device(pdev);
+	if (ret)
+		goto out;
 	irq = pdev->irq;
 
 	if (pci_resource_len(pdev, 0) < pci_id_tbl[chip_idx].io_size) {
 		printk (KERN_ERR "card %d: no PCI region space\n", card_idx);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err_out_disable;
 	}
 	
 	pci_set_master(pdev);
 
+	ret = pci_request_regions(pdev, DRV_NAME);
+	if (ret < 0)
+		goto err_out_disable;
+
+	ret = -ENOMEM;
+
 	dev = alloc_etherdev(sizeof (*ep));
 	if (!dev) {
 		printk (KERN_ERR "card %d: no memory for eth device\n", card_idx);
-		return -ENOMEM;
+		goto err_out_free_res;
 	}
 	SET_MODULE_OWNER(dev);
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
-	if (pci_request_regions(pdev, DRV_NAME))
-		goto err_out_free_netdev;
-
 #ifdef USE_IO_OPS
 	ioaddr = pci_resource_start (pdev, 0);
 #else
@@ -422,7 +433,7 @@ static int __devinit epic_init_one (stru
 	ioaddr = (long) ioremap (ioaddr, pci_resource_len (pdev, 1));
 	if (!ioaddr) {
 		printk (KERN_ERR DRV_NAME " %d: ioremap failed\n", card_idx);
-		goto err_out_free_res;
+		goto err_out_free_netdev;
 	}
 #endif
 
@@ -489,6 +500,9 @@ static int __devinit epic_init_one (stru
 	ep->pci_dev = pdev;
 	ep->chip_id = chip_idx;
 	ep->chip_flags = pci_id_tbl[chip_idx].drv_flags;
+	ep->irq_mask = 
+		(ep->chip_flags & TYPE2_INTR ?  PCIBusErr175 : PCIBusErr170)
+		 | CntFull | TxUnderrun | EpicNapiEvent;
 
 	/* Find the connected MII xcvrs.
 	   Doing this in open() would allow detecting external xcvrs later, but
@@ -543,10 +557,12 @@ static int __devinit epic_init_one (stru
 	dev->ethtool_ops = &netdev_ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 	dev->tx_timeout = &epic_tx_timeout;
+	dev->poll = epic_poll;
+	dev->weight = 64;
 
-	i = register_netdev(dev);
-	if (i)
-		goto err_out_unmap_tx;
+	ret = register_netdev(dev);
+	if (ret < 0)
+		goto err_out_unmap_rx;
 
 	printk(KERN_INFO "%s: %s at %#lx, IRQ %d, ",
 		   dev->name, pci_id_tbl[chip_idx].name, ioaddr, dev->irq);
@@ -554,19 +570,24 @@ static int __devinit epic_init_one (stru
 		printk("%2.2x:", dev->dev_addr[i]);
 	printk("%2.2x.\n", dev->dev_addr[i]);
 
-	return 0;
+out:
+	return ret;
 
+err_out_unmap_rx:
+	pci_free_consistent(pdev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma);
 err_out_unmap_tx:
 	pci_free_consistent(pdev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma);
 err_out_iounmap:
 #ifndef USE_IO_OPS
 	iounmap(ioaddr);
-err_out_free_res:
-#endif
-	pci_release_regions(pdev);
 err_out_free_netdev:
+#endif
 	free_netdev(dev);
-	return -ENODEV;
+err_out_free_res:
+	pci_release_regions(pdev);
+err_out_disable:
+	pci_disable_device(pdev);
+	goto out;
 }
 \f
 /* Serial EEPROM section. */
@@ -592,6 +613,36 @@ err_out_free_netdev:
 #define EE_READ256_CMD	(6 << 8)
 #define EE_ERASE_CMD	(7 << 6)
 
+static void epic_disable_int(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+
+	outl(0x00000000, ioaddr + INTMASK);
+}
+
+static inline void __epic_pci_commit(long ioaddr)
+{
+#ifndef USE_IO_OPS
+	inl(ioaddr + INTMASK);
+#endif
+}
+
+static void epic_napi_irq_off(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+
+	outl(ep->irq_mask & ~EpicNapiEvent, ioaddr + INTMASK);
+	__epic_pci_commit(ioaddr);
+}
+
+static void epic_napi_irq_on(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+
+	/* No need to commit possible posted write */
+	outl(ep->irq_mask | EpicNapiEvent, ioaddr + INTMASK);
+}
+
 static int __devinit read_eeprom(long ioaddr, int location)
 {
 	int i;
@@ -753,8 +804,7 @@ static int epic_open(struct net_device *
 	/* Enable interrupts by setting the interrupt mask. */
 	outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
 		 | CntFull | TxUnderrun | TxDone | TxEmpty
-		 | RxError | RxOverflow | RxFull | RxHeader | RxDone,
-		 ioaddr + INTMASK);
+		 | RxError | EpicNapiEvent, ioaddr + INTMASK);
 
 	if (debug > 1)
 		printk(KERN_DEBUG "%s: epic_open() ioaddr %lx IRQ %d status %4.4x "
@@ -795,7 +845,7 @@ static void epic_pause(struct net_device
 	}
 
 	/* Remove the packets on the Rx queue. */
-	epic_rx(dev);
+	epic_rx(dev, RX_RING_SIZE);
 }
 
 static void epic_restart(struct net_device *dev)
@@ -842,7 +892,7 @@ static void epic_restart(struct net_devi
 	/* Enable interrupts by setting the interrupt mask. */
 	outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
 		 | CntFull | TxUnderrun | TxDone | TxEmpty
-		 | RxError | RxOverflow | RxFull | RxHeader | RxDone,
+		 | RxError | EpicNapiEvent,
 		 ioaddr + INTMASK);
 	printk(KERN_DEBUG "%s: epic_restart() done, cmd status %4.4x, ctl %4.4x"
 		   " interrupt %4.4x.\n",
@@ -929,7 +979,8 @@ static void epic_init_ring(struct net_de
 	int i;
 
 	ep->tx_full = 0;
-	ep->lock = (spinlock_t) SPIN_LOCK_UNLOCKED;
+	spin_lock_init(&ep->lock);
+	spin_lock_init(&ep->napi_lock);
 	ep->dirty_tx = ep->cur_tx = 0;
 	ep->cur_rx = ep->dirty_rx = 0;
 	ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1029,6 +1080,77 @@ static int epic_start_xmit(struct sk_buf
 	return 0;
 }
 
+static void epic_tx_error(struct net_device *dev, struct epic_private *ep,
+			  int status)
+{
+	struct net_device_stats *stats = &ep->stats;
+
+#ifndef final_version
+	/* There was a major error, log it. */
+	if (debug > 1)
+		printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
+		       dev->name, status);
+#endif
+	stats->tx_errors++;
+	if (status & 0x1050)
+		stats->tx_aborted_errors++;
+	if (status & 0x0008)
+		stats->tx_carrier_errors++;
+	if (status & 0x0040)
+		stats->tx_window_errors++;
+	if (status & 0x0010)
+		stats->tx_fifo_errors++;
+}
+
+static void epic_tx(struct net_device *dev, struct epic_private *ep)
+{
+	unsigned int dirty_tx, cur_tx;
+
+	/*
+	 * Note: if this lock becomes a problem we can narrow the locked
+	 * region at the cost of occasionally grabbing the lock more times.
+	 */
+	cur_tx = ep->cur_tx;
+	for (dirty_tx = ep->dirty_tx; cur_tx - dirty_tx > 0; dirty_tx++) {
+		struct sk_buff *skb;
+		int entry = dirty_tx % TX_RING_SIZE;
+		int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
+
+		if (txstatus & DescOwn)
+			break;	/* It still hasn't been Txed */
+
+		if (likely(txstatus & 0x0001)) {
+			ep->stats.collisions += (txstatus >> 8) & 15;
+			ep->stats.tx_packets++;
+			ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
+		} else
+			epic_tx_error(dev, ep, txstatus);
+
+		/* Free the original skb. */
+		skb = ep->tx_skbuff[entry];
+		pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr, 
+				 skb->len, PCI_DMA_TODEVICE);
+		dev_kfree_skb_irq(skb);
+		ep->tx_skbuff[entry] = 0;
+	}
+
+#ifndef final_version
+	if (cur_tx - dirty_tx > TX_RING_SIZE) {
+		printk(KERN_WARNING
+		       "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
+		       dev->name, dirty_tx, cur_tx, ep->tx_full);
+		dirty_tx += TX_RING_SIZE;
+	}
+#endif
+	ep->dirty_tx = dirty_tx;
+	if (ep->tx_full && cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
+		/* The ring is no longer full, allow new TX entries. */
+		ep->tx_full = 0;
+		netif_wake_queue(dev);
+	}
+}
+
+
 /* The interrupt handler does all of the Rx thread work and cleans up
    after the Tx thread. */
 static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1042,7 +1164,7 @@ static irqreturn_t epic_interrupt(int ir
 	do {
 		status = inl(ioaddr + INTSTAT);
 		/* Acknowledge all of the current interrupt sources ASAP. */
-		outl(status & 0x00007fff, ioaddr + INTSTAT);
+		outl(status & EpicNormalEvent, ioaddr + INTSTAT);
 
 		if (debug > 4)
 			printk(KERN_DEBUG "%s: Interrupt, status=%#8.8x new "
@@ -1053,73 +1175,18 @@ static irqreturn_t epic_interrupt(int ir
 			break;
 		handled = 1;
 
-		if (status & (RxDone | RxStarted | RxEarlyWarn | RxOverflow))
-			epic_rx(dev);
-
-		if (status & (TxEmpty | TxDone)) {
-			unsigned int dirty_tx, cur_tx;
-
-			/* Note: if this lock becomes a problem we can narrow the locked
-			   region at the cost of occasionally grabbing the lock more
-			   times. */
-			spin_lock(&ep->lock);
-			cur_tx = ep->cur_tx;
-			dirty_tx = ep->dirty_tx;
-			for (; cur_tx - dirty_tx > 0; dirty_tx++) {
-				struct sk_buff *skb;
-				int entry = dirty_tx % TX_RING_SIZE;
-				int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
-
-				if (txstatus & DescOwn)
-					break;			/* It still hasn't been Txed */
-
-				if ( ! (txstatus & 0x0001)) {
-					/* There was an major error, log it. */
-#ifndef final_version
-					if (debug > 1)
-						printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
-							   dev->name, txstatus);
-#endif
-					ep->stats.tx_errors++;
-					if (txstatus & 0x1050) ep->stats.tx_aborted_errors++;
-					if (txstatus & 0x0008) ep->stats.tx_carrier_errors++;
-					if (txstatus & 0x0040) ep->stats.tx_window_errors++;
-					if (txstatus & 0x0010) ep->stats.tx_fifo_errors++;
-				} else {
-					ep->stats.collisions += (txstatus >> 8) & 15;
-					ep->stats.tx_packets++;
-					ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
-				}
-
-				/* Free the original skb. */
-				skb = ep->tx_skbuff[entry];
-				pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr, 
-						 skb->len, PCI_DMA_TODEVICE);
-				dev_kfree_skb_irq(skb);
-				ep->tx_skbuff[entry] = 0;
+		if (status & EpicNapiEvent) {
+			spin_lock(&ep->napi_lock);
+			if (netif_rx_schedule_prep(dev)) {
+				epic_napi_irq_off(dev, ep);
+				__netif_rx_schedule(dev);
 			}
-
-#ifndef final_version
-			if (cur_tx - dirty_tx > TX_RING_SIZE) {
-				printk(KERN_WARNING "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
-					   dev->name, dirty_tx, cur_tx, ep->tx_full);
-				dirty_tx += TX_RING_SIZE;
-			}
-#endif
-			ep->dirty_tx = dirty_tx;
-			if (ep->tx_full
-				&& cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
-				/* The ring is no longer full, allow new TX entries. */
-				ep->tx_full = 0;
-				spin_unlock(&ep->lock);
-				netif_wake_queue(dev);
-			} else
-				spin_unlock(&ep->lock);
+			spin_unlock(&ep->napi_lock);
 		}
 
 		/* Check uncommon events all at once. */
-		if (status & (CntFull | TxUnderrun | RxOverflow | RxFull |
-					  PCIBusErr170 | PCIBusErr175)) {
+		if (status &
+		    (CntFull | TxUnderrun | PCIBusErr170 | PCIBusErr175)) {
 			if (status == 0xffffffff) /* Chip failed or removed (CardBus). */
 				break;
 			/* Always update the error counts to avoid overhead later. */
@@ -1133,11 +1200,6 @@ static irqreturn_t epic_interrupt(int ir
 				/* Restart the transmit process. */
 				outl(RestartTx, ioaddr + COMMAND);
 			}
-			if (status & RxOverflow) {		/* Missed a Rx frame. */
-				ep->stats.rx_errors++;
-			}
-			if (status & (RxOverflow | RxFull))
-				outw(RxQueued, ioaddr + COMMAND);
 			if (status & PCIBusErr170) {
 				printk(KERN_ERR "%s: PCI Bus Error!  EPIC status %4.4x.\n",
 					   dev->name, status);
@@ -1147,6 +1209,8 @@ static irqreturn_t epic_interrupt(int ir
 			/* Clear all error sources. */
 			outl(status & 0x7f18, ioaddr + INTSTAT);
 		}
+		if (!(status & EpicNormalEvent))
+			break;
 		if (--boguscnt < 0) {
 			printk(KERN_ERR "%s: Too much work at interrupt, "
 				   "IntrStatus=0x%8.8x.\n",
@@ -1164,7 +1228,7 @@ static irqreturn_t epic_interrupt(int ir
 	return IRQ_RETVAL(handled);
 }
 
-static int epic_rx(struct net_device *dev)
+static int epic_rx(struct net_device *dev, int budget)
 {
 	struct epic_private *ep = dev->priv;
 	int entry = ep->cur_rx % RX_RING_SIZE;
@@ -1174,6 +1238,10 @@ static int epic_rx(struct net_device *de
 	if (debug > 4)
 		printk(KERN_DEBUG " In epic_rx(), entry %d %8.8x.\n", entry,
 			   ep->rx_ring[entry].rxstatus);
+
+	if (rx_work_limit > budget)
+		rx_work_limit = budget;
+
 	/* If we own the next entry, it's a new packet. Send it up. */
 	while ((ep->rx_ring[entry].rxstatus & cpu_to_le32(DescOwn)) == 0) {
 		int status = le32_to_cpu(ep->rx_ring[entry].rxstatus);
@@ -1228,7 +1296,7 @@ static int epic_rx(struct net_device *de
 				ep->rx_skbuff[entry] = NULL;
 			}
 			skb->protocol = eth_type_trans(skb, dev);
-			netif_rx(skb);
+			netif_receive_skb(skb);
 			dev->last_rx = jiffies;
 			ep->stats.rx_packets++;
 			ep->stats.rx_bytes += pkt_len;
@@ -1256,6 +1324,60 @@ static int epic_rx(struct net_device *de
 	return work_done;
 }
 
+static void epic_rx_err(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+	int status;
+
+	status = inl(ioaddr + INTSTAT);
+
+	if (status == 0xffffffff)
+		return;
+	if (status & RxOverflow) 	/* Missed a Rx frame. */
+		ep->stats.rx_errors++;
+	if (status & (RxOverflow | RxFull))
+		outw(RxQueued, ioaddr + COMMAND);
+}
+
+static int epic_poll(struct net_device *dev, int *budget)
+{
+	struct epic_private *ep = dev->priv;
+	int work_done, orig_budget;
+	long ioaddr = dev->base_addr;
+
+	epic_tx(dev, ep);
+
+	orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
+
+rx_action:
+	outl(EpicNapiEvent, ioaddr + INTSTAT);
+
+	work_done = epic_rx(dev, *budget);
+
+	epic_rx_err(dev, ep);
+
+	*budget -= work_done;
+	dev->quota -= work_done;
+
+	if (work_done < orig_budget) {
+		unsigned long flags;
+		int status;
+
+		spin_lock_irqsave(&ep->napi_lock, flags);
+		epic_napi_irq_on(dev, ep);
+		__netif_rx_complete(dev);
+		spin_unlock_irqrestore(&ep->napi_lock, flags);
+
+		status = inl(ioaddr + INTSTAT);
+		if (status & EpicNapiEvent) {
+			epic_napi_irq_off(dev, ep);
+			goto rx_action;
+		}
+	}
+
+	return (work_done >= orig_budget);
+}
+
 static int epic_close(struct net_device *dev)
 {
 	long ioaddr = dev->base_addr;
@@ -1270,9 +1392,13 @@ static int epic_close(struct net_device 
 			   dev->name, (int)inl(ioaddr + INTSTAT));
 
 	del_timer_sync(&ep->timer);
-	epic_pause(dev);
+
+	epic_disable_int(dev, ep);
+
 	free_irq(dev->irq, dev);
 
+	epic_pause(dev);
+
 	/* Free all the skbuffs in the Rx queue. */
 	for (i = 0; i < RX_RING_SIZE; i++) {
 		skb = ep->rx_skbuff[i];
@@ -1470,6 +1596,7 @@ static void __devexit epic_remove_one (s
 #endif
 	pci_release_regions(pdev);
 	free_netdev(dev);
+	pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
 	/* pci_power_off(pdev, -1); */
 }

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-20 14:21 [PATCH] [RFT] 2.6.4 - epic100 napi Francois Romieu
@ 2004-03-21 18:24 ` Jeff Garzik
  2004-03-21 23:47   ` Francois Romieu
  2004-03-23 14:29   ` OGAWA Hirofumi
  2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
  1 sibling, 2 replies; 18+ messages in thread
From: Jeff Garzik @ 2004-03-21 18:24 UTC (permalink / raw)
  To: Francois Romieu; +Cc: netdev

Francois Romieu wrote:
> People are welcome to report how the following patch behaves on their
> hardware. It does not seem too bad here but it probably is still a bit rough.
> A split version of the patch will follow tomorrow. This one definitely aims
> at brave and/or bored testers.

Looks pretty good, but includes a standard NAPI race...

When you split up the patches, I'll throw it into my -netdev tree, which 
means it will be automatically included in -mm for testing (as is r8169 
now).

FWIW Andrew Morton has made me lazy...  I don't bother publishing 
separate -netdev patches anymore, since he automatically downloads my 
netdev-2.6 BK tree before doing each -mm release.


> +	if (work_done < orig_budget) {
> +		unsigned long flags;
> +		int status;
> +
> +		spin_lock_irqsave(&ep->napi_lock, flags);
> +		epic_napi_irq_on(dev, ep);
> +		__netif_rx_complete(dev);
> +		spin_unlock_irqrestore(&ep->napi_lock, flags);
> +
> +		status = inl(ioaddr + INTSTAT);
> +		if (status & EpicNapiEvent) {
> +			epic_napi_irq_off(dev, ep);
> +			goto rx_action;
> +		}

Need to add a netif_running() check to the 'if' test at the top of the 
quote.

Are you (or somebody else?) interested in reviewing all the in-tree NAPI 
drivers, and seeing if other drivers have this bug?  I think 8139cp.c 
does at least, maybe e100 too...  Such a fix would need to go into 2.4.x 
as well.

	Jeff

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-21 18:24 ` Jeff Garzik
@ 2004-03-21 23:47   ` Francois Romieu
  2004-03-23 14:29   ` OGAWA Hirofumi
  1 sibling, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-21 23:47 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: netdev

Jeff Garzik <jgarzik@pobox.com> :
[...]
> Need to add a netif_running() check to the 'if' test at the top of the 
> quote.
> 
> Are you (or somebody else?) interested in reviewing all the in-tree NAPI 
> drivers, and seeing if other drivers have this bug?  I think 8139cp.c 
> does at least, maybe e100 too...  Such a fix would need to go into 2.4.x 
> as well.

Ok, I'll check for the race against dev_close.

--
Ueimor

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 0/4] 2.6.5-rc2 - epic100 update
  2004-03-20 14:21 [PATCH] [RFT] 2.6.4 - epic100 napi Francois Romieu
  2004-03-21 18:24 ` Jeff Garzik
@ 2004-03-22 23:50 ` Francois Romieu
  2004-03-22 23:51   ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
  2004-03-23  0:12   ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Jeff Garzik
  1 sibling, 2 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:50 UTC (permalink / raw)
  To: netdev; +Cc: Jeff Garzik

Schedule:

- epic100-fixup.patch:   opportunistic cleanup
- epic100-napi-00.patch: code shuffling before the move
- epic100-napi-10.patch: rx napi (includes netif_running change)
- epic100-napi-20.patch: minimalistic tx napi

2.6.5-rc2 and 2.6.5-rc2-mm1 contain the same drivers/net/epic100.c
so the patches should apply equally to both.

The driver has been moderately tested. 

Feedback welcome.

--
Ueimor

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/4] 2.6.5-rc2 - epic100 fixup
  2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
@ 2004-03-22 23:51   ` Francois Romieu
  2004-03-22 23:52     ` [PATCH 2/4] 2.6.5-rc2 - epic100 napi Francois Romieu
  2004-03-23  0:12   ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Jeff Garzik
  1 sibling, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:51 UTC (permalink / raw)
  To: netdev; +Cc: Jeff Garzik


- add the missing pci_disable_device() calls to balance the invocation of
  pci_enable_device() in epic_init_one() (-> error path + epic_remove_one());
- epic_init_one() now returns the real error status on its shared error path
  instead of a blanket -ENODEV;
- memory dedicated to Rx descriptors was not freed after failure of
  register_netdev() in epic_init_one();
- use of epic_pause() in epic_close() offers a small window for a late
  interrupt just before the final free_irq(). Let's close the window to
  avoid two epic_rx() threads racing with each other: interrupts are now
  masked before free_irq() and epic_pause() is only called afterwards.


 drivers/net/epic100.c |   59 +++++++++++++++++++++++++++++++++-----------------
 1 files changed, 40 insertions(+), 19 deletions(-)

diff -puN drivers/net/epic100.c~epic100-fixup drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-fixup	2004-03-22 22:53:16.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c	2004-03-22 22:53:16.000000000 +0100
@@ -378,7 +378,7 @@ static int __devinit epic_init_one (stru
 	int irq;
 	struct net_device *dev;
 	struct epic_private *ep;
-	int i, option = 0, duplex = 0;
+	int i, ret, option = 0, duplex = 0;
 	void *ring_space;
 	dma_addr_t ring_dma;
 
@@ -392,29 +392,33 @@ static int __devinit epic_init_one (stru
 	
 	card_idx++;
 	
-	i = pci_enable_device(pdev);
-	if (i)
-		return i;
+	ret = pci_enable_device(pdev);
+	if (ret)
+		goto out;
 	irq = pdev->irq;
 
 	if (pci_resource_len(pdev, 0) < pci_id_tbl[chip_idx].io_size) {
 		printk (KERN_ERR "card %d: no PCI region space\n", card_idx);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err_out_disable;
 	}
 	
 	pci_set_master(pdev);
 
+	ret = pci_request_regions(pdev, DRV_NAME);
+	if (ret < 0)
+		goto err_out_disable;
+
+	ret = -ENOMEM;
+
 	dev = alloc_etherdev(sizeof (*ep));
 	if (!dev) {
 		printk (KERN_ERR "card %d: no memory for eth device\n", card_idx);
-		return -ENOMEM;
+		goto err_out_free_res;
 	}
 	SET_MODULE_OWNER(dev);
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
-	if (pci_request_regions(pdev, DRV_NAME))
-		goto err_out_free_netdev;
-
 #ifdef USE_IO_OPS
 	ioaddr = pci_resource_start (pdev, 0);
 #else
@@ -422,7 +426,7 @@ static int __devinit epic_init_one (stru
 	ioaddr = (long) ioremap (ioaddr, pci_resource_len (pdev, 1));
 	if (!ioaddr) {
 		printk (KERN_ERR DRV_NAME " %d: ioremap failed\n", card_idx);
-		goto err_out_free_res;
+		goto err_out_free_netdev;
 	}
 #endif
 
@@ -544,9 +548,9 @@ static int __devinit epic_init_one (stru
 	dev->watchdog_timeo = TX_TIMEOUT;
 	dev->tx_timeout = &epic_tx_timeout;
 
-	i = register_netdev(dev);
-	if (i)
-		goto err_out_unmap_tx;
+	ret = register_netdev(dev);
+	if (ret < 0)
+		goto err_out_unmap_rx;
 
 	printk(KERN_INFO "%s: %s at %#lx, IRQ %d, ",
 		   dev->name, pci_id_tbl[chip_idx].name, ioaddr, dev->irq);
@@ -554,19 +558,24 @@ static int __devinit epic_init_one (stru
 		printk("%2.2x:", dev->dev_addr[i]);
 	printk("%2.2x.\n", dev->dev_addr[i]);
 
-	return 0;
+out:
+	return ret;
 
+err_out_unmap_rx:
+	pci_free_consistent(pdev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma);
 err_out_unmap_tx:
 	pci_free_consistent(pdev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma);
 err_out_iounmap:
 #ifndef USE_IO_OPS
 	iounmap(ioaddr);
-err_out_free_res:
-#endif
-	pci_release_regions(pdev);
 err_out_free_netdev:
+#endif
 	free_netdev(dev);
-	return -ENODEV;
+err_out_free_res:
+	pci_release_regions(pdev);
+err_out_disable:
+	pci_disable_device(pdev);
+	goto out;
 }
 \f
 /* Serial EEPROM section. */
@@ -592,6 +601,13 @@ err_out_free_netdev:
 #define EE_READ256_CMD	(6 << 8)
 #define EE_ERASE_CMD	(7 << 6)
 
+static void epic_disable_int(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+
+	outl(0x00000000, ioaddr + INTMASK);
+}
+
 static int __devinit read_eeprom(long ioaddr, int location)
 {
 	int i;
@@ -1276,9 +1292,13 @@ static int epic_close(struct net_device 
 			   dev->name, (int)inl(ioaddr + INTSTAT));
 
 	del_timer_sync(&ep->timer);
-	epic_pause(dev);
+
+	epic_disable_int(dev, ep);
+
 	free_irq(dev->irq, dev);
 
+	epic_pause(dev);
+
 	/* Free all the skbuffs in the Rx queue. */
 	for (i = 0; i < RX_RING_SIZE; i++) {
 		skb = ep->rx_skbuff[i];
@@ -1476,6 +1496,7 @@ static void __devexit epic_remove_one (s
 #endif
 	pci_release_regions(pdev);
 	free_netdev(dev);
+	pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
 	/* pci_power_off(pdev, -1); */
 }

_

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 2/4] 2.6.5-rc2 - epic100 napi
  2004-03-22 23:51   ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
@ 2004-03-22 23:52     ` Francois Romieu
  2004-03-22 23:53       ` [PATCH 3/4] " Francois Romieu
  0 siblings, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:52 UTC (permalink / raw)
  To: netdev; +Cc: Jeff Garzik


Isolate the classical Tx completion part of epic_interrupt() into epic_tx()
and epic_tx_error(). Innocent code shuffling, no functional change intended.


 drivers/net/epic100.c |  137 +++++++++++++++++++++++++++-----------------------
 1 files changed, 76 insertions(+), 61 deletions(-)

diff -puN drivers/net/epic100.c~epic100-napi-00 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-00	2004-03-22 22:53:18.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c	2004-03-22 22:53:18.000000000 +0100
@@ -1045,6 +1045,79 @@ static int epic_start_xmit(struct sk_buf
 	return 0;
 }
 
+static void epic_tx_error(struct net_device *dev, struct epic_private *ep,
+			  int status)
+{
+	struct net_device_stats *stats = &ep->stats;
+
+#ifndef final_version
+	/* There was a major error, log it. */
+	if (debug > 1)
+		printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
+		       dev->name, status);
+#endif
+	stats->tx_errors++;
+	if (status & 0x1050)
+		stats->tx_aborted_errors++;
+	if (status & 0x0008)
+		stats->tx_carrier_errors++;
+	if (status & 0x0040)
+		stats->tx_window_errors++;
+	if (status & 0x0010)
+		stats->tx_fifo_errors++;
+}
+
+static void epic_tx(struct net_device *dev, struct epic_private *ep)
+{
+	unsigned int dirty_tx, cur_tx;
+
+	/*
+	 * Note: if this lock becomes a problem we can narrow the locked
+	 * region at the cost of occasionally grabbing the lock more times.
+	 */
+	spin_lock(&ep->lock);
+	cur_tx = ep->cur_tx;
+	for (dirty_tx = ep->dirty_tx; cur_tx - dirty_tx > 0; dirty_tx++) {
+		struct sk_buff *skb;
+		int entry = dirty_tx % TX_RING_SIZE;
+		int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
+
+		if (txstatus & DescOwn)
+			break;	/* It still hasn't been Txed */
+
+		if (likely(txstatus & 0x0001)) {
+			ep->stats.collisions += (txstatus >> 8) & 15;
+			ep->stats.tx_packets++;
+			ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
+		} else
+			epic_tx_error(dev, ep, txstatus);
+
+		/* Free the original skb. */
+		skb = ep->tx_skbuff[entry];
+		pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr, 
+				 skb->len, PCI_DMA_TODEVICE);
+		dev_kfree_skb_irq(skb);
+		ep->tx_skbuff[entry] = 0;
+	}
+
+#ifndef final_version
+	if (cur_tx - dirty_tx > TX_RING_SIZE) {
+		printk(KERN_WARNING
+		       "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
+		       dev->name, dirty_tx, cur_tx, ep->tx_full);
+		dirty_tx += TX_RING_SIZE;
+	}
+#endif
+	ep->dirty_tx = dirty_tx;
+	if (ep->tx_full && cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
+		/* The ring is no longer full, allow new TX entries. */
+		ep->tx_full = 0;
+		netif_wake_queue(dev);
+	}
+	spin_unlock(&ep->lock);
+}
+
+
 /* The interrupt handler does all of the Rx thread work and cleans up
    after the Tx thread. */
 static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1072,66 +1145,8 @@ static irqreturn_t epic_interrupt(int ir
 		if (status & (RxDone | RxStarted | RxEarlyWarn | RxOverflow))
 			epic_rx(dev);
 
-		if (status & (TxEmpty | TxDone)) {
-			unsigned int dirty_tx, cur_tx;
-
-			/* Note: if this lock becomes a problem we can narrow the locked
-			   region at the cost of occasionally grabbing the lock more
-			   times. */
-			spin_lock(&ep->lock);
-			cur_tx = ep->cur_tx;
-			dirty_tx = ep->dirty_tx;
-			for (; cur_tx - dirty_tx > 0; dirty_tx++) {
-				struct sk_buff *skb;
-				int entry = dirty_tx % TX_RING_SIZE;
-				int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
-
-				if (txstatus & DescOwn)
-					break;			/* It still hasn't been Txed */
-
-				if ( ! (txstatus & 0x0001)) {
-					/* There was an major error, log it. */
-#ifndef final_version
-					if (debug > 1)
-						printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
-							   dev->name, txstatus);
-#endif
-					ep->stats.tx_errors++;
-					if (txstatus & 0x1050) ep->stats.tx_aborted_errors++;
-					if (txstatus & 0x0008) ep->stats.tx_carrier_errors++;
-					if (txstatus & 0x0040) ep->stats.tx_window_errors++;
-					if (txstatus & 0x0010) ep->stats.tx_fifo_errors++;
-				} else {
-					ep->stats.collisions += (txstatus >> 8) & 15;
-					ep->stats.tx_packets++;
-					ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
-				}
-
-				/* Free the original skb. */
-				skb = ep->tx_skbuff[entry];
-				pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr, 
-						 skb->len, PCI_DMA_TODEVICE);
-				dev_kfree_skb_irq(skb);
-				ep->tx_skbuff[entry] = 0;
-			}
-
-#ifndef final_version
-			if (cur_tx - dirty_tx > TX_RING_SIZE) {
-				printk(KERN_WARNING "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
-					   dev->name, dirty_tx, cur_tx, ep->tx_full);
-				dirty_tx += TX_RING_SIZE;
-			}
-#endif
-			ep->dirty_tx = dirty_tx;
-			if (ep->tx_full
-				&& cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
-				/* The ring is no longer full, allow new TX entries. */
-				ep->tx_full = 0;
-				spin_unlock(&ep->lock);
-				netif_wake_queue(dev);
-			} else
-				spin_unlock(&ep->lock);
-		}
+		if (status & (TxEmpty | TxDone))
+			epic_tx(dev, ep);
 
 		/* Check uncommon events all at once. */
 		if (status & (CntFull | TxUnderrun | RxOverflow | RxFull |
@@ -1149,7 +1164,7 @@ static irqreturn_t epic_interrupt(int ir
 				/* Restart the transmit process. */
 				outl(RestartTx, ioaddr + COMMAND);
 			}
-			if (status & RxOverflow) {		/* Missed a Rx frame. */
+			if (status & RxOverflow) {	/* Missed a Rx frame. */
 				ep->stats.rx_errors++;
 			}
 			if (status & (RxOverflow | RxFull))

_

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 3/4] 2.6.5-rc2 - epic100 napi
  2004-03-22 23:52     ` [PATCH 2/4] 2.6.5-rc2 - epic100 napi Francois Romieu
@ 2004-03-22 23:53       ` Francois Romieu
  2004-03-22 23:53         ` [PATCH 4/4] " Francois Romieu
  0 siblings, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:53 UTC (permalink / raw)
  To: netdev; +Cc: Jeff Garzik


Rx NAPI: convert the receive path to NAPI polling (includes the
netif_running() change).


 drivers/net/epic100.c |  137 ++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 116 insertions(+), 21 deletions(-)

diff -puN drivers/net/epic100.c~epic100-napi-10 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-10	2004-03-22 22:53:19.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c	2004-03-23 00:18:33.000000000 +0100
@@ -98,7 +98,7 @@ static int rx_copybreak;
    There are no ill effects from too-large receive rings. */
 #define TX_RING_SIZE	16
 #define TX_QUEUE_LEN	10		/* Limit ring entries actually used.  */
-#define RX_RING_SIZE	32
+#define RX_RING_SIZE	256
 #define TX_TOTAL_SIZE	TX_RING_SIZE*sizeof(struct epic_tx_desc)
 #define RX_TOTAL_SIZE	RX_RING_SIZE*sizeof(struct epic_rx_desc)
 
@@ -292,6 +292,11 @@ enum CommandBits {
 	StopTxDMA=0x20, StopRxDMA=0x40, RestartTx=0x80,
 };
 
+#define EpicRemoved	0xffffffff	/* Chip failed or removed (CardBus) */
+
+#define EpicNapiEvent	(RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
+#define EpicNormalEvent	(0x0000ffff & ~EpicNapiEvent)
+
 static u16 media2miictl[16] = {
 	0, 0x0C00, 0x0C00, 0x2000,  0x0100, 0x2100, 0, 0,
 	0, 0, 0, 0,  0, 0, 0, 0 };
@@ -330,9 +335,11 @@ struct epic_private {
 
 	/* Ring pointers. */
 	spinlock_t lock;				/* Group with Tx control cache line. */
+	spinlock_t napi_lock;
 	unsigned int cur_tx, dirty_tx;
 
 	unsigned int cur_rx, dirty_rx;
+	u32 irq_mask;
 	unsigned int rx_buf_sz;				/* Based on MTU+slack. */
 
 	struct pci_dev *pci_dev;			/* PCI bus location. */
@@ -359,7 +366,8 @@ static void epic_timer(unsigned long dat
 static void epic_tx_timeout(struct net_device *dev);
 static void epic_init_ring(struct net_device *dev);
 static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev);
-static int epic_rx(struct net_device *dev);
+static int epic_rx(struct net_device *dev, int budget);
+static int epic_poll(struct net_device *dev, int *budget);
 static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static struct ethtool_ops netdev_ethtool_ops;
@@ -493,6 +501,9 @@ static int __devinit epic_init_one (stru
 	ep->pci_dev = pdev;
 	ep->chip_id = chip_idx;
 	ep->chip_flags = pci_id_tbl[chip_idx].drv_flags;
+	ep->irq_mask = 
+		(ep->chip_flags & TYPE2_INTR ?  PCIBusErr175 : PCIBusErr170)
+		 | CntFull | TxUnderrun | TxDone | TxEmpty | EpicNapiEvent;
 
 	/* Find the connected MII xcvrs.
 	   Doing this in open() would allow detecting external xcvrs later, but
@@ -547,6 +558,8 @@ static int __devinit epic_init_one (stru
 	dev->ethtool_ops = &netdev_ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 	dev->tx_timeout = &epic_tx_timeout;
+	dev->poll = epic_poll;
+	dev->weight = 64;
 
 	ret = register_netdev(dev);
 	if (ret < 0)
@@ -608,6 +621,29 @@ static void epic_disable_int(struct net_
 	outl(0x00000000, ioaddr + INTMASK);
 }
 
+static inline void __epic_pci_commit(long ioaddr)
+{
+#ifndef USE_IO_OPS
+	inl(ioaddr + INTMASK);
+#endif
+}
+
+static void epic_napi_irq_off(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+
+	outl(ep->irq_mask & ~EpicNapiEvent, ioaddr + INTMASK);
+	__epic_pci_commit(ioaddr);
+}
+
+static void epic_napi_irq_on(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+
+	/* No need to commit possible posted write */
+	outl(ep->irq_mask | EpicNapiEvent, ioaddr + INTMASK);
+}
+
 static int __devinit read_eeprom(long ioaddr, int location)
 {
 	int i;
@@ -769,8 +805,7 @@ static int epic_open(struct net_device *
 	/* Enable interrupts by setting the interrupt mask. */
 	outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
 		 | CntFull | TxUnderrun | TxDone | TxEmpty
-		 | RxError | RxOverflow | RxFull | RxHeader | RxDone,
-		 ioaddr + INTMASK);
+		 | RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
 
 	if (debug > 1)
 		printk(KERN_DEBUG "%s: epic_open() ioaddr %lx IRQ %d status %4.4x "
@@ -811,7 +846,7 @@ static void epic_pause(struct net_device
 	}
 
 	/* Remove the packets on the Rx queue. */
-	epic_rx(dev);
+	epic_rx(dev, RX_RING_SIZE);
 }
 
 static void epic_restart(struct net_device *dev)
@@ -858,8 +893,8 @@ static void epic_restart(struct net_devi
 	/* Enable interrupts by setting the interrupt mask. */
 	outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
 		 | CntFull | TxUnderrun | TxDone | TxEmpty
-		 | RxError | RxOverflow | RxFull | RxHeader | RxDone,
-		 ioaddr + INTMASK);
+		 | RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
+
 	printk(KERN_DEBUG "%s: epic_restart() done, cmd status %4.4x, ctl %4.4x"
 		   " interrupt %4.4x.\n",
 		   dev->name, (int)inl(ioaddr + COMMAND), (int)inl(ioaddr + GENCTL),
@@ -945,7 +980,8 @@ static void epic_init_ring(struct net_de
 	int i;
 
 	ep->tx_full = 0;
-	ep->lock = (spinlock_t) SPIN_LOCK_UNLOCKED;
+	spin_lock_init(&ep->lock);
+	spin_lock_init(&ep->napi_lock);
 	ep->dirty_tx = ep->cur_tx = 0;
 	ep->cur_rx = ep->dirty_rx = 0;
 	ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1131,7 +1167,7 @@ static irqreturn_t epic_interrupt(int ir
 	do {
 		status = inl(ioaddr + INTSTAT);
 		/* Acknowledge all of the current interrupt sources ASAP. */
-		outl(status & 0x00007fff, ioaddr + INTSTAT);
+		outl(status & EpicNormalEvent, ioaddr + INTSTAT);
 
 		if (debug > 4)
 			printk(KERN_DEBUG "%s: Interrupt, status=%#8.8x new "
@@ -1142,16 +1178,22 @@ static irqreturn_t epic_interrupt(int ir
 			break;
 		handled = 1;
 
-		if (status & (RxDone | RxStarted | RxEarlyWarn | RxOverflow))
-			epic_rx(dev);
+		if (status & EpicNapiEvent) {
+			spin_lock(&ep->napi_lock);
+			if (netif_rx_schedule_prep(dev)) {
+				epic_napi_irq_off(dev, ep);
+				__netif_rx_schedule(dev);
+			}
+			spin_unlock(&ep->napi_lock);
+		}
 
 		if (status & (TxEmpty | TxDone))
 			epic_tx(dev, ep);
 
 		/* Check uncommon events all at once. */
-		if (status & (CntFull | TxUnderrun | RxOverflow | RxFull |
-					  PCIBusErr170 | PCIBusErr175)) {
-			if (status == 0xffffffff) /* Chip failed or removed (CardBus). */
+		if (status &
+		    (CntFull | TxUnderrun | PCIBusErr170 | PCIBusErr175)) {
+			if (status == EpicRemoved)
 				break;
 			/* Always update the error counts to avoid overhead later. */
 			ep->stats.rx_missed_errors += inb(ioaddr + MPCNT);
@@ -1164,11 +1206,6 @@ static irqreturn_t epic_interrupt(int ir
 				/* Restart the transmit process. */
 				outl(RestartTx, ioaddr + COMMAND);
 			}
-			if (status & RxOverflow) {	/* Missed a Rx frame. */
-				ep->stats.rx_errors++;
-			}
-			if (status & (RxOverflow | RxFull))
-				outw(RxQueued, ioaddr + COMMAND);
 			if (status & PCIBusErr170) {
 				printk(KERN_ERR "%s: PCI Bus Error!  EPIC status %4.4x.\n",
 					   dev->name, status);
@@ -1178,6 +1215,8 @@ static irqreturn_t epic_interrupt(int ir
 			/* Clear all error sources. */
 			outl(status & 0x7f18, ioaddr + INTSTAT);
 		}
+		if (status & EpicNormalEvent)
+			break;
 		if (--boguscnt < 0) {
 			printk(KERN_ERR "%s: Too much work at interrupt, "
 				   "IntrStatus=0x%8.8x.\n",
@@ -1195,7 +1234,7 @@ static irqreturn_t epic_interrupt(int ir
 	return IRQ_RETVAL(handled);
 }
 
-static int epic_rx(struct net_device *dev)
+static int epic_rx(struct net_device *dev, int budget)
 {
 	struct epic_private *ep = dev->priv;
 	int entry = ep->cur_rx % RX_RING_SIZE;
@@ -1205,6 +1244,10 @@ static int epic_rx(struct net_device *de
 	if (debug > 4)
 		printk(KERN_DEBUG " In epic_rx(), entry %d %8.8x.\n", entry,
 			   ep->rx_ring[entry].rxstatus);
+
+	if (rx_work_limit > budget)
+		rx_work_limit = budget;
+
 	/* If we own the next entry, it's a new packet. Send it up. */
 	while ((ep->rx_ring[entry].rxstatus & cpu_to_le32(DescOwn)) == 0) {
 		int status = le32_to_cpu(ep->rx_ring[entry].rxstatus);
@@ -1265,7 +1308,7 @@ static int epic_rx(struct net_device *de
 				ep->rx_skbuff[entry] = NULL;
 			}
 			skb->protocol = eth_type_trans(skb, dev);
-			netif_rx(skb);
+			netif_receive_skb(skb);
 			dev->last_rx = jiffies;
 			ep->stats.rx_packets++;
 			ep->stats.rx_bytes += pkt_len;
@@ -1293,6 +1336,58 @@ static int epic_rx(struct net_device *de
 	return work_done;
 }
 
+static void epic_rx_err(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+	int status;
+
+	status = inl(ioaddr + INTSTAT);
+
+	if (status == EpicRemoved)
+		return;
+	if (status & RxOverflow) 	/* Missed a Rx frame. */
+		ep->stats.rx_errors++;
+	if (status & (RxOverflow | RxFull))
+		outw(RxQueued, ioaddr + COMMAND);
+}
+
+static int epic_poll(struct net_device *dev, int *budget)
+{
+	struct epic_private *ep = dev->priv;
+	int work_done, orig_budget;
+	long ioaddr = dev->base_addr;
+
+	orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
+
+rx_action:
+	outl(EpicNapiEvent, ioaddr + INTSTAT);
+
+	work_done = epic_rx(dev, *budget);
+
+	epic_rx_err(dev, ep);
+
+	*budget -= work_done;
+	dev->quota -= work_done;
+
+	if (netif_running(dev) && (work_done < orig_budget)) {
+		unsigned long flags;
+		int status;
+
+		spin_lock_irqsave(&ep->napi_lock, flags);
+		epic_napi_irq_on(dev, ep);
+		__netif_rx_complete(dev);
+		spin_unlock_irqrestore(&ep->napi_lock, flags);
+
+		status = inl(ioaddr + INTSTAT);
+		if (status & EpicNapiEvent) {
+			epic_napi_irq_off(dev, ep);
+			goto rx_action;
+		}
+	}
+
+	return (work_done >= orig_budget);
+}
+
 static int epic_close(struct net_device *dev)
 {
 	long ioaddr = dev->base_addr;

_

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 4/4] 2.6.5-rc2 - epic100 napi
  2004-03-22 23:53       ` [PATCH 3/4] " Francois Romieu
@ 2004-03-22 23:53         ` Francois Romieu
  0 siblings, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:53 UTC (permalink / raw)
  To: netdev; +Cc: Jeff Garzik


TX NAPI.


 drivers/net/epic100.c |   20 +++++++++-----------
 1 files changed, 9 insertions(+), 11 deletions(-)

diff -puN drivers/net/epic100.c~epic100-napi-20 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-20	2004-03-23 00:18:40.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c	2004-03-23 00:18:40.000000000 +0100
@@ -96,8 +96,8 @@ static int rx_copybreak;
    Making the Tx ring too large decreases the effectiveness of channel
    bonding and packet priority.
    There are no ill effects from too-large receive rings. */
-#define TX_RING_SIZE	16
-#define TX_QUEUE_LEN	10		/* Limit ring entries actually used.  */
+#define TX_RING_SIZE	256
+#define TX_QUEUE_LEN	240		/* Limit ring entries actually used.  */
 #define RX_RING_SIZE	256
 #define TX_TOTAL_SIZE	TX_RING_SIZE*sizeof(struct epic_tx_desc)
 #define RX_TOTAL_SIZE	RX_RING_SIZE*sizeof(struct epic_rx_desc)
@@ -294,7 +294,8 @@ enum CommandBits {
 
 #define EpicRemoved	0xffffffff	/* Chip failed or removed (CardBus) */
 
-#define EpicNapiEvent	(RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
+#define EpicNapiEvent	(TxEmpty | TxDone | \
+			 RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
 #define EpicNormalEvent	(0x0000ffff & ~EpicNapiEvent)
 
 static u16 media2miictl[16] = {
@@ -503,7 +504,7 @@ static int __devinit epic_init_one (stru
 	ep->chip_flags = pci_id_tbl[chip_idx].drv_flags;
 	ep->irq_mask = 
 		(ep->chip_flags & TYPE2_INTR ?  PCIBusErr175 : PCIBusErr170)
-		 | CntFull | TxUnderrun | TxDone | TxEmpty | EpicNapiEvent;
+		 | CntFull | TxUnderrun | EpicNapiEvent;
 
 	/* Find the connected MII xcvrs.
 	   Doing this in open() would allow detecting external xcvrs later, but
@@ -804,7 +805,7 @@ static int epic_open(struct net_device *
 
 	/* Enable interrupts by setting the interrupt mask. */
 	outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
-		 | CntFull | TxUnderrun | TxDone | TxEmpty
+		 | CntFull | TxUnderrun 
 		 | RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
 
 	if (debug > 1)
@@ -892,7 +893,7 @@ static void epic_restart(struct net_devi
 
 	/* Enable interrupts by setting the interrupt mask. */
 	outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
-		 | CntFull | TxUnderrun | TxDone | TxEmpty
+		 | CntFull | TxUnderrun
 		 | RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
 
 	printk(KERN_DEBUG "%s: epic_restart() done, cmd status %4.4x, ctl %4.4x"
@@ -1111,7 +1112,6 @@ static void epic_tx(struct net_device *d
 	 * Note: if this lock becomes a problem we can narrow the locked
 	 * region at the cost of occasionally grabbing the lock more times.
 	 */
-	spin_lock(&ep->lock);
 	cur_tx = ep->cur_tx;
 	for (dirty_tx = ep->dirty_tx; cur_tx - dirty_tx > 0; dirty_tx++) {
 		struct sk_buff *skb;
@@ -1150,7 +1150,6 @@ static void epic_tx(struct net_device *d
 		ep->tx_full = 0;
 		netif_wake_queue(dev);
 	}
-	spin_unlock(&ep->lock);
 }
 
 
@@ -1187,9 +1186,6 @@ static irqreturn_t epic_interrupt(int ir
 			spin_unlock(&ep->napi_lock);
 		}
 
-		if (status & (TxEmpty | TxDone))
-			epic_tx(dev, ep);
-
 		/* Check uncommon events all at once. */
 		if (status &
 		    (CntFull | TxUnderrun | PCIBusErr170 | PCIBusErr175)) {
@@ -1362,6 +1358,8 @@ static int epic_poll(struct net_device *
 rx_action:
 	outl(EpicNapiEvent, ioaddr + INTSTAT);
 
+	epic_tx(dev, ep);
+
 	work_done = epic_rx(dev, *budget);
 
 	epic_rx_err(dev, ep);

_

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/4] 2.6.5-rc2 - epic100 update
  2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
  2004-03-22 23:51   ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
@ 2004-03-23  0:12   ` Jeff Garzik
  1 sibling, 0 replies; 18+ messages in thread
From: Jeff Garzik @ 2004-03-23  0:12 UTC (permalink / raw)
  To: Francois Romieu; +Cc: netdev, Andrew Morton

Francois Romieu wrote:
> Schedule:
> 
> - epic100-fixup.patch:   opportunistic cleanup
> - epic100-napi-00.patch: code shuffling before the move
> - epic100-napi-10.patch: rx napi (includes netif_running change)
> - epic100-napi-20.patch: minimalistic tx napi
> 
> 2.6.5-rc2 and 2.6.5-rc2-mm1 contain the same drivers/net/epic100.c
> so the patches should apply equally to both.
> 
> The driver has been moderately tested. 

Thanks.

Applied to my netdev-2.6 queue, and so it should automatically appear in 
Andrew's -mm tree soon.

	Jeff

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-21 18:24 ` Jeff Garzik
  2004-03-21 23:47   ` Francois Romieu
@ 2004-03-23 14:29   ` OGAWA Hirofumi
  2004-03-23 15:14     ` Jeff Garzik
  2004-03-23 18:51     ` Francois Romieu
  1 sibling, 2 replies; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-23 14:29 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Francois Romieu, netdev

Jeff Garzik <jgarzik@pobox.com> writes:

> > +	if (work_done < orig_budget) {
> > +		unsigned long flags;
> > +		int status;
> > +
> > +		spin_lock_irqsave(&ep->napi_lock, flags);
> > +		epic_napi_irq_on(dev, ep);
> > +		__netif_rx_complete(dev);
> > +		spin_unlock_irqrestore(&ep->napi_lock, flags);
> > +
> > +		status = inl(ioaddr + INTSTAT);
> > +		if (status & EpicNapiEvent) {
> > +			epic_napi_irq_off(dev, ep);
> > +			goto rx_action;
> > +		}
> 
> Need to add a netif_running() check to the 'if' test at the top of the
> quote.
> 
> Are you (or somebody else?) interested in reviewing all the in-tree
> NAPI drivers, and seeing if other drivers have this bug?  I think
> 8139cp.c does at least, maybe e100 too...  Such a fix would need to go
> into 2.4.x as well.

Umm.. the above code is part of ->poll(). I think xxx_interrupt() needs
netif_running() instead. The driver must clear the __LINK_STATE_RX_SCHED
flag...

BTW, ->napi_lock is unneeded because netif_schedule() is already
atomic; it only needs local_irq_enable/disable().

The code must not do "goto rx_action" after __netif_rx_complete(), otherwise
it may cause double scheduling; the check should move before spin_lock().

Thanks.
-- 
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-23 14:29   ` OGAWA Hirofumi
@ 2004-03-23 15:14     ` Jeff Garzik
  2004-03-23 16:05       ` OGAWA Hirofumi
  2004-03-23 18:51     ` Francois Romieu
  1 sibling, 1 reply; 18+ messages in thread
From: Jeff Garzik @ 2004-03-23 15:14 UTC (permalink / raw)
  To: OGAWA Hirofumi; +Cc: Francois Romieu, netdev

OGAWA Hirofumi wrote:
> Jeff Garzik <jgarzik@pobox.com> writes:
> 
> 
>>>+	if (work_done < orig_budget) {
>>>+		unsigned long flags;
>>>+		int status;
>>>+
>>>+		spin_lock_irqsave(&ep->napi_lock, flags);
>>>+		epic_napi_irq_on(dev, ep);
>>>+		__netif_rx_complete(dev);
>>>+		spin_unlock_irqrestore(&ep->napi_lock, flags);
>>>+
>>>+		status = inl(ioaddr + INTSTAT);
>>>+		if (status & EpicNapiEvent) {
>>>+			epic_napi_irq_off(dev, ep);
>>>+			goto rx_action;
>>>+		}
>>
>>Need to add a netif_running() check to the 'if' test at the top of the
>>quote.
>>
>>Are you (or somebody else?) interested in reviewing all the in-tree
>>NAPI drivers, and seeing if other drivers have this bug?  I think
>>8139cp.c does at least, maybe e100 too...  Such a fix would need to go
>>into 2.4.x as well.
> 
> 
> Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> flag...

Most interrupt routines already test this, look at

static inline int netif_rx_schedule_prep(struct net_device *dev)
{
         return netif_running(dev) &&
                 !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state);
}

It shouldn't schedule unless the interface is running.

However...  I believe it was you that added this check to 8139cp.c:

         /* close possible race's with dev_close */
         if (unlikely(!netif_running(dev))) {
                 cpw16(IntrMask, 0);
                 goto out;
         }

I like this, because regardless of NAPI, most drivers have 
non-NAPI-related interrupts they must still process.  This check handles 
that.

Although this code is a bit redundant to some of the locking and 
synchronization found in net driver dev->close() methods, I think it is 
a nice thing to do.

I do wonder about the consequences, on some hardware, about receiving an 
interrupt and -not- processing the RX or TX completions associated with 
that.  For most NIC hardware, you'll get sane behavior, but not all, I 
bet...


> BTW, ->napi_lock is unneeded because netif_schedule() is already
> atomic, it need only local_irq_enable/disable().
> 
> After __netif_rx_complete() must not do "goto rx_action", otherwise it

Agreed.

	Jeff

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-23 15:14     ` Jeff Garzik
@ 2004-03-23 16:05       ` OGAWA Hirofumi
  0 siblings, 0 replies; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-23 16:05 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Francois Romieu, netdev

Jeff Garzik <jgarzik@pobox.com> writes:

> > Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> > netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> > flag...
> 
> Most interrupt routines already test this, look at
> 
> static inline int netif_rx_schedule_prep(struct net_device *dev)
> {
>          return netif_running(dev) &&
>                  !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state);
> }
> 
> It shouldn't schedule unless the interface is running.

Yes.

> However...  I believe it was you that added this check to 8139cp.c:
>
>          /* close possible race's with dev_close */
>          if (unlikely(!netif_running(dev))) {
>                  cpw16(IntrMask, 0);
>                  goto out;
>          }

Yes, I added. And my suggestion was about this.

Because in the case of 8139too, I got too many interrupts for pending RX
during the following code, and it never finished
(dev->close() wasn't called).

dev_close(),
	clear_bit(__LINK_STATE_START, &dev->state);

	smp_mb__after_clear_bit(); /* Commit netif_running(). */
	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
		/* No hurry. */
		current->state = TASK_INTERRUPTIBLE;
		schedule_timeout(1);
	}


> I do wonder about the consequences, on some hardware, about receiving
> an interrupt and -not- processing the RX or TX completions associated
> with that.  For most NIC hardware, you'll get sane behavior, but not
> all, I bet...

Does this mean that one should _not_ receive the interrupt for pending RX/TX?

Thanks.
-- 
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-23 14:29   ` OGAWA Hirofumi
  2004-03-23 15:14     ` Jeff Garzik
@ 2004-03-23 18:51     ` Francois Romieu
  2004-03-23 19:59       ` OGAWA Hirofumi
  1 sibling, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-23 18:51 UTC (permalink / raw)
  To: OGAWA Hirofumi; +Cc: Jeff Garzik, netdev

OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
[...]
> Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> flag...
> 
> BTW, ->napi_lock is unneeded because netif_schedule() is already
> atomic, it need only local_irq_enable/disable().

Color me confused. The lock is supposed to protect against:

CPU1                         CPU2
[poll]
epic_napi_irq_on(dev, ep);
                             [irq handler]
                             if (netif_rx_schedule_prep(dev)) {
                                     epic_napi_irq_off(dev, ep);
                                     __netif_rx_schedule(dev);
                             }
__netif_rx_complete(dev);

-> napi irq are disabled and device is removed from poll list. What will
   prevent it ?

> After __netif_rx_complete() must not do "goto rx_action", otherwise it
> may become cause of twice scheduleing, it should move before spin_lock().

I understand the previous statement as:

+               status = inl(ioaddr + INTSTAT);
+               if (status & EpicNapiEvent) {
+                       epic_napi_irq_off(dev, ep);
+                       goto rx_action;
+
+               spin_lock_irqsave(&ep->napi_lock, flags);
+               epic_napi_irq_on(dev, ep);
+               __netif_rx_complete(dev);
+               spin_unlock_irqrestore(&ep->napi_lock, flags);
+

Afaiu, if some data comes in just before the spin_lock, it may wait for ages.

Can you reformulate as I feel I still did not get it right.

--
Ueimor

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-23 18:51     ` Francois Romieu
@ 2004-03-23 19:59       ` OGAWA Hirofumi
  2004-03-24  0:41         ` Francois Romieu
  0 siblings, 1 reply; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-23 19:59 UTC (permalink / raw)
  To: Francois Romieu; +Cc: Jeff Garzik, netdev

Francois Romieu <romieu@fr.zoreil.com> writes:

> OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
> [...]
> > Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> > netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> > flag...
> > 
> > BTW, ->napi_lock is unneeded because netif_schedule() is already
> > atomic, it need only local_irq_enable/disable().
> 
> Color me confused. The lock is supposed to protect against:
> 
> CPU1                         CPU2
> [poll]
> epic_napi_irq_on(dev, ep);
>                              [irq handler]
>                              if (netif_rx_schedule_prep(dev)) {
>                                      epic_napi_irq_off(dev, ep);
>                                      __netif_rx_schedule(dev);
>                              }
> __netif_rx_complete(dev);
> 
> -> napi irq are disabled and device is removed from poll list. What will
>    prevent it ?

The __LINK_STATE_RX_SCHED flag stays set until __netif_rx_complete() is called.
So netif_rx_schedule_prep() returns 0.

> > After __netif_rx_complete() must not do "goto rx_action", otherwise it
> > may become cause of twice scheduleing, it should move before spin_lock().
> 
>  understand the previous statement as:
> 
> +               status = inl(ioaddr + INTSTAT);
> +               if (status & EpicNapiEvent) {
> +                       epic_napi_irq_off(dev, ep);
> +                       goto rx_action;
> +
> +               spin_lock_irqsave(&ep->napi_lock, flags);
> +               epic_napi_irq_on(dev, ep);
> +               __netif_rx_complete(dev);
> +               spin_unlock_irqrestore(&ep->napi_lock, flags);
> +
> 
> Afaiu, if some data comes in just before the spin_lock, it may wait for ages.

Yes, maybe. But if it is after spin_lock, the loop may call
__netif_rx_schedule() twice. And netif_rx_complete() doesn't call dev_put().
It will leak the dev->refcnt, I think.

> +               __netif_rx_complete(dev);
> +               spin_unlock_irqrestore(&ep->napi_lock, flags);
           -- interrupt and call __netif_rx_schedule() --
> +               status = inl(ioaddr + INTSTAT);
> +               if (status & EpicNapiEvent) {
> +                       epic_napi_irq_off(dev, ep);
> +                       goto rx_action;

Thanks.
-- 
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-23 19:59       ` OGAWA Hirofumi
@ 2004-03-24  0:41         ` Francois Romieu
  2004-03-24  2:52           ` OGAWA Hirofumi
  2004-03-25  0:27           ` [PATCH] 2.6.5-rc2 - more " Francois Romieu
  0 siblings, 2 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-24  0:41 UTC (permalink / raw)
  To: OGAWA Hirofumi; +Cc: Jeff Garzik, netdev

OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
[...]
> > -> napi irq are disabled and device is removed from poll list. What will
> >    prevent it ?
> 
> __LINK_STATE_RX_SCHED flag is setting until __netif_rx_complete() is called.
> So netif_rx_schedule_prep() returns 0.

Ok, thanks for the explanation. It is possible that the lock stays anyway (see
below).

[...]
> > Afaiu, if some data comes in just before the spin_lock, it may wait for ages.
> 
> Yes, maybe. But, if after spin_lock, it loop may call the twice
> __netif_rx_schedule(). And netif_rx_complete() doesn't call dev_put().
> It will leaks the dev->refcnt, I think.

@$*#!zW

The following patch should avoid the leak as well as the packet rot
(untested, 1:33 AM, apply on top of previous series).


Multiple invocation of __netif_rx_schedule() in epic_interrupt() while
epic_poll loops over __netif_rx_complete() leads to serious device
refcount leak.


 drivers/net/epic100.c |   26 +++++++++++++-------------
 1 files changed, 13 insertions(+), 13 deletions(-)

diff -puN drivers/net/epic100.c~epic100-napi-30 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-30	2004-03-24 01:18:25.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c	2004-03-24 01:19:35.000000000 +0100
@@ -337,6 +337,7 @@ struct epic_private {
 	/* Ring pointers. */
 	spinlock_t lock;				/* Group with Tx control cache line. */
 	spinlock_t napi_lock;
+	unsigned int reschedule_in_poll;
 	unsigned int cur_tx, dirty_tx;
 
 	unsigned int cur_rx, dirty_rx;
@@ -472,7 +473,9 @@ static int __devinit epic_init_one (stru
 	dev->base_addr = ioaddr;
 	dev->irq = irq;
 
-	spin_lock_init (&ep->lock);
+	spin_lock_init(&ep->lock);
+	spin_lock_init(&ep->napi_lock);
+	ep->reschedule_in_poll = 0;
 
 	/* Bring the chip out of low-power mode. */
 	outl(0x4200, ioaddr + GENCTL);
@@ -981,8 +984,6 @@ static void epic_init_ring(struct net_de
 	int i;
 
 	ep->tx_full = 0;
-	spin_lock_init(&ep->lock);
-	spin_lock_init(&ep->napi_lock);
 	ep->dirty_tx = ep->cur_tx = 0;
 	ep->cur_rx = ep->dirty_rx = 0;
 	ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1152,7 +1153,6 @@ static void epic_tx(struct net_device *d
 	}
 }
 
-
 /* The interrupt handler does all of the Rx thread work and cleans up
    after the Tx thread. */
 static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1177,12 +1177,13 @@ static irqreturn_t epic_interrupt(int ir
 			break;
 		handled = 1;
 
-		if (status & EpicNapiEvent) {
+		if ((status & EpicNapiEvent) && !ep->reschedule_in_poll) {
 			spin_lock(&ep->napi_lock);
 			if (netif_rx_schedule_prep(dev)) {
 				epic_napi_irq_off(dev, ep);
 				__netif_rx_schedule(dev);
-			}
+			} else
+				ep->reschedule_in_poll++;
 			spin_unlock(&ep->napi_lock);
 		}
 
@@ -1355,7 +1356,6 @@ static int epic_poll(struct net_device *
 
 	orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
 
-rx_action:
 	outl(EpicNapiEvent, ioaddr + INTSTAT);
 
 	epic_tx(dev, ep);
@@ -1369,18 +1369,18 @@ rx_action:
 
 	if (netif_running(dev) && (work_done < orig_budget)) {
 		unsigned long flags;
-		int status;
 
-		spin_lock_irqsave(&ep->napi_lock, flags);
 		epic_napi_irq_on(dev, ep);
+
+		spin_lock_irqsave(&ep->napi_lock, flags);
 		__netif_rx_complete(dev);
-		spin_unlock_irqrestore(&ep->napi_lock, flags);
 
-		status = inl(ioaddr + INTSTAT);
-		if (status & EpicNapiEvent) {
+		if (ep->reschedule_in_poll) {
 			epic_napi_irq_off(dev, ep);
-			goto rx_action;
+			__netif_rx_schedule(dev);
+			ep->reschedule_in_poll--;
 		}
+		spin_unlock_irqrestore(&ep->napi_lock, flags);
 	}
 
 	return (work_done >= orig_budget);

_

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-24  0:41         ` Francois Romieu
@ 2004-03-24  2:52           ` OGAWA Hirofumi
  2004-03-24 12:33             ` Francois Romieu
  2004-03-25  0:27           ` [PATCH] 2.6.5-rc2 - more " Francois Romieu
  1 sibling, 1 reply; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-24  2:52 UTC (permalink / raw)
  To: Francois Romieu; +Cc: Jeff Garzik, netdev

Francois Romieu <romieu@fr.zoreil.com> writes:

> > Yes, maybe. But, if after spin_lock, it loop may call the twice
> > __netif_rx_schedule(). And netif_rx_complete() doesn't call dev_put().
> > It will leaks the dev->refcnt, I think.
> 
> @$*#!zW
> 
> The following patch should avoid the leak as well as the packet rot
> (untested, 1:33 AM, apply on top of previous serie).
> 
> 
> Multiple invocation of __netif_rx_schedule() in epic_interrupt() while
> epic_poll loops over __netif_rx_complete() leads to serious device
> refcount leak.

Do you care about the lost interrupt? If so, I misread it.

IIRC, the PCI spec requires level-triggered interrupts. So the device asserts
the IRQ signal until the driver clears the pending interrupt. No?
-- 
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-24  2:52           ` OGAWA Hirofumi
@ 2004-03-24 12:33             ` Francois Romieu
  0 siblings, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-24 12:33 UTC (permalink / raw)
  To: OGAWA Hirofumi; +Cc: Jeff Garzik, netdev

OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
[...]
> Do you care the lost interrupt? If so, I was miss reading it.
> 
> IIRC, PCI spec requires the level-trigger. So devices asserts IRQ
> signal until the driver clears the pending interrupt. No?

<insert usual "correct me if I'm wrong" disclaimer here>

The driver only masks the interrupts which are napi related so
epic_poll() and epic_interrupt() are always racing. I completely agree that
it "wastes" the nice behavior of level-triggered irq. If one wants to avoid
the race, everything should go to the poll() handler. It implies polled
access to the INTSTAT register (as done in epic_rx_err).

I could not find in the archives some general napi-wisdom which suggests
that everything has to go to the poll() handler, be it for simplicity or
for a real gain. So I hesitate to follow the other way and exchange the
polled access for some (locked) traffic between the poll() and the irq
handler.

Comments welcome.

--
Ueimor

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] 2.6.5-rc2 - more epic100 napi
  2004-03-24  0:41         ` Francois Romieu
  2004-03-24  2:52           ` OGAWA Hirofumi
@ 2004-03-25  0:27           ` Francois Romieu
  1 sibling, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-25  0:27 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: OGAWA Hirofumi, netdev

Francois Romieu <romieu@fr.zoreil.com> :
[...]
> diff -puN drivers/net/epic100.c~epic100-napi-30 drivers/net/epic100.c
> --- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-30	2004-03-24 01:18:25.000000000 +0100
> +++ linux-2.6.5-rc2-fr/drivers/net/epic100.c	2004-03-24 01:19:35.000000000 +0100
> @@ -1355,7 +1356,6 @@ static int epic_poll(struct net_device *
>  
>  	orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
>  
> -rx_action:
>  	outl(EpicNapiEvent, ioaddr + INTSTAT);
>  
>  	epic_tx(dev, ep);
> @@ -1369,18 +1369,18 @@ rx_action:
>  
>  	if (netif_running(dev) && (work_done < orig_budget)) {
>  		unsigned long flags;
> -		int status;
>  
> -		spin_lock_irqsave(&ep->napi_lock, flags);
>  		epic_napi_irq_on(dev, ep);
> +
> +		spin_lock_irqsave(&ep->napi_lock, flags);
>  		__netif_rx_complete(dev);
> -		spin_unlock_irqrestore(&ep->napi_lock, flags);
>  
> -		status = inl(ioaddr + INTSTAT);
> -		if (status & EpicNapiEvent) {
> +		if (ep->reschedule_in_poll) {
>  			epic_napi_irq_off(dev, ep);
> -			goto rx_action;
> +			__netif_rx_schedule(dev);
                        ^^^^^^^^^^^^^^^^^^^^^^^^^

While in poll() handler, brilliant :o(

Please apply (tested) patch below on top of the acked patches (i.e 1/4...4/4):
- issuing commands via the serial console under an incoming stream of 40k
  short sized pps sucks but it is possible;
- does not leak refcount (it rmmods fine).

Next step: identify the best performer amongst the previously discussed changes.


Multiple invocations of __netif_rx_schedule() in epic_interrupt() while
epic_poll() loops over __netif_rx_complete() lead to a serious device
refcount leak.


 drivers/net/epic100.c |   33 ++++++++++++++++++---------------
 1 files changed, 18 insertions(+), 15 deletions(-)

diff -puN drivers/net/epic100.c~epic100-napi-30 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-30	2004-03-24 01:18:25.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c	2004-03-25 00:51:30.000000000 +0100
@@ -337,6 +337,7 @@ struct epic_private {
 	/* Ring pointers. */
 	spinlock_t lock;				/* Group with Tx control cache line. */
 	spinlock_t napi_lock;
+	unsigned int reschedule_in_poll;
 	unsigned int cur_tx, dirty_tx;
 
 	unsigned int cur_rx, dirty_rx;
@@ -472,7 +473,9 @@ static int __devinit epic_init_one (stru
 	dev->base_addr = ioaddr;
 	dev->irq = irq;
 
-	spin_lock_init (&ep->lock);
+	spin_lock_init(&ep->lock);
+	spin_lock_init(&ep->napi_lock);
+	ep->reschedule_in_poll = 0;
 
 	/* Bring the chip out of low-power mode. */
 	outl(0x4200, ioaddr + GENCTL);
@@ -981,8 +984,6 @@ static void epic_init_ring(struct net_de
 	int i;
 
 	ep->tx_full = 0;
-	spin_lock_init(&ep->lock);
-	spin_lock_init(&ep->napi_lock);
 	ep->dirty_tx = ep->cur_tx = 0;
 	ep->cur_rx = ep->dirty_rx = 0;
 	ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1152,7 +1153,6 @@ static void epic_tx(struct net_device *d
 	}
 }
 
-
 /* The interrupt handler does all of the Rx thread work and cleans up
    after the Tx thread. */
 static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1177,14 +1177,16 @@ static irqreturn_t epic_interrupt(int ir
 			break;
 		handled = 1;
 
-		if (status & EpicNapiEvent) {
+		if ((status & EpicNapiEvent) && !ep->reschedule_in_poll) {
 			spin_lock(&ep->napi_lock);
 			if (netif_rx_schedule_prep(dev)) {
 				epic_napi_irq_off(dev, ep);
 				__netif_rx_schedule(dev);
-			}
+			} else
+				ep->reschedule_in_poll++;
 			spin_unlock(&ep->napi_lock);
 		}
+		status &= ~EpicNapiEvent;
 
 		/* Check uncommon events all at once. */
 		if (status &
@@ -1211,7 +1213,7 @@ static irqreturn_t epic_interrupt(int ir
 			/* Clear all error sources. */
 			outl(status & 0x7f18, ioaddr + INTSTAT);
 		}
-		if (status & EpicNormalEvent)
+		if (!(status & EpicNormalEvent))
 			break;
 		if (--boguscnt < 0) {
 			printk(KERN_ERR "%s: Too much work at interrupt, "
@@ -1356,7 +1358,6 @@ static int epic_poll(struct net_device *
 	orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
 
 rx_action:
-	outl(EpicNapiEvent, ioaddr + INTSTAT);
 
 	epic_tx(dev, ep);
 
@@ -1369,18 +1370,20 @@ rx_action:
 
 	if (netif_running(dev) && (work_done < orig_budget)) {
 		unsigned long flags;
-		int status;
 
 		spin_lock_irqsave(&ep->napi_lock, flags);
-		epic_napi_irq_on(dev, ep);
-		__netif_rx_complete(dev);
-		spin_unlock_irqrestore(&ep->napi_lock, flags);
 
-		status = inl(ioaddr + INTSTAT);
-		if (status & EpicNapiEvent) {
-			epic_napi_irq_off(dev, ep);
+		if (ep->reschedule_in_poll) {
+			ep->reschedule_in_poll--;
+			spin_unlock_irqrestore(&ep->napi_lock, flags);
 			goto rx_action;
 		}
+
+		outl(EpicNapiEvent, ioaddr + INTSTAT);
+		epic_napi_irq_on(dev, ep);
+		__netif_rx_complete(dev);
+
+		spin_unlock_irqrestore(&ep->napi_lock, flags);
 	}
 
 	return (work_done >= orig_budget);

_

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2004-03-25  0:27 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-03-20 14:21 [PATCH] [RFT] 2.6.4 - epic100 napi Francois Romieu
2004-03-21 18:24 ` Jeff Garzik
2004-03-21 23:47   ` Francois Romieu
2004-03-23 14:29   ` OGAWA Hirofumi
2004-03-23 15:14     ` Jeff Garzik
2004-03-23 16:05       ` OGAWA Hirofumi
2004-03-23 18:51     ` Francois Romieu
2004-03-23 19:59       ` OGAWA Hirofumi
2004-03-24  0:41         ` Francois Romieu
2004-03-24  2:52           ` OGAWA Hirofumi
2004-03-24 12:33             ` Francois Romieu
2004-03-25  0:27           ` [PATCH] 2.6.5-rc2 - more " Francois Romieu
2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
2004-03-22 23:51   ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
2004-03-22 23:52     ` [PATCH 2/4] 2.6.5-rc2 - epic100 napi Francois Romieu
2004-03-22 23:53       ` [PATCH 3/4] " Francois Romieu
2004-03-22 23:53         ` [PATCH 4/4] " Francois Romieu
2004-03-23  0:12   ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Jeff Garzik

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).