[PATCH] [RFT] 2.6.4 - epic100 napi

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] [RFT] 2.6.4 - epic100 napi
@ 2004-03-20 14:21 Francois Romieu
  2004-03-21 18:24 ` Jeff Garzik
  2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
  0 siblings, 2 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-20 14:21 UTC (permalink / raw)
  To: netdev, Jeff Garzik

People are welcome to report how the following patch behaves on their
hardware. It does not seem too bad here but it probably is still a bit rough.
A split version of the patch will follow tomorrow. This one definitely aims
at brave and/or bored testers.

The driver lacks ethtool support. Badly. :o/

--- linux-2.6.4/drivers/net/epic100.c	2004-03-20 14:52:08.000000000 +0100
+++ linux-2.6.4/drivers/net/epic100.c	2004-03-20 14:52:13.000000000 +0100
@@ -96,9 +96,9 @@ static int rx_copybreak;
    Making the Tx ring too large decreases the effectiveness of channel
    bonding and packet priority.
    There are no ill effects from too-large receive rings. */
-#define TX_RING_SIZE	16
+#define TX_RING_SIZE	256
 #define TX_QUEUE_LEN	10		/* Limit ring entries actually used.  */
-#define RX_RING_SIZE	32
+#define RX_RING_SIZE	256
 #define TX_TOTAL_SIZE	TX_RING_SIZE*sizeof(struct epic_tx_desc)
 #define RX_TOTAL_SIZE	RX_RING_SIZE*sizeof(struct epic_rx_desc)
 
@@ -292,6 +292,10 @@ enum CommandBits {
 	StopTxDMA=0x20, StopRxDMA=0x40, RestartTx=0x80,
 };
 
+#define EpicNapiEvent	(TxEmpty | TxDone | \
+			 RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
+#define EpicNormalEvent	(0x0000ffffUL & ~EpicNapiEvent)
+
 static u16 media2miictl[16] = {
 	0, 0x0C00, 0x0C00, 0x2000,  0x0100, 0x2100, 0, 0,
 	0, 0, 0, 0,  0, 0, 0, 0 };
@@ -330,9 +334,11 @@ struct epic_private {
 
 	/* Ring pointers. */
 	spinlock_t lock;				/* Group with Tx control cache line. */
+	spinlock_t napi_lock;
 	unsigned int cur_tx, dirty_tx;
 
 	unsigned int cur_rx, dirty_rx;
+	u32 irq_mask;
 	unsigned int rx_buf_sz;				/* Based on MTU+slack. */
 
 	struct pci_dev *pci_dev;			/* PCI bus location. */
@@ -359,7 +365,8 @@ static void epic_timer(unsigned long dat
 static void epic_tx_timeout(struct net_device *dev);
 static void epic_init_ring(struct net_device *dev);
 static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev);
-static int epic_rx(struct net_device *dev);
+static int epic_rx(struct net_device *dev, int budget);
+static int epic_poll(struct net_device *dev, int *budget);
 static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static struct ethtool_ops netdev_ethtool_ops;
@@ -378,7 +385,7 @@ static int __devinit epic_init_one (stru
 	int irq;
 	struct net_device *dev;
 	struct epic_private *ep;
-	int i, option = 0, duplex = 0;
+	int i, ret, option = 0, duplex = 0;
 	void *ring_space;
 	dma_addr_t ring_dma;
 
@@ -392,29 +399,33 @@ static int __devinit epic_init_one (stru
 	
 	card_idx++;
 	
-	i = pci_enable_device(pdev);
-	if (i)
-		return i;
+	ret = pci_enable_device(pdev);
+	if (ret)
+		goto out;
 	irq = pdev->irq;
 
 	if (pci_resource_len(pdev, 0) < pci_id_tbl[chip_idx].io_size) {
 		printk (KERN_ERR "card %d: no PCI region space\n", card_idx);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err_out_disable;
 	}
 	
 	pci_set_master(pdev);
 
+	ret = pci_request_regions(pdev, DRV_NAME);
+	if (ret < 0)
+		goto err_out_disable;
+
+	ret = -ENOMEM;
+
 	dev = alloc_etherdev(sizeof (*ep));
 	if (!dev) {
 		printk (KERN_ERR "card %d: no memory for eth device\n", card_idx);
-		return -ENOMEM;
+		goto err_out_free_res;
 	}
 	SET_MODULE_OWNER(dev);
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
-	if (pci_request_regions(pdev, DRV_NAME))
-		goto err_out_free_netdev;
-
 #ifdef USE_IO_OPS
 	ioaddr = pci_resource_start (pdev, 0);
 #else
@@ -422,7 +433,7 @@ static int __devinit epic_init_one (stru
 	ioaddr = (long) ioremap (ioaddr, pci_resource_len (pdev, 1));
 	if (!ioaddr) {
 		printk (KERN_ERR DRV_NAME " %d: ioremap failed\n", card_idx);
-		goto err_out_free_res;
+		goto err_out_free_netdev;
 	}
 #endif
 
@@ -489,6 +500,9 @@ static int __devinit epic_init_one (stru
 	ep->pci_dev = pdev;
 	ep->chip_id = chip_idx;
 	ep->chip_flags = pci_id_tbl[chip_idx].drv_flags;
+	ep->irq_mask = 
+		(ep->chip_flags & TYPE2_INTR ?  PCIBusErr175 : PCIBusErr170)
+		 | CntFull | TxUnderrun | EpicNapiEvent;
 
 	/* Find the connected MII xcvrs.
 	   Doing this in open() would allow detecting external xcvrs later, but
@@ -543,10 +557,12 @@ static int __devinit epic_init_one (stru
 	dev->ethtool_ops = &netdev_ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 	dev->tx_timeout = &epic_tx_timeout;
+	dev->poll = epic_poll;
+	dev->weight = 64;
 
-	i = register_netdev(dev);
-	if (i)
-		goto err_out_unmap_tx;
+	ret = register_netdev(dev);
+	if (ret < 0)
+		goto err_out_unmap_rx;
 
 	printk(KERN_INFO "%s: %s at %#lx, IRQ %d, ",
 		   dev->name, pci_id_tbl[chip_idx].name, ioaddr, dev->irq);
@@ -554,19 +570,24 @@ static int __devinit epic_init_one (stru
 		printk("%2.2x:", dev->dev_addr[i]);
 	printk("%2.2x.\n", dev->dev_addr[i]);
 
-	return 0;
+out:
+	return ret;
 
+err_out_unmap_rx:
+	pci_free_consistent(pdev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma);
 err_out_unmap_tx:
 	pci_free_consistent(pdev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma);
 err_out_iounmap:
 #ifndef USE_IO_OPS
 	iounmap(ioaddr);
-err_out_free_res:
-#endif
-	pci_release_regions(pdev);
 err_out_free_netdev:
+#endif
 	free_netdev(dev);
-	return -ENODEV;
+err_out_free_res:
+	pci_release_regions(pdev);
+err_out_disable:
+	pci_disable_device(pdev);
+	goto out;
 }
 \f
 /* Serial EEPROM section. */
@@ -592,6 +613,36 @@ err_out_free_netdev:
 #define EE_READ256_CMD	(6 << 8)
 #define EE_ERASE_CMD	(7 << 6)
 
+static void epic_disable_int(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+
+	outl(0x00000000, ioaddr + INTMASK);
+}
+
+static inline void __epic_pci_commit(long ioaddr)
+{
+#ifndef USE_IO_OPS
+	inl(ioaddr + INTMASK);
+#endif
+}
+
+static void epic_napi_irq_off(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+
+	outl(ep->irq_mask & ~EpicNapiEvent, ioaddr + INTMASK);
+	__epic_pci_commit(ioaddr);
+}
+
+static void epic_napi_irq_on(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+
+	/* No need to commit possible posted write */
+	outl(ep->irq_mask | EpicNapiEvent, ioaddr + INTMASK);
+}
+
 static int __devinit read_eeprom(long ioaddr, int location)
 {
 	int i;
@@ -753,8 +804,7 @@ static int epic_open(struct net_device *
 	/* Enable interrupts by setting the interrupt mask. */
 	outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
 		 | CntFull | TxUnderrun | TxDone | TxEmpty
-		 | RxError | RxOverflow | RxFull | RxHeader | RxDone,
-		 ioaddr + INTMASK);
+		 | RxError | EpicNapiEvent, ioaddr + INTMASK);
 
 	if (debug > 1)
 		printk(KERN_DEBUG "%s: epic_open() ioaddr %lx IRQ %d status %4.4x "
@@ -795,7 +845,7 @@ static void epic_pause(struct net_device
 	}
 
 	/* Remove the packets on the Rx queue. */
-	epic_rx(dev);
+	epic_rx(dev, RX_RING_SIZE);
 }
 
 static void epic_restart(struct net_device *dev)
@@ -842,7 +892,7 @@ static void epic_restart(struct net_devi
 	/* Enable interrupts by setting the interrupt mask. */
 	outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
 		 | CntFull | TxUnderrun | TxDone | TxEmpty
-		 | RxError | RxOverflow | RxFull | RxHeader | RxDone,
+		 | RxError | EpicNapiEvent,
 		 ioaddr + INTMASK);
 	printk(KERN_DEBUG "%s: epic_restart() done, cmd status %4.4x, ctl %4.4x"
 		   " interrupt %4.4x.\n",
@@ -929,7 +979,8 @@ static void epic_init_ring(struct net_de
 	int i;
 
 	ep->tx_full = 0;
-	ep->lock = (spinlock_t) SPIN_LOCK_UNLOCKED;
+	spin_lock_init(&ep->lock);
+	spin_lock_init(&ep->napi_lock);
 	ep->dirty_tx = ep->cur_tx = 0;
 	ep->cur_rx = ep->dirty_rx = 0;
 	ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1029,6 +1080,77 @@ static int epic_start_xmit(struct sk_buf
 	return 0;
 }
 
+static void epic_tx_error(struct net_device *dev, struct epic_private *ep,
+			  int status)
+{
+	struct net_device_stats *stats = &ep->stats;
+
+#ifndef final_version
+	/* There was an major error, log it. */
+	if (debug > 1)
+		printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
+		       dev->name, status);
+#endif
+	stats->tx_errors++;
+	if (status & 0x1050)
+		stats->tx_aborted_errors++;
+	if (status & 0x0008)
+		stats->tx_carrier_errors++;
+	if (status & 0x0040)
+		stats->tx_window_errors++;
+	if (status & 0x0010)
+		stats->tx_fifo_errors++;
+}
+
+static void epic_tx(struct net_device *dev, struct epic_private *ep)
+{
+	unsigned int dirty_tx, cur_tx;
+
+	/*
+	 * Note: if this lock becomes a problem we can narrow the locked
+	 * region at the cost of occasionally grabbing the lock more times.
+	 */
+	cur_tx = ep->cur_tx;
+	for (dirty_tx = ep->dirty_tx; cur_tx - dirty_tx > 0; dirty_tx++) {
+		struct sk_buff *skb;
+		int entry = dirty_tx % TX_RING_SIZE;
+		int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
+
+		if (txstatus & DescOwn)
+			break;	/* It still hasn't been Txed */
+
+		if (likely(txstatus & 0x0001)) {
+			ep->stats.collisions += (txstatus >> 8) & 15;
+			ep->stats.tx_packets++;
+			ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
+		} else
+			epic_tx_error(dev, ep, txstatus);
+
+		/* Free the original skb. */
+		skb = ep->tx_skbuff[entry];
+		pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr, 
+				 skb->len, PCI_DMA_TODEVICE);
+		dev_kfree_skb_irq(skb);
+		ep->tx_skbuff[entry] = 0;
+	}
+
+#ifndef final_version
+	if (cur_tx - dirty_tx > TX_RING_SIZE) {
+		printk(KERN_WARNING
+		       "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
+		       dev->name, dirty_tx, cur_tx, ep->tx_full);
+		dirty_tx += TX_RING_SIZE;
+	}
+#endif
+	ep->dirty_tx = dirty_tx;
+	if (ep->tx_full && cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
+		/* The ring is no longer full, allow new TX entries. */
+		ep->tx_full = 0;
+		netif_wake_queue(dev);
+	}
+}
+
+
 /* The interrupt handler does all of the Rx thread work and cleans up
    after the Tx thread. */
 static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1042,7 +1164,7 @@ static irqreturn_t epic_interrupt(int ir
 	do {
 		status = inl(ioaddr + INTSTAT);
 		/* Acknowledge all of the current interrupt sources ASAP. */
-		outl(status & 0x00007fff, ioaddr + INTSTAT);
+		outl(status & EpicNormalEvent, ioaddr + INTSTAT);
 
 		if (debug > 4)
 			printk(KERN_DEBUG "%s: Interrupt, status=%#8.8x new "
@@ -1053,73 +1175,18 @@ static irqreturn_t epic_interrupt(int ir
 			break;
 		handled = 1;
 
-		if (status & (RxDone | RxStarted | RxEarlyWarn | RxOverflow))
-			epic_rx(dev);
-
-		if (status & (TxEmpty | TxDone)) {
-			unsigned int dirty_tx, cur_tx;
-
-			/* Note: if this lock becomes a problem we can narrow the locked
-			   region at the cost of occasionally grabbing the lock more
-			   times. */
-			spin_lock(&ep->lock);
-			cur_tx = ep->cur_tx;
-			dirty_tx = ep->dirty_tx;
-			for (; cur_tx - dirty_tx > 0; dirty_tx++) {
-				struct sk_buff *skb;
-				int entry = dirty_tx % TX_RING_SIZE;
-				int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
-
-				if (txstatus & DescOwn)
-					break;			/* It still hasn't been Txed */
-
-				if ( ! (txstatus & 0x0001)) {
-					/* There was an major error, log it. */
-#ifndef final_version
-					if (debug > 1)
-						printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
-							   dev->name, txstatus);
-#endif
-					ep->stats.tx_errors++;
-					if (txstatus & 0x1050) ep->stats.tx_aborted_errors++;
-					if (txstatus & 0x0008) ep->stats.tx_carrier_errors++;
-					if (txstatus & 0x0040) ep->stats.tx_window_errors++;
-					if (txstatus & 0x0010) ep->stats.tx_fifo_errors++;
-				} else {
-					ep->stats.collisions += (txstatus >> 8) & 15;
-					ep->stats.tx_packets++;
-					ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
-				}
-
-				/* Free the original skb. */
-				skb = ep->tx_skbuff[entry];
-				pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr, 
-						 skb->len, PCI_DMA_TODEVICE);
-				dev_kfree_skb_irq(skb);
-				ep->tx_skbuff[entry] = 0;
+		if (status & EpicNapiEvent) {
+			spin_lock(&ep->napi_lock);
+			if (netif_rx_schedule_prep(dev)) {
+				epic_napi_irq_off(dev, ep);
+				__netif_rx_schedule(dev);
 			}
-
-#ifndef final_version
-			if (cur_tx - dirty_tx > TX_RING_SIZE) {
-				printk(KERN_WARNING "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
-					   dev->name, dirty_tx, cur_tx, ep->tx_full);
-				dirty_tx += TX_RING_SIZE;
-			}
-#endif
-			ep->dirty_tx = dirty_tx;
-			if (ep->tx_full
-				&& cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
-				/* The ring is no longer full, allow new TX entries. */
-				ep->tx_full = 0;
-				spin_unlock(&ep->lock);
-				netif_wake_queue(dev);
-			} else
-				spin_unlock(&ep->lock);
+			spin_unlock(&ep->napi_lock);
 		}
 
 		/* Check uncommon events all at once. */
-		if (status & (CntFull | TxUnderrun | RxOverflow | RxFull |
-					  PCIBusErr170 | PCIBusErr175)) {
+		if (status &
+		    (CntFull | TxUnderrun | PCIBusErr170 | PCIBusErr175)) {
 			if (status == 0xffffffff) /* Chip failed or removed (CardBus). */
 				break;
 			/* Always update the error counts to avoid overhead later. */
@@ -1133,11 +1200,6 @@ static irqreturn_t epic_interrupt(int ir
 				/* Restart the transmit process. */
 				outl(RestartTx, ioaddr + COMMAND);
 			}
-			if (status & RxOverflow) {		/* Missed a Rx frame. */
-				ep->stats.rx_errors++;
-			}
-			if (status & (RxOverflow | RxFull))
-				outw(RxQueued, ioaddr + COMMAND);
 			if (status & PCIBusErr170) {
 				printk(KERN_ERR "%s: PCI Bus Error!  EPIC status %4.4x.\n",
 					   dev->name, status);
@@ -1147,6 +1209,8 @@ static irqreturn_t epic_interrupt(int ir
 			/* Clear all error sources. */
 			outl(status & 0x7f18, ioaddr + INTSTAT);
 		}
+		if (!(status & EpicNormalEvent))
+			break;
 		if (--boguscnt < 0) {
 			printk(KERN_ERR "%s: Too much work at interrupt, "
 				   "IntrStatus=0x%8.8x.\n",
@@ -1164,7 +1228,7 @@ static irqreturn_t epic_interrupt(int ir
 	return IRQ_RETVAL(handled);
 }
 
-static int epic_rx(struct net_device *dev)
+static int epic_rx(struct net_device *dev, int budget)
 {
 	struct epic_private *ep = dev->priv;
 	int entry = ep->cur_rx % RX_RING_SIZE;
@@ -1174,6 +1238,10 @@ static int epic_rx(struct net_device *de
 	if (debug > 4)
 		printk(KERN_DEBUG " In epic_rx(), entry %d %8.8x.\n", entry,
 			   ep->rx_ring[entry].rxstatus);
+
+	if (rx_work_limit > budget)
+		rx_work_limit = budget;
+
 	/* If we own the next entry, it's a new packet. Send it up. */
 	while ((ep->rx_ring[entry].rxstatus & cpu_to_le32(DescOwn)) == 0) {
 		int status = le32_to_cpu(ep->rx_ring[entry].rxstatus);
@@ -1228,7 +1296,7 @@ static int epic_rx(struct net_device *de
 				ep->rx_skbuff[entry] = NULL;
 			}
 			skb->protocol = eth_type_trans(skb, dev);
-			netif_rx(skb);
+			netif_receive_skb(skb);
 			dev->last_rx = jiffies;
 			ep->stats.rx_packets++;
 			ep->stats.rx_bytes += pkt_len;
@@ -1256,6 +1324,60 @@ static int epic_rx(struct net_device *de
 	return work_done;
 }
 
+static void epic_rx_err(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+	int status;
+
+	status = inl(ioaddr + INTSTAT);
+
+	if (status == 0xffffffff)
+		return;
+	if (status & RxOverflow) 	/* Missed a Rx frame. */
+		ep->stats.rx_errors++;
+	if (status & (RxOverflow | RxFull))
+		outw(RxQueued, ioaddr + COMMAND);
+}
+
+static int epic_poll(struct net_device *dev, int *budget)
+{
+	struct epic_private *ep = dev->priv;
+	int work_done, orig_budget;
+	long ioaddr = dev->base_addr;
+
+	epic_tx(dev, ep);
+
+	orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
+
+rx_action:
+	outl(EpicNapiEvent, ioaddr + INTSTAT);
+
+	work_done = epic_rx(dev, *budget);
+
+	epic_rx_err(dev, ep);
+
+	*budget -= work_done;
+	dev->quota -= work_done;
+
+	if (work_done < orig_budget) {
+		unsigned long flags;
+		int status;
+
+		spin_lock_irqsave(&ep->napi_lock, flags);
+		epic_napi_irq_on(dev, ep);
+		__netif_rx_complete(dev);
+		spin_unlock_irqrestore(&ep->napi_lock, flags);
+
+		status = inl(ioaddr + INTSTAT);
+		if (status & EpicNapiEvent) {
+			epic_napi_irq_off(dev, ep);
+			goto rx_action;
+		}
+	}
+
+	return (work_done >= orig_budget);
+}
+
 static int epic_close(struct net_device *dev)
 {
 	long ioaddr = dev->base_addr;
@@ -1270,9 +1392,13 @@ static int epic_close(struct net_device 
 			   dev->name, (int)inl(ioaddr + INTSTAT));
 
 	del_timer_sync(&ep->timer);
-	epic_pause(dev);
+
+	epic_disable_int(dev, ep);
+
 	free_irq(dev->irq, dev);
 
+	epic_pause(dev);
+
 	/* Free all the skbuffs in the Rx queue. */
 	for (i = 0; i < RX_RING_SIZE; i++) {
 		skb = ep->rx_skbuff[i];
@@ -1470,6 +1596,7 @@ static void __devexit epic_remove_one (s
 #endif
 	pci_release_regions(pdev);
 	free_netdev(dev);
+	pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
 	/* pci_power_off(pdev, -1); */
 }

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-20 14:21 [PATCH] [RFT] 2.6.4 - epic100 napi Francois Romieu
@ 2004-03-21 18:24 ` Jeff Garzik
  2004-03-21 23:47   ` Francois Romieu
  2004-03-23 14:29   ` OGAWA Hirofumi
  2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
  1 sibling, 2 replies; 18+ messages in thread
From: Jeff Garzik @ 2004-03-21 18:24 UTC (permalink / raw)
  To: Francois Romieu; +Cc: netdev

Francois Romieu wrote:
> People are welcome to report how the following patch behaves on their
> hardware. It does not seem too bad here but it probably is still a bit rough.
> A split version of the patch will follow tomorrow. This one definitely aims
> at brave and/or bored testers.

Looks pretty good, but includes a standard NAPI race...

When you split up the patches, I'll throw it into my -netdev tree, which 
means it will be automatically included in -mm for testing (as is r8169 
now).

FWIW Andrew Morton has made me lazy...  I don't bother publishing 
separate -netdev patches anymore, since he automatically downloads my 
netdev-2.6 BK tree before doing each -mm release.

> +	if (work_done < orig_budget) {
> +		unsigned long flags;
> +		int status;
> +
> +		spin_lock_irqsave(&ep->napi_lock, flags);
> +		epic_napi_irq_on(dev, ep);
> +		__netif_rx_complete(dev);
> +		spin_unlock_irqrestore(&ep->napi_lock, flags);
> +
> +		status = inl(ioaddr + INTSTAT);
> +		if (status & EpicNapiEvent) {
> +			epic_napi_irq_off(dev, ep);
> +			goto rx_action;
> +		}

Need to add a netif_running() check to the 'if' test at the top of the 
quote.

Are you (or somebody else?) interested in reviewing all the in-tree NAPI 
drivers, and seeing if other drivers have this bug?  I think 8139cp.c 
does at least, maybe e100 too...  Such a fix would need to go into 2.4.x 
as well.

	Jeff

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-21 18:24 ` Jeff Garzik
@ 2004-03-21 23:47   ` Francois Romieu
  2004-03-23 14:29   ` OGAWA Hirofumi
  1 sibling, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-21 23:47 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: netdev

Jeff Garzik <jgarzik@pobox.com> :
[...]
> Need to add a netif_running() check to the 'if' test at the top of the 
> quote.
> 
> Are you (or somebody else?) interested in reviewing all the in-tree NAPI 
> drivers, and seeing if other drivers have this bug?  I think 8139cp.c 
> does at least, maybe e100 too...  Such a fix would need to go into 2.4.x 
> as well.

Ok, I'll check for the race against dev_close.

--
Ueimor

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 0/4] 2.6.5-rc2 - epic100 update
  2004-03-20 14:21 [PATCH] [RFT] 2.6.4 - epic100 napi Francois Romieu
  2004-03-21 18:24 ` Jeff Garzik
@ 2004-03-22 23:50 ` Francois Romieu
  2004-03-22 23:51   ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
  2004-03-23  0:12   ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Jeff Garzik
  1 sibling, 2 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:50 UTC (permalink / raw)
  To: netdev; +Cc: Jeff Garzik

Schedule:

- epic100-fixup.patch:   opportunistic cleanup
- epic100-napi-00.patch: code shuffling before the move
- epic100-napi-10.patch: rx napi (includes netif_running change)
- epic100-napi-20.patch: minimalistic tx napi

2.6.5-rc2 and 2.6.5-rc2-mm1 contain the same drivers/net/epic100.c
so the patches should apply equally to both.

The driver has been moderately tested. 

Feedback welcome.

--
Ueimor

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/4] 2.6.5-rc2 - epic100 fixup
  2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
@ 2004-03-22 23:51   ` Francois Romieu
  2004-03-22 23:52     ` [PATCH 2/4] 2.6.5-rc2 - epic100 napi Francois Romieu
  2004-03-23  0:12   ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Jeff Garzik
  1 sibling, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:51 UTC (permalink / raw)
  To: netdev; +Cc: Jeff Garzik


- extra pci_disable_device() to balance invocation of pci_enable_device()
  in epic_init_one() (-> error path + epic_remove_one());
- lazy return status in epic_init_one(), tsss...;
- memory dedicated to Rx descriptors was not freed after failure of
  register_netdev() in epic_init_one();
- use of epic_pause() in epic_close() offers a small window for a late
  interruption just before the final free_irq(). Let's close the window to
  avoid two epic_rx() threads racing with each other.


 drivers/net/epic100.c |   59 +++++++++++++++++++++++++++++++++-----------------
 1 files changed, 40 insertions(+), 19 deletions(-)

diff -puN drivers/net/epic100.c~epic100-fixup drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-fixup	2004-03-22 22:53:16.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c	2004-03-22 22:53:16.000000000 +0100
@@ -378,7 +378,7 @@ static int __devinit epic_init_one (stru
 	int irq;
 	struct net_device *dev;
 	struct epic_private *ep;
-	int i, option = 0, duplex = 0;
+	int i, ret, option = 0, duplex = 0;
 	void *ring_space;
 	dma_addr_t ring_dma;
 
@@ -392,29 +392,33 @@ static int __devinit epic_init_one (stru
 	
 	card_idx++;
 	
-	i = pci_enable_device(pdev);
-	if (i)
-		return i;
+	ret = pci_enable_device(pdev);
+	if (ret)
+		goto out;
 	irq = pdev->irq;
 
 	if (pci_resource_len(pdev, 0) < pci_id_tbl[chip_idx].io_size) {
 		printk (KERN_ERR "card %d: no PCI region space\n", card_idx);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err_out_disable;
 	}
 	
 	pci_set_master(pdev);
 
+	ret = pci_request_regions(pdev, DRV_NAME);
+	if (ret < 0)
+		goto err_out_disable;
+
+	ret = -ENOMEM;
+
 	dev = alloc_etherdev(sizeof (*ep));
 	if (!dev) {
 		printk (KERN_ERR "card %d: no memory for eth device\n", card_idx);
-		return -ENOMEM;
+		goto err_out_free_res;
 	}
 	SET_MODULE_OWNER(dev);
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
-	if (pci_request_regions(pdev, DRV_NAME))
-		goto err_out_free_netdev;
-
 #ifdef USE_IO_OPS
 	ioaddr = pci_resource_start (pdev, 0);
 #else
@@ -422,7 +426,7 @@ static int __devinit epic_init_one (stru
 	ioaddr = (long) ioremap (ioaddr, pci_resource_len (pdev, 1));
 	if (!ioaddr) {
 		printk (KERN_ERR DRV_NAME " %d: ioremap failed\n", card_idx);
-		goto err_out_free_res;
+		goto err_out_free_netdev;
 	}
 #endif
 
@@ -544,9 +548,9 @@ static int __devinit epic_init_one (stru
 	dev->watchdog_timeo = TX_TIMEOUT;
 	dev->tx_timeout = &epic_tx_timeout;
 
-	i = register_netdev(dev);
-	if (i)
-		goto err_out_unmap_tx;
+	ret = register_netdev(dev);
+	if (ret < 0)
+		goto err_out_unmap_rx;
 
 	printk(KERN_INFO "%s: %s at %#lx, IRQ %d, ",
 		   dev->name, pci_id_tbl[chip_idx].name, ioaddr, dev->irq);
@@ -554,19 +558,24 @@ static int __devinit epic_init_one (stru
 		printk("%2.2x:", dev->dev_addr[i]);
 	printk("%2.2x.\n", dev->dev_addr[i]);
 
-	return 0;
+out:
+	return ret;
 
+err_out_unmap_rx:
+	pci_free_consistent(pdev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma);
 err_out_unmap_tx:
 	pci_free_consistent(pdev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma);
 err_out_iounmap:
 #ifndef USE_IO_OPS
 	iounmap(ioaddr);
-err_out_free_res:
-#endif
-	pci_release_regions(pdev);
 err_out_free_netdev:
+#endif
 	free_netdev(dev);
-	return -ENODEV;
+err_out_free_res:
+	pci_release_regions(pdev);
+err_out_disable:
+	pci_disable_device(pdev);
+	goto out;
 }
 \f
 /* Serial EEPROM section. */
@@ -592,6 +601,13 @@ err_out_free_netdev:
 #define EE_READ256_CMD	(6 << 8)
 #define EE_ERASE_CMD	(7 << 6)
 
+static void epic_disable_int(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+
+	outl(0x00000000, ioaddr + INTMASK);
+}
+
 static int __devinit read_eeprom(long ioaddr, int location)
 {
 	int i;
@@ -1276,9 +1292,13 @@ static int epic_close(struct net_device 
 			   dev->name, (int)inl(ioaddr + INTSTAT));
 
 	del_timer_sync(&ep->timer);
-	epic_pause(dev);
+
+	epic_disable_int(dev, ep);
+
 	free_irq(dev->irq, dev);
 
+	epic_pause(dev);
+
 	/* Free all the skbuffs in the Rx queue. */
 	for (i = 0; i < RX_RING_SIZE; i++) {
 		skb = ep->rx_skbuff[i];
@@ -1476,6 +1496,7 @@ static void __devexit epic_remove_one (s
 #endif
 	pci_release_regions(pdev);
 	free_netdev(dev);
+	pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
 	/* pci_power_off(pdev, -1); */
 }

_

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 2/4] 2.6.5-rc2 - epic100 napi
  2004-03-22 23:51   ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
@ 2004-03-22 23:52     ` Francois Romieu
  2004-03-22 23:53       ` [PATCH 3/4] " Francois Romieu
  0 siblings, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:52 UTC (permalink / raw)
  To: netdev; +Cc: Jeff Garzik


Isolate the classical TX part of epic_interrupt. Innocent code shuffling.


 drivers/net/epic100.c |  137 +++++++++++++++++++++++++++-----------------------
 1 files changed, 76 insertions(+), 61 deletions(-)

diff -puN drivers/net/epic100.c~epic100-napi-00 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-00	2004-03-22 22:53:18.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c	2004-03-22 22:53:18.000000000 +0100
@@ -1045,6 +1045,79 @@ static int epic_start_xmit(struct sk_buf
 	return 0;
 }
 
+static void epic_tx_error(struct net_device *dev, struct epic_private *ep,
+			  int status)
+{
+	struct net_device_stats *stats = &ep->stats;
+
+#ifndef final_version
+	/* There was an major error, log it. */
+	if (debug > 1)
+		printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
+		       dev->name, status);
+#endif
+	stats->tx_errors++;
+	if (status & 0x1050)
+		stats->tx_aborted_errors++;
+	if (status & 0x0008)
+		stats->tx_carrier_errors++;
+	if (status & 0x0040)
+		stats->tx_window_errors++;
+	if (status & 0x0010)
+		stats->tx_fifo_errors++;
+}
+
+static void epic_tx(struct net_device *dev, struct epic_private *ep)
+{
+	unsigned int dirty_tx, cur_tx;
+
+	/*
+	 * Note: if this lock becomes a problem we can narrow the locked
+	 * region at the cost of occasionally grabbing the lock more times.
+	 */
+	spin_lock(&ep->lock);
+	cur_tx = ep->cur_tx;
+	for (dirty_tx = ep->dirty_tx; cur_tx - dirty_tx > 0; dirty_tx++) {
+		struct sk_buff *skb;
+		int entry = dirty_tx % TX_RING_SIZE;
+		int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
+
+		if (txstatus & DescOwn)
+			break;	/* It still hasn't been Txed */
+
+		if (likely(txstatus & 0x0001)) {
+			ep->stats.collisions += (txstatus >> 8) & 15;
+			ep->stats.tx_packets++;
+			ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
+		} else
+			epic_tx_error(dev, ep, txstatus);
+
+		/* Free the original skb. */
+		skb = ep->tx_skbuff[entry];
+		pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr, 
+				 skb->len, PCI_DMA_TODEVICE);
+		dev_kfree_skb_irq(skb);
+		ep->tx_skbuff[entry] = 0;
+	}
+
+#ifndef final_version
+	if (cur_tx - dirty_tx > TX_RING_SIZE) {
+		printk(KERN_WARNING
+		       "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
+		       dev->name, dirty_tx, cur_tx, ep->tx_full);
+		dirty_tx += TX_RING_SIZE;
+	}
+#endif
+	ep->dirty_tx = dirty_tx;
+	if (ep->tx_full && cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
+		/* The ring is no longer full, allow new TX entries. */
+		ep->tx_full = 0;
+		netif_wake_queue(dev);
+	}
+	spin_unlock(&ep->lock);
+}
+
+
 /* The interrupt handler does all of the Rx thread work and cleans up
    after the Tx thread. */
 static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1072,66 +1145,8 @@ static irqreturn_t epic_interrupt(int ir
 		if (status & (RxDone | RxStarted | RxEarlyWarn | RxOverflow))
 			epic_rx(dev);
 
-		if (status & (TxEmpty | TxDone)) {
-			unsigned int dirty_tx, cur_tx;
-
-			/* Note: if this lock becomes a problem we can narrow the locked
-			   region at the cost of occasionally grabbing the lock more
-			   times. */
-			spin_lock(&ep->lock);
-			cur_tx = ep->cur_tx;
-			dirty_tx = ep->dirty_tx;
-			for (; cur_tx - dirty_tx > 0; dirty_tx++) {
-				struct sk_buff *skb;
-				int entry = dirty_tx % TX_RING_SIZE;
-				int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
-
-				if (txstatus & DescOwn)
-					break;			/* It still hasn't been Txed */
-
-				if ( ! (txstatus & 0x0001)) {
-					/* There was an major error, log it. */
-#ifndef final_version
-					if (debug > 1)
-						printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
-							   dev->name, txstatus);
-#endif
-					ep->stats.tx_errors++;
-					if (txstatus & 0x1050) ep->stats.tx_aborted_errors++;
-					if (txstatus & 0x0008) ep->stats.tx_carrier_errors++;
-					if (txstatus & 0x0040) ep->stats.tx_window_errors++;
-					if (txstatus & 0x0010) ep->stats.tx_fifo_errors++;
-				} else {
-					ep->stats.collisions += (txstatus >> 8) & 15;
-					ep->stats.tx_packets++;
-					ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
-				}
-
-				/* Free the original skb. */
-				skb = ep->tx_skbuff[entry];
-				pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr, 
-						 skb->len, PCI_DMA_TODEVICE);
-				dev_kfree_skb_irq(skb);
-				ep->tx_skbuff[entry] = 0;
-			}
-
-#ifndef final_version
-			if (cur_tx - dirty_tx > TX_RING_SIZE) {
-				printk(KERN_WARNING "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
-					   dev->name, dirty_tx, cur_tx, ep->tx_full);
-				dirty_tx += TX_RING_SIZE;
-			}
-#endif
-			ep->dirty_tx = dirty_tx;
-			if (ep->tx_full
-				&& cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
-				/* The ring is no longer full, allow new TX entries. */
-				ep->tx_full = 0;
-				spin_unlock(&ep->lock);
-				netif_wake_queue(dev);
-			} else
-				spin_unlock(&ep->lock);
-		}
+		if (status & (TxEmpty | TxDone))
+			epic_tx(dev, ep);
 
 		/* Check uncommon events all at once. */
 		if (status & (CntFull | TxUnderrun | RxOverflow | RxFull |
@@ -1149,7 +1164,7 @@ static irqreturn_t epic_interrupt(int ir
 				/* Restart the transmit process. */
 				outl(RestartTx, ioaddr + COMMAND);
 			}
-			if (status & RxOverflow) {		/* Missed a Rx frame. */
+			if (status & RxOverflow) {	/* Missed a Rx frame. */
 				ep->stats.rx_errors++;
 			}
 			if (status & (RxOverflow | RxFull))

_

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 3/4] 2.6.5-rc2 - epic100 napi
  2004-03-22 23:52     ` [PATCH 2/4] 2.6.5-rc2 - epic100 napi Francois Romieu
@ 2004-03-22 23:53       ` Francois Romieu
  2004-03-22 23:53         ` [PATCH 4/4] " Francois Romieu
  0 siblings, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:53 UTC (permalink / raw)
  To: netdev; +Cc: Jeff Garzik


RX NAPI.


 drivers/net/epic100.c |  137 ++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 116 insertions(+), 21 deletions(-)

diff -puN drivers/net/epic100.c~epic100-napi-10 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-10	2004-03-22 22:53:19.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c	2004-03-23 00:18:33.000000000 +0100
@@ -98,7 +98,7 @@ static int rx_copybreak;
    There are no ill effects from too-large receive rings. */
 #define TX_RING_SIZE	16
 #define TX_QUEUE_LEN	10		/* Limit ring entries actually used.  */
-#define RX_RING_SIZE	32
+#define RX_RING_SIZE	256
 #define TX_TOTAL_SIZE	TX_RING_SIZE*sizeof(struct epic_tx_desc)
 #define RX_TOTAL_SIZE	RX_RING_SIZE*sizeof(struct epic_rx_desc)
 
@@ -292,6 +292,11 @@ enum CommandBits {
 	StopTxDMA=0x20, StopRxDMA=0x40, RestartTx=0x80,
 };
 
+#define EpicRemoved	0xffffffff	/* Chip failed or removed (CardBus) */
+
+#define EpicNapiEvent	(RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
+#define EpicNormalEvent	(0x0000ffff & ~EpicNapiEvent)
+
 static u16 media2miictl[16] = {
 	0, 0x0C00, 0x0C00, 0x2000,  0x0100, 0x2100, 0, 0,
 	0, 0, 0, 0,  0, 0, 0, 0 };
@@ -330,9 +335,11 @@ struct epic_private {
 
 	/* Ring pointers. */
 	spinlock_t lock;				/* Group with Tx control cache line. */
+	spinlock_t napi_lock;
 	unsigned int cur_tx, dirty_tx;
 
 	unsigned int cur_rx, dirty_rx;
+	u32 irq_mask;
 	unsigned int rx_buf_sz;				/* Based on MTU+slack. */
 
 	struct pci_dev *pci_dev;			/* PCI bus location. */
@@ -359,7 +366,8 @@ static void epic_timer(unsigned long dat
 static void epic_tx_timeout(struct net_device *dev);
 static void epic_init_ring(struct net_device *dev);
 static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev);
-static int epic_rx(struct net_device *dev);
+static int epic_rx(struct net_device *dev, int budget);
+static int epic_poll(struct net_device *dev, int *budget);
 static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static struct ethtool_ops netdev_ethtool_ops;
@@ -493,6 +501,9 @@ static int __devinit epic_init_one (stru
 	ep->pci_dev = pdev;
 	ep->chip_id = chip_idx;
 	ep->chip_flags = pci_id_tbl[chip_idx].drv_flags;
+	ep->irq_mask = 
+		(ep->chip_flags & TYPE2_INTR ?  PCIBusErr175 : PCIBusErr170)
+		 | CntFull | TxUnderrun | TxDone | TxEmpty | EpicNapiEvent;
 
 	/* Find the connected MII xcvrs.
 	   Doing this in open() would allow detecting external xcvrs later, but
@@ -547,6 +558,8 @@ static int __devinit epic_init_one (stru
 	dev->ethtool_ops = &netdev_ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 	dev->tx_timeout = &epic_tx_timeout;
+	dev->poll = epic_poll;
+	dev->weight = 64;
 
 	ret = register_netdev(dev);
 	if (ret < 0)
@@ -608,6 +621,29 @@ static void epic_disable_int(struct net_
 	outl(0x00000000, ioaddr + INTMASK);
 }
 
+static inline void __epic_pci_commit(long ioaddr)
+{
+#ifndef USE_IO_OPS
+	inl(ioaddr + INTMASK);
+#endif
+}
+
+static void epic_napi_irq_off(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+
+	outl(ep->irq_mask & ~EpicNapiEvent, ioaddr + INTMASK);
+	__epic_pci_commit(ioaddr);
+}
+
+static void epic_napi_irq_on(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+
+	/* No need to commit possible posted write */
+	outl(ep->irq_mask | EpicNapiEvent, ioaddr + INTMASK);
+}
+
 static int __devinit read_eeprom(long ioaddr, int location)
 {
 	int i;
@@ -769,8 +805,7 @@ static int epic_open(struct net_device *
 	/* Enable interrupts by setting the interrupt mask. */
 	outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
 		 | CntFull | TxUnderrun | TxDone | TxEmpty
-		 | RxError | RxOverflow | RxFull | RxHeader | RxDone,
-		 ioaddr + INTMASK);
+		 | RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
 
 	if (debug > 1)
 		printk(KERN_DEBUG "%s: epic_open() ioaddr %lx IRQ %d status %4.4x "
@@ -811,7 +846,7 @@ static void epic_pause(struct net_device
 	}
 
 	/* Remove the packets on the Rx queue. */
-	epic_rx(dev);
+	epic_rx(dev, RX_RING_SIZE);
 }
 
 static void epic_restart(struct net_device *dev)
@@ -858,8 +893,8 @@ static void epic_restart(struct net_devi
 	/* Enable interrupts by setting the interrupt mask. */
 	outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
 		 | CntFull | TxUnderrun | TxDone | TxEmpty
-		 | RxError | RxOverflow | RxFull | RxHeader | RxDone,
-		 ioaddr + INTMASK);
+		 | RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
+
 	printk(KERN_DEBUG "%s: epic_restart() done, cmd status %4.4x, ctl %4.4x"
 		   " interrupt %4.4x.\n",
 		   dev->name, (int)inl(ioaddr + COMMAND), (int)inl(ioaddr + GENCTL),
@@ -945,7 +980,8 @@ static void epic_init_ring(struct net_de
 	int i;
 
 	ep->tx_full = 0;
-	ep->lock = (spinlock_t) SPIN_LOCK_UNLOCKED;
+	spin_lock_init(&ep->lock);
+	spin_lock_init(&ep->napi_lock);
 	ep->dirty_tx = ep->cur_tx = 0;
 	ep->cur_rx = ep->dirty_rx = 0;
 	ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1131,7 +1167,7 @@ static irqreturn_t epic_interrupt(int ir
 	do {
 		status = inl(ioaddr + INTSTAT);
 		/* Acknowledge all of the current interrupt sources ASAP. */
-		outl(status & 0x00007fff, ioaddr + INTSTAT);
+		outl(status & EpicNormalEvent, ioaddr + INTSTAT);
 
 		if (debug > 4)
 			printk(KERN_DEBUG "%s: Interrupt, status=%#8.8x new "
@@ -1142,16 +1178,22 @@ static irqreturn_t epic_interrupt(int ir
 			break;
 		handled = 1;
 
-		if (status & (RxDone | RxStarted | RxEarlyWarn | RxOverflow))
-			epic_rx(dev);
+		if (status & EpicNapiEvent) {
+			spin_lock(&ep->napi_lock);
+			if (netif_rx_schedule_prep(dev)) {
+				epic_napi_irq_off(dev, ep);
+				__netif_rx_schedule(dev);
+			}
+			spin_unlock(&ep->napi_lock);
+		}
 
 		if (status & (TxEmpty | TxDone))
 			epic_tx(dev, ep);
 
 		/* Check uncommon events all at once. */
-		if (status & (CntFull | TxUnderrun | RxOverflow | RxFull |
-					  PCIBusErr170 | PCIBusErr175)) {
-			if (status == 0xffffffff) /* Chip failed or removed (CardBus). */
+		if (status &
+		    (CntFull | TxUnderrun | PCIBusErr170 | PCIBusErr175)) {
+			if (status == EpicRemoved)
 				break;
 			/* Always update the error counts to avoid overhead later. */
 			ep->stats.rx_missed_errors += inb(ioaddr + MPCNT);
@@ -1164,11 +1206,6 @@ static irqreturn_t epic_interrupt(int ir
 				/* Restart the transmit process. */
 				outl(RestartTx, ioaddr + COMMAND);
 			}
-			if (status & RxOverflow) {	/* Missed a Rx frame. */
-				ep->stats.rx_errors++;
-			}
-			if (status & (RxOverflow | RxFull))
-				outw(RxQueued, ioaddr + COMMAND);
 			if (status & PCIBusErr170) {
 				printk(KERN_ERR "%s: PCI Bus Error!  EPIC status %4.4x.\n",
 					   dev->name, status);
@@ -1178,6 +1215,8 @@ static irqreturn_t epic_interrupt(int ir
 			/* Clear all error sources. */
 			outl(status & 0x7f18, ioaddr + INTSTAT);
 		}
+		if (status & EpicNormalEvent)
+			break;
 		if (--boguscnt < 0) {
 			printk(KERN_ERR "%s: Too much work at interrupt, "
 				   "IntrStatus=0x%8.8x.\n",
@@ -1195,7 +1234,7 @@ static irqreturn_t epic_interrupt(int ir
 	return IRQ_RETVAL(handled);
 }
 
-static int epic_rx(struct net_device *dev)
+static int epic_rx(struct net_device *dev, int budget)
 {
 	struct epic_private *ep = dev->priv;
 	int entry = ep->cur_rx % RX_RING_SIZE;
@@ -1205,6 +1244,10 @@ static int epic_rx(struct net_device *de
 	if (debug > 4)
 		printk(KERN_DEBUG " In epic_rx(), entry %d %8.8x.\n", entry,
 			   ep->rx_ring[entry].rxstatus);
+
+	if (rx_work_limit > budget)
+		rx_work_limit = budget;
+
 	/* If we own the next entry, it's a new packet. Send it up. */
 	while ((ep->rx_ring[entry].rxstatus & cpu_to_le32(DescOwn)) == 0) {
 		int status = le32_to_cpu(ep->rx_ring[entry].rxstatus);
@@ -1265,7 +1308,7 @@ static int epic_rx(struct net_device *de
 				ep->rx_skbuff[entry] = NULL;
 			}
 			skb->protocol = eth_type_trans(skb, dev);
-			netif_rx(skb);
+			netif_receive_skb(skb);
 			dev->last_rx = jiffies;
 			ep->stats.rx_packets++;
 			ep->stats.rx_bytes += pkt_len;
@@ -1293,6 +1336,58 @@ static int epic_rx(struct net_device *de
 	return work_done;
 }
 
+static void epic_rx_err(struct net_device *dev, struct epic_private *ep)
+{
+	long ioaddr = dev->base_addr;
+	int status;
+
+	status = inl(ioaddr + INTSTAT);
+
+	if (status == EpicRemoved)
+		return;
+	if (status & RxOverflow) 	/* Missed a Rx frame. */
+		ep->stats.rx_errors++;
+	if (status & (RxOverflow | RxFull))
+		outw(RxQueued, ioaddr + COMMAND);
+}
+
+static int epic_poll(struct net_device *dev, int *budget)
+{
+	struct epic_private *ep = dev->priv;
+	int work_done, orig_budget;
+	long ioaddr = dev->base_addr;
+
+	orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
+
+rx_action:
+	outl(EpicNapiEvent, ioaddr + INTSTAT);
+
+	work_done = epic_rx(dev, *budget);
+
+	epic_rx_err(dev, ep);
+
+	*budget -= work_done;
+	dev->quota -= work_done;
+
+	if (netif_running(dev) && (work_done < orig_budget)) {
+		unsigned long flags;
+		int status;
+
+		spin_lock_irqsave(&ep->napi_lock, flags);
+		epic_napi_irq_on(dev, ep);
+		__netif_rx_complete(dev);
+		spin_unlock_irqrestore(&ep->napi_lock, flags);
+
+		status = inl(ioaddr + INTSTAT);
+		if (status & EpicNapiEvent) {
+			epic_napi_irq_off(dev, ep);
+			goto rx_action;
+		}
+	}
+
+	return (work_done >= orig_budget);
+}
+
 static int epic_close(struct net_device *dev)
 {
 	long ioaddr = dev->base_addr;

_

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 4/4] 2.6.5-rc2 - epic100 napi
  2004-03-22 23:53       ` [PATCH 3/4] " Francois Romieu
@ 2004-03-22 23:53         ` Francois Romieu
  0 siblings, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:53 UTC (permalink / raw)
  To: netdev; +Cc: Jeff Garzik


TX NAPI.


 drivers/net/epic100.c |   20 +++++++++-----------
 1 files changed, 9 insertions(+), 11 deletions(-)

diff -puN drivers/net/epic100.c~epic100-napi-20 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-20	2004-03-23 00:18:40.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c	2004-03-23 00:18:40.000000000 +0100
@@ -96,8 +96,8 @@ static int rx_copybreak;
    Making the Tx ring too large decreases the effectiveness of channel
    bonding and packet priority.
    There are no ill effects from too-large receive rings. */
-#define TX_RING_SIZE	16
-#define TX_QUEUE_LEN	10		/* Limit ring entries actually used.  */
+#define TX_RING_SIZE	256
+#define TX_QUEUE_LEN	240		/* Limit ring entries actually used.  */
 #define RX_RING_SIZE	256
 #define TX_TOTAL_SIZE	TX_RING_SIZE*sizeof(struct epic_tx_desc)
 #define RX_TOTAL_SIZE	RX_RING_SIZE*sizeof(struct epic_rx_desc)
@@ -294,7 +294,8 @@ enum CommandBits {
 
 #define EpicRemoved	0xffffffff	/* Chip failed or removed (CardBus) */
 
-#define EpicNapiEvent	(RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
+#define EpicNapiEvent	(TxEmpty | TxDone | \
+			 RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
 #define EpicNormalEvent	(0x0000ffff & ~EpicNapiEvent)
 
 static u16 media2miictl[16] = {
@@ -503,7 +504,7 @@ static int __devinit epic_init_one (stru
 	ep->chip_flags = pci_id_tbl[chip_idx].drv_flags;
 	ep->irq_mask = 
 		(ep->chip_flags & TYPE2_INTR ?  PCIBusErr175 : PCIBusErr170)
-		 | CntFull | TxUnderrun | TxDone | TxEmpty | EpicNapiEvent;
+		 | CntFull | TxUnderrun | EpicNapiEvent;
 
 	/* Find the connected MII xcvrs.
 	   Doing this in open() would allow detecting external xcvrs later, but
@@ -804,7 +805,7 @@ static int epic_open(struct net_device *
 
 	/* Enable interrupts by setting the interrupt mask. */
 	outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
-		 | CntFull | TxUnderrun | TxDone | TxEmpty
+		 | CntFull | TxUnderrun 
 		 | RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
 
 	if (debug > 1)
@@ -892,7 +893,7 @@ static void epic_restart(struct net_devi
 
 	/* Enable interrupts by setting the interrupt mask. */
 	outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
-		 | CntFull | TxUnderrun | TxDone | TxEmpty
+		 | CntFull | TxUnderrun
 		 | RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
 
 	printk(KERN_DEBUG "%s: epic_restart() done, cmd status %4.4x, ctl %4.4x"
@@ -1111,7 +1112,6 @@ static void epic_tx(struct net_device *d
 	 * Note: if this lock becomes a problem we can narrow the locked
 	 * region at the cost of occasionally grabbing the lock more times.
 	 */
-	spin_lock(&ep->lock);
 	cur_tx = ep->cur_tx;
 	for (dirty_tx = ep->dirty_tx; cur_tx - dirty_tx > 0; dirty_tx++) {
 		struct sk_buff *skb;
@@ -1150,7 +1150,6 @@ static void epic_tx(struct net_device *d
 		ep->tx_full = 0;
 		netif_wake_queue(dev);
 	}
-	spin_unlock(&ep->lock);
 }
 
 
@@ -1187,9 +1186,6 @@ static irqreturn_t epic_interrupt(int ir
 			spin_unlock(&ep->napi_lock);
 		}
 
-		if (status & (TxEmpty | TxDone))
-			epic_tx(dev, ep);
-
 		/* Check uncommon events all at once. */
 		if (status &
 		    (CntFull | TxUnderrun | PCIBusErr170 | PCIBusErr175)) {
@@ -1362,6 +1358,8 @@ static int epic_poll(struct net_device *
 rx_action:
 	outl(EpicNapiEvent, ioaddr + INTSTAT);
 
+	epic_tx(dev, ep);
+
 	work_done = epic_rx(dev, *budget);
 
 	epic_rx_err(dev, ep);

_

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/4] 2.6.5-rc2 - epic100 update
  2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
  2004-03-22 23:51   ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
@ 2004-03-23  0:12   ` Jeff Garzik
  1 sibling, 0 replies; 18+ messages in thread
From: Jeff Garzik @ 2004-03-23  0:12 UTC (permalink / raw)
  To: Francois Romieu; +Cc: netdev, Andrew Morton

Francois Romieu wrote:
> Schedule:
> 
> - epic100-fixup.patch:   opportunistic cleanup
> - epic100-napi-00.patch: code shuffling before the move
> - epic100-napi-10.patch: rx napi (includes netif_running change)
> - epic100-napi-20.patch: minimalistic tx napi
> 
> 2.6.5-rc2 and 2.6.5-rc2-mm1 contain the same drivers/net/epic100.c
> so the patches should apply equally to both.
> 
> The driver has been moderately tested. 

Thanks.

Applied to my netdev-2.6 queue, and so it should automatically appear in 
Andrew's -mm tree soon.

	Jeff

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-21 18:24 ` Jeff Garzik
  2004-03-21 23:47   ` Francois Romieu
@ 2004-03-23 14:29   ` OGAWA Hirofumi
  2004-03-23 15:14     ` Jeff Garzik
  2004-03-23 18:51     ` Francois Romieu
  1 sibling, 2 replies; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-23 14:29 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Francois Romieu, netdev

Jeff Garzik <jgarzik@pobox.com> writes:

> > +	if (work_done < orig_budget) {
> > +		unsigned long flags;
> > +		int status;
> > +
> > +		spin_lock_irqsave(&ep->napi_lock, flags);
> > +		epic_napi_irq_on(dev, ep);
> > +		__netif_rx_complete(dev);
> > +		spin_unlock_irqrestore(&ep->napi_lock, flags);
> > +
> > +		status = inl(ioaddr + INTSTAT);
> > +		if (status & EpicNapiEvent) {
> > +			epic_napi_irq_off(dev, ep);
> > +			goto rx_action;
> > +		}
> 
> Need to add a netif_running() check to the 'if' test at the top of the
> quote.
> 
> Are you (or somebody else?) interested in reviewing all the in-tree
> NAPI drivers, and seeing if other drivers have this bug?  I think
> 8139cp.c does at least, maybe e100 too...  Such a fix would need to go
> into 2.4.x as well.

Umm.. the above code is part of ->poll(). I think xxx_interrut() need
netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
flag...

BTW, ->napi_lock is unneeded because netif_schedule() is already
atomic, it need only local_irq_enable/disable().

After __netif_rx_complete() must not do "goto rx_action", otherwise it
may become cause of twice scheduleing, it should move before spin_lock().

Thanks.
-- 
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-23 14:29   ` OGAWA Hirofumi
@ 2004-03-23 15:14     ` Jeff Garzik
  2004-03-23 16:05       ` OGAWA Hirofumi
  2004-03-23 18:51     ` Francois Romieu
  1 sibling, 1 reply; 18+ messages in thread
From: Jeff Garzik @ 2004-03-23 15:14 UTC (permalink / raw)
  To: OGAWA Hirofumi; +Cc: Francois Romieu, netdev

OGAWA Hirofumi wrote:
> Jeff Garzik <jgarzik@pobox.com> writes:
> 
> 
>>>+	if (work_done < orig_budget) {
>>>+		unsigned long flags;
>>>+		int status;
>>>+
>>>+		spin_lock_irqsave(&ep->napi_lock, flags);
>>>+		epic_napi_irq_on(dev, ep);
>>>+		__netif_rx_complete(dev);
>>>+		spin_unlock_irqrestore(&ep->napi_lock, flags);
>>>+
>>>+		status = inl(ioaddr + INTSTAT);
>>>+		if (status & EpicNapiEvent) {
>>>+			epic_napi_irq_off(dev, ep);
>>>+			goto rx_action;
>>>+		}
>>
>>Need to add a netif_running() check to the 'if' test at the top of the
>>quote.
>>
>>Are you (or somebody else?) interested in reviewing all the in-tree
>>NAPI drivers, and seeing if other drivers have this bug?  I think
>>8139cp.c does at least, maybe e100 too...  Such a fix would need to go
>>into 2.4.x as well.
> 
> 
> Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> flag...

Most interrupt routines already test this, look at

static inline int netif_rx_schedule_prep(struct net_device *dev)
{
         return netif_running(dev) &&
                 !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state);
}

It shouldn't schedule unless the interface is running.

However...  I believe it was you that added this check to 8139cp.c:

         /* close possible race's with dev_close */
         if (unlikely(!netif_running(dev))) {
                 cpw16(IntrMask, 0);
                 goto out;
         }

I like this, because regardless of NAPI, most drivers have 
non-NAPI-related interrupts they must still process.  This check handles 
that.

Although this code is a bit redundant to some of the locking and 
synchronization found in net driver dev->close() methods, I think it is 
a nice thing to do.

I do wonder about the consequences, on some hardware, about receiving an 
interrupt and -not- processing the RX or TX completions associated with 
that.  For most NIC hardware, you'll get sane behavior, but not all, I 
bet...


> BTW, ->napi_lock is unneeded because netif_schedule() is already
> atomic, it need only local_irq_enable/disable().
> 
> After __netif_rx_complete() must not do "goto rx_action", otherwise it

Agreed.

	Jeff

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-23 15:14     ` Jeff Garzik
@ 2004-03-23 16:05       ` OGAWA Hirofumi
  0 siblings, 0 replies; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-23 16:05 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Francois Romieu, netdev

Jeff Garzik <jgarzik@pobox.com> writes:

> > Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> > netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> > flag...
> 
> Most interrupt routines already test this, look at
> 
> static inline int netif_rx_schedule_prep(struct net_device *dev)
> {
>          return netif_running(dev) &&
>                  !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state);
> }
> 
> It shouldn't schedule unless the interface is running.

Yes.

> However...  I believe it was you that added this check to 8139cp.c:
>
>          /* close possible race's with dev_close */
>          if (unlikely(!netif_running(dev))) {
>                  cpw16(IntrMask, 0);
>                  goto out;
>          }

Yes, I added. And my suggestion was about this.

Because in case of 8139too, I got too many interrupts about pending RX
during the following, and the following wasn't finished.
(dev->close() wasn't called).

dev_close(),
	clear_bit(__LINK_STATE_START, &dev->state);

	smp_mb__after_clear_bit(); /* Commit netif_running(). */
	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
		/* No hurry. */
		current->state = TASK_INTERRUPTIBLE;
		schedule_timeout(1);
	}


> I do wonder about the consequences, on some hardware, about receiving
> an interrupt and -not- processing the RX or TX completions associated
> with that.  For most NIC hardware, you'll get sane behavior, but not
> all, I bet...

Is this meaning should _not_ receive the interrupt about pending RX/TX?

Thanks.
-- 
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-23 14:29   ` OGAWA Hirofumi
  2004-03-23 15:14     ` Jeff Garzik
@ 2004-03-23 18:51     ` Francois Romieu
  2004-03-23 19:59       ` OGAWA Hirofumi
  1 sibling, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-23 18:51 UTC (permalink / raw)
  To: OGAWA Hirofumi; +Cc: Jeff Garzik, netdev

OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
[...]
> Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> flag...
> 
> BTW, ->napi_lock is unneeded because netif_schedule() is already
> atomic, it need only local_irq_enable/disable().

Color me confused. The lock is supposed to protect against:

CPU1                         CPU2
[poll]
epic_napi_irq_on(dev, ep);
                             [irq handler]
                             if (netif_rx_schedule_prep(dev)) {
                                     epic_napi_irq_off(dev, ep);
                                     __netif_rx_schedule(dev);
                             }
__netif_rx_complete(dev);

-> napi irq are disabled and device is removed from poll list. What will
   prevent it ?

> After __netif_rx_complete() must not do "goto rx_action", otherwise it
> may become cause of twice scheduleing, it should move before spin_lock().

 understand the previous statement as:

+               status = inl(ioaddr + INTSTAT);
+               if (status & EpicNapiEvent) {
+                       epic_napi_irq_off(dev, ep);
+                       goto rx_action;
+
+               spin_lock_irqsave(&ep->napi_lock, flags);
+               epic_napi_irq_on(dev, ep);
+               __netif_rx_complete(dev);
+               spin_unlock_irqrestore(&ep->napi_lock, flags);
+

Afaiu, if some data comes in just before the spin_lock, it may wait for ages.

Can you reformulate as I feel I still did not get it right.

--
Ueimor

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-23 18:51     ` Francois Romieu
@ 2004-03-23 19:59       ` OGAWA Hirofumi
  2004-03-24  0:41         ` Francois Romieu
  0 siblings, 1 reply; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-23 19:59 UTC (permalink / raw)
  To: Francois Romieu; +Cc: Jeff Garzik, netdev

Francois Romieu <romieu@fr.zoreil.com> writes:

> OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
> [...]
> > Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> > netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> > flag...
> > 
> > BTW, ->napi_lock is unneeded because netif_schedule() is already
> > atomic, it need only local_irq_enable/disable().
> 
> Color me confused. The lock is supposed to protect against:
> 
> CPU1                         CPU2
> [poll]
> epic_napi_irq_on(dev, ep);
>                              [irq handler]
>                              if (netif_rx_schedule_prep(dev)) {
>                                      epic_napi_irq_off(dev, ep);
>                                      __netif_rx_schedule(dev);
>                              }
> __netif_rx_complete(dev);
> 
> -> napi irq are disabled and device is removed from poll list. What will
>    prevent it ?

__LINK_STATE_RX_SCHED flag is setting until __netif_rx_complete() is called.
So netif_rx_schedule_prep() returns 0.

> > After __netif_rx_complete() must not do "goto rx_action", otherwise it
> > may become cause of twice scheduleing, it should move before spin_lock().
> 
>  understand the previous statement as:
> 
> +               status = inl(ioaddr + INTSTAT);
> +               if (status & EpicNapiEvent) {
> +                       epic_napi_irq_off(dev, ep);
> +                       goto rx_action;
> +
> +               spin_lock_irqsave(&ep->napi_lock, flags);
> +               epic_napi_irq_on(dev, ep);
> +               __netif_rx_complete(dev);
> +               spin_unlock_irqrestore(&ep->napi_lock, flags);
> +
> 
> Afaiu, if some data comes in just before the spin_lock, it may wait for ages.

Yes, maybe. But, if after spin_lock, it loop may call the twice
__netif_rx_schedule(). And netif_rx_complete() doesn't call dev_put().
It will leaks the dev->refcnt, I think.

> +               __netif_rx_complete(dev);
> +               spin_unlock_irqrestore(&ep->napi_lock, flags);
           -- interrupt and call __netif_rx_schedule() --
> +               status = inl(ioaddr + INTSTAT);
> +               if (status & EpicNapiEvent) {
> +                       epic_napi_irq_off(dev, ep);
> +                       goto rx_action;

Thanks.
-- 
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-23 19:59       ` OGAWA Hirofumi
@ 2004-03-24  0:41         ` Francois Romieu
  2004-03-24  2:52           ` OGAWA Hirofumi
  2004-03-25  0:27           ` [PATCH] 2.6.5-rc2 - more " Francois Romieu
  0 siblings, 2 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-24  0:41 UTC (permalink / raw)
  To: OGAWA Hirofumi; +Cc: Jeff Garzik, netdev

OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
[...]
> > -> napi irq are disabled and device is removed from poll list. What will
> >    prevent it ?
> 
> __LINK_STATE_RX_SCHED flag is setting until __netif_rx_complete() is called.
> So netif_rx_schedule_prep() returns 0.

Ok, thanks for the explanation. It is possible that the lock stays anyway (see
below).

[...]
> > Afaiu, if some data comes in just before the spin_lock, it may wait for ages.
> 
> Yes, maybe. But, if after spin_lock, it loop may call the twice
> __netif_rx_schedule(). And netif_rx_complete() doesn't call dev_put().
> It will leaks the dev->refcnt, I think.

@$*#!zW

The following patch should avoid the leak as well as the packet rot
(untested, 1:33 AM, apply on top of previous serie).


Multiple invocation of __netif_rx_schedule() in epic_interrupt() while
epic_poll loops over __netif_rx_complete() leads to serious device
refcount leak.


 drivers/net/epic100.c |   26 +++++++++++++-------------
 1 files changed, 13 insertions(+), 13 deletions(-)

diff -puN drivers/net/epic100.c~epic100-napi-30 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-30	2004-03-24 01:18:25.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c	2004-03-24 01:19:35.000000000 +0100
@@ -337,6 +337,7 @@ struct epic_private {
 	/* Ring pointers. */
 	spinlock_t lock;				/* Group with Tx control cache line. */
 	spinlock_t napi_lock;
+	unsigned int reschedule_in_poll;
 	unsigned int cur_tx, dirty_tx;
 
 	unsigned int cur_rx, dirty_rx;
@@ -472,7 +473,9 @@ static int __devinit epic_init_one (stru
 	dev->base_addr = ioaddr;
 	dev->irq = irq;
 
-	spin_lock_init (&ep->lock);
+	spin_lock_init(&ep->lock);
+	spin_lock_init(&ep->napi_lock);
+	ep->reschedule_in_poll = 0;
 
 	/* Bring the chip out of low-power mode. */
 	outl(0x4200, ioaddr + GENCTL);
@@ -981,8 +984,6 @@ static void epic_init_ring(struct net_de
 	int i;
 
 	ep->tx_full = 0;
-	spin_lock_init(&ep->lock);
-	spin_lock_init(&ep->napi_lock);
 	ep->dirty_tx = ep->cur_tx = 0;
 	ep->cur_rx = ep->dirty_rx = 0;
 	ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1152,7 +1153,6 @@ static void epic_tx(struct net_device *d
 	}
 }
 
-
 /* The interrupt handler does all of the Rx thread work and cleans up
    after the Tx thread. */
 static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1177,12 +1177,13 @@ static irqreturn_t epic_interrupt(int ir
 			break;
 		handled = 1;
 
-		if (status & EpicNapiEvent) {
+		if ((status & EpicNapiEvent) && !ep->reschedule_in_poll) {
 			spin_lock(&ep->napi_lock);
 			if (netif_rx_schedule_prep(dev)) {
 				epic_napi_irq_off(dev, ep);
 				__netif_rx_schedule(dev);
-			}
+			} else
+				ep->reschedule_in_poll++;
 			spin_unlock(&ep->napi_lock);
 		}
 
@@ -1355,7 +1356,6 @@ static int epic_poll(struct net_device *
 
 	orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
 
-rx_action:
 	outl(EpicNapiEvent, ioaddr + INTSTAT);
 
 	epic_tx(dev, ep);
@@ -1369,18 +1369,18 @@ rx_action:
 
 	if (netif_running(dev) && (work_done < orig_budget)) {
 		unsigned long flags;
-		int status;
 
-		spin_lock_irqsave(&ep->napi_lock, flags);
 		epic_napi_irq_on(dev, ep);
+
+		spin_lock_irqsave(&ep->napi_lock, flags);
 		__netif_rx_complete(dev);
-		spin_unlock_irqrestore(&ep->napi_lock, flags);
 
-		status = inl(ioaddr + INTSTAT);
-		if (status & EpicNapiEvent) {
+		if (ep->reschedule_in_poll) {
 			epic_napi_irq_off(dev, ep);
-			goto rx_action;
+			__netif_rx_schedule(dev);
+			ep->reschedule_in_poll--;
 		}
+		spin_unlock_irqrestore(&ep->napi_lock, flags);
 	}
 
 	return (work_done >= orig_budget);

_

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-24  0:41         ` Francois Romieu
@ 2004-03-24  2:52           ` OGAWA Hirofumi
  2004-03-24 12:33             ` Francois Romieu
  2004-03-25  0:27           ` [PATCH] 2.6.5-rc2 - more " Francois Romieu
  1 sibling, 1 reply; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-24  2:52 UTC (permalink / raw)
  To: Francois Romieu; +Cc: Jeff Garzik, netdev

Francois Romieu <romieu@fr.zoreil.com> writes:

> > Yes, maybe. But, if after spin_lock, it loop may call the twice
> > __netif_rx_schedule(). And netif_rx_complete() doesn't call dev_put().
> > It will leaks the dev->refcnt, I think.
> 
> @$*#!zW
> 
> The following patch should avoid the leak as well as the packet rot
> (untested, 1:33 AM, apply on top of previous serie).
> 
> 
> Multiple invocation of __netif_rx_schedule() in epic_interrupt() while
> epic_poll loops over __netif_rx_complete() leads to serious device
> refcount leak.

Do you care the lost interrupt? If so, I was miss reading it.

IIRC, PCI spec requires the level-trigger. So devices asserts IRQ
signal until the driver clears the pending interrupt. No?
-- 
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
  2004-03-24  2:52           ` OGAWA Hirofumi
@ 2004-03-24 12:33             ` Francois Romieu
  0 siblings, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-24 12:33 UTC (permalink / raw)
  To: OGAWA Hirofumi; +Cc: Jeff Garzik, netdev

OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
[...]
> Do you care the lost interrupt? If so, I was miss reading it.
> 
> IIRC, PCI spec requires the level-trigger. So devices asserts IRQ
> signal until the driver clears the pending interrupt. No?

<insert usual "correct me if I'm wrong" disclaimer here>

The driver only masks the interruptions which are napi related so
epic_poll() and epic_interrupt() are always racing. I completely agree that
it "wastes" the nice behavior of level-triggered irq. If one wants to avoid
the race, everything should go to the poll() handler. It implies polled
access to the INTSTAT register (as done in epic_rx_err).

I could not find in the archives some general napi-wisdom which suggests
that everything has to go to the poll() handler, be it for simplicity or
for a real gain. So I hesitate to follow the other way and exchange the
polled access for some (locked) traffic between the poll() and the irq
handler.

Comments welcome.

--
Ueimor

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] 2.6.5-rc2 - more epic100 napi
  2004-03-24  0:41         ` Francois Romieu
  2004-03-24  2:52           ` OGAWA Hirofumi
@ 2004-03-25  0:27           ` Francois Romieu
  1 sibling, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-25  0:27 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: OGAWA Hirofumi, netdev

Francois Romieu <romieu@fr.zoreil.com> :
[...]
> diff -puN drivers/net/epic100.c~epic100-napi-30 drivers/net/epic100.c
> --- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-30	2004-03-24 01:18:25.000000000 +0100
> +++ linux-2.6.5-rc2-fr/drivers/net/epic100.c	2004-03-24 01:19:35.000000000 +0100
> @@ -1355,7 +1356,6 @@ static int epic_poll(struct net_device *
>  
>  	orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
>  
> -rx_action:
>  	outl(EpicNapiEvent, ioaddr + INTSTAT);
>  
>  	epic_tx(dev, ep);
> @@ -1369,18 +1369,18 @@ rx_action:
>  
>  	if (netif_running(dev) && (work_done < orig_budget)) {
>  		unsigned long flags;
> -		int status;
>  
> -		spin_lock_irqsave(&ep->napi_lock, flags);
>  		epic_napi_irq_on(dev, ep);
> +
> +		spin_lock_irqsave(&ep->napi_lock, flags);
>  		__netif_rx_complete(dev);
> -		spin_unlock_irqrestore(&ep->napi_lock, flags);
>  
> -		status = inl(ioaddr + INTSTAT);
> -		if (status & EpicNapiEvent) {
> +		if (ep->reschedule_in_poll) {
>  			epic_napi_irq_off(dev, ep);
> -			goto rx_action;
> +			__netif_rx_schedule(dev);
                        ^^^^^^^^^^^^^^^^^^^^^^^^^

While in poll() handler, brilliant :o(

Please apply (tested) patch below on top of the acked patches (i.e 1/4...4/4):
- issuing commands via the serial console under an incoming stream of 40k
  short sized pps sucks but it is possible;
- does not leak refcount (it rmmods fine).

Next step: identify the best performer amongst the previously discussed changes.


Multiple invocation of __netif_rx_schedule() in epic_interrupt() while
epic_poll loops over __netif_rx_complete() leads to serious device
refcount leak.


 drivers/net/epic100.c |   33 ++++++++++++++++++---------------
 1 files changed, 18 insertions(+), 15 deletions(-)

diff -puN drivers/net/epic100.c~epic100-napi-30 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-30	2004-03-24 01:18:25.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c	2004-03-25 00:51:30.000000000 +0100
@@ -337,6 +337,7 @@ struct epic_private {
 	/* Ring pointers. */
 	spinlock_t lock;				/* Group with Tx control cache line. */
 	spinlock_t napi_lock;
+	unsigned int reschedule_in_poll;
 	unsigned int cur_tx, dirty_tx;
 
 	unsigned int cur_rx, dirty_rx;
@@ -472,7 +473,9 @@ static int __devinit epic_init_one (stru
 	dev->base_addr = ioaddr;
 	dev->irq = irq;
 
-	spin_lock_init (&ep->lock);
+	spin_lock_init(&ep->lock);
+	spin_lock_init(&ep->napi_lock);
+	ep->reschedule_in_poll = 0;
 
 	/* Bring the chip out of low-power mode. */
 	outl(0x4200, ioaddr + GENCTL);
@@ -981,8 +984,6 @@ static void epic_init_ring(struct net_de
 	int i;
 
 	ep->tx_full = 0;
-	spin_lock_init(&ep->lock);
-	spin_lock_init(&ep->napi_lock);
 	ep->dirty_tx = ep->cur_tx = 0;
 	ep->cur_rx = ep->dirty_rx = 0;
 	ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1152,7 +1153,6 @@ static void epic_tx(struct net_device *d
 	}
 }
 
-
 /* The interrupt handler does all of the Rx thread work and cleans up
    after the Tx thread. */
 static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1177,14 +1177,16 @@ static irqreturn_t epic_interrupt(int ir
 			break;
 		handled = 1;
 
-		if (status & EpicNapiEvent) {
+		if ((status & EpicNapiEvent) && !ep->reschedule_in_poll) {
 			spin_lock(&ep->napi_lock);
 			if (netif_rx_schedule_prep(dev)) {
 				epic_napi_irq_off(dev, ep);
 				__netif_rx_schedule(dev);
-			}
+			} else
+				ep->reschedule_in_poll++;
 			spin_unlock(&ep->napi_lock);
 		}
+		status &= ~EpicNapiEvent;
 
 		/* Check uncommon events all at once. */
 		if (status &
@@ -1211,7 +1213,7 @@ static irqreturn_t epic_interrupt(int ir
 			/* Clear all error sources. */
 			outl(status & 0x7f18, ioaddr + INTSTAT);
 		}
-		if (status & EpicNormalEvent)
+		if (!(status & EpicNormalEvent))
 			break;
 		if (--boguscnt < 0) {
 			printk(KERN_ERR "%s: Too much work at interrupt, "
@@ -1356,7 +1358,6 @@ static int epic_poll(struct net_device *
 	orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
 
 rx_action:
-	outl(EpicNapiEvent, ioaddr + INTSTAT);
 
 	epic_tx(dev, ep);
 
@@ -1369,18 +1370,20 @@ rx_action:
 
 	if (netif_running(dev) && (work_done < orig_budget)) {
 		unsigned long flags;
-		int status;
 
 		spin_lock_irqsave(&ep->napi_lock, flags);
-		epic_napi_irq_on(dev, ep);
-		__netif_rx_complete(dev);
-		spin_unlock_irqrestore(&ep->napi_lock, flags);
 
-		status = inl(ioaddr + INTSTAT);
-		if (status & EpicNapiEvent) {
-			epic_napi_irq_off(dev, ep);
+		if (ep->reschedule_in_poll) {
+			ep->reschedule_in_poll--;
+			spin_unlock_irqrestore(&ep->napi_lock, flags);
 			goto rx_action;
 		}
+
+		outl(EpicNapiEvent, ioaddr + INTSTAT);
+		epic_napi_irq_on(dev, ep);
+		__netif_rx_complete(dev);
+
+		spin_unlock_irqrestore(&ep->napi_lock, flags);
 	}
 
 	return (work_done >= orig_budget);

_

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2004-03-25  0:27 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-03-20 14:21 [PATCH] [RFT] 2.6.4 - epic100 napi Francois Romieu
2004-03-21 18:24 ` Jeff Garzik
2004-03-21 23:47   ` Francois Romieu
2004-03-23 14:29   ` OGAWA Hirofumi
2004-03-23 15:14     ` Jeff Garzik
2004-03-23 16:05       ` OGAWA Hirofumi
2004-03-23 18:51     ` Francois Romieu
2004-03-23 19:59       ` OGAWA Hirofumi
2004-03-24  0:41         ` Francois Romieu
2004-03-24  2:52           ` OGAWA Hirofumi
2004-03-24 12:33             ` Francois Romieu
2004-03-25  0:27           ` [PATCH] 2.6.5-rc2 - more " Francois Romieu
2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
2004-03-22 23:51   ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
2004-03-22 23:52     ` [PATCH 2/4] 2.6.5-rc2 - epic100 napi Francois Romieu
2004-03-22 23:53       ` [PATCH 3/4] " Francois Romieu
2004-03-22 23:53         ` [PATCH 4/4] " Francois Romieu
2004-03-23  0:12   ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Jeff Garzik

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.