* [PATCH] [RFT] 2.6.4 - epic100 napi
@ 2004-03-20 14:21 Francois Romieu
2004-03-21 18:24 ` Jeff Garzik
2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
0 siblings, 2 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-20 14:21 UTC (permalink / raw)
To: netdev, Jeff Garzik
People are welcome to report how the following patch behaves on their
hardware. It does not seem too bad here but it probably is still a bit rough.
A split version of the patch will follow tomorrow. This one definitely aims
at brave and/or bored testers.
The driver lacks ethtool support. Badly. :o/
--- linux-2.6.4/drivers/net/epic100.c 2004-03-20 14:52:08.000000000 +0100
+++ linux-2.6.4/drivers/net/epic100.c 2004-03-20 14:52:13.000000000 +0100
@@ -96,9 +96,9 @@ static int rx_copybreak;
Making the Tx ring too large decreases the effectiveness of channel
bonding and packet priority.
There are no ill effects from too-large receive rings. */
-#define TX_RING_SIZE 16
+#define TX_RING_SIZE 256
#define TX_QUEUE_LEN 10 /* Limit ring entries actually used. */
-#define RX_RING_SIZE 32
+#define RX_RING_SIZE 256
#define TX_TOTAL_SIZE TX_RING_SIZE*sizeof(struct epic_tx_desc)
#define RX_TOTAL_SIZE RX_RING_SIZE*sizeof(struct epic_rx_desc)
@@ -292,6 +292,10 @@ enum CommandBits {
StopTxDMA=0x20, StopRxDMA=0x40, RestartTx=0x80,
};
+#define EpicNapiEvent (TxEmpty | TxDone | \
+ RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
+#define EpicNormalEvent (0x0000ffffUL & ~EpicNapiEvent)
+
static u16 media2miictl[16] = {
0, 0x0C00, 0x0C00, 0x2000, 0x0100, 0x2100, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0 };
@@ -330,9 +334,11 @@ struct epic_private {
/* Ring pointers. */
spinlock_t lock; /* Group with Tx control cache line. */
+ spinlock_t napi_lock;
unsigned int cur_tx, dirty_tx;
unsigned int cur_rx, dirty_rx;
+ u32 irq_mask;
unsigned int rx_buf_sz; /* Based on MTU+slack. */
struct pci_dev *pci_dev; /* PCI bus location. */
@@ -359,7 +365,8 @@ static void epic_timer(unsigned long dat
static void epic_tx_timeout(struct net_device *dev);
static void epic_init_ring(struct net_device *dev);
static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev);
-static int epic_rx(struct net_device *dev);
+static int epic_rx(struct net_device *dev, int budget);
+static int epic_poll(struct net_device *dev, int *budget);
static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
static struct ethtool_ops netdev_ethtool_ops;
@@ -378,7 +385,7 @@ static int __devinit epic_init_one (stru
int irq;
struct net_device *dev;
struct epic_private *ep;
- int i, option = 0, duplex = 0;
+ int i, ret, option = 0, duplex = 0;
void *ring_space;
dma_addr_t ring_dma;
@@ -392,29 +399,33 @@ static int __devinit epic_init_one (stru
card_idx++;
- i = pci_enable_device(pdev);
- if (i)
- return i;
+ ret = pci_enable_device(pdev);
+ if (ret)
+ goto out;
irq = pdev->irq;
if (pci_resource_len(pdev, 0) < pci_id_tbl[chip_idx].io_size) {
printk (KERN_ERR "card %d: no PCI region space\n", card_idx);
- return -ENODEV;
+ ret = -ENODEV;
+ goto err_out_disable;
}
pci_set_master(pdev);
+ ret = pci_request_regions(pdev, DRV_NAME);
+ if (ret < 0)
+ goto err_out_disable;
+
+ ret = -ENOMEM;
+
dev = alloc_etherdev(sizeof (*ep));
if (!dev) {
printk (KERN_ERR "card %d: no memory for eth device\n", card_idx);
- return -ENOMEM;
+ goto err_out_free_res;
}
SET_MODULE_OWNER(dev);
SET_NETDEV_DEV(dev, &pdev->dev);
- if (pci_request_regions(pdev, DRV_NAME))
- goto err_out_free_netdev;
-
#ifdef USE_IO_OPS
ioaddr = pci_resource_start (pdev, 0);
#else
@@ -422,7 +433,7 @@ static int __devinit epic_init_one (stru
ioaddr = (long) ioremap (ioaddr, pci_resource_len (pdev, 1));
if (!ioaddr) {
printk (KERN_ERR DRV_NAME " %d: ioremap failed\n", card_idx);
- goto err_out_free_res;
+ goto err_out_free_netdev;
}
#endif
@@ -489,6 +500,9 @@ static int __devinit epic_init_one (stru
ep->pci_dev = pdev;
ep->chip_id = chip_idx;
ep->chip_flags = pci_id_tbl[chip_idx].drv_flags;
+ ep->irq_mask =
+ (ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
+ | CntFull | TxUnderrun | EpicNapiEvent;
/* Find the connected MII xcvrs.
Doing this in open() would allow detecting external xcvrs later, but
@@ -543,10 +557,12 @@ static int __devinit epic_init_one (stru
dev->ethtool_ops = &netdev_ethtool_ops;
dev->watchdog_timeo = TX_TIMEOUT;
dev->tx_timeout = &epic_tx_timeout;
+ dev->poll = epic_poll;
+ dev->weight = 64;
- i = register_netdev(dev);
- if (i)
- goto err_out_unmap_tx;
+ ret = register_netdev(dev);
+ if (ret < 0)
+ goto err_out_unmap_rx;
printk(KERN_INFO "%s: %s at %#lx, IRQ %d, ",
dev->name, pci_id_tbl[chip_idx].name, ioaddr, dev->irq);
@@ -554,19 +570,24 @@ static int __devinit epic_init_one (stru
printk("%2.2x:", dev->dev_addr[i]);
printk("%2.2x.\n", dev->dev_addr[i]);
- return 0;
+out:
+ return ret;
+err_out_unmap_rx:
+ pci_free_consistent(pdev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma);
err_out_unmap_tx:
pci_free_consistent(pdev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma);
err_out_iounmap:
#ifndef USE_IO_OPS
iounmap(ioaddr);
-err_out_free_res:
-#endif
- pci_release_regions(pdev);
err_out_free_netdev:
+#endif
free_netdev(dev);
- return -ENODEV;
+err_out_free_res:
+ pci_release_regions(pdev);
+err_out_disable:
+ pci_disable_device(pdev);
+ goto out;
}
\f
/* Serial EEPROM section. */
@@ -592,6 +613,36 @@ err_out_free_netdev:
#define EE_READ256_CMD (6 << 8)
#define EE_ERASE_CMD (7 << 6)
+static void epic_disable_int(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+
+ outl(0x00000000, ioaddr + INTMASK);
+}
+
+static inline void __epic_pci_commit(long ioaddr)
+{
+#ifndef USE_IO_OPS
+ inl(ioaddr + INTMASK);
+#endif
+}
+
+static void epic_napi_irq_off(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+
+ outl(ep->irq_mask & ~EpicNapiEvent, ioaddr + INTMASK);
+ __epic_pci_commit(ioaddr);
+}
+
+static void epic_napi_irq_on(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+
+ /* No need to commit possible posted write */
+ outl(ep->irq_mask | EpicNapiEvent, ioaddr + INTMASK);
+}
+
static int __devinit read_eeprom(long ioaddr, int location)
{
int i;
@@ -753,8 +804,7 @@ static int epic_open(struct net_device *
/* Enable interrupts by setting the interrupt mask. */
outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
| CntFull | TxUnderrun | TxDone | TxEmpty
- | RxError | RxOverflow | RxFull | RxHeader | RxDone,
- ioaddr + INTMASK);
+ | RxError | EpicNapiEvent, ioaddr + INTMASK);
if (debug > 1)
printk(KERN_DEBUG "%s: epic_open() ioaddr %lx IRQ %d status %4.4x "
@@ -795,7 +845,7 @@ static void epic_pause(struct net_device
}
/* Remove the packets on the Rx queue. */
- epic_rx(dev);
+ epic_rx(dev, RX_RING_SIZE);
}
static void epic_restart(struct net_device *dev)
@@ -842,7 +892,7 @@ static void epic_restart(struct net_devi
/* Enable interrupts by setting the interrupt mask. */
outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
| CntFull | TxUnderrun | TxDone | TxEmpty
- | RxError | RxOverflow | RxFull | RxHeader | RxDone,
+ | RxError | EpicNapiEvent,
ioaddr + INTMASK);
printk(KERN_DEBUG "%s: epic_restart() done, cmd status %4.4x, ctl %4.4x"
" interrupt %4.4x.\n",
@@ -929,7 +979,8 @@ static void epic_init_ring(struct net_de
int i;
ep->tx_full = 0;
- ep->lock = (spinlock_t) SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&ep->lock);
+ spin_lock_init(&ep->napi_lock);
ep->dirty_tx = ep->cur_tx = 0;
ep->cur_rx = ep->dirty_rx = 0;
ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1029,6 +1080,77 @@ static int epic_start_xmit(struct sk_buf
return 0;
}
+static void epic_tx_error(struct net_device *dev, struct epic_private *ep,
+ int status)
+{
+ struct net_device_stats *stats = &ep->stats;
+
+#ifndef final_version
+ /* There was an major error, log it. */
+ if (debug > 1)
+ printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
+ dev->name, status);
+#endif
+ stats->tx_errors++;
+ if (status & 0x1050)
+ stats->tx_aborted_errors++;
+ if (status & 0x0008)
+ stats->tx_carrier_errors++;
+ if (status & 0x0040)
+ stats->tx_window_errors++;
+ if (status & 0x0010)
+ stats->tx_fifo_errors++;
+}
+
+static void epic_tx(struct net_device *dev, struct epic_private *ep)
+{
+ unsigned int dirty_tx, cur_tx;
+
+ /*
+ * Note: if this lock becomes a problem we can narrow the locked
+ * region at the cost of occasionally grabbing the lock more times.
+ */
+ cur_tx = ep->cur_tx;
+ for (dirty_tx = ep->dirty_tx; cur_tx - dirty_tx > 0; dirty_tx++) {
+ struct sk_buff *skb;
+ int entry = dirty_tx % TX_RING_SIZE;
+ int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
+
+ if (txstatus & DescOwn)
+ break; /* It still hasn't been Txed */
+
+ if (likely(txstatus & 0x0001)) {
+ ep->stats.collisions += (txstatus >> 8) & 15;
+ ep->stats.tx_packets++;
+ ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
+ } else
+ epic_tx_error(dev, ep, txstatus);
+
+ /* Free the original skb. */
+ skb = ep->tx_skbuff[entry];
+ pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr,
+ skb->len, PCI_DMA_TODEVICE);
+ dev_kfree_skb_irq(skb);
+ ep->tx_skbuff[entry] = 0;
+ }
+
+#ifndef final_version
+ if (cur_tx - dirty_tx > TX_RING_SIZE) {
+ printk(KERN_WARNING
+ "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
+ dev->name, dirty_tx, cur_tx, ep->tx_full);
+ dirty_tx += TX_RING_SIZE;
+ }
+#endif
+ ep->dirty_tx = dirty_tx;
+ if (ep->tx_full && cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
+ /* The ring is no longer full, allow new TX entries. */
+ ep->tx_full = 0;
+ netif_wake_queue(dev);
+ }
+}
+
+
/* The interrupt handler does all of the Rx thread work and cleans up
after the Tx thread. */
static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1042,7 +1164,7 @@ static irqreturn_t epic_interrupt(int ir
do {
status = inl(ioaddr + INTSTAT);
/* Acknowledge all of the current interrupt sources ASAP. */
- outl(status & 0x00007fff, ioaddr + INTSTAT);
+ outl(status & EpicNormalEvent, ioaddr + INTSTAT);
if (debug > 4)
printk(KERN_DEBUG "%s: Interrupt, status=%#8.8x new "
@@ -1053,73 +1175,18 @@ static irqreturn_t epic_interrupt(int ir
break;
handled = 1;
- if (status & (RxDone | RxStarted | RxEarlyWarn | RxOverflow))
- epic_rx(dev);
-
- if (status & (TxEmpty | TxDone)) {
- unsigned int dirty_tx, cur_tx;
-
- /* Note: if this lock becomes a problem we can narrow the locked
- region at the cost of occasionally grabbing the lock more
- times. */
- spin_lock(&ep->lock);
- cur_tx = ep->cur_tx;
- dirty_tx = ep->dirty_tx;
- for (; cur_tx - dirty_tx > 0; dirty_tx++) {
- struct sk_buff *skb;
- int entry = dirty_tx % TX_RING_SIZE;
- int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
-
- if (txstatus & DescOwn)
- break; /* It still hasn't been Txed */
-
- if ( ! (txstatus & 0x0001)) {
- /* There was an major error, log it. */
-#ifndef final_version
- if (debug > 1)
- printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
- dev->name, txstatus);
-#endif
- ep->stats.tx_errors++;
- if (txstatus & 0x1050) ep->stats.tx_aborted_errors++;
- if (txstatus & 0x0008) ep->stats.tx_carrier_errors++;
- if (txstatus & 0x0040) ep->stats.tx_window_errors++;
- if (txstatus & 0x0010) ep->stats.tx_fifo_errors++;
- } else {
- ep->stats.collisions += (txstatus >> 8) & 15;
- ep->stats.tx_packets++;
- ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
- }
-
- /* Free the original skb. */
- skb = ep->tx_skbuff[entry];
- pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr,
- skb->len, PCI_DMA_TODEVICE);
- dev_kfree_skb_irq(skb);
- ep->tx_skbuff[entry] = 0;
+ if (status & EpicNapiEvent) {
+ spin_lock(&ep->napi_lock);
+ if (netif_rx_schedule_prep(dev)) {
+ epic_napi_irq_off(dev, ep);
+ __netif_rx_schedule(dev);
}
-
-#ifndef final_version
- if (cur_tx - dirty_tx > TX_RING_SIZE) {
- printk(KERN_WARNING "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
- dev->name, dirty_tx, cur_tx, ep->tx_full);
- dirty_tx += TX_RING_SIZE;
- }
-#endif
- ep->dirty_tx = dirty_tx;
- if (ep->tx_full
- && cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
- /* The ring is no longer full, allow new TX entries. */
- ep->tx_full = 0;
- spin_unlock(&ep->lock);
- netif_wake_queue(dev);
- } else
- spin_unlock(&ep->lock);
+ spin_unlock(&ep->napi_lock);
}
/* Check uncommon events all at once. */
- if (status & (CntFull | TxUnderrun | RxOverflow | RxFull |
- PCIBusErr170 | PCIBusErr175)) {
+ if (status &
+ (CntFull | TxUnderrun | PCIBusErr170 | PCIBusErr175)) {
if (status == 0xffffffff) /* Chip failed or removed (CardBus). */
break;
/* Always update the error counts to avoid overhead later. */
@@ -1133,11 +1200,6 @@ static irqreturn_t epic_interrupt(int ir
/* Restart the transmit process. */
outl(RestartTx, ioaddr + COMMAND);
}
- if (status & RxOverflow) { /* Missed a Rx frame. */
- ep->stats.rx_errors++;
- }
- if (status & (RxOverflow | RxFull))
- outw(RxQueued, ioaddr + COMMAND);
if (status & PCIBusErr170) {
printk(KERN_ERR "%s: PCI Bus Error! EPIC status %4.4x.\n",
dev->name, status);
@@ -1147,6 +1209,8 @@ static irqreturn_t epic_interrupt(int ir
/* Clear all error sources. */
outl(status & 0x7f18, ioaddr + INTSTAT);
}
+ if (!(status & EpicNormalEvent))
+ break;
if (--boguscnt < 0) {
printk(KERN_ERR "%s: Too much work at interrupt, "
"IntrStatus=0x%8.8x.\n",
@@ -1164,7 +1228,7 @@ static irqreturn_t epic_interrupt(int ir
return IRQ_RETVAL(handled);
}
-static int epic_rx(struct net_device *dev)
+static int epic_rx(struct net_device *dev, int budget)
{
struct epic_private *ep = dev->priv;
int entry = ep->cur_rx % RX_RING_SIZE;
@@ -1174,6 +1238,10 @@ static int epic_rx(struct net_device *de
if (debug > 4)
printk(KERN_DEBUG " In epic_rx(), entry %d %8.8x.\n", entry,
ep->rx_ring[entry].rxstatus);
+
+ if (rx_work_limit > budget)
+ rx_work_limit = budget;
+
/* If we own the next entry, it's a new packet. Send it up. */
while ((ep->rx_ring[entry].rxstatus & cpu_to_le32(DescOwn)) == 0) {
int status = le32_to_cpu(ep->rx_ring[entry].rxstatus);
@@ -1228,7 +1296,7 @@ static int epic_rx(struct net_device *de
ep->rx_skbuff[entry] = NULL;
}
skb->protocol = eth_type_trans(skb, dev);
- netif_rx(skb);
+ netif_receive_skb(skb);
dev->last_rx = jiffies;
ep->stats.rx_packets++;
ep->stats.rx_bytes += pkt_len;
@@ -1256,6 +1324,60 @@ static int epic_rx(struct net_device *de
return work_done;
}
+static void epic_rx_err(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+ int status;
+
+ status = inl(ioaddr + INTSTAT);
+
+ if (status == 0xffffffff)
+ return;
+ if (status & RxOverflow) /* Missed a Rx frame. */
+ ep->stats.rx_errors++;
+ if (status & (RxOverflow | RxFull))
+ outw(RxQueued, ioaddr + COMMAND);
+}
+
+static int epic_poll(struct net_device *dev, int *budget)
+{
+ struct epic_private *ep = dev->priv;
+ int work_done, orig_budget;
+ long ioaddr = dev->base_addr;
+
+ epic_tx(dev, ep);
+
+ orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
+
+rx_action:
+ outl(EpicNapiEvent, ioaddr + INTSTAT);
+
+ work_done = epic_rx(dev, *budget);
+
+ epic_rx_err(dev, ep);
+
+ *budget -= work_done;
+ dev->quota -= work_done;
+
+ if (work_done < orig_budget) {
+ unsigned long flags;
+ int status;
+
+ spin_lock_irqsave(&ep->napi_lock, flags);
+ epic_napi_irq_on(dev, ep);
+ __netif_rx_complete(dev);
+ spin_unlock_irqrestore(&ep->napi_lock, flags);
+
+ status = inl(ioaddr + INTSTAT);
+ if (status & EpicNapiEvent) {
+ epic_napi_irq_off(dev, ep);
+ goto rx_action;
+ }
+ }
+
+ return (work_done >= orig_budget);
+}
+
static int epic_close(struct net_device *dev)
{
long ioaddr = dev->base_addr;
@@ -1270,9 +1392,13 @@ static int epic_close(struct net_device
dev->name, (int)inl(ioaddr + INTSTAT));
del_timer_sync(&ep->timer);
- epic_pause(dev);
+
+ epic_disable_int(dev, ep);
+
free_irq(dev->irq, dev);
+ epic_pause(dev);
+
/* Free all the skbuffs in the Rx queue. */
for (i = 0; i < RX_RING_SIZE; i++) {
skb = ep->rx_skbuff[i];
@@ -1470,6 +1596,7 @@ static void __devexit epic_remove_one (s
#endif
pci_release_regions(pdev);
free_netdev(dev);
+ pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
/* pci_power_off(pdev, -1); */
}
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-20 14:21 [PATCH] [RFT] 2.6.4 - epic100 napi Francois Romieu
@ 2004-03-21 18:24 ` Jeff Garzik
2004-03-21 23:47 ` Francois Romieu
2004-03-23 14:29 ` OGAWA Hirofumi
2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
1 sibling, 2 replies; 18+ messages in thread
From: Jeff Garzik @ 2004-03-21 18:24 UTC (permalink / raw)
To: Francois Romieu; +Cc: netdev
Francois Romieu wrote:
> People are welcome to report how the following patch behaves on their
> hardware. It does not seem too bad here but it probably is still a bit rough.
> A split version of the patch will follow tomorrow. This one definitely aims
> at brave and/or bored testers.
Looks pretty good, but includes a standard NAPI race...
When you split up the patches, I'll throw it into my -netdev tree, which
means it will be automatically included in -mm for testing (as is r8169
now).
FWIW Andrew Morton has made me lazy... I don't bother publishing
separate -netdev patches anymore, since he automatically downloads my
netdev-2.6 BK tree before doing each -mm release.
> + if (work_done < orig_budget) {
> + unsigned long flags;
> + int status;
> +
> + spin_lock_irqsave(&ep->napi_lock, flags);
> + epic_napi_irq_on(dev, ep);
> + __netif_rx_complete(dev);
> + spin_unlock_irqrestore(&ep->napi_lock, flags);
> +
> + status = inl(ioaddr + INTSTAT);
> + if (status & EpicNapiEvent) {
> + epic_napi_irq_off(dev, ep);
> + goto rx_action;
> + }
Need to add a netif_running() check to the 'if' test at the top of the
quote.
Are you (or somebody else?) interested in reviewing all the in-tree NAPI
drivers, and seeing if other drivers have this bug? I think 8139cp.c
does at least, maybe e100 too... Such a fix would need to go into 2.4.x
as well.
Jeff
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-21 18:24 ` Jeff Garzik
@ 2004-03-21 23:47 ` Francois Romieu
2004-03-23 14:29 ` OGAWA Hirofumi
1 sibling, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-21 23:47 UTC (permalink / raw)
To: Jeff Garzik; +Cc: netdev
Jeff Garzik <jgarzik@pobox.com> :
[...]
> Need to add a netif_running() check to the 'if' test at the top of the
> quote.
>
> Are you (or somebody else?) interested in reviewing all the in-tree NAPI
> drivers, and seeing if other drivers have this bug? I think 8139cp.c
> does at least, maybe e100 too... Such a fix would need to go into 2.4.x
> as well.
Ok, I'll check for the race against dev_close.
--
Ueimor
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 0/4] 2.6.5-rc2 - epic100 update
2004-03-20 14:21 [PATCH] [RFT] 2.6.4 - epic100 napi Francois Romieu
2004-03-21 18:24 ` Jeff Garzik
@ 2004-03-22 23:50 ` Francois Romieu
2004-03-22 23:51 ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
2004-03-23 0:12 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Jeff Garzik
1 sibling, 2 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:50 UTC (permalink / raw)
To: netdev; +Cc: Jeff Garzik
Schedule:
- epic100-fixup.patch: opportunistic cleanup
- epic100-napi-00.patch: code shuffling before the move
- epic100-napi-10.patch: rx napi (includes netif_running change)
- epic100-napi-20.patch: minimalistic tx napi
2.6.5-rc2 and 2.6.5-rc2-mm1 contain the same drivers/net/epic100.c
so the patches should apply equally to both.
The driver has been moderately tested.
Feedback welcome.
--
Ueimor
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 1/4] 2.6.5-rc2 - epic100 fixup
2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
@ 2004-03-22 23:51 ` Francois Romieu
2004-03-22 23:52 ` [PATCH 2/4] 2.6.5-rc2 - epic100 napi Francois Romieu
2004-03-23 0:12 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Jeff Garzik
1 sibling, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:51 UTC (permalink / raw)
To: netdev; +Cc: Jeff Garzik
- extra pci_disable_device() to balance invocation of pci_enable_device()
in epic_init_one() (-> error path + epic_remove_one());
- epic_init_one() returned a blanket -ENODEV from its shared error path;
propagate the actual error code instead;
- memory dedicated to Rx descriptors was not freed after failure of
register_netdev() in epic_init_one();
- use of epic_pause() in epic_close() offers a small window for a late
interrupt just before the final free_irq(). Let's close the window to
avoid two epic_rx() threads racing with each other.
drivers/net/epic100.c | 59 +++++++++++++++++++++++++++++++++-----------------
1 files changed, 40 insertions(+), 19 deletions(-)
diff -puN drivers/net/epic100.c~epic100-fixup drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-fixup 2004-03-22 22:53:16.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c 2004-03-22 22:53:16.000000000 +0100
@@ -378,7 +378,7 @@ static int __devinit epic_init_one (stru
int irq;
struct net_device *dev;
struct epic_private *ep;
- int i, option = 0, duplex = 0;
+ int i, ret, option = 0, duplex = 0;
void *ring_space;
dma_addr_t ring_dma;
@@ -392,29 +392,33 @@ static int __devinit epic_init_one (stru
card_idx++;
- i = pci_enable_device(pdev);
- if (i)
- return i;
+ ret = pci_enable_device(pdev);
+ if (ret)
+ goto out;
irq = pdev->irq;
if (pci_resource_len(pdev, 0) < pci_id_tbl[chip_idx].io_size) {
printk (KERN_ERR "card %d: no PCI region space\n", card_idx);
- return -ENODEV;
+ ret = -ENODEV;
+ goto err_out_disable;
}
pci_set_master(pdev);
+ ret = pci_request_regions(pdev, DRV_NAME);
+ if (ret < 0)
+ goto err_out_disable;
+
+ ret = -ENOMEM;
+
dev = alloc_etherdev(sizeof (*ep));
if (!dev) {
printk (KERN_ERR "card %d: no memory for eth device\n", card_idx);
- return -ENOMEM;
+ goto err_out_free_res;
}
SET_MODULE_OWNER(dev);
SET_NETDEV_DEV(dev, &pdev->dev);
- if (pci_request_regions(pdev, DRV_NAME))
- goto err_out_free_netdev;
-
#ifdef USE_IO_OPS
ioaddr = pci_resource_start (pdev, 0);
#else
@@ -422,7 +426,7 @@ static int __devinit epic_init_one (stru
ioaddr = (long) ioremap (ioaddr, pci_resource_len (pdev, 1));
if (!ioaddr) {
printk (KERN_ERR DRV_NAME " %d: ioremap failed\n", card_idx);
- goto err_out_free_res;
+ goto err_out_free_netdev;
}
#endif
@@ -544,9 +548,9 @@ static int __devinit epic_init_one (stru
dev->watchdog_timeo = TX_TIMEOUT;
dev->tx_timeout = &epic_tx_timeout;
- i = register_netdev(dev);
- if (i)
- goto err_out_unmap_tx;
+ ret = register_netdev(dev);
+ if (ret < 0)
+ goto err_out_unmap_rx;
printk(KERN_INFO "%s: %s at %#lx, IRQ %d, ",
dev->name, pci_id_tbl[chip_idx].name, ioaddr, dev->irq);
@@ -554,19 +558,24 @@ static int __devinit epic_init_one (stru
printk("%2.2x:", dev->dev_addr[i]);
printk("%2.2x.\n", dev->dev_addr[i]);
- return 0;
+out:
+ return ret;
+err_out_unmap_rx:
+ pci_free_consistent(pdev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma);
err_out_unmap_tx:
pci_free_consistent(pdev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma);
err_out_iounmap:
#ifndef USE_IO_OPS
iounmap(ioaddr);
-err_out_free_res:
-#endif
- pci_release_regions(pdev);
err_out_free_netdev:
+#endif
free_netdev(dev);
- return -ENODEV;
+err_out_free_res:
+ pci_release_regions(pdev);
+err_out_disable:
+ pci_disable_device(pdev);
+ goto out;
}
\f
/* Serial EEPROM section. */
@@ -592,6 +601,13 @@ err_out_free_netdev:
#define EE_READ256_CMD (6 << 8)
#define EE_ERASE_CMD (7 << 6)
+static void epic_disable_int(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+
+ outl(0x00000000, ioaddr + INTMASK);
+}
+
static int __devinit read_eeprom(long ioaddr, int location)
{
int i;
@@ -1276,9 +1292,13 @@ static int epic_close(struct net_device
dev->name, (int)inl(ioaddr + INTSTAT));
del_timer_sync(&ep->timer);
- epic_pause(dev);
+
+ epic_disable_int(dev, ep);
+
free_irq(dev->irq, dev);
+ epic_pause(dev);
+
/* Free all the skbuffs in the Rx queue. */
for (i = 0; i < RX_RING_SIZE; i++) {
skb = ep->rx_skbuff[i];
@@ -1476,6 +1496,7 @@ static void __devexit epic_remove_one (s
#endif
pci_release_regions(pdev);
free_netdev(dev);
+ pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
/* pci_power_off(pdev, -1); */
}
_
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 2/4] 2.6.5-rc2 - epic100 napi
2004-03-22 23:51 ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
@ 2004-03-22 23:52 ` Francois Romieu
2004-03-22 23:53 ` [PATCH 3/4] " Francois Romieu
0 siblings, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:52 UTC (permalink / raw)
To: netdev; +Cc: Jeff Garzik
Isolate the classical TX part of epic_interrupt. Innocent code shuffling.
drivers/net/epic100.c | 137 +++++++++++++++++++++++++++-----------------------
1 files changed, 76 insertions(+), 61 deletions(-)
diff -puN drivers/net/epic100.c~epic100-napi-00 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-00 2004-03-22 22:53:18.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c 2004-03-22 22:53:18.000000000 +0100
@@ -1045,6 +1045,79 @@ static int epic_start_xmit(struct sk_buf
return 0;
}
+static void epic_tx_error(struct net_device *dev, struct epic_private *ep,
+ int status)
+{
+ struct net_device_stats *stats = &ep->stats;
+
+#ifndef final_version
+ /* There was an major error, log it. */
+ if (debug > 1)
+ printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
+ dev->name, status);
+#endif
+ stats->tx_errors++;
+ if (status & 0x1050)
+ stats->tx_aborted_errors++;
+ if (status & 0x0008)
+ stats->tx_carrier_errors++;
+ if (status & 0x0040)
+ stats->tx_window_errors++;
+ if (status & 0x0010)
+ stats->tx_fifo_errors++;
+}
+
+static void epic_tx(struct net_device *dev, struct epic_private *ep)
+{
+ unsigned int dirty_tx, cur_tx;
+
+ /*
+ * Note: if this lock becomes a problem we can narrow the locked
+ * region at the cost of occasionally grabbing the lock more times.
+ */
+ spin_lock(&ep->lock);
+ cur_tx = ep->cur_tx;
+ for (dirty_tx = ep->dirty_tx; cur_tx - dirty_tx > 0; dirty_tx++) {
+ struct sk_buff *skb;
+ int entry = dirty_tx % TX_RING_SIZE;
+ int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
+
+ if (txstatus & DescOwn)
+ break; /* It still hasn't been Txed */
+
+ if (likely(txstatus & 0x0001)) {
+ ep->stats.collisions += (txstatus >> 8) & 15;
+ ep->stats.tx_packets++;
+ ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
+ } else
+ epic_tx_error(dev, ep, txstatus);
+
+ /* Free the original skb. */
+ skb = ep->tx_skbuff[entry];
+ pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr,
+ skb->len, PCI_DMA_TODEVICE);
+ dev_kfree_skb_irq(skb);
+ ep->tx_skbuff[entry] = 0;
+ }
+
+#ifndef final_version
+ if (cur_tx - dirty_tx > TX_RING_SIZE) {
+ printk(KERN_WARNING
+ "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
+ dev->name, dirty_tx, cur_tx, ep->tx_full);
+ dirty_tx += TX_RING_SIZE;
+ }
+#endif
+ ep->dirty_tx = dirty_tx;
+ if (ep->tx_full && cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
+ /* The ring is no longer full, allow new TX entries. */
+ ep->tx_full = 0;
+ netif_wake_queue(dev);
+ }
+ spin_unlock(&ep->lock);
+}
+
+
/* The interrupt handler does all of the Rx thread work and cleans up
after the Tx thread. */
static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1072,66 +1145,8 @@ static irqreturn_t epic_interrupt(int ir
if (status & (RxDone | RxStarted | RxEarlyWarn | RxOverflow))
epic_rx(dev);
- if (status & (TxEmpty | TxDone)) {
- unsigned int dirty_tx, cur_tx;
-
- /* Note: if this lock becomes a problem we can narrow the locked
- region at the cost of occasionally grabbing the lock more
- times. */
- spin_lock(&ep->lock);
- cur_tx = ep->cur_tx;
- dirty_tx = ep->dirty_tx;
- for (; cur_tx - dirty_tx > 0; dirty_tx++) {
- struct sk_buff *skb;
- int entry = dirty_tx % TX_RING_SIZE;
- int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
-
- if (txstatus & DescOwn)
- break; /* It still hasn't been Txed */
-
- if ( ! (txstatus & 0x0001)) {
- /* There was an major error, log it. */
-#ifndef final_version
- if (debug > 1)
- printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
- dev->name, txstatus);
-#endif
- ep->stats.tx_errors++;
- if (txstatus & 0x1050) ep->stats.tx_aborted_errors++;
- if (txstatus & 0x0008) ep->stats.tx_carrier_errors++;
- if (txstatus & 0x0040) ep->stats.tx_window_errors++;
- if (txstatus & 0x0010) ep->stats.tx_fifo_errors++;
- } else {
- ep->stats.collisions += (txstatus >> 8) & 15;
- ep->stats.tx_packets++;
- ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
- }
-
- /* Free the original skb. */
- skb = ep->tx_skbuff[entry];
- pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr,
- skb->len, PCI_DMA_TODEVICE);
- dev_kfree_skb_irq(skb);
- ep->tx_skbuff[entry] = 0;
- }
-
-#ifndef final_version
- if (cur_tx - dirty_tx > TX_RING_SIZE) {
- printk(KERN_WARNING "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
- dev->name, dirty_tx, cur_tx, ep->tx_full);
- dirty_tx += TX_RING_SIZE;
- }
-#endif
- ep->dirty_tx = dirty_tx;
- if (ep->tx_full
- && cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
- /* The ring is no longer full, allow new TX entries. */
- ep->tx_full = 0;
- spin_unlock(&ep->lock);
- netif_wake_queue(dev);
- } else
- spin_unlock(&ep->lock);
- }
+ if (status & (TxEmpty | TxDone))
+ epic_tx(dev, ep);
/* Check uncommon events all at once. */
if (status & (CntFull | TxUnderrun | RxOverflow | RxFull |
@@ -1149,7 +1164,7 @@ static irqreturn_t epic_interrupt(int ir
/* Restart the transmit process. */
outl(RestartTx, ioaddr + COMMAND);
}
- if (status & RxOverflow) { /* Missed a Rx frame. */
+ if (status & RxOverflow) { /* Missed a Rx frame. */
ep->stats.rx_errors++;
}
if (status & (RxOverflow | RxFull))
_
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 3/4] 2.6.5-rc2 - epic100 napi
2004-03-22 23:52 ` [PATCH 2/4] 2.6.5-rc2 - epic100 napi Francois Romieu
@ 2004-03-22 23:53 ` Francois Romieu
2004-03-22 23:53 ` [PATCH 4/4] " Francois Romieu
0 siblings, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:53 UTC (permalink / raw)
To: netdev; +Cc: Jeff Garzik
RX NAPI.
drivers/net/epic100.c | 137 ++++++++++++++++++++++++++++++++++++++++++--------
1 files changed, 116 insertions(+), 21 deletions(-)
diff -puN drivers/net/epic100.c~epic100-napi-10 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-10 2004-03-22 22:53:19.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c 2004-03-23 00:18:33.000000000 +0100
@@ -98,7 +98,7 @@ static int rx_copybreak;
There are no ill effects from too-large receive rings. */
#define TX_RING_SIZE 16
#define TX_QUEUE_LEN 10 /* Limit ring entries actually used. */
-#define RX_RING_SIZE 32
+#define RX_RING_SIZE 256
#define TX_TOTAL_SIZE TX_RING_SIZE*sizeof(struct epic_tx_desc)
#define RX_TOTAL_SIZE RX_RING_SIZE*sizeof(struct epic_rx_desc)
@@ -292,6 +292,11 @@ enum CommandBits {
StopTxDMA=0x20, StopRxDMA=0x40, RestartTx=0x80,
};
+#define EpicRemoved 0xffffffff /* Chip failed or removed (CardBus) */
+
+#define EpicNapiEvent (RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
+#define EpicNormalEvent (0x0000ffff & ~EpicNapiEvent)
+
static u16 media2miictl[16] = {
0, 0x0C00, 0x0C00, 0x2000, 0x0100, 0x2100, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0 };
@@ -330,9 +335,11 @@ struct epic_private {
/* Ring pointers. */
spinlock_t lock; /* Group with Tx control cache line. */
+ spinlock_t napi_lock;
unsigned int cur_tx, dirty_tx;
unsigned int cur_rx, dirty_rx;
+ u32 irq_mask;
unsigned int rx_buf_sz; /* Based on MTU+slack. */
struct pci_dev *pci_dev; /* PCI bus location. */
@@ -359,7 +366,8 @@ static void epic_timer(unsigned long dat
static void epic_tx_timeout(struct net_device *dev);
static void epic_init_ring(struct net_device *dev);
static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev);
-static int epic_rx(struct net_device *dev);
+static int epic_rx(struct net_device *dev, int budget);
+static int epic_poll(struct net_device *dev, int *budget);
static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
static struct ethtool_ops netdev_ethtool_ops;
@@ -493,6 +501,9 @@ static int __devinit epic_init_one (stru
ep->pci_dev = pdev;
ep->chip_id = chip_idx;
ep->chip_flags = pci_id_tbl[chip_idx].drv_flags;
+ ep->irq_mask =
+ (ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
+ | CntFull | TxUnderrun | TxDone | TxEmpty | EpicNapiEvent;
/* Find the connected MII xcvrs.
Doing this in open() would allow detecting external xcvrs later, but
@@ -547,6 +558,8 @@ static int __devinit epic_init_one (stru
dev->ethtool_ops = &netdev_ethtool_ops;
dev->watchdog_timeo = TX_TIMEOUT;
dev->tx_timeout = &epic_tx_timeout;
+ dev->poll = epic_poll;
+ dev->weight = 64;
ret = register_netdev(dev);
if (ret < 0)
@@ -608,6 +621,29 @@ static void epic_disable_int(struct net_
outl(0x00000000, ioaddr + INTMASK);
}
+static inline void __epic_pci_commit(long ioaddr)
+{
+#ifndef USE_IO_OPS
+ inl(ioaddr + INTMASK);
+#endif
+}
+
+static void epic_napi_irq_off(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+
+ outl(ep->irq_mask & ~EpicNapiEvent, ioaddr + INTMASK);
+ __epic_pci_commit(ioaddr);
+}
+
+static void epic_napi_irq_on(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+
+ /* No need to commit possible posted write */
+ outl(ep->irq_mask | EpicNapiEvent, ioaddr + INTMASK);
+}
+
static int __devinit read_eeprom(long ioaddr, int location)
{
int i;
@@ -769,8 +805,7 @@ static int epic_open(struct net_device *
/* Enable interrupts by setting the interrupt mask. */
outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
| CntFull | TxUnderrun | TxDone | TxEmpty
- | RxError | RxOverflow | RxFull | RxHeader | RxDone,
- ioaddr + INTMASK);
+ | RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
if (debug > 1)
printk(KERN_DEBUG "%s: epic_open() ioaddr %lx IRQ %d status %4.4x "
@@ -811,7 +846,7 @@ static void epic_pause(struct net_device
}
/* Remove the packets on the Rx queue. */
- epic_rx(dev);
+ epic_rx(dev, RX_RING_SIZE);
}
static void epic_restart(struct net_device *dev)
@@ -858,8 +893,8 @@ static void epic_restart(struct net_devi
/* Enable interrupts by setting the interrupt mask. */
outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
| CntFull | TxUnderrun | TxDone | TxEmpty
- | RxError | RxOverflow | RxFull | RxHeader | RxDone,
- ioaddr + INTMASK);
+ | RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
+
printk(KERN_DEBUG "%s: epic_restart() done, cmd status %4.4x, ctl %4.4x"
" interrupt %4.4x.\n",
dev->name, (int)inl(ioaddr + COMMAND), (int)inl(ioaddr + GENCTL),
@@ -945,7 +980,8 @@ static void epic_init_ring(struct net_de
int i;
ep->tx_full = 0;
- ep->lock = (spinlock_t) SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&ep->lock);
+ spin_lock_init(&ep->napi_lock);
ep->dirty_tx = ep->cur_tx = 0;
ep->cur_rx = ep->dirty_rx = 0;
ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1131,7 +1167,7 @@ static irqreturn_t epic_interrupt(int ir
do {
status = inl(ioaddr + INTSTAT);
/* Acknowledge all of the current interrupt sources ASAP. */
- outl(status & 0x00007fff, ioaddr + INTSTAT);
+ outl(status & EpicNormalEvent, ioaddr + INTSTAT);
if (debug > 4)
printk(KERN_DEBUG "%s: Interrupt, status=%#8.8x new "
@@ -1142,16 +1178,22 @@ static irqreturn_t epic_interrupt(int ir
break;
handled = 1;
- if (status & (RxDone | RxStarted | RxEarlyWarn | RxOverflow))
- epic_rx(dev);
+ if (status & EpicNapiEvent) {
+ spin_lock(&ep->napi_lock);
+ if (netif_rx_schedule_prep(dev)) {
+ epic_napi_irq_off(dev, ep);
+ __netif_rx_schedule(dev);
+ }
+ spin_unlock(&ep->napi_lock);
+ }
if (status & (TxEmpty | TxDone))
epic_tx(dev, ep);
/* Check uncommon events all at once. */
- if (status & (CntFull | TxUnderrun | RxOverflow | RxFull |
- PCIBusErr170 | PCIBusErr175)) {
- if (status == 0xffffffff) /* Chip failed or removed (CardBus). */
+ if (status &
+ (CntFull | TxUnderrun | PCIBusErr170 | PCIBusErr175)) {
+ if (status == EpicRemoved)
break;
/* Always update the error counts to avoid overhead later. */
ep->stats.rx_missed_errors += inb(ioaddr + MPCNT);
@@ -1164,11 +1206,6 @@ static irqreturn_t epic_interrupt(int ir
/* Restart the transmit process. */
outl(RestartTx, ioaddr + COMMAND);
}
- if (status & RxOverflow) { /* Missed a Rx frame. */
- ep->stats.rx_errors++;
- }
- if (status & (RxOverflow | RxFull))
- outw(RxQueued, ioaddr + COMMAND);
if (status & PCIBusErr170) {
printk(KERN_ERR "%s: PCI Bus Error! EPIC status %4.4x.\n",
dev->name, status);
@@ -1178,6 +1215,8 @@ static irqreturn_t epic_interrupt(int ir
/* Clear all error sources. */
outl(status & 0x7f18, ioaddr + INTSTAT);
}
+ if (status & EpicNormalEvent)
+ break;
if (--boguscnt < 0) {
printk(KERN_ERR "%s: Too much work at interrupt, "
"IntrStatus=0x%8.8x.\n",
@@ -1195,7 +1234,7 @@ static irqreturn_t epic_interrupt(int ir
return IRQ_RETVAL(handled);
}
-static int epic_rx(struct net_device *dev)
+static int epic_rx(struct net_device *dev, int budget)
{
struct epic_private *ep = dev->priv;
int entry = ep->cur_rx % RX_RING_SIZE;
@@ -1205,6 +1244,10 @@ static int epic_rx(struct net_device *de
if (debug > 4)
printk(KERN_DEBUG " In epic_rx(), entry %d %8.8x.\n", entry,
ep->rx_ring[entry].rxstatus);
+
+ if (rx_work_limit > budget)
+ rx_work_limit = budget;
+
/* If we own the next entry, it's a new packet. Send it up. */
while ((ep->rx_ring[entry].rxstatus & cpu_to_le32(DescOwn)) == 0) {
int status = le32_to_cpu(ep->rx_ring[entry].rxstatus);
@@ -1265,7 +1308,7 @@ static int epic_rx(struct net_device *de
ep->rx_skbuff[entry] = NULL;
}
skb->protocol = eth_type_trans(skb, dev);
- netif_rx(skb);
+ netif_receive_skb(skb);
dev->last_rx = jiffies;
ep->stats.rx_packets++;
ep->stats.rx_bytes += pkt_len;
@@ -1293,6 +1336,58 @@ static int epic_rx(struct net_device *de
return work_done;
}
+static void epic_rx_err(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+ int status;
+
+ status = inl(ioaddr + INTSTAT);
+
+ if (status == EpicRemoved)
+ return;
+ if (status & RxOverflow) /* Missed a Rx frame. */
+ ep->stats.rx_errors++;
+ if (status & (RxOverflow | RxFull))
+ outw(RxQueued, ioaddr + COMMAND);
+}
+
+static int epic_poll(struct net_device *dev, int *budget)
+{
+ struct epic_private *ep = dev->priv;
+ int work_done, orig_budget;
+ long ioaddr = dev->base_addr;
+
+ orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
+
+rx_action:
+ outl(EpicNapiEvent, ioaddr + INTSTAT);
+
+ work_done = epic_rx(dev, *budget);
+
+ epic_rx_err(dev, ep);
+
+ *budget -= work_done;
+ dev->quota -= work_done;
+
+ if (netif_running(dev) && (work_done < orig_budget)) {
+ unsigned long flags;
+ int status;
+
+ spin_lock_irqsave(&ep->napi_lock, flags);
+ epic_napi_irq_on(dev, ep);
+ __netif_rx_complete(dev);
+ spin_unlock_irqrestore(&ep->napi_lock, flags);
+
+ status = inl(ioaddr + INTSTAT);
+ if (status & EpicNapiEvent) {
+ epic_napi_irq_off(dev, ep);
+ goto rx_action;
+ }
+ }
+
+ return (work_done >= orig_budget);
+}
+
static int epic_close(struct net_device *dev)
{
long ioaddr = dev->base_addr;
_
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 4/4] 2.6.5-rc2 - epic100 napi
2004-03-22 23:53 ` [PATCH 3/4] " Francois Romieu
@ 2004-03-22 23:53 ` Francois Romieu
0 siblings, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:53 UTC (permalink / raw)
To: netdev; +Cc: Jeff Garzik
TX NAPI.
drivers/net/epic100.c | 20 +++++++++-----------
1 files changed, 9 insertions(+), 11 deletions(-)
diff -puN drivers/net/epic100.c~epic100-napi-20 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-20 2004-03-23 00:18:40.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c 2004-03-23 00:18:40.000000000 +0100
@@ -96,8 +96,8 @@ static int rx_copybreak;
Making the Tx ring too large decreases the effectiveness of channel
bonding and packet priority.
There are no ill effects from too-large receive rings. */
-#define TX_RING_SIZE 16
-#define TX_QUEUE_LEN 10 /* Limit ring entries actually used. */
+#define TX_RING_SIZE 256
+#define TX_QUEUE_LEN 240 /* Limit ring entries actually used. */
#define RX_RING_SIZE 256
#define TX_TOTAL_SIZE TX_RING_SIZE*sizeof(struct epic_tx_desc)
#define RX_TOTAL_SIZE RX_RING_SIZE*sizeof(struct epic_rx_desc)
@@ -294,7 +294,8 @@ enum CommandBits {
#define EpicRemoved 0xffffffff /* Chip failed or removed (CardBus) */
-#define EpicNapiEvent (RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
+#define EpicNapiEvent (TxEmpty | TxDone | \
+ RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
#define EpicNormalEvent (0x0000ffff & ~EpicNapiEvent)
static u16 media2miictl[16] = {
@@ -503,7 +504,7 @@ static int __devinit epic_init_one (stru
ep->chip_flags = pci_id_tbl[chip_idx].drv_flags;
ep->irq_mask =
(ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
- | CntFull | TxUnderrun | TxDone | TxEmpty | EpicNapiEvent;
+ | CntFull | TxUnderrun | EpicNapiEvent;
/* Find the connected MII xcvrs.
Doing this in open() would allow detecting external xcvrs later, but
@@ -804,7 +805,7 @@ static int epic_open(struct net_device *
/* Enable interrupts by setting the interrupt mask. */
outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
- | CntFull | TxUnderrun | TxDone | TxEmpty
+ | CntFull | TxUnderrun
| RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
if (debug > 1)
@@ -892,7 +893,7 @@ static void epic_restart(struct net_devi
/* Enable interrupts by setting the interrupt mask. */
outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
- | CntFull | TxUnderrun | TxDone | TxEmpty
+ | CntFull | TxUnderrun
| RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
printk(KERN_DEBUG "%s: epic_restart() done, cmd status %4.4x, ctl %4.4x"
@@ -1111,7 +1112,6 @@ static void epic_tx(struct net_device *d
* Note: if this lock becomes a problem we can narrow the locked
* region at the cost of occasionally grabbing the lock more times.
*/
- spin_lock(&ep->lock);
cur_tx = ep->cur_tx;
for (dirty_tx = ep->dirty_tx; cur_tx - dirty_tx > 0; dirty_tx++) {
struct sk_buff *skb;
@@ -1150,7 +1150,6 @@ static void epic_tx(struct net_device *d
ep->tx_full = 0;
netif_wake_queue(dev);
}
- spin_unlock(&ep->lock);
}
@@ -1187,9 +1186,6 @@ static irqreturn_t epic_interrupt(int ir
spin_unlock(&ep->napi_lock);
}
- if (status & (TxEmpty | TxDone))
- epic_tx(dev, ep);
-
/* Check uncommon events all at once. */
if (status &
(CntFull | TxUnderrun | PCIBusErr170 | PCIBusErr175)) {
@@ -1362,6 +1358,8 @@ static int epic_poll(struct net_device *
rx_action:
outl(EpicNapiEvent, ioaddr + INTSTAT);
+ epic_tx(dev, ep);
+
work_done = epic_rx(dev, *budget);
epic_rx_err(dev, ep);
_
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/4] 2.6.5-rc2 - epic100 update
2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
2004-03-22 23:51 ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
@ 2004-03-23 0:12 ` Jeff Garzik
1 sibling, 0 replies; 18+ messages in thread
From: Jeff Garzik @ 2004-03-23 0:12 UTC (permalink / raw)
To: Francois Romieu; +Cc: netdev, Andrew Morton
Francois Romieu wrote:
> Schedule:
>
> - epic100-fixup.patch: opportunistic cleanup
> - epic100-napi-00.patch: code shuffling before the move
> - epic100-napi-10.patch: rx napi (includes netif_running change)
> - epic100-napi-20.patch: minimalistic tx napi
>
> 2.6.5-rc2 and 2.6.5-rc2-mm1 contain the same drivers/net/epic100.c
> so the patches should apply equally to both.
>
> The driver has been moderately tested.
Thanks.
Applied to my netdev-2.6 queue, and so it should automatically appear in
Andrew's -mm tree soon.
Jeff
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-21 18:24 ` Jeff Garzik
2004-03-21 23:47 ` Francois Romieu
@ 2004-03-23 14:29 ` OGAWA Hirofumi
2004-03-23 15:14 ` Jeff Garzik
2004-03-23 18:51 ` Francois Romieu
1 sibling, 2 replies; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-23 14:29 UTC (permalink / raw)
To: Jeff Garzik; +Cc: Francois Romieu, netdev
Jeff Garzik <jgarzik@pobox.com> writes:
> > + if (work_done < orig_budget) {
> > + unsigned long flags;
> > + int status;
> > +
> > + spin_lock_irqsave(&ep->napi_lock, flags);
> > + epic_napi_irq_on(dev, ep);
> > + __netif_rx_complete(dev);
> > + spin_unlock_irqrestore(&ep->napi_lock, flags);
> > +
> > + status = inl(ioaddr + INTSTAT);
> > + if (status & EpicNapiEvent) {
> > + epic_napi_irq_off(dev, ep);
> > + goto rx_action;
> > + }
>
> Need to add a netif_running() check to the 'if' test at the top of the
> quote.
>
> Are you (or somebody else?) interested in reviewing all the in-tree
> NAPI drivers, and seeing if other drivers have this bug? I think
> 8139cp.c does at least, maybe e100 too... Such a fix would need to go
> into 2.4.x as well.
Umm.. the above code is part of ->poll(). I think xxx_interrupt() needs
the netif_running() check instead. The driver must clear the
__LINK_STATE_RX_SCHED flag...
BTW, ->napi_lock is unneeded because netif_schedule() is already
atomic; it needs only local_irq_enable/disable().
The code must not do "goto rx_action" after __netif_rx_complete(), otherwise
it may cause the device to be scheduled twice; the check should move before
spin_lock().
Thanks.
--
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-23 14:29 ` OGAWA Hirofumi
@ 2004-03-23 15:14 ` Jeff Garzik
2004-03-23 16:05 ` OGAWA Hirofumi
2004-03-23 18:51 ` Francois Romieu
1 sibling, 1 reply; 18+ messages in thread
From: Jeff Garzik @ 2004-03-23 15:14 UTC (permalink / raw)
To: OGAWA Hirofumi; +Cc: Francois Romieu, netdev
OGAWA Hirofumi wrote:
> Jeff Garzik <jgarzik@pobox.com> writes:
>
>
>>>+ if (work_done < orig_budget) {
>>>+ unsigned long flags;
>>>+ int status;
>>>+
>>>+ spin_lock_irqsave(&ep->napi_lock, flags);
>>>+ epic_napi_irq_on(dev, ep);
>>>+ __netif_rx_complete(dev);
>>>+ spin_unlock_irqrestore(&ep->napi_lock, flags);
>>>+
>>>+ status = inl(ioaddr + INTSTAT);
>>>+ if (status & EpicNapiEvent) {
>>>+ epic_napi_irq_off(dev, ep);
>>>+ goto rx_action;
>>>+ }
>>
>>Need to add a netif_running() check to the 'if' test at the top of the
>>quote.
>>
>>Are you (or somebody else?) interested in reviewing all the in-tree
>>NAPI drivers, and seeing if other drivers have this bug? I think
>>8139cp.c does at least, maybe e100 too... Such a fix would need to go
>>into 2.4.x as well.
>
>
> Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> flag...
Most interrupt routines already test this, look at
static inline int netif_rx_schedule_prep(struct net_device *dev)
{
return netif_running(dev) &&
!test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state);
}
It shouldn't schedule unless the interface is running.
However... I believe it was you that added this check to 8139cp.c:
/* close possible race's with dev_close */
if (unlikely(!netif_running(dev))) {
cpw16(IntrMask, 0);
goto out;
}
I like this, because regardless of NAPI, most drivers have
non-NAPI-related interrupts they must still process. This check handles
that.
Although this code is a bit redundant to some of the locking and
synchronization found in net driver dev->close() methods, I think it is
a nice thing to do.
I do wonder about the consequences, on some hardware, about receiving an
interrupt and -not- processing the RX or TX completions associated with
that. For most NIC hardware, you'll get sane behavior, but not all, I
bet...
> BTW, ->napi_lock is unneeded because netif_schedule() is already
> atomic, it need only local_irq_enable/disable().
>
> After __netif_rx_complete() must not do "goto rx_action", otherwise it
Agreed.
Jeff
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-23 15:14 ` Jeff Garzik
@ 2004-03-23 16:05 ` OGAWA Hirofumi
0 siblings, 0 replies; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-23 16:05 UTC (permalink / raw)
To: Jeff Garzik; +Cc: Francois Romieu, netdev
Jeff Garzik <jgarzik@pobox.com> writes:
> > Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> > netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> > flag...
>
> Most interrupt routines already test this, look at
>
> static inline int netif_rx_schedule_prep(struct net_device *dev)
> {
> return netif_running(dev) &&
> !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state);
> }
>
> It shouldn't schedule unless the interface is running.
Yes.
> However... I believe it was you that added this check to 8139cp.c:
>
> /* close possible race's with dev_close */
> if (unlikely(!netif_running(dev))) {
> cpw16(IntrMask, 0);
> goto out;
> }
Yes, I added. And my suggestion was about this.
Because in the case of 8139too, I got too many interrupts for pending RX
while the following code was running, and that code never finished
(dev->close() wasn't called).
dev_close(),
clear_bit(__LINK_STATE_START, &dev->state);
smp_mb__after_clear_bit(); /* Commit netif_running(). */
while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
/* No hurry. */
current->state = TASK_INTERRUPTIBLE;
schedule_timeout(1);
}
> I do wonder about the consequences, on some hardware, about receiving
> an interrupt and -not- processing the RX or TX completions associated
> with that. For most NIC hardware, you'll get sane behavior, but not
> all, I bet...
Does this mean the driver should _not_ receive interrupts for pending RX/TX?
Thanks.
--
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-23 14:29 ` OGAWA Hirofumi
2004-03-23 15:14 ` Jeff Garzik
@ 2004-03-23 18:51 ` Francois Romieu
2004-03-23 19:59 ` OGAWA Hirofumi
1 sibling, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-23 18:51 UTC (permalink / raw)
To: OGAWA Hirofumi; +Cc: Jeff Garzik, netdev
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
[...]
> Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> flag...
>
> BTW, ->napi_lock is unneeded because netif_schedule() is already
> atomic, it need only local_irq_enable/disable().
Color me confused. The lock is supposed to protect against:
CPU1 CPU2
[poll]
epic_napi_irq_on(dev, ep);
[irq handler]
if (netif_rx_schedule_prep(dev)) {
epic_napi_irq_off(dev, ep);
__netif_rx_schedule(dev);
}
__netif_rx_complete(dev);
-> napi irq are disabled and device is removed from poll list. What will
prevent it ?
> After __netif_rx_complete() must not do "goto rx_action", otherwise it
> may become cause of twice scheduleing, it should move before spin_lock().
I understand the previous statement as:
+ status = inl(ioaddr + INTSTAT);
+ if (status & EpicNapiEvent) {
+ epic_napi_irq_off(dev, ep);
+ goto rx_action;
+
+ spin_lock_irqsave(&ep->napi_lock, flags);
+ epic_napi_irq_on(dev, ep);
+ __netif_rx_complete(dev);
+ spin_unlock_irqrestore(&ep->napi_lock, flags);
+
Afaiu, if some data comes in just before the spin_lock, it may wait for ages.
Can you reformulate as I feel I still did not get it right.
--
Ueimor
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-23 18:51 ` Francois Romieu
@ 2004-03-23 19:59 ` OGAWA Hirofumi
2004-03-24 0:41 ` Francois Romieu
0 siblings, 1 reply; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-23 19:59 UTC (permalink / raw)
To: Francois Romieu; +Cc: Jeff Garzik, netdev
Francois Romieu <romieu@fr.zoreil.com> writes:
> OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
> [...]
> > Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> > netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> > flag...
> >
> > BTW, ->napi_lock is unneeded because netif_schedule() is already
> > atomic, it need only local_irq_enable/disable().
>
> Color me confused. The lock is supposed to protect against:
>
> CPU1 CPU2
> [poll]
> epic_napi_irq_on(dev, ep);
> [irq handler]
> if (netif_rx_schedule_prep(dev)) {
> epic_napi_irq_off(dev, ep);
> __netif_rx_schedule(dev);
> }
> __netif_rx_complete(dev);
>
> -> napi irq are disabled and device is removed from poll list. What will
> prevent it ?
The __LINK_STATE_RX_SCHED flag stays set until __netif_rx_complete() is
called, so netif_rx_schedule_prep() returns 0.
> > After __netif_rx_complete() must not do "goto rx_action", otherwise it
> > may become cause of twice scheduleing, it should move before spin_lock().
>
> understand the previous statement as:
>
> + status = inl(ioaddr + INTSTAT);
> + if (status & EpicNapiEvent) {
> + epic_napi_irq_off(dev, ep);
> + goto rx_action;
> +
> + spin_lock_irqsave(&ep->napi_lock, flags);
> + epic_napi_irq_on(dev, ep);
> + __netif_rx_complete(dev);
> + spin_unlock_irqrestore(&ep->napi_lock, flags);
> +
>
> Afaiu, if some data comes in just before the spin_lock, it may wait for ages.
Yes, maybe. But if the check comes after the spin_lock section, the loop may
call __netif_rx_schedule() twice. And netif_rx_complete() doesn't call
dev_put(). It will leak the dev->refcnt, I think.
> + __netif_rx_complete(dev);
> + spin_unlock_irqrestore(&ep->napi_lock, flags);
-- interrupt and call __netif_rx_schedule() --
> + status = inl(ioaddr + INTSTAT);
> + if (status & EpicNapiEvent) {
> + epic_napi_irq_off(dev, ep);
> + goto rx_action;
Thanks.
--
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-23 19:59 ` OGAWA Hirofumi
@ 2004-03-24 0:41 ` Francois Romieu
2004-03-24 2:52 ` OGAWA Hirofumi
2004-03-25 0:27 ` [PATCH] 2.6.5-rc2 - more " Francois Romieu
0 siblings, 2 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-24 0:41 UTC (permalink / raw)
To: OGAWA Hirofumi; +Cc: Jeff Garzik, netdev
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
[...]
> > -> napi irq are disabled and device is removed from poll list. What will
> > prevent it ?
>
> __LINK_STATE_RX_SCHED flag is setting until __netif_rx_complete() is called.
> So netif_rx_schedule_prep() returns 0.
Ok, thanks for the explanation. It is possible that the lock stays anyway (see
below).
[...]
> > Afaiu, if some data comes in just before the spin_lock, it may wait for ages.
>
> Yes, maybe. But, if after spin_lock, it loop may call the twice
> __netif_rx_schedule(). And netif_rx_complete() doesn't call dev_put().
> It will leaks the dev->refcnt, I think.
@$*#!zW
The following patch should avoid the leak as well as the packet rot
(untested, 1:33 AM, apply on top of the previous series).
Multiple invocation of __netif_rx_schedule() in epic_interrupt() while
epic_poll loops over __netif_rx_complete() leads to serious device
refcount leak.
drivers/net/epic100.c | 26 +++++++++++++-------------
1 files changed, 13 insertions(+), 13 deletions(-)
diff -puN drivers/net/epic100.c~epic100-napi-30 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-30 2004-03-24 01:18:25.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c 2004-03-24 01:19:35.000000000 +0100
@@ -337,6 +337,7 @@ struct epic_private {
/* Ring pointers. */
spinlock_t lock; /* Group with Tx control cache line. */
spinlock_t napi_lock;
+ unsigned int reschedule_in_poll;
unsigned int cur_tx, dirty_tx;
unsigned int cur_rx, dirty_rx;
@@ -472,7 +473,9 @@ static int __devinit epic_init_one (stru
dev->base_addr = ioaddr;
dev->irq = irq;
- spin_lock_init (&ep->lock);
+ spin_lock_init(&ep->lock);
+ spin_lock_init(&ep->napi_lock);
+ ep->reschedule_in_poll = 0;
/* Bring the chip out of low-power mode. */
outl(0x4200, ioaddr + GENCTL);
@@ -981,8 +984,6 @@ static void epic_init_ring(struct net_de
int i;
ep->tx_full = 0;
- spin_lock_init(&ep->lock);
- spin_lock_init(&ep->napi_lock);
ep->dirty_tx = ep->cur_tx = 0;
ep->cur_rx = ep->dirty_rx = 0;
ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1152,7 +1153,6 @@ static void epic_tx(struct net_device *d
}
}
-
/* The interrupt handler does all of the Rx thread work and cleans up
after the Tx thread. */
static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1177,12 +1177,13 @@ static irqreturn_t epic_interrupt(int ir
break;
handled = 1;
- if (status & EpicNapiEvent) {
+ if ((status & EpicNapiEvent) && !ep->reschedule_in_poll) {
spin_lock(&ep->napi_lock);
if (netif_rx_schedule_prep(dev)) {
epic_napi_irq_off(dev, ep);
__netif_rx_schedule(dev);
- }
+ } else
+ ep->reschedule_in_poll++;
spin_unlock(&ep->napi_lock);
}
@@ -1355,7 +1356,6 @@ static int epic_poll(struct net_device *
orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
-rx_action:
outl(EpicNapiEvent, ioaddr + INTSTAT);
epic_tx(dev, ep);
@@ -1369,18 +1369,18 @@ rx_action:
if (netif_running(dev) && (work_done < orig_budget)) {
unsigned long flags;
- int status;
- spin_lock_irqsave(&ep->napi_lock, flags);
epic_napi_irq_on(dev, ep);
+
+ spin_lock_irqsave(&ep->napi_lock, flags);
__netif_rx_complete(dev);
- spin_unlock_irqrestore(&ep->napi_lock, flags);
- status = inl(ioaddr + INTSTAT);
- if (status & EpicNapiEvent) {
+ if (ep->reschedule_in_poll) {
epic_napi_irq_off(dev, ep);
- goto rx_action;
+ __netif_rx_schedule(dev);
+ ep->reschedule_in_poll--;
}
+ spin_unlock_irqrestore(&ep->napi_lock, flags);
}
return (work_done >= orig_budget);
_
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-24 0:41 ` Francois Romieu
@ 2004-03-24 2:52 ` OGAWA Hirofumi
2004-03-24 12:33 ` Francois Romieu
2004-03-25 0:27 ` [PATCH] 2.6.5-rc2 - more " Francois Romieu
1 sibling, 1 reply; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-24 2:52 UTC (permalink / raw)
To: Francois Romieu; +Cc: Jeff Garzik, netdev
Francois Romieu <romieu@fr.zoreil.com> writes:
> > Yes, maybe. But, if after spin_lock, it loop may call the twice
> > __netif_rx_schedule(). And netif_rx_complete() doesn't call dev_put().
> > It will leaks the dev->refcnt, I think.
>
> @$*#!zW
>
> The following patch should avoid the leak as well as the packet rot
> (untested, 1:33 AM, apply on top of previous serie).
>
>
> Multiple invocation of __netif_rx_schedule() in epic_interrupt() while
> epic_poll loops over __netif_rx_complete() leads to serious device
> refcount leak.
Are you concerned about lost interrupts? If so, I was misreading it.
IIRC, the PCI spec requires level-triggered interrupts, so a device asserts
its IRQ signal until the driver clears the pending interrupt. No?
--
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-24 2:52 ` OGAWA Hirofumi
@ 2004-03-24 12:33 ` Francois Romieu
0 siblings, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-24 12:33 UTC (permalink / raw)
To: OGAWA Hirofumi; +Cc: Jeff Garzik, netdev
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
[...]
> Do you care the lost interrupt? If so, I was miss reading it.
>
> IIRC, PCI spec requires the level-trigger. So devices asserts IRQ
> signal until the driver clears the pending interrupt. No?
<insert usual "correct me if I'm wrong" disclaimer here>
The driver only masks the interrupts which are napi related so
epic_poll() and epic_interrupt() are always racing. I completely agree that
it "wastes" the nice behavior of level-triggered irq. If one wants to avoid
the race, everything should go to the poll() handler. It implies polled
access to the INTSTAT register (as done in epic_rx_err).
I could not find in the archives some general napi-wisdom which suggests
that everything has to go to the poll() handler, be it for simplicity or
for a real gain. So I hesitate to follow the other way and exchange the
polled access for some (locked) traffic between the poll() and the irq
handler.
Comments welcome.
--
Ueimor
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH] 2.6.5-rc2 - more epic100 napi
2004-03-24 0:41 ` Francois Romieu
2004-03-24 2:52 ` OGAWA Hirofumi
@ 2004-03-25 0:27 ` Francois Romieu
1 sibling, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-25 0:27 UTC (permalink / raw)
To: Jeff Garzik; +Cc: OGAWA Hirofumi, netdev
Francois Romieu <romieu@fr.zoreil.com> :
[...]
> diff -puN drivers/net/epic100.c~epic100-napi-30 drivers/net/epic100.c
> --- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-30 2004-03-24 01:18:25.000000000 +0100
> +++ linux-2.6.5-rc2-fr/drivers/net/epic100.c 2004-03-24 01:19:35.000000000 +0100
> @@ -1355,7 +1356,6 @@ static int epic_poll(struct net_device *
>
> orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
>
> -rx_action:
> outl(EpicNapiEvent, ioaddr + INTSTAT);
>
> epic_tx(dev, ep);
> @@ -1369,18 +1369,18 @@ rx_action:
>
> if (netif_running(dev) && (work_done < orig_budget)) {
> unsigned long flags;
> - int status;
>
> - spin_lock_irqsave(&ep->napi_lock, flags);
> epic_napi_irq_on(dev, ep);
> +
> + spin_lock_irqsave(&ep->napi_lock, flags);
> __netif_rx_complete(dev);
> - spin_unlock_irqrestore(&ep->napi_lock, flags);
>
> - status = inl(ioaddr + INTSTAT);
> - if (status & EpicNapiEvent) {
> + if (ep->reschedule_in_poll) {
> epic_napi_irq_off(dev, ep);
> - goto rx_action;
> + __netif_rx_schedule(dev);
^^^^^^^^^^^^^^^^^^^^^^^^^
While in poll() handler, brilliant :o(
Please apply (tested) patch below on top of the acked patches (i.e 1/4...4/4):
- issuing commands via the serial console under an incoming stream of 40k
short sized pps sucks but it is possible;
- does not leak refcount (it rmmods fine).
Next step: identify the best performer amongst the previously discussed changes.
Multiple invocation of __netif_rx_schedule() in epic_interrupt() while
epic_poll loops over __netif_rx_complete() leads to serious device
refcount leak.
drivers/net/epic100.c | 33 ++++++++++++++++++---------------
1 files changed, 18 insertions(+), 15 deletions(-)
diff -puN drivers/net/epic100.c~epic100-napi-30 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-30 2004-03-24 01:18:25.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c 2004-03-25 00:51:30.000000000 +0100
@@ -337,6 +337,7 @@ struct epic_private {
/* Ring pointers. */
spinlock_t lock; /* Group with Tx control cache line. */
spinlock_t napi_lock;
+ unsigned int reschedule_in_poll;
unsigned int cur_tx, dirty_tx;
unsigned int cur_rx, dirty_rx;
@@ -472,7 +473,9 @@ static int __devinit epic_init_one (stru
dev->base_addr = ioaddr;
dev->irq = irq;
- spin_lock_init (&ep->lock);
+ spin_lock_init(&ep->lock);
+ spin_lock_init(&ep->napi_lock);
+ ep->reschedule_in_poll = 0;
/* Bring the chip out of low-power mode. */
outl(0x4200, ioaddr + GENCTL);
@@ -981,8 +984,6 @@ static void epic_init_ring(struct net_de
int i;
ep->tx_full = 0;
- spin_lock_init(&ep->lock);
- spin_lock_init(&ep->napi_lock);
ep->dirty_tx = ep->cur_tx = 0;
ep->cur_rx = ep->dirty_rx = 0;
ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1152,7 +1153,6 @@ static void epic_tx(struct net_device *d
}
}
-
/* The interrupt handler does all of the Rx thread work and cleans up
after the Tx thread. */
static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1177,14 +1177,16 @@ static irqreturn_t epic_interrupt(int ir
break;
handled = 1;
- if (status & EpicNapiEvent) {
+ if ((status & EpicNapiEvent) && !ep->reschedule_in_poll) {
spin_lock(&ep->napi_lock);
if (netif_rx_schedule_prep(dev)) {
epic_napi_irq_off(dev, ep);
__netif_rx_schedule(dev);
- }
+ } else
+ ep->reschedule_in_poll++;
spin_unlock(&ep->napi_lock);
}
+ status &= ~EpicNapiEvent;
/* Check uncommon events all at once. */
if (status &
@@ -1211,7 +1213,7 @@ static irqreturn_t epic_interrupt(int ir
/* Clear all error sources. */
outl(status & 0x7f18, ioaddr + INTSTAT);
}
- if (status & EpicNormalEvent)
+ if (!(status & EpicNormalEvent))
break;
if (--boguscnt < 0) {
printk(KERN_ERR "%s: Too much work at interrupt, "
@@ -1356,7 +1358,6 @@ static int epic_poll(struct net_device *
orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
rx_action:
- outl(EpicNapiEvent, ioaddr + INTSTAT);
epic_tx(dev, ep);
@@ -1369,18 +1370,20 @@ rx_action:
if (netif_running(dev) && (work_done < orig_budget)) {
unsigned long flags;
- int status;
spin_lock_irqsave(&ep->napi_lock, flags);
- epic_napi_irq_on(dev, ep);
- __netif_rx_complete(dev);
- spin_unlock_irqrestore(&ep->napi_lock, flags);
- status = inl(ioaddr + INTSTAT);
- if (status & EpicNapiEvent) {
- epic_napi_irq_off(dev, ep);
+ if (ep->reschedule_in_poll) {
+ ep->reschedule_in_poll--;
+ spin_unlock_irqrestore(&ep->napi_lock, flags);
goto rx_action;
}
+
+ outl(EpicNapiEvent, ioaddr + INTSTAT);
+ epic_napi_irq_on(dev, ep);
+ __netif_rx_complete(dev);
+
+ spin_unlock_irqrestore(&ep->napi_lock, flags);
}
return (work_done >= orig_budget);
_
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2004-03-25 0:27 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-03-20 14:21 [PATCH] [RFT] 2.6.4 - epic100 napi Francois Romieu
2004-03-21 18:24 ` Jeff Garzik
2004-03-21 23:47 ` Francois Romieu
2004-03-23 14:29 ` OGAWA Hirofumi
2004-03-23 15:14 ` Jeff Garzik
2004-03-23 16:05 ` OGAWA Hirofumi
2004-03-23 18:51 ` Francois Romieu
2004-03-23 19:59 ` OGAWA Hirofumi
2004-03-24 0:41 ` Francois Romieu
2004-03-24 2:52 ` OGAWA Hirofumi
2004-03-24 12:33 ` Francois Romieu
2004-03-25 0:27 ` [PATCH] 2.6.5-rc2 - more " Francois Romieu
2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
2004-03-22 23:51 ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
2004-03-22 23:52 ` [PATCH 2/4] 2.6.5-rc2 - epic100 napi Francois Romieu
2004-03-22 23:53 ` [PATCH 3/4] " Francois Romieu
2004-03-22 23:53 ` [PATCH 4/4] " Francois Romieu
2004-03-23 0:12 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Jeff Garzik
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).