* [PATCH] [RFT] 2.6.4 - epic100 napi
@ 2004-03-20 14:21 Francois Romieu
2004-03-21 18:24 ` Jeff Garzik
2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
0 siblings, 2 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-20 14:21 UTC (permalink / raw)
To: netdev, Jeff Garzik
People are welcome to report how the following patch behaves on their
hardware. It does not seem too bad here but it probably is still a bit rough.
A split version of the patch will follow tomorrow. This one definitely aims
at brave and/or bored testers.
The driver lacks ethtool support. Badly. :o/
--- linux-2.6.4/drivers/net/epic100.c 2004-03-20 14:52:08.000000000 +0100
+++ linux-2.6.4/drivers/net/epic100.c 2004-03-20 14:52:13.000000000 +0100
@@ -96,9 +96,9 @@ static int rx_copybreak;
Making the Tx ring too large decreases the effectiveness of channel
bonding and packet priority.
There are no ill effects from too-large receive rings. */
-#define TX_RING_SIZE 16
+#define TX_RING_SIZE 256
#define TX_QUEUE_LEN 10 /* Limit ring entries actually used. */
-#define RX_RING_SIZE 32
+#define RX_RING_SIZE 256
#define TX_TOTAL_SIZE TX_RING_SIZE*sizeof(struct epic_tx_desc)
#define RX_TOTAL_SIZE RX_RING_SIZE*sizeof(struct epic_rx_desc)
@@ -292,6 +292,10 @@ enum CommandBits {
StopTxDMA=0x20, StopRxDMA=0x40, RestartTx=0x80,
};
+#define EpicNapiEvent (TxEmpty | TxDone | \
+ RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
+#define EpicNormalEvent (0x0000ffffUL & ~EpicNapiEvent)
+
static u16 media2miictl[16] = {
0, 0x0C00, 0x0C00, 0x2000, 0x0100, 0x2100, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0 };
@@ -330,9 +334,11 @@ struct epic_private {
/* Ring pointers. */
spinlock_t lock; /* Group with Tx control cache line. */
+ spinlock_t napi_lock;
unsigned int cur_tx, dirty_tx;
unsigned int cur_rx, dirty_rx;
+ u32 irq_mask;
unsigned int rx_buf_sz; /* Based on MTU+slack. */
struct pci_dev *pci_dev; /* PCI bus location. */
@@ -359,7 +365,8 @@ static void epic_timer(unsigned long dat
static void epic_tx_timeout(struct net_device *dev);
static void epic_init_ring(struct net_device *dev);
static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev);
-static int epic_rx(struct net_device *dev);
+static int epic_rx(struct net_device *dev, int budget);
+static int epic_poll(struct net_device *dev, int *budget);
static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
static struct ethtool_ops netdev_ethtool_ops;
@@ -378,7 +385,7 @@ static int __devinit epic_init_one (stru
int irq;
struct net_device *dev;
struct epic_private *ep;
- int i, option = 0, duplex = 0;
+ int i, ret, option = 0, duplex = 0;
void *ring_space;
dma_addr_t ring_dma;
@@ -392,29 +399,33 @@ static int __devinit epic_init_one (stru
card_idx++;
- i = pci_enable_device(pdev);
- if (i)
- return i;
+ ret = pci_enable_device(pdev);
+ if (ret)
+ goto out;
irq = pdev->irq;
if (pci_resource_len(pdev, 0) < pci_id_tbl[chip_idx].io_size) {
printk (KERN_ERR "card %d: no PCI region space\n", card_idx);
- return -ENODEV;
+ ret = -ENODEV;
+ goto err_out_disable;
}
pci_set_master(pdev);
+ ret = pci_request_regions(pdev, DRV_NAME);
+ if (ret < 0)
+ goto err_out_disable;
+
+ ret = -ENOMEM;
+
dev = alloc_etherdev(sizeof (*ep));
if (!dev) {
printk (KERN_ERR "card %d: no memory for eth device\n", card_idx);
- return -ENOMEM;
+ goto err_out_free_res;
}
SET_MODULE_OWNER(dev);
SET_NETDEV_DEV(dev, &pdev->dev);
- if (pci_request_regions(pdev, DRV_NAME))
- goto err_out_free_netdev;
-
#ifdef USE_IO_OPS
ioaddr = pci_resource_start (pdev, 0);
#else
@@ -422,7 +433,7 @@ static int __devinit epic_init_one (stru
ioaddr = (long) ioremap (ioaddr, pci_resource_len (pdev, 1));
if (!ioaddr) {
printk (KERN_ERR DRV_NAME " %d: ioremap failed\n", card_idx);
- goto err_out_free_res;
+ goto err_out_free_netdev;
}
#endif
@@ -489,6 +500,9 @@ static int __devinit epic_init_one (stru
ep->pci_dev = pdev;
ep->chip_id = chip_idx;
ep->chip_flags = pci_id_tbl[chip_idx].drv_flags;
+ ep->irq_mask =
+ (ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
+ | CntFull | TxUnderrun | EpicNapiEvent;
/* Find the connected MII xcvrs.
Doing this in open() would allow detecting external xcvrs later, but
@@ -543,10 +557,12 @@ static int __devinit epic_init_one (stru
dev->ethtool_ops = &netdev_ethtool_ops;
dev->watchdog_timeo = TX_TIMEOUT;
dev->tx_timeout = &epic_tx_timeout;
+ dev->poll = epic_poll;
+ dev->weight = 64;
- i = register_netdev(dev);
- if (i)
- goto err_out_unmap_tx;
+ ret = register_netdev(dev);
+ if (ret < 0)
+ goto err_out_unmap_rx;
printk(KERN_INFO "%s: %s at %#lx, IRQ %d, ",
dev->name, pci_id_tbl[chip_idx].name, ioaddr, dev->irq);
@@ -554,19 +570,24 @@ static int __devinit epic_init_one (stru
printk("%2.2x:", dev->dev_addr[i]);
printk("%2.2x.\n", dev->dev_addr[i]);
- return 0;
+out:
+ return ret;
+err_out_unmap_rx:
+ pci_free_consistent(pdev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma);
err_out_unmap_tx:
pci_free_consistent(pdev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma);
err_out_iounmap:
#ifndef USE_IO_OPS
iounmap(ioaddr);
-err_out_free_res:
-#endif
- pci_release_regions(pdev);
err_out_free_netdev:
+#endif
free_netdev(dev);
- return -ENODEV;
+err_out_free_res:
+ pci_release_regions(pdev);
+err_out_disable:
+ pci_disable_device(pdev);
+ goto out;
}
\f
/* Serial EEPROM section. */
@@ -592,6 +613,36 @@ err_out_free_netdev:
#define EE_READ256_CMD (6 << 8)
#define EE_ERASE_CMD (7 << 6)
+static void epic_disable_int(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+
+ outl(0x00000000, ioaddr + INTMASK);
+}
+
+static inline void __epic_pci_commit(long ioaddr)
+{
+#ifndef USE_IO_OPS
+ inl(ioaddr + INTMASK);
+#endif
+}
+
+static void epic_napi_irq_off(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+
+ outl(ep->irq_mask & ~EpicNapiEvent, ioaddr + INTMASK);
+ __epic_pci_commit(ioaddr);
+}
+
+static void epic_napi_irq_on(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+
+ /* No need to commit possible posted write */
+ outl(ep->irq_mask | EpicNapiEvent, ioaddr + INTMASK);
+}
+
static int __devinit read_eeprom(long ioaddr, int location)
{
int i;
@@ -753,8 +804,7 @@ static int epic_open(struct net_device *
/* Enable interrupts by setting the interrupt mask. */
outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
| CntFull | TxUnderrun | TxDone | TxEmpty
- | RxError | RxOverflow | RxFull | RxHeader | RxDone,
- ioaddr + INTMASK);
+ | RxError | EpicNapiEvent, ioaddr + INTMASK);
if (debug > 1)
printk(KERN_DEBUG "%s: epic_open() ioaddr %lx IRQ %d status %4.4x "
@@ -795,7 +845,7 @@ static void epic_pause(struct net_device
}
/* Remove the packets on the Rx queue. */
- epic_rx(dev);
+ epic_rx(dev, RX_RING_SIZE);
}
static void epic_restart(struct net_device *dev)
@@ -842,7 +892,7 @@ static void epic_restart(struct net_devi
/* Enable interrupts by setting the interrupt mask. */
outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
| CntFull | TxUnderrun | TxDone | TxEmpty
- | RxError | RxOverflow | RxFull | RxHeader | RxDone,
+ | RxError | EpicNapiEvent,
ioaddr + INTMASK);
printk(KERN_DEBUG "%s: epic_restart() done, cmd status %4.4x, ctl %4.4x"
" interrupt %4.4x.\n",
@@ -929,7 +979,8 @@ static void epic_init_ring(struct net_de
int i;
ep->tx_full = 0;
- ep->lock = (spinlock_t) SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&ep->lock);
+ spin_lock_init(&ep->napi_lock);
ep->dirty_tx = ep->cur_tx = 0;
ep->cur_rx = ep->dirty_rx = 0;
ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1029,6 +1080,77 @@ static int epic_start_xmit(struct sk_buf
return 0;
}
+static void epic_tx_error(struct net_device *dev, struct epic_private *ep,
+ int status)
+{
+ struct net_device_stats *stats = &ep->stats;
+
+#ifndef final_version
+ /* There was an major error, log it. */
+ if (debug > 1)
+ printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
+ dev->name, status);
+#endif
+ stats->tx_errors++;
+ if (status & 0x1050)
+ stats->tx_aborted_errors++;
+ if (status & 0x0008)
+ stats->tx_carrier_errors++;
+ if (status & 0x0040)
+ stats->tx_window_errors++;
+ if (status & 0x0010)
+ stats->tx_fifo_errors++;
+}
+
+static void epic_tx(struct net_device *dev, struct epic_private *ep)
+{
+ unsigned int dirty_tx, cur_tx;
+
+ /*
+ * Note: if this lock becomes a problem we can narrow the locked
+ * region at the cost of occasionally grabbing the lock more times.
+ */
+ cur_tx = ep->cur_tx;
+ for (dirty_tx = ep->dirty_tx; cur_tx - dirty_tx > 0; dirty_tx++) {
+ struct sk_buff *skb;
+ int entry = dirty_tx % TX_RING_SIZE;
+ int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
+
+ if (txstatus & DescOwn)
+ break; /* It still hasn't been Txed */
+
+ if (likely(txstatus & 0x0001)) {
+ ep->stats.collisions += (txstatus >> 8) & 15;
+ ep->stats.tx_packets++;
+ ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
+ } else
+ epic_tx_error(dev, ep, txstatus);
+
+ /* Free the original skb. */
+ skb = ep->tx_skbuff[entry];
+ pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr,
+ skb->len, PCI_DMA_TODEVICE);
+ dev_kfree_skb_irq(skb);
+ ep->tx_skbuff[entry] = 0;
+ }
+
+#ifndef final_version
+ if (cur_tx - dirty_tx > TX_RING_SIZE) {
+ printk(KERN_WARNING
+ "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
+ dev->name, dirty_tx, cur_tx, ep->tx_full);
+ dirty_tx += TX_RING_SIZE;
+ }
+#endif
+ ep->dirty_tx = dirty_tx;
+ if (ep->tx_full && cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
+ /* The ring is no longer full, allow new TX entries. */
+ ep->tx_full = 0;
+ netif_wake_queue(dev);
+ }
+}
+
+
/* The interrupt handler does all of the Rx thread work and cleans up
after the Tx thread. */
static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1042,7 +1164,7 @@ static irqreturn_t epic_interrupt(int ir
do {
status = inl(ioaddr + INTSTAT);
/* Acknowledge all of the current interrupt sources ASAP. */
- outl(status & 0x00007fff, ioaddr + INTSTAT);
+ outl(status & EpicNormalEvent, ioaddr + INTSTAT);
if (debug > 4)
printk(KERN_DEBUG "%s: Interrupt, status=%#8.8x new "
@@ -1053,73 +1175,18 @@ static irqreturn_t epic_interrupt(int ir
break;
handled = 1;
- if (status & (RxDone | RxStarted | RxEarlyWarn | RxOverflow))
- epic_rx(dev);
-
- if (status & (TxEmpty | TxDone)) {
- unsigned int dirty_tx, cur_tx;
-
- /* Note: if this lock becomes a problem we can narrow the locked
- region at the cost of occasionally grabbing the lock more
- times. */
- spin_lock(&ep->lock);
- cur_tx = ep->cur_tx;
- dirty_tx = ep->dirty_tx;
- for (; cur_tx - dirty_tx > 0; dirty_tx++) {
- struct sk_buff *skb;
- int entry = dirty_tx % TX_RING_SIZE;
- int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
-
- if (txstatus & DescOwn)
- break; /* It still hasn't been Txed */
-
- if ( ! (txstatus & 0x0001)) {
- /* There was an major error, log it. */
-#ifndef final_version
- if (debug > 1)
- printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
- dev->name, txstatus);
-#endif
- ep->stats.tx_errors++;
- if (txstatus & 0x1050) ep->stats.tx_aborted_errors++;
- if (txstatus & 0x0008) ep->stats.tx_carrier_errors++;
- if (txstatus & 0x0040) ep->stats.tx_window_errors++;
- if (txstatus & 0x0010) ep->stats.tx_fifo_errors++;
- } else {
- ep->stats.collisions += (txstatus >> 8) & 15;
- ep->stats.tx_packets++;
- ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
- }
-
- /* Free the original skb. */
- skb = ep->tx_skbuff[entry];
- pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr,
- skb->len, PCI_DMA_TODEVICE);
- dev_kfree_skb_irq(skb);
- ep->tx_skbuff[entry] = 0;
+ if (status & EpicNapiEvent) {
+ spin_lock(&ep->napi_lock);
+ if (netif_rx_schedule_prep(dev)) {
+ epic_napi_irq_off(dev, ep);
+ __netif_rx_schedule(dev);
}
-
-#ifndef final_version
- if (cur_tx - dirty_tx > TX_RING_SIZE) {
- printk(KERN_WARNING "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
- dev->name, dirty_tx, cur_tx, ep->tx_full);
- dirty_tx += TX_RING_SIZE;
- }
-#endif
- ep->dirty_tx = dirty_tx;
- if (ep->tx_full
- && cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
- /* The ring is no longer full, allow new TX entries. */
- ep->tx_full = 0;
- spin_unlock(&ep->lock);
- netif_wake_queue(dev);
- } else
- spin_unlock(&ep->lock);
+ spin_unlock(&ep->napi_lock);
}
/* Check uncommon events all at once. */
- if (status & (CntFull | TxUnderrun | RxOverflow | RxFull |
- PCIBusErr170 | PCIBusErr175)) {
+ if (status &
+ (CntFull | TxUnderrun | PCIBusErr170 | PCIBusErr175)) {
if (status == 0xffffffff) /* Chip failed or removed (CardBus). */
break;
/* Always update the error counts to avoid overhead later. */
@@ -1133,11 +1200,6 @@ static irqreturn_t epic_interrupt(int ir
/* Restart the transmit process. */
outl(RestartTx, ioaddr + COMMAND);
}
- if (status & RxOverflow) { /* Missed a Rx frame. */
- ep->stats.rx_errors++;
- }
- if (status & (RxOverflow | RxFull))
- outw(RxQueued, ioaddr + COMMAND);
if (status & PCIBusErr170) {
printk(KERN_ERR "%s: PCI Bus Error! EPIC status %4.4x.\n",
dev->name, status);
@@ -1147,6 +1209,8 @@ static irqreturn_t epic_interrupt(int ir
/* Clear all error sources. */
outl(status & 0x7f18, ioaddr + INTSTAT);
}
+ if (!(status & EpicNormalEvent))
+ break;
if (--boguscnt < 0) {
printk(KERN_ERR "%s: Too much work at interrupt, "
"IntrStatus=0x%8.8x.\n",
@@ -1164,7 +1228,7 @@ static irqreturn_t epic_interrupt(int ir
return IRQ_RETVAL(handled);
}
-static int epic_rx(struct net_device *dev)
+static int epic_rx(struct net_device *dev, int budget)
{
struct epic_private *ep = dev->priv;
int entry = ep->cur_rx % RX_RING_SIZE;
@@ -1174,6 +1238,10 @@ static int epic_rx(struct net_device *de
if (debug > 4)
printk(KERN_DEBUG " In epic_rx(), entry %d %8.8x.\n", entry,
ep->rx_ring[entry].rxstatus);
+
+ if (rx_work_limit > budget)
+ rx_work_limit = budget;
+
/* If we own the next entry, it's a new packet. Send it up. */
while ((ep->rx_ring[entry].rxstatus & cpu_to_le32(DescOwn)) == 0) {
int status = le32_to_cpu(ep->rx_ring[entry].rxstatus);
@@ -1228,7 +1296,7 @@ static int epic_rx(struct net_device *de
ep->rx_skbuff[entry] = NULL;
}
skb->protocol = eth_type_trans(skb, dev);
- netif_rx(skb);
+ netif_receive_skb(skb);
dev->last_rx = jiffies;
ep->stats.rx_packets++;
ep->stats.rx_bytes += pkt_len;
@@ -1256,6 +1324,60 @@ static int epic_rx(struct net_device *de
return work_done;
}
+static void epic_rx_err(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+ int status;
+
+ status = inl(ioaddr + INTSTAT);
+
+ if (status == 0xffffffff)
+ return;
+ if (status & RxOverflow) /* Missed a Rx frame. */
+ ep->stats.rx_errors++;
+ if (status & (RxOverflow | RxFull))
+ outw(RxQueued, ioaddr + COMMAND);
+}
+
+static int epic_poll(struct net_device *dev, int *budget)
+{
+ struct epic_private *ep = dev->priv;
+ int work_done, orig_budget;
+ long ioaddr = dev->base_addr;
+
+ epic_tx(dev, ep);
+
+ orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
+
+rx_action:
+ outl(EpicNapiEvent, ioaddr + INTSTAT);
+
+ work_done = epic_rx(dev, *budget);
+
+ epic_rx_err(dev, ep);
+
+ *budget -= work_done;
+ dev->quota -= work_done;
+
+ if (work_done < orig_budget) {
+ unsigned long flags;
+ int status;
+
+ spin_lock_irqsave(&ep->napi_lock, flags);
+ epic_napi_irq_on(dev, ep);
+ __netif_rx_complete(dev);
+ spin_unlock_irqrestore(&ep->napi_lock, flags);
+
+ status = inl(ioaddr + INTSTAT);
+ if (status & EpicNapiEvent) {
+ epic_napi_irq_off(dev, ep);
+ goto rx_action;
+ }
+ }
+
+ return (work_done >= orig_budget);
+}
+
static int epic_close(struct net_device *dev)
{
long ioaddr = dev->base_addr;
@@ -1270,9 +1392,13 @@ static int epic_close(struct net_device
dev->name, (int)inl(ioaddr + INTSTAT));
del_timer_sync(&ep->timer);
- epic_pause(dev);
+
+ epic_disable_int(dev, ep);
+
free_irq(dev->irq, dev);
+ epic_pause(dev);
+
/* Free all the skbuffs in the Rx queue. */
for (i = 0; i < RX_RING_SIZE; i++) {
skb = ep->rx_skbuff[i];
@@ -1470,6 +1596,7 @@ static void __devexit epic_remove_one (s
#endif
pci_release_regions(pdev);
free_netdev(dev);
+ pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
/* pci_power_off(pdev, -1); */
}
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-20 14:21 [PATCH] [RFT] 2.6.4 - epic100 napi Francois Romieu
@ 2004-03-21 18:24 ` Jeff Garzik
2004-03-21 23:47 ` Francois Romieu
2004-03-23 14:29 ` OGAWA Hirofumi
2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
1 sibling, 2 replies; 18+ messages in thread
From: Jeff Garzik @ 2004-03-21 18:24 UTC (permalink / raw)
To: Francois Romieu; +Cc: netdev
Francois Romieu wrote:
> People are welcome to report how the following patch behaves on their
> hardware. It does not seem too bad here but it probably is still a bit rough.
> A split version of the patch will follow tomorrow. This one definitely aims
> at brave and/or bored testers.
Looks pretty good, but includes a standard NAPI race...
When you split up the patches, I'll throw it into my -netdev tree, which
means it will be automatically included in -mm for testing (as is r8169
now).
FWIW Andrew Morton has made me lazy... I don't bother publishing
separate -netdev patches anymore, since he automatically downloads my
netdev-2.6 BK tree before doing each -mm release.
> + if (work_done < orig_budget) {
> + unsigned long flags;
> + int status;
> +
> + spin_lock_irqsave(&ep->napi_lock, flags);
> + epic_napi_irq_on(dev, ep);
> + __netif_rx_complete(dev);
> + spin_unlock_irqrestore(&ep->napi_lock, flags);
> +
> + status = inl(ioaddr + INTSTAT);
> + if (status & EpicNapiEvent) {
> + epic_napi_irq_off(dev, ep);
> + goto rx_action;
> + }
Need to add a netif_running() check to the 'if' test at the top of the
quote.
Are you (or somebody else?) interested in reviewing all the in-tree NAPI
drivers, and seeing if other drivers have this bug? I think 8139cp.c
does at least, maybe e100 too... Such a fix would need to go into 2.4.x
as well.
Jeff
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-21 18:24 ` Jeff Garzik
@ 2004-03-21 23:47 ` Francois Romieu
2004-03-23 14:29 ` OGAWA Hirofumi
1 sibling, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-21 23:47 UTC (permalink / raw)
To: Jeff Garzik; +Cc: netdev
Jeff Garzik <jgarzik@pobox.com> :
[...]
> Need to add a netif_running() check to the 'if' test at the top of the
> quote.
>
> Are you (or somebody else?) interested in reviewing all the in-tree NAPI
> drivers, and seeing if other drivers have this bug? I think 8139cp.c
> does at least, maybe e100 too... Such a fix would need to go into 2.4.x
> as well.
Ok, I'll check for the race against dev_close.
--
Ueimor
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-21 18:24 ` Jeff Garzik
2004-03-21 23:47 ` Francois Romieu
@ 2004-03-23 14:29 ` OGAWA Hirofumi
2004-03-23 15:14 ` Jeff Garzik
2004-03-23 18:51 ` Francois Romieu
1 sibling, 2 replies; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-23 14:29 UTC (permalink / raw)
To: Jeff Garzik; +Cc: Francois Romieu, netdev
Jeff Garzik <jgarzik@pobox.com> writes:
> > + if (work_done < orig_budget) {
> > + unsigned long flags;
> > + int status;
> > +
> > + spin_lock_irqsave(&ep->napi_lock, flags);
> > + epic_napi_irq_on(dev, ep);
> > + __netif_rx_complete(dev);
> > + spin_unlock_irqrestore(&ep->napi_lock, flags);
> > +
> > + status = inl(ioaddr + INTSTAT);
> > + if (status & EpicNapiEvent) {
> > + epic_napi_irq_off(dev, ep);
> > + goto rx_action;
> > + }
>
> Need to add a netif_running() check to the 'if' test at the top of the
> quote.
>
> Are you (or somebody else?) interested in reviewing all the in-tree
> NAPI drivers, and seeing if other drivers have this bug? I think
> 8139cp.c does at least, maybe e100 too... Such a fix would need to go
> into 2.4.x as well.
Umm.. the above code is part of ->poll(). I think xxx_interrut() need
netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
flag...
BTW, ->napi_lock is unneeded because netif_schedule() is already
atomic, it need only local_irq_enable/disable().
After __netif_rx_complete() must not do "goto rx_action", otherwise it
may become cause of twice scheduleing, it should move before spin_lock().
Thanks.
--
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-23 14:29 ` OGAWA Hirofumi
@ 2004-03-23 15:14 ` Jeff Garzik
2004-03-23 16:05 ` OGAWA Hirofumi
2004-03-23 18:51 ` Francois Romieu
1 sibling, 1 reply; 18+ messages in thread
From: Jeff Garzik @ 2004-03-23 15:14 UTC (permalink / raw)
To: OGAWA Hirofumi; +Cc: Francois Romieu, netdev
OGAWA Hirofumi wrote:
> Jeff Garzik <jgarzik@pobox.com> writes:
>
>
>>>+ if (work_done < orig_budget) {
>>>+ unsigned long flags;
>>>+ int status;
>>>+
>>>+ spin_lock_irqsave(&ep->napi_lock, flags);
>>>+ epic_napi_irq_on(dev, ep);
>>>+ __netif_rx_complete(dev);
>>>+ spin_unlock_irqrestore(&ep->napi_lock, flags);
>>>+
>>>+ status = inl(ioaddr + INTSTAT);
>>>+ if (status & EpicNapiEvent) {
>>>+ epic_napi_irq_off(dev, ep);
>>>+ goto rx_action;
>>>+ }
>>
>>Need to add a netif_running() check to the 'if' test at the top of the
>>quote.
>>
>>Are you (or somebody else?) interested in reviewing all the in-tree
>>NAPI drivers, and seeing if other drivers have this bug? I think
>>8139cp.c does at least, maybe e100 too... Such a fix would need to go
>>into 2.4.x as well.
>
>
> Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> flag...
Most interrupt routines already test this, look at
static inline int netif_rx_schedule_prep(struct net_device *dev)
{
return netif_running(dev) &&
!test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state);
}
It shouldn't schedule unless the interface is running.
However... I believe it was you that added this check to 8139cp.c:
/* close possible race's with dev_close */
if (unlikely(!netif_running(dev))) {
cpw16(IntrMask, 0);
goto out;
}
I like this, because regardless of NAPI, most drivers have
non-NAPI-related interrupts they must still process. This check handles
that.
Although this code is a bit redundant to some of the locking and
synchronization found in net driver dev->close() methods, I think it is
a nice thing to do.
I do wonder about the consequences, on some hardware, about receiving an
interrupt and -not- processing the RX or TX completions associated with
that. For most NIC hardware, you'll get sane behavior, but not all, I
bet...
> BTW, ->napi_lock is unneeded because netif_schedule() is already
> atomic, it need only local_irq_enable/disable().
>
> After __netif_rx_complete() must not do "goto rx_action", otherwise it
Agreed.
Jeff
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-23 15:14 ` Jeff Garzik
@ 2004-03-23 16:05 ` OGAWA Hirofumi
0 siblings, 0 replies; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-23 16:05 UTC (permalink / raw)
To: Jeff Garzik; +Cc: Francois Romieu, netdev
Jeff Garzik <jgarzik@pobox.com> writes:
> > Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> > netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> > flag...
>
> Most interrupt routines already test this, look at
>
> static inline int netif_rx_schedule_prep(struct net_device *dev)
> {
> return netif_running(dev) &&
> !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state);
> }
>
> It shouldn't schedule unless the interface is running.
Yes.
> However... I believe it was you that added this check to 8139cp.c:
>
> /* close possible race's with dev_close */
> if (unlikely(!netif_running(dev))) {
> cpw16(IntrMask, 0);
> goto out;
> }
Yes, I added. And my suggestion was about this.
Because in case of 8139too, I got too many interrupts about pending RX
during the following, and the following wasn't finished.
(dev->close() wasn't called).
dev_close(),
clear_bit(__LINK_STATE_START, &dev->state);
smp_mb__after_clear_bit(); /* Commit netif_running(). */
while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
/* No hurry. */
current->state = TASK_INTERRUPTIBLE;
schedule_timeout(1);
}
> I do wonder about the consequences, on some hardware, about receiving
> an interrupt and -not- processing the RX or TX completions associated
> with that. For most NIC hardware, you'll get sane behavior, but not
> all, I bet...
Is this meaning should _not_ receive the interrupt about pending RX/TX?
Thanks.
--
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-23 14:29 ` OGAWA Hirofumi
2004-03-23 15:14 ` Jeff Garzik
@ 2004-03-23 18:51 ` Francois Romieu
2004-03-23 19:59 ` OGAWA Hirofumi
1 sibling, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-23 18:51 UTC (permalink / raw)
To: OGAWA Hirofumi; +Cc: Jeff Garzik, netdev
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
[...]
> Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> flag...
>
> BTW, ->napi_lock is unneeded because netif_schedule() is already
> atomic, it need only local_irq_enable/disable().
Color me confused. The lock is supposed to protect against:
CPU1 CPU2
[poll]
epic_napi_irq_on(dev, ep);
[irq handler]
if (netif_rx_schedule_prep(dev)) {
epic_napi_irq_off(dev, ep);
__netif_rx_schedule(dev);
}
__netif_rx_complete(dev);
-> napi irq are disabled and device is removed from poll list. What will
prevent it ?
> After __netif_rx_complete() must not do "goto rx_action", otherwise it
> may become cause of twice scheduleing, it should move before spin_lock().
understand the previous statement as:
+ status = inl(ioaddr + INTSTAT);
+ if (status & EpicNapiEvent) {
+ epic_napi_irq_off(dev, ep);
+ goto rx_action;
+
+ spin_lock_irqsave(&ep->napi_lock, flags);
+ epic_napi_irq_on(dev, ep);
+ __netif_rx_complete(dev);
+ spin_unlock_irqrestore(&ep->napi_lock, flags);
+
Afaiu, if some data comes in just before the spin_lock, it may wait for ages.
Can you reformulate as I feel I still did not get it right.
--
Ueimor
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-23 18:51 ` Francois Romieu
@ 2004-03-23 19:59 ` OGAWA Hirofumi
2004-03-24 0:41 ` Francois Romieu
0 siblings, 1 reply; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-23 19:59 UTC (permalink / raw)
To: Francois Romieu; +Cc: Jeff Garzik, netdev
Francois Romieu <romieu@fr.zoreil.com> writes:
> OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
> [...]
> > Umm.. the above code is part of ->poll(). I think xxx_interrut() need
> > netif_running() instead. The driver must clear __LINK_STATE_RX_SCHED
> > flag...
> >
> > BTW, ->napi_lock is unneeded because netif_schedule() is already
> > atomic, it need only local_irq_enable/disable().
>
> Color me confused. The lock is supposed to protect against:
>
> CPU1 CPU2
> [poll]
> epic_napi_irq_on(dev, ep);
> [irq handler]
> if (netif_rx_schedule_prep(dev)) {
> epic_napi_irq_off(dev, ep);
> __netif_rx_schedule(dev);
> }
> __netif_rx_complete(dev);
>
> -> napi irq are disabled and device is removed from poll list. What will
> prevent it ?
__LINK_STATE_RX_SCHED flag is setting until __netif_rx_complete() is called.
So netif_rx_schedule_prep() returns 0.
> > After __netif_rx_complete() must not do "goto rx_action", otherwise it
> > may become cause of twice scheduleing, it should move before spin_lock().
>
> understand the previous statement as:
>
> + status = inl(ioaddr + INTSTAT);
> + if (status & EpicNapiEvent) {
> + epic_napi_irq_off(dev, ep);
> + goto rx_action;
> +
> + spin_lock_irqsave(&ep->napi_lock, flags);
> + epic_napi_irq_on(dev, ep);
> + __netif_rx_complete(dev);
> + spin_unlock_irqrestore(&ep->napi_lock, flags);
> +
>
> Afaiu, if some data comes in just before the spin_lock, it may wait for ages.
Yes, maybe. But, if after spin_lock, it loop may call the twice
__netif_rx_schedule(). And netif_rx_complete() doesn't call dev_put().
It will leaks the dev->refcnt, I think.
> + __netif_rx_complete(dev);
> + spin_unlock_irqrestore(&ep->napi_lock, flags);
-- interrupt and call __netif_rx_schedule() --
> + status = inl(ioaddr + INTSTAT);
> + if (status & EpicNapiEvent) {
> + epic_napi_irq_off(dev, ep);
> + goto rx_action;
Thanks.
--
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-23 19:59 ` OGAWA Hirofumi
@ 2004-03-24 0:41 ` Francois Romieu
2004-03-24 2:52 ` OGAWA Hirofumi
2004-03-25 0:27 ` [PATCH] 2.6.5-rc2 - more " Francois Romieu
0 siblings, 2 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-24 0:41 UTC (permalink / raw)
To: OGAWA Hirofumi; +Cc: Jeff Garzik, netdev
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
[...]
> > -> napi irq are disabled and device is removed from poll list. What will
> > prevent it ?
>
> __LINK_STATE_RX_SCHED flag is setting until __netif_rx_complete() is called.
> So netif_rx_schedule_prep() returns 0.
Ok, thanks for the explanation. It is possible that the lock stays anyway (see
below).
[...]
> > Afaiu, if some data comes in just before the spin_lock, it may wait for ages.
>
> Yes, maybe. But, if after spin_lock, it loop may call the twice
> __netif_rx_schedule(). And netif_rx_complete() doesn't call dev_put().
> It will leaks the dev->refcnt, I think.
@$*#!zW
The following patch should avoid the leak as well as the packet rot
(untested, 1:33 AM, apply on top of previous serie).
Multiple invocation of __netif_rx_schedule() in epic_interrupt() while
epic_poll loops over __netif_rx_complete() leads to serious device
refcount leak.
drivers/net/epic100.c | 26 +++++++++++++-------------
1 files changed, 13 insertions(+), 13 deletions(-)
diff -puN drivers/net/epic100.c~epic100-napi-30 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-30 2004-03-24 01:18:25.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c 2004-03-24 01:19:35.000000000 +0100
@@ -337,6 +337,7 @@ struct epic_private {
/* Ring pointers. */
spinlock_t lock; /* Group with Tx control cache line. */
spinlock_t napi_lock;
+ unsigned int reschedule_in_poll;
unsigned int cur_tx, dirty_tx;
unsigned int cur_rx, dirty_rx;
@@ -472,7 +473,9 @@ static int __devinit epic_init_one (stru
dev->base_addr = ioaddr;
dev->irq = irq;
- spin_lock_init (&ep->lock);
+ spin_lock_init(&ep->lock);
+ spin_lock_init(&ep->napi_lock);
+ ep->reschedule_in_poll = 0;
/* Bring the chip out of low-power mode. */
outl(0x4200, ioaddr + GENCTL);
@@ -981,8 +984,6 @@ static void epic_init_ring(struct net_de
int i;
ep->tx_full = 0;
- spin_lock_init(&ep->lock);
- spin_lock_init(&ep->napi_lock);
ep->dirty_tx = ep->cur_tx = 0;
ep->cur_rx = ep->dirty_rx = 0;
ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1152,7 +1153,6 @@ static void epic_tx(struct net_device *d
}
}
-
/* The interrupt handler does all of the Rx thread work and cleans up
after the Tx thread. */
static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1177,12 +1177,13 @@ static irqreturn_t epic_interrupt(int ir
break;
handled = 1;
- if (status & EpicNapiEvent) {
+ if ((status & EpicNapiEvent) && !ep->reschedule_in_poll) {
spin_lock(&ep->napi_lock);
if (netif_rx_schedule_prep(dev)) {
epic_napi_irq_off(dev, ep);
__netif_rx_schedule(dev);
- }
+ } else
+ ep->reschedule_in_poll++;
spin_unlock(&ep->napi_lock);
}
@@ -1355,7 +1356,6 @@ static int epic_poll(struct net_device *
orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
-rx_action:
outl(EpicNapiEvent, ioaddr + INTSTAT);
epic_tx(dev, ep);
@@ -1369,18 +1369,18 @@ rx_action:
if (netif_running(dev) && (work_done < orig_budget)) {
unsigned long flags;
- int status;
- spin_lock_irqsave(&ep->napi_lock, flags);
epic_napi_irq_on(dev, ep);
+
+ spin_lock_irqsave(&ep->napi_lock, flags);
__netif_rx_complete(dev);
- spin_unlock_irqrestore(&ep->napi_lock, flags);
- status = inl(ioaddr + INTSTAT);
- if (status & EpicNapiEvent) {
+ if (ep->reschedule_in_poll) {
epic_napi_irq_off(dev, ep);
- goto rx_action;
+ __netif_rx_schedule(dev);
+ ep->reschedule_in_poll--;
}
+ spin_unlock_irqrestore(&ep->napi_lock, flags);
}
return (work_done >= orig_budget);
_
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-24 0:41 ` Francois Romieu
@ 2004-03-24 2:52 ` OGAWA Hirofumi
2004-03-24 12:33 ` Francois Romieu
2004-03-25 0:27 ` [PATCH] 2.6.5-rc2 - more " Francois Romieu
1 sibling, 1 reply; 18+ messages in thread
From: OGAWA Hirofumi @ 2004-03-24 2:52 UTC (permalink / raw)
To: Francois Romieu; +Cc: Jeff Garzik, netdev
Francois Romieu <romieu@fr.zoreil.com> writes:
> > Yes, maybe. But, if after spin_lock, it loop may call the twice
> > __netif_rx_schedule(). And netif_rx_complete() doesn't call dev_put().
> > It will leaks the dev->refcnt, I think.
>
> @$*#!zW
>
> The following patch should avoid the leak as well as the packet rot
> (untested, 1:33 AM, apply on top of previous serie).
>
>
> Multiple invocation of __netif_rx_schedule() in epic_interrupt() while
> epic_poll loops over __netif_rx_complete() leads to serious device
> refcount leak.
Do you care the lost interrupt? If so, I was miss reading it.
IIRC, PCI spec requires the level-trigger. So devices asserts IRQ
signal until the driver clears the pending interrupt. No?
--
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH] [RFT] 2.6.4 - epic100 napi
2004-03-24 2:52 ` OGAWA Hirofumi
@ 2004-03-24 12:33 ` Francois Romieu
0 siblings, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-24 12:33 UTC (permalink / raw)
To: OGAWA Hirofumi; +Cc: Jeff Garzik, netdev
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
[...]
> Do you care the lost interrupt? If so, I was miss reading it.
>
> IIRC, PCI spec requires the level-trigger. So devices asserts IRQ
> signal until the driver clears the pending interrupt. No?
<insert usual "correct me if I'm wrong" disclaimer here>
The driver only masks the interruptions which are napi related so
epic_poll() and epic_interrupt() are always racing. I completely agree that
it "wastes" the nice behavior of level-triggered irq. If one wants to avoid
the race, everything should go to the poll() handler. It implies polled
access to the INTSTAT register (as done in epic_rx_err).
I could not find in the archives some general napi-wisdom which suggests
that everything has to go to the poll() handler, be it for simplicity or
for a real gain. So I hesitate to follow the other way and exchange the
polled access for some (locked) traffic between the poll() and the irq
handler.
Comments welcome.
--
Ueimor
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH] 2.6.5-rc2 - more epic100 napi
2004-03-24 0:41 ` Francois Romieu
2004-03-24 2:52 ` OGAWA Hirofumi
@ 2004-03-25 0:27 ` Francois Romieu
1 sibling, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-25 0:27 UTC (permalink / raw)
To: Jeff Garzik; +Cc: OGAWA Hirofumi, netdev
Francois Romieu <romieu@fr.zoreil.com> :
[...]
> diff -puN drivers/net/epic100.c~epic100-napi-30 drivers/net/epic100.c
> --- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-30 2004-03-24 01:18:25.000000000 +0100
> +++ linux-2.6.5-rc2-fr/drivers/net/epic100.c 2004-03-24 01:19:35.000000000 +0100
> @@ -1355,7 +1356,6 @@ static int epic_poll(struct net_device *
>
> orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
>
> -rx_action:
> outl(EpicNapiEvent, ioaddr + INTSTAT);
>
> epic_tx(dev, ep);
> @@ -1369,18 +1369,18 @@ rx_action:
>
> if (netif_running(dev) && (work_done < orig_budget)) {
> unsigned long flags;
> - int status;
>
> - spin_lock_irqsave(&ep->napi_lock, flags);
> epic_napi_irq_on(dev, ep);
> +
> + spin_lock_irqsave(&ep->napi_lock, flags);
> __netif_rx_complete(dev);
> - spin_unlock_irqrestore(&ep->napi_lock, flags);
>
> - status = inl(ioaddr + INTSTAT);
> - if (status & EpicNapiEvent) {
> + if (ep->reschedule_in_poll) {
> epic_napi_irq_off(dev, ep);
> - goto rx_action;
> + __netif_rx_schedule(dev);
^^^^^^^^^^^^^^^^^^^^^^^^^
While in poll() handler, brilliant :o(
Please apply (tested) patch below on top of the acked patches (i.e 1/4...4/4):
- issuing commands via the serial console under an incoming stream of 40k
short sized pps sucks but it is possible;
- does not leak refcount (it rmmods fine).
Next step: identify the best performer amongst the previously discussed changes.
Multiple invocation of __netif_rx_schedule() in epic_interrupt() while
epic_poll loops over __netif_rx_complete() leads to serious device
refcount leak.
drivers/net/epic100.c | 33 ++++++++++++++++++---------------
1 files changed, 18 insertions(+), 15 deletions(-)
diff -puN drivers/net/epic100.c~epic100-napi-30 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-30 2004-03-24 01:18:25.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c 2004-03-25 00:51:30.000000000 +0100
@@ -337,6 +337,7 @@ struct epic_private {
/* Ring pointers. */
spinlock_t lock; /* Group with Tx control cache line. */
spinlock_t napi_lock;
+ unsigned int reschedule_in_poll;
unsigned int cur_tx, dirty_tx;
unsigned int cur_rx, dirty_rx;
@@ -472,7 +473,9 @@ static int __devinit epic_init_one (stru
dev->base_addr = ioaddr;
dev->irq = irq;
- spin_lock_init (&ep->lock);
+ spin_lock_init(&ep->lock);
+ spin_lock_init(&ep->napi_lock);
+ ep->reschedule_in_poll = 0;
/* Bring the chip out of low-power mode. */
outl(0x4200, ioaddr + GENCTL);
@@ -981,8 +984,6 @@ static void epic_init_ring(struct net_de
int i;
ep->tx_full = 0;
- spin_lock_init(&ep->lock);
- spin_lock_init(&ep->napi_lock);
ep->dirty_tx = ep->cur_tx = 0;
ep->cur_rx = ep->dirty_rx = 0;
ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1152,7 +1153,6 @@ static void epic_tx(struct net_device *d
}
}
-
/* The interrupt handler does all of the Rx thread work and cleans up
after the Tx thread. */
static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1177,14 +1177,16 @@ static irqreturn_t epic_interrupt(int ir
break;
handled = 1;
- if (status & EpicNapiEvent) {
+ if ((status & EpicNapiEvent) && !ep->reschedule_in_poll) {
spin_lock(&ep->napi_lock);
if (netif_rx_schedule_prep(dev)) {
epic_napi_irq_off(dev, ep);
__netif_rx_schedule(dev);
- }
+ } else
+ ep->reschedule_in_poll++;
spin_unlock(&ep->napi_lock);
}
+ status &= ~EpicNapiEvent;
/* Check uncommon events all at once. */
if (status &
@@ -1211,7 +1213,7 @@ static irqreturn_t epic_interrupt(int ir
/* Clear all error sources. */
outl(status & 0x7f18, ioaddr + INTSTAT);
}
- if (status & EpicNormalEvent)
+ if (!(status & EpicNormalEvent))
break;
if (--boguscnt < 0) {
printk(KERN_ERR "%s: Too much work at interrupt, "
@@ -1356,7 +1358,6 @@ static int epic_poll(struct net_device *
orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
rx_action:
- outl(EpicNapiEvent, ioaddr + INTSTAT);
epic_tx(dev, ep);
@@ -1369,18 +1370,20 @@ rx_action:
if (netif_running(dev) && (work_done < orig_budget)) {
unsigned long flags;
- int status;
spin_lock_irqsave(&ep->napi_lock, flags);
- epic_napi_irq_on(dev, ep);
- __netif_rx_complete(dev);
- spin_unlock_irqrestore(&ep->napi_lock, flags);
- status = inl(ioaddr + INTSTAT);
- if (status & EpicNapiEvent) {
- epic_napi_irq_off(dev, ep);
+ if (ep->reschedule_in_poll) {
+ ep->reschedule_in_poll--;
+ spin_unlock_irqrestore(&ep->napi_lock, flags);
goto rx_action;
}
+
+ outl(EpicNapiEvent, ioaddr + INTSTAT);
+ epic_napi_irq_on(dev, ep);
+ __netif_rx_complete(dev);
+
+ spin_unlock_irqrestore(&ep->napi_lock, flags);
}
return (work_done >= orig_budget);
_
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 0/4] 2.6.5-rc2 - epic100 update
2004-03-20 14:21 [PATCH] [RFT] 2.6.4 - epic100 napi Francois Romieu
2004-03-21 18:24 ` Jeff Garzik
@ 2004-03-22 23:50 ` Francois Romieu
2004-03-22 23:51 ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
2004-03-23 0:12 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Jeff Garzik
1 sibling, 2 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:50 UTC (permalink / raw)
To: netdev; +Cc: Jeff Garzik
Schedule:
- epic100-fixup.patch: opportunistic cleanup
- epic100-napi-00.patch: code shuffling before the move
- epic100-napi-10.patch: rx napi (includes netif_running change)
- epic100-napi-20.patch: minimalistic tx napi
2.6.5-rc2 and 2.6.5-rc2-mm1 contain the same drivers/net/epic100.c
so the patches should apply equally to both.
The driver has been moderately tested.
Feedback welcome.
--
Ueimor
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 1/4] 2.6.5-rc2 - epic100 fixup
2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
@ 2004-03-22 23:51 ` Francois Romieu
2004-03-22 23:52 ` [PATCH 2/4] 2.6.5-rc2 - epic100 napi Francois Romieu
2004-03-23 0:12 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Jeff Garzik
1 sibling, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:51 UTC (permalink / raw)
To: netdev; +Cc: Jeff Garzik
- extra pci_disable_device() to balance invocation of pci_enable_device()
in epic_init_one() (-> error path + epic_remove_one());
- lazy return status in epic_init_one(), tsss...;
- memory dedicated to Rx descriptors was not freed after failure of
register_netdev() in epic_init_one();
- use of epic_pause() in epic_close() offers a small window for a late
interruption just before the final free_irq(). Let's close the window to
avoid two epic_rx() threads racing with each other.
drivers/net/epic100.c | 59 +++++++++++++++++++++++++++++++++-----------------
1 files changed, 40 insertions(+), 19 deletions(-)
diff -puN drivers/net/epic100.c~epic100-fixup drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-fixup 2004-03-22 22:53:16.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c 2004-03-22 22:53:16.000000000 +0100
@@ -378,7 +378,7 @@ static int __devinit epic_init_one (stru
int irq;
struct net_device *dev;
struct epic_private *ep;
- int i, option = 0, duplex = 0;
+ int i, ret, option = 0, duplex = 0;
void *ring_space;
dma_addr_t ring_dma;
@@ -392,29 +392,33 @@ static int __devinit epic_init_one (stru
card_idx++;
- i = pci_enable_device(pdev);
- if (i)
- return i;
+ ret = pci_enable_device(pdev);
+ if (ret)
+ goto out;
irq = pdev->irq;
if (pci_resource_len(pdev, 0) < pci_id_tbl[chip_idx].io_size) {
printk (KERN_ERR "card %d: no PCI region space\n", card_idx);
- return -ENODEV;
+ ret = -ENODEV;
+ goto err_out_disable;
}
pci_set_master(pdev);
+ ret = pci_request_regions(pdev, DRV_NAME);
+ if (ret < 0)
+ goto err_out_disable;
+
+ ret = -ENOMEM;
+
dev = alloc_etherdev(sizeof (*ep));
if (!dev) {
printk (KERN_ERR "card %d: no memory for eth device\n", card_idx);
- return -ENOMEM;
+ goto err_out_free_res;
}
SET_MODULE_OWNER(dev);
SET_NETDEV_DEV(dev, &pdev->dev);
- if (pci_request_regions(pdev, DRV_NAME))
- goto err_out_free_netdev;
-
#ifdef USE_IO_OPS
ioaddr = pci_resource_start (pdev, 0);
#else
@@ -422,7 +426,7 @@ static int __devinit epic_init_one (stru
ioaddr = (long) ioremap (ioaddr, pci_resource_len (pdev, 1));
if (!ioaddr) {
printk (KERN_ERR DRV_NAME " %d: ioremap failed\n", card_idx);
- goto err_out_free_res;
+ goto err_out_free_netdev;
}
#endif
@@ -544,9 +548,9 @@ static int __devinit epic_init_one (stru
dev->watchdog_timeo = TX_TIMEOUT;
dev->tx_timeout = &epic_tx_timeout;
- i = register_netdev(dev);
- if (i)
- goto err_out_unmap_tx;
+ ret = register_netdev(dev);
+ if (ret < 0)
+ goto err_out_unmap_rx;
printk(KERN_INFO "%s: %s at %#lx, IRQ %d, ",
dev->name, pci_id_tbl[chip_idx].name, ioaddr, dev->irq);
@@ -554,19 +558,24 @@ static int __devinit epic_init_one (stru
printk("%2.2x:", dev->dev_addr[i]);
printk("%2.2x.\n", dev->dev_addr[i]);
- return 0;
+out:
+ return ret;
+err_out_unmap_rx:
+ pci_free_consistent(pdev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma);
err_out_unmap_tx:
pci_free_consistent(pdev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma);
err_out_iounmap:
#ifndef USE_IO_OPS
iounmap(ioaddr);
-err_out_free_res:
-#endif
- pci_release_regions(pdev);
err_out_free_netdev:
+#endif
free_netdev(dev);
- return -ENODEV;
+err_out_free_res:
+ pci_release_regions(pdev);
+err_out_disable:
+ pci_disable_device(pdev);
+ goto out;
}
\f
/* Serial EEPROM section. */
@@ -592,6 +601,13 @@ err_out_free_netdev:
#define EE_READ256_CMD (6 << 8)
#define EE_ERASE_CMD (7 << 6)
+static void epic_disable_int(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+
+ outl(0x00000000, ioaddr + INTMASK);
+}
+
static int __devinit read_eeprom(long ioaddr, int location)
{
int i;
@@ -1276,9 +1292,13 @@ static int epic_close(struct net_device
dev->name, (int)inl(ioaddr + INTSTAT));
del_timer_sync(&ep->timer);
- epic_pause(dev);
+
+ epic_disable_int(dev, ep);
+
free_irq(dev->irq, dev);
+ epic_pause(dev);
+
/* Free all the skbuffs in the Rx queue. */
for (i = 0; i < RX_RING_SIZE; i++) {
skb = ep->rx_skbuff[i];
@@ -1476,6 +1496,7 @@ static void __devexit epic_remove_one (s
#endif
pci_release_regions(pdev);
free_netdev(dev);
+ pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
/* pci_power_off(pdev, -1); */
}
_
^ permalink raw reply [flat|nested] 18+ messages in thread* [PATCH 2/4] 2.6.5-rc2 - epic100 napi
2004-03-22 23:51 ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
@ 2004-03-22 23:52 ` Francois Romieu
2004-03-22 23:53 ` [PATCH 3/4] " Francois Romieu
0 siblings, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:52 UTC (permalink / raw)
To: netdev; +Cc: Jeff Garzik
Isolate the classical TX part of epic_interrupt. Innocent code shuffling.
drivers/net/epic100.c | 137 +++++++++++++++++++++++++++-----------------------
1 files changed, 76 insertions(+), 61 deletions(-)
diff -puN drivers/net/epic100.c~epic100-napi-00 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-00 2004-03-22 22:53:18.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c 2004-03-22 22:53:18.000000000 +0100
@@ -1045,6 +1045,79 @@ static int epic_start_xmit(struct sk_buf
return 0;
}
+static void epic_tx_error(struct net_device *dev, struct epic_private *ep,
+ int status)
+{
+ struct net_device_stats *stats = &ep->stats;
+
+#ifndef final_version
+ /* There was an major error, log it. */
+ if (debug > 1)
+ printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
+ dev->name, status);
+#endif
+ stats->tx_errors++;
+ if (status & 0x1050)
+ stats->tx_aborted_errors++;
+ if (status & 0x0008)
+ stats->tx_carrier_errors++;
+ if (status & 0x0040)
+ stats->tx_window_errors++;
+ if (status & 0x0010)
+ stats->tx_fifo_errors++;
+}
+
+static void epic_tx(struct net_device *dev, struct epic_private *ep)
+{
+ unsigned int dirty_tx, cur_tx;
+
+ /*
+ * Note: if this lock becomes a problem we can narrow the locked
+ * region at the cost of occasionally grabbing the lock more times.
+ */
+ spin_lock(&ep->lock);
+ cur_tx = ep->cur_tx;
+ for (dirty_tx = ep->dirty_tx; cur_tx - dirty_tx > 0; dirty_tx++) {
+ struct sk_buff *skb;
+ int entry = dirty_tx % TX_RING_SIZE;
+ int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
+
+ if (txstatus & DescOwn)
+ break; /* It still hasn't been Txed */
+
+ if (likely(txstatus & 0x0001)) {
+ ep->stats.collisions += (txstatus >> 8) & 15;
+ ep->stats.tx_packets++;
+ ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
+ } else
+ epic_tx_error(dev, ep, txstatus);
+
+ /* Free the original skb. */
+ skb = ep->tx_skbuff[entry];
+ pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr,
+ skb->len, PCI_DMA_TODEVICE);
+ dev_kfree_skb_irq(skb);
+ ep->tx_skbuff[entry] = 0;
+ }
+
+#ifndef final_version
+ if (cur_tx - dirty_tx > TX_RING_SIZE) {
+ printk(KERN_WARNING
+ "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
+ dev->name, dirty_tx, cur_tx, ep->tx_full);
+ dirty_tx += TX_RING_SIZE;
+ }
+#endif
+ ep->dirty_tx = dirty_tx;
+ if (ep->tx_full && cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
+ /* The ring is no longer full, allow new TX entries. */
+ ep->tx_full = 0;
+ netif_wake_queue(dev);
+ }
+ spin_unlock(&ep->lock);
+}
+
+
/* The interrupt handler does all of the Rx thread work and cleans up
after the Tx thread. */
static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
@@ -1072,66 +1145,8 @@ static irqreturn_t epic_interrupt(int ir
if (status & (RxDone | RxStarted | RxEarlyWarn | RxOverflow))
epic_rx(dev);
- if (status & (TxEmpty | TxDone)) {
- unsigned int dirty_tx, cur_tx;
-
- /* Note: if this lock becomes a problem we can narrow the locked
- region at the cost of occasionally grabbing the lock more
- times. */
- spin_lock(&ep->lock);
- cur_tx = ep->cur_tx;
- dirty_tx = ep->dirty_tx;
- for (; cur_tx - dirty_tx > 0; dirty_tx++) {
- struct sk_buff *skb;
- int entry = dirty_tx % TX_RING_SIZE;
- int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus);
-
- if (txstatus & DescOwn)
- break; /* It still hasn't been Txed */
-
- if ( ! (txstatus & 0x0001)) {
- /* There was an major error, log it. */
-#ifndef final_version
- if (debug > 1)
- printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
- dev->name, txstatus);
-#endif
- ep->stats.tx_errors++;
- if (txstatus & 0x1050) ep->stats.tx_aborted_errors++;
- if (txstatus & 0x0008) ep->stats.tx_carrier_errors++;
- if (txstatus & 0x0040) ep->stats.tx_window_errors++;
- if (txstatus & 0x0010) ep->stats.tx_fifo_errors++;
- } else {
- ep->stats.collisions += (txstatus >> 8) & 15;
- ep->stats.tx_packets++;
- ep->stats.tx_bytes += ep->tx_skbuff[entry]->len;
- }
-
- /* Free the original skb. */
- skb = ep->tx_skbuff[entry];
- pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr,
- skb->len, PCI_DMA_TODEVICE);
- dev_kfree_skb_irq(skb);
- ep->tx_skbuff[entry] = 0;
- }
-
-#ifndef final_version
- if (cur_tx - dirty_tx > TX_RING_SIZE) {
- printk(KERN_WARNING "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
- dev->name, dirty_tx, cur_tx, ep->tx_full);
- dirty_tx += TX_RING_SIZE;
- }
-#endif
- ep->dirty_tx = dirty_tx;
- if (ep->tx_full
- && cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
- /* The ring is no longer full, allow new TX entries. */
- ep->tx_full = 0;
- spin_unlock(&ep->lock);
- netif_wake_queue(dev);
- } else
- spin_unlock(&ep->lock);
- }
+ if (status & (TxEmpty | TxDone))
+ epic_tx(dev, ep);
/* Check uncommon events all at once. */
if (status & (CntFull | TxUnderrun | RxOverflow | RxFull |
@@ -1149,7 +1164,7 @@ static irqreturn_t epic_interrupt(int ir
/* Restart the transmit process. */
outl(RestartTx, ioaddr + COMMAND);
}
- if (status & RxOverflow) { /* Missed a Rx frame. */
+ if (status & RxOverflow) { /* Missed a Rx frame. */
ep->stats.rx_errors++;
}
if (status & (RxOverflow | RxFull))
_
^ permalink raw reply [flat|nested] 18+ messages in thread* [PATCH 3/4] 2.6.5-rc2 - epic100 napi
2004-03-22 23:52 ` [PATCH 2/4] 2.6.5-rc2 - epic100 napi Francois Romieu
@ 2004-03-22 23:53 ` Francois Romieu
2004-03-22 23:53 ` [PATCH 4/4] " Francois Romieu
0 siblings, 1 reply; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:53 UTC (permalink / raw)
To: netdev; +Cc: Jeff Garzik
RX NAPI.
drivers/net/epic100.c | 137 ++++++++++++++++++++++++++++++++++++++++++--------
1 files changed, 116 insertions(+), 21 deletions(-)
diff -puN drivers/net/epic100.c~epic100-napi-10 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-10 2004-03-22 22:53:19.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c 2004-03-23 00:18:33.000000000 +0100
@@ -98,7 +98,7 @@ static int rx_copybreak;
There are no ill effects from too-large receive rings. */
#define TX_RING_SIZE 16
#define TX_QUEUE_LEN 10 /* Limit ring entries actually used. */
-#define RX_RING_SIZE 32
+#define RX_RING_SIZE 256
#define TX_TOTAL_SIZE TX_RING_SIZE*sizeof(struct epic_tx_desc)
#define RX_TOTAL_SIZE RX_RING_SIZE*sizeof(struct epic_rx_desc)
@@ -292,6 +292,11 @@ enum CommandBits {
StopTxDMA=0x20, StopRxDMA=0x40, RestartTx=0x80,
};
+#define EpicRemoved 0xffffffff /* Chip failed or removed (CardBus) */
+
+#define EpicNapiEvent (RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
+#define EpicNormalEvent (0x0000ffff & ~EpicNapiEvent)
+
static u16 media2miictl[16] = {
0, 0x0C00, 0x0C00, 0x2000, 0x0100, 0x2100, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0 };
@@ -330,9 +335,11 @@ struct epic_private {
/* Ring pointers. */
spinlock_t lock; /* Group with Tx control cache line. */
+ spinlock_t napi_lock;
unsigned int cur_tx, dirty_tx;
unsigned int cur_rx, dirty_rx;
+ u32 irq_mask;
unsigned int rx_buf_sz; /* Based on MTU+slack. */
struct pci_dev *pci_dev; /* PCI bus location. */
@@ -359,7 +366,8 @@ static void epic_timer(unsigned long dat
static void epic_tx_timeout(struct net_device *dev);
static void epic_init_ring(struct net_device *dev);
static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev);
-static int epic_rx(struct net_device *dev);
+static int epic_rx(struct net_device *dev, int budget);
+static int epic_poll(struct net_device *dev, int *budget);
static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
static struct ethtool_ops netdev_ethtool_ops;
@@ -493,6 +501,9 @@ static int __devinit epic_init_one (stru
ep->pci_dev = pdev;
ep->chip_id = chip_idx;
ep->chip_flags = pci_id_tbl[chip_idx].drv_flags;
+ ep->irq_mask =
+ (ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
+ | CntFull | TxUnderrun | TxDone | TxEmpty | EpicNapiEvent;
/* Find the connected MII xcvrs.
Doing this in open() would allow detecting external xcvrs later, but
@@ -547,6 +558,8 @@ static int __devinit epic_init_one (stru
dev->ethtool_ops = &netdev_ethtool_ops;
dev->watchdog_timeo = TX_TIMEOUT;
dev->tx_timeout = &epic_tx_timeout;
+ dev->poll = epic_poll;
+ dev->weight = 64;
ret = register_netdev(dev);
if (ret < 0)
@@ -608,6 +621,29 @@ static void epic_disable_int(struct net_
outl(0x00000000, ioaddr + INTMASK);
}
+static inline void __epic_pci_commit(long ioaddr)
+{
+#ifndef USE_IO_OPS
+ inl(ioaddr + INTMASK);
+#endif
+}
+
+static void epic_napi_irq_off(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+
+ outl(ep->irq_mask & ~EpicNapiEvent, ioaddr + INTMASK);
+ __epic_pci_commit(ioaddr);
+}
+
+static void epic_napi_irq_on(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+
+ /* No need to commit possible posted write */
+ outl(ep->irq_mask | EpicNapiEvent, ioaddr + INTMASK);
+}
+
static int __devinit read_eeprom(long ioaddr, int location)
{
int i;
@@ -769,8 +805,7 @@ static int epic_open(struct net_device *
/* Enable interrupts by setting the interrupt mask. */
outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
| CntFull | TxUnderrun | TxDone | TxEmpty
- | RxError | RxOverflow | RxFull | RxHeader | RxDone,
- ioaddr + INTMASK);
+ | RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
if (debug > 1)
printk(KERN_DEBUG "%s: epic_open() ioaddr %lx IRQ %d status %4.4x "
@@ -811,7 +846,7 @@ static void epic_pause(struct net_device
}
/* Remove the packets on the Rx queue. */
- epic_rx(dev);
+ epic_rx(dev, RX_RING_SIZE);
}
static void epic_restart(struct net_device *dev)
@@ -858,8 +893,8 @@ static void epic_restart(struct net_devi
/* Enable interrupts by setting the interrupt mask. */
outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
| CntFull | TxUnderrun | TxDone | TxEmpty
- | RxError | RxOverflow | RxFull | RxHeader | RxDone,
- ioaddr + INTMASK);
+ | RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
+
printk(KERN_DEBUG "%s: epic_restart() done, cmd status %4.4x, ctl %4.4x"
" interrupt %4.4x.\n",
dev->name, (int)inl(ioaddr + COMMAND), (int)inl(ioaddr + GENCTL),
@@ -945,7 +980,8 @@ static void epic_init_ring(struct net_de
int i;
ep->tx_full = 0;
- ep->lock = (spinlock_t) SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&ep->lock);
+ spin_lock_init(&ep->napi_lock);
ep->dirty_tx = ep->cur_tx = 0;
ep->cur_rx = ep->dirty_rx = 0;
ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
@@ -1131,7 +1167,7 @@ static irqreturn_t epic_interrupt(int ir
do {
status = inl(ioaddr + INTSTAT);
/* Acknowledge all of the current interrupt sources ASAP. */
- outl(status & 0x00007fff, ioaddr + INTSTAT);
+ outl(status & EpicNormalEvent, ioaddr + INTSTAT);
if (debug > 4)
printk(KERN_DEBUG "%s: Interrupt, status=%#8.8x new "
@@ -1142,16 +1178,22 @@ static irqreturn_t epic_interrupt(int ir
break;
handled = 1;
- if (status & (RxDone | RxStarted | RxEarlyWarn | RxOverflow))
- epic_rx(dev);
+ if (status & EpicNapiEvent) {
+ spin_lock(&ep->napi_lock);
+ if (netif_rx_schedule_prep(dev)) {
+ epic_napi_irq_off(dev, ep);
+ __netif_rx_schedule(dev);
+ }
+ spin_unlock(&ep->napi_lock);
+ }
if (status & (TxEmpty | TxDone))
epic_tx(dev, ep);
/* Check uncommon events all at once. */
- if (status & (CntFull | TxUnderrun | RxOverflow | RxFull |
- PCIBusErr170 | PCIBusErr175)) {
- if (status == 0xffffffff) /* Chip failed or removed (CardBus). */
+ if (status &
+ (CntFull | TxUnderrun | PCIBusErr170 | PCIBusErr175)) {
+ if (status == EpicRemoved)
break;
/* Always update the error counts to avoid overhead later. */
ep->stats.rx_missed_errors += inb(ioaddr + MPCNT);
@@ -1164,11 +1206,6 @@ static irqreturn_t epic_interrupt(int ir
/* Restart the transmit process. */
outl(RestartTx, ioaddr + COMMAND);
}
- if (status & RxOverflow) { /* Missed a Rx frame. */
- ep->stats.rx_errors++;
- }
- if (status & (RxOverflow | RxFull))
- outw(RxQueued, ioaddr + COMMAND);
if (status & PCIBusErr170) {
printk(KERN_ERR "%s: PCI Bus Error! EPIC status %4.4x.\n",
dev->name, status);
@@ -1178,6 +1215,8 @@ static irqreturn_t epic_interrupt(int ir
/* Clear all error sources. */
outl(status & 0x7f18, ioaddr + INTSTAT);
}
+ if (status & EpicNormalEvent)
+ break;
if (--boguscnt < 0) {
printk(KERN_ERR "%s: Too much work at interrupt, "
"IntrStatus=0x%8.8x.\n",
@@ -1195,7 +1234,7 @@ static irqreturn_t epic_interrupt(int ir
return IRQ_RETVAL(handled);
}
-static int epic_rx(struct net_device *dev)
+static int epic_rx(struct net_device *dev, int budget)
{
struct epic_private *ep = dev->priv;
int entry = ep->cur_rx % RX_RING_SIZE;
@@ -1205,6 +1244,10 @@ static int epic_rx(struct net_device *de
if (debug > 4)
printk(KERN_DEBUG " In epic_rx(), entry %d %8.8x.\n", entry,
ep->rx_ring[entry].rxstatus);
+
+ if (rx_work_limit > budget)
+ rx_work_limit = budget;
+
/* If we own the next entry, it's a new packet. Send it up. */
while ((ep->rx_ring[entry].rxstatus & cpu_to_le32(DescOwn)) == 0) {
int status = le32_to_cpu(ep->rx_ring[entry].rxstatus);
@@ -1265,7 +1308,7 @@ static int epic_rx(struct net_device *de
ep->rx_skbuff[entry] = NULL;
}
skb->protocol = eth_type_trans(skb, dev);
- netif_rx(skb);
+ netif_receive_skb(skb);
dev->last_rx = jiffies;
ep->stats.rx_packets++;
ep->stats.rx_bytes += pkt_len;
@@ -1293,6 +1336,58 @@ static int epic_rx(struct net_device *de
return work_done;
}
+static void epic_rx_err(struct net_device *dev, struct epic_private *ep)
+{
+ long ioaddr = dev->base_addr;
+ int status;
+
+ status = inl(ioaddr + INTSTAT);
+
+ if (status == EpicRemoved)
+ return;
+ if (status & RxOverflow) /* Missed a Rx frame. */
+ ep->stats.rx_errors++;
+ if (status & (RxOverflow | RxFull))
+ outw(RxQueued, ioaddr + COMMAND);
+}
+
+static int epic_poll(struct net_device *dev, int *budget)
+{
+ struct epic_private *ep = dev->priv;
+ int work_done, orig_budget;
+ long ioaddr = dev->base_addr;
+
+ orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
+
+rx_action:
+ outl(EpicNapiEvent, ioaddr + INTSTAT);
+
+ work_done = epic_rx(dev, *budget);
+
+ epic_rx_err(dev, ep);
+
+ *budget -= work_done;
+ dev->quota -= work_done;
+
+ if (netif_running(dev) && (work_done < orig_budget)) {
+ unsigned long flags;
+ int status;
+
+ spin_lock_irqsave(&ep->napi_lock, flags);
+ epic_napi_irq_on(dev, ep);
+ __netif_rx_complete(dev);
+ spin_unlock_irqrestore(&ep->napi_lock, flags);
+
+ status = inl(ioaddr + INTSTAT);
+ if (status & EpicNapiEvent) {
+ epic_napi_irq_off(dev, ep);
+ goto rx_action;
+ }
+ }
+
+ return (work_done >= orig_budget);
+}
+
static int epic_close(struct net_device *dev)
{
long ioaddr = dev->base_addr;
_
^ permalink raw reply [flat|nested] 18+ messages in thread* [PATCH 4/4] 2.6.5-rc2 - epic100 napi
2004-03-22 23:53 ` [PATCH 3/4] " Francois Romieu
@ 2004-03-22 23:53 ` Francois Romieu
0 siblings, 0 replies; 18+ messages in thread
From: Francois Romieu @ 2004-03-22 23:53 UTC (permalink / raw)
To: netdev; +Cc: Jeff Garzik
TX NAPI.
drivers/net/epic100.c | 20 +++++++++-----------
1 files changed, 9 insertions(+), 11 deletions(-)
diff -puN drivers/net/epic100.c~epic100-napi-20 drivers/net/epic100.c
--- linux-2.6.5-rc2/drivers/net/epic100.c~epic100-napi-20 2004-03-23 00:18:40.000000000 +0100
+++ linux-2.6.5-rc2-fr/drivers/net/epic100.c 2004-03-23 00:18:40.000000000 +0100
@@ -96,8 +96,8 @@ static int rx_copybreak;
Making the Tx ring too large decreases the effectiveness of channel
bonding and packet priority.
There are no ill effects from too-large receive rings. */
-#define TX_RING_SIZE 16
-#define TX_QUEUE_LEN 10 /* Limit ring entries actually used. */
+#define TX_RING_SIZE 256
+#define TX_QUEUE_LEN 240 /* Limit ring entries actually used. */
#define RX_RING_SIZE 256
#define TX_TOTAL_SIZE TX_RING_SIZE*sizeof(struct epic_tx_desc)
#define RX_TOTAL_SIZE RX_RING_SIZE*sizeof(struct epic_rx_desc)
@@ -294,7 +294,8 @@ enum CommandBits {
#define EpicRemoved 0xffffffff /* Chip failed or removed (CardBus) */
-#define EpicNapiEvent (RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
+#define EpicNapiEvent (TxEmpty | TxDone | \
+ RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull)
#define EpicNormalEvent (0x0000ffff & ~EpicNapiEvent)
static u16 media2miictl[16] = {
@@ -503,7 +504,7 @@ static int __devinit epic_init_one (stru
ep->chip_flags = pci_id_tbl[chip_idx].drv_flags;
ep->irq_mask =
(ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
- | CntFull | TxUnderrun | TxDone | TxEmpty | EpicNapiEvent;
+ | CntFull | TxUnderrun | EpicNapiEvent;
/* Find the connected MII xcvrs.
Doing this in open() would allow detecting external xcvrs later, but
@@ -804,7 +805,7 @@ static int epic_open(struct net_device *
/* Enable interrupts by setting the interrupt mask. */
outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
- | CntFull | TxUnderrun | TxDone | TxEmpty
+ | CntFull | TxUnderrun
| RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
if (debug > 1)
@@ -892,7 +893,7 @@ static void epic_restart(struct net_devi
/* Enable interrupts by setting the interrupt mask. */
outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170)
- | CntFull | TxUnderrun | TxDone | TxEmpty
+ | CntFull | TxUnderrun
| RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK);
printk(KERN_DEBUG "%s: epic_restart() done, cmd status %4.4x, ctl %4.4x"
@@ -1111,7 +1112,6 @@ static void epic_tx(struct net_device *d
* Note: if this lock becomes a problem we can narrow the locked
* region at the cost of occasionally grabbing the lock more times.
*/
- spin_lock(&ep->lock);
cur_tx = ep->cur_tx;
for (dirty_tx = ep->dirty_tx; cur_tx - dirty_tx > 0; dirty_tx++) {
struct sk_buff *skb;
@@ -1150,7 +1150,6 @@ static void epic_tx(struct net_device *d
ep->tx_full = 0;
netif_wake_queue(dev);
}
- spin_unlock(&ep->lock);
}
@@ -1187,9 +1186,6 @@ static irqreturn_t epic_interrupt(int ir
spin_unlock(&ep->napi_lock);
}
- if (status & (TxEmpty | TxDone))
- epic_tx(dev, ep);
-
/* Check uncommon events all at once. */
if (status &
(CntFull | TxUnderrun | PCIBusErr170 | PCIBusErr175)) {
@@ -1362,6 +1358,8 @@ static int epic_poll(struct net_device *
rx_action:
outl(EpicNapiEvent, ioaddr + INTSTAT);
+ epic_tx(dev, ep);
+
work_done = epic_rx(dev, *budget);
epic_rx_err(dev, ep);
_
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/4] 2.6.5-rc2 - epic100 update
2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
2004-03-22 23:51 ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
@ 2004-03-23 0:12 ` Jeff Garzik
1 sibling, 0 replies; 18+ messages in thread
From: Jeff Garzik @ 2004-03-23 0:12 UTC (permalink / raw)
To: Francois Romieu; +Cc: netdev, Andrew Morton
Francois Romieu wrote:
> Schedule:
>
> - epic100-fixup.patch: opportunistic cleanup
> - epic100-napi-00.patch: code shuffling before the move
> - epic100-napi-10.patch: rx napi (includes netif_running change)
> - epic100-napi-20.patch: minimalistic tx napi
>
> 2.6.5-rc2 and 2.6.5-rc2-mm1 contain the same drivers/net/epic100.c
> so the patches should apply equally to both.
>
> The driver has been moderately tested.
Thanks.
Applied to my netdev-2.6 queue, and so it should automatically appear in
Andrew's -mm tree soon.
Jeff
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2004-03-25 0:27 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-03-20 14:21 [PATCH] [RFT] 2.6.4 - epic100 napi Francois Romieu
2004-03-21 18:24 ` Jeff Garzik
2004-03-21 23:47 ` Francois Romieu
2004-03-23 14:29 ` OGAWA Hirofumi
2004-03-23 15:14 ` Jeff Garzik
2004-03-23 16:05 ` OGAWA Hirofumi
2004-03-23 18:51 ` Francois Romieu
2004-03-23 19:59 ` OGAWA Hirofumi
2004-03-24 0:41 ` Francois Romieu
2004-03-24 2:52 ` OGAWA Hirofumi
2004-03-24 12:33 ` Francois Romieu
2004-03-25 0:27 ` [PATCH] 2.6.5-rc2 - more " Francois Romieu
2004-03-22 23:50 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Francois Romieu
2004-03-22 23:51 ` [PATCH 1/4] 2.6.5-rc2 - epic100 fixup Francois Romieu
2004-03-22 23:52 ` [PATCH 2/4] 2.6.5-rc2 - epic100 napi Francois Romieu
2004-03-22 23:53 ` [PATCH 3/4] " Francois Romieu
2004-03-22 23:53 ` [PATCH 4/4] " Francois Romieu
2004-03-23 0:12 ` [PATCH 0/4] 2.6.5-rc2 - epic100 update Jeff Garzik
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.