* [PATCH 2.6] natsemi.c NAPI
@ 2004-09-20 14:10 Harald Welte
2004-09-20 18:57 ` David S. Miller
0 siblings, 1 reply; 8+ messages in thread
From: Harald Welte @ 2004-09-20 14:10 UTC (permalink / raw)
To: netdev
[-- Attachment #1: Type: text/plain, Size: 10686 bytes --]
Hi!
As announced before, here is the NAPI-patch for natsemi.c
My 266 MHz National Geode SC1100 (http://www.pcengines.ch/wrap.htm) with
two natsemi chips can now forward 32kpps at (64bytes, 148kpps, single
flow UDP input).
Any comments welcome, just like inclusion of this patch :)
Signed-off-by: Harald Welte <laforge@gnumonks.org>
diff -Nru linux-2.6.9-rc1-plain/drivers/net/Kconfig linux-2.6.9-rc1-natsemi-napi/drivers/net/Kconfig
--- linux-2.6.9-rc1-plain/drivers/net/Kconfig 2004-08-31 20:24:39.000000000 +0200
+++ linux-2.6.9-rc1-natsemi-napi/drivers/net/Kconfig 2004-09-14 22:10:29.000000000 +0200
@@ -1534,6 +1534,22 @@
and others, including the 83815 chip.
More specific information and updates are available from
<http://www.scyld.com/network/natsemi.html>.
+config NATSEMI_NAPI
+ bool "Use Rx Polling (NAPI) (EXPERIMENTAL)"
+ depends on NATSEMI && EXPERIMENTAL
+ help
+ NAPI is a new driver API designed to reduce CPU and interrupt load
+ when the driver is receiving lots of packets from the card. It is
+ still somewhat experimental and thus not yet enabled by default.
+
+ If your estimated Rx load is 10kpps or more, or if the card will be
+ deployed on potentially unfriendly networks (e.g. in a firewall),
+ then say Y here.
+
+ See <file:Documentation/networking/NAPI_HOWTO.txt> for more
+ information.
+
+ If in doubt, say N.
config NE2K_PCI
tristate "PCI NE2000 and clones support (see help)"
diff -Nru linux-2.6.9-rc1-plain/drivers/net/natsemi.c linux-2.6.9-rc1-natsemi-napi/drivers/net/natsemi.c
--- linux-2.6.9-rc1-plain/drivers/net/natsemi.c 2004-08-31 20:24:39.000000000 +0200
+++ linux-2.6.9-rc1-natsemi-napi/drivers/net/natsemi.c 2004-09-18 14:09:59.000000000 +0200
@@ -3,6 +3,7 @@
Written/copyright 1999-2001 by Donald Becker.
Portions copyright (c) 2001,2002 Sun Microsystems (thockin@sun.com)
Portions copyright 2001,2002 Manfred Spraul (manfred@colorfullife.com)
+ Portions copyright 2004 Harald Welte <laforge@gnumonks.org>
This software may be used and distributed according to the terms of
the GNU General Public License (GPL), incorporated herein by reference.
@@ -133,10 +134,12 @@
* comments update (Manfred)
* do the right thing on a phy-reset (Manfred and Tim)
+ version 1.0.18:
+ * Make use of NAPI (Harald Welte)
+
TODO:
* big endian support with CFG:BEM instead of cpu_to_le32
* support for an external PHY
- * NAPI
*/
#include <linux/config.h>
@@ -166,8 +169,8 @@
#include <asm/uaccess.h>
#define DRV_NAME "natsemi"
-#define DRV_VERSION "1.07+LK1.0.17"
-#define DRV_RELDATE "Sep 27, 2002"
+#define DRV_VERSION "1.07+LK1.0.18"
+#define DRV_RELDATE "Sep 18, 2004"
#define RX_OFFSET 2
@@ -183,8 +186,6 @@
NETIF_MSG_TX_ERR)
static int debug = -1;
-/* Maximum events (Rx packets, etc.) to handle at each interrupt. */
-static int max_interrupt_work = 20;
static int mtu;
/* Maximum number of multicast addresses to filter (vs. rx-all-multicast).
@@ -251,14 +252,11 @@
MODULE_DESCRIPTION("National Semiconductor DP8381x series PCI Ethernet driver");
MODULE_LICENSE("GPL");
-MODULE_PARM(max_interrupt_work, "i");
MODULE_PARM(mtu, "i");
MODULE_PARM(debug, "i");
MODULE_PARM(rx_copybreak, "i");
MODULE_PARM(options, "1-" __MODULE_STRING(MAX_UNITS) "i");
MODULE_PARM(full_duplex, "1-" __MODULE_STRING(MAX_UNITS) "i");
-MODULE_PARM_DESC(max_interrupt_work,
- "DP8381x maximum events handled per interrupt");
MODULE_PARM_DESC(mtu, "DP8381x MTU (all boards)");
MODULE_PARM_DESC(debug, "DP8381x default debug level");
MODULE_PARM_DESC(rx_copybreak,
@@ -416,8 +414,12 @@
StatsCtrl = 0x5C,
StatsData = 0x60,
RxPktErrs = 0x60,
- RxMissed = 0x68,
RxCRCErrs = 0x64,
+ RxMissed = 0x68,
+ RxFAErrors = 0x6C,
+ RxSymbolErrors = 0x70,
+ RxFrameTooLong = 0x74,
+ TxSQEErrors = 0x78,
BasicControl = 0x80,
BasicStatus = 0x84,
AnegAdv = 0x90,
@@ -769,6 +771,20 @@
static int netdev_get_regs(struct net_device *dev, u8 *buf);
static int netdev_get_eeprom(struct net_device *dev, u8 *buf);
+static inline void natsemi_irq_enable(struct netdev_private *np)
+{
+ /* Enable interrupts by setting the interrupt mask. */
+ writel(DEFAULT_INTR, np->base_addr + IntrMask);
+ writel(1, np->base_addr + IntrEnable);
+ mb();
+}
+
+static inline void natsemi_irq_disable(struct netdev_private *np)
+{
+ writel(0, np->base_addr + IntrEnable);
+ mb();
+}
+
static void move_int_phy(struct net_device *dev, int addr)
{
struct netdev_private *np = netdev_priv(dev);
@@ -923,6 +939,11 @@
dev->do_ioctl = &netdev_ioctl;
dev->tx_timeout = &tx_timeout;
dev->watchdog_timeo = TX_TIMEOUT;
+#ifdef CONFIG_NATSEMI_NAPI
+ dev->poll = natsemi_clean;
+ dev->weight = 64;
+#endif
+
#ifdef CONFIG_NET_POLL_CONTROLLER
dev->poll_controller = &natsemi_poll_controller;
#endif
@@ -2135,6 +2156,62 @@
}
}
+/* the second half of the interrupt handler, used for NAPI and non-NAPI path */
+#ifdef CONFIG_NATSEMI_NAPI
+static int intr_handler2(struct net_device *dev, int *work_done, int work_to_do)
+#else
+static int intr_handler2(struct net_device *dev)
+#endif
+{
+ struct netdev_private *np = netdev_priv(dev);
+ long ioaddr = dev->base_addr;
+ int boguscnt = max_interrupt_work;
+
+ /* Reading automatically acknowledges all int sources. */
+ u32 intr_status = readl(ioaddr + IntrStatus);
+
+ if (np->hands_off)
+ return 0;
+
+ if (intr_status == 0)
+ return 0;
+
+ if (netif_msg_intr(np))
+ printk(KERN_DEBUG
+ "%s: Interrupt, status %#08x, mask %#08x.\n",
+ dev->name, intr_status,
+ readl(ioaddr + IntrMask));
+
+ /* Abnormal error summary/uncommon events handlers. */
+ if (intr_status & IntrAbnormalSummary)
+ netdev_error(dev, intr_status);
+
+ if (intr_status &
+ (IntrTxDone | IntrTxIntr | IntrTxIdle | IntrTxErr)) {
+ spin_lock(&np->lock);
+ netdev_tx_done(dev);
+ spin_unlock(&np->lock);
+ }
+
+ if (intr_status &
+ (IntrRxDone | IntrRxIntr | RxStatusFIFOOver |
+ IntrRxErr | IntrRxOverrun)) {
+#ifdef CONFIG_NATSEMI_NAPI
+ netdev_rx(dev, work_done, work_to_do);
+#else
+ netdev_rx(dev);
+#endif
+ }
+ /* FIXME: if quota not yet fulfilled, read status and restart
+ * from top if non-null */
+
+ if (netif_msg_intr(np))
+ printk(KERN_DEBUG "%s: exiting interrupt.\n", dev->name);
+
+ return 1;
+}
+
+
/* The interrupt handler does all of the Rx thread work and cleans up
after the Tx thread. */
static irqreturn_t intr_handler(int irq, void *dev_instance, struct pt_regs *rgs)
@@ -2143,60 +2220,55 @@
struct netdev_private *np = netdev_priv(dev);
long ioaddr = dev->base_addr;
int boguscnt = max_interrupt_work;
- unsigned int handled = 0;
- if (np->hands_off)
+#ifdef CONFIG_NATSEMI_NAPI
+ if (n->hands_off)
return IRQ_NONE;
- do {
- /* Reading automatically acknowledges all int sources. */
- u32 intr_status = readl(ioaddr + IntrStatus);
- if (netif_msg_intr(np))
- printk(KERN_DEBUG
- "%s: Interrupt, status %#08x, mask %#08x.\n",
- dev->name, intr_status,
- readl(ioaddr + IntrMask));
-
- if (intr_status == 0)
- break;
- handled = 1;
-
- if (intr_status &
- (IntrRxDone | IntrRxIntr | RxStatusFIFOOver |
- IntrRxErr | IntrRxOverrun)) {
- netdev_rx(dev);
- }
+ /* We cannot read IntrStatus since this would acknowledge
+ * all interrupt sources. Thus we just blindly assume that
+ * the interrupt really was for us -HW! */
+
+ if (netif_schedule_prep(dev)) {
+ /* Disable interrupts and register for poll */
+ natsemi_irq_disable(np);
+ __netif_rx_schedule(dev);
+ }
+ /* FIXME: IRQ_NONE since we're not sure whether it was for us ? */
+ return IRQ_HANDLED;
+#else
+ return IRQ_RETVAL(intr_handler2(dev));
+#endif
+}
- if (intr_status &
- (IntrTxDone | IntrTxIntr | IntrTxIdle | IntrTxErr)) {
- spin_lock(&np->lock);
- netdev_tx_done(dev);
- spin_unlock(&np->lock);
- }
+#ifdef CONFIG_NATSEMI_NAPI
+static int natsemi_clean(struct net_device *dev, int *budget)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ int work_to_do = min(*budget, dev->quota);
+ int work_done = 0;
+
+ intr_handler2(dev, &work_done, work_to_do);
- /* Abnormal error summary/uncommon events handlers. */
- if (intr_status & IntrAbnormalSummary)
- netdev_error(dev, intr_status);
-
- if (--boguscnt < 0) {
- if (netif_msg_intr(np))
- printk(KERN_WARNING
- "%s: Too much work at interrupt, "
- "status=%#08x.\n",
- dev->name, intr_status);
- break;
- }
- } while (1);
+ *budget -= work_done;
+ dev->quota -= work_done;
- if (netif_msg_intr(np))
- printk(KERN_DEBUG "%s: exiting interrupt.\n", dev->name);
+ if (work_done < work_to_do || !netif_running(dev)) {
+ netif_rx_complete(dev);
+ natsemi_irq_enable(np);
+ }
- return IRQ_RETVAL(handled);
+ return (work_done >= work_to_do);
}
+#endif
/* This routine is logically part of the interrupt handler, but separated
for clarity and better register allocation. */
+#ifdef CONFIG_NATSEMI_NAPI
+static void netdev_rx(struct net_device *dev, int *work_done, int work_to_do)
+#else
static void netdev_rx(struct net_device *dev)
+#endif
{
struct netdev_private *np = netdev_priv(dev);
int entry = np->cur_rx % RX_RING_SIZE;
@@ -2213,6 +2285,14 @@
entry, desc_status);
if (--boguscnt < 0)
break;
+
+#ifdef CONFIG_NATSEMI_NAPI
+ if (*work_done >= work_to_do)
+ break;
+
+ (*work_done)++;
+#endif
+
pkt_len = (desc_status & DescSizeMask) - 4;
if ((desc_status&(DescMore|DescPktOK|DescRxLong)) != DescPktOK){
if (desc_status & DescMore) {
@@ -2269,7 +2349,11 @@
np->rx_skbuff[entry] = NULL;
}
skb->protocol = eth_type_trans(skb, dev);
+#ifdef CONFIG_NATSEMI_NAPI
+ netif_receive_skb(skb);
+#else
netif_rx(skb);
+#endif
dev->last_rx = jiffies;
np->stats.rx_packets++;
np->stats.rx_bytes += pkt_len;
@@ -2355,6 +2439,7 @@
/* The chip only need report frame silently dropped. */
np->stats.rx_crc_errors += readl(ioaddr + RxCRCErrs);
np->stats.rx_missed_errors += readl(ioaddr + RxMissed);
+ np->stats.tx_heartbeat_errors += readl(ioaddr + TxSQEErrors);
}
static struct net_device_stats *get_stats(struct net_device *dev)
--
- Harald Welte <laforge@gnumonks.org> http://www.gnumonks.org/
============================================================================
Programming is like sex: One mistake and you have to support it your lifetime
[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2.6] natsemi.c NAPI
2004-09-20 14:10 [PATCH 2.6] natsemi.c NAPI Harald Welte
@ 2004-09-20 18:57 ` David S. Miller
2004-09-20 19:07 ` Jeff Garzik
0 siblings, 1 reply; 8+ messages in thread
From: David S. Miller @ 2004-09-20 18:57 UTC (permalink / raw)
To: Harald Welte; +Cc: netdev
On Mon, 20 Sep 2004 16:10:30 +0200
Harald Welte <laforge@gnumonks.org> wrote:
> +static inline void natsemi_irq_enable(struct netdev_private *np)
> +{
> + /* Enable interrupts by setting the interrupt mask. */
> + writel(DEFAULT_INTR, np->base_addr + IntrMask);
> + writel(1, np->base_addr + IntrEnable);
> + mb();
> +}
> +
> +static inline void natsemi_irq_disable(struct netdev_private *np)
> +{
> + writel(0, np->base_addr + IntrEnable);
> + mb();
> +}
Kill the mb(), try using:
readl(np->base_addr + IntrEnable);
in it's place.
This driver (before the NAPI patch) is a bit buggy, it does
no RX processing SMP locking. So if one one cpu you're doing
RX interrupt processing, and on another cpu tx_timeout() runs
we're totally screwed.
This is not caused by Harald's patch, but there are implications
for his NAPI patch once it is fixed.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2.6] natsemi.c NAPI
2004-09-20 18:57 ` David S. Miller
@ 2004-09-20 19:07 ` Jeff Garzik
2004-09-20 19:12 ` David S. Miller
0 siblings, 1 reply; 8+ messages in thread
From: Jeff Garzik @ 2004-09-20 19:07 UTC (permalink / raw)
To: David S. Miller; +Cc: Harald Welte, netdev
On Mon, Sep 20, 2004 at 11:57:46AM -0700, David S. Miller wrote:
> This driver (before the NAPI patch) is a bit buggy, it does
> no RX processing SMP locking. So if one one cpu you're doing
> RX interrupt processing, and on another cpu tx_timeout() runs
> we're totally screwed.
RX processing in current natsemi only happens in irq, and the natsemi
driver properly disables irqs when it needs to, AFAICS, using
enable/disable_irq().
Jeff
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2.6] natsemi.c NAPI
2004-09-20 19:07 ` Jeff Garzik
@ 2004-09-20 19:12 ` David S. Miller
0 siblings, 0 replies; 8+ messages in thread
From: David S. Miller @ 2004-09-20 19:12 UTC (permalink / raw)
To: Jeff Garzik; +Cc: laforge, netdev
On Mon, 20 Sep 2004 15:07:46 -0400
Jeff Garzik <jgarzik@pobox.com> wrote:
> On Mon, Sep 20, 2004 at 11:57:46AM -0700, David S. Miller wrote:
> > This driver (before the NAPI patch) is a bit buggy, it does
> > no RX processing SMP locking. So if one one cpu you're doing
> > RX interrupt processing, and on another cpu tx_timeout() runs
> > we're totally screwed.
>
> RX processing in current natsemi only happens in irq, and the natsemi
> driver properly disables irqs when it needs to, AFAICS, using
> enable/disable_irq().
Aha, ok that works.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2.6] natsemi.c NAPI
[not found] ` <4155D781.8050700@colorfullife.com>
@ 2004-09-27 9:11 ` Harald Welte
2004-09-27 10:30 ` Robert Olsson
[not found] ` <20040927081411.GC3236@sunbeam.de.gnumonks.org>
1 sibling, 1 reply; 8+ messages in thread
From: Harald Welte @ 2004-09-27 9:11 UTC (permalink / raw)
To: netdev
[-- Attachment #1: Type: text/plain, Size: 2781 bytes --]
[resend, went to wrong list address]
Manfred, thanks for your comments.
[explicit Cc to jamal becuase of NAPI-related stuff below]
On Sat, Sep 25, 2004 at 10:39:29PM +0200, Manfred Spraul wrote:
> >+config NATSEMI_NAPI
> >+ bool "Use Rx Polling (NAPI) (EXPERIMENTAL)"
> >
> I'm not a big fan on config options for drivers. I'd prefer a runtime
> option.
I see.
> > TODO:
> > * big endian support with CFG:BEM instead of cpu_to_le32
> > * support for an external PHY
> >- * NAPI
> >
> >
> Hmm. Actually all TODO points are done: CFG:BEM is unusable because it
> swaps data words.
> External PHYs are supported. You are closing the last point.
Ok, I'll remove all three with my next patch
> Additional registers. You didn't mention that in the changelog.
> Changelogs should mention every change, please do not piggypack
> unrelated changes.
ack.
> >+ long ioaddr = dev->base_addr;
> >+ int boguscnt = max_interrupt_work;
> >+
> This doesn't compile: you've removed max_interrupt_work.
mh, apparently I've sent an outdated patch then. I'll repost an updated
version once I know what the solution for IntrStatus is
> >+ /* We cannot read IntrStatus since this would acknowledge
> >+ * all interrupt sources. Thus we just blindly assume that
> >+ * the interrupt really was for us -HW! */
> Huge problem - this is not acceptable. It means the driver is unusable
> for shared interrupts. We must find a solution.
yes, I know :( Luckily on my embedded boards, there are no shared
interrupts - but this is obviously a special case.
I think this overall problem can be solved if there was some per-device
variable that saves the IntrStatus until the NAPI callback gets
scheduled. What do you think? This wouldn't even need some locking,
since interrupts would be disabled before the field is updated, and not
re-enabled before the field is read by the NAPI callback?
I was surprised that this solution is not suggested in the NAPI-HOWTO.txt, so I though there must be an error in my proposal...
By using such a scheme, isn't it also possible to only offload RX into
the NAPI callback with clear-on-read devices?
> IRQ_NONE would be definitively wrong: The kernel disables interrupt
> sources if it can't find a handler. If our handler returns IRQ_NONE and
> the irq is not shared, then the kernel will disable the irq after a
> short while.
ok.
> Also missing in the changelog.
is related to the register definitions above.
> Manfred
--
- Harald Welte <laforge@gnumonks.org> http://www.gnumonks.org/
============================================================================
Programming is like sex: One mistake and you have to support it your lifetime
[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2.6] natsemi.c NAPI
2004-09-27 9:11 ` Harald Welte
@ 2004-09-27 10:30 ` Robert Olsson
2004-09-27 17:30 ` Eric Lemoine
0 siblings, 1 reply; 8+ messages in thread
From: Robert Olsson @ 2004-09-27 10:30 UTC (permalink / raw)
To: Harald Welte; +Cc: netdev
> I think this overall problem can be solved if there was some per-device
> variable that saves the IntrStatus until the NAPI callback gets
> scheduled. What do you think? This wouldn't even need some locking,
> since interrupts would be disabled before the field is updated, and not
> re-enabled before the field is read by the NAPI callback?
>
> I was surprised that this solution is not suggested in the NAPI-HOWTO.txt, so I though there must be an error in my proposal...
>
> By using such a scheme, isn't it also possible to only offload RX into
> the NAPI callback with clear-on-read devices?
>
e1000 used such technique before.If a remember correctly IntrStatus was
saved in device priv struct.
Cheers.
--ro
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2.6] natsemi.c NAPI
2004-09-27 10:30 ` Robert Olsson
@ 2004-09-27 17:30 ` Eric Lemoine
0 siblings, 0 replies; 8+ messages in thread
From: Eric Lemoine @ 2004-09-27 17:30 UTC (permalink / raw)
To: Robert Olsson; +Cc: Harald Welte, netdev
> > I think this overall problem can be solved if there was some per-device
> > variable that saves the IntrStatus until the NAPI callback gets
> > scheduled. What do you think? This wouldn't even need some locking,
> > since interrupts would be disabled before the field is updated, and not
> > re-enabled before the field is read by the NAPI callback?
> >
> > I was surprised that this solution is not suggested in the NAPI-HOWTO.txt, so I though there must be an error in my proposal...
> >
> > By using such a scheme, isn't it also possible to only offload RX into
> > the NAPI callback with clear-on-read devices?
> >
>
> e1000 used such technique before.If a remember correctly IntrStatus was
> saved in device priv struct.
That's also how it is done in current sungem.
--
Eric
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2.6] natsemi.c NAPI
[not found] ` <4158E74C.7030709@colorfullife.com>
@ 2004-09-28 10:39 ` jamal
0 siblings, 0 replies; 8+ messages in thread
From: jamal @ 2004-09-28 10:39 UTC (permalink / raw)
To: Manfred Spraul; +Cc: Harald Welte, netdev
On Tue, 2004-09-28 at 00:23, Manfred Spraul wrote:
> jamal wrote:
[..]
> >Does this mean that the interupt clears when read?
> >Why would reading a NIC register clear a soundcard interupt?
> >
> >
> >
> No, of course not. But each soundcard interrupt would cause a
> netif_rx_schedule, with the following callback from tasklet context.
> IMHO the overhead of that operation is too large. Additionally it breaks
> the stuck irq detection.
Ok, In such a case it is advised that a e1000 style NAPI is used
(as opposed to the tulip varinat).
Sorry, I should actually read the code first - I ma hoping i am in
context here.
cheers,
jamal
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2004-09-28 10:39 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-09-20 14:10 [PATCH 2.6] natsemi.c NAPI Harald Welte
2004-09-20 18:57 ` David S. Miller
2004-09-20 19:07 ` Jeff Garzik
2004-09-20 19:12 ` David S. Miller
[not found] <20040919163642.GC1307@sunbeam.de.gnumonks.org>
[not found] ` <4155D781.8050700@colorfullife.com>
2004-09-27 9:11 ` Harald Welte
2004-09-27 10:30 ` Robert Olsson
2004-09-27 17:30 ` Eric Lemoine
[not found] ` <20040927081411.GC3236@sunbeam.de.gnumonks.org>
[not found] ` <1096343829.8660.107.camel@jzny.localdomain>
[not found] ` <4158E74C.7030709@colorfullife.com>
2004-09-28 10:39 ` jamal
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).