netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [Fwd: [E1000] NAPI re-insertion w/ changes]
@ 2003-03-21 14:46 Jeff Garzik
  2003-03-22 15:34 ` Robert Olsson
  0 siblings, 1 reply; 20+ messages in thread
From: Jeff Garzik @ 2003-03-21 14:46 UTC (permalink / raw)
  To: netdev

[-- Attachment #1: Type: text/plain, Size: 105 bytes --]

For review by the list... The other e100/e1000 changes are in the 2.5 
nightly snapshot on kernel.org...

[-- Attachment #2: [E1000] NAPI re-insertion w/ changes --]
[-- Type: message/rfc822, Size: 6734 bytes --]

From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
To: bk-commits-head@vger.kernel.org
Subject: [E1000] NAPI re-insertion w/ changes
Date: Fri, 21 Mar 2003 04:46:10 +0000
Message-ID: <200303210713.h2L7Dn802239@hera.kernel.org>

ChangeSet 1.1101.22.17, 2003/03/20 23:46:10-05:00, cramerj@intel.com

	[E1000] NAPI re-insertion w/ changes
	
	* Previous patch wiped NAPI support, adding it back here.  But,
	  with a twist: this one doesn't disable/enable interrupts each
	  time we enter/leave polling.  (It's EXPERIMENTAL).


# This patch includes the following deltas:
#	           ChangeSet	1.1101.22.16 -> 1.1101.22.17
#	drivers/net/e1000/e1000_main.c	1.60    -> 1.61   
#

 e1000_main.c |   70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 69 insertions(+), 1 deletion(-)


diff -Nru a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
--- a/drivers/net/e1000/e1000_main.c	Thu Mar 20 23:13:52 2003
+++ b/drivers/net/e1000/e1000_main.c	Thu Mar 20 23:13:52 2003
@@ -155,8 +155,14 @@
 static inline void e1000_irq_disable(struct e1000_adapter *adapter);
 static inline void e1000_irq_enable(struct e1000_adapter *adapter);
 static void e1000_intr(int irq, void *data, struct pt_regs *regs);
-static boolean_t e1000_clean_tx_irq(struct e1000_adapter *adapter);
+#ifdef CONFIG_E1000_NAPI
+static int e1000_clean(struct net_device *netdev, int *budget);
+static boolean_t e1000_clean_rx_irq(struct e1000_adapter *adapter,
+                                    int *work_done, int work_to_do);
+#else
 static boolean_t e1000_clean_rx_irq(struct e1000_adapter *adapter);
+#endif
+static boolean_t e1000_clean_tx_irq(struct e1000_adapter *adapter);
 static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter);
 static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
 static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
@@ -418,6 +424,10 @@
 	netdev->do_ioctl = &e1000_ioctl;
 	netdev->tx_timeout = &e1000_tx_timeout;
 	netdev->watchdog_timeo = 5 * HZ;
+#ifdef CONFIG_E1000_NAPI
+	netdev->poll = &e1000_clean;
+	netdev->weight = 64;
+#endif
 	netdev->vlan_rx_register = e1000_vlan_rx_register;
 	netdev->vlan_rx_add_vid = e1000_vlan_rx_add_vid;
 	netdev->vlan_rx_kill_vid = e1000_vlan_rx_kill_vid;
@@ -1977,7 +1987,9 @@
 	struct net_device *netdev = data;
 	struct e1000_adapter *adapter = netdev->priv;
 	uint32_t icr = E1000_READ_REG(&adapter->hw, ICR);
+#ifndef CONFIG_E1000_NAPI
 	int i;
+#endif
 
 	if(!icr)
 		return;  /* Not our interrupt */
@@ -1987,12 +1999,46 @@
 		mod_timer(&adapter->watchdog_timer, jiffies);
 	}
 
+#ifdef CONFIG_E1000_NAPI
+	/* Don't disable interrupts - rely on h/w interrupt
+	 * moderation to keep interrupts low.  netif_rx_schedule
+	 * is NOP if already polling. */
+	netif_rx_schedule(netdev);
+#else
 	for(i = 0; i < E1000_MAX_INTR; i++)
 		if(!e1000_clean_rx_irq(adapter) &&
 		   !e1000_clean_tx_irq(adapter))
 			break;
+#endif
+}
 
+#ifdef CONFIG_E1000_NAPI
+/**
+ * e1000_clean - NAPI Rx polling callback
+ * @adapter: board private structure
+ **/
+
+static int
+e1000_clean(struct net_device *netdev, int *budget)
+{
+	struct e1000_adapter *adapter = netdev->priv;
+	int work_to_do = min(*budget, netdev->quota);
+	int work_done = 0;
+	
+	while(work_done < work_to_do)
+		if(!e1000_clean_rx_irq(adapter, &work_done, work_to_do) &&
+		   !e1000_clean_tx_irq(adapter))
+			break;
+
+	*budget -= work_done;
+	netdev->quota -= work_done;
+	
+	if(work_done < work_to_do)
+		netif_rx_complete(netdev);
+
+	return (work_done >= work_to_do);
 }
+#endif
 
 /**
  * e1000_clean_tx_irq - Reclaim resources after transmit completes
@@ -2054,7 +2100,12 @@
  **/
 
 static boolean_t
+#ifdef CONFIG_E1000_NAPI
+e1000_clean_rx_irq(struct e1000_adapter *adapter, int *work_done,
+                   int work_to_do)
+#else
 e1000_clean_rx_irq(struct e1000_adapter *adapter)
+#endif
 {
 	struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
 	struct net_device *netdev = adapter->netdev;
@@ -2071,6 +2122,13 @@
 
 	while(rx_desc->status & E1000_RXD_STAT_DD) {
 
+#ifdef CONFIG_E1000_NAPI
+		if(*work_done >= work_to_do)
+			break;
+
+		(*work_done)++;
+#endif
+
 		cleaned = TRUE;
 
 		pci_unmap_single(pdev,
@@ -2133,12 +2191,22 @@
 		e1000_rx_checksum(adapter, rx_desc, skb);
 
 		skb->protocol = eth_type_trans(skb, netdev);
+#ifdef CONFIG_E1000_NAPI
+		if(adapter->vlgrp && (rx_desc->status & E1000_RXD_STAT_VP)) {
+			vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
+				(rx_desc->special & E1000_RXD_SPC_VLAN_MASK));
+		} else {
+			netif_receive_skb(skb);
+		}
+#else /* CONFIG_E1000_NAPI */
 		if(adapter->vlgrp && (rx_desc->status & E1000_RXD_STAT_VP)) {
 			vlan_hwaccel_rx(skb, adapter->vlgrp,
 				(rx_desc->special & E1000_RXD_SPC_VLAN_MASK));
 		} else {
 			netif_rx(skb);
 		}
+#endif /* CONFIG_E1000_NAPI */
+
 		netdev->last_rx = jiffies;
 
 		rx_desc->status = 0;
-
To unsubscribe from this list: send the line "unsubscribe bk-commits-head" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Fwd: [E1000] NAPI re-insertion w/ changes]
  2003-03-21 14:46 [Fwd: [E1000] NAPI re-insertion w/ changes] Jeff Garzik
@ 2003-03-22 15:34 ` Robert Olsson
  0 siblings, 0 replies; 20+ messages in thread
From: Robert Olsson @ 2003-03-22 15:34 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: netdev



Jeff Garzik writes:
 > For review by the list... The other e100/e1000 changes are in the 2.5 

 > +#ifdef CONFIG_E1000_NAPI
 > +	/* Don't disable interrupts - rely on h/w interrupt
 > +	 * moderation to keep interrupts low.  netif_rx_schedule
 > +	 * is NOP if already polling. */
 > +	netif_rx_schedule(netdev);
 > +#else


It's clean but I have some concerns...

I think this will add interrupts when resources are fully utilized.
In other words a decrease in top performance. I say "think" because I have 
no numbers. 

At GIGE rate we have ~1 k interrupts/sec using interrupt delay. (it depend 
of ring sizes etc). We are now seeing Linux boxes with ~10 GIGE interfaces. 
So any effects gets multiplied.

It makes the use zero latency RX complicated. We see Ethernet getting
used for "new" applications as SCSI, filesystems etc. In current e1000 
we just can set the desired interrupt delay and relax.

If/when PCI-X uses message signalled interrupts (MSI) we have this "un-
necessary" load over PCI too with bus arbitrations etc.


IMO believe your old plan having e1000 irq disable and with mitigation as 
default feels better but testing is needed.

Cheers.

						--ro

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [Fwd: [E1000] NAPI re-insertion w/ changes]
@ 2003-03-22 18:47 Feldman, Scott
  2003-03-22 20:28 ` Robert Olsson
  2003-03-31 17:14 ` Robert Olsson
  0 siblings, 2 replies; 20+ messages in thread
From: Feldman, Scott @ 2003-03-22 18:47 UTC (permalink / raw)
  To: Robert Olsson, Jeff Garzik; +Cc: netdev

> It's clean but I have some concerns...

Thanks for the feedback.  It's a twist on the previous driver where we
disabled/enabled interrupts each time we went in/out of polling.  Trying
to avoid those extra PCI writes.  My experience is that you have to
really load up the interface to stay in polling mode (get up on step).

> I think this will add interrupts when resources are fully 
> utilized. In other words a decrease in top performance. I say 
> "think" because I have 
> no numbers. 
> 
> At GIGE rate we have ~1 k interrupts/sec using interrupt 
> delay. (it depend 
> of ring sizes etc). We are now seeing Linux boxes with ~10 
> GIGE interfaces. 
> So any effects gets multiplied.

Should be the same interrupt rate with or without NAPI.
 
> It makes the use zero latency RX complicated. We see Ethernet 
> getting used for "new" applications as SCSI, filesystems etc. 
> In current e1000 
> we just can set the desired interrupt delay and relax.
> 
> If/when PCI-X uses message signalled interrupts (MSI) we have 
> this "un- necessary" load over PCI too with bus arbitrations etc.
> 
> 
> IMO believe your old plan having e1000 irq disable and with 
> mitigation as 
> default feels better but testing is needed.

Easy enough to revert back.  I don't think we've lost any of the
non-perf benefits of NAPI, and if testing shows no meaningful perf
difference, let's let Occam's razor rule.

-scott

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [Fwd: [E1000] NAPI re-insertion w/ changes]
  2003-03-22 18:47 Feldman, Scott
@ 2003-03-22 20:28 ` Robert Olsson
  2003-03-31 17:14 ` Robert Olsson
  1 sibling, 0 replies; 20+ messages in thread
From: Robert Olsson @ 2003-03-22 20:28 UTC (permalink / raw)
  To: Feldman, Scott; +Cc: Robert Olsson, Jeff Garzik, netdev


Feldman, Scott writes:
 > > It's clean but I have some concerns...
 > 
 > Thanks for the feedback.  It's a twist on the previous driver where we
 > disabled/enabled interrupts each time we went in/out of polling.  Trying
 > to avoid those extra PCI writes.  My experience is that you have to
 > really load up the interface to stay in polling mode (get up on step).

 True. Making interrupt delay larger will collect more packets on RX-ring
 and have the two PCI-writes to disable/enables irq to be shared by many 
 packets.

 > Should be the same interrupt rate with or without NAPI.
 
 When NAPI stays in polling there are no interrupts and no extra PCI-writes
 so the high-load situation is optimized. 

 So I fear that interrupts are now added to the high-load situation and this
 will impact top performance -- especially with many NIC's.

 But lets see what comes out from testing.
 
 Cheers.
						--ro

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [Fwd: [E1000] NAPI re-insertion w/ changes]
@ 2003-03-27 16:54 Robert Olsson
  2003-03-27 17:10 ` Jason Lunz
  0 siblings, 1 reply; 20+ messages in thread
From: Robert Olsson @ 2003-03-27 16:54 UTC (permalink / raw)
  To: Feldman, Scott; +Cc: Robert Olsson, Jeff Garzik, netdev


Feldman, Scott writes:

 > Easy enough to revert back.  I don't think we've lost any of the
 > non-perf benefits of NAPI, and if testing shows no meaningful perf
 > difference, let's let Occam's razor rule.

Hello!

Here is a hi-perf routing/DoS to start with...

10 Million pkts injected at high speed into eth2 and forwarded to eth3. Rx and 
Tx buffers are 256 and HW_FLOW is disabled and RxIntDelay=1. Which is same 
parameters as we use for production systems. As seen link now flaps. 
Eventually can hw_flowcontrol and interrupt delays help this... but thats not 
an option at least not for us. 


Twist:        New                        Old
             ====================================
Input rate:    680 (due to link drop)    820 kpps
T-put:         309                       385 kpps 
RX irq's:    78963                       434


Cheers.
						--ro


e1000 2.5.66 NAPI (680 kpps in)
-------------------------------
NETDEV WATCHDOG: eth2: transmit timed out
e1000: eth2 NIC Link is Up 1000 Mbps Full Duplex

Iface   MTU Met  RX-OK RX-ERR RX-DRP RX-OVR  TX-OK TX-ERR TX-DRP TX-OVR Flags
eth2   1500   0 4175392 7277442 7277442 4696985     18      0      0      0 BRU
eth3   1500   0      1      0      0      0 4555009      0      0      0 BRU

 26:      78963   IO-APIC-level  eth2
 27:      92979   IO-APIC-level  eth3


e1000 2.5.66 NAPI with patch (820 kpps in)
------------------------------------------
Iface   MTU Met  RX-OK RX-ERR RX-DRP RX-OVR  TX-OK TX-ERR TX-DRP TX-OVR Flags
eth2   1500   0 4708196 8173469 8173469 5291805     18      0      0      0 BRU
eth3   1500   0      1      0      0      0 4708135      0      0      0 BRU

 26:        434   IO-APIC-level  eth2
 27:      74584   IO-APIC-level  eth3


--- e1000_main.c.orig   2003-03-27 14:38:02.000000000 +0100
+++ e1000_main.c        2003-03-27 16:43:38.000000000 +0100
@@ -2000,9 +2000,12 @@
        }
 
 #ifdef CONFIG_E1000_NAPI
-       /* Don't disable interrupts - rely on h/w interrupt
-        * moderation to keep interrupts low.  netif_rx_schedule
-        * is NOP if already polling. */
+       /* Disable interrupts and register for poll. The flush 
+          of the posted write is intentionally left out.
+       */
+
+       atomic_inc(&adapter->irq_sem);
+       E1000_WRITE_REG(&adapter->hw, IMC, ~0);
        netif_rx_schedule(netdev);
 #else
        for(i = 0; i < E1000_MAX_INTR; i++)
@@ -2024,18 +2027,17 @@
        struct e1000_adapter *adapter = netdev->priv;
        int work_to_do = min(*budget, netdev->quota);
        int work_done = 0;
-
-       while(work_done < work_to_do)
-               if(!e1000_clean_rx_irq(adapter, &work_done, work_to_do) &&
-                  !e1000_clean_tx_irq(adapter))
-                       break;
+
+       e1000_clean_tx_irq(adapter);
+       e1000_clean_rx_irq(adapter, &work_done, work_to_do);
 
        *budget -= work_done;
        netdev->quota -= work_done;
 
-       if(work_done < work_to_do)
+       if(work_done < work_to_do) {
                netif_rx_complete(netdev);
-
+               e1000_irq_enable(adapter);
+       }
        return (work_done >= work_to_do);
 }
 #endif

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Fwd: [E1000] NAPI re-insertion w/ changes]
  2003-03-27 16:54 Robert Olsson
@ 2003-03-27 17:10 ` Jason Lunz
  2003-03-28  8:27   ` Robert Olsson
  0 siblings, 1 reply; 20+ messages in thread
From: Jason Lunz @ 2003-03-27 17:10 UTC (permalink / raw)
  To: netdev

Robert.Olsson@data.slu.se said:
> 10 Million pkts injected at high speed into eth2 and forwarded to eth3. Rx and 
> Tx buffers are 256 and HW_FLOW is disabled and RxIntDelay=1. Which is same 
> parameters as we use for production systems. As seen link now flaps. 
> Eventually can hw_flowcontrol and interrupt delays help this... but thats not 
> an option at least not for us. 
> 
> 
> Twist:        New                        Old
>              ====================================
> Input rate:    680 (due to link drop)    820 kpps
> T-put:         309                       385 kpps 
> RX irq's:    78963                       434

I've seen pretty much the same thing. I plotted throughput vs. offered
load for e1000 4.4.12-k1, 4.4.19-k3, and 5.0.43-k1 (all backported to
2.4.20). A summary with graphs is at:

http://gtf.org/lunz/linux/net/perf/

5.0.43 seems to be a significant regression, both in terms of throughput
and CPU load.

-- 
Jason Lunz			Reflex Security
lunz@reflexsecurity.com		http://www.reflexsecurity.com/

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Fwd: [E1000] NAPI re-insertion w/ changes]
  2003-03-27 17:10 ` Jason Lunz
@ 2003-03-28  8:27   ` Robert Olsson
  2003-03-28 17:32     ` Jason Lunz
  0 siblings, 1 reply; 20+ messages in thread
From: Robert Olsson @ 2003-03-28  8:27 UTC (permalink / raw)
  To: Jason Lunz; +Cc: netdev


Jason Lunz writes:

 > I've seen pretty much the same thing. I plotted throughput vs. offered
 > load for e1000 4.4.12-k1, 4.4.19-k3, and 5.0.43-k1 (all backported to
 > 2.4.20). A summary with graphs is at:
 > 
 > http://gtf.org/lunz/linux/net/perf/
 
 Illustrative.

 > 5.0.43 seems to be a significant regression, both in terms of throughput
 > and CPU load.
 
 Can you test the patch I sent for 5.0.43 with your equipment/setup?
 

 Cheers.
						--ro

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Fwd: [E1000] NAPI re-insertion w/ changes]
  2003-03-28  8:27   ` Robert Olsson
@ 2003-03-28 17:32     ` Jason Lunz
  2003-03-28 18:43       ` Robert Olsson
  0 siblings, 1 reply; 20+ messages in thread
From: Jason Lunz @ 2003-03-28 17:32 UTC (permalink / raw)
  To: netdev

Robert.Olsson@data.slu.se said:
>  Can you test the patch I sent for 5.0.43 with your equipment/setup?

We have a winner! I called the driver with your patch 5.0.43-k1-ro1. I
also removed all the cyclesoak lines; all they really show is that
throughput is really sensitive to how much CPU time ksoftirqd gets.

http://gtf.org/lunz/linux/net/perf/ is still the url.

Jason

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Fwd: [E1000] NAPI re-insertion w/ changes]
  2003-03-28 17:32     ` Jason Lunz
@ 2003-03-28 18:43       ` Robert Olsson
  0 siblings, 0 replies; 20+ messages in thread
From: Robert Olsson @ 2003-03-28 18:43 UTC (permalink / raw)
  To: Jason Lunz; +Cc: netdev


Jason Lunz writes:
 > Robert.Olsson@data.slu.se said:
 > >  Can you test the patch I sent for 5.0.43 with your equipment/setup?
 > 
 > We have a winner! I called the driver with your patch 5.0.43-k1-ro1. I
 > also removed all the cyclesoak lines; all they really show is that
 > throughput is really sensitive to how much CPU time ksoftirqd gets.
 > 
 > http://gtf.org/lunz/linux/net/perf/ is still the url.

 Excellent!

 We have a flat performance curve at ~350 kpps at any load and packet size 
 again. Of course this can be improved further.

 I think Intel still did the hard work but left the "fine" tuning and testing 
 for us. :-) 

 I guess we propose the patch to maintainers.

 Cheers
						--ro
 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [Fwd: [E1000] NAPI re-insertion w/ changes]
  2003-03-22 18:47 Feldman, Scott
  2003-03-22 20:28 ` Robert Olsson
@ 2003-03-31 17:14 ` Robert Olsson
  1 sibling, 0 replies; 20+ messages in thread
From: Robert Olsson @ 2003-03-31 17:14 UTC (permalink / raw)
  To: Feldman, Scott; +Cc: Robert Olsson, Jeff Garzik, netdev



Hello!

A better approximation I think but probably not the last...

Cheers.
						--ro



--- linux/drivers/net/e1000/e1000_main.c.orig   2003-03-27 14:38:02.000000000 +0100
+++ linux/drivers/net/e1000/e1000_main.c        2003-03-31 17:56:05.000000000 +0200
@@ -1999,11 +1999,17 @@
                mod_timer(&adapter->watchdog_timer, jiffies);
        }
 
-#ifdef CONFIG_E1000_NAPI
-       /* Don't disable interrupts - rely on h/w interrupt
-        * moderation to keep interrupts low.  netif_rx_schedule
-        * is NOP if already polling. */
-       netif_rx_schedule(netdev);
+#ifdef  CONFIG_E1000_NAPI
+       if (netif_rx_schedule_prep(netdev)) {
+
+               /* Disable interrupts and register for poll. The flush 
+                  of the posted write is intentionally left out.
+               */
+
+               atomic_inc(&adapter->irq_sem);
+               E1000_WRITE_REG(&adapter->hw, IMC, ~0);
+               __netif_rx_schedule(netdev);
+       }
 #else
        for(i = 0; i < E1000_MAX_INTR; i++)
                if(!e1000_clean_rx_irq(adapter) &&
@@ -2025,17 +2031,16 @@
        int work_to_do = min(*budget, netdev->quota);
        int work_done = 0;
        
-       while(work_done < work_to_do)
-               if(!e1000_clean_rx_irq(adapter, &work_done, work_to_do) &&
-                  !e1000_clean_tx_irq(adapter))
-                       break;
+       e1000_clean_tx_irq(adapter);
+       e1000_clean_rx_irq(adapter, &work_done, work_to_do);
 
        *budget -= work_done;
        netdev->quota -= work_done;
        
-       if(work_done < work_to_do)
+       if(work_done < work_to_do) {
                netif_rx_complete(netdev);
-
+               e1000_irq_enable(adapter);
+       }
        return (work_done >= work_to_do);
 }
#endif

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [Fwd: [E1000] NAPI re-insertion w/ changes]
@ 2003-04-01 17:47 Feldman, Scott
  2003-04-01 18:57 ` Robert Olsson
  2003-04-01 21:16 ` Jeff Garzik
  0 siblings, 2 replies; 20+ messages in thread
From: Feldman, Scott @ 2003-04-01 17:47 UTC (permalink / raw)
  To: Robert Olsson, Jason Lunz; +Cc: Jeff Garzik, netdev

> A better approximation I think but probably not the last...

Thanks for you help guys!  I broke NAPI (don't hate me) and then went on
vacation, so I apologize for not responding sooner.  I like what you've
come up with here, so I'll turn the patch around for Jeff to update 2.5
e1000.

> +       e1000_clean_tx_irq(adapter);
> +       e1000_clean_rx_irq(adapter, &work_done, work_to_do);

Just curious, why give priority to Tx over Rx?

-scott

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [Fwd: [E1000] NAPI re-insertion w/ changes]
  2003-04-01 17:47 Feldman, Scott
@ 2003-04-01 18:57 ` Robert Olsson
  2003-04-01 21:16 ` Jeff Garzik
  1 sibling, 0 replies; 20+ messages in thread
From: Robert Olsson @ 2003-04-01 18:57 UTC (permalink / raw)
  To: Feldman, Scott; +Cc: Robert Olsson, Jason Lunz, Jeff Garzik, netdev


Feldman, Scott writes:

 > Thanks for you help guys!  I broke NAPI (don't hate me) and then went on
 > vacation, so I apologize for not responding sooner.  I like what you've
 > come up with here, so I'll turn the patch around for Jeff to update 2.5
 > e1000.
 > 
 > > +       e1000_clean_tx_irq(adapter);
 > > +       e1000_clean_rx_irq(adapter, &work_done, work_to_do);
 > 
 > Just curious, why give priority to Tx over Rx?

 Hello!

 Honestly not too much thinking... well we can always argue by having
 Rx last we get more packets per poll. Yes we should test the other
 way around.

 Also still some concern by having the watchdog kicked in e1000_intr 
 wouldn't e1000_clean feel better? 

 It would also be interesting to see what happens if we remove the flush when 
 enable interrupts in e1000_clean?

 I've tested the patch that removes some of the PCI-accesses. I think I sent
 you some version of it. Anyway it seems to have some effects. If Jason has 
 time we will get some numbers from his instrument/setup.

 Cheers.
						--ro
 
 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [Fwd: [E1000] NAPI re-insertion w/ changes]
@ 2003-04-01 19:13 Feldman, Scott
  2003-04-01 19:23 ` Jason Lunz
  2003-04-01 19:44 ` Robert Olsson
  0 siblings, 2 replies; 20+ messages in thread
From: Feldman, Scott @ 2003-04-01 19:13 UTC (permalink / raw)
  To: Robert Olsson; +Cc: Jason Lunz, Jeff Garzik, netdev

>  Also still some concern by having the watchdog kicked in e1000_intr 
>  wouldn't e1000_clean feel better? 

e1000_intr feels better to me in keeping the clean-up work separate from
managing link status change (hopefully an infrequent event :).  Do you
see any problems?
 
>  It would also be interesting to see what happens if we 
> remove the flush when enable interrupts in e1000_clean?

Ya, that read hurts, which was part of the motivation to get rid of the
disable/enables altogether.  

>  I've tested the patch that removes some of the PCI-accesses. 
> I think I sent  you some version of it. Anyway it seems to 
> have some effects. If Jason has 
>  time we will get some numbers from his instrument/setup.

Ok, let's see.

-scott

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Fwd: [E1000] NAPI re-insertion w/ changes]
  2003-04-01 19:13 Feldman, Scott
@ 2003-04-01 19:23 ` Jason Lunz
  2003-04-01 22:40   ` Jason Lunz
  2003-04-01 19:44 ` Robert Olsson
  1 sibling, 1 reply; 20+ messages in thread
From: Jason Lunz @ 2003-04-01 19:23 UTC (permalink / raw)
  To: netdev

scott.feldman@intel.com said:
>>  I've tested the patch that removes some of the PCI-accesses.  I
>>  think I sent  you some version of it. Anyway it seems to have some
>>  effects. If Jason has time we will get some numbers from his
>>  instrument/setup.
> 
> Ok, let's see.

It works. Reducing PCI accessess gives a measurable improvement. I'll
post plots later today.

-- 
Jason Lunz			Reflex Security
lunz@reflexsecurity.com		http://www.reflexsecurity.com/

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [Fwd: [E1000] NAPI re-insertion w/ changes]
  2003-04-01 19:13 Feldman, Scott
  2003-04-01 19:23 ` Jason Lunz
@ 2003-04-01 19:44 ` Robert Olsson
  1 sibling, 0 replies; 20+ messages in thread
From: Robert Olsson @ 2003-04-01 19:44 UTC (permalink / raw)
  To: Feldman, Scott; +Cc: Robert Olsson, Jason Lunz, Jeff Garzik, netdev


Feldman, Scott writes:
 > >  Also still some concern by having the watchdog kicked in e1000_intr 
 > >  wouldn't e1000_clean feel better? 
 > 
 > e1000_intr feels better to me in keeping the clean-up work separate from
 > managing link status change (hopefully an infrequent event :).  Do you
 > see any problems?

 Well I was afraid that this would not be run at very heavy loads due to 
 interrupt disabling but I forgot:

e1000_watchdog(unsigned long data)
{      
    /* Cause software interrupt to ensure rx ring is cleaned */
    E1000_WRITE_REG(&adapter->hw, ICS, E1000_ICS_RXDMT0);

 
 Which generates irq from watchdog/timer so it should work and all practial 
 experiments indicates this is OK.

 Cheers.
						--ro

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Fwd: [E1000] NAPI re-insertion w/ changes]
  2003-04-01 17:47 Feldman, Scott
  2003-04-01 18:57 ` Robert Olsson
@ 2003-04-01 21:16 ` Jeff Garzik
  1 sibling, 0 replies; 20+ messages in thread
From: Jeff Garzik @ 2003-04-01 21:16 UTC (permalink / raw)
  To: Feldman, Scott; +Cc: Robert Olsson, Jason Lunz, netdev

On Tue, Apr 01, 2003 at 09:47:55AM -0800, Feldman, Scott wrote:
> > A better approximation I think but probably not the last...
> 
> Thanks for you help guys!  I broke NAPI (don't hate me) and then went on
> vacation, so I apologize for not responding sooner.  I like what you've
> come up with here, so I'll turn the patch around for Jeff to update 2.5
> e1000.
> 
> > +       e1000_clean_tx_irq(adapter);
> > +       e1000_clean_rx_irq(adapter, &work_done, work_to_do);
> 
> Just curious, why give priority to Tx over Rx?

TX frees skbs, RX allocates skbs.

You lessen the chance of allocation failure or "additional work" being
performed by the allocator when you do it in this order.

	Jeff

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [Fwd: [E1000] NAPI re-insertion w/ changes]
@ 2003-04-01 21:22 Feldman, Scott
  0 siblings, 0 replies; 20+ messages in thread
From: Feldman, Scott @ 2003-04-01 21:22 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Robert Olsson, Jason Lunz, netdev

> > Just curious, why give priority to Tx over Rx?
> 
> TX frees skbs, RX allocates skbs.
> 
> You lessen the chance of allocation failure or "additional 
> work" being performed by the allocator when you do it in this order.

At the expense of Rx latency.  I suppose the best would be:

rx_clean
tx_clean
rx_alloc

-scott

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Fwd: [E1000] NAPI re-insertion w/ changes]
  2003-04-01 19:23 ` Jason Lunz
@ 2003-04-01 22:40   ` Jason Lunz
  0 siblings, 0 replies; 20+ messages in thread
From: Jason Lunz @ 2003-04-01 22:40 UTC (permalink / raw)
  To: netdev

lunz@reflexsecurity.com said:
> It works. Reducing PCI accessess gives a measurable improvement. I'll
> post plots later today.

The graphs are up now. The best driver measured so far is Robert's
change to reinstate irq-disable-on-poll + reduce-pci-traffic, with
XsumRX turned off. I'm currently measuring to see if it still has an
advantage with checksumming turned on.

Scott, your bugfix of removing the while() from e1000_clean() puts the
original 5.0.43-k1 driver from 2.5.66 back in the running. It no longer
suffers from watchdog timeouts, and doesn't degrade under high load.  It
still scores lowest, though.  I've seen that the max-pps-ceiling visible
in these graphs is highly sensitive to CPU load, so I'd guess that the
sf1 test lost solely because of the overhead of still running the irq
handler once in a while.  

-- 
Jason Lunz			Reflex Security
lunz@reflexsecurity.com		http://www.reflexsecurity.com/

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [Fwd: [E1000] NAPI re-insertion w/ changes]
@ 2003-04-02  0:13 Feldman, Scott
  2003-04-02  4:19 ` Jason Lunz
  0 siblings, 1 reply; 20+ messages in thread
From: Feldman, Scott @ 2003-04-02  0:13 UTC (permalink / raw)
  To: Jason Lunz, Jeff Garzik, Robert Olsson, netdev

> The graphs are up now. The best driver measured so far is 
> Robert's change to reinstate irq-disable-on-poll + 
> reduce-pci-traffic, with XsumRX turned off. I'm currently 
> measuring to see if it still has an advantage with 
> checksumming turned on.

Very good.  Jeff has the patch for -k2 which should be equivalent to
-ro2.

Robert, please post your -ro3 changes - I can't find them.

BTW, the things that changed from 4.4.19-k3 to 5.0.43-k2 that may
account for the improvements are:

o eliminate the modulus operator for ring index wrapping
(anton@samba.org)
o single read of ICR (Interrupt Cause Register) in e1000_intr
o dynamic ITR (Interrupt Throttle Rate) algorithm for 82545/6

Sum of many second-order improvements add up.  We'll take all we can
find.

> Scott, your bugfix of removing the while() from e1000_clean() 
> puts the original 5.0.43-k1 driver from 2.5.66 back in the 
> running. It no longer suffers from watchdog timeouts, and 
> doesn't degrade under high load.  It still scores lowest, 
> though.  I've seen that the max-pps-ceiling visible in these 
> graphs is highly sensitive to CPU load, so I'd guess that the 
> sf1 test lost solely because of the overhead of still running 
> the irq handler once in a while.  

Ok, that settles that.  Thanks for re-running the tests.

I'm pretty happy with this NAPI code for e1000.  Next step is to get
some more generic QA testing with NAPI and then port back to 2.4.x.

-scott

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Fwd: [E1000] NAPI re-insertion w/ changes]
  2003-04-02  0:13 Feldman, Scott
@ 2003-04-02  4:19 ` Jason Lunz
  0 siblings, 0 replies; 20+ messages in thread
From: Jason Lunz @ 2003-04-02  4:19 UTC (permalink / raw)
  To: netdev

scott.feldman@intel.com said:
> Robert, please post your -ro3 changes - I can't find them.

sorry, the -roX and -sfX notations are mine; i'm backporting everything
to 2.4.20 for testing and most of the patches have to be tweaked one way
or another, so I gave them all my own names to help keep them straight.
-ro1 and -ro2 come from Robert's posts here, and -ro3 is what you get
when you apply

ftp://robur.slu.se/pub/Linux/net-development/skb_recycling/e1000_pci_2.pat

on top of that.

> I'm pretty happy with this NAPI code for e1000.  Next step is to get
> some more generic QA testing with NAPI and then port back to 2.4.x.

For what it's worth, I've subjected various e1000 chipsets to just about
every kind of synthetic load all the way up to the maximum theoretical
full-duplex gigE framerate for hours without a hiccup. I'm using
backports from 2.5 with the TSO stuff stripped out. Judging from my
apache logs and the emails I get, quite a few others have been using
those patches on 2.4 too, but I've received no real complaints.

Jason

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2003-04-02  4:19 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2003-03-21 14:46 [Fwd: [E1000] NAPI re-insertion w/ changes] Jeff Garzik
2003-03-22 15:34 ` Robert Olsson
  -- strict thread matches above, loose matches on Subject: below --
2003-03-22 18:47 Feldman, Scott
2003-03-22 20:28 ` Robert Olsson
2003-03-31 17:14 ` Robert Olsson
2003-03-27 16:54 Robert Olsson
2003-03-27 17:10 ` Jason Lunz
2003-03-28  8:27   ` Robert Olsson
2003-03-28 17:32     ` Jason Lunz
2003-03-28 18:43       ` Robert Olsson
2003-04-01 17:47 Feldman, Scott
2003-04-01 18:57 ` Robert Olsson
2003-04-01 21:16 ` Jeff Garzik
2003-04-01 19:13 Feldman, Scott
2003-04-01 19:23 ` Jason Lunz
2003-04-01 22:40   ` Jason Lunz
2003-04-01 19:44 ` Robert Olsson
2003-04-01 21:22 Feldman, Scott
2003-04-02  0:13 Feldman, Scott
2003-04-02  4:19 ` Jason Lunz

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).