netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: 1.03Mpps on e1000 (was: Re: [E1000-devel] Transmission limit)
@ 2004-12-05 18:25 Manfred Spraul
  0 siblings, 0 replies; 89+ messages in thread
From: Manfred Spraul @ 2004-12-05 18:25 UTC (permalink / raw)
  To: Lennert Buytenhek, Netdev, Martin Josefsson

[-- Attachment #1: Type: text/plain, Size: 1604 bytes --]

Lennert wrote:

> A dirty way, yes ;-)  Open up e1000_osdep.h and do:
>
> -#define E1000_READ_REG(a, reg) ( \
> -    readl((a)->hw_addr + \
> -        (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg)))
> +#define E1000_READ_REG(a, reg) ({ \
> +    unsigned long s, e, d, v; \
> +\
> +    (a)->mmio_reads++; \
> +    rdtsc(s, d); \
> +    v = readl((a)->hw_addr + \
> +        (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg)); \
> +    rdtsc(e, d); \
> +    e -= s; \
> +    printk(KERN_INFO "e1000: MMIO read took %ld clocks\n", e); \
> +    printk(KERN_INFO "e1000: in process %d(%s)\n", current->pid, current->comm); \
> +    dump_stack(); \
> +    v; \
> +})

Too dirty: rdtsc is not serializing, thus my Opteron happily reorders 
the read and the rdtsc and reports 9 cycles.
Attached is a longer patch that I usually use for microbenchmarks. I get 
around 506 cycles with it for an Opteron 2 GHz to the nForce 250 Gb nic 
(i.e. integrated nic in the chipset, just one HT hop):

Results - zero - shift 0
 40: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 :0 0 63 0 0 0 0 0 0 0 0 0 0 0 0 0
1e0: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 :0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
Overflows: 0.
Sum: 100
 >>>>>>>>>>> benchmark overhead: 82 cycles
** reading register e08920b4
Results - readl - shift 0
240: 0 0 b 0 0 0 0 0 0 0 0 0 32 0 1 1 :0 0 0 0 0 0 a 0 0 0 0 0 0 0 0 0
260: 1a 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 :0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
300: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 :0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
Overflows: 0.
Sum: 100
 >>>>>>>>>> total: 0x248, i.e. net 506 cycles.

--
    Manfred

[-- Attachment #2: patch-perftest-forcedeth --]
[-- Type: text/plain, Size: 2910 bytes --]

--- 2.6/drivers/net/forcedeth.c	2004-12-05 16:21:28.000000000 +0100
+++ build-2.6/drivers/net/forcedeth.c	2004-12-05 19:18:24.000000000 +0100
@@ -1500,6 +1500,131 @@
 	enable_irq(dev->irq);
 }
 
+int p_shift = 0;
+
+#define STAT_TABLELEN		16384
+static unsigned long totals[STAT_TABLELEN];
+static unsigned int overflows;
+
+static unsigned long long stime;
+static void start_measure(void)
+{
+	 __asm__ __volatile__ (
+		".align 64\n\t"
+	 	"pushal\n\t"
+		"cpuid\n\t"
+		"popal\n\t"
+		"rdtsc\n\t"
+		"movl %%eax,(%0)\n\t"
+		"movl %%edx,4(%0)\n\t"
+		: /* no output */
+		: "c"(&stime)
+		: "eax", "edx", "memory" );
+}
+
+static void end_measure(void)
+{
+static unsigned long long etime;
+	__asm__ __volatile__ (
+		"pushal\n\t"
+		"cpuid\n\t"
+		"popal\n\t"
+		"rdtsc\n\t"
+		"movl %%eax,(%0)\n\t"
+		"movl %%edx,4(%0)\n\t"
+		: /* no output */
+		: "c"(&etime)
+		: "eax", "edx", "memory" );
+	{
+		unsigned long time = (unsigned long)(etime-stime);
+		time >>= p_shift;
+		if(time < STAT_TABLELEN) {
+			totals[time]++;
+		} else {
+			overflows++;
+		}
+	}
+}
+
+static void clean_buf(void)
+{
+	memset(totals,0,sizeof(totals));
+	overflows = 0;
+}
+
+static void print_line(unsigned long* array)
+{
+	int i;
+	for(i=0;i<32;i++) {
+		if((i%32)==16)
+			printk(":");
+		printk("%lx ",array[i]); 
+	}
+}
+
+static void print_buf(char* caption)
+{
+	int i, other = 0;
+	printk("Results - %s - shift %d",
+		caption, p_shift);
+
+	for(i=0;i<STAT_TABLELEN;i+=32) {
+		int j;
+		int local = 0;
+		for(j=0;j<32;j++)
+			local += totals[i+j];
+
+		if(local) {
+			printk("\n%3x: ",i);
+			print_line(&totals[i]);
+			other += local;
+		}
+	}
+	printk("\nOverflows: %d.\n",
+		overflows);
+	printk("Sum: %d\n",other+overflows);
+}
+
+static void return_immediately(void *dummy)
+{
+}
+
+static void bench_readl(u8 __iomem *base)
+{ 
+	int i;
+
+	/* empty test measurement: */
+	printk("******** kernel cpu benchmark started **********\n");
+	clean_buf();
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	schedule_timeout(200);
+	for(i=0;i<100;i++) {
+		start_measure();
+		return_immediately(NULL);
+		return_immediately(NULL);
+		return_immediately(NULL);
+		return_immediately(NULL);
+		end_measure();
+	}
+	print_buf("zero");
+	clean_buf();
+
+	printk("** reading register %p\n", base);
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	schedule_timeout(200);
+	for(i=0;i<100;i++) {
+		start_measure();
+		return_immediately(NULL);
+		return_immediately(NULL);
+		readl(base);
+		return_immediately(NULL);
+		return_immediately(NULL);
+		end_measure();
+	}
+	print_buf("readl");
+	clean_buf();
+}
+
 static int nv_open(struct net_device *dev)
 {
 	struct fe_priv *np = get_nvpriv(dev);
@@ -1635,6 +1760,8 @@
 		mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
 	spin_unlock_irq(&np->lock);
 
+	bench_readl(base + NvRegMulticastAddrB);
+	bench_readl(base + NvRegIrqStatus);
 	return 0;
 out_drain:
 	drain_ring(dev);

^ permalink raw reply	[flat|nested] 89+ messages in thread
* Re: 1.03Mpps on e1000 (was: Re: [E1000-devel] Transmission limit)
@ 2004-12-06 19:10 Robert Olsson
  2004-12-06 22:29 ` Martin Josefsson
  0 siblings, 1 reply; 89+ messages in thread
From: Robert Olsson @ 2004-12-06 19:10 UTC (permalink / raw)
  To: Lennert Buytenhek
  Cc: jamal, Martin Josefsson, Scott Feldman, Robert Olsson, P, mellia,
	Jorge Manuel Finochietto, Giulio Galante, netdev



Lennert Buytenhek writes:
 > On Mon, Dec 06, 2004 at 08:11:02AM -0500, jamal wrote:
 > 
 > > Hopefully someone will beat me to testing to see if our forwarding
 > > capacity now goes up with this new recipe.


Yes, a breakthrough: we can now send small packets at GigE wire speed, which
will make development and testing much easier... A first router test with
our setup is below. Opteron 1.6 GHz, SMP kernel, using 1 CPU. 82546 EB +
82546 GB and PCI-X 100 MHz & 133 MHz.

pktgen performance is measured on the router box. Remember that Scott's patch
uses 4096 TX buffers, and with pktgen we use clone_skb, so with real skbs we
will probably see lower performance. This may explain the results below, where
routing performance doesn't follow pktgen performance.

T-PUT is routing performance. The pure pktgen TX performance, measured on
the router, is also given.


Input rate for the routing test is 2*765 kpps for all three runs.
Packets input to eth0 are routed to eth1, and eth2 to eth3.


Vanilla. T-PUT 657 kpps. pktgen TX perf 818 kpps
-------------------------------------------------
Iface   MTU Met  RX-OK RX-ERR RX-DRP RX-OVR  TX-OK TX-ERR TX-DRP TX-OVR Flags
eth0   1500   0 4312682 8253078 8253078 5687318      5      0      0      0 BRU
eth1   1500   0      1      0      0      0 4312199      0      0      0 BRU
eth2   1500   0 4311018 8386504 8386504 5688982      5      0      0      0 BRU
eth3   1500   0      1      0      0      0 4310791      0      0      0 BRU

           CPU0       
  0:     116665    IO-APIC-edge  timer
  1:        208    IO-APIC-edge  i8042
  8:          0    IO-APIC-edge  rtc
  9:          0   IO-APIC-level  acpi
 14:      21943    IO-APIC-edge  ide0
 26:         66   IO-APIC-level  eth0
 27:      58638   IO-APIC-level  eth1
 28:         68   IO-APIC-level  eth2
 29:      58497   IO-APIC-level  eth3
NMI:          0 
LOC:     116605 
ERR:          0
MIS:          0

e1000-TX-prefetch+scott tx patch. T-PUT 540 kpps. pktgen TX perf 1.48 Mpps
--------------------------------------------------------------------------

Iface   MTU Met  RX-OK RX-ERR RX-DRP RX-OVR  TX-OK TX-ERR TX-DRP TX-OVR Flags
eth0   1500   0 3533795 8618637 8618637 6466205      5      0      0      0 BRU
eth1   1500   0      3      0      0      0 3533803      0      0      0 BRU
eth2   1500   0 3535804 8697149 8697149 6464196      5      0      0      0 BRU
eth3   1500   0      1      0      0      0 3535321      0      0      0 BRU

           CPU0       
  0:    1372774    IO-APIC-edge  timer
  1:        663    IO-APIC-edge  i8042
  8:          0    IO-APIC-edge  rtc
  9:          0   IO-APIC-level  acpi
 14:      22631    IO-APIC-edge  ide0
 26:        686   IO-APIC-level  eth0
 27:        693   IO-APIC-level  eth1
 28:        687   IO-APIC-level  eth2
 29:        682   IO-APIC-level  eth3
NMI:          0 
LOC:    1372804 
ERR:          0
MIS:          0


e1000-TX-prefetch. T-PUT 657 kpps. pktgen TX perf 1.15 Mpps
-----------------------------------------------------------
Kernel Interface table
Iface   MTU Met  RX-OK RX-ERR RX-DRP RX-OVR  TX-OK TX-ERR TX-DRP TX-OVR Flags
eth0   1500   0 4311848 8288270 8288270 5688152      5      0      0      0 BRU
eth1   1500   0      4      0      0      0 4311388      0      0      0 BRU
eth2   1500   0 4309082 8400892 8400892 5690918      5      0      0      0 BRU
eth3   1500   0      1      0      0      0 4308271      0      0      0 BRU
lo    16436   0      0      0      0      0      0      0      0      0 LRU
           CPU0       
  0:     224310    IO-APIC-edge  timer
  1:        250    IO-APIC-edge  i8042
  8:          0    IO-APIC-edge  rtc
  9:          0   IO-APIC-level  acpi
 14:      22055    IO-APIC-edge  ide0
 26:        122   IO-APIC-level  eth0
 27:      58001   IO-APIC-level  eth1
 28:        123   IO-APIC-level  eth2
 29:      57681   IO-APIC-level  eth3
NMI:          0 
LOC:     224251 
ERR:          0
MIS:          0


						--ro

^ permalink raw reply	[flat|nested] 89+ messages in thread

end of thread, other threads:[~2004-12-10 16:24 UTC | newest]

Thread overview: 89+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1101467291.24742.70.camel@mellia.lipar.polito.it>
2004-11-26 14:05 ` [E1000-devel] Transmission limit P
2004-11-26 15:31   ` Marco Mellia
2004-11-26 19:56     ` jamal
2004-11-29 14:21       ` Marco Mellia
2004-11-30 13:46         ` jamal
2004-12-02 17:24           ` Marco Mellia
2004-11-26 20:06     ` jamal
2004-11-26 20:56     ` Lennert Buytenhek
2004-11-26 21:02       ` Lennert Buytenhek
2004-11-27  9:25     ` Harald Welte
     [not found]       ` <20041127111101.GC23139@xi.wantstofly.org>
2004-11-27 11:31         ` Harald Welte
2004-11-27 20:12       ` Cesar Marcondes
2004-11-29  8:53       ` Marco Mellia
2004-11-29 14:50         ` Lennert Buytenhek
2004-11-30  8:42           ` Marco Mellia
2004-12-01 12:25             ` jamal
2004-12-02 13:39               ` Marco Mellia
2004-12-03 13:07                 ` jamal
2004-11-26 15:40   ` Robert Olsson
2004-11-26 15:59     ` Marco Mellia
2004-11-26 16:57       ` P
2004-11-26 20:01         ` jamal
2004-11-29 10:19           ` P
2004-11-29 13:09           ` Robert Olsson
2004-11-29 20:16             ` David S. Miller
2004-12-01 16:47               ` Robert Olsson
2004-11-30 13:31             ` jamal
2004-11-30 13:46               ` Lennert Buytenhek
2004-11-30 14:25                 ` jamal
2004-12-01  0:11                   ` Lennert Buytenhek
2004-12-01  1:09                     ` Scott Feldman
2004-12-01 15:34                       ` Robert Olsson
2004-12-01 16:49                         ` Scott Feldman
2004-12-01 17:37                           ` Robert Olsson
2004-12-02 17:54                           ` Robert Olsson
2004-12-02 18:23                           ` Robert Olsson
2004-12-02 23:25                             ` Lennert Buytenhek
2004-12-03  5:23                             ` Scott Feldman
2004-12-10 16:24                             ` Martin Josefsson
2004-12-01 18:29                       ` Lennert Buytenhek
2004-12-01 21:35                         ` Lennert Buytenhek
2004-12-02  6:13                           ` Scott Feldman
2004-12-03 13:24                             ` jamal
2004-12-05 14:50                             ` 1.03Mpps on e1000 (was: Re: [E1000-devel] Transmission limit) Lennert Buytenhek
2004-12-05 15:03                               ` Martin Josefsson
2004-12-05 15:15                                 ` Lennert Buytenhek
2004-12-05 15:19                                   ` Martin Josefsson
2004-12-05 15:30                                     ` Martin Josefsson
2004-12-05 17:00                                       ` Lennert Buytenhek
2004-12-05 17:11                                         ` Martin Josefsson
2004-12-05 17:38                                           ` Martin Josefsson
2004-12-05 18:14                                             ` Lennert Buytenhek
2004-12-05 15:42                                 ` Martin Josefsson
2004-12-05 16:48                                   ` Martin Josefsson
2004-12-05 17:01                                     ` Martin Josefsson
2004-12-05 17:58                                     ` Lennert Buytenhek
2004-12-05 17:44                                   ` Lennert Buytenhek
2004-12-05 17:51                                     ` Lennert Buytenhek
2004-12-05 17:54                                       ` Martin Josefsson
2004-12-06 11:32                                         ` 1.03Mpps on e1000 (was: " jamal
2004-12-06 12:11                                           ` Lennert Buytenhek
2004-12-06 12:20                                             ` jamal
2004-12-06 12:23                                               ` Lennert Buytenhek
2004-12-06 12:30                                                 ` Martin Josefsson
2004-12-06 13:11                                                   ` jamal
     [not found]                                                     ` <20041206132907.GA13411@xi.wantstofly.org>
     [not found]                                                       ` <16820.37049.396306.295878@robur.slu.se>
2004-12-06 17:32                                                         ` 1.03Mpps on e1000 (was: Re: [E1000-devel] " P
2004-12-08 23:36                                   ` Ray Lehtiniemi
     [not found]                                     ` <41B825A5.2000009@draigBrady.com>
     [not found]                                       ` <20041209161825.GA32454@mail.com>
2004-12-09 17:12                                         ` 1.03Mpps on e1000 P
     [not found]                                         ` <20041209164820.GB32454@mail.com>
2004-12-09 17:19                                           ` P
2004-12-09 23:25                                             ` Ray Lehtiniemi
2004-12-05 21:12                                 ` 1.03Mpps on e1000 (was: Re: [E1000-devel] Transmission limit) Scott Feldman
2004-12-05 21:25                                   ` Lennert Buytenhek
2004-12-06  1:23                                     ` 1.03Mpps on e1000 (was: " Scott Feldman
2004-12-02 17:31                       ` [E1000-devel] Transmission limit Marco Mellia
2004-12-03 20:57                       ` Lennert Buytenhek
2004-12-04 10:36                         ` Lennert Buytenhek
2004-12-01 12:08                     ` jamal
2004-12-01 15:24                       ` Lennert Buytenhek
2004-11-26 17:58       ` Robert Olsson
2004-11-27 20:00   ` Lennert Buytenhek
2004-11-29 12:44     ` Marco Mellia
2004-11-29 15:19       ` Lennert Buytenhek
2004-11-29 17:32         ` Marco Mellia
2004-11-29 19:08           ` Lennert Buytenhek
2004-11-29 19:09             ` Lennert Buytenhek
2004-12-05 18:25 1.03Mpps on e1000 (was: Re: [E1000-devel] Transmission limit) Manfred Spraul
  -- strict thread matches above, loose matches on Subject: below --
2004-12-06 19:10 Robert Olsson
2004-12-06 22:29 ` Martin Josefsson
2004-12-07  3:20   ` jamal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).