From: Manfred Spraul <manfred@colorfullife.com>
To: Lennert Buytenhek <buytenh@wantstofly.org>,
Netdev <netdev@oss.sgi.com>,
Martin Josefsson <gandalf@wlug.westbo.se>
Subject: Re: 1.03Mpps on e1000 (was: Re: [E1000-devel] Transmission limit)
Date: Sun, 05 Dec 2004 19:25:47 +0100 [thread overview]
Message-ID: <41B352AB.4020700@colorfullife.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1604 bytes --]
Lennert wrote:
> A dirty way, yes ;-) Open up e1000_osdep.h and do:
>
> -#define E1000_READ_REG(a, reg) ( \
> - readl((a)->hw_addr + \
> - (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg)))
> +#define E1000_READ_REG(a, reg) ({ \
> + unsigned long s, e, d, v; \
> +\
> + (a)->mmio_reads++; \
> + rdtsc(s, d); \
> + v = readl((a)->hw_addr + \
> + (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg)); \
> + rdtsc(e, d); \
> + e -= s; \
> + printk(KERN_INFO "e1000: MMIO read took %ld clocks\n", e); \
> + printk(KERN_INFO "e1000: in process %d(%s)\n", current->pid, current->comm); \
> + dump_stack(); \
> + v; \
> +})
Too dirty: rdtsc is not serializing, thus my Opteron happily reorders
the read and the rdtsc and reports 9 cycles.
Attached is a longer patch that I usually use for microbenchmarks. With it,
a readl() from a 2 GHz Opteron to the nForce 250 Gb NIC (i.e. the NIC
integrated in the chipset, just one HT hop away) takes around 506 cycles:
Results - zero - shift 0
40: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 :0 0 63 0 0 0 0 0 0 0 0 0 0 0 0 0
1e0: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 :0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
Overflows: 0.
Sum: 100
>>>>>>>>>>> benchmark overhead: 82 cycles
** reading register e08920b4
Results - readl - shift 0
240: 0 0 b 0 0 0 0 0 0 0 0 0 32 0 1 1 :0 0 0 0 0 0 a 0 0 0 0 0 0 0 0 0
260: 1a 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 :0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
300: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 :0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
Overflows: 0.
Sum: 100
>>>>>>>>>> total: 0x24c, i.e. net 506 cycles.
--
Manfred
[-- Attachment #2: patch-perftest-forcedeth --]
[-- Type: text/plain, Size: 2910 bytes --]
--- 2.6/drivers/net/forcedeth.c 2004-12-05 16:21:28.000000000 +0100
+++ build-2.6/drivers/net/forcedeth.c 2004-12-05 19:18:24.000000000 +0100
@@ -1500,6 +1500,131 @@
enable_irq(dev->irq);
}
+int p_shift = 0; /* right-shift applied to every measured delta before it is binned (see end_measure) */
+
+#define STAT_TABLELEN 16384 /* number of bins in totals[] */
+static unsigned long totals[STAT_TABLELEN]; /* histogram: totals[t] counts the samples whose shifted delta equals t */
+static unsigned int overflows; /* samples whose shifted delta was >= STAT_TABLELEN */
+
+static unsigned long long stime; /* 64-bit timestamp stored by start_measure(), read by end_measure() */
+static void start_measure(void) /* store the opening timestamp of one sample in stime */
+{
+ __asm__ __volatile__ (
+ ".align 64\n\t"
+ "pushal\n\t" /* save the general registers; cpuid overwrites eax, ebx, ecx and edx */
+ "cpuid\n\t" /* run ahead of rdtsc, presumably as a serializing barrier: the mail notes rdtsc alone is not serializing */
+ "popal\n\t" /* restore the registers, including ecx which holds &stime */
+ "rdtsc\n\t" /* timestamp counter into edx:eax */
+ "movl %%eax,(%0)\n\t" /* low 32 bits into stime */
+ "movl %%edx,4(%0)\n\t" /* high 32 bits into stime */
+ : /* no output */
+ : "c"(&stime)
+ : "eax", "edx", "memory" );
+}
+
+static void end_measure(void) /* take the closing timestamp and count (etime - stime) >> p_shift in the histogram */
+{
+static unsigned long long etime; /* closing timestamp, written by the asm below */
+ __asm__ __volatile__ (
+ "pushal\n\t"
+ "cpuid\n\t" /* same cpuid-then-rdtsc sequence as start_measure(), without the .align */
+ "popal\n\t"
+ "rdtsc\n\t"
+ "movl %%eax,(%0)\n\t"
+ "movl %%edx,4(%0)\n\t"
+ : /* no output */
+ : "c"(&etime)
+ : "eax", "edx", "memory" );
+ {
+ unsigned long time = (unsigned long)(etime-stime); /* elapsed timestamp ticks, narrowed to unsigned long */
+ time >>= p_shift; /* each bin then covers 2^p_shift ticks */
+ if(time < STAT_TABLELEN) {
+ totals[time]++;
+ } else {
+ overflows++; /* does not fit in the table: counted separately */
+ }
+ }
+}
+
+static void clean_buf(void) /* zero the histogram and the overflow counter */
+{
+ memset(totals,0,sizeof(totals));
+ overflows = 0;
+}
+
+static void print_line(unsigned long* array) /* printk 32 consecutive counters from array in hex, with ":" between the two halves */
+{
+ int i;
+ for(i=0;i<32;i++) {
+ if((i%32)==16) /* separator before the 17th value */
+ printk(":");
+ printk("%lx ",array[i]);
+ }
+}
+
+static void print_buf(char* caption) /* printk the non-empty rows of the histogram; caption labels the run */
+{
+ int i, other = 0; /* other: total of all in-range samples seen so far */
+ printk("Results - %s - shift %d",
+ caption, p_shift);
+
+ for(i=0;i<STAT_TABLELEN;i+=32) { /* walk the table one 32-bin row at a time */
+ int j;
+ int local = 0; /* number of samples in this row */
+ for(j=0;j<32;j++)
+ local += totals[i+j];
+
+ if(local) { /* rows without samples are not printed */
+ printk("\n%3x: ",i); /* row label: index of the row's first bin, in hex */
+ print_line(&totals[i]);
+ other += local;
+ }
+ }
+ printk("\nOverflows: %d.\n", /* NOTE(review): overflows is unsigned int, so %u would match its type */
+ overflows);
+ printk("Sum: %d\n",other+overflows); /* in-range samples plus overflows */
+}
+
+static void return_immediately(void *dummy) /* empty function; bench_readl() calls it as filler inside the timed region */
+{
+}
+
+static void bench_readl(u8 __iomem *base) /* time 100 empty baseline samples, then 100 samples containing readl(base), and printk both histograms */
+{
+ int i;
+
+ /* empty test measurement: */
+ printk("******** kernel cpu benchmark started **********\n");
+ clean_buf();
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(200); /* sleep for 200 jiffies before the timed loop; presumably to let the system settle - TODO confirm */
+ for(i=0;i<100;i++) {
+ start_measure();
+ return_immediately(NULL);
+ return_immediately(NULL);
+ return_immediately(NULL);
+ return_immediately(NULL);
+ end_measure();
+ }
+ print_buf("zero"); /* baseline: measurement harness plus four empty calls */
+ clean_buf();
+
+ printk("** reading register %p\n", base);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(200);
+ for(i=0;i<100;i++) {
+ start_measure();
+ return_immediately(NULL);
+ return_immediately(NULL);
+ readl(base); /* the MMIO read under test; its value is discarded */
+ return_immediately(NULL);
+ return_immediately(NULL);
+ end_measure();
+ }
+ print_buf("readl"); /* same harness and four empty calls, plus one readl() */
+ clean_buf();
+}
+
static int nv_open(struct net_device *dev)
{
struct fe_priv *np = get_nvpriv(dev);
@@ -1635,6 +1760,8 @@
mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
spin_unlock_irq(&np->lock);
+ bench_readl(base + NvRegMulticastAddrB);
+ bench_readl(base + NvRegIrqStatus);
return 0;
out_drain:
drain_ring(dev);
next reply other threads:[~2004-12-05 18:25 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-12-05 18:25 Manfred Spraul [this message]
-- strict thread matches above, loose matches on Subject: below --
2004-12-06 19:10 1.03Mpps on e1000 (was: Re: [E1000-devel] Transmission limit) Robert Olsson
2004-12-06 22:29 ` Martin Josefsson
2004-12-07 3:20 ` jamal
2004-11-26 20:01 [E1000-devel] Transmission limit jamal
2004-11-29 13:09 ` Robert Olsson
2004-11-30 13:31 ` jamal
2004-11-30 13:46 ` Lennert Buytenhek
2004-11-30 14:25 ` jamal
2004-12-01 0:11 ` Lennert Buytenhek
2004-12-01 1:09 ` Scott Feldman
2004-12-01 18:29 ` Lennert Buytenhek
2004-12-01 21:35 ` Lennert Buytenhek
2004-12-02 6:13 ` Scott Feldman
2004-12-05 14:50 ` 1.03Mpps on e1000 (was: Re: [E1000-devel] Transmission limit) Lennert Buytenhek
2004-12-05 15:03 ` Martin Josefsson
2004-12-05 15:15 ` Lennert Buytenhek
2004-12-05 15:19 ` Martin Josefsson
2004-12-05 15:30 ` Martin Josefsson
2004-12-05 17:00 ` Lennert Buytenhek
2004-12-05 17:11 ` Martin Josefsson
2004-12-05 17:38 ` Martin Josefsson
2004-12-05 18:14 ` Lennert Buytenhek
2004-12-05 15:42 ` Martin Josefsson
2004-12-05 16:48 ` Martin Josefsson
2004-12-05 17:01 ` Martin Josefsson
2004-12-05 17:58 ` Lennert Buytenhek
2004-12-05 17:44 ` Lennert Buytenhek
2004-12-05 17:51 ` Lennert Buytenhek
2004-12-05 17:54 ` Martin Josefsson
2004-12-06 11:32 ` 1.03Mpps on e1000 (was: " jamal
2004-12-06 12:11 ` Lennert Buytenhek
2004-12-06 12:20 ` jamal
2004-12-06 12:23 ` Lennert Buytenhek
2004-12-06 12:30 ` Martin Josefsson
2004-12-06 13:11 ` jamal
[not found] ` <20041206132907.GA13411@xi.wantstofly.org>
[not found] ` <16820.37049.396306.295878@robur.slu.se>
2004-12-06 17:32 ` 1.03Mpps on e1000 (was: Re: [E1000-devel] " P
2004-12-08 23:36 ` Ray Lehtiniemi
2004-12-05 21:12 ` Scott Feldman
2004-12-05 21:25 ` Lennert Buytenhek
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=41B352AB.4020700@colorfullife.com \
--to=manfred@colorfullife.com \
--cc=buytenh@wantstofly.org \
--cc=gandalf@wlug.westbo.se \
--cc=netdev@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.