From: Manfred Spraul <manfred@colorfullife.com>
To: Lennert Buytenhek <buytenh@wantstofly.org>,
Netdev <netdev@oss.sgi.com>,
Martin Josefsson <gandalf@wlug.westbo.se>
Subject: Re: 1.03Mpps on e1000 (was: Re: [E1000-devel] Transmission limit)
Date: Sun, 05 Dec 2004 19:25:47 +0100 [thread overview]
Message-ID: <41B352AB.4020700@colorfullife.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1604 bytes --]
Lennert wrote:
> A dirty way, yes ;-) Open up e1000_osdep.h and do:
>
> -#define E1000_READ_REG(a, reg) ( \
> - readl((a)->hw_addr + \
> - (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg)))
> +#define E1000_READ_REG(a, reg) ({ \
> + unsigned long s, e, d, v; \
> +\
> + (a)->mmio_reads++; \
> + rdtsc(s, d); \
> + v = readl((a)->hw_addr + \
> + (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg)); \
> + rdtsc(e, d); \
> + e -= s; \
> + printk(KERN_INFO "e1000: MMIO read took %ld clocks\n", e); \
> + printk(KERN_INFO "e1000: in process %d(%s)\n", current->pid, current->comm); \
> + dump_stack(); \
> + v; \
> +})
Too dirty: rdtsc is not serializing, thus my Opteron happily reorders
the read and the rdtsc and reports 9 cycles.
Attached is a longer patch that I usually use for microbenchmarks. With it I
measure around 506 cycles for a register read from a 2 GHz Opteron to the nForce 250 Gb NIC
(i.e. the NIC integrated in the chipset, just one HT hop):
Results - zero - shift 0
40: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 :0 0 63 0 0 0 0 0 0 0 0 0 0 0 0 0
1e0: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 :0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
Overflows: 0.
Sum: 100
>>>>>>>>>>> benchmark overhead: 82 cycles
** reading register e08920b4
Results - readl - shift 0
240: 0 0 b 0 0 0 0 0 0 0 0 0 32 0 1 1 :0 0 0 0 0 0 a 0 0 0 0 0 0 0 0 0
260: 1a 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 :0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
300: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 :0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
Overflows: 0.
Sum: 100
>>>>>>>>>> total: 0x24c, i.e. net 506 cycles.
--
Manfred
[-- Attachment #2: patch-perftest-forcedeth --]
[-- Type: text/plain, Size: 2910 bytes --]
--- 2.6/drivers/net/forcedeth.c 2004-12-05 16:21:28.000000000 +0100
+++ build-2.6/drivers/net/forcedeth.c 2004-12-05 19:18:24.000000000 +0100
@@ -1500,6 +1500,131 @@
enable_irq(dev->irq);
}
+int p_shift = 0;
+
+#define STAT_TABLELEN 16384
+static unsigned long totals[STAT_TABLELEN];
+static unsigned int overflows;
+
+static unsigned long long stime;
+static void start_measure(void)
+{
+ __asm__ __volatile__ (
+ ".align 64\n\t"
+ "pushal\n\t"
+ "cpuid\n\t"
+ "popal\n\t"
+ "rdtsc\n\t"
+ "movl %%eax,(%0)\n\t"
+ "movl %%edx,4(%0)\n\t"
+ : /* no output */
+ : "c"(&stime)
+ : "eax", "edx", "memory" );
+}
+
+static void end_measure(void)
+{
+static unsigned long long etime;
+ __asm__ __volatile__ (
+ "pushal\n\t"
+ "cpuid\n\t"
+ "popal\n\t"
+ "rdtsc\n\t"
+ "movl %%eax,(%0)\n\t"
+ "movl %%edx,4(%0)\n\t"
+ : /* no output */
+ : "c"(&etime)
+ : "eax", "edx", "memory" );
+ {
+ unsigned long time = (unsigned long)(etime-stime);
+ time >>= p_shift;
+ if(time < STAT_TABLELEN) {
+ totals[time]++;
+ } else {
+ overflows++;
+ }
+ }
+}
+
+static void clean_buf(void)
+{
+ memset(totals,0,sizeof(totals));
+ overflows = 0;
+}
+
+static void print_line(unsigned long* array)
+{
+ int i;
+ for(i=0;i<32;i++) {
+ if((i%32)==16)
+ printk(":");
+ printk("%lx ",array[i]);
+ }
+}
+
+static void print_buf(char* caption)
+{
+ int i, other = 0;
+ printk("Results - %s - shift %d",
+ caption, p_shift);
+
+ for(i=0;i<STAT_TABLELEN;i+=32) {
+ int j;
+ int local = 0;
+ for(j=0;j<32;j++)
+ local += totals[i+j];
+
+ if(local) {
+ printk("\n%3x: ",i);
+ print_line(&totals[i]);
+ other += local;
+ }
+ }
+ printk("\nOverflows: %d.\n",
+ overflows);
+ printk("Sum: %d\n",other+overflows);
+}
+
+static void return_immediately(void *dummy)
+{
+}
+
+static void bench_readl(u8 __iomem *base)
+{
+ int i;
+
+ /* empty test measurement: */
+ printk("******** kernel cpu benchmark started **********\n");
+ clean_buf();
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(200);
+ for(i=0;i<100;i++) {
+ start_measure();
+ return_immediately(NULL);
+ return_immediately(NULL);
+ return_immediately(NULL);
+ return_immediately(NULL);
+ end_measure();
+ }
+ print_buf("zero");
+ clean_buf();
+
+ printk("** reading register %p\n", base);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(200);
+ for(i=0;i<100;i++) {
+ start_measure();
+ return_immediately(NULL);
+ return_immediately(NULL);
+ readl(base);
+ return_immediately(NULL);
+ return_immediately(NULL);
+ end_measure();
+ }
+ print_buf("readl");
+ clean_buf();
+}
+
static int nv_open(struct net_device *dev)
{
struct fe_priv *np = get_nvpriv(dev);
@@ -1635,6 +1760,8 @@
mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
spin_unlock_irq(&np->lock);
+ bench_readl(base + NvRegMulticastAddrB);
+ bench_readl(base + NvRegIrqStatus);
return 0;
out_drain:
drain_ring(dev);
next reply other threads:[~2004-12-05 18:25 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-12-05 18:25 Manfred Spraul [this message]
-- strict thread matches above, loose matches on Subject: below --
2004-12-06 19:10 1.03Mpps on e1000 (was: Re: [E1000-devel] Transmission limit) Robert Olsson
2004-12-06 22:29 ` Martin Josefsson
2004-12-07 3:20 ` jamal
2004-11-26 20:01 [E1000-devel] Transmission limit jamal
2004-11-29 13:09 ` Robert Olsson
2004-11-30 13:31 ` jamal
2004-11-30 13:46 ` Lennert Buytenhek
2004-11-30 14:25 ` jamal
2004-12-01 0:11 ` Lennert Buytenhek
2004-12-01 1:09 ` Scott Feldman
2004-12-01 18:29 ` Lennert Buytenhek
2004-12-01 21:35 ` Lennert Buytenhek
2004-12-02 6:13 ` Scott Feldman
2004-12-05 14:50 ` 1.03Mpps on e1000 (was: Re: [E1000-devel] Transmission limit) Lennert Buytenhek
2004-12-05 15:03 ` Martin Josefsson
2004-12-05 15:15 ` Lennert Buytenhek
2004-12-05 15:19 ` Martin Josefsson
2004-12-05 15:30 ` Martin Josefsson
2004-12-05 17:00 ` Lennert Buytenhek
2004-12-05 17:11 ` Martin Josefsson
2004-12-05 17:38 ` Martin Josefsson
2004-12-05 18:14 ` Lennert Buytenhek
2004-12-05 15:42 ` Martin Josefsson
2004-12-05 16:48 ` Martin Josefsson
2004-12-05 17:01 ` Martin Josefsson
2004-12-05 17:58 ` Lennert Buytenhek
2004-12-05 17:44 ` Lennert Buytenhek
2004-12-05 17:51 ` Lennert Buytenhek
2004-12-05 17:54 ` Martin Josefsson
2004-12-06 11:32 ` 1.03Mpps on e1000 (was: " jamal
2004-12-06 12:11 ` Lennert Buytenhek
2004-12-06 12:20 ` jamal
2004-12-06 12:23 ` Lennert Buytenhek
2004-12-06 12:30 ` Martin Josefsson
2004-12-06 13:11 ` jamal
[not found] ` <20041206132907.GA13411@xi.wantstofly.org>
[not found] ` <16820.37049.396306.295878@robur.slu.se>
2004-12-06 17:32 ` 1.03Mpps on e1000 (was: Re: [E1000-devel] " P
2004-12-08 23:36 ` Ray Lehtiniemi
2004-12-05 21:12 ` Scott Feldman
2004-12-05 21:25 ` Lennert Buytenhek
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=41B352AB.4020700@colorfullife.com \
--to=manfred@colorfullife.com \
--cc=buytenh@wantstofly.org \
--cc=gandalf@wlug.westbo.se \
--cc=netdev@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).