* [parisc-linux] clerar user page test
@ 2004-12-07 14:12 Joel Soete
2004-12-09 7:42 ` Grant Grundler
2004-12-09 17:32 ` Grant Grundler
0 siblings, 2 replies; 4+ messages in thread
From: Joel Soete @ 2004-12-07 14:12 UTC (permalink / raw)
To: parisc-linux
[-- Attachment #1: Type: text/plain, Size: 829 bytes --]
Hello all,
here are the results of some clup test:
(run on b2k running 2.6.10-rc3-pa2 64bit)
time ./clup0; time ./clup1 ; time ./clup2
real 0m0.498s
user 0m0.440s
sys 0m0.014s
real 0m0.277s
user 0m0.229s
sys 0m0.010s
real 0m0.272s
user 0m0.227s
sys 0m0.013s
(the corresponding sources are attached.
As a reminder, compile with:
gcc -o clup0 clup0.c
gcc -march=2.0 -o clup1 clup1.c
gcc -march=2.0 -o clup2 clup2.c)
So there is a real benefit to using doubleword insns on 64-bit (clup0 versus clup1),
but not to reducing the number of loop iterations (clup1 versus clup2).
hth,
Joel
---------------------------------------------------------------------------
Tiscali vous offre 3 mois d'ADSL et 3 mois de DVD gratuits...profitez-en...
http://reg.tiscali.be/adsl/default.asp?lg=FR
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: clup2.c --]
[-- Type: text/x-csrc, Size: 1584 bytes --]
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#define LP64 1
/*
 * Zero one 4096-byte page starting at vaddr.
 *
 * PA-RISC build: a hand-unrolled store loop using %r0 (which reads as
 * zero) as the source and %r1 as the loop counter.
 *   - LP64 defined:  16 doubleword stores (128 bytes) per pass, counter
 *     64 stepped by -2  -> 32 passes * 128 bytes = 4096 bytes.
 *   - otherwise:     16 word stores (64 bytes) per pass, counter 64
 *     stepped by -1     -> 64 passes * 64 bytes = 4096 bytes.
 * The ldo that follows addib advances the pointer; it sits in the branch
 * delay slot, so it runs on every pass.
 *
 * vaddr must be aligned for the store width in use (8 bytes for std,
 * 4 bytes for stw).
 *
 * Other targets: falls back to memset() so the program still builds;
 * timings from that path say nothing about the PA-RISC instructions.
 */
void __clear_user_page_asm(unsigned long vaddr)
{
#if defined(__hppa__)
    /*
     * Constraints: the loop rewrites the pointer register, so vaddr is an
     * in/out operand ("+r"), not a plain input.  %r1 and the stored-to
     * memory are declared as clobbers so the compiler neither keeps a
     * live value in %r1 nor caches page contents across the asm.
     */
    __asm__ __volatile__ ("ldi 64,%%r1\n"
#ifdef LP64
        "1: std %%r0,0(%0)\n"
        " std %%r0,8(%0)\n"
        " std %%r0,16(%0)\n"
        " std %%r0,24(%0)\n"
        " std %%r0,32(%0)\n"
        " std %%r0,40(%0)\n"
        " std %%r0,48(%0)\n"
        " std %%r0,56(%0)\n"
        " std %%r0,64(%0)\n"
        " std %%r0,72(%0)\n"
        " std %%r0,80(%0)\n"
        " std %%r0,88(%0)\n"
        " std %%r0,96(%0)\n"
        " std %%r0,104(%0)\n"
        " std %%r0,112(%0)\n"
        " std %%r0,120(%0)\n"
        " addib,> -2,%%r1,1b\n"
        " ldo 128(%0),%0"
#else
        "1: stw %%r0,0(%0)\n"
        " stw %%r0,4(%0)\n"
        " stw %%r0,8(%0)\n"
        " stw %%r0,12(%0)\n"
        " stw %%r0,16(%0)\n"
        " stw %%r0,20(%0)\n"
        " stw %%r0,24(%0)\n"
        " stw %%r0,28(%0)\n"
        " stw %%r0,32(%0)\n"
        " stw %%r0,36(%0)\n"
        " stw %%r0,40(%0)\n"
        " stw %%r0,44(%0)\n"
        " stw %%r0,48(%0)\n"
        " stw %%r0,52(%0)\n"
        " stw %%r0,56(%0)\n"
        " stw %%r0,60(%0)\n"
        " addib,> -1,%%r1,1b\n"
        " ldo 64(%0),%0"
#endif
        : "+r" (vaddr)
        :
        : "r1", "memory");
#else
    /* Portable stand-in: same effect (4096 zero bytes), no PA-RISC insns. */
    memset((void *)vaddr, 0, 4096);
#endif
}
/*
#define INIT 1
#define DEBUG 1
*/
/*
 * Benchmark driver: clears one page-sized stack buffer 40*4096 times with
 * __clear_user_page_asm() so the whole run can be timed externally.
 *
 * Compile-time switches (off unless defined to a non-zero value):
 *   INIT  - pre-fill the buffer with a repeating 'A'..'Z' pattern.
 *   DEBUG - print the buffer (while filling under INIT, and after the
 *           clearing loop).
 *
 * The command-line arguments and environment are not used.
 * Always returns 0.
 */
int main(int argc, char * * argv, char * * env)
{
    /*
     * The page proper is bytes 0..4095.  One extra byte is reserved so the
     * NUL terminator written under INIT stays inside the array, and the
     * buffer is forced to 8-byte alignment because the LP64 clear routine
     * uses doubleword (std) stores.
     */
    char APage[4096 + 1] __attribute__((aligned(8)));
    int i;

    (void)argc;
    (void)argv;
    (void)env;

#if INIT
    for (i = 0; i < 4096; i++) {
        int j = i % 26;
#if DEBUG
        printf("%d, %d\n", i, j);
#endif
        APage[i] = (char)(j + 'A');
#if DEBUG
        printf("APage[%d] = %d\n", i, APage[i]);
#endif
    }
    /* Terminate so the buffer can be printed with %s below. */
    APage[4096] = '\0';
#if DEBUG
    printf("APage: %s.\n", APage);
#endif
#endif /* INIT */

    /* The timed work: clear the same page over and over. */
    for (i = 0; i < 40 * 4096; i++) {
        __clear_user_page_asm((unsigned long)APage);
    }

#if DEBUG
    for (i = 0; i < 4096; i++) {
        printf("APage[%d] = %d\n", i, APage[i]);
    }
#endif
    return 0;
}
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: clup1.c --]
[-- Type: text/x-csrc, Size: 1324 bytes --]
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#define LP64 1
/*
 * Zero one 4096-byte page starting at vaddr.
 *
 * PA-RISC build: a hand-unrolled store loop using %r0 (which reads as
 * zero) as the source and %r1 as the loop counter.  Each pass clears
 * 64 bytes and the counter runs 64 -> 0 in steps of -1, so 64 passes
 * cover 4096 bytes.
 *   - LP64 defined:  8 doubleword stores (std) per pass.
 *   - otherwise:     16 word stores (stw) per pass.
 * The ldo that follows addib advances the pointer; it sits in the branch
 * delay slot, so it runs on every pass.
 *
 * vaddr must be aligned for the store width in use (8 bytes for std,
 * 4 bytes for stw).
 *
 * Other targets: falls back to memset() so the program still builds;
 * timings from that path say nothing about the PA-RISC instructions.
 */
void __clear_user_page_asm(unsigned long vaddr)
{
#if defined(__hppa__)
    /*
     * Constraints: the loop rewrites the pointer register, so vaddr is an
     * in/out operand ("+r"), not a plain input.  %r1 and the stored-to
     * memory are declared as clobbers so the compiler neither keeps a
     * live value in %r1 nor caches page contents across the asm.
     */
    __asm__ __volatile__ ("ldi 64,%%r1\n"
#ifdef LP64
        "1: std %%r0,0(%0)\n"
        " std %%r0,8(%0)\n"
        " std %%r0,16(%0)\n"
        " std %%r0,24(%0)\n"
        " std %%r0,32(%0)\n"
        " std %%r0,40(%0)\n"
        " std %%r0,48(%0)\n"
        " std %%r0,56(%0)\n"
#else
        "1: stw %%r0,0(%0)\n"
        " stw %%r0,4(%0)\n"
        " stw %%r0,8(%0)\n"
        " stw %%r0,12(%0)\n"
        " stw %%r0,16(%0)\n"
        " stw %%r0,20(%0)\n"
        " stw %%r0,24(%0)\n"
        " stw %%r0,28(%0)\n"
        " stw %%r0,32(%0)\n"
        " stw %%r0,36(%0)\n"
        " stw %%r0,40(%0)\n"
        " stw %%r0,44(%0)\n"
        " stw %%r0,48(%0)\n"
        " stw %%r0,52(%0)\n"
        " stw %%r0,56(%0)\n"
        " stw %%r0,60(%0)\n"
#endif
        " addib,> -1,%%r1,1b\n"
        " ldo 64(%0),%0"
        : "+r" (vaddr)
        :
        : "r1", "memory");
#else
    /* Portable stand-in: same effect (4096 zero bytes), no PA-RISC insns. */
    memset((void *)vaddr, 0, 4096);
#endif
}
/*
 * Benchmark driver: clears one page-sized stack buffer 40*4096 times with
 * __clear_user_page_asm() so the whole run can be timed externally.
 *
 * Compile-time switches (off unless defined to a non-zero value):
 *   INIT  - pre-fill the buffer with a repeating 'A'..'Z' pattern.
 *   DEBUG - print the buffer (while filling under INIT, and after the
 *           clearing loop).
 *
 * The command-line arguments and environment are not used.
 * Always returns 0.
 */
int main(int argc, char * * argv, char * * env)
{
    /*
     * The page proper is bytes 0..4095.  One extra byte is reserved so the
     * NUL terminator written under INIT stays inside the array, and the
     * buffer is forced to 8-byte alignment because the LP64 clear routine
     * uses doubleword (std) stores.
     */
    char APage[4096 + 1] __attribute__((aligned(8)));
    int i;

    (void)argc;
    (void)argv;
    (void)env;

#if INIT
    for (i = 0; i < 4096; i++) {
        int j = i % 26;
#if DEBUG
        printf("%d, %d\n", i, j);
#endif
        APage[i] = (char)(j + 'A');
#if DEBUG
        printf("APage[%d] = %d\n", i, APage[i]);
#endif
    }
    /* Terminate so the buffer can be printed with %s below. */
    APage[4096] = '\0';
#if DEBUG
    printf("APage: %s.\n", APage);
#endif
#endif /* INIT */

    /* The timed work: clear the same page over and over. */
    for (i = 0; i < 40 * 4096; i++) {
        __clear_user_page_asm((unsigned long)APage);
    }

#if DEBUG
    for (i = 0; i < 4096; i++) {
        printf("APage[%d] = %d\n", i, APage[i]);
    }
#endif
    return 0;
}
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #4: clup0.c --]
[-- Type: text/x-csrc, Size: 1330 bytes --]
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
/* #define LP64 1 */
/*
 * Zero one 4096-byte page starting at vaddr.
 *
 * PA-RISC build: a hand-unrolled store loop using %r0 (which reads as
 * zero) as the source and %r1 as the loop counter.  Each pass clears
 * 64 bytes and the counter runs 64 -> 0 in steps of -1, so 64 passes
 * cover 4096 bytes.
 *   - LP64 defined:  8 doubleword stores (std) per pass.
 *   - otherwise:     16 word stores (stw) per pass.
 * The ldo that follows addib advances the pointer; it sits in the branch
 * delay slot, so it runs on every pass.
 *
 * vaddr must be aligned for the store width in use (8 bytes for std,
 * 4 bytes for stw).
 *
 * Other targets: falls back to memset() so the program still builds;
 * timings from that path say nothing about the PA-RISC instructions.
 */
void __clear_user_page_asm(unsigned long vaddr)
{
#if defined(__hppa__)
    /*
     * Constraints: the loop rewrites the pointer register, so vaddr is an
     * in/out operand ("+r"), not a plain input.  %r1 and the stored-to
     * memory are declared as clobbers so the compiler neither keeps a
     * live value in %r1 nor caches page contents across the asm.
     */
    __asm__ __volatile__ ("ldi 64,%%r1\n"
#ifdef LP64
        "1: std %%r0,0(%0)\n"
        " std %%r0,8(%0)\n"
        " std %%r0,16(%0)\n"
        " std %%r0,24(%0)\n"
        " std %%r0,32(%0)\n"
        " std %%r0,40(%0)\n"
        " std %%r0,48(%0)\n"
        " std %%r0,56(%0)\n"
#else
        "1: stw %%r0,0(%0)\n"
        " stw %%r0,4(%0)\n"
        " stw %%r0,8(%0)\n"
        " stw %%r0,12(%0)\n"
        " stw %%r0,16(%0)\n"
        " stw %%r0,20(%0)\n"
        " stw %%r0,24(%0)\n"
        " stw %%r0,28(%0)\n"
        " stw %%r0,32(%0)\n"
        " stw %%r0,36(%0)\n"
        " stw %%r0,40(%0)\n"
        " stw %%r0,44(%0)\n"
        " stw %%r0,48(%0)\n"
        " stw %%r0,52(%0)\n"
        " stw %%r0,56(%0)\n"
        " stw %%r0,60(%0)\n"
#endif
        " addib,> -1,%%r1,1b\n"
        " ldo 64(%0),%0"
        : "+r" (vaddr)
        :
        : "r1", "memory");
#else
    /* Portable stand-in: same effect (4096 zero bytes), no PA-RISC insns. */
    memset((void *)vaddr, 0, 4096);
#endif
}
/*
 * Benchmark driver: clears one page-sized stack buffer 40*4096 times with
 * __clear_user_page_asm() so the whole run can be timed externally.
 *
 * Compile-time switches (off unless defined to a non-zero value):
 *   INIT  - pre-fill the buffer with a repeating 'A'..'Z' pattern.
 *   DEBUG - print the buffer (while filling under INIT, and after the
 *           clearing loop).
 *
 * The command-line arguments and environment are not used.
 * Always returns 0.
 */
int main(int argc, char * * argv, char * * env)
{
    /*
     * The page proper is bytes 0..4095.  One extra byte is reserved so the
     * NUL terminator written under INIT stays inside the array, and the
     * buffer is forced to 8-byte alignment so it is also suitable for a
     * clear routine built with doubleword (std) stores.
     */
    char APage[4096 + 1] __attribute__((aligned(8)));
    int i;

    (void)argc;
    (void)argv;
    (void)env;

#if INIT
    for (i = 0; i < 4096; i++) {
        int j = i % 26;
#if DEBUG
        printf("%d, %d\n", i, j);
#endif
        APage[i] = (char)(j + 'A');
#if DEBUG
        printf("APage[%d] = %d\n", i, APage[i]);
#endif
    }
    /* Terminate so the buffer can be printed with %s below. */
    APage[4096] = '\0';
#if DEBUG
    printf("APage: %s.\n", APage);
#endif
#endif /* INIT */

    /* The timed work: clear the same page over and over. */
    for (i = 0; i < 40 * 4096; i++) {
        __clear_user_page_asm((unsigned long)APage);
    }

#if DEBUG
    for (i = 0; i < 4096; i++) {
        printf("APage[%d] = %d\n", i, APage[i]);
    }
#endif
    return 0;
}
[-- Attachment #5: Type: text/plain, Size: 169 bytes --]
_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux
^ permalink raw reply [flat|nested] 4+ messages in thread* Re: [parisc-linux] clerar user page test
2004-12-07 14:12 [parisc-linux] clerar user page test Joel Soete
@ 2004-12-09 7:42 ` Grant Grundler
2004-12-09 17:32 ` Grant Grundler
1 sibling, 0 replies; 4+ messages in thread
From: Grant Grundler @ 2004-12-09 7:42 UTC (permalink / raw)
To: Joel Soete; +Cc: parisc-linux
On Tue, Dec 07, 2004 at 03:12:37PM +0100, Joel Soete wrote:
> Hello all,
>
> here are the results of some clup test:
> (run on b2k running 2.6.10-rc3-pa2 64bit)
> time ./clup0; time ./clup1 ; time ./clup2
>
> real 0m0.498s
> user 0m0.440s
> sys 0m0.014s
>
> real 0m0.277s
> user 0m0.229s
> sys 0m0.010s
>
> real 0m0.272s
> user 0m0.227s
> sys 0m0.013s
cool - these are good results for evaluating instruction pipeline.
Unless you are continuously clearing the new pages
I would expect your test is just pounding the cache
and not real memory.
I looked over the code and wasn't sure how big the "memory footprint"
your test had. But 40*PAGESIZE didn't seem like nearly enough.
It should walk through at least 32MB of RAM to be certain it's
not touching the same cachelines over again. For PA8800 it would
need to be 128MB or something like that.
>
> (the corresponding src are attached.
> Compile with (for remind:):
> ggc -o clup0 clup0.c
> gcc -mach=2.0 -o clup1 clup1.c
> gcc -mach=2.0 -o clup2 clup2.c)
>
> so real benefit to use double word insn on 64bit (clup0 verusu clup1)
>
> but not reducing the number of loop (clup1 versus clup2)
Well, that's still 5/270 or almost 2%.
Doubling the loop is worth doing IMHO in this case.
Do you also have time to add prefetching to clup2?
Look at the kernel prefetchw() implementation in include/asm/processor.h.
You want to use something that ends up looking like
__asm__("ldd L1_CACHE_BYTES*N(%0), %%r0" : : "r" (addr));
Vary the value "N" from 2 to 8 to see what's optimal.
Prefetching too much doesn't help either.
thanks,
grant
_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux
^ permalink raw reply [flat|nested] 4+ messages in thread* Re: [parisc-linux] clerar user page test
2004-12-07 14:12 [parisc-linux] clerar user page test Joel Soete
2004-12-09 7:42 ` Grant Grundler
@ 2004-12-09 17:32 ` Grant Grundler
2004-12-09 18:31 ` Joel Soete
1 sibling, 1 reply; 4+ messages in thread
From: Grant Grundler @ 2004-12-09 17:32 UTC (permalink / raw)
To: Joel Soete; +Cc: parisc-linux
On Tue, Dec 07, 2004 at 03:12:37PM +0100, Joel Soete wrote:
> here are the results of some clup test:
...
Joel,
I also rewrote the test and parked it on:
http://iou.parisc-linux.org/hppa/clup.c
Here's the output from a a500-6x running 2.6.10-rc1-pa12-SMP:
grundler@iodine:~$ gcc-3.3 -o clup0 clup.c
grundler@iodine:~$ gcc-3.3 -DLOOP2X -o clup1 clup.c
grundler@iodine:~$ gcc-3.3 -march=2.0 -DLP64 -o clup2 clup.c
grundler@iodine:~$ gcc-3.3 -march=2.0 -DLP64 -DLOOP2X -o clup3 clup.c
grundler@iodine:~$ gcc-3.3 -march=2.0 -DLP64 -DLOOP2X -DPREFETCH -o clup4 clup.c
grundler@iodine:~$ for i in 0 1 2 3 4; do time ./clup$i ; done 2>&1 | fgrep user
user 0m2.393s
user 0m2.392s
user 0m1.238s
user 0m1.224s
user 0m1.225s
I've varied the prefetch values and nothing seems to be helping.
I have to wonder if it's because the kernel VM is interfering
with the test.
But doubling the stride for 64-bit is consistently ~1.8% improvement.
grant
_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [parisc-linux] clerar user page test
2004-12-09 17:32 ` Grant Grundler
@ 2004-12-09 18:31 ` Joel Soete
0 siblings, 0 replies; 4+ messages in thread
From: Joel Soete @ 2004-12-09 18:31 UTC (permalink / raw)
To: Grant Grundler; +Cc: parisc-linux
> I also rewrote the test and parked it on:
> http://iou.parisc-linux.org/hppa/clup.c
>
Cool (thanks a lot)
> Here's the output from a a500-6x running 2.6.10-rc1-pa12-SMP:
> grundler@iodine:~$ gcc-3.3 -o clup0 clup.c
> grundler@iodine:~$ gcc-3.3 -DLOOP2X -o clup1 clup.c
> grundler@iodine:~$ gcc-3.3 -march=2.0 -DLP64 -o clup2 clup.c
> grundler@iodine:~$ gcc-3.3 -march=2.0 -DLP64 -DLOOP2X -o clup3 clup.c
> grundler@iodine:~$ gcc-3.3 -march=2.0 -DLP64 -DLOOP2X -DPREFETCH -o clup4 clup.c
> grundler@iodine:~$ for i in 0 1 2 3 4; do time ./clup$i ; done 2>&1 | fgrep user
> user 0m2.393s
> user 0m2.392s
> user 0m1.238s
> user 0m1.224s
> user 0m1.225s
>
> I've varied the prefetch values and nothing seems to be helping.
> I have to wonder if it's because the kernel VM is interferring
> with the test.
>
> But doubling the stride for 64-bit is consistently ~1.8% improvement.
>
OK, shall I prepare a patch, or will you?
Thx again,
Joel
---------------------------------------------------------------------------
Tiscali vous offre 3 mois d'ADSL et 3 mois de DVD gratuits...profitez-en...
http://reg.tiscali.be/adsl/default.asp?lg=FR
_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2004-12-09 18:31 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-12-07 14:12 [parisc-linux] clerar user page test Joel Soete
2004-12-09 7:42 ` Grant Grundler
2004-12-09 17:32 ` Grant Grundler
2004-12-09 18:31 ` Joel Soete
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.