All of lore.kernel.org
 help / color / mirror / Atom feed
* [parisc-linux] clerar user page test
@ 2004-12-07 14:12 Joel Soete
  2004-12-09  7:42 ` Grant Grundler
  2004-12-09 17:32 ` Grant Grundler
  0 siblings, 2 replies; 4+ messages in thread
From: Joel Soete @ 2004-12-07 14:12 UTC (permalink / raw)
  To: parisc-linux

[-- Attachment #1: Type: text/plain, Size: 829 bytes --]

Hello all,

here are the results of some clup test:
(run on b2k running  2.6.10-rc3-pa2 64bit)
time ./clup0; time ./clup1 ; time ./clup2

real    0m0.498s
user    0m0.440s
sys     0m0.014s

real    0m0.277s
user    0m0.229s
sys     0m0.010s

real    0m0.272s
user    0m0.227s
sys     0m0.013s

(The corresponding sources are attached.
As a reminder, compile with:
gcc -o clup0 clup0.c
gcc -march=2.0 -o clup1 clup1.c
gcc -march=2.0 -o clup2 clup2.c)

So there is a real benefit to using doubleword instructions on 64-bit (clup0 versus clup1),

but not from reducing the number of loop iterations (clup1 versus clup2).

hth,
    Joel


---------------------------------------------------------------------------
Tiscali vous offre 3 mois d'ADSL et 3 mois de DVD gratuits...profitez-en...
http://reg.tiscali.be/adsl/default.asp?lg=FR





[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: clup2.c --]
[-- Type: text/x-csrc, Size: 1584 bytes --]


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

#define LP64	1 

/*
 * Zero 4096 bytes starting at vaddr by storing %r0 with PA-RISC inline asm.
 *
 * %r1 is the loop counter, loaded with 64.
 *   LP64 defined:  16 doubleword stores (128 bytes) per pass, counter stepped
 *                  by -2, so 32 passes.
 *   otherwise:     16 word stores (64 bytes) per pass, counter stepped by -1,
 *                  so 64 passes.
 * Either way 4096 bytes are written.  The ldo that advances the pointer
 * follows the addib branch, i.e. it sits in the branch delay slot.
 *
 * vaddr must be aligned for the store width used (8 bytes for std, 4 for stw).
 *
 * Constraints: the asm advances %0, so vaddr is a read-write ("+r") operand
 * rather than an input-only one; %r1 is overwritten and memory is written
 * behind the compiler's back, so both are listed as clobbers.
 */
void __clear_user_page_asm(unsigned long vaddr)
{

asm volatile ("ldi	64,%%r1\n"
#ifdef LP64
"1:	std	%%r0,0(%0)\n"
"	std	%%r0,8(%0)\n"
"	std	%%r0,16(%0)\n"
"	std	%%r0,24(%0)\n"
"	std	%%r0,32(%0)\n"
"	std	%%r0,40(%0)\n"
"	std	%%r0,48(%0)\n"
"	std	%%r0,56(%0)\n"
"	std	%%r0,64(%0)\n"
"	std	%%r0,72(%0)\n"
"	std	%%r0,80(%0)\n"
"	std	%%r0,88(%0)\n"
"	std	%%r0,96(%0)\n"
"	std	%%r0,104(%0)\n"
"	std	%%r0,112(%0)\n"
"	std	%%r0,120(%0)\n"
"	addib,>  -2,%%r1,1b\n"
"	ldo	128(%0),%0"
#else
"1:	stw	%%r0,0(%0)\n"
"	stw	%%r0,4(%0)\n"
"	stw	%%r0,8(%0)\n"
"	stw	%%r0,12(%0)\n"
"	stw	%%r0,16(%0)\n"
"	stw	%%r0,20(%0)\n"
"	stw	%%r0,24(%0)\n"
"	stw	%%r0,28(%0)\n"
"	stw	%%r0,32(%0)\n"
"	stw	%%r0,36(%0)\n"
"	stw	%%r0,40(%0)\n"
"	stw	%%r0,44(%0)\n"
"	stw	%%r0,48(%0)\n"
"	stw	%%r0,52(%0)\n"
"	stw	%%r0,56(%0)\n"
"	stw	%%r0,60(%0)\n"
"	addib,>  -1,%%r1,1b\n"
"	ldo	64(%0),%0"
#endif
	: "+r"(vaddr)		/* pointer is advanced by the loop */
	:
	: "r1", "memory");
}

/* 
#define	INIT	1
#define	DEBUG	1
 */

/*
 * Benchmark driver: calls __clear_user_page_asm() on the same 4096-byte
 * stack buffer 40*4096 times so the run can be measured with time(1).
 *
 * Build with -DINIT=1 to pre-fill the page with a repeating 'A'..'Z'
 * pattern, and with -DDEBUG=1 to dump the buffer contents.
 * argc, argv and env are not used.  Always returns 0.
 */
int main(int argc, char * * argv, char * * env)
{
	/*
	 * 4096 bytes of page plus one spare byte for the NUL terminator the
	 * INIT/DEBUG path needs (the terminator used to be written one past
	 * the end of the array).  A plain char array carries no alignment
	 * guarantee, so request 8 bytes for the stw/std stores in the asm.
	 */
	char APage[4096 + 1] __attribute__((aligned(8)));
	int i;

#if INIT
	for (i=0; i<4096; i++) {
		int j = i % 26;
#if DEBUG
		printf("%d, %d\n", i, j);
#endif
		APage[i] = (char)(j+65);	/* 65 == 'A' */
#if DEBUG
		printf("APage[%d] = %d\n", i, APage[i]);
#endif
	}
	APage[4096] = '\0';
#if DEBUG
	printf("APage: %s.\n", APage);
#endif
#endif	/* INIT */
	for (i=0; i<40*4096; i++) {
		__clear_user_page_asm((unsigned long)APage);
	}

#if DEBUG
	for (i=0; i<4096; i++) {
		printf("APage[%d] = %d\n", i, APage[i]);
	}
#endif
    return 0;
}


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: clup1.c --]
[-- Type: text/x-csrc, Size: 1324 bytes --]


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

#define LP64	1 

/*
 * Zero 4096 bytes starting at vaddr by storing %r0 with PA-RISC inline asm.
 *
 * %r1 is the loop counter: loaded with 64 and stepped by -1, so the loop
 * body runs 64 times and the pointer advances 64 bytes per pass.
 *   LP64 defined:  8 doubleword (std) stores per pass.
 *   otherwise:     16 word (stw) stores per pass.
 * The ldo that advances the pointer follows the addib branch, i.e. it sits
 * in the branch delay slot.
 *
 * vaddr must be aligned for the store width used (8 bytes for std, 4 for stw).
 *
 * Constraints: the asm advances %0, so vaddr is a read-write ("+r") operand
 * rather than an input-only one; %r1 is overwritten and memory is written
 * behind the compiler's back, so both are listed as clobbers.
 */
void __clear_user_page_asm(unsigned long vaddr)
{

asm volatile ("ldi	64,%%r1\n"
#ifdef LP64
"1:	std	%%r0,0(%0)\n"
"	std	%%r0,8(%0)\n"
"	std	%%r0,16(%0)\n"
"	std	%%r0,24(%0)\n"
"	std	%%r0,32(%0)\n"
"	std	%%r0,40(%0)\n"
"	std	%%r0,48(%0)\n"
"	std	%%r0,56(%0)\n"
#else
"1:	stw	%%r0,0(%0)\n"
"	stw	%%r0,4(%0)\n"
"	stw	%%r0,8(%0)\n"
"	stw	%%r0,12(%0)\n"
"	stw	%%r0,16(%0)\n"
"	stw	%%r0,20(%0)\n"
"	stw	%%r0,24(%0)\n"
"	stw	%%r0,28(%0)\n"
"	stw	%%r0,32(%0)\n"
"	stw	%%r0,36(%0)\n"
"	stw	%%r0,40(%0)\n"
"	stw	%%r0,44(%0)\n"
"	stw	%%r0,48(%0)\n"
"	stw	%%r0,52(%0)\n"
"	stw	%%r0,56(%0)\n"
"	stw	%%r0,60(%0)\n"
#endif
"	addib,>  -1,%%r1,1b\n"
"	ldo	64(%0),%0"
	: "+r"(vaddr)		/* pointer is advanced by the loop */
	:
	: "r1", "memory");
}

/*
 * Benchmark driver: calls __clear_user_page_asm() on the same 4096-byte
 * stack buffer 40*4096 times so the run can be measured with time(1).
 *
 * Build with -DINIT=1 to pre-fill the page with a repeating 'A'..'Z'
 * pattern, and with -DDEBUG=1 to dump the buffer contents.
 * argc, argv and env are not used.  Always returns 0.
 */
int main(int argc, char * * argv, char * * env)
{
	/*
	 * 4096 bytes of page plus one spare byte for the NUL terminator the
	 * INIT/DEBUG path needs (the terminator used to be written one past
	 * the end of the array).  A plain char array carries no alignment
	 * guarantee, so request 8 bytes for the stw/std stores in the asm.
	 */
	char APage[4096 + 1] __attribute__((aligned(8)));
	int i;

#if INIT
	for (i=0; i<4096; i++) {
		int j = i % 26;
#if DEBUG
		printf("%d, %d\n", i, j);
#endif
		APage[i] = (char)(j+65);	/* 65 == 'A' */
#if DEBUG
		printf("APage[%d] = %d\n", i, APage[i]);
#endif
	}
	APage[4096] = '\0';
#if DEBUG
	printf("APage: %s.\n", APage);
#endif
#endif	/* INIT */
	for (i=0; i<40*4096; i++) {
		__clear_user_page_asm((unsigned long)APage);
	}

#if DEBUG
	for (i=0; i<4096; i++) {
		printf("APage[%d] = %d\n", i, APage[i]);
	}
#endif
    return 0;
}


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #4: clup0.c --]
[-- Type: text/x-csrc, Size: 1330 bytes --]


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

/* #define LP64	1  */

/*
 * Zero 4096 bytes starting at vaddr by storing %r0 with PA-RISC inline asm.
 *
 * %r1 is the loop counter: loaded with 64 and stepped by -1, so the loop
 * body runs 64 times and the pointer advances 64 bytes per pass.
 *   LP64 defined:  8 doubleword (std) stores per pass.
 *   otherwise:     16 word (stw) stores per pass.
 * The ldo that advances the pointer follows the addib branch, i.e. it sits
 * in the branch delay slot.
 *
 * vaddr must be aligned for the store width used (8 bytes for std, 4 for stw).
 *
 * Constraints: the asm advances %0, so vaddr is a read-write ("+r") operand
 * rather than an input-only one; %r1 is overwritten and memory is written
 * behind the compiler's back, so both are listed as clobbers.
 */
void __clear_user_page_asm(unsigned long vaddr)
{

asm volatile ("ldi	64,%%r1\n"
#ifdef LP64
"1:	std	%%r0,0(%0)\n"
"	std	%%r0,8(%0)\n"
"	std	%%r0,16(%0)\n"
"	std	%%r0,24(%0)\n"
"	std	%%r0,32(%0)\n"
"	std	%%r0,40(%0)\n"
"	std	%%r0,48(%0)\n"
"	std	%%r0,56(%0)\n"
#else
"1:	stw	%%r0,0(%0)\n"
"	stw	%%r0,4(%0)\n"
"	stw	%%r0,8(%0)\n"
"	stw	%%r0,12(%0)\n"
"	stw	%%r0,16(%0)\n"
"	stw	%%r0,20(%0)\n"
"	stw	%%r0,24(%0)\n"
"	stw	%%r0,28(%0)\n"
"	stw	%%r0,32(%0)\n"
"	stw	%%r0,36(%0)\n"
"	stw	%%r0,40(%0)\n"
"	stw	%%r0,44(%0)\n"
"	stw	%%r0,48(%0)\n"
"	stw	%%r0,52(%0)\n"
"	stw	%%r0,56(%0)\n"
"	stw	%%r0,60(%0)\n"
#endif
"	addib,>  -1,%%r1,1b\n"
"	ldo	64(%0),%0"
	: "+r"(vaddr)		/* pointer is advanced by the loop */
	:
	: "r1", "memory");
}

/*
 * Benchmark driver: calls __clear_user_page_asm() on the same 4096-byte
 * stack buffer 40*4096 times so the run can be measured with time(1).
 *
 * Build with -DINIT=1 to pre-fill the page with a repeating 'A'..'Z'
 * pattern, and with -DDEBUG=1 to dump the buffer contents.
 * argc, argv and env are not used.  Always returns 0.
 */
int main(int argc, char * * argv, char * * env)
{
	/*
	 * 4096 bytes of page plus one spare byte for the NUL terminator the
	 * INIT/DEBUG path needs (the terminator used to be written one past
	 * the end of the array).  A plain char array carries no alignment
	 * guarantee, so request 8 bytes for the stw/std stores in the asm.
	 */
	char APage[4096 + 1] __attribute__((aligned(8)));
	int i;

#if INIT
	for (i=0; i<4096; i++) {
		int j = i % 26;
#if DEBUG
		printf("%d, %d\n", i, j);
#endif
		APage[i] = (char)(j+65);	/* 65 == 'A' */
#if DEBUG
		printf("APage[%d] = %d\n", i, APage[i]);
#endif
	}
	APage[4096] = '\0';
#if DEBUG
	printf("APage: %s.\n", APage);
#endif
#endif	/* INIT */
	for (i=0; i<40*4096; i++) {
		__clear_user_page_asm((unsigned long)APage);
	}

#if DEBUG
	for (i=0; i<4096; i++) {
		printf("APage[%d] = %d\n", i, APage[i]);
	}
#endif
    return 0;
}


[-- Attachment #5: Type: text/plain, Size: 169 bytes --]

_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [parisc-linux] clerar user page test
  2004-12-07 14:12 [parisc-linux] clerar user page test Joel Soete
@ 2004-12-09  7:42 ` Grant Grundler
  2004-12-09 17:32 ` Grant Grundler
  1 sibling, 0 replies; 4+ messages in thread
From: Grant Grundler @ 2004-12-09  7:42 UTC (permalink / raw)
  To: Joel Soete; +Cc: parisc-linux

On Tue, Dec 07, 2004 at 03:12:37PM +0100, Joel Soete wrote:
> Hello all,
> 
> here are the results of some clup test:
> (run on b2k running  2.6.10-rc3-pa2 64bit)
> time ./clup0; time ./clup1 ; time ./clup2
> 
> real    0m0.498s
> user    0m0.440s
> sys     0m0.014s
> 
> real    0m0.277s
> user    0m0.229s
> sys     0m0.010s
>
> real    0m0.272s
> user    0m0.227s
> sys     0m0.013s

cool - these are good results for evaluating instruction pipeline.

Unless you are continuously clearing the new pages
I would expect your test is just pounding the cache
and not real memory.

I looked over the code and wasn't sure how big the "memory footprint"
your test had. But 40*PAGESIZE didn't seem like nearly enough.

It should walk through at least 32MB of RAM to be certain it's
not touching the same cachelines over again. For PA8800 it would
need to be 128MB or something like that.

> 
> (the corresponding src are attached.
> Compile with (for remind:):
> ggc -o clup0 clup0.c
> gcc -mach=2.0 -o clup1 clup1.c
> gcc -mach=2.0 -o clup2 clup2.c)
> 
> so real benefit to use double word insn on 64bit (clup0 verusu clup1)
> 
> but not reducing the number of loop (clup1 versus clup2)

Well, that's still 5/270 or almost 2%.
Doubling the loop is worth doing IMHO in this case.

Do you also have time to add prefetching to clup2?

Look at the kernel prefetchw() implementation in include/asm/processor.h.
You want to use something that ends up looking like
	__asm__("ldd L1_CACHE_BYTES*N(%0), %%r0" : : "r" (addr));

Vary the value "N" from 2 to 8 to see what's optimal.
Prefetching too much doesn't help either.

thanks,
grant
_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [parisc-linux] clerar user page test
  2004-12-07 14:12 [parisc-linux] clerar user page test Joel Soete
  2004-12-09  7:42 ` Grant Grundler
@ 2004-12-09 17:32 ` Grant Grundler
  2004-12-09 18:31   ` Joel Soete
  1 sibling, 1 reply; 4+ messages in thread
From: Grant Grundler @ 2004-12-09 17:32 UTC (permalink / raw)
  To: Joel Soete; +Cc: parisc-linux

On Tue, Dec 07, 2004 at 03:12:37PM +0100, Joel Soete wrote:
> here are the results of some clup test:
...

Joel,
I also rewrote the test and parked it on:
	http://iou.parisc-linux.org/hppa/clup.c

Here's the output from a a500-6x running 2.6.10-rc1-pa12-SMP:
grundler@iodine:~$ gcc-3.3 -o clup0 clup.c
grundler@iodine:~$ gcc-3.3 -DLOOP2X -o clup1 clup.c
grundler@iodine:~$ gcc-3.3 -march=2.0 -DLP64 -o clup2 clup.c
grundler@iodine:~$ gcc-3.3 -march=2.0 -DLP64 -DLOOP2X -o clup3 clup.c
grundler@iodine:~$ gcc-3.3 -march=2.0 -DLP64 -DLOOP2X -DPREFETCH -o clup4 clup.c
grundler@iodine:~$ for i in 0 1 2 3 4; do time ./clup$i ; done 2>&1 | fgrep user 
user    0m2.393s
user    0m2.392s
user    0m1.238s
user    0m1.224s
user    0m1.225s

I've varied the prefetch values and nothing seems to be helping.
I have to wonder if it's because the kernel VM is interfering
with the test.

But doubling the stride for 64-bit is consistently ~1.8% improvement.

grant
_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [parisc-linux] clerar user page test
  2004-12-09 17:32 ` Grant Grundler
@ 2004-12-09 18:31   ` Joel Soete
  0 siblings, 0 replies; 4+ messages in thread
From: Joel Soete @ 2004-12-09 18:31 UTC (permalink / raw)
  To: Grant Grundler; +Cc: parisc-linux


> I also rewrote the test and parked it on:
> 	http://iou.parisc-linux.org/hppa/clup.c
> 
Cool (thanks a lot)

> Here's the output from a a500-6x running 2.6.10-rc1-pa12-SMP:
> grundler@iodine:~$ gcc-3.3 -o clup0 clup.c
> grundler@iodine:~$ gcc-3.3 -DLOOP2X -o clup1 clup.c
> grundler@iodine:~$ gcc-3.3 -march=2.0 -DLP64 -o clup2 clup.c
> grundler@iodine:~$ gcc-3.3 -march=2.0 -DLP64 -DLOOP2X -o clup3 clup.c
> grundler@iodine:~$ gcc-3.3 -march=2.0 -DLP64 -DLOOP2X -DPREFETCH -o clup4 clup.c
> grundler@iodine:~$ for i in 0 1 2 3 4; do time ./clup$i ; done 2>&1 | fgrep user
> user    0m2.393s
> user    0m2.392s
> user    0m1.238s
> user    0m1.224s
> user    0m1.225s
> 
> I've varied the prefetch values and nothing seems to be helping.
> I have to wonder if it's because the kernel VM is interferring
> with the test.
> 
> But doubling the stride for 64-bit is consistently ~1.8% improvement.
> 
OK, shall I prepare a patch, or will you?

Thx again,
    Joel

---------------------------------------------------------------------------
Tiscali vous offre 3 mois d'ADSL et 3 mois de DVD gratuits...profitez-en...
http://reg.tiscali.be/adsl/default.asp?lg=FR




_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2004-12-09 18:31 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-12-07 14:12 [parisc-linux] clerar user page test Joel Soete
2004-12-09  7:42 ` Grant Grundler
2004-12-09 17:32 ` Grant Grundler
2004-12-09 18:31   ` Joel Soete

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.