All of lore.kernel.org
 help / color / mirror / Atom feed
From: Joel Soete <soete.joel@tiscali.be>
To: Grant Grundler <grundler@parisc-linux.org>
Cc: parisc-linux <parisc-linux@lists.parisc-linux.org>
Subject: [parisc-linux] Re: copy_user_page_asm suggested 64bit improvment (Test case)
Date: Tue, 28 Dec 2004 16:25:45 +0000	[thread overview]
Message-ID: <41D18909.9060308@tiscali.be> (raw)
In-Reply-To: <20041227073654.GI29492@colo.lackof.org>

[-- Attachment #1: Type: text/plain, Size: 2267 bytes --]

A test case may help to show the improvement better:

gcc -O2 -o cpup0 cpup0.c
gcc -march=2.0 -O2 -DLP64 -o cpup1 cpup0.c
gcc -march=2.0 -O2 -DLP64 -DV1 -o cpup2 cpup0.c
gcc -march=2.0 -O2 -DLP64 -DV2 -o cpup3 cpup0.c

Linux patst006 2.6.10-rc3-pa4-n4kmp #3 SMP Fri Dec 10 13:45:46 CET 2004 parisc64 GNU/Linux

# time ./cpup0 ; time ./cpup1; time ./cpup2 ; time ./cpup3

real	0m2.294s
user	0m0.226s
sys	0m2.068s

real	0m2.213s
user	0m0.140s
sys	0m2.074s

real	0m2.217s
user	0m0.108s
sys	0m2.110s

real	0m2.208s
user	0m0.108s
sys	0m2.100s
# time ./cpup0 ; time ./cpup1; time ./cpup2 ; time ./cpup3

real	0m2.316s
user	0m0.197s
sys	0m2.119s

real	0m2.217s
user	0m0.117s
sys	0m2.101s

real	0m2.203s
user	0m0.119s
sys	0m2.084s

real	0m2.205s
user	0m0.126s
sys	0m2.079s
# time ./cpup0 ; time ./cpup1; time ./cpup2 ; time ./cpup3

real	0m2.316s
user	0m0.194s
sys	0m2.122s

real	0m2.211s
user	0m0.126s
sys	0m2.086s

real	0m2.208s
user	0m0.106s
sys	0m2.102s

real	0m2.217s
user	0m0.113s
sys	0m2.105s
# time ./cpup0 ; time ./cpup1; time ./cpup2 ; time ./cpup3

real	0m2.311s
user	0m0.219s
sys	0m2.093s

real	0m2.222s
user	0m0.141s
sys	0m2.082s

real	0m2.207s
user	0m0.115s
sys	0m2.093s

real	0m2.208s
user	0m0.117s
sys	0m2.091s
# time ./cpup0 ; time ./cpup1; time ./cpup2 ; time ./cpup3

real	0m2.310s
user	0m0.205s
sys	0m2.105s

real	0m2.213s
user	0m0.104s
sys	0m2.109s

real	0m2.207s
user	0m0.115s
sys	0m2.092s

real	0m2.205s
user	0m0.108s
sys	0m2.096s

Here I wanted to know whether the order of the runs matters:

# time ./cpup0 ; time ./cpup1; time ./cpup3 ; time ./cpup2

real	0m2.294s
user	0m0.196s
sys	0m2.100s

real	0m2.221s
user	0m0.111s
sys	0m2.111s

real	0m2.226s
user	0m0.097s
sys	0m2.130s

real	0m2.208s
user	0m0.107s
sys	0m2.101s
# time ./cpup0 ; time ./cpup3; time ./cpup2 ; time ./cpup1

real	0m2.302s
user	0m0.200s
sys	0m2.102s

real	0m2.206s
user	0m0.110s
sys	0m2.097s

real	0m2.213s
user	0m0.108s
sys	0m2.106s

real	0m2.214s
user	0m0.123s
sys	0m2.092s
# time ./cpup3 ; time ./cpup2; time ./cpup1 ; time ./cpup0

real	0m2.209s
user	0m0.104s
sys	0m2.105s

real	0m2.221s
user	0m0.115s
sys	0m2.106s

real	0m2.227s
user	0m0.111s
sys	0m2.116s

real	0m2.296s
user	0m0.212s
sys	0m2.085s

Maybe there is more improvement with the 'more registers used' variant (i.e. V2, built as cpup3)?

Joel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: cpup0.c --]
[-- Type: text/x-csrc; name="cpup0.c", Size: 8594 bytes --]


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <asm/page.h>

/*
 * __copy_user_page_asm - copy one 4096-byte page with an unrolled PA-RISC loop.
 * @to:   destination of the copy
 * @from: source of the copy
 *
 * Exactly 4096 bytes are copied regardless of PAGE_SIZE (the iteration
 * counts below are hard-coded).  Both addresses must be naturally aligned
 * for the access width used: 8 bytes for ldd/std (LP64), 4 bytes for
 * ldw/stw otherwise.
 *
 * Build-time variants, selected on the compiler command line:
 *   (no LP64)    32-bit ldw/stw, 64 iterations of 64 bytes
 *   -DLP64       64-bit ldd/std, 32 iterations of 128 bytes,
 *                four loads followed by four stores
 *   -DLP64 -DV1  same, but loads and stores interleaved in pairs (r19-r22)
 *   -DLP64 -DV2  same interleaving, rotating through six registers (r19-r24)
 *
 * In every variant the loop is closed by "addib,> -1, %r1, 1b", which
 * decrements the counter in r1 and branches back while the result is
 * positive; the ldo that follows it sits in the branch delay slot.
 */
void __copy_user_page_asm(void *to, void *from)
{
	register unsigned long __to __asm__ ("r26") =  (unsigned long)to;
	register unsigned long __from __asm__ ("r25") =  (unsigned long)from;

#ifdef LP64

__asm__ __volatile__ ("ldi		32, %%r1\n"	/* PAGE_SIZE/128 == 32 */
#if V2
/* V2: pairs of loads/stores interleaved, rotating through r19-r24 */
"1:	ldd		0(%0), %%r19\n"
"	ldd		8(%0), %%r20\n"
"	ldd		16(%0), %%r21\n"
"	ldd		24(%0), %%r22\n"
"	std		%%r19, 0(%1)\n"
"	std		%%r20, 8(%1)\n"
"	ldd		32(%0), %%r23\n"
"	ldd		40(%0), %%r24\n"
"	std		%%r21, 16(%1)\n"
"	std		%%r22, 24(%1)\n"
"	ldd		48(%0), %%r19\n"
"	ldd		56(%0), %%r20\n"
"	std		%%r23, 32(%1)\n"
"	std		%%r24, 40(%1)\n"
"	ldd		64(%0), %%r21\n"
"	ldd		72(%0), %%r22\n"
"	std		%%r19, 48(%1)\n"
"	std		%%r20, 56(%1)\n"
"	ldd		80(%0), %%r23\n"
"	ldd		88(%0), %%r24\n"
"	std		%%r21, 64(%1)\n"
"	std		%%r22, 72(%1)\n"
"	ldd		96(%0), %%r19\n"
"	ldd		104(%0), %%r20\n"
"	std		%%r23, 80(%1)\n"
"	std		%%r24, 88(%1)\n"
"	ldd		112(%0), %%r21\n"
"	ldd		120(%0), %%r22\n"
"	std		%%r19, 96(%1)\n"
"	std		%%r20, 104(%1)\n"
"	ldo		128(%0), %0\n"
"	std		%%r21, 112(%1)\n"
"	std		%%r22, 120(%1)\n"
"	addib,>		-1, %%r1, 1b\n"
"	ldo		128(%1), %1"
#else	/* !V2 */
/* Common prologue of the baseline and V1 variants: first 32 bytes loaded */
"1:	ldd		0(%0), %%r19\n"
"	ldd		8(%0), %%r20\n"
"	ldd		16(%0), %%r21\n"
"	ldd		24(%0), %%r22\n"
"	std		%%r19, 0(%1)\n"
"	std		%%r20, 8(%1)\n"
#ifndef V1
/* Baseline: groups of four loads followed by four stores */
"	std		%%r21, 16(%1)\n"
"	std		%%r22, 24(%1)\n"
"	ldd		32(%0), %%r19\n"
"	ldd		40(%0), %%r20\n"
"	ldd		48(%0), %%r21\n"
"	ldd		56(%0), %%r22\n"
"	std		%%r19, 32(%1)\n"
"	std		%%r20, 40(%1)\n"
"	std		%%r21, 48(%1)\n"
"	std		%%r22, 56(%1)\n"
"	ldd		64(%0), %%r19\n"
"	ldd		72(%0), %%r20\n"
"	ldd		80(%0), %%r21\n"
"	ldd		88(%0), %%r22\n"
"	std		%%r19, 64(%1)\n"
"	std		%%r20, 72(%1)\n"
"	std		%%r21, 80(%1)\n"
"	std		%%r22, 88(%1)\n"
"	ldd		96(%0), %%r19\n"
"	ldd		104(%0), %%r20\n"
"	ldd		112(%0), %%r21\n"
"	ldd		120(%0), %%r22\n"
"	std		%%r19, 96(%1)\n"
"	std		%%r20, 104(%1)\n"
"	std		%%r21, 112(%1)\n"
"	std		%%r22, 120(%1)\n"
"	ldo		128(%1), %1\n"
"	addib,>		-1, %%r1, 1b\n"
"	ldo		128(%0), %0"
#else	/* V1 */
/* V1: the next pair of loads is issued before the previous pair is stored */
"	ldd		32(%0), %%r19\n"
"	ldd		40(%0), %%r20\n"
"	std		%%r21, 16(%1)\n"
"	std		%%r22, 24(%1)\n"
"	ldd		48(%0), %%r21\n"
"	ldd		56(%0), %%r22\n"
"	std		%%r19, 32(%1)\n"
"	std		%%r20, 40(%1)\n"
"	ldd		64(%0), %%r19\n"
"	ldd		72(%0), %%r20\n"
"	std		%%r21, 48(%1)\n"
"	std		%%r22, 56(%1)\n"
"	ldd		80(%0), %%r21\n"
"	ldd		88(%0), %%r22\n"
"	std		%%r19, 64(%1)\n"
"	std		%%r20, 72(%1)\n"
"	ldd		96(%0), %%r19\n"
"	ldd		104(%0), %%r20\n"
"	std		%%r21, 80(%1)\n"
"	std		%%r22, 88(%1)\n"
"	ldd		112(%0), %%r21\n"
"	ldd		120(%0), %%r22\n"
"	std		%%r19, 96(%1)\n"
"	std		%%r20, 104(%1)\n"
"	ldo		128(%0), %0\n"
"	std		%%r21, 112(%1)\n"
"	std		%%r22, 120(%1)\n"
"	addib,>		-1, %%r1, 1b\n"
"	ldo		128(%1), %1"
#endif	/* V1 */

#endif	/* V2 */

#else	/* !LP64 */

/* 32-bit variant: 64 iterations of 64 bytes == 4096 bytes */
__asm__ __volatile__ ("ldi		64, %%r1\n"
"1:	ldw		0(%0), %%r19\n"
"	ldw		4(%0), %%r20\n"
"	ldw		8(%0), %%r21\n"
"	ldw		12(%0), %%r22\n"
"	stw		%%r19, 0(%1)\n"
"	stw		%%r20, 4(%1)\n"
"	stw		%%r21, 8(%1)\n"
"	stw		%%r22, 12(%1)\n"
"	ldw		16(%0), %%r19\n"
"	ldw		20(%0), %%r20\n"
"	ldw		24(%0), %%r21\n"
"	ldw		28(%0), %%r22\n"
"	stw		%%r19, 16(%1)\n"
"	stw		%%r20, 20(%1)\n"
"	stw		%%r21, 24(%1)\n"
"	stw		%%r22, 28(%1)\n"
"	ldw		32(%0), %%r19\n"
"	ldw		36(%0), %%r20\n"
"	ldw		40(%0), %%r21\n"
"	ldw		44(%0), %%r22\n"
"	stw		%%r19, 32(%1)\n"
"	stw		%%r20, 36(%1)\n"
"	stw		%%r21, 40(%1)\n"
"	stw		%%r22, 44(%1)\n"
"	ldw		48(%0), %%r19\n"
"	ldw		52(%0), %%r20\n"
"	ldw		56(%0), %%r21\n"
"	ldw		60(%0), %%r22\n"
"	stw		%%r19, 48(%1)\n"
"	stw		%%r20, 52(%1)\n"
"	stw		%%r21, 56(%1)\n"
"	stw		%%r22, 60(%1)\n"
"	ldo		64(%1), %1\n"
"	addib,>		-1, %%r1, 1b\n"
"	ldo		64(%0), %0"
#endif	/* LP64 */
	/*
	 * %0 = source pointer, %1 = destination pointer.  The loop advances
	 * both with ldo, so they must be read-write ("+r") operands; an
	 * input-only operand may not be modified by the asm.
	 */
	: "+r"(__from), "+r"(__to)
	:
	/*
	 * r1 is the loop counter and r19-r24 are the copy temporaries (r23/r24
	 * only in the V2 variant; listing them unconditionally is harmless).
	 * "memory" tells the compiler that the destination page is written.
	 * NOTE(review): r19 doubles as the PIC register in 32-bit PIC builds;
	 * if this is ever compiled with -fPIC the clobber may be rejected --
	 * confirm against the toolchain in use.
	 */
	: "r1", "r19", "r20", "r21", "r22", "r23", "r24", "memory" );
}

/* 
#define	INIT	1
#define	DEBUG	1
 */

/* Size in bytes of the destination buffer filled by the benchmark (256 MiB). */
#define BUFFSIZE	(1024*1024*256)
/* Number of PAGE_SIZE-sized pages that fit in the destination buffer. */
#define PPB		(BUFFSIZE/PAGE_SIZE)	/* Pages Per Buff */


int main(int argc, char * * argv, char * * env)
{
	char MemSrc[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEF
GHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcd
efghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmn" ;

	char *MemDst;
	int i, j, k;


	MemDst = malloc(BUFFSIZE);
	for (j = 0; j < PPB ; j++) {
		__copy_user_page_asm(MemDst+(j*PAGE_SIZE), MemSrc);
	}

	MemDst[BUFFSIZE] = '\0';

#if DEBUG
/*
	printf("MemDst = %s\n", MemDst);
 */
	for (i=0; i<BUFFSIZE; i++) {
		printf("MemDst[%d] = %c\n", i, MemDst[i]);
	}
#endif
	return 0;
}


[-- Attachment #3: Type: text/plain, Size: 169 bytes --]

_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux

  parent reply	other threads:[~2004-12-28 16:25 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <418A80E8000124B5@mail-6-bnl.tiscali.it>
2004-12-27  7:36 ` copy_user_page_asm suggested 64bit improvment [Was: [parisc-linux] clear user page test] Grant Grundler
2004-12-27 10:40   ` Joel Soete
2004-12-27 15:08     ` James Bottomley
2004-12-31 20:26       ` Michael S. Zick
2004-12-31 20:56         ` Grant Grundler
2004-12-31 21:35           ` Michael S. Zick
     [not found]             ` <20041231225447.GC23592@colo.lackof.org>
2004-12-31 23:56               ` Michael S. Zick
2005-01-12 13:52               ` Michael S. Zick
2005-01-12 15:32                 ` Joel Soete
2004-12-31 21:21         ` James Bottomley
2004-12-27 17:34     ` Joel Soete
2004-12-27 18:32     ` Joel Soete
2004-12-28 16:25   ` Joel Soete [this message]
2004-12-29  5:46     ` [parisc-linux] Re: copy_user_page_asm suggested 64bit improvment (Test case) Grant Grundler
2004-12-29 11:36       ` Joel Soete
2004-12-30  8:10   ` copy_user_page_asm suggested 64bit improvment [Was: [parisc-linux] clear user page test] Grant Grundler
2004-12-30 17:04     ` [parisc-linux] Re: copy_user_page_asm suggested 64bit improvment [Was: [parisc-l John David Anglin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=41D18909.9060308@tiscali.be \
    --to=soete.joel@tiscali.be \
    --cc=grundler@parisc-linux.org \
    --cc=parisc-linux@lists.parisc-linux.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.