From: Joel Soete <soete.joel@tiscali.be>
To: Grant Grundler <grundler@parisc-linux.org>
Cc: parisc-linux <parisc-linux@lists.parisc-linux.org>
Subject: [parisc-linux] Re: copy_user_page_asm suggested 64bit improvment (Test case)
Date: Tue, 28 Dec 2004 16:25:45 +0000 [thread overview]
Message-ID: <41D18909.9060308@tiscali.be> (raw)
In-Reply-To: <20041227073654.GI29492@colo.lackof.org>
[-- Attachment #1: Type: text/plain, Size: 2267 bytes --]
A test case may help to show the improvement better:
gcc -O2 -o cpup0 cpup0.c
gcc -march=2.0 -O2 -DLP64 -o cpup1 cpup0.c
gcc -march=2.0 -O2 -DLP64 -DV1 -o cpup2 cpup0.c
gcc -march=2.0 -O2 -DLP64 -DV2 -o cpup3 cpup0.c
Linux patst006 2.6.10-rc3-pa4-n4kmp #3 SMP Fri Dec 10 13:45:46 CET 2004 parisc64 GNU/Linux
# time ./cpup0 ; time ./cpup1; time ./cpup2 ; time ./cpup3
real 0m2.294s
user 0m0.226s
sys 0m2.068s
real 0m2.213s
user 0m0.140s
sys 0m2.074s
real 0m2.217s
user 0m0.108s
sys 0m2.110s
real 0m2.208s
user 0m0.108s
sys 0m2.100s
# time ./cpup0 ; time ./cpup1; time ./cpup2 ; time ./cpup3
real 0m2.316s
user 0m0.197s
sys 0m2.119s
real 0m2.217s
user 0m0.117s
sys 0m2.101s
real 0m2.203s
user 0m0.119s
sys 0m2.084s
real 0m2.205s
user 0m0.126s
sys 0m2.079s
# time ./cpup0 ; time ./cpup1; time ./cpup2 ; time ./cpup3
real 0m2.316s
user 0m0.194s
sys 0m2.122s
real 0m2.211s
user 0m0.126s
sys 0m2.086s
real 0m2.208s
user 0m0.106s
sys 0m2.102s
real 0m2.217s
user 0m0.113s
sys 0m2.105s
# time ./cpup0 ; time ./cpup1; time ./cpup2 ; time ./cpup3
real 0m2.311s
user 0m0.219s
sys 0m2.093s
real 0m2.222s
user 0m0.141s
sys 0m2.082s
real 0m2.207s
user 0m0.115s
sys 0m2.093s
real 0m2.208s
user 0m0.117s
sys 0m2.091s
# time ./cpup0 ; time ./cpup1; time ./cpup2 ; time ./cpup3
real 0m2.310s
user 0m0.205s
sys 0m2.105s
real 0m2.213s
user 0m0.104s
sys 0m2.109s
real 0m2.207s
user 0m0.115s
sys 0m2.092s
real 0m2.205s
user 0m0.108s
sys 0m2.096s
I would also like to know whether the order of execution matters:
# time ./cpup0 ; time ./cpup1; time ./cpup3 ; time ./cpup2
real 0m2.294s
user 0m0.196s
sys 0m2.100s
real 0m2.221s
user 0m0.111s
sys 0m2.111s
real 0m2.226s
user 0m0.097s
sys 0m2.130s
real 0m2.208s
user 0m0.107s
sys 0m2.101s
# time ./cpup0 ; time ./cpup3; time ./cpup2 ; time ./cpup1
real 0m2.302s
user 0m0.200s
sys 0m2.102s
real 0m2.206s
user 0m0.110s
sys 0m2.097s
real 0m2.213s
user 0m0.108s
sys 0m2.106s
real 0m2.214s
user 0m0.123s
sys 0m2.092s
# time ./cpup3 ; time ./cpup2; time ./cpup1 ; time ./cpup0
real 0m2.209s
user 0m0.104s
sys 0m2.105s
real 0m2.221s
user 0m0.115s
sys 0m2.106s
real 0m2.227s
user 0m0.111s
sys 0m2.116s
real 0m2.296s
user 0m0.212s
sys 0m2.085s
Maybe there is more improvement with the 'more registers used' variant (i.e. V2 and cpup3)?
Joel
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: cpup0.c --]
[-- Type: text/x-csrc; name="cpup0.c", Size: 8594 bytes --]
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <asm/page.h>
void __copy_user_page_asm(void *to, void *from)
{
register unsigned long __to __asm__ ("r26") = (unsigned long)to;
register unsigned long __from __asm__ ("r25") = (unsigned long)from;
#ifdef LP64
asm volatile ("ldi 32, %%r1\n" /* PAGE_SIZE/128 == 32 */
#if V2
"1: ldd 0(%0), %%r19\n"
" ldd 8(%0), %%r20\n"
" ldd 16(%0), %%r21\n"
" ldd 24(%0), %%r22\n"
" std %%r19, 0(%1)\n"
" std %%r20, 8(%1)\n"
" ldd 32(%0), %%r23\n"
" ldd 40(%0), %%r24\n"
" std %%r21, 16(%1)\n"
" std %%r22, 24(%1)\n"
" ldd 48(%0), %%r19\n"
" ldd 56(%0), %%r20\n"
" std %%r23, 32(%1)\n"
" std %%r24, 40(%1)\n"
" ldd 64(%0), %%r21\n"
" ldd 72(%0), %%r22\n"
" std %%r19, 48(%1)\n"
" std %%r20, 56(%1)\n"
" ldd 80(%0), %%r23\n"
" ldd 88(%0), %%r24\n"
" std %%r21, 64(%1)\n"
" std %%r22, 72(%1)\n"
" ldd 96(%0), %%r19\n"
" ldd 104(%0), %%r20\n"
" std %%r23, 80(%1)\n"
" std %%r24, 88(%1)\n"
" ldd 112(%0), %%r21\n"
" ldd 120(%0), %%r22\n"
" std %%r19, 96(%1)\n"
" std %%r20, 104(%1)\n"
" ldo 128(%0), %0\n"
" std %%r21, 112(%1)\n"
" std %%r22, 120(%1)\n"
" addib,> -1, %%r1, 1b\n"
" ldo 128(%1), %1"
#else /* !V2 */
"1: ldd 0(%0), %%r19\n"
" ldd 8(%0), %%r20\n"
" ldd 16(%0), %%r21\n"
" ldd 24(%0), %%r22\n"
" std %%r19, 0(%1)\n"
" std %%r20, 8(%1)\n"
#ifndef V1
" std %%r21, 16(%1)\n"
" std %%r22, 24(%1)\n"
" ldd 32(%0), %%r19\n"
" ldd 40(%0), %%r20\n"
" ldd 48(%0), %%r21\n"
" ldd 56(%0), %%r22\n"
" std %%r19, 32(%1)\n"
" std %%r20, 40(%1)\n"
" std %%r21, 48(%1)\n"
" std %%r22, 56(%1)\n"
" ldd 64(%0), %%r19\n"
" ldd 72(%0), %%r20\n"
" ldd 80(%0), %%r21\n"
" ldd 88(%0), %%r22\n"
" std %%r19, 64(%1)\n"
" std %%r20, 72(%1)\n"
" std %%r21, 80(%1)\n"
" std %%r22, 88(%1)\n"
" ldd 96(%0), %%r19\n"
" ldd 104(%0), %%r20\n"
" ldd 112(%0), %%r21\n"
" ldd 120(%0), %%r22\n"
" std %%r19, 96(%1)\n"
" std %%r20, 104(%1)\n"
" std %%r21, 112(%1)\n"
" std %%r22, 120(%1)\n"
" ldo 128(%1), %1\n"
" addib,> -1, %%r1, 1b\n"
" ldo 128(%0), %0"
#else /* V1 */
" ldd 32(%0), %%r19\n"
" ldd 40(%0), %%r20\n"
" std %%r21, 16(%1)\n"
" std %%r22, 24(%1)\n"
" ldd 48(%0), %%r21\n"
" ldd 56(%0), %%r22\n"
" std %%r19, 32(%1)\n"
" std %%r20, 40(%1)\n"
" ldd 64(%0), %%r19\n"
" ldd 72(%0), %%r20\n"
" std %%r21, 48(%1)\n"
" std %%r22, 56(%1)\n"
" ldd 80(%0), %%r21\n"
" ldd 88(%0), %%r22\n"
" std %%r19, 64(%1)\n"
" std %%r20, 72(%1)\n"
" ldd 96(%0), %%r19\n"
" ldd 104(%0), %%r20\n"
" std %%r21, 80(%1)\n"
" std %%r22, 88(%1)\n"
" ldd 112(%0), %%r21\n"
" ldd 120(%0), %%r22\n"
" std %%r19, 96(%1)\n"
" std %%r20, 104(%1)\n"
" ldo 128(%0), %0\n"
" std %%r21, 112(%1)\n"
" std %%r22, 120(%1)\n"
" addib,> -1, %%r1, 1b\n"
" ldo 128(%1), %1"
#endif /* V1 */
#endif /* 0 */
#else /* !__LP64__ */
asm volatile ("ldi 64, %%r1\n"
"1: ldw 0(%0), %%r19\n"
" ldw 4(%0), %%r20\n"
" ldw 8(%0), %%r21\n"
" ldw 12(%0), %%r22\n"
" stw %%r19, 0(%1)\n"
" stw %%r20, 4(%1)\n"
" stw %%r21, 8(%1)\n"
" stw %%r22, 12(%1)\n"
" ldw 16(%0), %%r19\n"
" ldw 20(%0), %%r20\n"
" ldw 24(%0), %%r21\n"
" ldw 28(%0), %%r22\n"
" stw %%r19, 16(%1)\n"
" stw %%r20, 20(%1)\n"
" stw %%r21, 24(%1)\n"
" stw %%r22, 28(%1)\n"
" ldw 32(%0), %%r19\n"
" ldw 36(%0), %%r20\n"
" ldw 40(%0), %%r21\n"
" ldw 44(%0), %%r22\n"
" stw %%r19, 32(%1)\n"
" stw %%r20, 36(%1)\n"
" stw %%r21, 40(%1)\n"
" stw %%r22, 44(%1)\n"
" ldw 48(%0), %%r19\n"
" ldw 52(%0), %%r20\n"
" ldw 56(%0), %%r21\n"
" ldw 60(%0), %%r22\n"
" stw %%r19, 48(%1)\n"
" stw %%r20, 52(%1)\n"
" stw %%r21, 56(%1)\n"
" stw %%r22, 60(%1)\n"
" ldo 64(%1), %1\n"
" addib,> -1, %%r1, 1b\n"
" ldo 64(%0), %0"
#endif /* __LP64__ */
:
: "r"(__from), "r"(__to) );
}
/* Optional build switches, disabled by default.  DEBUG enables the per-byte
 * dump at the end of main(); INIT is not referenced in this file. */
/*
#define INIT 1
#define DEBUG 1
*/
/* Size in bytes of the destination buffer filled by main(): 256 MiB. */
#define BUFFSIZE (1024*1024*256)
/* Number of PAGE_SIZE-sized pages in the destination buffer; PAGE_SIZE is
 * expected to come from <asm/page.h>. */
#define PPB (BUFFSIZE/PAGE_SIZE) /* Pages Per Buff */
int main(int argc, char * * argv, char * * env)
{
char MemSrc[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFG
HIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcde
fghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmn" ;
char *MemDst;
int i, j, k;
MemDst = malloc(BUFFSIZE);
for (j = 0; j < PPB ; j++) {
__copy_user_page_asm(MemDst+(j*PAGE_SIZE), MemSrc);
}
MemDst[BUFFSIZE] = '\0';
#if DEBUG
/*
printf("MemDst = %s\n", MemDst);
*/
for (i=0; i<BUFFSIZE; i++) {
printf("MemDst[%d] = %c\n", i, MemDst[i]);
}
#endif
return 0;
}
[-- Attachment #3: Type: text/plain, Size: 169 bytes --]
_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux
next prev parent reply other threads:[~2004-12-28 16:25 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <418A80E8000124B5@mail-6-bnl.tiscali.it>
2004-12-27 7:36 ` copy_user_page_asm suggested 64bit improvment [Was: [parisc-linux] clear user page test] Grant Grundler
2004-12-27 10:40 ` Joel Soete
2004-12-27 15:08 ` James Bottomley
2004-12-31 20:26 ` Michael S. Zick
2004-12-31 20:56 ` Grant Grundler
2004-12-31 21:35 ` Michael S. Zick
[not found] ` <20041231225447.GC23592@colo.lackof.org>
2004-12-31 23:56 ` Michael S. Zick
2005-01-12 13:52 ` Michael S. Zick
2005-01-12 15:32 ` Joel Soete
2004-12-31 21:21 ` James Bottomley
2004-12-27 17:34 ` Joel Soete
2004-12-27 18:32 ` Joel Soete
2004-12-28 16:25 ` Joel Soete [this message]
2004-12-29 5:46 ` [parisc-linux] Re: copy_user_page_asm suggested 64bit improvment (Test case) Grant Grundler
2004-12-29 11:36 ` Joel Soete
2004-12-30 8:10 ` copy_user_page_asm suggested 64bit improvment [Was: [parisc-linux] clear user page test] Grant Grundler
2004-12-30 17:04 ` [parisc-linux] Re: copy_user_page_asm suggested 64bit improvment [Was: [parisc-l John David Anglin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=41D18909.9060308@tiscali.be \
--to=soete.joel@tiscali.be \
--cc=grundler@parisc-linux.org \
--cc=parisc-linux@lists.parisc-linux.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.