From: Padraig@Linux.ie
To: root@chaos.analogic.com
Cc: linux-kernel <linux-kernel@vger.kernel.org>
Subject: Re: gcc 2.95 vs 3.21 performance
Date: Tue, 04 Feb 2003 10:57:06 +0000 [thread overview]
Message-ID: <3E3F9C82.7000607@Linux.ie> (raw)
In-Reply-To: <Pine.LNX.3.95.1030203182417.7651A-100000@chaos.analogic.com>
[-- Attachment #1: Type: text/plain, Size: 1156 bytes --]
Richard B. Johnson wrote:
> On Mon, 3 Feb 2003, Martin J. Bligh wrote:
>
>>People keep extolling the virtues of gcc 3.2 to me, which I'm
>>reluctant to switch to, since it compiles so much slower. But
>>it supposedly generates better code, so I thought I'd compile
>>the kernel with both and compare the results. This is gcc 2.95
>>and 3.2.1 from debian unstable on a 16-way NUMA-Q. The kernbench
>>tests still use 2.95 for the compile-time stuff.
>>
>
> [SNIPPED tests...]
>
> Don't let this get out, but egcs-2.91.66 compiled FFT code
> works about 50 percent of the speed of whatever M$ uses for
> Visual C++ Version 6.0
Interesting. I just noticed that I get a 50% decrease in
the speed of my program if I just insert a printf(), i.e.
my program is like:
printf()
for(;;) {
do_sorting_loop_test();
}
If I remove the initial printf it doubles in speed?
I assume this is some weird caching thing?
gcc is 3.2.1 (same happens for 2.95..)
<boggle>
Note this is with -O3. If I don't specify -O then
leaving the printf in speeds things up by about 15%
</boggle>
attached is the assembly for the slow and fast
in case anyone's interested.
Pádraig.
[-- Attachment #2: slow.s --]
[-- Type: text/plain, Size: 4466 bytes --]
# slow.s -- GCC 3.2.1 output for testfunc.c (32-bit x86, AT&T syntax).
# This first part holds the program's global variables and string constants.
.file "testfunc.c"
# TEST_NUMBER: exported 16-bit variable, initial value 256.
# main may overwrite it with a value parsed from the command line.
.globl TEST_NUMBER
.data
.align 2
.type TEST_NUMBER,@object
.size TEST_NUMBER,2
TEST_NUMBER:
.value 256
# count: exported 32-bit counter, starts at 0; main increments it once per
# pass of its main loop and the exit paths print it.
.globl count
.align 4
.type count,@object
.size count,4
count:
.long 0
# exit_flag: exported 32-bit flag, starts at 0; exit_info_sig stores 1 here
# and main polls it on every pass of its loop.
.globl exit_flag
.align 4
.type exit_flag,@object
.size exit_flag,4
exit_flag:
.long 0
# throttle_print.0: non-exported 32-bit variable, starts at 0. No instruction
# in this listing references it (presumably a function-local static whose
# user was compiled away -- TODO confirm against testfunc.c).
.align 4
.type throttle_print.0,@object
.size throttle_print.0,4
throttle_print.0:
.long 0
# .LC0: ESC[H ESC[2J (ANSI cursor-home + clear-screen), printed once by main
# before the array is allocated.
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "\033[H\033[2J"
# .LC3: one-time banner printed by main; %03u receives TEST_NUMBER-1.
.section .rodata.str1.32,"aMS",@progbits,1
.align 32
.LC3:
.string "\nAdding & dropping random array elements,(from a set of 000..%03u)\n"
# .LC4: second banner line, emitted with puts.
.section .rodata.str1.1
.LC4:
.string "Ctrl C to exit"
# .LC1 / .LC2: final statistics formats (total count, elapsed seconds,
# then count per second); used by both main's exit path and exit_info.
.section .rodata.str1.32
.align 32
.LC1:
.string "\n%lu array elements randomly dropped and added in %lus"
.align 32
.LC2:
.string " (%lu/s)\n \n"
# main -- benchmark driver (slow.s variant: prints a clear-screen sequence
# before allocating the array).
#
# Arguments are read from the stack frame: 8(%ebp) is the first argument and
# 12(%ebp) the second (argc / argv by the usual C main convention).
#
# Register roles once set up:
#   esi = base of the malloc'd array of 16-bit elements
#   edi = 1 until the banner has been printed, then 0
#   ebx = TEST_NUMBER (reloaded at the top of every main-loop pass)
#
# Never returns: the only way out is the exit(1) call at the end of .L28.
.text
.p2align 2,,3
.globl main
.type main,@function
main:
pushl %ebp
movl %esp, %ebp
# edi, esi and ebx are used across calls below, so they are saved here.
pushl %edi
pushl %esi
pushl %ebx
subl $12, %esp
# Round the stack pointer down to a 16-byte boundary.
andl $-16, %esp
# Skip argument parsing when the first argument (argc) is <= 1.
# The movl between cmpl and jle does not modify flags.
cmpl $1, 8(%ebp)
movl $1, %edi
jle .L2
# __strtol_internal(argv[1], 0, 10, 0): arguments are pushed right to left.
pushl $0
pushl $10
pushl $0
movl 12(%ebp), %eax
pushl 4(%eax)
call __strtol_internal
addl $16, %esp
# Keep the default TEST_NUMBER unless the parsed value is > 0.
testl %eax, %eax
jle .L2
# Only the low 16 bits of the parsed value are stored.
# NOTE(review): a positive value whose low 16 bits are zero (e.g. 65536)
# leaves TEST_NUMBER == 0, and the idivl in .L24 would then divide by zero.
movw %ax, TEST_NUMBER
.L2:
# printf(.LC0) then fflush(stdout): clear the terminal before starting.
subl $12, %esp
pushl $.LC0
call printf
# Drop the format argument and reuse the slot for fflush's argument.
popl %eax
pushl stdout
call fflush
# malloc(TEST_NUMBER * 2): one 16-bit element per entry; the argument
# overwrites the stack slot that held stdout.
movzwl TEST_NUMBER, %edx
sall $1, %edx
movl %edx, (%esp)
call malloc
movl %eax, %esi
# start = time(0), again reusing the same argument slot.
movl $0, (%esp)
call time
# The two pops discard stack slots so that the two pushes below land in
# the already-reserved argument area.
popl %ebx
movl %eax, start
popl %eax
# signal(2, exit_info_sig): signal number 2 (SIGINT on Linux, consistent
# with the "Ctrl C to exit" banner).
pushl $exit_info_sig
pushl $2
call signal
# Initialise the array: element i = i for i in [0, TEST_NUMBER).
xorl %edx, %edx
movw TEST_NUMBER, %cx
addl $16, %esp
cmpw %cx, %dx
jae .L24
.L10:
movzwl %dx, %ebx
movw %dx, (%esi,%ebx,2)
incl %edx
cmpw %cx, %dx
jb .L10
.p2align 2,,3
.L24:
# ---- main loop: one pass per dropped-and-added element ----
incl count
call rand
# edx:eax = sign-extended rand() result; idivl leaves the remainder of
# rand() / TEST_NUMBER in edx, which becomes the starting index in ecx.
movw TEST_NUMBER, %bx
movzwl %bx, %edx
movl %edx, %ecx
cltd
idivl %ecx
cmpw %bx, %dx
movl %edx, %ecx
jae .L27
.p2align 2,,3
.L18:
# Shift elements down by one slot: array[k-1] = array[k] for
# k = index .. TEST_NUMBER-1 (same loop body shape as RemoveNumber).
# NOTE(review): when the starting index is 0 the first store goes to
# -2(%esi), i.e. the two bytes just below the pointer malloc returned.
movzwl %cx, %edx
incl %ecx
movw (%esi,%edx,2), %ax
cmpw %bx, %cx
movw %ax, -2(%esi,%edx,2)
jb .L18
.L27:
# GetLowestValueAvailable(array, (16-bit) TEST_NUMBER-1); the routine is
# not defined in this listing. Its 16-bit result is stored in the last
# array slot, array[TEST_NUMBER-1].
leal -1(%ebx), %ecx
subl $8, %esp
movzwl %cx, %edx
pushl %edx
pushl %esi
call GetLowestValueAvailable
movzwl TEST_NUMBER, %edx
movw %ax, -2(%esi,%edx,2)
# Leave the loop once exit_flag has been set (exit_info_sig sets it).
movl exit_flag, %eax
addl $16, %esp
testl %eax, %eax
jne .L28
# Print the banner only on the first pass (edi is cleared afterwards).
testl %edi, %edi
je .L24
subl $8, %esp
leal -1(%edx), %ebx
pushl %ebx
pushl $.LC3
call printf
xorl %edi, %edi
# puts(.LC4), reusing the slot that held printf's format argument.
movl $.LC4, (%esp)
call puts
addl $16, %esp
jmp .L24
.L28:
# ---- exit path: print statistics and terminate ----
# esi = time(0) - start (elapsed seconds); the array pointer in esi is
# no longer needed.
subl $12, %esp
pushl $0
call time
movl %eax, %esi
addl $12, %esp
subl start, %esi
# printf(.LC1, count, elapsed)
pushl %esi
pushl count
pushl $.LC1
call printf
popl %eax
popl %edx
# printf(.LC2, count / elapsed), unsigned division with edx cleared.
# NOTE(review): divl faults if elapsed is 0 (exit within the same second
# that start was recorded).
movl count, %eax
xorl %edx, %edx
divl %esi
pushl %eax
pushl $.LC2
call printf
# exit(1)
movl $1, (%esp)
call exit
.Lfe1:
.size main,.Lfe1-main
# RemoveNumber(array, index) -- shift 16-bit elements down by one slot.
#
# In:  8(%ebp)  = base address of an array of 16-bit elements
#      12(%ebp) = starting index (only the low 16 bits are compared/used)
# Effect: for k = index .. TEST_NUMBER-1, array[k-1] = array[k].
#         Does nothing when index >= TEST_NUMBER (unsigned 16-bit compare).
# Clobbers eax, ecx, edx; ebx is saved and restored.
# NOTE(review): with index == 0 the first store goes to -2(base), i.e. one
# element before the start of the array.
.p2align 2,,3
.globl RemoveNumber
.type RemoveNumber,@function
RemoveNumber:
pushl %ebp
movl %esp, %ebp
movl 12(%ebp), %ecx
# The pushl and movl between this compare and the jae do not modify flags.
cmpw TEST_NUMBER, %cx
pushl %ebx
movl 8(%ebp), %ebx
jae .L69
.p2align 2,,3
.L67:
# TEST_NUMBER is re-read from memory on every iteration.
movzwl %cx, %edx
movw (%ebx,%edx,2), %ax
movw %ax, -2(%ebx,%edx,2)
incl %ecx
cmpw TEST_NUMBER, %cx
jb .L67
.L69:
popl %ebx
leave
ret
.Lfe2:
.size RemoveNumber,.Lfe2-RemoveNumber
# String constants used by printArray:
#   .LC5 = ESC[H (ANSI cursor-home), .LC6 = per-element format.
.section .rodata.str1.1
.LC5:
.string "\033[H"
.LC6:
.string "%03d "
# printArray(array) -- print every element of a 16-bit array.
#
# In:  8(%ebp) = base address of an array of 16-bit elements
# Effect: printf(.LC5), fflush(stdout), then printf(.LC6, array[i]) for
#         i in [0, TEST_NUMBER), each element zero-extended to 32 bits.
# Register roles: esi = array base, ebx = loop index (both saved/restored).
.text
.p2align 2,,3
.globl printArray
.type printArray,@function
printArray:
pushl %ebp
movl %esp, %ebp
pushl %esi
pushl %ebx
subl $12, %esp
pushl $.LC5
movl 8(%ebp), %esi
call printf
# Drop the format argument and reuse the slot for fflush(stdout).
popl %eax
pushl stdout
xorl %ebx, %ebx
call fflush
addl $16, %esp
# Enter the loop only if TEST_NUMBER > 0.
cmpw TEST_NUMBER, %bx
jb .L75
.L77:
# Epilogue: reset esp to the saved-register area, then restore and return.
leal -8(%ebp), %esp
popl %ebx
popl %esi
leave
ret
.p2align 2,,3
.L75:
# Loop body: printf(.LC6, array[ebx]); the index is incremented before the
# call, after the element has already been loaded into edx.
movzwl %bx, %ecx
subl $8, %esp
movzwl (%esi,%ecx,2), %edx
pushl %edx
pushl $.LC6
incl %ebx
call printf
addl $16, %esp
cmpw TEST_NUMBER, %bx
jb .L75
jmp .L77
.Lfe3:
.size printArray,.Lfe3-printArray
# exit_info -- print run statistics and terminate the process.
#
# Reads no stack arguments. Computes elapsed = time(0) - start, prints
# count and elapsed with .LC1, prints count / elapsed with .LC2, then calls
# exit(1). Never returns, so there is no epilogue.
# The same sequence appears inline at the end of main (.L28).
.p2align 2,,3
.globl exit_info
.type exit_info,@function
exit_info:
pushl %ebp
movl %esp, %ebp
pushl %ebx
subl $16, %esp
# ebx = time(0) - start
pushl $0
call time
movl %eax, %ebx
addl $12, %esp
subl start, %ebx
# printf(.LC1, count, elapsed)
pushl %ebx
pushl count
pushl $.LC1
call printf
popl %eax
popl %edx
# printf(.LC2, count / elapsed), unsigned division with edx cleared.
# NOTE(review): divl faults if elapsed is 0.
movl count, %eax
xorl %edx, %edx
divl %ebx
pushl %eax
pushl $.LC2
call printf
# exit(1)
movl $1, (%esp)
call exit
.Lfe4:
.size exit_info,.Lfe4-exit_info
# exit_info_sig -- handler that main registers with signal(2, ...).
# Its only effect is to store 1 into exit_flag; main polls that flag and
# prints the statistics itself. The handler's stack argument is not read.
.p2align 2,,3
.globl exit_info_sig
.type exit_info_sig,@function
exit_info_sig:
pushl %ebp
movl %esp, %ebp
movl $1, exit_flag
leave
ret
.Lfe5:
.size exit_info_sig,.Lfe5-exit_info_sig
# start: 4-byte common symbol, 4-byte aligned; main stores the time()
# result taken before the main loop here.
.comm start,4,4
.ident "GCC: (GNU) 3.2.1 20021207 (Red Hat Linux 8.0 3.2.1-2)"
[-- Attachment #3: fast.s --]
[-- Type: text/plain, Size: 4339 bytes --]
# fast.s -- GCC 3.2.1 output for testfunc.c (32-bit x86, AT&T syntax).
# This first part holds the program's global variables and string constants.
# Unlike the slow.s listing there is no clear-screen string here, so the
# .LC labels are numbered differently.
.file "testfunc.c"
# TEST_NUMBER: exported 16-bit variable, initial value 256.
# main may overwrite it with a value parsed from the command line.
.globl TEST_NUMBER
.data
.align 2
.type TEST_NUMBER,@object
.size TEST_NUMBER,2
TEST_NUMBER:
.value 256
# count: exported 32-bit counter, starts at 0; main increments it once per
# pass of its main loop and the exit paths print it.
.globl count
.align 4
.type count,@object
.size count,4
count:
.long 0
# exit_flag: exported 32-bit flag, starts at 0; exit_info_sig stores 1 here
# and main polls it on every pass of its loop.
.globl exit_flag
.align 4
.type exit_flag,@object
.size exit_flag,4
exit_flag:
.long 0
# throttle_print.0: non-exported 32-bit variable, starts at 0. No instruction
# in this listing references it (presumably a function-local static whose
# user was compiled away -- TODO confirm against testfunc.c).
.align 4
.type throttle_print.0,@object
.size throttle_print.0,4
throttle_print.0:
.long 0
# .LC2: one-time banner printed by main; %03u receives TEST_NUMBER-1.
.section .rodata.str1.32,"aMS",@progbits,1
.align 32
.LC2:
.string "\nAdding & dropping random array elements,(from a set of 000..%03u)\n"
# .LC3: second banner line, emitted with puts.
.section .rodata.str1.1,"aMS",@progbits,1
.LC3:
.string "Ctrl C to exit"
# .LC0 / .LC1: final statistics formats (total count, elapsed seconds,
# then count per second); used by both main's exit path and exit_info.
.section .rodata.str1.32
.align 32
.LC0:
.string "\n%lu array elements randomly dropped and added in %lus"
.align 32
.LC1:
.string " (%lu/s)\n \n"
# main -- benchmark driver (fast.s variant: goes straight to malloc at .L2,
# with no printf/fflush beforehand).
#
# Arguments are read from the stack frame: 8(%ebp) is the first argument and
# 12(%ebp) the second (argc / argv by the usual C main convention).
#
# Register roles once set up:
#   esi = base of the malloc'd array of 16-bit elements
#   edi = 1 until the banner has been printed, then 0
#   ebx = TEST_NUMBER (reloaded at the top of every main-loop pass)
#
# Never returns: the only way out is the exit(1) call at the end of .L28.
.text
.p2align 2,,3
.globl main
.type main,@function
main:
pushl %ebp
movl %esp, %ebp
# edi, esi and ebx are used across calls below, so they are saved here.
pushl %edi
pushl %esi
pushl %ebx
subl $12, %esp
# Round the stack pointer down to a 16-byte boundary.
andl $-16, %esp
# Skip argument parsing when the first argument (argc) is <= 1.
# The movl between cmpl and jle does not modify flags.
cmpl $1, 8(%ebp)
movl $1, %edi
jle .L2
# __strtol_internal(argv[1], 0, 10, 0): arguments are pushed right to left.
pushl $0
pushl $10
pushl $0
movl 12(%ebp), %eax
pushl 4(%eax)
call __strtol_internal
addl $16, %esp
# Keep the default TEST_NUMBER unless the parsed value is > 0.
testl %eax, %eax
jle .L2
# Only the low 16 bits of the parsed value are stored.
# NOTE(review): a positive value whose low 16 bits are zero (e.g. 65536)
# leaves TEST_NUMBER == 0, and the idivl in .L24 would then divide by zero.
movw %ax, TEST_NUMBER
.L2:
# malloc(TEST_NUMBER * 2): one 16-bit element per entry.
movzwl TEST_NUMBER, %edx
subl $12, %esp
sall $1, %edx
pushl %edx
call malloc
movl %eax, %esi
# start = time(0), reusing the stack slot that held malloc's argument.
movl $0, (%esp)
call time
# The two pops discard stack slots so that the two pushes below land in
# the already-reserved argument area.
popl %ebx
movl %eax, start
popl %eax
# signal(2, exit_info_sig): signal number 2 (SIGINT on Linux, consistent
# with the "Ctrl C to exit" banner).
pushl $exit_info_sig
pushl $2
call signal
# Initialise the array: element i = i for i in [0, TEST_NUMBER).
xorl %edx, %edx
movw TEST_NUMBER, %cx
addl $16, %esp
cmpw %cx, %dx
jae .L24
.L10:
movzwl %dx, %ebx
movw %dx, (%esi,%ebx,2)
incl %edx
cmpw %cx, %dx
jb .L10
.p2align 2,,3
.L24:
# ---- main loop: one pass per dropped-and-added element ----
incl count
call rand
# edx:eax = sign-extended rand() result; idivl leaves the remainder of
# rand() / TEST_NUMBER in edx, which becomes the starting index in ecx.
movw TEST_NUMBER, %bx
movzwl %bx, %edx
movl %edx, %ecx
cltd
idivl %ecx
cmpw %bx, %dx
movl %edx, %ecx
jae .L27
.p2align 2,,3
.L18:
# Shift elements down by one slot: array[k-1] = array[k] for
# k = index .. TEST_NUMBER-1 (same loop body shape as RemoveNumber).
# NOTE(review): when the starting index is 0 the first store goes to
# -2(%esi), i.e. the two bytes just below the pointer malloc returned.
movzwl %cx, %edx
incl %ecx
movw (%esi,%edx,2), %ax
cmpw %bx, %cx
movw %ax, -2(%esi,%edx,2)
jb .L18
.L27:
# GetLowestValueAvailable(array, (16-bit) TEST_NUMBER-1); the routine is
# not defined in this listing. Its 16-bit result is stored in the last
# array slot, array[TEST_NUMBER-1].
leal -1(%ebx), %ecx
subl $8, %esp
movzwl %cx, %edx
pushl %edx
pushl %esi
call GetLowestValueAvailable
movzwl TEST_NUMBER, %edx
movw %ax, -2(%esi,%edx,2)
# Leave the loop once exit_flag has been set (exit_info_sig sets it).
movl exit_flag, %eax
addl $16, %esp
testl %eax, %eax
jne .L28
# Print the banner only on the first pass (edi is cleared afterwards).
testl %edi, %edi
je .L24
subl $8, %esp
leal -1(%edx), %ebx
pushl %ebx
pushl $.LC2
call printf
xorl %edi, %edi
# puts(.LC3), reusing the slot that held printf's format argument.
movl $.LC3, (%esp)
call puts
addl $16, %esp
jmp .L24
.L28:
# ---- exit path: print statistics and terminate ----
# esi = time(0) - start (elapsed seconds); the array pointer in esi is
# no longer needed.
subl $12, %esp
pushl $0
call time
movl %eax, %esi
addl $12, %esp
subl start, %esi
# printf(.LC0, count, elapsed)
pushl %esi
pushl count
pushl $.LC0
call printf
popl %eax
popl %edx
# printf(.LC1, count / elapsed), unsigned division with edx cleared.
# NOTE(review): divl faults if elapsed is 0 (exit within the same second
# that start was recorded).
movl count, %eax
xorl %edx, %edx
divl %esi
pushl %eax
pushl $.LC1
call printf
# exit(1)
movl $1, (%esp)
call exit
.Lfe1:
.size main,.Lfe1-main
# RemoveNumber(array, index) -- shift 16-bit elements down by one slot.
#
# In:  8(%ebp)  = base address of an array of 16-bit elements
#      12(%ebp) = starting index (only the low 16 bits are compared/used)
# Effect: for k = index .. TEST_NUMBER-1, array[k-1] = array[k].
#         Does nothing when index >= TEST_NUMBER (unsigned 16-bit compare).
# Clobbers eax, ecx, edx; ebx is saved and restored.
# NOTE(review): with index == 0 the first store goes to -2(base), i.e. one
# element before the start of the array.
.p2align 2,,3
.globl RemoveNumber
.type RemoveNumber,@function
RemoveNumber:
pushl %ebp
movl %esp, %ebp
movl 12(%ebp), %ecx
# The pushl and movl between this compare and the jae do not modify flags.
cmpw TEST_NUMBER, %cx
pushl %ebx
movl 8(%ebp), %ebx
jae .L69
.p2align 2,,3
.L67:
# TEST_NUMBER is re-read from memory on every iteration.
movzwl %cx, %edx
movw (%ebx,%edx,2), %ax
movw %ax, -2(%ebx,%edx,2)
incl %ecx
cmpw TEST_NUMBER, %cx
jb .L67
.L69:
popl %ebx
leave
ret
.Lfe2:
.size RemoveNumber,.Lfe2-RemoveNumber
# String constants used by printArray:
#   .LC4 = ESC[H (ANSI cursor-home), .LC5 = per-element format.
.section .rodata.str1.1
.LC4:
.string "\033[H"
.LC5:
.string "%03d "
# printArray(array) -- print every element of a 16-bit array.
#
# In:  8(%ebp) = base address of an array of 16-bit elements
# Effect: printf(.LC4), fflush(stdout), then printf(.LC5, array[i]) for
#         i in [0, TEST_NUMBER), each element zero-extended to 32 bits.
# Register roles: esi = array base, ebx = loop index (both saved/restored).
.text
.p2align 2,,3
.globl printArray
.type printArray,@function
printArray:
pushl %ebp
movl %esp, %ebp
pushl %esi
pushl %ebx
subl $12, %esp
pushl $.LC4
movl 8(%ebp), %esi
call printf
# Drop the format argument and reuse the slot for fflush(stdout).
popl %eax
pushl stdout
xorl %ebx, %ebx
call fflush
addl $16, %esp
# Enter the loop only if TEST_NUMBER > 0.
cmpw TEST_NUMBER, %bx
jb .L75
.L77:
# Epilogue: reset esp to the saved-register area, then restore and return.
leal -8(%ebp), %esp
popl %ebx
popl %esi
leave
ret
.p2align 2,,3
.L75:
# Loop body: printf(.LC5, array[ebx]); the index is incremented before the
# call, after the element has already been loaded into edx.
movzwl %bx, %ecx
subl $8, %esp
movzwl (%esi,%ecx,2), %edx
pushl %edx
pushl $.LC5
incl %ebx
call printf
addl $16, %esp
cmpw TEST_NUMBER, %bx
jb .L75
jmp .L77
.Lfe3:
.size printArray,.Lfe3-printArray
# exit_info -- print run statistics and terminate the process.
#
# Reads no stack arguments. Computes elapsed = time(0) - start, prints
# count and elapsed with .LC0, prints count / elapsed with .LC1, then calls
# exit(1). Never returns, so there is no epilogue.
# The same sequence appears inline at the end of main (.L28).
.p2align 2,,3
.globl exit_info
.type exit_info,@function
exit_info:
pushl %ebp
movl %esp, %ebp
pushl %ebx
subl $16, %esp
# ebx = time(0) - start
pushl $0
call time
movl %eax, %ebx
addl $12, %esp
subl start, %ebx
# printf(.LC0, count, elapsed)
pushl %ebx
pushl count
pushl $.LC0
call printf
popl %eax
popl %edx
# printf(.LC1, count / elapsed), unsigned division with edx cleared.
# NOTE(review): divl faults if elapsed is 0.
movl count, %eax
xorl %edx, %edx
divl %ebx
pushl %eax
pushl $.LC1
call printf
# exit(1)
movl $1, (%esp)
call exit
.Lfe4:
.size exit_info,.Lfe4-exit_info
# exit_info_sig -- handler that main registers with signal(2, ...).
# Its only effect is to store 1 into exit_flag; main polls that flag and
# prints the statistics itself. The handler's stack argument is not read.
.p2align 2,,3
.globl exit_info_sig
.type exit_info_sig,@function
exit_info_sig:
pushl %ebp
movl %esp, %ebp
movl $1, exit_flag
leave
ret
.Lfe5:
.size exit_info_sig,.Lfe5-exit_info_sig
# start: 4-byte common symbol, 4-byte aligned; main stores the time()
# result taken before the main loop here.
.comm start,4,4
.ident "GCC: (GNU) 3.2.1 20021207 (Red Hat Linux 8.0 3.2.1-2)"
next prev parent reply other threads:[~2003-02-04 10:51 UTC|newest]
Thread overview: 84+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-02-03 23:05 gcc 2.95 vs 3.21 performance Martin J. Bligh
2003-02-03 23:22 ` [Lse-tech] " Andi Kleen
2003-02-03 23:31 ` Richard B. Johnson
2003-02-04 0:43 ` J.A. Magallon
2003-02-04 13:42 ` Richard B. Johnson
2003-02-04 14:20 ` John Bradford
2003-02-04 6:54 ` Denis Vlasenko
2003-02-04 7:13 ` Martin J. Bligh
2003-02-04 12:25 ` Adrian Bunk
2003-02-04 15:51 ` Martin J. Bligh
2003-02-04 16:27 ` [Lse-tech] " Martin J. Bligh
2003-02-04 17:40 ` Patrick Mansfield
2003-02-04 17:55 ` Martin J. Bligh
2003-02-04 9:54 ` Bryan Andersen
2003-02-04 15:46 ` Martin J. Bligh
2003-02-04 19:09 ` Timothy D. Witham
2003-02-04 19:35 ` John Bradford
2003-02-04 19:44 ` Dave Jones
2003-02-04 20:11 ` John Bradford
2003-02-04 20:20 ` John Bradford
2003-02-04 20:45 ` Herman Oosthuysen
2003-02-04 21:44 ` Timothy D. Witham
2003-02-05 7:15 ` Denis Vlasenko
2003-02-05 10:36 ` Andreas Schwab
2003-02-05 11:41 ` Denis Vlasenko
2003-02-05 12:20 ` Dave Jones
2003-02-05 13:10 ` [Lse-tech] " Dipankar Sarma
2003-02-05 15:30 ` Martin J. Bligh
2003-02-04 21:38 ` Linus Torvalds
2003-02-04 21:54 ` John Bradford
2003-02-04 22:11 ` Linus Torvalds
2003-02-04 23:27 ` Timothy D. Witham
2003-02-04 23:21 ` Larry McVoy
2003-02-04 23:42 ` b_adlakha
2003-02-05 0:19 ` Andy Pfiffer
2003-02-04 23:51 ` Jakob Oestergaard
2003-02-05 1:03 ` Hugo Mills
2003-02-10 22:26 ` Andrea Arcangeli
2003-02-10 23:28 ` J.A. Magallon
2003-02-04 23:51 ` Eli Carter
2003-02-05 0:27 ` Larry McVoy
2003-02-06 20:42 ` Paul Jakma
2003-02-05 3:03 ` Tomas Szepe
2003-02-05 6:03 ` Mark Mielke
2003-02-07 16:09 ` Pavel Machek
2003-02-04 10:57 ` Padraig [this message]
2003-02-04 13:11 ` Helge Hafting
2003-02-04 13:29 ` Jörn Engel
2003-02-04 14:05 ` P
2003-02-04 20:36 ` Herman Oosthuysen
2003-02-04 12:20 ` [Lse-tech] " Dave Jones
2003-02-04 15:50 ` Martin J. Bligh
2003-02-10 12:13 ` Momchil Velikov
2003-02-06 15:42 ` gcc -O2 vs gcc -Os performance Martin J. Bligh
2003-02-06 15:51 ` [Lse-tech] " Andi Kleen
2003-02-06 17:48 ` Alan Cox
2003-02-06 17:06 ` Martin J. Bligh
2003-02-06 20:38 ` Martin J. Bligh
2003-02-06 21:32 ` John Bradford
2003-02-06 22:12 ` Linus Torvalds
2003-02-06 22:58 ` Martin J. Bligh
2003-02-06 23:16 ` Linus Torvalds
2003-02-06 23:59 ` Martin J. Bligh
2003-02-06 23:17 ` Roger Larsson
2003-02-06 23:33 ` Martin J. Bligh
[not found] <1044385759.1861.46.camel@localhost.localdomain.suse.lists.linux.kernel>
[not found] ` <200302041935.h14JZ69G002675@darkstar.example.net.suse.lists.linux.kernel>
[not found] ` <b1pbt8$2ll$1@penguin.transmeta.com.suse.lists.linux.kernel>
2003-02-04 22:05 ` gcc 2.95 vs 3.21 performance Andi Kleen
2003-02-04 22:14 ` Linus Torvalds
2003-02-05 10:04 ` Pavel Janík
2003-02-05 20:07 ` Linus Torvalds
2003-02-06 15:00 ` Horst von Brand
2003-02-04 22:59 ` Jeff Muizelaar
2003-02-04 23:12 ` b_adlakha
2003-02-05 8:41 ` Horst von Brand
2003-02-05 19:09 ` Linus Torvalds
2003-02-05 19:22 ` Randy.Dunlap
2003-02-05 19:24 ` John Bradford
2003-02-06 7:02 ` Neil Booth
[not found] ` <courier.3E423112.00007219@softhome.net>
[not found] ` <20030206212218.GA4891@daikokuya.co.uk>
2003-02-07 10:31 ` b_adlakha
2003-02-07 18:46 ` Horst von Brand
2003-02-07 21:49 ` Neil Booth
2003-02-10 2:14 ` Jeff Garzik
2003-02-10 9:19 ` Tomas Szepe
[not found] <120432836@toto.iv>
2003-02-05 2:45 ` Peter Chubb
[not found] <200302052021.h15KLrXv000881@darkstar.example.net>
2003-02-05 20:28 ` b_adlakha
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3E3F9C82.7000607@Linux.ie \
--to=padraig@linux.ie \
--cc=linux-kernel@vger.kernel.org \
--cc=root@chaos.analogic.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.