From: Wang Chen <wangchen@cn.fujitsu.com>
To: Ingo Oeser <netdev@axxeo.de>
Cc: "David S. Miller" <davem@davemloft.net>, NETDEV <netdev@vger.kernel.org>
Subject: Re: [PATCH 1/2] ipmr: delete redundant variable
Date: Thu, 24 Jul 2008 15:37:21 +0800 [thread overview]
Message-ID: <48883131.9070805@cn.fujitsu.com> (raw)
In-Reply-To: <200807231405.39515.netdev@axxeo.de>
Ingo Oeser said the following on 2008-7-23 20:05:
> But please check the generated assembly yourself on a CISC and RISC
> machine to get an idea of the effects. It will be a nice learning
> experience I enjoyed myself already.
>
I did the experiment.
I used the following C code to compare the two approaches, and the
result is that both perform the same.
----main.c
/* Number of entries in vif_table; also the loop bound used in main(). */
#define maxvif 32
/*
 * Record type stored in vif_table.  main() reads only the `link` member;
 * the other members are never accessed in this test program.
 * NOTE(review): the layout presumably mirrors the ipmr vif structure
 * under discussion in this thread -- confirm against the kernel source.
 */
struct vif {
int *dev;
unsigned long bytes_in, bytyes_out;
unsigned long pkt_in, pkt_out;
unsigned long rate_limit;
unsigned char threshhold;
unsigned short flags;
int local, remote;
/* The only member examined by the search loop in main(). */
int link;
};
/* File-scope table scanned by main(); has no explicit initializer. */
struct vif vif_table[maxvif];
/*
 * Pointer-walk variant: step through vif_table with a pointer that is
 * advanced together with the counter, stopping at the first entry whose
 * `link` equals 1.  The search result is not used; main() always
 * returns 0.
 */
int main()
{
struct vif *v;
int ct;
v = &vif_table[0];
/* ct only bounds the walk; v is what addresses each entry. */
for (ct = 0; ct < maxvif; ct++, v++)
if(v->link==1)
break;
return 0;
}
---
---main2.c
/* Number of entries in vif_table; also the loop bound used in main(). */
#define maxvif 32
/*
 * Record type stored in vif_table.  main() reads only the `link` member;
 * the other members are never accessed in this test program.
 * NOTE(review): the layout presumably mirrors the ipmr vif structure
 * under discussion in this thread -- confirm against the kernel source.
 */
struct vif {
int *dev;
unsigned long bytes_in, bytyes_out;
unsigned long pkt_in, pkt_out;
unsigned long rate_limit;
unsigned char threshhold;
unsigned short flags;
int local, remote;
/* The only member examined by the search loop in main(). */
int link;
};
/* File-scope table scanned by main(); has no explicit initializer. */
struct vif vif_table[maxvif];
/*
 * Index variant: scan vif_table by array subscript, stopping at the
 * first entry whose `link` equals 1.  The search result is not used;
 * main() always returns 0.
 */
int main()
{
/* v is assigned below but never read in this variant. */
struct vif *v;
int ct;
v = &vif_table[0];
/* Each entry is addressed through the index ct rather than a pointer. */
for (ct = 0; ct < maxvif; ct++)
if(vif_table[ct].link==1)
break;
return 0;
}
---
Use gcc -S -O2 to compile:
---x86 asm main.s
.file "main.c"
.text
.p2align 4,,15
.globl main
.type main, @function
main:
# Prologue: save the incoming stack address in %ecx, align %esp to
# 16 bytes, and set up the %ebp frame.
leal 4(%esp), %ecx
andl $-16, %esp
pushl -4(%ecx)
# %eax holds the address of the current entry, starting at vif_table.
movl $vif_table, %eax
pushl %ebp
movl %esp, %ebp
pushl %ecx
jmp .L2
.p2align 4,,7
.L8:
# Leave the loop when %eax is at vif_table+1240, i.e. the last of the
# 40-byte entries in the 1280-byte table.
cmpl $vif_table+1240, %eax
je .L3
# Otherwise step to the next 40-byte entry.
addl $40, %eax
.L2:
# Compare the 32-bit word at offset 36 of the current entry with 1
# (v->link==1 in main.c); keep looping while it differs.
cmpl $1, 36(%eax)
jne .L8
.L3:
# Epilogue: restore %ecx and %ebp, clear %eax as the return value,
# restore the original %esp and return.
popl %ecx
xorl %eax, %eax
popl %ebp
leal -4(%ecx), %esp
ret
.size main, .-main
.comm vif_table,1280,32
.ident "GCC: (GNU) 4.1.2 20070115 (prerelease) (SUSE Linux)"
.section .note.GNU-stack,"",@progbits
---
---x86 asm main2.s
.file "main2.c"
.text
.p2align 4,,15
.globl main
.type main, @function
main:
# Prologue: save the incoming stack address in %ecx, align %esp to
# 16 bytes, and set up the %ebp frame.
leal 4(%esp), %ecx
andl $-16, %esp
pushl -4(%ecx)
# %eax holds the byte offset of the current entry within vif_table,
# starting at 0.
xorl %eax, %eax
pushl %ebp
movl %esp, %ebp
pushl %ecx
jmp .L2
.p2align 4,,7
.L8:
# Advance the offset by one 40-byte entry and leave the loop once it
# reaches 1280, the size of the whole table.
addl $40, %eax
cmpl $1280, %eax
je .L3
.L2:
# Compare the 32-bit word at vif_table+36+%eax with 1
# (vif_table[ct].link==1 in main2.c); keep looping while it differs.
cmpl $1, vif_table+36(%eax)
jne .L8
.L3:
# Epilogue: restore %ecx and %ebp, clear %eax as the return value,
# restore the original %esp and return.
popl %ecx
xorl %eax, %eax
popl %ebp
leal -4(%ecx), %esp
ret
.size main, .-main
.comm vif_table,1280,32
.ident "GCC: (GNU) 4.1.2 20070115 (prerelease) (SUSE Linux)"
.section .note.GNU-stack,"",@progbits
---
In the loop body, main.s and main2.s have the following differences:
main.s :
cmpl $vif_table+1240, %eax
cmpl $1, 36(%eax)
main2.s:
cmpl $1280, %eax
cmpl $1, vif_table+36(%eax)
These differences cannot cause any difference in performance.
OK. Here is the asm on SPARC (compiled natively, not cross-compiled):
---main.s
.global main
main:
! Setup: %o3 = address of vif_table (sethi/add pair), %o4 = 0 (counter),
! %o5 = word at offset 36 of the first entry (v->link in main.c).
/* 000000 21 */ sethi %hi(vif_table),%o5
/* 0x0004 22 */ or %g0,0,%o4
/* 0x0008 21 */ add %o5,%lo(vif_table),%o3
/* 0x000c 23 */ ld [%o3+36],%o5
.L900000106:
! Exit the loop if the loaded word equals 1.  The add that follows the
! branch sits in its delay slot and increments the counter.
/* 0x0010 23 */ cmp %o5,1
/* 0x0014 */ be,pn %icc,.L77000028
/* 0x0018 22 */ add %o4,1,%o4
.L77000025:
! Advance the entry pointer by 40 bytes and loop while the counter is
! below 32.  The branch has the annul bit set (",a"), so the load of the
! next entry's word at offset 36 in the delay slot takes effect only
! when the branch is taken.
/* 0x001c 22 */ add %o3,40,%o3
/* 0x0020 */ cmp %o4,32
/* 0x0024 */ bl,a,pt %icc,.L900000106
/* 0x0028 23 */ ld [%o3+36],%o5
.L77000028:
! Return; the delay-slot instruction sets the result register %o0 to 0.
/* 0x002c 22 */ retl ! Result = %o0
/* 0x0030 */ or %g0,0,%o0
/* 0x0034 0 */ .type main,2
/* 0x0034 0 */ .size main,(.-main)
/* 0x0034 0 */ .global __fsr_init_value
/* 0x0034 */ __fsr_init_value=0
---
---main2.s
.global main
main:
! Setup: %o4 = address vif_table+36 (sethi/add pair), %o3 = 0 (counter),
! %o5 = word loaded from vif_table+36 (vif_table[0].link in main2.c).
/* 000000 22 */ sethi %hi(vif_table+36),%o5
/* 0x0004 */ or %g0,0,%o3
/* 0x0008 */ add %o5,%lo(vif_table+36),%o4
/* 0x000c 23 */ ld [%o5+%lo(vif_table+36)],%o5
.L900000106:
! Exit the loop if the loaded word equals 1.  The add that follows the
! branch sits in its delay slot and advances the pointer in %o4 by
! 40 bytes, i.e. to the same offset in the next entry.
/* 0x0010 23 */ cmp %o5,1
/* 0x0014 */ be,pn %icc,.L77000028
/* 0x0018 22 */ add %o4,40,%o4
.L77000025:
! Increment the counter and loop while it is below 32.  The branch has
! the annul bit set (",a"), so the load through %o4 in the delay slot
! takes effect only when the branch is taken.
/* 0x001c 22 */ add %o3,1,%o3
/* 0x0020 */ cmp %o3,32
/* 0x0024 */ bl,a,pt %icc,.L900000106
/* 0x0028 23 */ ld [%o4],%o5
.L77000028:
! Return; the delay-slot instruction sets the result register %o0 to 0.
/* 0x002c 22 */ retl ! Result = %o0
/* 0x0030 */ or %g0,0,%o0
/* 0x0034 0 */ .type main,2
/* 0x0034 0 */ .size main,(.-main)
/* 0x0034 0 */ .global __fsr_init_value
/* 0x0034 */ __fsr_init_value=0
---
In the loop body, both versions are ptr+sizeof(struct).
So we can conclude that current compilers do optimize indexed access.
:)
Ingo, if you have a different opinion, I would appreciate it if you could share it. :)
next prev parent reply other threads:[~2008-07-24 7:38 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-07-23 1:45 [PATCH 1/2] ipmr: delete redundant variable Wang Chen
2008-07-23 8:03 ` Ingo Oeser
2008-07-23 9:35 ` Wang Chen
2008-07-23 12:05 ` Ingo Oeser
2008-07-23 15:16 ` Wang Chen
2008-07-24 7:37 ` Wang Chen [this message]
2008-07-25 17:36 ` Ingo Oeser
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=48883131.9070805@cn.fujitsu.com \
--to=wangchen@cn.fujitsu.com \
--cc=davem@davemloft.net \
--cc=netdev@axxeo.de \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.