From mboxrd@z Thu Jan 1 00:00:00 1970 From: Wang Chen Subject: Re: [PATCH 1/2] ipmr: delete redundant variable Date: Thu, 24 Jul 2008 15:37:21 +0800 Message-ID: <48883131.9070805@cn.fujitsu.com> References: <48868D54.6050701@cn.fujitsu.com> <200807231003.05848.netdev@axxeo.de> <4886FB56.20905@cn.fujitsu.com> <200807231405.39515.netdev@axxeo.de> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit Cc: "David S. Miller" , NETDEV To: Ingo Oeser Return-path: Received: from cn.fujitsu.com ([222.73.24.84]:60598 "EHLO song.cn.fujitsu.com" rhost-flags-OK-FAIL-OK-OK) by vger.kernel.org with ESMTP id S1750878AbYGXHi7 (ORCPT ); Thu, 24 Jul 2008 03:38:59 -0400 In-Reply-To: <200807231405.39515.netdev@axxeo.de> Sender: netdev-owner@vger.kernel.org List-ID: Ingo Oeser said the following on 2008-7-23 20:05: > But please check the generated assembly yourself on a CISC and RISC > machine to get an idea of the effects. It will be a nice learning > experience I enjoyed myself already. > I did the experiment. I used the following C code to compare the two approaches, and the result is that both perform the same. 
----main.c #define maxvif 32 struct vif { int *dev; unsigned long bytes_in, bytyes_out; unsigned long pkt_in, pkt_out; unsigned long rate_limit; unsigned char threshhold; unsigned short flags; int local, remote; int link; }; struct vif vif_table[maxvif]; int main() { struct vif *v; int ct; v = &vif_table[0]; for (ct = 0; ct < maxvif; ct++, v++) if(v->link==1) break; return 0; } --- ---main2.c #define maxvif 32 struct vif { int *dev; unsigned long bytes_in, bytyes_out; unsigned long pkt_in, pkt_out; unsigned long rate_limit; unsigned char threshhold; unsigned short flags; int local, remote; int link; }; struct vif vif_table[maxvif]; int main() { struct vif *v; int ct; v = &vif_table[0]; for (ct = 0; ct < maxvif; ct++) if(vif_table[ct].link==1) break; return 0; } --- Use gcc -S -O2 to compile: ---x86 asm main.s .file "main.c" .text .p2align 4,,15 .globl main .type main, @function main: leal 4(%esp), %ecx andl $-16, %esp pushl -4(%ecx) movl $vif_table, %eax pushl %ebp movl %esp, %ebp pushl %ecx jmp .L2 .p2align 4,,7 .L8: cmpl $vif_table+1240, %eax je .L3 addl $40, %eax .L2: cmpl $1, 36(%eax) jne .L8 .L3: popl %ecx xorl %eax, %eax popl %ebp leal -4(%ecx), %esp ret .size main, .-main .comm vif_table,1280,32 .ident "GCC: (GNU) 4.1.2 20070115 (prerelease) (SUSE Linux)" .section .note.GNU-stack,"",@progbits --- ---x86 asm main2.s .file "main2.c" .text .p2align 4,,15 .globl main .type main, @function main: leal 4(%esp), %ecx andl $-16, %esp pushl -4(%ecx) xorl %eax, %eax pushl %ebp movl %esp, %ebp pushl %ecx jmp .L2 .p2align 4,,7 .L8: addl $40, %eax cmpl $1280, %eax je .L3 .L2: cmpl $1, vif_table+36(%eax) jne .L8 .L3: popl %ecx xorl %eax, %eax popl %ebp leal -4(%ecx), %esp ret .size main, .-main .comm vif_table,1280,32 .ident "GCC: (GNU) 4.1.2 20070115 (prerelease) (SUSE Linux)" .section .note.GNU-stack,"",@progbits --- In loop area, main.s and main2.s have the following difference: main.s : cmpl $vif_table+1240, %eax cmpl $1, 36(%eax) main2.s: cmpl $1280, %eax cmpl $1, 
vif_table+36(%eax) This difference cannot cause a difference in performance. OK. Here is the asm on SPARC (not cross-compiled) ---main.s .global main main: /* 000000 21 */ sethi %hi(vif_table),%o5 /* 0x0004 22 */ or %g0,0,%o4 /* 0x0008 21 */ add %o5,%lo(vif_table),%o3 /* 0x000c 23 */ ld [%o3+36],%o5 .L900000106: /* 0x0010 23 */ cmp %o5,1 /* 0x0014 */ be,pn %icc,.L77000028 /* 0x0018 22 */ add %o4,1,%o4 .L77000025: /* 0x001c 22 */ add %o3,40,%o3 /* 0x0020 */ cmp %o4,32 /* 0x0024 */ bl,a,pt %icc,.L900000106 /* 0x0028 23 */ ld [%o3+36],%o5 .L77000028: /* 0x002c 22 */ retl ! Result = %o0 /* 0x0030 */ or %g0,0,%o0 /* 0x0034 0 */ .type main,2 /* 0x0034 0 */ .size main,(.-main) /* 0x0034 0 */ .global __fsr_init_value /* 0x0034 */ __fsr_init_value=0 --- ---main2.s .global main main: /* 000000 22 */ sethi %hi(vif_table+36),%o5 /* 0x0004 */ or %g0,0,%o3 /* 0x0008 */ add %o5,%lo(vif_table+36),%o4 /* 0x000c 23 */ ld [%o5+%lo(vif_table+36)],%o5 .L900000106: /* 0x0010 23 */ cmp %o5,1 /* 0x0014 */ be,pn %icc,.L77000028 /* 0x0018 22 */ add %o4,40,%o4 .L77000025: /* 0x001c 22 */ add %o3,1,%o3 /* 0x0020 */ cmp %o3,32 /* 0x0024 */ bl,a,pt %icc,.L900000106 /* 0x0028 23 */ ld [%o4],%o5 .L77000028: /* 0x002c 22 */ retl ! Result = %o0 /* 0x0030 */ or %g0,0,%o0 /* 0x0034 0 */ .type main,2 /* 0x0034 0 */ .size main,(.-main) /* 0x0034 0 */ .global __fsr_init_value /* 0x0034 */ __fsr_init_value=0 --- In the loop, both versions advance a pointer by sizeof(struct vif). So we can conclude that current compilers optimize the indexed access. :) Ingo, if you have a different opinion, I would appreciate it if you shared it. :)