All of lore.kernel.org
 help / color / mirror / Atom feed
From: Wang Chen <wangchen@cn.fujitsu.com>
To: Ingo Oeser <netdev@axxeo.de>
Cc: "David S. Miller" <davem@davemloft.net>, NETDEV <netdev@vger.kernel.org>
Subject: Re: [PATCH 1/2] ipmr: delete redundant variable
Date: Thu, 24 Jul 2008 15:37:21 +0800	[thread overview]
Message-ID: <48883131.9070805@cn.fujitsu.com> (raw)
In-Reply-To: <200807231405.39515.netdev@axxeo.de>

Ingo Oeser said the following on 2008-7-23 20:05:
> But please check the generated assembly yourself on a CISC and RISC
> machine to get an idea of the effects. It will be a nice learning 
> experience I enjoyed myself already.
> 

I did the experiment.

I used the following C code to compare the two approaches, and the result
is that both perform the same.

----main.c
/* Number of entries in vif_table; also the upper bound of the scan loop in main(). */
#define maxvif 32

/*
 * Table element used for the experiment.  main() only reads the 'link'
 * member; no other member is accessed in this file.  In the x86 listing
 * further down, the loop steps through the table 40 bytes at a time and
 * reads 'link' at offset 36.
 *
 * NOTE(review): 'bytyes_out' and 'threshhold' look like misspellings; they
 * are harmless here because neither member is ever referenced.
 */
struct vif {
	int *dev;
	unsigned long bytes_in, bytyes_out;
	unsigned long pkt_in, pkt_out;
	unsigned long rate_limit;
	unsigned char threshhold;
	unsigned short flags;
	int	local, remote;
	int	link;
};

/* File-scope table with no initializer, so every member starts out as zero. */
struct vif vif_table[maxvif];

/*
 * Pointer-walk variant of the scan: 'v' is advanced in step with the
 * counter 'ct', and each table entry is examined through 'v'.  The loop
 * stops at the first entry whose 'link' equals 1, or after maxvif entries.
 * The result of the search is not used; main() always returns 0.
 */
int main()
{
	struct vif *v;
	int ct;

	/* Start at the first table entry. */
	v = &vif_table[0];
	for (ct = 0; ct < maxvif; ct++, v++)
		if(v->link==1)
			break;
	return 0;
}
---

---main2.c
#define maxvif 32

/*
 * Table element used for the experiment.  main() only reads the 'link'
 * member; no other member is accessed in this file.
 *
 * NOTE(review): 'bytyes_out' and 'threshhold' look like misspellings; they
 * are harmless here because neither member is ever referenced.
 */
struct vif {
	int *dev;
	unsigned long bytes_in, bytyes_out;
	unsigned long pkt_in, pkt_out;
	unsigned long rate_limit;
	unsigned char threshhold;
	unsigned short flags;
	int	local, remote;
	int	link;
};

struct vif vif_table[maxvif];

/*
 * Indexed variant of the scan: each table entry is examined as
 * vif_table[ct].  The loop stops at the first entry whose 'link' equals 1,
 * or after maxvif entries.  The result of the search is not used; main()
 * always returns 0.
 */
int main()
{
	struct vif *v;
	int ct;

	/* 'v' is assigned here but never read afterwards in this variant. */
	v = &vif_table[0];
	for (ct = 0; ct < maxvif; ct++)
		if(vif_table[ct].link==1)
			break;
	return 0;
}
---

Use gcc -S -O2 to compile:
---x86 asm main.s
	.file	"main.c"
	.text
	.p2align 4,,15
.globl main
	.type	main, @function
main:
	leal	4(%esp), %ecx
	andl	$-16, %esp
	pushl	-4(%ecx)
	movl	$vif_table, %eax
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ecx
	jmp	.L2
	.p2align 4,,7
.L8:
	cmpl	$vif_table+1240, %eax
	je	.L3
	addl	$40, %eax
.L2:
	cmpl	$1, 36(%eax)
	jne	.L8
.L3:
	popl	%ecx
	xorl	%eax, %eax
	popl	%ebp
	leal	-4(%ecx), %esp
	ret
	.size	main, .-main
	.comm	vif_table,1280,32
	.ident	"GCC: (GNU) 4.1.2 20070115 (prerelease) (SUSE Linux)"
	.section	.note.GNU-stack,"",@progbits
---

---x86 asm main2.s
	.file	"main2.c"
	.text
	.p2align 4,,15
.globl main
	.type	main, @function
main:
	leal	4(%esp), %ecx
	andl	$-16, %esp
	pushl	-4(%ecx)
	xorl	%eax, %eax
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ecx
	jmp	.L2
	.p2align 4,,7
.L8:
	addl	$40, %eax
	cmpl	$1280, %eax
	je	.L3
.L2:
	cmpl	$1, vif_table+36(%eax)
	jne	.L8
.L3:
	popl	%ecx
	xorl	%eax, %eax
	popl	%ebp
	leal	-4(%ecx), %esp
	ret
	.size	main, .-main
	.comm	vif_table,1280,32
	.ident	"GCC: (GNU) 4.1.2 20070115 (prerelease) (SUSE Linux)"
	.section	.note.GNU-stack,"",@progbits
---

In the loop body, main.s and main2.s differ only in the following instructions:
main.s :
	cmpl	$vif_table+1240, %eax
	cmpl	$1, 36(%eax)
main2.s:
	cmpl	$1280, %eax
	cmpl	$1, vif_table+36(%eax)
This difference cannot cause any difference in performance.

OK. Here is the asm on SPARC (compiled natively, not cross-compiled):
---main.s
                       	.global main                

			main:
/* 000000	  21 */		sethi	%hi(vif_table),%o5
/* 0x0004	  22 */		or	%g0,0,%o4
/* 0x0008	  21 */		add	%o5,%lo(vif_table),%o3
/* 0x000c	  23 */		ld	[%o3+36],%o5

			.L900000106:
/* 0x0010	  23 */		cmp	%o5,1
/* 0x0014	     */		be,pn	%icc,.L77000028
/* 0x0018	  22 */		add	%o4,1,%o4

			.L77000025:
/* 0x001c	  22 */		add	%o3,40,%o3
/* 0x0020	     */		cmp	%o4,32
/* 0x0024	     */		bl,a,pt	%icc,.L900000106
/* 0x0028	  23 */		ld	[%o3+36],%o5

			.L77000028:
/* 0x002c	  22 */		retl	! Result =  %o0
/* 0x0030	     */		or	%g0,0,%o0
/* 0x0034	   0 */		.type	main,2
/* 0x0034	   0 */		.size	main,(.-main)
/* 0x0034	   0 */		.global	__fsr_init_value
/* 0x0034	     */		 __fsr_init_value=0
---

---main2.s
                       	.global main   

			main:
/* 000000	  22 */		sethi	%hi(vif_table+36),%o5
/* 0x0004	     */		or	%g0,0,%o3
/* 0x0008	     */		add	%o5,%lo(vif_table+36),%o4
/* 0x000c	  23 */		ld	[%o5+%lo(vif_table+36)],%o5

			.L900000106:
/* 0x0010	  23 */		cmp	%o5,1
/* 0x0014	     */		be,pn	%icc,.L77000028
/* 0x0018	  22 */		add	%o4,40,%o4

			.L77000025:
/* 0x001c	  22 */		add	%o3,1,%o3
/* 0x0020	     */		cmp	%o3,32
/* 0x0024	     */		bl,a,pt	%icc,.L900000106
/* 0x0028	  23 */		ld	[%o4],%o5

			.L77000028:
/* 0x002c	  22 */		retl	! Result =  %o0
/* 0x0030	     */		or	%g0,0,%o0
/* 0x0034	   0 */		.type	main,2
/* 0x0034	   0 */		.size	main,(.-main)
/* 0x0034	   0 */		.global	__fsr_init_value
/* 0x0034	     */		 __fsr_init_value=0
---

In the loop body, both versions advance a pointer by sizeof(struct vif) on each iteration.

So we can conclude that current compilers already optimize the indexed access.
:)

Ingo, if you have a different opinion, I would appreciate it if you shared it. :)


  parent reply	other threads:[~2008-07-24  7:38 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-07-23  1:45 [PATCH 1/2] ipmr: delete redundant variable Wang Chen
2008-07-23  8:03 ` Ingo Oeser
2008-07-23  9:35   ` Wang Chen
2008-07-23 12:05     ` Ingo Oeser
2008-07-23 15:16       ` Wang Chen
2008-07-24  7:37       ` Wang Chen [this message]
2008-07-25 17:36         ` Ingo Oeser

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48883131.9070805@cn.fujitsu.com \
    --to=wangchen@cn.fujitsu.com \
    --cc=davem@davemloft.net \
    --cc=netdev@axxeo.de \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.