netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] net: code cleanups
@ 2010-09-30  2:24 Changli Gao
  2010-09-30  2:49 ` Joe Perches
  2010-09-30  5:16 ` Eric Dumazet
  0 siblings, 2 replies; 7+ messages in thread
From: Changli Gao @ 2010-09-30  2:24 UTC (permalink / raw)
  To: David S. Miller
  Cc: Alexey Kuznetsov, Pekka Savola (ipv6), James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev, Changli Gao

Compare operations are more readable, and compilers generate the same code
for the both.

Use the macros fl4_* to shrink the length of the lines.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
---
 net/ipv4/af_inet.c |    7 +++----
 net/ipv4/route.c   |   27 ++++++++++++---------------
 2 files changed, 15 insertions(+), 19 deletions(-)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f581f77..ef26640 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1338,10 +1338,9 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 
 		iph2 = ip_hdr(p);
 
-		if ((iph->protocol ^ iph2->protocol) |
-		    (iph->tos ^ iph2->tos) |
-		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
-		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
+		if (iph->protocol != iph2->protocol || iph->tos != iph2->tos ||
+		    (__force u32)iph->saddr != (__force u32)iph2->saddr ||
+		    (__force u32)iph->daddr != (__force u32)iph2->daddr) {
 			NAPI_GRO_CB(p)->same_flow = 0;
 			continue;
 		}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 98beda4..6b00fde 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -683,19 +683,18 @@ static inline bool rt_caching(const struct net *net)
 static inline bool compare_hash_inputs(const struct flowi *fl1,
 					const struct flowi *fl2)
 {
-	return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
-		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
-		(fl1->iif ^ fl2->iif)) == 0);
+	return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst &&
+	       (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src &&
+	       fl1->iif == fl2->iif;
 }
 
 static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 {
-	return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
-		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
-		(fl1->mark ^ fl2->mark) |
-		(*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
-		(fl1->oif ^ fl2->oif) |
-		(fl1->iif ^ fl2->iif)) == 0;
+	return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst &&
+	       (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src &&
+	       fl1->mark == fl2->mark &&
+	       *(u16 *)&fl1->fl4_tos == *(u16 *)&fl2->fl4_tos &&
+	       fl1->oif == fl2->oif && fl1->iif == fl2->iif;
 }
 
 static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
@@ -2286,12 +2285,10 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
 	     rth = rcu_dereference(rth->dst.rt_next)) {
-		if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
-		     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
-		     (rth->fl.iif ^ iif) |
-		     rth->fl.oif |
-		     (rth->fl.fl4_tos ^ tos)) == 0 &&
-		    rth->fl.mark == skb->mark &&
+		if ((__force u32)rth->fl.fl4_dst == (__force u32)daddr &&
+		    (__force u32)rth->fl.fl4_src == (__force u32)saddr &&
+		    rth->fl.iif == iif && rth->fl.oif == 0 &&
+		    rth->fl.fl4_tos == tos && rth->fl.mark == skb->mark &&
 		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
 			if (noref) {

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] net: code cleanups
  2010-09-30  2:24 [PATCH] net: code cleanups Changli Gao
@ 2010-09-30  2:49 ` Joe Perches
  2010-09-30  3:07   ` Changli Gao
  2010-09-30  5:16 ` Eric Dumazet
  1 sibling, 1 reply; 7+ messages in thread
From: Joe Perches @ 2010-09-30  2:49 UTC (permalink / raw)
  To: Changli Gao
  Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev

On Thu, 2010-09-30 at 10:24 +0800, Changli Gao wrote:
> Compare operations are more readable, and compilers generate the same code
> for the both.

As far as I know, not all supported versions of gcc
generate the same code.

Also, you could probably now remove the (__force u32) casts.

> Use the macros fl4_* to shrink the length of the lines.
> 
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ---
>  net/ipv4/af_inet.c |    7 +++----
>  net/ipv4/route.c   |   27 ++++++++++++---------------
>  2 files changed, 15 insertions(+), 19 deletions(-)
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index f581f77..ef26640 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1338,10 +1338,9 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
>  
>  		iph2 = ip_hdr(p);
>  
> -		if ((iph->protocol ^ iph2->protocol) |
> -		    (iph->tos ^ iph2->tos) |
> -		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
> -		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
> +		if (iph->protocol != iph2->protocol || iph->tos != iph2->tos ||
> +		    (__force u32)iph->saddr != (__force u32)iph2->saddr ||
> +		    (__force u32)iph->daddr != (__force u32)iph2->daddr) {
>  			NAPI_GRO_CB(p)->same_flow = 0;
>  			continue;
>  		}
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 98beda4..6b00fde 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -683,19 +683,18 @@ static inline bool rt_caching(const struct net *net)
>  static inline bool compare_hash_inputs(const struct flowi *fl1,
>  					const struct flowi *fl2)
>  {
> -	return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
> -		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
> -		(fl1->iif ^ fl2->iif)) == 0);
> +	return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst &&
> +	       (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src &&
> +	       fl1->iif == fl2->iif;
>  }
>  
>  static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
>  {
> -	return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
> -		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
> -		(fl1->mark ^ fl2->mark) |
> -		(*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
> -		(fl1->oif ^ fl2->oif) |
> -		(fl1->iif ^ fl2->iif)) == 0;
> +	return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst &&
> +	       (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src &&
> +	       fl1->mark == fl2->mark &&
> +	       *(u16 *)&fl1->fl4_tos == *(u16 *)&fl2->fl4_tos &&
> +	       fl1->oif == fl2->oif && fl1->iif == fl2->iif;
>  }
>  
>  static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
> @@ -2286,12 +2285,10 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
>  
>  	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
>  	     rth = rcu_dereference(rth->dst.rt_next)) {
> -		if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
> -		     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
> -		     (rth->fl.iif ^ iif) |
> -		     rth->fl.oif |
> -		     (rth->fl.fl4_tos ^ tos)) == 0 &&
> -		    rth->fl.mark == skb->mark &&
> +		if ((__force u32)rth->fl.fl4_dst == (__force u32)daddr &&
> +		    (__force u32)rth->fl.fl4_src == (__force u32)saddr &&
> +		    rth->fl.iif == iif && rth->fl.oif == 0 &&
> +		    rth->fl.fl4_tos == tos && rth->fl.mark == skb->mark &&
>  		    net_eq(dev_net(rth->dst.dev), net) &&
>  		    !rt_is_expired(rth)) {
>  			if (noref) {




^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] net: code cleanups
  2010-09-30  2:49 ` Joe Perches
@ 2010-09-30  3:07   ` Changli Gao
  2010-09-30  3:13     ` Joe Perches
  0 siblings, 1 reply; 7+ messages in thread
From: Changli Gao @ 2010-09-30  3:07 UTC (permalink / raw)
  To: Joe Perches, Eric Dumazet
  Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev

On Thu, Sep 30, 2010 at 10:49 AM, Joe Perches <joe@perches.com> wrote:
> On Thu, 2010-09-30 at 10:24 +0800, Changli Gao wrote:
>> Compare operations are more readable, and compilers generate the same code
>> for the both.
>
> As far as I know, not all supported versions of gcc
> generate the same code.

Is the former better for the compilers?

>
> Also, you could probably now remove the (__force u32) casts.
>

Maybe Eric doesn't think so.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] net: code cleanups
  2010-09-30  3:07   ` Changli Gao
@ 2010-09-30  3:13     ` Joe Perches
  0 siblings, 0 replies; 7+ messages in thread
From: Joe Perches @ 2010-09-30  3:13 UTC (permalink / raw)
  To: Changli Gao
  Cc: Eric Dumazet, David S. Miller, Alexey Kuznetsov,
	Pekka Savola (ipv6), James Morris, Hideaki YOSHIFUJI,
	Patrick McHardy, netdev

On Thu, 2010-09-30 at 11:07 +0800, Changli Gao wrote:
> On Thu, Sep 30, 2010 at 10:49 AM, Joe Perches <joe@perches.com> wrote:
> > On Thu, 2010-09-30 at 10:24 +0800, Changli Gao wrote:
> >> Compare operations are more readable, and compilers generate the same code
> >> for the both.
> > As far as I know, not all supported versions of gcc
> > generate the same code.
> Is the former better for the compilers?

Yes.  I don't know how much it matters though.

> > Also, you could probably now remove the (__force u32) casts.
> Maybe Eric doesn't think so.

Comparisons of equal types don't need (__force u32) casts. 

They needed to be cast to u32 for the bitwise or's to avoid
compiler warnings.



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] net: code cleanups
  2010-09-30  2:24 [PATCH] net: code cleanups Changli Gao
  2010-09-30  2:49 ` Joe Perches
@ 2010-09-30  5:16 ` Eric Dumazet
  2010-09-30  6:09   ` Changli Gao
  1 sibling, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2010-09-30  5:16 UTC (permalink / raw)
  To: Changli Gao
  Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev

Le jeudi 30 septembre 2010 à 10:24 +0800, Changli Gao a écrit :
> Compare operations are more readable, and compilers generate the same code
> for the both.
> 

You have a buggy compiler then.

I know this code is ugly, but please keep it as is, dont add conditional
branches on hot paths.

Thanks

> Use the macros fl4_* to shrink the length of the lines.
> 
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ---
>  net/ipv4/af_inet.c |    7 +++----
>  net/ipv4/route.c   |   27 ++++++++++++---------------
>  2 files changed, 15 insertions(+), 19 deletions(-)
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index f581f77..ef26640 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1338,10 +1338,9 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
>  
>  		iph2 = ip_hdr(p);
>  
> -		if ((iph->protocol ^ iph2->protocol) |
> -		    (iph->tos ^ iph2->tos) |
> -		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
> -		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
> +		if (iph->protocol != iph2->protocol || iph->tos != iph2->tos ||
> +		    (__force u32)iph->saddr != (__force u32)iph2->saddr ||
> +		    (__force u32)iph->daddr != (__force u32)iph2->daddr) {
>  			NAPI_GRO_CB(p)->same_flow = 0;
>  			continue;
>  		}
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 98beda4..6b00fde 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -683,19 +683,18 @@ static inline bool rt_caching(const struct net *net)
>  static inline bool compare_hash_inputs(const struct flowi *fl1,
>  					const struct flowi *fl2)
>  {
> -	return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
> -		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
> -		(fl1->iif ^ fl2->iif)) == 0);
> +	return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst &&
> +	       (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src &&
> +	       fl1->iif == fl2->iif;
>  }
>  
>  static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
>  {
> -	return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
> -		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
> -		(fl1->mark ^ fl2->mark) |
> -		(*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
> -		(fl1->oif ^ fl2->oif) |
> -		(fl1->iif ^ fl2->iif)) == 0;
> +	return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst &&
> +	       (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src &&
> +	       fl1->mark == fl2->mark &&
> +	       *(u16 *)&fl1->fl4_tos == *(u16 *)&fl2->fl4_tos &&
> +	       fl1->oif == fl2->oif && fl1->iif == fl2->iif;
>  }
>  
>  static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
> @@ -2286,12 +2285,10 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
>  
>  	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
>  	     rth = rcu_dereference(rth->dst.rt_next)) {
> -		if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
> -		     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
> -		     (rth->fl.iif ^ iif) |
> -		     rth->fl.oif |
> -		     (rth->fl.fl4_tos ^ tos)) == 0 &&
> -		    rth->fl.mark == skb->mark &&
> +		if ((__force u32)rth->fl.fl4_dst == (__force u32)daddr &&
> +		    (__force u32)rth->fl.fl4_src == (__force u32)saddr &&
> +		    rth->fl.iif == iif && rth->fl.oif == 0 &&
> +		    rth->fl.fl4_tos == tos && rth->fl.mark == skb->mark &&
>  		    net_eq(dev_net(rth->dst.dev), net) &&
>  		    !rt_is_expired(rth)) {
>  			if (noref) {
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] net: code cleanups
  2010-09-30  5:16 ` Eric Dumazet
@ 2010-09-30  6:09   ` Changli Gao
  2010-09-30  6:31     ` Eric Dumazet
  0 siblings, 1 reply; 7+ messages in thread
From: Changli Gao @ 2010-09-30  6:09 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev

On Thu, Sep 30, 2010 at 1:16 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le jeudi 30 septembre 2010 à 10:24 +0800, Changli Gao a écrit :
>> Compare operations are more readable, and compilers generate the same code
>> for the both.
>>
>
> You have a buggy compiler then.

gcc version 4.4.3 (Gentoo 4.4.3-r2 p1.2)

             rth = rcu_dereference(rth->dst.rt_next)) {
                if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
                     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
                     (rth->fl.iif ^ iif) |
    2f12:       44 3b 80 dc 00 00 00    cmp    0xdc(%rax),%r8d
    2f19:       0f 85 a2 00 00 00       jne    2fc1 <ip_route_input_common+0x145
>
                     rth->fl.oif |
    2f1f:       83 b8 d8 00 00 00 00    cmpl   $0x0,0xd8(%rax)
    2f26:       0f 85 95 00 00 00       jne    2fc1 <ip_route_input_common+0x145
>
        tos &= IPTOS_RT_MASK;
        hash = rt_hash(daddr, saddr, iif, rt_genid(net));

        for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
             rth = rcu_dereference(rth->dst.rt_next)) {
                if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
    2f2c:       44 3b b8 e4 00 00 00    cmp    0xe4(%rax),%r15d
    2f33:       0f 85 88 00 00 00       jne    2fc1
<ip_route_input_common+0x145>
                     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
    2f39:       44 3b b0 e8 00 00 00    cmp    0xe8(%rax),%r14d
    2f40:       75 7f                   jne    2fc1
<ip_route_input_common+0x145>


>
> I know this code is ugly, but please keep it as is, dont add conditional
> branches on hot paths.
>

If the compiler doesn't generate conditional branches, we have to
touch every necessary field of all the cache entries in one hash
bucket. Is it better than condition branch? I think the compiler
developers know it better.

And the compiler reorders the conditional branches, is it expected?

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] net: code cleanups
  2010-09-30  6:09   ` Changli Gao
@ 2010-09-30  6:31     ` Eric Dumazet
  0 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2010-09-30  6:31 UTC (permalink / raw)
  To: Changli Gao
  Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev

Le jeudi 30 septembre 2010 à 14:09 +0800, Changli Gao a écrit :
> On Thu, Sep 30, 2010 at 1:16 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > Le jeudi 30 septembre 2010 à 10:24 +0800, Changli Gao a écrit :
> >> Compare operations are more readable, and compilers generate the same code
> >> for the both.
> >>
> >
> > You have a buggy compiler then.
> 
> gcc version 4.4.3 (Gentoo 4.4.3-r2 p1.2)
> 
>              rth = rcu_dereference(rth->dst.rt_next)) {
>                 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
>                      ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
>                      (rth->fl.iif ^ iif) |
>     2f12:       44 3b 80 dc 00 00 00    cmp    0xdc(%rax),%r8d
>     2f19:       0f 85 a2 00 00 00       jne    2fc1 <ip_route_input_common+0x145
> >
>                      rth->fl.oif |
>     2f1f:       83 b8 d8 00 00 00 00    cmpl   $0x0,0xd8(%rax)
>     2f26:       0f 85 95 00 00 00       jne    2fc1 <ip_route_input_common+0x145
> >
>         tos &= IPTOS_RT_MASK;
>         hash = rt_hash(daddr, saddr, iif, rt_genid(net));
> 
>         for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
>              rth = rcu_dereference(rth->dst.rt_next)) {
>                 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
>     2f2c:       44 3b b8 e4 00 00 00    cmp    0xe4(%rax),%r15d
>     2f33:       0f 85 88 00 00 00       jne    2fc1
> <ip_route_input_common+0x145>
>                      ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
>     2f39:       44 3b b0 e8 00 00 00    cmp    0xe8(%rax),%r14d
>     2f40:       75 7f                   jne    2fc1
> <ip_route_input_common+0x145>
> 
> 
> >
> > I know this code is ugly, but please keep it as is, dont add conditional
> > branches on hot paths.
> >
> 
> If the compiler doesn't generate conditional branches, we have to
> touch every necessary field of all the cache entries in one hash
> bucket. Is it better than condition branch? I think the compiler
> developers know it better.

Last famous words.

Are you aware of cache lines (64 bytes at least on typical cpus), and
that all fields are already in CPU L1 cache ? I (and others) worked hard
in the past.

> 
> And the compiler reorders the conditional branches, is it expected?
> 

Your compiler added conditional branches on a code not wanting them,
only because on _your_ cpu, these conditional branches might be cheap.

Now, try to compile for an i686 target and see the difference.

If there was no difference, your compiler would be _buggy_, because not
generating optimal assembly.

Here I get :

c141dda9:       8b 55 e8                mov    -0x18(%ebp),%edx
c141ddac:       8b 81 9c 00 00 00       mov    0x9c(%ecx),%eax
c141ddb2:       33 91 a0 00 00 00       xor    0xa0(%ecx),%edx
c141ddb8:       31 f0                   xor    %esi,%eax
c141ddba:       09 d0                   or     %edx,%eax
c141ddbc:       8b 55 e0                mov    -0x20(%ebp),%edx
c141ddbf:       33 91 94 00 00 00       xor    0x94(%ecx),%edx
c141ddc5:       09 d0                   or     %edx,%eax
c141ddc7:       0f b6 55 e7             movzbl -0x19(%ebp),%edx
c141ddcb:       0b 81 90 00 00 00       or     0x90(%ecx),%eax
c141ddd1:       32 91 a4 00 00 00       xor    0xa4(%ecx),%dl
c141ddd7:       0f b6 d2                movzbl %dl,%edx
c141ddda:       09 d0                   or     %edx,%eax
c141dddc:       0f 85 9d 00 00 00       jne    c141de7f <ip_route_input_common+0x1b4>

As you can see, only one conditional branch.

Your patch is not welcomed, thanks.



^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2010-09-30  6:31 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-09-30  2:24 [PATCH] net: code cleanups Changli Gao
2010-09-30  2:49 ` Joe Perches
2010-09-30  3:07   ` Changli Gao
2010-09-30  3:13     ` Joe Perches
2010-09-30  5:16 ` Eric Dumazet
2010-09-30  6:09   ` Changli Gao
2010-09-30  6:31     ` Eric Dumazet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).