* [PATCH] Use random32() in net/ipv4/multipath
@ 2007-02-15 23:46 Joe Perches
  2007-02-22  9:26 ` David Miller
  0 siblings, 1 reply; 10+ messages in thread
From: Joe Perches @ 2007-02-15 23:46 UTC (permalink / raw)
  To: netdev

Einar Lueck's email addresses bounce:
<elueck@de.ibm.com> <lkml@einar-lueck.de>

Remove the local random number generator function in favor of random32().

Signed-off-by: Joe Perches <joe@perches.com>

diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c
index b8c289f..6448e6c 100644
--- a/net/ipv4/multipath_random.c
+++ b/net/ipv4/multipath_random.c
@@ -33,6 +33,7 @@
 #include <linux/module.h>
 #include <linux/mroute.h>
 #include <linux/init.h>
+#include <linux/random.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
@@ -49,21 +50,6 @@
 
 #define MULTIPATH_MAX_CANDIDATES 40
 
-/* interface to random number generation */
-static unsigned int RANDOM_SEED = 93186752;
-
-static inline unsigned int random(unsigned int ubound)
-{
-	static unsigned int a = 1588635695,
-		q = 2,
-		r = 1117695901;
-
-	RANDOM_SEED = a*(RANDOM_SEED % q) - r*(RANDOM_SEED / q);
-
-	return RANDOM_SEED % ubound;
-}
-
-
 static void random_select_route(const struct flowi *flp,
 				struct rtable *first,
 				struct rtable **rp)
@@ -85,7 +71,7 @@ static void random_select_route(const struct flowi *flp,
 	if (candidate_count > 1) {
 		unsigned char i = 0;
 		unsigned char candidate_no = (unsigned char)
-			random(candidate_count);
+			(random32() % candidate_count);
 
 		/* find chosen candidate and adjust GC data for all candidates
 		 * to ensure they stay in cache
diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c
index 92b0482..d6115a3 100644
--- a/net/ipv4/multipath_wrandom.c
+++ b/net/ipv4/multipath_wrandom.c
@@ -33,6 +33,7 @@
 #include <linux/module.h>
 #include <linux/mroute.h>
 #include <linux/init.h>
+#include <linux/random.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
@@ -85,18 +86,6 @@ struct multipath_route {
 /* state: primarily weight per route information */
 static struct multipath_bucket state[MULTIPATH_STATE_SIZE];
 
-/* interface to random number generation */
-static unsigned int RANDOM_SEED = 93186752;
-
-static inline unsigned int random(unsigned int ubound)
-{
-	static unsigned int a = 1588635695,
-		q = 2,
-		r = 1117695901;
-	RANDOM_SEED = a*(RANDOM_SEED % q) - r*(RANDOM_SEED / q);
-	return RANDOM_SEED % ubound;
-}
-
 static unsigned char __multipath_lookup_weight(const struct flowi *fl,
 					       const struct rtable *rt)
 {
@@ -194,7 +183,7 @@ static void wrandom_select_route(const struct flowi *flp,
 
 	/* choose a weighted random candidate */
 	decision = first;
-	selector = random(power);
+	selector = random32() % power;
 	last_power = 0;
 
 	/* select candidate, adjust GC data and cleanup local state */
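
For reference, the deleted helper is easy to examine outside the kernel.
The following standalone userspace sketch reproduces it verbatim (the
harness around it, main() and the sixteen draws, is illustrative only,
not kernel code): with q = 2 the recurrence consumes just the seed's low
bit plus the shifted remainder, a much weaker construction than the
Tausworthe generator behind random32().

#include <stdio.h>

/* Userspace reproduction of the helper removed above, to inspect its
 * output stream. */
static unsigned int seed = 93186752;	/* the old RANDOM_SEED */

static unsigned int old_random(unsigned int ubound)
{
	static const unsigned int a = 1588635695, q = 2, r = 1117695901;

	seed = a * (seed % q) - r * (seed / q);
	return seed % ubound;
}

int main(void)
{
	int i;

	/* 40 mirrors MULTIPATH_MAX_CANDIDATES; the draw count is arbitrary */
	for (i = 0; i < 16; i++)
		printf("%u\n", old_random(40));
	return 0;
}

Note that random32() % n, like the old % ubound, retains a slight modulo
bias, but for picking among at most 40 route candidates it is negligible.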




* Re: [PATCH] Use random32() in net/ipv4/multipath
  2007-02-15 23:46 [PATCH] Use random32() in net/ipv4/multipath Joe Perches
@ 2007-02-22  9:26 ` David Miller
  2007-02-22 10:22   ` [PATCH] TCP : keep copied_seq, rcv_wup and rcv_nxt together Eric Dumazet
  0 siblings, 1 reply; 10+ messages in thread
From: David Miller @ 2007-02-22  9:26 UTC (permalink / raw)
  To: joe; +Cc: netdev

From: Joe Perches <joe@perches.com>
Date: Thu, 15 Feb 2007 15:46:44 -0800

> Einar Lueck's email addresses bounce:
> <elueck@de.ibm.com> <lkml@einar-lueck.de>
> 
> Remove the local random number generator function in favor of random32().
> 
> Signed-off-by: Joe Perches <joe@perches.com>

Applied, thanks Joe.


* [PATCH] TCP : keep copied_seq, rcv_wup and rcv_nxt together
  2007-02-22  9:26 ` David Miller
@ 2007-02-22 10:22   ` Eric Dumazet
  2007-02-22 10:32     ` David Miller
                       ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Eric Dumazet @ 2007-02-22 10:22 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

[-- Attachment #1: Type: text/plain, Size: 790 bytes --]

Hi David

In an oprofile study I noticed a cache miss in tcp_rcv_established() when
reading copied_seq.


ffffffff80400a80 <tcp_rcv_established>: /* tcp_rcv_established total: 4034293  2.0400 */

 55493  0.0281 :ffffffff80400bc9:   mov    0x4c8(%r12),%eax copied_seq
543103  0.2746 :ffffffff80400bd1:   cmp    0x3e0(%r12),%eax   rcv_nxt    

if (tp->copied_seq == tp->rcv_nxt &&
	len - tcp_header_len <= tp->ucopy.len) {


In this function, the cache line 0x4c0 -> 0x500 is touched only to read
the 'copied_seq' field.

rcv_wup and copied_seq should sit next to the rcv_nxt field, to lower the
number of active cache lines in hot paths (tcp_rcv_established(),
tcp_poll(), ...).
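
The cost shows up directly in the two operand offsets of the disassembly
above once they are mapped onto cache lines; here is a minimal standalone
sketch (the 64-byte line size is an assumption about the profiled
machine):

#include <stdio.h>

/* Map the struct offsets from the oprofile annotations above onto
 * 64-byte cache lines. */
int main(void)
{
	unsigned int copied_seq = 0x4c8;	/* offset in the mov */
	unsigned int rcv_nxt    = 0x3e0;	/* offset in the cmp */
	unsigned int line       = 64;		/* assumed line size */

	printf("copied_seq: line 0x%x\n", copied_seq / line * line);
	printf("rcv_nxt:    line 0x%x\n", rcv_nxt / line * line);
	return 0;
}

It prints lines 0x4c0 and 0x3c0: the hot comparison touches two distinct
lines before the patch, and only one once copied_seq sits beside rcv_nxt.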

The patch is 64-bit friendly (alignment constraints create no new hole).

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

[-- Attachment #2: tcp_sock_reorder1.patch --]
[-- Type: text/plain, Size: 982 bytes --]

--- linux/include/linux/tcp.h	2007-02-22 11:53:22.000000000 +0100
+++ linux-ed/include/linux/tcp.h	2007-02-22 12:03:19.000000000 +0100
@@ -242,6 +242,8 @@ struct tcp_sock {
  *	See RFC793 and RFC1122. The RFC writes these in capitals.
  */
  	u32	rcv_nxt;	/* What we want to receive next 	*/
+	u32	copied_seq;	/* Head of yet unread data		*/
+	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
  	u32	snd_nxt;	/* Next sequence we send		*/
 
  	u32	snd_una;	/* First byte we want an ack for	*/
@@ -307,10 +309,8 @@ struct tcp_sock {
 	struct sk_buff_head	out_of_order_queue; /* Out of order segments go here */
 
  	u32	rcv_wnd;	/* Current receiver window		*/
-	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
 	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
 	u32	pushed_seq;	/* Last pushed seq, required to talk to windows */
-	u32	copied_seq;	/* Head of yet unread data		*/
 
 /*	SACKs data	*/
 	struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */


* Re: [PATCH] TCP : keep copied_seq, rcv_wup and rcv_nxt together
  2007-02-22 10:22   ` [PATCH] TCP : keep copied_seq, rcv_wup and rcv_nxt together Eric Dumazet
@ 2007-02-22 10:32     ` David Miller
  2007-02-22 10:43       ` Eric Dumazet
  2007-02-22 11:11     ` [PATCH, take 2] " Eric Dumazet
  2007-02-22 14:50     ` [PATCH] NET : keep sk_backlog near sk_lock Eric Dumazet
  2 siblings, 1 reply; 10+ messages in thread
From: David Miller @ 2007-02-22 10:32 UTC (permalink / raw)
  To: dada1; +Cc: netdev

From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 22 Feb 2007 11:22:02 +0100

> rcv_wup and copied_seq should sit next to the rcv_nxt field, to lower the
> number of active cache lines in hot paths (tcp_rcv_established(),
> tcp_poll(), ...).

Please fix up the assignment order in tcp_create_openreq_child(), else
we'll get a new store buffer stall during socket creation :-)

Thanks Eric.


* Re: [PATCH] TCP : keep copied_seq, rcv_wup and rcv_nxt together
  2007-02-22 10:32     ` David Miller
@ 2007-02-22 10:43       ` Eric Dumazet
  2007-02-22 11:02         ` David Miller
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2007-02-22 10:43 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Thursday 22 February 2007 11:32, David Miller wrote:
> From: Eric Dumazet <dada1@cosmosbay.com>
> Date: Thu, 22 Feb 2007 11:22:02 +0100
>
> > rcv_wup and copied_seq should sit next to the rcv_nxt field, to lower
> > the number of active cache lines in hot paths (tcp_rcv_established(),
> > tcp_poll(), ...).
>
> Please fix up the assignment order in tcp_create_openreq_child(), else
> we'll get a new store buffer stall during socket creation :-)

OK :)

I wonder if:

newtp->snd_nxt = newtp->snd_una = newtp->snd_sml = treq->snt_isn + 1;

is correct, since the compiler will write snd_sml, then snd_una, then
snd_nxt.

Maybe we should reorder this too?
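
For illustration, here is the ordering question on a reduced struct (a
standalone sketch; C leaves the relative order of these stores
unspecified, right-to-left emission is simply what compilers of the era
tended to produce):

#include <stdio.h>

/* Reduced stand-in for the tcp_sock fields being initialized; the
 * declaration order follows the kernel struct: snd_nxt, then snd_una,
 * then snd_sml at a higher offset. */
struct seqs {
	unsigned int snd_nxt;
	unsigned int snd_una;
	unsigned int snd_sml;
};

int main(void)
{
	struct seqs s;
	unsigned int isn = 12345;	/* stands in for treq->snt_isn */

	/* Right-to-left: typically stores snd_sml, snd_una, snd_nxt,
	 * walking the struct backwards. */
	s.snd_nxt = s.snd_una = s.snd_sml = isn + 1;

	/* Reversed chain: typically stores snd_nxt, snd_una, snd_sml,
	 * ascending through the struct. */
	s.snd_sml = s.snd_una = s.snd_nxt = isn + 1;

	printf("%u %u %u\n", s.snd_nxt, s.snd_una, s.snd_sml);
	return 0;
}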

Eric


* Re: [PATCH] TCP : keep copied_seq, rcv_wup and rcv_nxt together
  2007-02-22 10:43       ` Eric Dumazet
@ 2007-02-22 11:02         ` David Miller
  0 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2007-02-22 11:02 UTC (permalink / raw)
  To: dada1; +Cc: netdev

From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 22 Feb 2007 11:43:32 +0100

> I wonder if:
> 
> newtp->snd_nxt = newtp->snd_una = newtp->snd_sml = treq->snt_isn + 1;
> 
> is correct, since the compiler will write snd_sml, then snd_una, then
> snd_nxt.
> 
> Maybe we should reorder this too?

Yes, that appears necessary.



* [PATCH, take 2] TCP : keep copied_seq, rcv_wup and rcv_nxt together
  2007-02-22 10:22   ` [PATCH] TCP : keep copied_seq, rcv_wup and rcv_nxt together Eric Dumazet
  2007-02-22 10:32     ` David Miller
@ 2007-02-22 11:11     ` Eric Dumazet
  2007-02-22 11:22       ` David Miller
  2007-02-22 14:50     ` [PATCH] NET : keep sk_backlog near sk_lock Eric Dumazet
  2 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2007-02-22 11:11 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

[-- Attachment #1: Type: text/plain, Size: 988 bytes --]

Hi David

In an oprofile study I noticed a cache miss in tcp_rcv_established() when
reading copied_seq.


ffffffff80400a80 <tcp_rcv_established>: /* tcp_rcv_established total: 4034293  2.0400 */

 55493  0.0281 :ffffffff80400bc9:   mov    0x4c8(%r12),%eax copied_seq
543103  0.2746 :ffffffff80400bd1:   cmp    0x3e0(%r12),%eax   rcv_nxt    

if (tp->copied_seq == tp->rcv_nxt &&
        len - tcp_header_len <= tp->ucopy.len) {


In this function, the cache line 0x4c0 -> 0x500 is touched only to read
the 'copied_seq' field.

rcv_wup and copied_seq should sit next to the rcv_nxt field, to lower the
number of active cache lines in hot paths (tcp_rcv_established(),
tcp_poll(), ...).

As you suggested, I changed tcp_create_openreq_child() so that these fields
are set together, to avoid adding a new store buffer stall.

The patch is 64-bit friendly (alignment constraints create no new hole).

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

[-- Attachment #2: tcp_sock_reorder1.patch --]
[-- Type: text/plain, Size: 1949 bytes --]

--- linux/include/linux/tcp.h	2007-02-22 11:53:22.000000000 +0100
+++ linux-ed/include/linux/tcp.h	2007-02-22 12:03:19.000000000 +0100
@@ -242,6 +242,8 @@ struct tcp_sock {
  *	See RFC793 and RFC1122. The RFC writes these in capitals.
  */
  	u32	rcv_nxt;	/* What we want to receive next 	*/
+	u32	copied_seq;	/* Head of yet unread data		*/
+	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
  	u32	snd_nxt;	/* Next sequence we send		*/
 
  	u32	snd_una;	/* First byte we want an ack for	*/
@@ -307,10 +309,8 @@ struct tcp_sock {
 	struct sk_buff_head	out_of_order_queue; /* Out of order segments go here */
 
  	u32	rcv_wnd;	/* Current receiver window		*/
-	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
 	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
 	u32	pushed_seq;	/* Last pushed seq, required to talk to windows */
-	u32	copied_seq;	/* Head of yet unread data		*/
 
 /*	SACKs data	*/
 	struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
--- linux/net/ipv4/tcp_minisocks.c	2007-02-22 12:41:26.000000000 +0100
+++ linux-ed/net/ipv4/tcp_minisocks.c	2007-02-22 12:49:49.000000000 +0100
@@ -387,8 +387,8 @@ struct sock *tcp_create_openreq_child(st
 		/* Now setup tcp_sock */
 		newtp = tcp_sk(newsk);
 		newtp->pred_flags = 0;
-		newtp->rcv_nxt = treq->rcv_isn + 1;
-		newtp->snd_nxt = newtp->snd_una = newtp->snd_sml = treq->snt_isn + 1;
+		newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
+		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
 
 		tcp_prequeue_init(newtp);
 
@@ -422,10 +422,8 @@ struct sock *tcp_create_openreq_child(st
 		tcp_set_ca_state(newsk, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
 		skb_queue_head_init(&newtp->out_of_order_queue);
-		newtp->rcv_wup = treq->rcv_isn + 1;
 		newtp->write_seq = treq->snt_isn + 1;
 		newtp->pushed_seq = newtp->write_seq;
-		newtp->copied_seq = treq->rcv_isn + 1;
 
 		newtp->rx_opt.saw_tstamp = 0;
 


* Re: [PATCH, take 2] TCP : keep copied_seq, rcv_wup and rcv_nxt together
  2007-02-22 11:11     ` [PATCH, take 2] " Eric Dumazet
@ 2007-02-22 11:22       ` David Miller
  0 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2007-02-22 11:22 UTC (permalink / raw)
  To: dada1; +Cc: netdev

From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 22 Feb 2007 12:11:50 +0100

> In an oprofile study I noticed a cache miss in tcp_rcv_established() when
> reading copied_seq.
 ...
> As you suggested, I changed tcp_create_openreq_child() so that these fields
> are set together, to avoid adding a new store buffer stall.
> 
> The patch is 64-bit friendly (alignment constraints create no new hole).
> 
> Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

Looks great, applied to my tcp-2.6 GIT tree, thanks a lot!


* [PATCH] NET : keep sk_backlog near sk_lock
  2007-02-22 10:22   ` [PATCH] TCP : keep copied_seq, rcv_wup and rcv_nxt together Eric Dumazet
  2007-02-22 10:32     ` David Miller
  2007-02-22 11:11     ` [PATCH, take 2] " Eric Dumazet
@ 2007-02-22 14:50     ` Eric Dumazet
  2007-03-05  0:05       ` David Miller
  2 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2007-02-22 14:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

[-- Attachment #1: Type: text/plain, Size: 616 bytes --]

Hi David

sk_backlog is a critical field of struct sock. (famous last words)

It is (ab)used in hot paths, in particular in release_sock(), tcp_recvmsg(), 
tcp_v4_rcv(), sk_receive_skb().

It really makes sense to place it next to sk_lock, because sk_backlog is
only used after sk_lock is locked (so its cache line is already hot in the
L1 cache). This should reduce cache misses and sk_lock acquisition time.
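
The access pattern being optimized looks roughly like the reduced
userspace sketch below (the types and the drain loop are simplified
stand-ins, not the kernel's actual code):

#include <stddef.h>

typedef int spinlock_t;	/* stand-in for the kernel type */

struct sk_buff { struct sk_buff *next; };

struct sock {
	spinlock_t sk_lock;	/* the backlog now shares this cache line */
	struct {
		struct sk_buff *head;
		struct sk_buff *tail;
	} sk_backlog;
};

/* Caller holds sk_lock, as release_sock() does, so by the time we get
 * here the lock's cache line is hot and head/tail come along for free. */
static void drain_backlog(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
	while (skb) {
		struct sk_buff *next = skb->next;
		/* ... process skb ... */
		skb = next;
	}
}

int main(void)
{
	struct sock sk = { 0, { NULL, NULL } };

	drain_backlog(&sk);
	return 0;
}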

(In theory we could move only the head pointer near sk_lock and leave tail
far away, because 'tail' is normally not as hot, but let's keep it
simple :) )

Thank you

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

[-- Attachment #2: sk_backlog.patch --]
[-- Type: text/plain, Size: 1829 bytes --]

--- linux/include/net/sock.h	2007-02-22 16:21:36.000000000 +0100
+++ linux-ed/include/net/sock.h	2007-02-22 16:21:36.000000000 +0100
@@ -202,6 +202,15 @@ struct sock {
 	unsigned short		sk_type;
 	int			sk_rcvbuf;
 	socket_lock_t		sk_lock;
+	/*
+	 * The backlog queue is special, it is always used with
+	 * the per-socket spinlock held and requires low latency
+	 * access. Therefore we special case it's implementation.
+	 */
+	struct {
+		struct sk_buff *head;
+		struct sk_buff *tail;
+	} sk_backlog;
 	wait_queue_head_t	*sk_sleep;
 	struct dst_entry	*sk_dst_cache;
 	struct xfrm_policy	*sk_policy[2];
@@ -221,15 +230,6 @@ struct sock {
 	int			sk_rcvlowat;
 	unsigned long 		sk_flags;
 	unsigned long	        sk_lingertime;
-	/*
-	 * The backlog queue is special, it is always used with
-	 * the per-socket spinlock held and requires low latency
-	 * access. Therefore we special case it's implementation.
-	 */
-	struct {
-		struct sk_buff *head;
-		struct sk_buff *tail;
-	} sk_backlog;
 	struct sk_buff_head	sk_error_queue;
 	struct proto		*sk_prot_creator;
 	rwlock_t		sk_callback_lock;
--- linux/net/core/sock.c	2007-02-22 16:21:36.000000000 +0100
+++ linux-ed/net/core/sock.c	2007-02-22 16:21:36.000000000 +0100
@@ -904,6 +904,7 @@ struct sock *sk_clone(const struct sock 
 		sk_node_init(&newsk->sk_node);
 		sock_lock_init(newsk);
 		bh_lock_sock(newsk);
+		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
 
 		atomic_set(&newsk->sk_rmem_alloc, 0);
 		atomic_set(&newsk->sk_wmem_alloc, 0);
@@ -923,7 +924,6 @@ struct sock *sk_clone(const struct sock 
 		newsk->sk_wmem_queued	= 0;
 		newsk->sk_forward_alloc = 0;
 		newsk->sk_send_head	= NULL;
-		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
 
 		sock_reset_flag(newsk, SOCK_DONE);


* Re: [PATCH] NET : keep sk_backlog near sk_lock
  2007-02-22 14:50     ` [PATCH] NET : keep sk_backlog near sk_lock Eric Dumazet
@ 2007-03-05  0:05       ` David Miller
  0 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2007-03-05  0:05 UTC (permalink / raw)
  To: dada1; +Cc: netdev

From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 22 Feb 2007 15:50:15 +0100

> sk_backlog is a critical field of struct sock. (famous last words)
> 
> It is (ab)used in hot paths, in particular in release_sock(), tcp_recvmsg(), 
> tcp_v4_rcv(), sk_receive_skb().
> 
> It really makes sense to place it next to sk_lock, because sk_backlog is
> only used after sk_lock is locked (so its cache line is already hot in the
> L1 cache). This should reduce cache misses and sk_lock acquisition time.
> 
> (In theory we could move only the head pointer near sk_lock and leave tail
> far away, because 'tail' is normally not as hot, but let's keep it
> simple :) )
> 
> Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

Applied, thanks a lot Eric.


