netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Tcp port selection for IPV6.
@ 2005-01-21  0:45 Stephen Hemminger
  2005-01-21  0:53 ` Arnaldo Carvalho de Melo
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Stephen Hemminger @ 2005-01-21  0:45 UTC (permalink / raw)
  To: David S. Miller, YOSHIFUJI Hideaki / 吉藤英明; +Cc: netdev

This patch makes TCP over IPV6 select ports the same way the current
TCPv4 code does. It uses a hash function to provide a starting offset
and a free running counter to provide seed.

This changes the port selection semantics to match TCPv4 as well.
If the port is in use but to a different remote address, it will get
reused. It looks like the TCPv6 code was not updated when the TCPv4
code changed. Now the code in ipv4/tcp_ipv4.c and ipv6/tcp_ipv6.c are
almost identical for tcp_hash_connect.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>


diff -Nru a/drivers/char/random.c b/drivers/char/random.c
--- a/drivers/char/random.c	2005-01-18 14:06:54 -08:00
+++ b/drivers/char/random.c	2005-01-18 14:06:54 -08:00
@@ -2283,6 +2283,21 @@
 	return halfMD4Transform(hash, keyptr->secret);
 }
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dport)
+{
+	struct keydata *keyptr = get_keyptr();
+	u32 hash[12];
+
+	memcpy(hash, saddr, 16);
+	hash[4] = dport;
+	memcpy(&hash[5],keyptr->secret,sizeof(__u32) * 7);
+
+	return twothirdsMD4Transform(daddr, hash);
+}
+EXPORT_SYMBOL(secure_tcpv6_port_ephemeral);
+#endif
+
 #ifdef CONFIG_SYN_COOKIES
 /*
  * Secure SYN cookie computation. This is the algorithm worked out by
diff -Nru a/include/linux/random.h b/include/linux/random.h
--- a/include/linux/random.h	2005-01-18 14:06:54 -08:00
+++ b/include/linux/random.h	2005-01-18 14:06:54 -08:00
@@ -53,6 +53,8 @@
 
 extern __u32 secure_ip_id(__u32 daddr);
 extern u32 secure_tcp_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport);
+extern u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, 
+				       __u16 dport);
 extern __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr,
 					__u16 sport, __u16 dport);
 extern __u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr,
diff -Nru a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
--- a/net/ipv4/tcp_ipv4.c	2005-01-18 14:06:54 -08:00
+++ b/net/ipv4/tcp_ipv4.c	2005-01-18 14:06:54 -08:00
@@ -2663,4 +2663,5 @@
 EXPORT_SYMBOL(sysctl_local_port_range);
 EXPORT_SYMBOL(sysctl_max_syn_backlog);
 EXPORT_SYMBOL(sysctl_tcp_low_latency);
+EXPORT_SYMBOL(sysctl_tcp_tw_reuse);
 
diff -Nru a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
--- a/net/ipv6/tcp_ipv6.c	2005-01-18 14:06:54 -08:00
+++ b/net/ipv6/tcp_ipv6.c	2005-01-18 14:06:54 -08:00
@@ -441,21 +441,22 @@
 	}
 }
 
-static int tcp_v6_check_established(struct sock *sk)
+static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
+				      struct tcp_tw_bucket **twp)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct in6_addr *daddr = &np->rcv_saddr;
 	struct in6_addr *saddr = &np->daddr;
 	int dif = sk->sk_bound_dev_if;
-	u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
+	u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
 	int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
 	struct tcp_ehash_bucket *head = &tcp_ehash[hash];
 	struct sock *sk2;
 	struct hlist_node *node;
 	struct tcp_tw_bucket *tw;
 
-	write_lock_bh(&head->lock);
+	write_lock(&head->lock);
 
 	/* Check TIME-WAIT sockets first. */
 	sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
@@ -468,7 +469,10 @@
 		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
 			struct tcp_sock *tp = tcp_sk(sk);
 
-			if (tw->tw_ts_recent_stamp) {
+			if (tw->tw_ts_recent_stamp &&
+			    (!twp || (sysctl_tcp_tw_reuse &&
+				      xtime.tv_sec - 
+				      tw->tw_ts_recent_stamp > 1))) {
 				/* See comment in tcp_ipv4.c */
 				tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
 				if (!tp->write_seq)
@@ -494,40 +498,113 @@
 	__sk_add_node(sk, &head->chain);
 	sk->sk_hashent = hash;
 	sock_prot_inc_use(sk->sk_prot);
-	write_unlock_bh(&head->lock);
+	write_unlock(&head->lock);
 
-	if (tw) {
+	if (twp) {
+		*twp = tw;
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+	} else if (tw) {
 		/* Silly. Should hash-dance instead... */
-		local_bh_disable();
 		tcp_tw_deschedule(tw);
 		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-		local_bh_enable();
 
 		tcp_tw_put(tw);
 	}
 	return 0;
 
 not_unique:
-	write_unlock_bh(&head->lock);
+	write_unlock(&head->lock);
 	return -EADDRNOTAVAIL;
 }
 
-static int tcp_v6_hash_connect(struct sock *sk)
+static inline u32 tcpv6_port_offset(const struct sock *sk)
 {
-	struct tcp_bind_hashbucket *head;
-	struct tcp_bind_bucket *tb;
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct ipv6_pinfo *np = inet6_sk(sk);
 
-	/* XXX */
-	if (inet_sk(sk)->num == 0) { 
-		int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
-		if (err)
-			return err;
-		inet_sk(sk)->sport = htons(inet_sk(sk)->num);
-	}
+	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
+					   np->daddr.s6_addr32,
+					   inet->dport);
+}
 
-	head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
-	tb = tb_head(head);
+static int tcp_v6_hash_connect(struct sock *sk)
+{
+	unsigned short snum = inet_sk(sk)->num;
+ 	struct tcp_bind_hashbucket *head;
+ 	struct tcp_bind_bucket *tb;
+	int ret;
+
+ 	if (!snum) {
+ 		int low = sysctl_local_port_range[0];
+ 		int high = sysctl_local_port_range[1];
+		int range = high - low;
+ 		int i;
+		int port;
+		static u32 hint;
+		u32 offset = hint + tcpv6_port_offset(sk);
+		struct hlist_node *node;
+ 		struct tcp_tw_bucket *tw = NULL;
+
+ 		local_bh_disable();
+		for (i = 1; i <= range; i++) {
+			port = low + (i + offset) % range;
+ 			head = &tcp_bhash[tcp_bhashfn(port)];
+ 			spin_lock(&head->lock);
+
+ 			/* Does not bother with rcv_saddr checks,
+ 			 * because the established check is already
+ 			 * unique enough.
+ 			 */
+			tb_for_each(tb, node, &head->chain) {
+ 				if (tb->port == port) {
+ 					BUG_TRAP(!hlist_empty(&tb->owners));
+ 					if (tb->fastreuse >= 0)
+ 						goto next_port;
+ 					if (!__tcp_v6_check_established(sk,
+									port,
+									&tw))
+ 						goto ok;
+ 					goto next_port;
+ 				}
+ 			}
+
+ 			tb = tcp_bucket_create(head, port);
+ 			if (!tb) {
+ 				spin_unlock(&head->lock);
+ 				break;
+ 			}
+ 			tb->fastreuse = -1;
+ 			goto ok;
+
+ 		next_port:
+ 			spin_unlock(&head->lock);
+ 		}
+ 		local_bh_enable();
+
+ 		return -EADDRNOTAVAIL;
+
+ok:
+		hint += i;
+
+ 		/* Head lock still held and bh's disabled */
+ 		tcp_bind_hash(sk, tb, port);
+		if (sk_unhashed(sk)) {
+ 			inet_sk(sk)->sport = htons(port);
+ 			__tcp_v6_hash(sk);
+ 		}
+ 		spin_unlock(&head->lock);
+
+ 		if (tw) {
+ 			tcp_tw_deschedule(tw);
+ 			tcp_tw_put(tw);
+ 		}
+
+		ret = 0;
+		goto out;
+ 	}
 
+ 	head  = &tcp_bhash[tcp_bhashfn(snum)];
+ 	tb  = tcp_sk(sk)->bind_hash;
 	spin_lock_bh(&head->lock);
 
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
@@ -535,8 +612,12 @@
 		spin_unlock_bh(&head->lock);
 		return 0;
 	} else {
-		spin_unlock_bh(&head->lock);
-		return tcp_v6_check_established(sk);
+		spin_unlock(&head->lock);
+		/* No definite answer... Walk to established hash table */
+		ret = __tcp_v6_check_established(sk, snum, NULL);
+out:
+		local_bh_enable();
+		return ret;
 	}
 }
 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Tcp port selection for IPV6.
  2005-01-21  0:45 [PATCH] Tcp port selection for IPV6 Stephen Hemminger
@ 2005-01-21  0:53 ` Arnaldo Carvalho de Melo
  2005-01-26  6:25 ` David S. Miller
  2005-02-09  4:38 ` David S. Miller
  2 siblings, 0 replies; 4+ messages in thread
From: Arnaldo Carvalho de Melo @ 2005-01-21  0:53 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: David S. Miller, YOSHIFUJI Hideaki / 吉藤英明, netdev

On Thu, 20 Jan 2005 16:45:29 -0800, Stephen Hemminger
<shemminger@osdl.org> wrote:

> Now the code in ipv4/tcp_ipv4.c and ipv6/tcp_ipv6.c are
> almost identical for tcp_hash_connect.

Humm,  dccp_hash_connect is almost identical to tcp_hash_connect too...  /me
adds an entry to his TODO list... :-)

- Arnaldo

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Tcp port selection for IPV6.
  2005-01-21  0:45 [PATCH] Tcp port selection for IPV6 Stephen Hemminger
  2005-01-21  0:53 ` Arnaldo Carvalho de Melo
@ 2005-01-26  6:25 ` David S. Miller
  2005-02-09  4:38 ` David S. Miller
  2 siblings, 0 replies; 4+ messages in thread
From: David S. Miller @ 2005-01-26  6:25 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: yoshfuji, netdev

On Thu, 20 Jan 2005 16:45:29 -0800
Stephen Hemminger <shemminger@osdl.org> wrote:

> This patch makes TCP over IPV6 select ports the same way the current
> TCPv4 code does. It uses a hash function to provide a starting offset
> and a free running counter to provide seed.
> 
> This changes the port selection semantics to match TCPv4 as well.
> If the port is in use but to a different remote address, it will get
> reused. It looks like the TCPv6 code was not updated when the TCPv4
> code changed. Now the code in ipv4/tcp_ipv4.c and ipv6/tcp_ipv6.c are
> almost identical for tcp_hash_connect.
> 
> Signed-off-by: Stephen Hemminger <shemminger@osdl.org>

Looks good, sorry for taking so long to review.

I think I'll push this into my 2.6.12 pending queue since 2.6.11
is very much in a bug-fix-only type mode.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Tcp port selection for IPV6.
  2005-01-21  0:45 [PATCH] Tcp port selection for IPV6 Stephen Hemminger
  2005-01-21  0:53 ` Arnaldo Carvalho de Melo
  2005-01-26  6:25 ` David S. Miller
@ 2005-02-09  4:38 ` David S. Miller
  2 siblings, 0 replies; 4+ messages in thread
From: David S. Miller @ 2005-02-09  4:38 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: yoshfuji, netdev

On Thu, 20 Jan 2005 16:45:29 -0800
Stephen Hemminger <shemminger@osdl.org> wrote:

> This patch makes TCP over IPV6 select ports the same way the current
> TCPv4 code does. It uses a hash function to provide a starting offset
> and a free running counter to provide seed.
> 
> This changes the port selection semantics to match TCPv4 as well.
> If the port is in use but to a different remote address, it will get
> reused. It looks like the TCPv6 code was not updated when the TCPv4
> code changed. Now the code in ipv4/tcp_ipv4.c and ipv6/tcp_ipv6.c are
> almost identical for tcp_hash_connect.
> 
> Signed-off-by: Stephen Hemminger <shemminger@osdl.org>

I've tossed this into my 2.6.12-pending tree.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2005-02-09  4:38 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-01-21  0:45 [PATCH] Tcp port selection for IPV6 Stephen Hemminger
2005-01-21  0:53 ` Arnaldo Carvalho de Melo
2005-01-26  6:25 ` David S. Miller
2005-02-09  4:38 ` David S. Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).