All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Dumazet <eric.dumazet@gmail.com>
To: kapil dakhane <kdakhane@gmail.com>,
	"David S. Miller" <davem@davemloft.net>
Cc: netdev@vger.kernel.org, netfilter@vger.kernel.org,
	Evgeniy Polyakov <zbr@ioremap.net>
Subject: [PATCH 1/2] tcp: Fix a connect() race with timewait sockets
Date: Fri, 04 Dec 2009 14:46:54 +0100	[thread overview]
Message-ID: <4B1912CE.5040304@gmail.com> (raw)
In-Reply-To: <4B152F97.1090409@gmail.com>

First patch changes __inet_hash_nolisten() and __inet6_hash()
to get a timewait parameter to be able to unhash it from ehash
at same time the new socket is inserted in hash.

This makes sure timewait socket wont be found by a concurrent
writer in __inet_check_established()

Reported-by: kapil dakhane <kdakhane@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/inet6_hashtables.h |    2 +-
 include/net/inet_hashtables.h  |    8 +++++---
 net/dccp/ipv4.c                |    2 +-
 net/dccp/ipv6.c                |    4 ++--
 net/ipv4/inet_hashtables.c     |   22 ++++++++++++++++------
 net/ipv4/tcp_ipv4.c            |    2 +-
 net/ipv6/inet6_hashtables.c    |    8 +++++++-
 net/ipv6/tcp_ipv6.c            |    4 ++--
 8 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 92838d3..e46674d 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -53,7 +53,7 @@ static inline int inet6_sk_ehashfn(const struct sock *sk)
 	return inet6_ehashfn(net, laddr, lport, faddr, fport);
 }
 
-extern void __inet6_hash(struct sock *sk);
+extern int __inet6_hash(struct sock *sk, struct inet_timewait_sock *twp);
 
 /*
  * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 41cbddd..74358d1 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -251,7 +251,7 @@ extern void inet_put_port(struct sock *sk);
 
 void inet_hashinfo_init(struct inet_hashinfo *h);
 
-extern void __inet_hash_nolisten(struct sock *sk);
+extern int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw);
 extern void inet_hash(struct sock *sk);
 extern void inet_unhash(struct sock *sk);
 
@@ -391,10 +391,12 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
 }
 
 extern int __inet_hash_connect(struct inet_timewait_death_row *death_row,
-		struct sock *sk, u32 port_offset,
+		struct sock *sk,
+		u32 port_offset,
 		int (*check_established)(struct inet_timewait_death_row *,
 			struct sock *, __u16, struct inet_timewait_sock **),
-			       void (*hash)(struct sock *sk));
+		int (*hash)(struct sock *sk, struct inet_timewait_sock *twp));
+
 extern int inet_hash_connect(struct inet_timewait_death_row *death_row,
 			     struct sock *sk);
 #endif /* _INET_HASHTABLES_H */
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index efbcfdc..dad7bc4 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -408,7 +408,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	dccp_sync_mss(newsk, dst_mtu(dst));
 
-	__inet_hash_nolisten(newsk);
+	__inet_hash_nolisten(newsk, NULL);
 	__inet_inherit_port(sk, newsk);
 
 	return newsk;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6574215..baf05cf 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -46,7 +46,7 @@ static void dccp_v6_hash(struct sock *sk)
 			return;
 		}
 		local_bh_disable();
-		__inet6_hash(sk);
+		__inet6_hash(sk, NULL);
 		local_bh_enable();
 	}
 }
@@ -644,7 +644,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
-	__inet6_hash(newsk);
+	__inet6_hash(newsk, NULL);
 	__inet_inherit_port(sk, newsk);
 
 	return newsk;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 21e5e32..c4201b7 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -351,12 +351,13 @@ static inline u32 inet_sk_port_offset(const struct sock *sk)
 					  inet->inet_dport);
 }
 
-void __inet_hash_nolisten(struct sock *sk)
+int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
 	struct hlist_nulls_head *list;
 	spinlock_t *lock;
 	struct inet_ehash_bucket *head;
+	int twrefcnt = 0;
 
 	WARN_ON(!sk_unhashed(sk));
 
@@ -367,8 +368,13 @@ void __inet_hash_nolisten(struct sock *sk)
 
 	spin_lock(lock);
 	__sk_nulls_add_node_rcu(sk, list);
+	if (tw) {
+		WARN_ON(sk->sk_hash != tw->tw_hash);
+		twrefcnt = inet_twsk_unhash(tw);
+	}
 	spin_unlock(lock);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	return twrefcnt;
 }
 EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
 
@@ -378,7 +384,7 @@ static void __inet_hash(struct sock *sk)
 	struct inet_listen_hashbucket *ilb;
 
 	if (sk->sk_state != TCP_LISTEN) {
-		__inet_hash_nolisten(sk);
+		__inet_hash_nolisten(sk, NULL);
 		return;
 	}
 
@@ -427,7 +433,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		struct sock *sk, u32 port_offset,
 		int (*check_established)(struct inet_timewait_death_row *,
 			struct sock *, __u16, struct inet_timewait_sock **),
-		void (*hash)(struct sock *sk))
+		int (*hash)(struct sock *sk, struct inet_timewait_sock *twp))
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	const unsigned short snum = inet_sk(sk)->inet_num;
@@ -435,6 +441,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 	struct inet_bind_bucket *tb;
 	int ret;
 	struct net *net = sock_net(sk);
+	int twrefcnt = 1;
 
 	if (!snum) {
 		int i, remaining, low, high, port;
@@ -493,13 +500,16 @@ ok:
 		inet_bind_hash(sk, tb, port);
 		if (sk_unhashed(sk)) {
 			inet_sk(sk)->inet_sport = htons(port);
-			hash(sk);
+			twrefcnt += hash(sk, tw);
 		}
 		spin_unlock(&head->lock);
 
 		if (tw) {
 			inet_twsk_deschedule(tw, death_row);
-			inet_twsk_put(tw);
+			while (twrefcnt) {
+				twrefcnt--;
+				inet_twsk_put(tw);
+			}
 		}
 
 		ret = 0;
@@ -510,7 +520,7 @@ ok:
 	tb  = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		hash(sk);
+		hash(sk, NULL);
 		spin_unlock_bh(&head->lock);
 		return 0;
 	} else {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 29002ab..15e9603 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1464,7 +1464,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	}
 #endif
 
-	__inet_hash_nolisten(newsk);
+	__inet_hash_nolisten(newsk, NULL);
 	__inet_inherit_port(sk, newsk);
 
 	return newsk;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index c813e29..633a6c2 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -22,9 +22,10 @@
 #include <net/inet6_hashtables.h>
 #include <net/ip.h>
 
-void __inet6_hash(struct sock *sk)
+int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+	int twrefcnt = 0;
 
 	WARN_ON(!sk_unhashed(sk));
 
@@ -45,10 +46,15 @@ void __inet6_hash(struct sock *sk)
 		lock = inet_ehash_lockp(hashinfo, hash);
 		spin_lock(lock);
 		__sk_nulls_add_node_rcu(sk, list);
+		if (tw) {
+			WARN_ON(sk->sk_hash != tw->tw_hash);
+			twrefcnt = inet_twsk_unhash(tw);
+		}
 		spin_unlock(lock);
 	}
 
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	return twrefcnt;
 }
 EXPORT_SYMBOL(__inet6_hash);
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index aadd7ce..ee9cf62 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -96,7 +96,7 @@ static void tcp_v6_hash(struct sock *sk)
 			return;
 		}
 		local_bh_disable();
-		__inet6_hash(sk);
+		__inet6_hash(sk, NULL);
 		local_bh_enable();
 	}
 }
@@ -1496,7 +1496,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	}
 #endif
 
-	__inet6_hash(newsk);
+	__inet6_hash(newsk, NULL);
 	__inet_inherit_port(sk, newsk);
 
 	return newsk;


  parent reply	other threads:[~2009-12-04 13:46 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-12-01  2:02 soft lockup in inet_csk_get_port kapil dakhane
2009-12-01  6:10 ` Eric Dumazet
2009-12-03  4:13   ` kapil dakhane
2009-12-01 15:00 ` [PATCH] tcp: Fix a connect() race with timewait sockets Eric Dumazet
2009-12-02  8:59   ` David Miller
2009-12-02  9:23     ` Eric Dumazet
2009-12-02 10:33       ` Eric Dumazet
2009-12-02 11:32         ` Evgeniy Polyakov
2009-12-02 19:18           ` kapil dakhane
2009-12-03  2:43             ` kapil dakhane
2009-12-03 10:49               ` [PATCH] tcp: fix a timewait refcnt race Eric Dumazet
2009-12-04  0:19                 ` David Miller
2009-12-04  3:20                 ` kapil dakhane
2009-12-04  6:29                   ` Eric Dumazet
2009-12-04  6:39                     ` David Miller
2009-12-04  6:39                       ` David Miller
2009-12-02 15:08         ` [PATCH net-next-2.6] tcp: connect() race with timewait reuse Eric Dumazet
2009-12-02 22:15           ` Evgeniy Polyakov
2009-12-03  6:44             ` Eric Dumazet
2009-12-03  8:31               ` Eric Dumazet
2009-12-03 23:22                 ` Evgeniy Polyakov
2009-12-04  0:18                 ` David Miller
2009-12-02 16:05     ` [PATCH] tcp: Fix a connect() race with timewait sockets Ashwani Wason
2009-12-03  6:38       ` David Miller
2009-12-04 13:45   ` [PATCH 0/2] tcp: Fix connect() races " Eric Dumazet
2009-12-04 13:46   ` Eric Dumazet [this message]
2009-12-05 21:21     ` [PATCH 1/2] tcp: Fix a connect() race " Evgeniy Polyakov
2009-12-07  9:59       ` [PATCH] tcp: documents timewait refcnt tricks Eric Dumazet
2009-12-07 16:06         ` Randy Dunlap
2009-12-09  4:20           ` David Miller
2009-12-09  4:20             ` David Miller
2009-12-09  4:18     ` [PATCH 1/2] tcp: Fix a connect() race with timewait sockets David Miller
2009-12-04 13:47   ` [PATCH 2/2] " Eric Dumazet
2009-12-09  4:19     ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4B1912CE.5040304@gmail.com \
    --to=eric.dumazet@gmail.com \
    --cc=davem@davemloft.net \
    --cc=kdakhane@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter@vger.kernel.org \
    --cc=zbr@ioremap.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.