* [PATCH net-next 1/8] UDP: introduce a udp_hashfn function
2008-06-16 9:35 [PATCH net-next 0/8] netns: optimize tcp and udp hashtables wrt net namespaces Pavel Emelyanov
@ 2008-06-16 9:38 ` Pavel Emelyanov
2008-06-16 9:40 ` [PATCH net-next 2/8] UDP: provide a struct net pointer for __udp[46]_lib_mcast_deliver Pavel Emelyanov
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Emelyanov @ 2008-06-16 9:38 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List
Currently the chain in which a UDP socket is stored is calculated with
a simple (x & (UDP_HTABLE_SIZE - 1)). But taking net into account
would make this calculation a bit more complex, so moving it into
a function would help.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/linux/udp.h | 5 +++++
net/ipv4/udp.c | 12 ++++++------
net/ipv6/udp.c | 4 ++--
3 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 581ca2c..9c94312 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -46,6 +46,11 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
#define UDP_HTABLE_SIZE 128
+static inline int udp_hashfn(const unsigned num)
+{
+ return num & (UDP_HTABLE_SIZE - 1);
+}
+
struct udp_sock {
/* inet_sock has to be the first member */
struct inet_sock inet;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 56fcda3..34818c2 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -136,7 +136,7 @@ static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
struct sock *sk;
struct hlist_node *node;
- sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
+ sk_for_each(sk, node, &udptable[udp_hashfn(num)])
if (net_eq(sock_net(sk), net) && sk->sk_hash == num)
return 1;
return 0;
@@ -176,7 +176,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
for (i = 0; i < UDP_HTABLE_SIZE; i++) {
int size = 0;
- head = &udptable[rover & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[udp_hashfn(rover)];
if (hlist_empty(head))
goto gotit;
@@ -213,7 +213,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
gotit:
snum = rover;
} else {
- head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[udp_hashfn(snum)];
sk_for_each(sk2, node, head)
if (sk2->sk_hash == snum &&
@@ -229,7 +229,7 @@ gotit:
inet_sk(sk)->num = snum;
sk->sk_hash = snum;
if (sk_unhashed(sk)) {
- head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[udp_hashfn(snum)];
sk_add_node(sk, head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}
@@ -266,7 +266,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
int badness = -1;
read_lock(&udp_hash_lock);
- sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
+ sk_for_each(sk, node, &udptable[udp_hashfn(hnum)]) {
struct inet_sock *inet = inet_sk(sk);
if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
@@ -1070,7 +1070,7 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
int dif;
read_lock(&udp_hash_lock);
- sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+ sk = sk_head(&udptable[udp_hashfn(ntohs(uh->dest))]);
dif = skb->dev->ifindex;
sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (sk) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index dd30962..61d3c99 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -67,7 +67,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
int badness = -1;
read_lock(&udp_hash_lock);
- sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
+ sk_for_each(sk, node, &udptable[udp_hashfn(hnum)]) {
struct inet_sock *inet = inet_sk(sk);
if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
@@ -363,7 +363,7 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr,
int dif;
read_lock(&udp_hash_lock);
- sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+ sk = sk_head(&udptable[udp_hashfn(ntohs(uh->dest))]);
dif = inet6_iif(skb);
sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (!sk) {
--
1.5.3.4
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH net-next 2/8] UDP: provide a struct net pointer for __udp[46]_lib_mcast_deliver
2008-06-16 9:35 [PATCH net-next 0/8] netns: optimize tcp and udp hashtables wrt net namespaces Pavel Emelyanov
2008-06-16 9:38 ` [PATCH net-next 1/8] UDP: introduce a udp_hashfn function Pavel Emelyanov
@ 2008-06-16 9:40 ` Pavel Emelyanov
2008-06-16 9:41 ` [PATCH net-next 3/8] UDP: add struct net argument to udp_hashfn Pavel Emelyanov
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Emelyanov @ 2008-06-16 9:40 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List
They both calculate the hash chain, but currently do not have
a struct net pointer, so pass one in via an additional argument,
all the more so since their callers already have one.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
net/ipv4/udp.c | 9 ++++++---
net/ipv6/udp.c | 12 ++++++++----
2 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 34818c2..86755e0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1061,7 +1061,7 @@ drop:
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
*/
-static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
+static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct udphdr *uh,
__be32 saddr, __be32 daddr,
struct hlist_head udptable[])
@@ -1158,6 +1158,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
struct rtable *rt = (struct rtable*)skb->dst;
__be32 saddr = ip_hdr(skb)->saddr;
__be32 daddr = ip_hdr(skb)->daddr;
+ struct net *net;
/*
* Validate the packet.
@@ -1179,10 +1180,12 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
if (udp4_csum_init(skb, uh, proto))
goto csum_error;
+ net = dev_net(skb->dev);
if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
- return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
+ return __udp4_lib_mcast_deliver(net, skb, uh,
+ saddr, daddr, udptable);
- sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr,
+ sk = __udp4_lib_lookup(net, saddr, uh->source, daddr,
uh->dest, inet_iif(skb), udptable);
if (sk != NULL) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 61d3c99..e25fe4b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -355,8 +355,9 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
*/
-static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr,
- struct in6_addr *daddr, struct hlist_head udptable[])
+static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
+ struct in6_addr *saddr, struct in6_addr *daddr,
+ struct hlist_head udptable[])
{
struct sock *sk, *sk2;
const struct udphdr *uh = udp_hdr(skb);
@@ -437,6 +438,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
struct net_device *dev = skb->dev;
struct in6_addr *saddr, *daddr;
u32 ulen = 0;
+ struct net *net;
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto short_packet;
@@ -471,11 +473,13 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
if (udp6_csum_init(skb, uh, proto))
goto discard;
+ net = dev_net(skb->dev);
/*
* Multicast receive code
*/
if (ipv6_addr_is_multicast(daddr))
- return __udp6_lib_mcast_deliver(skb, saddr, daddr, udptable);
+ return __udp6_lib_mcast_deliver(net, skb,
+ saddr, daddr, udptable);
/* Unicast */
@@ -483,7 +487,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
* check socket cache ... must talk to Alan about his plans
* for sock caches... i'll skip this for now.
*/
- sk = __udp6_lib_lookup(dev_net(skb->dev), saddr, uh->source,
+ sk = __udp6_lib_lookup(net, saddr, uh->source,
daddr, uh->dest, inet6_iif(skb), udptable);
if (sk == NULL) {
--
1.5.3.4
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH net-next 3/8] UDP: add struct net argument to udp_hashfn
2008-06-16 9:35 [PATCH net-next 0/8] netns: optimize tcp and udp hashtables wrt net namespaces Pavel Emelyanov
2008-06-16 9:38 ` [PATCH net-next 1/8] UDP: introduce a udp_hashfn function Pavel Emelyanov
2008-06-16 9:40 ` [PATCH net-next 2/8] UDP: provide a struct net pointer for __udp[46]_lib_mcast_deliver Pavel Emelyanov
@ 2008-06-16 9:41 ` Pavel Emelyanov
2008-06-16 9:43 ` [PATCH net-next 4/8] Inet: add struct net argument to inet_bhashfn Pavel Emelyanov
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Emelyanov @ 2008-06-16 9:41 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List
Every caller already has a struct net pointer at hand. The new argument
is currently unused, but this will be fixed shortly.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/linux/udp.h | 2 +-
net/ipv4/udp.c | 12 ++++++------
net/ipv6/udp.c | 4 ++--
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 9c94312..3deccac 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -46,7 +46,7 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
#define UDP_HTABLE_SIZE 128
-static inline int udp_hashfn(const unsigned num)
+static inline int udp_hashfn(struct net *net, const unsigned num)
{
return num & (UDP_HTABLE_SIZE - 1);
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 86755e0..8b96189 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -136,7 +136,7 @@ static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
struct sock *sk;
struct hlist_node *node;
- sk_for_each(sk, node, &udptable[udp_hashfn(num)])
+ sk_for_each(sk, node, &udptable[udp_hashfn(net, num)])
if (net_eq(sock_net(sk), net) && sk->sk_hash == num)
return 1;
return 0;
@@ -176,7 +176,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
for (i = 0; i < UDP_HTABLE_SIZE; i++) {
int size = 0;
- head = &udptable[udp_hashfn(rover)];
+ head = &udptable[udp_hashfn(net, rover)];
if (hlist_empty(head))
goto gotit;
@@ -213,7 +213,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
gotit:
snum = rover;
} else {
- head = &udptable[udp_hashfn(snum)];
+ head = &udptable[udp_hashfn(net, snum)];
sk_for_each(sk2, node, head)
if (sk2->sk_hash == snum &&
@@ -229,7 +229,7 @@ gotit:
inet_sk(sk)->num = snum;
sk->sk_hash = snum;
if (sk_unhashed(sk)) {
- head = &udptable[udp_hashfn(snum)];
+ head = &udptable[udp_hashfn(net, snum)];
sk_add_node(sk, head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}
@@ -266,7 +266,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
int badness = -1;
read_lock(&udp_hash_lock);
- sk_for_each(sk, node, &udptable[udp_hashfn(hnum)]) {
+ sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
struct inet_sock *inet = inet_sk(sk);
if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
@@ -1070,7 +1070,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
int dif;
read_lock(&udp_hash_lock);
- sk = sk_head(&udptable[udp_hashfn(ntohs(uh->dest))]);
+ sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
dif = skb->dev->ifindex;
sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (sk) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e25fe4b..6b962b0 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -67,7 +67,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
int badness = -1;
read_lock(&udp_hash_lock);
- sk_for_each(sk, node, &udptable[udp_hashfn(hnum)]) {
+ sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
struct inet_sock *inet = inet_sk(sk);
if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
@@ -364,7 +364,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
int dif;
read_lock(&udp_hash_lock);
- sk = sk_head(&udptable[udp_hashfn(ntohs(uh->dest))]);
+ sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
dif = inet6_iif(skb);
sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (!sk) {
--
1.5.3.4
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH net-next 4/8] Inet: add struct net argument to inet_bhashfn
2008-06-16 9:35 [PATCH net-next 0/8] netns: optimize tcp and udp hashtables wrt net namespaces Pavel Emelyanov
` (2 preceding siblings ...)
2008-06-16 9:41 ` [PATCH net-next 3/8] UDP: add struct net argument to udp_hashfn Pavel Emelyanov
@ 2008-06-16 9:43 ` Pavel Emelyanov
2008-06-16 9:44 ` [PATCH net-next 5/8] Inet: add struct net argument to inet_lhashfn Pavel Emelyanov
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Emelyanov @ 2008-06-16 9:43 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List
Binding to the same port in many namespaces may create overly long
chains in the bhash-es, so prepare the hashfn to take struct net
into account.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/net/inet_hashtables.h | 3 ++-
net/ipv4/inet_connection_sock.c | 6 ++++--
net/ipv4/inet_hashtables.c | 11 +++++++----
net/ipv4/inet_timewait_sock.c | 6 ++++--
4 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 735b926..61dd331 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -201,7 +201,8 @@ extern struct inet_bind_bucket *
extern void inet_bind_bucket_destroy(struct kmem_cache *cachep,
struct inet_bind_bucket *tb);
-static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
+static inline int inet_bhashfn(struct net *net,
+ const __u16 lport, const int bhash_size)
{
return lport & (bhash_size - 1);
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 828ea21..5c74f99 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -103,7 +103,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
rover = net_random() % remaining + low;
do {
- head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
+ head = &hashinfo->bhash[inet_bhashfn(net, rover,
+ hashinfo->bhash_size)];
spin_lock(&head->lock);
inet_bind_bucket_for_each(tb, node, &head->chain)
if (tb->ib_net == net && tb->port == rover)
@@ -130,7 +131,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
*/
snum = rover;
} else {
- head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
+ head = &hashinfo->bhash[inet_bhashfn(net, snum,
+ hashinfo->bhash_size)];
spin_lock(&head->lock);
inet_bind_bucket_for_each(tb, node, &head->chain)
if (tb->ib_net == net && tb->port == snum)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2023d37..dc1b78d 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -70,7 +70,8 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
static void __inet_put_port(struct sock *sk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
+ const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->num,
+ hashinfo->bhash_size);
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
struct inet_bind_bucket *tb;
@@ -95,7 +96,8 @@ EXPORT_SYMBOL(inet_put_port);
void __inet_inherit_port(struct sock *sk, struct sock *child)
{
struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
- const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
+ const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->num,
+ table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash];
struct inet_bind_bucket *tb;
@@ -438,7 +440,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
local_bh_disable();
for (i = 1; i <= remaining; i++) {
port = low + (i + offset) % remaining;
- head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
spin_lock(&head->lock);
/* Does not bother with rcv_saddr checks,
@@ -493,7 +496,7 @@ ok:
goto out;
}
- head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+ head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
tb = inet_csk(sk)->icsk_bind_hash;
spin_lock_bh(&head->lock);
if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index ce16e9a..06006a5 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -32,7 +32,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
write_unlock(lock);
/* Disassociate with bind bucket. */
- bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
+ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
+ hashinfo->bhash_size)];
spin_lock(&bhead->lock);
tb = tw->tw_tb;
__hlist_del(&tw->tw_bind_node);
@@ -81,7 +82,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
Note, that any socket with inet->num != 0 MUST be bound in
binding cache, even if it is closed.
*/
- bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
+ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num,
+ hashinfo->bhash_size)];
spin_lock(&bhead->lock);
tw->tw_tb = icsk->icsk_bind_hash;
BUG_TRAP(icsk->icsk_bind_hash);
--
1.5.3.4
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH net-next 5/8] Inet: add struct net argument to inet_lhashfn
2008-06-16 9:35 [PATCH net-next 0/8] netns: optimize tcp and udp hashtables wrt net namespaces Pavel Emelyanov
` (3 preceding siblings ...)
2008-06-16 9:43 ` [PATCH net-next 4/8] Inet: add struct net argument to inet_bhashfn Pavel Emelyanov
@ 2008-06-16 9:44 ` Pavel Emelyanov
2008-06-16 9:47 ` [PATCH net-next 6/8] Inet: add struct net argument to inet_ehashfn Pavel Emelyanov
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Emelyanov @ 2008-06-16 9:44 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List
Listening-on-one-port sockets in many namespaces produce long
chains in the listening_hash-es, so prepare the inet_lhashfn to
take struct net into account.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/net/inet_hashtables.h | 4 ++--
net/ipv4/inet_hashtables.c | 2 +-
net/ipv6/inet6_hashtables.c | 3 ++-
3 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 61dd331..26336cd 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -211,14 +211,14 @@ extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
const unsigned short snum);
/* These can have wildcards, don't try too hard. */
-static inline int inet_lhashfn(const unsigned short num)
+static inline int inet_lhashfn(struct net *net, const unsigned short num)
{
return num & (INET_LHTABLE_SIZE - 1);
}
static inline int inet_sk_listen_hashfn(const struct sock *sk)
{
- return inet_lhashfn(inet_sk(sk)->num);
+ return inet_lhashfn(sock_net(sk), inet_sk(sk)->num);
}
/* Caller must disable local BH processing. */
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index dc1b78d..4f597b3 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -194,7 +194,7 @@ struct sock *__inet_lookup_listener(struct net *net,
const struct hlist_head *head;
read_lock(&hashinfo->lhash_lock);
- head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+ head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)];
if (!hlist_empty(head)) {
const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 580014a..b940156 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -104,7 +104,8 @@ struct sock *inet6_lookup_listener(struct net *net,
int score, hiscore = 0;
read_lock(&hashinfo->lhash_lock);
- sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
+ sk_for_each(sk, node,
+ &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) {
if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum &&
sk->sk_family == PF_INET6) {
const struct ipv6_pinfo *np = inet6_sk(sk);
--
1.5.3.4
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH net-next 6/8] Inet: add struct net argument to inet_ehashfn
2008-06-16 9:35 [PATCH net-next 0/8] netns: optimize tcp and udp hashtables wrt net namespaces Pavel Emelyanov
` (4 preceding siblings ...)
2008-06-16 9:44 ` [PATCH net-next 5/8] Inet: add struct net argument to inet_lhashfn Pavel Emelyanov
@ 2008-06-16 9:47 ` Pavel Emelyanov
2008-06-16 9:48 ` [PATCH net-next 7/8] Inet6: add struct net argument to inet6_ehashfn Pavel Emelyanov
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Emelyanov @ 2008-06-16 9:47 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List
Although this hash takes addresses into account, the ehash chains
can still be too long when, for instance, communications via lo occur.
So, prepare inet_ehashfn to take struct net into account.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/net/inet_sock.h | 6 ++++--
net/ipv4/inet_hashtables.c | 6 +++---
2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index a42cd63..ab8e19d 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -171,7 +171,8 @@ extern int inet_sk_rebuild_header(struct sock *sk);
extern u32 inet_ehash_secret;
extern void build_ehash_secret(void);
-static inline unsigned int inet_ehashfn(const __be32 laddr, const __u16 lport,
+static inline unsigned int inet_ehashfn(struct net *net,
+ const __be32 laddr, const __u16 lport,
const __be32 faddr, const __be16 fport)
{
return jhash_3words((__force __u32) laddr,
@@ -187,8 +188,9 @@ static inline int inet_sk_ehashfn(const struct sock *sk)
const __u16 lport = inet->num;
const __be32 faddr = inet->daddr;
const __be16 fport = inet->dport;
+ struct net *net = sock_net(sk);
- return inet_ehashfn(laddr, lport, faddr, fport);
+ return inet_ehashfn(net, laddr, lport, faddr, fport);
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 4f597b3..eca5899 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -227,7 +227,7 @@ struct sock * __inet_lookup_established(struct net *net,
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
- unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
+ unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
@@ -267,13 +267,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
int dif = sk->sk_bound_dev_if;
INET_ADDR_COOKIE(acookie, saddr, daddr)
const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
- unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+ struct net *net = sock_net(sk);
+ unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2;
const struct hlist_node *node;
struct inet_timewait_sock *tw;
- struct net *net = sock_net(sk);
prefetch(head->chain.first);
write_lock(lock);
--
1.5.3.4
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH net-next 7/8] Inet6: add struct net argument to inet6_ehashfn
2008-06-16 9:35 [PATCH net-next 0/8] netns: optimize tcp and udp hashtables wrt net namespaces Pavel Emelyanov
` (5 preceding siblings ...)
2008-06-16 9:47 ` [PATCH net-next 6/8] Inet: add struct net argument to inet_ehashfn Pavel Emelyanov
@ 2008-06-16 9:48 ` Pavel Emelyanov
2008-06-16 9:51 ` [PATCH net-next 8/8] Netns: introduce the net_hash_mix "salt" for hashes Pavel Emelyanov
2008-06-17 0:14 ` [PATCH net-next 0/8] netns: optimize tcp and udp hashtables wrt net namespaces David Miller
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Emelyanov @ 2008-06-16 9:48 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List
Same as for inet_ehashfn, prepare its ipv6 incarnation.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/net/inet6_hashtables.h | 7 +++++--
net/ipv6/inet6_hashtables.c | 6 +++---
2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 62a5b69..72f13a9 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -28,7 +28,8 @@
struct inet_hashinfo;
/* I have no idea if this is a good hash for v6 or not. -DaveM */
-static inline unsigned int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
+static inline unsigned int inet6_ehashfn(struct net *net,
+ const struct in6_addr *laddr, const u16 lport,
const struct in6_addr *faddr, const __be16 fport)
{
u32 ports = (lport ^ (__force u16)fport);
@@ -46,7 +47,9 @@ static inline int inet6_sk_ehashfn(const struct sock *sk)
const struct in6_addr *faddr = &np->daddr;
const __u16 lport = inet->num;
const __be16 fport = inet->dport;
- return inet6_ehashfn(laddr, lport, faddr, fport);
+ struct net *net = sock_net(sk);
+
+ return inet6_ehashfn(net, laddr, lport, faddr, fport);
}
extern void __inet6_hash(struct sock *sk);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b940156..a9cc8ab 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -68,7 +68,7 @@ struct sock *__inet6_lookup_established(struct net *net,
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
- unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport);
+ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
@@ -166,14 +166,14 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
const struct in6_addr *saddr = &np->daddr;
const int dif = sk->sk_bound_dev_if;
const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
- const unsigned int hash = inet6_ehashfn(daddr, lport, saddr,
+ struct net *net = sock_net(sk);
+ const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
inet->dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2;
const struct hlist_node *node;
struct inet_timewait_sock *tw;
- struct net *net = sock_net(sk);
prefetch(head->chain.first);
write_lock(lock);
--
1.5.3.4
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH net-next 8/8] Netns: introduce the net_hash_mix "salt" for hashes
2008-06-16 9:35 [PATCH net-next 0/8] netns: optimize tcp and udp hashtables wrt net namespaces Pavel Emelyanov
` (6 preceding siblings ...)
2008-06-16 9:48 ` [PATCH net-next 7/8] Inet6: add struct net argument to inet6_ehashfn Pavel Emelyanov
@ 2008-06-16 9:51 ` Pavel Emelyanov
2008-06-17 0:14 ` [PATCH net-next 0/8] netns: optimize tcp and udp hashtables wrt net namespaces David Miller
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Emelyanov @ 2008-06-16 9:51 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List
There are many possible ways to add this "salt", thus I made this
patch the last in the series so that it can be changed if required.
Currently I propose to use the struct net pointer itself as this
salt, but since this pointer is most often cache-line aligned, shift
it right to eliminate the bits that are most often zero.
After this, simply add this mix to the prepared hashfn-s.
For the CONFIG_NET_NS=n case this salt is 0 and the hashfn-s are
unchanged.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/linux/udp.h | 3 ++-
include/net/inet6_hashtables.h | 3 ++-
include/net/inet_hashtables.h | 5 +++--
include/net/inet_sock.h | 3 ++-
include/net/netns/hash.h | 21 +++++++++++++++++++++
5 files changed, 30 insertions(+), 5 deletions(-)
create mode 100644 include/net/netns/hash.h
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 3deccac..0cf5c4c 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -38,6 +38,7 @@ struct udphdr {
#ifdef __KERNEL__
#include <net/inet_sock.h>
#include <linux/skbuff.h>
+#include <net/netns/hash.h>
static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
{
@@ -48,7 +49,7 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
static inline int udp_hashfn(struct net *net, const unsigned num)
{
- return num & (UDP_HTABLE_SIZE - 1);
+ return (num + net_hash_mix(net)) & (UDP_HTABLE_SIZE - 1);
}
struct udp_sock {
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 72f13a9..e48989f 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -24,6 +24,7 @@
#include <net/inet_sock.h>
#include <net/ipv6.h>
+#include <net/netns/hash.h>
struct inet_hashinfo;
@@ -36,7 +37,7 @@ static inline unsigned int inet6_ehashfn(struct net *net,
return jhash_3words((__force u32)laddr->s6_addr32[3],
(__force u32)faddr->s6_addr32[3],
- ports, inet_ehash_secret);
+ ports, inet_ehash_secret + net_hash_mix(net));
}
static inline int inet6_sk_ehashfn(const struct sock *sk)
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 26336cd..bb619d8 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -29,6 +29,7 @@
#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/tcp_states.h>
+#include <net/netns/hash.h>
#include <asm/atomic.h>
#include <asm/byteorder.h>
@@ -204,7 +205,7 @@ extern void inet_bind_bucket_destroy(struct kmem_cache *cachep,
static inline int inet_bhashfn(struct net *net,
const __u16 lport, const int bhash_size)
{
- return lport & (bhash_size - 1);
+ return (lport + net_hash_mix(net)) & (bhash_size - 1);
}
extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
@@ -213,7 +214,7 @@ extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
/* These can have wildcards, don't try too hard. */
static inline int inet_lhashfn(struct net *net, const unsigned short num)
{
- return num & (INET_LHTABLE_SIZE - 1);
+ return (num + net_hash_mix(net)) & (INET_LHTABLE_SIZE - 1);
}
static inline int inet_sk_listen_hashfn(const struct sock *sk)
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index ab8e19d..508fb95 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -25,6 +25,7 @@
#include <net/sock.h>
#include <net/request_sock.h>
#include <net/route.h>
+#include <net/netns/hash.h>
/** struct ip_options - IP Options
*
@@ -178,7 +179,7 @@ static inline unsigned int inet_ehashfn(struct net *net,
return jhash_3words((__force __u32) laddr,
(__force __u32) faddr,
((__u32) lport) << 16 | (__force __u32)fport,
- inet_ehash_secret);
+ inet_ehash_secret + net_hash_mix(net));
}
static inline int inet_sk_ehashfn(const struct sock *sk)
diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h
new file mode 100644
index 0000000..548d78f
--- /dev/null
+++ b/include/net/netns/hash.h
@@ -0,0 +1,21 @@
+#ifndef __NET_NS_HASH_H__
+#define __NET_NS_HASH_H__
+
+#include <asm/cache.h>
+
+struct net;
+
+static inline unsigned net_hash_mix(struct net *net)
+{
+#ifdef CONFIG_NET_NS
+ /*
+ * shift this right to eliminate bits, that are
+ * always zeroed
+ */
+
+ return (unsigned)(((unsigned long)net) >> L1_CACHE_SHIFT);
+#else
+ return 0;
+#endif
+}
+#endif
--
1.5.3.4
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH net-next 0/8] netns: optimize tcp and udp hashtables wrt net namespaces
2008-06-16 9:35 [PATCH net-next 0/8] netns: optimize tcp and udp hashtables wrt net namespaces Pavel Emelyanov
` (7 preceding siblings ...)
2008-06-16 9:51 ` [PATCH net-next 8/8] Netns: introduce the net_hash_mix "salt" for hashes Pavel Emelyanov
@ 2008-06-17 0:14 ` David Miller
8 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2008-06-17 0:14 UTC (permalink / raw)
To: xemul; +Cc: netdev
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 16 Jun 2008 13:35:32 +0400
> Currently hash functions to store udp sockets, bind buckets,
> listening and established tcp sockets take only ports and/or
> addresses into account.
>
> The result is nasty. Consider processes in different namespaces
> communicating with each other or the outer world using the same ports
> or addresses (e.g. a dns server running in each namespace, an apache
> server listening on port 80 in each ns, or communications via lo).
> These sockets will all be linked in a single hash chain, thus
> producing overly long chains.
>
> To avoid this situation the proposal is to add some "salt" to
> hash functions taking the namespace into account.
>
> I made this "salt" disappear for CONFIG_NET_NS=n case.
>
> Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Looks great. I've applied this to net-next-2.6 and will push
back out to kernel.org after some build testing.
Thanks!
^ permalink raw reply [flat|nested] 10+ messages in thread