From: Corey Minyard <minyard@acm.org>
To: Linux Kernel <linux-kernel@vger.kernel.org>, netdev@vger.kernel.org
Cc: shemminger@vyatta.com, paulmck@linux.vnet.ibm.com
Subject: [PATCH 3/3] Convert the UDP hash lock to RCU
Date: Mon, 06 Oct 2008 13:50:26 -0500 [thread overview]
Message-ID: <20081006185026.GA10383@minyard.local> (raw)
Change the UDP hash lock from an rwlock to RCU.
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
include/net/udp.h | 9 +++++----
net/ipv4/udp.c | 47 +++++++++++++++++++++++++++--------------------
net/ipv6/udp.c | 17 +++++++++--------
3 files changed, 41 insertions(+), 32 deletions(-)
diff --git a/include/net/udp.h b/include/net/udp.h
index addcdc6..35aa104 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -51,7 +51,7 @@ struct udp_skb_cb {
#define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb))
extern struct hlist_head udp_hash[UDP_HTABLE_SIZE];
-extern rwlock_t udp_hash_lock;
+extern spinlock_t udp_hash_wlock;
/* Note: this must match 'valbool' in sock_setsockopt */
@@ -112,12 +112,13 @@ static inline void udp_lib_hash(struct sock *sk)
static inline void udp_lib_unhash(struct sock *sk)
{
- write_lock_bh(&udp_hash_lock);
- if (sk_del_node_init(sk)) {
+ spin_lock_bh(&udp_hash_wlock);
+ if (sk_del_node_init_rcu(sk)) {
inet_sk(sk)->num = 0;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
}
- write_unlock_bh(&udp_hash_lock);
+ spin_unlock_bh(&udp_hash_wlock);
+ synchronize_rcu();
}
static inline void udp_lib_close(struct sock *sk, long timeout)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 57e26fa..1b65cb6 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -112,7 +112,8 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
EXPORT_SYMBOL(udp_stats_in6);
struct hlist_head udp_hash[UDP_HTABLE_SIZE];
-DEFINE_RWLOCK(udp_hash_lock);
+DEFINE_SPINLOCK(udp_hash_wlock);
+EXPORT_SYMBOL(udp_hash_wlock);
int sysctl_udp_mem[3] __read_mostly;
int sysctl_udp_rmem_min __read_mostly;
@@ -155,7 +156,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
int error = 1;
struct net *net = sock_net(sk);
- write_lock_bh(&udp_hash_lock);
+ spin_lock_bh(&udp_hash_wlock);
if (!snum) {
int i, low, high, remaining;
@@ -225,12 +226,12 @@ gotit:
sk->sk_hash = snum;
if (sk_unhashed(sk)) {
head = &udptable[udp_hashfn(net, snum)];
- sk_add_node(sk, head);
+ sk_add_node_rcu(sk, head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}
error = 0;
fail:
- write_unlock_bh(&udp_hash_lock);
+ spin_unlock_bh(&udp_hash_wlock);
return error;
}
@@ -260,8 +261,8 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
unsigned short hnum = ntohs(dport);
int badness = -1;
- read_lock(&udp_hash_lock);
- sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
+ rcu_read_lock();
+ sk_for_each_rcu(sk, node, &udptable[udp_hashfn(net, hnum)]) {
struct inet_sock *inet = inet_sk(sk);
if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
@@ -296,9 +297,17 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
}
}
}
+ /*
+ * Note that this is safe, even with an RCU lock.
+ * udp_lib_unhash() is the removal function, it calls
+ * synchronize_rcu() and the socket counter cannot go to
+ * zero until it returns. So if we increment it inside the
+ * RCU read lock, it should never go to zero and then be
+ * incremented again.
+ */
if (result)
sock_hold(result);
- read_unlock(&udp_hash_lock);
+ rcu_read_unlock();
return result;
}
@@ -311,7 +320,7 @@ static inline struct sock *udp_v4_mcast_next(struct sock *sk,
struct sock *s = sk;
unsigned short hnum = ntohs(loc_port);
- sk_for_each_from(s, node) {
+ sk_for_each_from_rcu(s, node) {
struct inet_sock *inet = inet_sk(s);
if (s->sk_hash != hnum ||
@@ -1094,8 +1103,8 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct sock *sk;
int dif;
- read_lock(&udp_hash_lock);
- sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
+ rcu_read_lock();
+ sk = sk_head_rcu(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
dif = skb->dev->ifindex;
sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (sk) {
@@ -1104,8 +1113,9 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
do {
struct sk_buff *skb1 = skb;
- sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
- uh->source, saddr, dif);
+ sknext = udp_v4_mcast_next(sk_next_rcu(sk), uh->dest,
+ daddr, uh->source, saddr,
+ dif);
if (sknext)
skb1 = skb_clone(skb, GFP_ATOMIC);
@@ -1120,7 +1130,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
} while (sknext);
} else
kfree_skb(skb);
- read_unlock(&udp_hash_lock);
+ rcu_read_unlock();
return 0;
}
@@ -1543,13 +1553,13 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
struct net *net = seq_file_net(seq);
do {
- sk = sk_next(sk);
+ sk = sk_next_rcu(sk);
try_again:
;
} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
- sk = sk_head(state->hashtable + state->bucket);
+ sk = sk_head_rcu(state->hashtable + state->bucket);
goto try_again;
}
return sk;
@@ -1566,9 +1576,8 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
}
static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(udp_hash_lock)
{
- read_lock(&udp_hash_lock);
+ rcu_read_lock();
return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
}
@@ -1586,9 +1595,8 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void udp_seq_stop(struct seq_file *seq, void *v)
- __releases(udp_hash_lock)
{
- read_unlock(&udp_hash_lock);
+ rcu_read_unlock();
}
static int udp_seq_open(struct inode *inode, struct file *file)
@@ -1732,7 +1740,6 @@ void __init udp_init(void)
EXPORT_SYMBOL(udp_disconnect);
EXPORT_SYMBOL(udp_hash);
-EXPORT_SYMBOL(udp_hash_lock);
EXPORT_SYMBOL(udp_ioctl);
EXPORT_SYMBOL(udp_prot);
EXPORT_SYMBOL(udp_sendmsg);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index a6aecf7..b807de7 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -64,8 +64,8 @@ static struct sock *__udp6_lib_lookup(struct net *net,
unsigned short hnum = ntohs(dport);
int badness = -1;
- read_lock(&udp_hash_lock);
- sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
+ rcu_read_lock();
+ sk_for_each_rcu(sk, node, &udptable[udp_hashfn(net, hnum)]) {
struct inet_sock *inet = inet_sk(sk);
if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
@@ -101,9 +101,10 @@ static struct sock *__udp6_lib_lookup(struct net *net,
}
}
}
+ /* See comment in __udp4_lib_lookup on why this is safe. */
if (result)
sock_hold(result);
- read_unlock(&udp_hash_lock);
+ rcu_read_unlock();
return result;
}
@@ -322,7 +323,7 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
struct sock *s = sk;
unsigned short num = ntohs(loc_port);
- sk_for_each_from(s, node) {
+ sk_for_each_from_rcu(s, node) {
struct inet_sock *inet = inet_sk(s);
if (sock_net(s) != sock_net(sk))
@@ -365,8 +366,8 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
const struct udphdr *uh = udp_hdr(skb);
int dif;
- read_lock(&udp_hash_lock);
- sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
+ rcu_read_lock();
+ sk = sk_head_rcu(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
dif = inet6_iif(skb);
sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (!sk) {
@@ -375,7 +376,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
}
sk2 = sk;
- while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr,
+ while ((sk2 = udp_v6_mcast_next(sk_next_rcu(sk2), uh->dest, daddr,
uh->source, saddr, dif))) {
struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
if (buff) {
@@ -394,7 +395,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
sk_add_backlog(sk, skb);
bh_unlock_sock(sk);
out:
- read_unlock(&udp_hash_lock);
+ rcu_read_unlock();
return 0;
}
--
1.5.6.5
next reply other threads:[~2008-10-06 19:50 UTC|newest]
Thread overview: 134+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-10-06 18:50 Corey Minyard [this message]
2008-10-06 21:22 ` [PATCH 3/3] Convert the UDP hash lock to RCU Eric Dumazet
2008-10-06 21:40 ` David Miller
2008-10-06 23:08 ` Corey Minyard
2008-10-07 8:37 ` Evgeniy Polyakov
2008-10-07 14:16 ` Christoph Lameter
2008-10-07 14:29 ` Evgeniy Polyakov
2008-10-07 14:38 ` Christoph Lameter
2008-10-07 14:33 ` Paul E. McKenney
2008-10-07 14:45 ` Christoph Lameter
2008-10-07 15:07 ` Eric Dumazet
2008-10-07 15:07 ` Paul E. McKenney
2008-10-07 5:24 ` Eric Dumazet
2008-10-07 8:54 ` Benny Amorsen
2008-10-07 12:59 ` Eric Dumazet
2008-10-07 14:07 ` Stephen Hemminger
2008-10-07 20:55 ` David Miller
2008-10-07 21:20 ` Stephen Hemminger
2008-10-08 13:55 ` Eric Dumazet
2008-10-08 18:45 ` David Miller
2008-10-28 20:37 ` [PATCH 1/2] udp: introduce struct udp_table and multiple rwlocks Eric Dumazet
2008-10-28 21:23 ` Christian Bell
2008-10-28 21:31 ` Evgeniy Polyakov
2008-10-28 21:48 ` Eric Dumazet
2008-10-28 21:28 ` Evgeniy Polyakov
2008-10-28 20:42 ` [PATCH 2/2] udp: RCU handling for Unicast packets Eric Dumazet
2008-10-28 22:45 ` Eric Dumazet
2008-10-29 5:05 ` David Miller
2008-10-29 8:23 ` Eric Dumazet
2008-10-29 8:56 ` David Miller
2008-10-29 10:19 ` Eric Dumazet
2008-10-29 18:19 ` David Miller
2008-10-29 9:04 ` Eric Dumazet
2008-10-29 9:17 ` David Miller
2008-10-29 13:17 ` Corey Minyard
2008-10-29 14:36 ` Eric Dumazet
2008-10-29 15:34 ` Corey Minyard
2008-10-29 16:09 ` Eric Dumazet
2008-10-29 16:37 ` Paul E. McKenney
2008-10-29 17:22 ` Corey Minyard
2008-10-29 17:45 ` Eric Dumazet
2008-10-29 18:28 ` Corey Minyard
2008-10-29 18:52 ` Paul E. McKenney
2008-10-29 20:00 ` Eric Dumazet
2008-10-29 20:17 ` Paul E. McKenney
2008-10-29 21:29 ` Corey Minyard
2008-10-29 21:57 ` Eric Dumazet
2008-10-29 21:58 ` Paul E. McKenney
2008-10-29 22:08 ` Eric Dumazet
2008-10-30 3:22 ` Corey Minyard
2008-10-30 5:50 ` Eric Dumazet
2008-11-02 4:19 ` David Miller
2008-10-30 5:40 ` David Miller
2008-10-30 5:51 ` Eric Dumazet
2008-10-30 7:04 ` Eric Dumazet
2008-10-30 7:05 ` David Miller
2008-10-30 15:40 ` [PATCH] udp: Introduce special NULL pointers for hlist termination Eric Dumazet
2008-10-30 15:51 ` Stephen Hemminger
2008-10-30 16:28 ` Corey Minyard
2008-10-31 14:37 ` Eric Dumazet
2008-10-31 14:55 ` Pavel Emelyanov
2008-11-02 4:22 ` David Miller
2008-10-30 17:12 ` Eric Dumazet
2008-10-31 7:51 ` David Miller
2008-10-30 16:01 ` Peter Zijlstra
2008-10-31 0:14 ` Keith Owens
2008-11-13 13:13 ` [PATCH 0/3] net: RCU lookups for UDP, DCCP and TCP protocol Eric Dumazet
2008-11-13 17:20 ` Andi Kleen
2008-11-17 3:41 ` David Miller
2008-11-19 19:52 ` Christoph Lameter
2008-11-13 13:14 ` [PATCH 1/3] rcu: Introduce hlist_nulls variant of hlist Eric Dumazet
2008-11-13 13:29 ` Peter Zijlstra
2008-11-13 13:44 ` Eric Dumazet
2008-11-13 16:02 ` [PATCH 4/3] rcu: documents rculist_nulls Eric Dumazet
2008-11-14 15:16 ` Peter Zijlstra
2008-11-17 3:36 ` David Miller
2008-11-19 17:07 ` Paul E. McKenney
2008-11-14 15:16 ` [PATCH 1/3] rcu: Introduce hlist_nulls variant of hlist Peter Zijlstra
2008-11-19 17:01 ` Paul E. McKenney
2008-11-19 17:53 ` Eric Dumazet
2008-11-19 18:46 ` Paul E. McKenney
2008-11-19 18:53 ` Arnaldo Carvalho de Melo
2008-11-19 21:17 ` Paul E. McKenney
2008-11-19 20:39 ` Eric Dumazet
2008-11-19 21:21 ` Paul E. McKenney
2008-11-13 13:15 ` [PATCH 2/3] udp: Use hlist_nulls in UDP RCU code Eric Dumazet
2008-11-19 17:29 ` Paul E. McKenney
2008-11-19 17:53 ` Eric Dumazet
2008-11-13 13:15 ` [PATCH 3/3] net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls Eric Dumazet
2008-11-13 13:34 ` Peter Zijlstra
2008-11-13 13:51 ` Eric Dumazet
2008-11-13 14:08 ` Christoph Lameter
2008-11-13 14:22 ` Peter Zijlstra
2008-11-13 14:27 ` Christoph Lameter
2008-11-19 17:53 ` Paul E. McKenney
2008-11-23 9:33 ` [PATCH] net: Convert TCP/DCCP listening hash tables to use RCU Eric Dumazet
2008-11-23 15:59 ` Paul E. McKenney
2008-11-23 18:42 ` Eric Dumazet
2008-11-23 19:17 ` Paul E. McKenney
2008-11-23 20:18 ` Eric Dumazet
2008-11-23 22:33 ` Paul E. McKenney
2008-11-24 1:23 ` David Miller
2008-10-30 11:04 ` [PATCH 2/2] udp: RCU handling for Unicast packets Peter Zijlstra
2008-10-30 11:30 ` Eric Dumazet
2008-10-30 18:25 ` Paul E. McKenney
2008-10-31 16:40 ` Eric Dumazet
2008-11-01 3:10 ` Paul E. McKenney
2008-10-29 17:32 ` Eric Dumazet
2008-10-29 18:11 ` Paul E. McKenney
2008-10-29 18:29 ` David Miller
2008-10-29 18:38 ` Paul E. McKenney
2008-10-29 18:36 ` Eric Dumazet
2008-10-29 18:20 ` David Miller
2008-10-30 11:12 ` Peter Zijlstra
2008-10-30 11:29 ` Eric Dumazet
2008-10-28 20:37 ` [PATCH 0/2] udp: Convert the UDP hash lock to RCU Eric Dumazet
2008-10-28 21:28 ` Stephen Hemminger
2008-10-28 21:50 ` Eric Dumazet
2008-10-07 16:43 ` [PATCH 3/3] " Corey Minyard
2008-10-07 18:26 ` David Miller
2008-10-08 8:35 ` Eric Dumazet
2008-10-08 16:38 ` David Miller
2008-10-07 8:31 ` Peter Zijlstra
2008-10-07 14:36 ` Paul E. McKenney
2008-10-07 18:29 ` David Miller
2008-10-06 22:07 ` Corey Minyard
2008-10-07 8:17 ` Peter Zijlstra
2008-10-07 9:24 ` Eric Dumazet
2008-10-07 14:15 ` Christoph Lameter
2008-10-07 14:38 ` Paul E. McKenney
2008-10-07 14:50 ` Eric Dumazet
2008-10-07 15:05 ` Paul E. McKenney
2008-10-07 15:09 ` Peter Zijlstra
2008-10-07 15:23 ` Christoph Lameter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20081006185026.GA10383@minyard.local \
--to=minyard@acm.org \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=paulmck@linux.vnet.ibm.com \
--cc=shemminger@vyatta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.