From: Stephen Hemminger <shemminger@osdl.org>
To: "David S. Miller" <davem@redhat.com>
Cc: netdev@oss.sgi.com
Subject: [PATCH 2.5.69] IPV4/6 inetsw using RCU
Date: Thu, 8 May 2003 15:33:07 -0700 [thread overview]
Message-ID: <20030508153307.73d16f5e.shemminger@osdl.org> (raw)
This patch replaces the brlock protecting the IPV4 and IPV6 protocol
switch (inetsw) with Read-Copy Update (RCU). This gets rid of one of
the last two uses of brlock in the kernel.
Tested on 8-way SMP and it is marginally faster when running on 1G Ethernet
using netperf.
It changes inetsw from a public variable to static, since there is a
well-defined register/unregister interface. IPV6 unregister needs to be a
separate function because it has a separate lock.
This is a revised version of the earlier brlock removal patch, and addresses
all comments received so far. It is stable and works reliably, but
Dave, you may want to wait for any additional comments before applying.
diff -urNp -X dontdiff linux-2.5/include/net/protocol.h linux-2.5-nbr/include/net/protocol.h
--- linux-2.5/include/net/protocol.h 2003-04-17 09:05:10.000000000 -0700
+++ linux-2.5-nbr/include/net/protocol.h 2003-04-30 14:32:25.000000000 -0700
@@ -80,11 +80,9 @@ struct inet_protosw {
extern struct inet_protocol *inet_protocol_base;
extern struct inet_protocol *inet_protos[MAX_INET_PROTOS];
-extern struct list_head inetsw[SOCK_MAX];
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
extern struct inet6_protocol *inet6_protos[MAX_INET_PROTOS];
-extern struct list_head inetsw6[SOCK_MAX];
#endif
extern int inet_add_protocol(struct inet_protocol *prot, unsigned char num);
diff -urNp -X dontdiff linux-2.5/net/ipv4/af_inet.c linux-2.5-nbr/net/ipv4/af_inet.c
--- linux-2.5/net/ipv4/af_inet.c 2003-05-05 09:41:03.000000000 -0700
+++ linux-2.5-nbr/net/ipv4/af_inet.c 2003-05-05 09:44:36.000000000 -0700
@@ -94,7 +94,6 @@
#include <linux/inet.h>
#include <linux/igmp.h>
#include <linux/netdevice.h>
-#include <linux/brlock.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
@@ -129,7 +128,8 @@ static kmem_cache_t *raw4_sk_cachep;
/* The inetsw table contains everything that inet_create needs to
* build a new socket.
*/
-struct list_head inetsw[SOCK_MAX];
+static struct list_head inetsw[SOCK_MAX];
+static spinlock_t inetsw_lock = SPIN_LOCK_UNLOCKED;
/* New destruction routine */
@@ -337,8 +337,8 @@ static int inet_create(struct socket *so
/* Look for the requested type/protocol pair. */
answer = NULL;
- br_read_lock_bh(BR_NETPROTO_LOCK);
- list_for_each(p, &inetsw[sock->type]) {
+ rcu_read_lock();
+ list_for_each_rcu(p, &inetsw[sock->type]) {
answer = list_entry(p, struct inet_protosw, list);
/* Check the non-wild match. */
@@ -356,7 +356,6 @@ static int inet_create(struct socket *so
}
answer = NULL;
}
- br_read_unlock_bh(BR_NETPROTO_LOCK);
err = -ESOCKTNOSUPPORT;
if (!answer)
@@ -373,6 +372,7 @@ static int inet_create(struct socket *so
sk->no_check = answer->no_check;
if (INET_PROTOSW_REUSE & answer->flags)
sk->reuse = 1;
+ rcu_read_unlock();
inet = inet_sk(sk);
@@ -427,6 +427,7 @@ static int inet_create(struct socket *so
out:
return err;
out_sk_free:
+ rcu_read_unlock();
sk_free(sk);
goto out;
}
@@ -978,7 +979,7 @@ void inet_register_protosw(struct inet_p
int protocol = p->protocol;
struct list_head *last_perm;
- br_write_lock_bh(BR_NETPROTO_LOCK);
+ spin_lock_bh(&inetsw_lock);
if (p->type > SOCK_MAX)
goto out_illegal;
@@ -1007,9 +1008,12 @@ void inet_register_protosw(struct inet_p
* non-permanent entry. This means that when we remove this entry, the
* system automatically returns to the old behavior.
*/
- list_add(&p->list, last_perm);
+ list_add_rcu(&p->list, last_perm);
out:
- br_write_unlock_bh(BR_NETPROTO_LOCK);
+ spin_unlock_bh(&inetsw_lock);
+
+ synchronize_kernel();
+
return;
out_permanent:
@@ -1031,9 +1035,11 @@ void inet_unregister_protosw(struct inet
"Attempt to unregister permanent protocol %d.\n",
p->protocol);
} else {
- br_write_lock_bh(BR_NETPROTO_LOCK);
- list_del(&p->list);
- br_write_unlock_bh(BR_NETPROTO_LOCK);
+ spin_lock_bh(&inetsw_lock);
+ list_del_rcu(&p->list);
+ spin_unlock_bh(&inetsw_lock);
+
+ synchronize_kernel();
}
}
diff -urNp -X dontdiff linux-2.5/net/ipv4/icmp.c linux-2.5-nbr/net/ipv4/icmp.c
--- linux-2.5/net/ipv4/icmp.c 2003-04-14 13:32:26.000000000 -0700
+++ linux-2.5-nbr/net/ipv4/icmp.c 2003-05-01 09:54:44.000000000 -0700
@@ -695,15 +695,12 @@ static void icmp_unreach(struct sk_buff
}
read_unlock(&raw_v4_lock);
- /*
- * This can't change while we are doing it.
- * Callers have obtained BR_NETPROTO_LOCK so
- * we are OK.
- */
-
+ rcu_read_lock();
ipprot = inet_protos[hash];
+ smp_read_barrier_depends();
if (ipprot && ipprot->err_handler)
ipprot->err_handler(skb, info);
+ rcu_read_unlock();
out:
return;
diff -urNp -X dontdiff linux-2.5/net/ipv4/ip_input.c linux-2.5-nbr/net/ipv4/ip_input.c
--- linux-2.5/net/ipv4/ip_input.c 2003-04-14 13:32:26.000000000 -0700
+++ linux-2.5-nbr/net/ipv4/ip_input.c 2003-05-01 09:54:44.000000000 -0700
@@ -215,6 +215,7 @@ static inline int ip_local_deliver_finis
/* Point into the IP datagram, just past the header. */
skb->h.raw = skb->data;
+ rcu_read_lock();
{
/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
int protocol = skb->nh.iph->protocol;
@@ -235,10 +236,11 @@ static inline int ip_local_deliver_finis
if ((ipprot = inet_protos[hash]) != NULL) {
int ret;
+ smp_read_barrier_depends();
if (!ipprot->no_policy &&
!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
kfree_skb(skb);
- return 0;
+ goto out;
}
ret = ipprot->handler(skb);
if (ret < 0) {
@@ -258,6 +260,8 @@ static inline int ip_local_deliver_finis
kfree_skb(skb);
}
}
+ out:
+ rcu_read_unlock();
return 0;
}
diff -urNp -X dontdiff linux-2.5/net/ipv4/protocol.c linux-2.5-nbr/net/ipv4/protocol.c
--- linux-2.5/net/ipv4/protocol.c 2003-04-14 13:32:26.000000000 -0700
+++ linux-2.5-nbr/net/ipv4/protocol.c 2003-04-30 16:30:33.000000000 -0700
@@ -37,7 +37,6 @@
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/timer.h>
-#include <linux/brlock.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/tcp.h>
@@ -49,6 +48,7 @@
#include <linux/igmp.h>
struct inet_protocol *inet_protos[MAX_INET_PROTOS];
+static spinlock_t inet_proto_lock = SPIN_LOCK_UNLOCKED;
/*
* Add a protocol handler to the hash tables
@@ -60,16 +60,14 @@ int inet_add_protocol(struct inet_protoc
hash = protocol & (MAX_INET_PROTOS - 1);
- br_write_lock_bh(BR_NETPROTO_LOCK);
-
+ spin_lock_bh(&inet_proto_lock);
if (inet_protos[hash]) {
ret = -1;
} else {
inet_protos[hash] = prot;
ret = 0;
}
-
- br_write_unlock_bh(BR_NETPROTO_LOCK);
+ spin_unlock_bh(&inet_proto_lock);
return ret;
}
@@ -84,16 +82,15 @@ int inet_del_protocol(struct inet_protoc
hash = protocol & (MAX_INET_PROTOS - 1);
- br_write_lock_bh(BR_NETPROTO_LOCK);
-
+ spin_lock_bh(&inet_proto_lock);
if (inet_protos[hash] == prot) {
inet_protos[hash] = NULL;
ret = 0;
} else {
ret = -1;
}
+ spin_unlock_bh(&inet_proto_lock);
- br_write_unlock_bh(BR_NETPROTO_LOCK);
return ret;
}
diff -urNp -X dontdiff linux-2.5/net/ipv6/af_inet6.c linux-2.5-nbr/net/ipv6/af_inet6.c
--- linux-2.5/net/ipv6/af_inet6.c 2003-05-05 09:41:03.000000000 -0700
+++ linux-2.5-nbr/net/ipv6/af_inet6.c 2003-05-05 09:44:36.000000000 -0700
@@ -45,7 +45,6 @@
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/icmpv6.h>
-#include <linux/brlock.h>
#include <linux/smp_lock.h>
#include <net/ip.h>
@@ -102,7 +101,8 @@ kmem_cache_t *raw6_sk_cachep;
/* The inetsw table contains everything that inet_create needs to
* build a new socket.
*/
-struct list_head inetsw6[SOCK_MAX];
+static struct list_head inetsw6[SOCK_MAX];
+static spinlock_t inetsw6_lock = SPIN_LOCK_UNLOCKED;
static void inet6_sock_destruct(struct sock *sk)
{
@@ -163,8 +163,8 @@ static int inet6_create(struct socket *s
/* Look for the requested type/protocol pair. */
answer = NULL;
- br_read_lock_bh(BR_NETPROTO_LOCK);
- list_for_each(p, &inetsw6[sock->type]) {
+ rcu_read_lock();
+ list_for_each_rcu(p, &inetsw6[sock->type]) {
answer = list_entry(p, struct inet_protosw, list);
/* Check the non-wild match. */
@@ -182,7 +182,6 @@ static int inet6_create(struct socket *s
}
answer = NULL;
}
- br_read_unlock_bh(BR_NETPROTO_LOCK);
if (!answer)
goto free_and_badtype;
@@ -199,6 +198,7 @@ static int inet6_create(struct socket *s
sk->no_check = answer->no_check;
if (INET_PROTOSW_REUSE & answer->flags)
sk->reuse = 1;
+ rcu_read_unlock();
inet = inet_sk(sk);
@@ -267,12 +267,15 @@ static int inet6_create(struct socket *s
return 0;
free_and_badtype:
+ rcu_read_unlock();
sk_free(sk);
return -ESOCKTNOSUPPORT;
free_and_badperm:
+ rcu_read_unlock();
sk_free(sk);
return -EPERM;
free_and_noproto:
+ rcu_read_unlock();
sk_free(sk);
return -EPROTONOSUPPORT;
do_oom:
@@ -580,7 +583,7 @@ inet6_register_protosw(struct inet_proto
int protocol = p->protocol;
struct list_head *last_perm;
- br_write_lock_bh(BR_NETPROTO_LOCK);
+ spin_lock_bh(&inetsw6_lock);
if (p->type > SOCK_MAX)
goto out_illegal;
@@ -609,9 +612,9 @@ inet6_register_protosw(struct inet_proto
* non-permanent entry. This means that when we remove this entry, the
* system automatically returns to the old behavior.
*/
- list_add(&p->list, last_perm);
+ list_add_rcu(&p->list, last_perm);
out:
- br_write_unlock_bh(BR_NETPROTO_LOCK);
+ spin_unlock_bh(&inetsw6_lock);
return;
out_permanent:
@@ -629,7 +632,17 @@ out_illegal:
void
inet6_unregister_protosw(struct inet_protosw *p)
{
- inet_unregister_protosw(p);
+ if (INET_PROTOSW_PERMANENT & p->flags) {
+ printk(KERN_ERR
+ "Attempt to unregister permanent protocol %d.\n",
+ p->protocol);
+ } else {
+ spin_lock_bh(&inetsw6_lock);
+ list_del_rcu(&p->list);
+ spin_unlock_bh(&inetsw6_lock);
+
+ synchronize_kernel();
+ }
}
int
diff -urNp -X dontdiff linux-2.5/net/ipv6/icmp.c linux-2.5-nbr/net/ipv6/icmp.c
--- linux-2.5/net/ipv6/icmp.c 2003-04-29 09:57:41.000000000 -0700
+++ linux-2.5-nbr/net/ipv6/icmp.c 2003-05-01 09:54:44.000000000 -0700
@@ -456,9 +456,12 @@ static void icmpv6_notify(struct sk_buff
hash = nexthdr & (MAX_INET_PROTOS - 1);
+ rcu_read_lock();
ipprot = inet6_protos[hash];
+ smp_read_barrier_depends();
if (ipprot && ipprot->err_handler)
ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
+ rcu_read_unlock();
read_lock(&raw_v6_lock);
if ((sk = raw_v6_htable[hash]) != NULL) {
diff -urNp -X dontdiff linux-2.5/net/ipv6/ip6_input.c linux-2.5-nbr/net/ipv6/ip6_input.c
--- linux-2.5/net/ipv6/ip6_input.c 2003-04-17 09:05:10.000000000 -0700
+++ linux-2.5-nbr/net/ipv6/ip6_input.c 2003-05-01 09:54:44.000000000 -0700
@@ -152,6 +152,7 @@ static inline int ip6_input_finish(struc
skb->h.raw += (skb->h.raw[1]+1)<<3;
}
+ rcu_read_lock();
resubmit:
if (!pskb_pull(skb, skb->h.raw - skb->data))
goto discard;
@@ -165,6 +166,7 @@ resubmit:
if ((ipprot = inet6_protos[hash]) != NULL) {
int ret;
+ smp_read_barrier_depends();
if (ipprot->flags & INET6_PROTO_FINAL) {
if (!cksum_sub && skb->ip_summed == CHECKSUM_HW) {
skb->csum = csum_sub(skb->csum,
@@ -173,10 +175,8 @@ resubmit:
}
}
if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
- !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- kfree_skb(skb);
- return 0;
- }
+ !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto discard;
ret = ipprot->handler(&skb, &nhoff);
if (ret > 0)
@@ -194,10 +194,11 @@ resubmit:
kfree_skb(skb);
}
}
-
+ rcu_read_unlock();
return 0;
discard:
+ rcu_read_unlock();
kfree_skb(skb);
return 0;
}
diff -urNp -X dontdiff linux-2.5/net/ipv6/protocol.c linux-2.5-nbr/net/ipv6/protocol.c
--- linux-2.5/net/ipv6/protocol.c 2003-04-14 13:32:27.000000000 -0700
+++ linux-2.5-nbr/net/ipv6/protocol.c 2003-04-30 14:39:23.000000000 -0700
@@ -32,7 +32,6 @@
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
-#include <linux/brlock.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -41,12 +40,14 @@
#include <net/protocol.h>
struct inet6_protocol *inet6_protos[MAX_INET_PROTOS];
+static spinlock_t inet6_proto_lock = SPIN_LOCK_UNLOCKED;
+
int inet6_add_protocol(struct inet6_protocol *prot, unsigned char protocol)
{
int ret, hash = protocol & (MAX_INET_PROTOS - 1);
- br_write_lock_bh(BR_NETPROTO_LOCK);
+ spin_lock_bh(&inet6_proto_lock);
if (inet6_protos[hash]) {
ret = -1;
@@ -55,7 +56,7 @@ int inet6_add_protocol(struct inet6_prot
ret = 0;
}
- br_write_unlock_bh(BR_NETPROTO_LOCK);
+ spin_unlock_bh(&inet6_proto_lock);
return ret;
}
@@ -68,7 +69,7 @@ int inet6_del_protocol(struct inet6_prot
{
int ret, hash = protocol & (MAX_INET_PROTOS - 1);
- br_write_lock_bh(BR_NETPROTO_LOCK);
+ spin_lock_bh(&inet6_proto_lock);
if (inet6_protos[hash] != prot) {
ret = -1;
@@ -77,7 +78,7 @@ int inet6_del_protocol(struct inet6_prot
ret = 0;
}
- br_write_unlock_bh(BR_NETPROTO_LOCK);
+ spin_unlock_bh(&inet6_proto_lock);
return ret;
}
next reply other threads:[~2003-05-08 22:33 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-05-08 22:33 Stephen Hemminger [this message]
2003-05-08 22:43 ` [PATCH 2.5.69] IPV4/6 inetsw using RCU David S. Miller
2003-05-09 5:49 ` Andi Kleen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20030508153307.73d16f5e.shemminger@osdl.org \
--to=shemminger@osdl.org \
--cc=davem@redhat.com \
--cc=netdev@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).