public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: "David S. Miller" <davem@redhat.com>
To: ak@suse.de
Cc: sim@netnation.com, linux-kernel@vger.kernel.org,
	linux-net@vger.kernel.org
Subject: Re: Longstanding networking / SMP issue? (duplextest)
Date: Sun, 23 Feb 2003 01:12:17 -0800 (PST)	[thread overview]
Message-ID: <20030223.011217.04700323.davem@redhat.com> (raw)
In-Reply-To: <20030221104541.GA18417@wotan.suse.de>

   From: Andi Kleen <ak@suse.de>
   Date: Fri, 21 Feb 2003 11:45:41 +0100
   
   at least ipip.c will send icmps from process context (ipip_tunnel_xmit)
   
   netfilter will probably do too.

We can easily make them all run in BH (protected) context :-)

This makes the locking much simpler.  Add to this the per-cpu socket
idea I proposed and we arrive at the patch below (against 2.5.x,
easily ported to 2.4.x just remove the cpu_possible() stuff).

Comments?

--- net/ipv4/icmp.c.~1~	Sat Feb 22 23:41:53 2003
+++ net/ipv4/icmp.c	Sun Feb 23 01:25:43 2003
@@ -223,57 +223,29 @@
 static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
 
 /*
- *	The ICMP socket. This is the most convenient way to flow control
+ *	The ICMP socket(s). This is the most convenient way to flow control
  *	our ICMP output as well as maintain a clean interface throughout
  *	all layers. All Socketless IP sends will soon be gone.
+ *
+ *	On SMP we have one ICMP socket per-cpu.
  */
-struct socket *icmp_socket;
-
-/* ICMPv4 socket is only a bit non-reenterable (unlike ICMPv6,
-   which is strongly non-reenterable). A bit later it will be made
-   reenterable and the lock may be removed then.
- */
-
-static int icmp_xmit_holder = -1;
-
-static int icmp_xmit_lock_bh(void)
-{
-	int rc;
-	if (!spin_trylock(&icmp_socket->sk->lock.slock)) {
-		rc = -EAGAIN;
-		if (icmp_xmit_holder == smp_processor_id())
-			goto out;
-		spin_lock(&icmp_socket->sk->lock.slock);
-	}
-	rc = 0;
-	icmp_xmit_holder = smp_processor_id();
-out:
-	return rc;
-}
+static struct socket *__icmp_socket[NR_CPUS];
+#define icmp_socket	__icmp_socket[smp_processor_id()]
 
-static __inline__ int icmp_xmit_lock(void)
+static __inline__ void icmp_xmit_lock(void)
 {
-	int ret;
 	local_bh_disable();
-	ret = icmp_xmit_lock_bh();
-	if (ret)
-		local_bh_enable();
-	return ret;
-}
 
-static void icmp_xmit_unlock_bh(void)
-{
-	icmp_xmit_holder = -1;
-	spin_unlock(&icmp_socket->sk->lock.slock);
+	if (!spin_trylock(&icmp_socket->sk->lock.slock))
+		BUG();
 }
 
-static __inline__ void icmp_xmit_unlock(void)
+static void icmp_xmit_unlock(void)
 {
-	icmp_xmit_unlock_bh();
+	spin_unlock(&icmp_socket->sk->lock.slock);
 	local_bh_enable();
 }
 
-
 /*
  *	Send an ICMP frame.
  */
@@ -404,10 +376,11 @@
 	struct rtable *rt = (struct rtable *)skb->dst;
 	u32 daddr;
 
-	if (ip_options_echo(&icmp_param->replyopts, skb) ||
-	    icmp_xmit_lock_bh())
+	if (ip_options_echo(&icmp_param->replyopts, skb))
 		goto out;
 
+	icmp_xmit_lock();
+
 	icmp_param->data.icmph.checksum = 0;
 	icmp_out_count(icmp_param->data.icmph.type);
 
@@ -434,7 +407,7 @@
 		icmp_push_reply(icmp_param, &ipc, rt);
 	ip_rt_put(rt);
 out_unlock:
-	icmp_xmit_unlock_bh();
+	icmp_xmit_unlock();
 out:;
 }
 
@@ -519,8 +492,7 @@
 		}
 	}
 
-	if (icmp_xmit_lock())
-		goto out;
+	icmp_xmit_lock();
 
 	/*
 	 *	Construct source address and options.
@@ -1141,19 +1113,30 @@
 void __init icmp_init(struct net_proto_family *ops)
 {
 	struct inet_opt *inet;
-	int err = sock_create(PF_INET, SOCK_RAW, IPPROTO_ICMP, &icmp_socket);
+	int i;
 
-	if (err < 0)
-		panic("Failed to create the ICMP control socket.\n");
-	icmp_socket->sk->allocation = GFP_ATOMIC;
-	icmp_socket->sk->sndbuf	    = SK_WMEM_MAX * 2;
-	inet	       = inet_sk(icmp_socket->sk);
-	inet->ttl      = MAXTTL;
-	inet->pmtudisc = IP_PMTUDISC_DONT;
-
-	/* Unhash it so that IP input processing does not even
-	 * see it, we do not wish this socket to see incoming
-	 * packets.
-	 */
-	icmp_socket->sk->prot->unhash(icmp_socket->sk);
+	for (i = 0; i < NR_CPUS; i++) {
+		int err;
+
+		if (!cpu_possible(i))
+			continue;
+
+		err = sock_create(PF_INET, SOCK_RAW, IPPROTO_ICMP,
+				  &__icmp_socket[i]);
+
+		if (err < 0)
+			panic("Failed to create the ICMP control socket.\n");
+
+		__icmp_socket[i]->sk->allocation = GFP_ATOMIC;
+		__icmp_socket[i]->sk->sndbuf = SK_WMEM_MAX * 2;
+		inet = inet_sk(__icmp_socket[i]->sk);
+		inet->ttl = MAXTTL;
+		inet->pmtudisc = IP_PMTUDISC_DONT;
+
+		/* Unhash it so that IP input processing does not even
+		 * see it, we do not wish this socket to see incoming
+		 * packets.
+		 */
+		__icmp_socket[i]->sk->prot->unhash(__icmp_socket[i]->sk);
+	}
 }
--- net/ipv6/icmp.c.~1~	Sat Feb 22 23:48:49 2003
+++ net/ipv6/icmp.c	Sun Feb 23 01:27:09 2003
@@ -67,10 +67,11 @@
 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
 
 /*
- *	ICMP socket for flow control.
+ *	ICMP socket(s) for flow control.
  */
 
-struct socket *icmpv6_socket;
+static struct socket *__icmpv6_socket[NR_CPUS];
+#define icmpv6_socket	__icmpv6_socket[smp_processor_id()]
 
 static int icmpv6_rcv(struct sk_buff *skb);
 
@@ -87,39 +88,16 @@
 	__u32			csum;
 };
 
-
-static int icmpv6_xmit_holder = -1;
-
-static int icmpv6_xmit_lock_bh(void)
+static __inline__ void icmpv6_xmit_lock(void)
 {
-	if (!spin_trylock(&icmpv6_socket->sk->lock.slock)) {
-		if (icmpv6_xmit_holder == smp_processor_id())
-			return -EAGAIN;
-		spin_lock(&icmpv6_socket->sk->lock.slock);
-	}
-	icmpv6_xmit_holder = smp_processor_id();
-	return 0;
-}
-
-static __inline__ int icmpv6_xmit_lock(void)
-{
-	int ret;
 	local_bh_disable();
-	ret = icmpv6_xmit_lock_bh();
-	if (ret)
-		local_bh_enable();
-	return ret;
-}
-
-static void icmpv6_xmit_unlock_bh(void)
-{
-	icmpv6_xmit_holder = -1;
-	spin_unlock(&icmpv6_socket->sk->lock.slock);
+	if (!spin_trylock(&icmpv6_socket->sk->lock.slock))
+		BUG();
 }
 
 static __inline__ void icmpv6_xmit_unlock(void)
 {
-	icmpv6_xmit_unlock_bh();
+	spin_unlock(&icmpv6_socket->sk->lock.slock);
 	local_bh_enable();
 }
 
@@ -341,8 +319,7 @@
 	fl.uli_u.icmpt.type = type;
 	fl.uli_u.icmpt.code = code;
 
-	if (icmpv6_xmit_lock())
-		return;
+	icmpv6_xmit_lock();
 
 	if (!icmpv6_xrlim_allow(sk, type, &fl))
 		goto out;
@@ -415,15 +392,14 @@
 	fl.uli_u.icmpt.type = ICMPV6_ECHO_REPLY;
 	fl.uli_u.icmpt.code = 0;
 
-	if (icmpv6_xmit_lock_bh())
-		return;
+	icmpv6_xmit_lock();
 
 	ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, msg.len, NULL, -1,
 		       MSG_DONTWAIT);
 	ICMP6_INC_STATS_BH(Icmp6OutEchoReplies);
 	ICMP6_INC_STATS_BH(Icmp6OutMsgs);
 
-	icmpv6_xmit_unlock_bh();
+	icmpv6_xmit_unlock();
 }
 
 static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
@@ -626,26 +602,47 @@
 int __init icmpv6_init(struct net_proto_family *ops)
 {
 	struct sock *sk;
-	int err;
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		int err;
+
+		if (!cpu_possible(i))
+			continue;
 
-	err = sock_create(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &icmpv6_socket);
-	if (err < 0) {
-		printk(KERN_ERR
-		       "Failed to initialize the ICMP6 control socket (err %d).\n",
-		       err);
-		icmpv6_socket = NULL; /* for safety */
-		return err;
+		err = sock_create(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
+				  &__icmpv6_socket[i]);
+		if (err < 0) {
+			int j;
+
+			printk(KERN_ERR
+			       "Failed to initialize the ICMP6 control socket "
+			       "(err %d).\n",
+			       err);
+			for (j = 0; j < i; j++) {
+				if (!cpu_possible(j))
+					continue;
+				sock_release(__icmpv6_socket[j]);
+				__icmpv6_socket[j] = NULL; /* for safety */
+			}
+			return err;
+		}
+
+		sk = __icmpv6_socket[i]->sk;
+		sk->allocation = GFP_ATOMIC;
+		sk->sndbuf = SK_WMEM_MAX*2;
+		sk->prot->unhash(sk);
 	}
 
-	sk = icmpv6_socket->sk;
-	sk->allocation = GFP_ATOMIC;
-	sk->sndbuf = SK_WMEM_MAX*2;
-	sk->prot->unhash(sk);
 
 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
 		printk(KERN_ERR "Failed to register ICMP6 protocol\n");
-		sock_release(icmpv6_socket);
-		icmpv6_socket = NULL;
+		for (i = 0; i < NR_CPUS; i++) {
+			if (!cpu_possible(i))
+				continue;
+			sock_release(__icmpv6_socket[i]);
+			__icmpv6_socket[i] = NULL;
+		}
 		return -EAGAIN;
 	}
 
@@ -654,8 +651,14 @@
 
 void icmpv6_cleanup(void)
 {
-	sock_release(icmpv6_socket);
-	icmpv6_socket = NULL; /* For safety. */
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_possible(i))
+			continue;
+		sock_release(__icmpv6_socket[i]);
+		__icmpv6_socket[i] = NULL; /* For safety. */
+	}
 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 }
 

  reply	other threads:[~2003-02-23  9:18 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20030219174757.GA5373@netnation.com.suse.lists.linux.kernel>
2003-02-20  7:52 ` Longstanding networking / SMP issue? (duplextest) Andi Kleen
2003-02-20  7:38   ` David S. Miller
2003-02-20  9:20   ` Simon Kirby
2003-02-20  9:34     ` Andi Kleen
2003-02-20 10:12       ` dada1
2003-02-20 10:54         ` Andi Kleen
2003-02-20 11:03           ` dada1
2003-02-21  4:24       ` David S. Miller
2003-02-21  7:27         ` Andi Kleen
2003-02-21  9:43           ` David S. Miller
2003-02-21 10:22             ` Andi Kleen
2003-02-21 10:11               ` David S. Miller
2003-02-21 10:45                 ` Andi Kleen
2003-02-23  9:12                   ` David S. Miller [this message]
2003-02-23 10:02                     ` Christoph Hellwig
2003-02-23  9:55                       ` David S. Miller
2003-02-23 10:30                         ` Andi Kleen
2003-02-23 10:23                           ` David S. Miller
2003-02-23 10:37                           ` Christoph Hellwig
2003-02-23  9:58                       ` David S. Miller
2003-02-21 15:15       ` Bill Davidsen
2003-02-19 17:47 Simon Kirby
2003-02-19 21:17 ` David S. Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20030223.011217.04700323.davem@redhat.com \
    --to=davem@redhat.com \
    --cc=ak@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-net@vger.kernel.org \
    --cc=sim@netnation.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox