Netdev List

Netdev List
 help / color / mirror / Atom feed

* [patch net-2.6.25 10/10][NETNS][IPV6] make icmpv6_time sysctl per namespace
From: Daniel Lezcano @ 2008-01-09 16:45 UTC (permalink / raw)
  To: davem; +Cc: netdev, benjamin.thery
In-Reply-To: <20080109164533.695191040@localhost.localdomain>

[-- Attachment #1: sysctl/move-sysctl-icmp-to-netns.patch --]
[-- Type: text/plain, Size: 2806 bytes --]

This patch moves the icmpv6_time sysctl to the network namespace
structure.

Because the ipv6 protocol is not yet per namespace, the variable is
accessed relative to the initial network namespace.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
---
 include/net/netns/ipv6.h   |    1 +
 net/ipv6/af_inet6.c        |    1 +
 net/ipv6/icmp.c            |    6 ++----
 net/ipv6/sysctl_net_ipv6.c |    1 +
 4 files changed, 5 insertions(+), 4 deletions(-)

Index: net-2.6.25/include/net/netns/ipv6.h
===================================================================
--- net-2.6.25.orig/include/net/netns/ipv6.h
+++ net-2.6.25/include/net/netns/ipv6.h
@@ -23,6 +23,7 @@ struct netns_sysctl_ipv6 {
    	int ip6_rt_gc_elasticity;
    	int ip6_rt_mtu_expires;
    	int ip6_rt_min_advmss;
+	int icmpv6_time;
 };
 
 struct netns_ipv6 {
Index: net-2.6.25/net/ipv6/icmp.c
===================================================================
--- net-2.6.25.orig/net/ipv6/icmp.c
+++ net-2.6.25/net/ipv6/icmp.c
@@ -154,8 +154,6 @@ static int is_ineligible(struct sk_buff 
 	return 0;
 }
 
-static int sysctl_icmpv6_time __read_mostly = 1*HZ;
-
 /*
  * Check the ICMP output rate limit
  */
@@ -186,7 +184,7 @@ static inline int icmpv6_xrlim_allow(str
 		res = 1;
 	} else {
 		struct rt6_info *rt = (struct rt6_info *)dst;
-		int tmo = sysctl_icmpv6_time;
+		int tmo = init_net.ipv6.sysctl.icmpv6_time;
 
 		/* Give more bandwidth to wider prefixes. */
 		if (rt->rt6i_dst.plen < 128)
@@ -913,7 +911,7 @@ ctl_table ipv6_icmp_table_template[] = {
 	{
 		.ctl_name	= NET_IPV6_ICMP_RATELIMIT,
 		.procname	= "ratelimit",
-		.data		= &sysctl_icmpv6_time,
+		.data		= &init_net.ipv6.sysctl.icmpv6_time,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
Index: net-2.6.25/net/ipv6/sysctl_net_ipv6.c
===================================================================
--- net-2.6.25.orig/net/ipv6/sysctl_net_ipv6.c
+++ net-2.6.25/net/ipv6/sysctl_net_ipv6.c
@@ -125,6 +125,7 @@ static int ipv6_sysctl_net_init(struct n
      	ipv6_route_table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
      	ipv6_table[0].child = ipv6_route_table;
 
+ 	ipv6_icmp_table[0].data = &net->ipv6.sysctl.icmpv6_time;
      	ipv6_table[1].child = ipv6_icmp_table;
 
 	ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
Index: net-2.6.25/net/ipv6/af_inet6.c
===================================================================
--- net-2.6.25.orig/net/ipv6/af_inet6.c
+++ net-2.6.25/net/ipv6/af_inet6.c
@@ -734,6 +734,7 @@ static int inet6_net_init(struct net *ne
  	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
  	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
  	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
+ 	net->ipv6.sysctl.icmpv6_time = 1*HZ;
 	ipv6_frag_sysctl_init(net);
 
 	return 0;

-- 

^ permalink raw reply

* [patch net-2.6.25 09/10][NETNS][IPV6] make sysctls route per namespace
From: Daniel Lezcano @ 2008-01-09 16:45 UTC (permalink / raw)
  To: davem; +Cc: netdev, benjamin.thery
In-Reply-To: <20080109164533.695191040@localhost.localdomain>

[-- Attachment #1: sysctl/move-sysctl-route-to-netns.patch --]
[-- Type: text/plain, Size: 11452 bytes --]

All the sysctls concerning routes are moved to the network namespace
structure. A helper function is called to initialize the variables.

Because the ipv6 protocol is not yet per namespace, the variables are
accessed relative to the initial network namespace.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
---
 include/net/ip6_route.h    |    2 -
 include/net/netns/ipv6.h   |    8 +++++++
 net/ipv6/af_inet6.c        |    8 +++++++
 net/ipv6/ip6_fib.c         |   14 ++++++++----
 net/ipv6/route.c           |   49 ++++++++++++++++++---------------------------
 net/ipv6/sysctl_net_ipv6.c |   11 ++++++++++
 6 files changed, 56 insertions(+), 36 deletions(-)

Index: net-2.6.25/include/net/netns/ipv6.h
===================================================================
--- net-2.6.25.orig/include/net/netns/ipv6.h
+++ net-2.6.25/include/net/netns/ipv6.h
@@ -15,6 +15,14 @@ struct netns_sysctl_ipv6 {
 #endif
 	struct inet_frags_ctl frags;
 	int bindv6only;
+   	int flush_delay;
+   	int ip6_rt_max_size;
+   	int ip6_rt_gc_min_interval;
+   	int ip6_rt_gc_timeout;
+   	int ip6_rt_gc_interval;
+   	int ip6_rt_gc_elasticity;
+   	int ip6_rt_mtu_expires;
+   	int ip6_rt_min_advmss;
 };
 
 struct netns_ipv6 {
Index: net-2.6.25/net/ipv6/route.c
===================================================================
--- net-2.6.25.orig/net/ipv6/route.c
+++ net-2.6.25/net/ipv6/route.c
@@ -73,14 +73,6 @@
 
 #define CLONE_OFFLINK_ROUTE 0
 
-static int ip6_rt_max_size = 4096;
-static int ip6_rt_gc_min_interval = HZ / 2;
-static int ip6_rt_gc_timeout = 60*HZ;
-int ip6_rt_gc_interval = 30*HZ;
-static int ip6_rt_gc_elasticity = 9;
-static int ip6_rt_mtu_expires = 10*60*HZ;
-static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
-
 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
@@ -889,8 +881,8 @@ static inline unsigned int ipv6_advmss(u
 {
 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
 
-	if (mtu < ip6_rt_min_advmss)
-		mtu = ip6_rt_min_advmss;
+	if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss)
+		mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss;
 
 	/*
 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
@@ -990,19 +982,19 @@ static int ip6_dst_gc(void)
 	static unsigned long last_gc;
 	unsigned long now = jiffies;
 
-	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
-	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
+	if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
+	    atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
 		goto out;
 
 	expire++;
 	fib6_run_gc(expire);
 	last_gc = now;
 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
-		expire = ip6_rt_gc_timeout>>1;
+		expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
 
 out:
-	expire -= expire>>ip6_rt_gc_elasticity;
-	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
+	expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
+	return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
 }
 
 /* Clean host part of a prefix. Not necessary in radix tree,
@@ -1508,7 +1500,7 @@ void rt6_pmtu_discovery(struct in6_addr 
 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
 		if (allfrag)
 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
-		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
+		dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
 		goto out;
 	}
@@ -1534,7 +1526,7 @@ void rt6_pmtu_discovery(struct in6_addr 
 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
 		 * and detecting PMTU increase will be automatically happened.
 		 */
-		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
+		dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
 
 		ip6_ins_rt(nrt);
@@ -2390,15 +2382,14 @@ static inline void ipv6_route_proc_fini(
 
 #ifdef CONFIG_SYSCTL
 
-static int flush_delay;
-
 static
 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
 			      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+	int delay = init_net.ipv6.sysctl.flush_delay;
 	if (write) {
 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
+		fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay);
 		return 0;
 	} else
 		return -EINVAL;
@@ -2407,7 +2398,7 @@ int ipv6_sysctl_rtcache_flush(ctl_table 
 ctl_table ipv6_route_table_template[] = {
 	{
 		.procname	=	"flush",
-		.data		=	&flush_delay,
+		.data		=	&init_net.ipv6.sysctl.flush_delay,
 		.maxlen		=	sizeof(int),
 		.mode		=	0200,
 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
@@ -2423,7 +2414,7 @@ ctl_table ipv6_route_table_template[] = 
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
 		.procname	=	"max_size",
-		.data		=	&ip6_rt_max_size,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec,
@@ -2431,7 +2422,7 @@ ctl_table ipv6_route_table_template[] = 
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
 		.procname	=	"gc_min_interval",
-		.data		=	&ip6_rt_gc_min_interval,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec_jiffies,
@@ -2440,7 +2431,7 @@ ctl_table ipv6_route_table_template[] = 
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
 		.procname	=	"gc_timeout",
-		.data		=	&ip6_rt_gc_timeout,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec_jiffies,
@@ -2449,7 +2440,7 @@ ctl_table ipv6_route_table_template[] = 
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
 		.procname	=	"gc_interval",
-		.data		=	&ip6_rt_gc_interval,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec_jiffies,
@@ -2458,7 +2449,7 @@ ctl_table ipv6_route_table_template[] = 
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
 		.procname	=	"gc_elasticity",
-		.data		=	&ip6_rt_gc_elasticity,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec_jiffies,
@@ -2467,7 +2458,7 @@ ctl_table ipv6_route_table_template[] = 
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
 		.procname	=	"mtu_expires",
-		.data		=	&ip6_rt_mtu_expires,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec_jiffies,
@@ -2476,7 +2467,7 @@ ctl_table ipv6_route_table_template[] = 
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
 		.procname	=	"min_adv_mss",
-		.data		=	&ip6_rt_min_advmss,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec_jiffies,
@@ -2485,7 +2476,7 @@ ctl_table ipv6_route_table_template[] = 
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
 		.procname	=	"gc_min_interval_ms",
-		.data		=	&ip6_rt_gc_min_interval,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec_ms_jiffies,
Index: net-2.6.25/net/ipv6/sysctl_net_ipv6.c
===================================================================
--- net-2.6.25.orig/net/ipv6/sysctl_net_ipv6.c
+++ net-2.6.25/net/ipv6/sysctl_net_ipv6.c
@@ -113,7 +113,18 @@ static int ipv6_sysctl_net_init(struct n
      	if (!ipv6_icmp_table)
      		goto out_ipv6_route_table;
 
+     	ipv6_route_table[0].data = &net->ipv6.sysctl.flush_delay;
+     	/* ipv6_route_table[1].data will be handled when we have
+	   routes per namespace */
+     	ipv6_route_table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
+     	ipv6_route_table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+     	ipv6_route_table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
+     	ipv6_route_table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
+     	ipv6_route_table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
+     	ipv6_route_table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
+     	ipv6_route_table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
      	ipv6_table[0].child = ipv6_route_table;
+
      	ipv6_table[1].child = ipv6_icmp_table;
 
 	ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
Index: net-2.6.25/include/net/ip6_route.h
===================================================================
--- net-2.6.25.orig/include/net/ip6_route.h
+++ net-2.6.25/include/net/ip6_route.h
@@ -43,8 +43,6 @@ extern struct rt6_info	ip6_prohibit_entr
 extern struct rt6_info	ip6_blk_hole_entry;
 #endif
 
-extern int ip6_rt_gc_interval;
-
 extern void			ip6_route_input(struct sk_buff *skb);
 
 extern struct dst_entry *	ip6_route_output(struct sock *sk,
Index: net-2.6.25/net/ipv6/ip6_fib.c
===================================================================
--- net-2.6.25.orig/net/ipv6/ip6_fib.c
+++ net-2.6.25/net/ipv6/ip6_fib.c
@@ -681,13 +681,15 @@ static __inline__ void fib6_start_gc(str
 {
 	if (ip6_fib_timer.expires == 0 &&
 	    (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
-		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+		mod_timer(&ip6_fib_timer, jiffies +
+			  init_net.ipv6.sysctl.ip6_rt_gc_interval);
 }
 
 void fib6_force_start_gc(void)
 {
 	if (ip6_fib_timer.expires == 0)
-		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+		mod_timer(&ip6_fib_timer, jiffies +
+			  init_net.ipv6.sysctl.ip6_rt_gc_interval);
 }
 
 /*
@@ -1447,7 +1449,8 @@ void fib6_run_gc(unsigned long dummy)
 {
 	if (dummy != ~0UL) {
 		spin_lock_bh(&fib6_gc_lock);
-		gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval;
+		gc_args.timeout = dummy ? (int)dummy :
+			init_net.ipv6.sysctl.ip6_rt_gc_interval;
 	} else {
 		local_bh_disable();
 		if (!spin_trylock(&fib6_gc_lock)) {
@@ -1455,7 +1458,7 @@ void fib6_run_gc(unsigned long dummy)
 			local_bh_enable();
 			return;
 		}
-		gc_args.timeout = ip6_rt_gc_interval;
+		gc_args.timeout = init_net.ipv6.sysctl.ip6_rt_gc_interval;
 	}
 	gc_args.more = 0;
 
@@ -1463,7 +1466,8 @@ void fib6_run_gc(unsigned long dummy)
 	fib6_clean_all(fib6_age, 0, NULL);
 
 	if (gc_args.more)
-		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+		mod_timer(&ip6_fib_timer, jiffies +
+			  init_net.ipv6.sysctl.ip6_rt_gc_interval);
 	else {
 		del_timer(&ip6_fib_timer);
 		ip6_fib_timer.expires = 0;
Index: net-2.6.25/net/ipv6/af_inet6.c
===================================================================
--- net-2.6.25.orig/net/ipv6/af_inet6.c
+++ net-2.6.25/net/ipv6/af_inet6.c
@@ -726,6 +726,14 @@ static int inet6_net_init(struct net *ne
 	net->ipv6.sysctl.frags.low_thresh = 192 * 1024;
 	net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT;
 	net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ;
+ 	net->ipv6.sysctl.flush_delay = 0;
+ 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
+ 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
+ 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
+ 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
+ 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
+ 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
+ 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
 	ipv6_frag_sysctl_init(net);
 
 	return 0;

-- 

^ permalink raw reply

* [patch net-2.6.25 07/10][NETNS][IPV6] make ip6_frags per namespace
From: Daniel Lezcano @ 2008-01-09 16:45 UTC (permalink / raw)
  To: davem; +Cc: netdev, benjamin.thery
In-Reply-To: <20080109164533.695191040@localhost.localdomain>

[-- Attachment #1: sysctl/move-ip6-frags-to-netns.patch --]
[-- Type: text/plain, Size: 5924 bytes --]

The ip6_frags_ctl structure is moved to the network namespace structure.
Because there can be multiple instances of network namespaces, and
ip6_frags_ctl is no longer a global variable, a helper function has
been added to facilitate the initialization of the variables.

Until the ipv6 protocol is made per namespace, the variables are
accessed relative to the initial network namespace.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
---
 include/net/ipv6.h         |    3 ---
 include/net/netns/ipv6.h   |    3 +++
 net/ipv6/af_inet6.c        |    8 ++++++++
 net/ipv6/reassembly.c      |   16 +++++++---------
 net/ipv6/sysctl_net_ipv6.c |   12 ++++++++----
 5 files changed, 26 insertions(+), 16 deletions(-)

Index: net-2.6.25/include/net/ipv6.h
===================================================================
--- net-2.6.25.orig/include/net/ipv6.h
+++ net-2.6.25/include/net/ipv6.h
@@ -572,9 +572,6 @@ extern int inet6_hash_connect(struct ine
 /*
  * reassembly.c
  */
-struct inet_frags_ctl;
-extern struct inet_frags_ctl ip6_frags_ctl;
-
 extern const struct proto_ops inet6_stream_ops;
 extern const struct proto_ops inet6_dgram_ops;
 
Index: net-2.6.25/include/net/netns/ipv6.h
===================================================================
--- net-2.6.25.orig/include/net/netns/ipv6.h
+++ net-2.6.25/include/net/netns/ipv6.h
@@ -2,6 +2,8 @@
  * ipv6 in net namespaces
  */
 
+#include <net/inet_frag.h>
+
 #ifndef __NETNS_IPV6_H__
 #define __NETNS_IPV6_H__
 
@@ -11,6 +13,7 @@ struct netns_sysctl_ipv6 {
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header *table;
 #endif
+	struct inet_frags_ctl frags;
 	int bindv6only;
 };
 
Index: net-2.6.25/net/ipv6/reassembly.c
===================================================================
--- net-2.6.25.orig/net/ipv6/reassembly.c
+++ net-2.6.25/net/ipv6/reassembly.c
@@ -82,13 +82,6 @@ struct frag_queue
 	__u16			nhoffset;
 };
 
-struct inet_frags_ctl ip6_frags_ctl __read_mostly = {
-	.high_thresh 	 = 256 * 1024,
-	.low_thresh	 = 192 * 1024,
-	.timeout	 = IPV6_FRAG_TIMEOUT,
-	.secret_interval = 10 * 60 * HZ,
-};
-
 static struct inet_frags ip6_frags;
 
 int ip6_frag_nqueues(void)
@@ -605,7 +598,7 @@ static int ipv6_frag_rcv(struct sk_buff 
 		return 1;
 	}
 
-	if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh)
+	if (atomic_read(&ip6_frags.mem) > init_net.ipv6.sysctl.frags.high_thresh)
 		ip6_evictor(ip6_dst_idev(skb->dst));
 
 	if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr,
@@ -632,6 +625,11 @@ static struct inet6_protocol frag_protoc
 	.flags		=	INET6_PROTO_NOPOLICY,
 };
 
+void ipv6_frag_sysctl_init(struct net *net)
+{
+	ip6_frags.ctl = &net->ipv6.sysctl.frags;
+}
+
 int __init ipv6_frag_init(void)
 {
 	int ret;
@@ -639,7 +637,7 @@ int __init ipv6_frag_init(void)
 	ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 	if (ret)
 		goto out;
-	ip6_frags.ctl = &ip6_frags_ctl;
+
 	ip6_frags.hashfn = ip6_hashfn;
 	ip6_frags.constructor = ip6_frag_init;
 	ip6_frags.destructor = NULL;
Index: net-2.6.25/net/ipv6/sysctl_net_ipv6.c
===================================================================
--- net-2.6.25.orig/net/ipv6/sysctl_net_ipv6.c
+++ net-2.6.25/net/ipv6/sysctl_net_ipv6.c
@@ -43,7 +43,7 @@ static ctl_table ipv6_table_template[] =
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_HIGH_THRESH,
 		.procname	= "ip6frag_high_thresh",
-		.data		= &ip6_frags_ctl.high_thresh,
+		.data		= &init_net.ipv6.sysctl.frags.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -51,7 +51,7 @@ static ctl_table ipv6_table_template[] =
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_LOW_THRESH,
 		.procname	= "ip6frag_low_thresh",
-		.data		= &ip6_frags_ctl.low_thresh,
+		.data		= &init_net.ipv6.sysctl.frags.low_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -59,7 +59,7 @@ static ctl_table ipv6_table_template[] =
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_TIME,
 		.procname	= "ip6frag_time",
-		.data		= &ip6_frags_ctl.timeout,
+		.data		= &init_net.ipv6.sysctl.frags.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -68,7 +68,7 @@ static ctl_table ipv6_table_template[] =
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_SECRET_INTERVAL,
 		.procname	= "ip6frag_secret_interval",
-		.data		= &ip6_frags_ctl.secret_interval,
+		.data		= &init_net.ipv6.sysctl.frags.secret_interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -117,6 +117,10 @@ static int ipv6_sysctl_net_init(struct n
      	ipv6_table[1].child = ipv6_icmp_table;
 
 	ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
+      	ipv6_table[3].data = &net->ipv6.sysctl.frags.high_thresh;
+      	ipv6_table[4].data = &net->ipv6.sysctl.frags.low_thresh;
+      	ipv6_table[5].data = &net->ipv6.sysctl.frags.timeout;
+    	ipv6_table[6].data = &net->ipv6.sysctl.frags.secret_interval;
 
 	net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
 							   ipv6_table);
Index: net-2.6.25/net/ipv6/af_inet6.c
===================================================================
--- net-2.6.25.orig/net/ipv6/af_inet6.c
+++ net-2.6.25/net/ipv6/af_inet6.c
@@ -72,6 +72,8 @@ MODULE_LICENSE("GPL");
 static struct list_head inetsw6[SOCK_MAX];
 static DEFINE_SPINLOCK(inetsw6_lock);
 
+void ipv6_frag_sysctl_init(struct net *net);
+
 static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
 {
 	const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
@@ -720,6 +722,12 @@ static void cleanup_ipv6_mibs(void)
 static int inet6_net_init(struct net *net)
 {
 	net->ipv6.sysctl.bindv6only = 0;
+	net->ipv6.sysctl.frags.high_thresh = 256 * 1024;
+	net->ipv6.sysctl.frags.low_thresh = 192 * 1024;
+	net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT;
+	net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ;
+	ipv6_frag_sysctl_init(net);
+
 	return 0;
 }
 

-- 

^ permalink raw reply

* [patch net-2.6.25 05/10][NETNS][IPV6] make multiple instance of sysctl tables
From: Daniel Lezcano @ 2008-01-09 16:45 UTC (permalink / raw)
  To: davem; +Cc: netdev, benjamin.thery
In-Reply-To: <20080109164533.695191040@localhost.localdomain>

[-- Attachment #1: sysctl/make-ipv6-sysctl-per-namespace.patch --]
[-- Type: text/plain, Size: 6096 bytes --]

Each network namespace wants its own set of sysctl values, e.g. we should
not be able to set, from one namespace, a sysctl value for another
namespace, especially for the initial network namespace.

This patch duplicates the sysctl tables when we register a new network
namespace for ipv6. The tables that get duplicated are suffixed with the
word "template" to notify the developer that the table is cloned.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
---
 include/net/ipv6.h         |    4 +-
 include/net/netns/ipv6.h   |    9 ++++++
 net/ipv6/icmp.c            |   12 +++++++-
 net/ipv6/route.c           |   11 ++++++-
 net/ipv6/sysctl_net_ipv6.c |   67 ++++++++++++++++++++++++++++++++++++++-------
 5 files changed, 89 insertions(+), 14 deletions(-)

Index: net-2.6.25/net/ipv6/sysctl_net_ipv6.c
===================================================================
--- net-2.6.25.orig/net/ipv6/sysctl_net_ipv6.c
+++ net-2.6.25/net/ipv6/sysctl_net_ipv6.c
@@ -14,20 +14,23 @@
 #include <net/addrconf.h>
 #include <net/inet_frag.h>
 
-static ctl_table ipv6_table[] = {
+extern struct ctl_table *ipv6_route_sysctl_init(struct net *net);
+extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net);
+
+static ctl_table ipv6_table_template[] = {
 	{
 		.ctl_name	= NET_IPV6_ROUTE,
 		.procname	= "route",
 		.maxlen		= 0,
 		.mode		= 0555,
-		.child		= ipv6_route_table
+		.child		= ipv6_route_table_template
 	},
 	{
 		.ctl_name	= NET_IPV6_ICMP,
 		.procname	= "icmp",
 		.maxlen		= 0,
 		.mode		= 0555,
-		.child		= ipv6_icmp_table
+		.child		= ipv6_icmp_table_template
 	},
 	{
 		.ctl_name	= NET_IPV6_BINDV6ONLY,
@@ -89,22 +92,66 @@ struct ctl_path net_ipv6_ctl_path[] = {
 };
 EXPORT_SYMBOL_GPL(net_ipv6_ctl_path);
 
-static struct ctl_table_header *ipv6_sysctl_header;
-
 static int ipv6_sysctl_net_init(struct net *net)
 {
-	ipv6_sysctl_header = register_net_sysctl_table(net, net_ipv6_ctl_path,
-						       ipv6_table);
-	if (!ipv6_sysctl_header)
+     	struct ctl_table *ipv6_table;
+     	struct ctl_table *ipv6_route_table;
+     	struct ctl_table *ipv6_icmp_table;
+     	int err;
+
+     	err = -ENOMEM;
+     	ipv6_table = kmemdup(ipv6_table_template, sizeof(ipv6_table_template),
+     			     GFP_KERNEL);
+     	if (!ipv6_table)
+     		goto out;
+
+  	ipv6_route_table = ipv6_route_sysctl_init(net);
+  	if (!ipv6_route_table)
+  		goto out_ipv6_table;
+
+     	ipv6_icmp_table = ipv6_icmp_sysctl_init(net);
+     	if (!ipv6_icmp_table)
+     		goto out_ipv6_route_table;
+
+     	ipv6_table[0].child = ipv6_route_table;
+     	ipv6_table[1].child = ipv6_icmp_table;
+
+	net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
+							   ipv6_table);
+	if (!net->ipv6.sysctl.table)
 		return -ENOMEM;
 
-	return 0;
+     	if (!net->ipv6.sysctl.table)
+     		goto out_ipv6_icmp_table;
 
+     	err = 0;
+out:
+     	return err;
+
+out_ipv6_icmp_table:
+     	kfree(ipv6_icmp_table);
+out_ipv6_route_table:
+     	kfree(ipv6_route_table);
+out_ipv6_table:
+     	kfree(ipv6_table);
+     	goto out;
 }
 
 static void ipv6_sysctl_net_exit(struct net *net)
 {
-	unregister_net_sysctl_table(ipv6_sysctl_header);
+     	struct ctl_table *ipv6_table;
+     	struct ctl_table *ipv6_route_table;
+     	struct ctl_table *ipv6_icmp_table;
+
+     	ipv6_table = net->ipv6.sysctl.table->ctl_table_arg;
+     	ipv6_route_table = ipv6_table[0].child;
+     	ipv6_icmp_table = ipv6_table[1].child;
+
+     	unregister_net_sysctl_table(net->ipv6.sysctl.table);
+
+     	kfree(ipv6_table);
+     	kfree(ipv6_route_table);
+     	kfree(ipv6_icmp_table);
 }
 
 static struct pernet_operations ipv6_sysctl_net_ops = {
Index: net-2.6.25/include/net/netns/ipv6.h
===================================================================
--- net-2.6.25.orig/include/net/netns/ipv6.h
+++ net-2.6.25/include/net/netns/ipv6.h
@@ -5,6 +5,15 @@
 #ifndef __NETNS_IPV6_H__
 #define __NETNS_IPV6_H__
 
+struct ctl_table_header;
+
+struct netns_sysctl_ipv6 {
+#ifdef CONFIG_SYSCTL
+	struct ctl_table_header *table;
+#endif
+};
+
 struct netns_ipv6 {
+	struct netns_sysctl_ipv6 sysctl;
 };
 #endif
Index: net-2.6.25/include/net/ipv6.h
===================================================================
--- net-2.6.25.orig/include/net/ipv6.h
+++ net-2.6.25/include/net/ipv6.h
@@ -619,8 +619,8 @@ static inline int snmp6_unregister_dev(s
 #endif
 
 #ifdef CONFIG_SYSCTL
-extern ctl_table ipv6_route_table[];
-extern ctl_table ipv6_icmp_table[];
+extern ctl_table ipv6_route_table_template[];
+extern ctl_table ipv6_icmp_table_template[];
 
 extern int ipv6_sysctl_register(void);
 extern void ipv6_sysctl_unregister(void);
Index: net-2.6.25/net/ipv6/icmp.c
===================================================================
--- net-2.6.25.orig/net/ipv6/icmp.c
+++ net-2.6.25/net/ipv6/icmp.c
@@ -909,7 +909,7 @@ int icmpv6_err_convert(int type, int cod
 EXPORT_SYMBOL(icmpv6_err_convert);
 
 #ifdef CONFIG_SYSCTL
-ctl_table ipv6_icmp_table[] = {
+ctl_table ipv6_icmp_table_template[] = {
 	{
 		.ctl_name	= NET_IPV6_ICMP_RATELIMIT,
 		.procname	= "ratelimit",
@@ -920,5 +920,15 @@ ctl_table ipv6_icmp_table[] = {
 	},
 	{ .ctl_name = 0 },
 };
+
+struct ctl_table *ipv6_icmp_sysctl_init(struct net *net)
+{
+	struct ctl_table *table;
+
+   	table = kmemdup(ipv6_icmp_table_template,
+			sizeof(ipv6_icmp_table_template),
+			GFP_KERNEL);
+	return table;
+}
 #endif
 
Index: net-2.6.25/net/ipv6/route.c
===================================================================
--- net-2.6.25.orig/net/ipv6/route.c
+++ net-2.6.25/net/ipv6/route.c
@@ -2404,7 +2404,7 @@ int ipv6_sysctl_rtcache_flush(ctl_table 
 		return -EINVAL;
 }
 
-ctl_table ipv6_route_table[] = {
+ctl_table ipv6_route_table_template[] = {
 	{
 		.procname	=	"flush",
 		.data		=	&flush_delay,
@@ -2494,6 +2494,15 @@ ctl_table ipv6_route_table[] = {
 	{ .ctl_name = 0 }
 };
 
+struct ctl_table *ipv6_route_sysctl_init(struct net *net)
+{
+	struct ctl_table *table;
+
+   	table = kmemdup(ipv6_route_table_template,
+			sizeof(ipv6_route_table_template),
+			GFP_KERNEL);
+	return table;
+}
 #endif
 
 int __init ip6_route_init(void)

-- 

^ permalink raw reply

* [patch net-2.6.25 04/10][NETNS][IPV6] make the ipv6 sysctl to be a netns subsystem
From: Daniel Lezcano @ 2008-01-09 16:45 UTC (permalink / raw)
  To: davem; +Cc: netdev, benjamin.thery
In-Reply-To: <20080109164533.695191040@localhost.localdomain>

[-- Attachment #1: sysctl/make-ipv6-sysctl-to-be-a-subsystem.patch --]
[-- Type: text/plain, Size: 1654 bytes --]

The initialization of the sysctl for the ipv6 protocol is changed to
a network namespace subsystem. That means that when a new network
namespace is created, the initialization function for the sysctl will
be called.

This does not change the behavior of the sysctl when the kernel is
built with network namespaces disabled.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
---
 net/ipv6/sysctl_net_ipv6.c |   23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

Index: net-2.6.25/net/ipv6/sysctl_net_ipv6.c
===================================================================
--- net-2.6.25.orig/net/ipv6/sysctl_net_ipv6.c
+++ net-2.6.25/net/ipv6/sysctl_net_ipv6.c
@@ -91,10 +91,10 @@ EXPORT_SYMBOL_GPL(net_ipv6_ctl_path);
 
 static struct ctl_table_header *ipv6_sysctl_header;
 
-int ipv6_sysctl_register(void)
+static int ipv6_sysctl_net_init(struct net *net)
 {
-	ipv6_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path,
-						   ipv6_table);
+	ipv6_sysctl_header = register_net_sysctl_table(net, net_ipv6_ctl_path,
+						       ipv6_table);
 	if (!ipv6_sysctl_header)
 		return -ENOMEM;
 
@@ -102,7 +102,22 @@ int ipv6_sysctl_register(void)
 
 }
 
+static void ipv6_sysctl_net_exit(struct net *net)
+{
+	unregister_net_sysctl_table(ipv6_sysctl_header);
+}
+
+static struct pernet_operations ipv6_sysctl_net_ops = {
+	.init = ipv6_sysctl_net_init,
+	.exit = ipv6_sysctl_net_exit,
+};
+
+int ipv6_sysctl_register(void)
+{
+	return register_pernet_subsys(&ipv6_sysctl_net_ops);
+}
+
 void ipv6_sysctl_unregister(void)
 {
-	unregister_sysctl_table(ipv6_sysctl_header);
+	unregister_pernet_subsys(&ipv6_sysctl_net_ops);
 }

-- 

^ permalink raw reply

* [patch net-2.6.25 01/10][NETNS][IPV6] make ipv6_sysctl_register to return a value
From: Daniel Lezcano @ 2008-01-09 16:45 UTC (permalink / raw)
  To: davem; +Cc: netdev, benjamin.thery
In-Reply-To: <20080109164533.695191040@localhost.localdomain>

[-- Attachment #1: sysctl/ipv6-sysctl-register-return-value.patch --]
[-- Type: text/plain, Size: 2035 bytes --]

This patch makes the function ipv6_sysctl_register return a
value. The af_inet6 init function is now able to catch and handle an
error from the initialization of the sysctl.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
---
 include/net/ipv6.h         |    2 +-
 net/ipv6/af_inet6.c        |    5 ++++-
 net/ipv6/sysctl_net_ipv6.c |    9 +++++++--
 3 files changed, 12 insertions(+), 4 deletions(-)

Index: net-2.6.25/include/net/ipv6.h
===================================================================
--- net-2.6.25.orig/include/net/ipv6.h
+++ net-2.6.25/include/net/ipv6.h
@@ -622,7 +622,7 @@ static inline int snmp6_unregister_dev(s
 extern ctl_table ipv6_route_table[];
 extern ctl_table ipv6_icmp_table[];
 
-extern void ipv6_sysctl_register(void);
+extern int ipv6_sysctl_register(void);
 extern void ipv6_sysctl_unregister(void);
 #endif
 
Index: net-2.6.25/net/ipv6/af_inet6.c
===================================================================
--- net-2.6.25.orig/net/ipv6/af_inet6.c
+++ net-2.6.25/net/ipv6/af_inet6.c
@@ -783,7 +783,9 @@ static int __init inet6_init(void)
 	 */
 
 #ifdef CONFIG_SYSCTL
-	ipv6_sysctl_register();
+	err = ipv6_sysctl_register();
+	if (err)
+		goto sysctl_fail;
 #endif
 	err = icmpv6_init(&inet6_family_ops);
 	if (err)
@@ -897,6 +899,7 @@ ndisc_fail:
 icmp_fail:
 #ifdef CONFIG_SYSCTL
 	ipv6_sysctl_unregister();
+sysctl_fail:
 #endif
 	cleanup_ipv6_mibs();
 out_unregister_sock:
Index: net-2.6.25/net/ipv6/sysctl_net_ipv6.c
===================================================================
--- net-2.6.25.orig/net/ipv6/sysctl_net_ipv6.c
+++ net-2.6.25/net/ipv6/sysctl_net_ipv6.c
@@ -91,10 +91,15 @@ EXPORT_SYMBOL_GPL(net_ipv6_ctl_path);
 
 static struct ctl_table_header *ipv6_sysctl_header;
 
-void ipv6_sysctl_register(void)
+int ipv6_sysctl_register(void)
 {
 	ipv6_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path,
-			ipv6_table);
+						   ipv6_table);
+	if (!ipv6_sysctl_header)
+		return -ENOMEM;
+
+	return 0;
+
 }
 
 void ipv6_sysctl_unregister(void)

-- 

^ permalink raw reply

* [patch net-2.6.25 06/10][NETNS][IPV6] make bindv6only sysctl per namespace
From: Daniel Lezcano @ 2008-01-09 16:45 UTC (permalink / raw)
  To: davem; +Cc: netdev, benjamin.thery
In-Reply-To: <20080109164533.695191040@localhost.localdomain>

[-- Attachment #1: sysctl/move-bindv6only-to-netns.patch --]
[-- Type: text/plain, Size: 2924 bytes --]

This patch moves the bindv6only sysctl to the network namespace
structure. Until the ipv6 protocol is per namespace, the sysctl
variable is always read from the initial network namespace.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
---
 include/net/ipv6.h         |    1 -
 include/net/netns/ipv6.h   |    1 +
 net/ipv6/af_inet6.c        |    5 ++---
 net/ipv6/sysctl_net_ipv6.c |    4 +++-
 4 files changed, 6 insertions(+), 5 deletions(-)

Index: net-2.6.25/include/net/ipv6.h
===================================================================
--- net-2.6.25.orig/include/net/ipv6.h
+++ net-2.6.25/include/net/ipv6.h
@@ -109,7 +109,6 @@ struct frag_hdr {
 #include <net/sock.h>
 
 /* sysctls */
-extern int sysctl_ipv6_bindv6only;
 extern int sysctl_mld_max_msf;
 
 extern struct ctl_path net_ipv6_ctl_path[];
Index: net-2.6.25/include/net/netns/ipv6.h
===================================================================
--- net-2.6.25.orig/include/net/netns/ipv6.h
+++ net-2.6.25/include/net/netns/ipv6.h
@@ -11,6 +11,7 @@ struct netns_sysctl_ipv6 {
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header *table;
 #endif
+	int bindv6only;
 };
 
 struct netns_ipv6 {
Index: net-2.6.25/net/ipv6/af_inet6.c
===================================================================
--- net-2.6.25.orig/net/ipv6/af_inet6.c
+++ net-2.6.25/net/ipv6/af_inet6.c
@@ -66,8 +66,6 @@ MODULE_AUTHOR("Cast of dozens");
 MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
 MODULE_LICENSE("GPL");
 
-int sysctl_ipv6_bindv6only __read_mostly;
-
 /* The inetsw6 table contains everything that inet6_create needs to
  * build a new socket.
  */
@@ -193,7 +191,7 @@ lookup_protocol:
 	np->mcast_hops	= -1;
 	np->mc_loop	= 1;
 	np->pmtudisc	= IPV6_PMTUDISC_WANT;
-	np->ipv6only	= sysctl_ipv6_bindv6only;
+	np->ipv6only	= init_net.ipv6.sysctl.bindv6only;
 
 	/* Init the ipv4 part of the socket since we can have sockets
 	 * using v6 API for ipv4.
@@ -721,6 +719,7 @@ static void cleanup_ipv6_mibs(void)
 
 static int inet6_net_init(struct net *net)
 {
+	net->ipv6.sysctl.bindv6only = 0;
 	return 0;
 }
 
Index: net-2.6.25/net/ipv6/sysctl_net_ipv6.c
===================================================================
--- net-2.6.25.orig/net/ipv6/sysctl_net_ipv6.c
+++ net-2.6.25/net/ipv6/sysctl_net_ipv6.c
@@ -35,7 +35,7 @@ static ctl_table ipv6_table_template[] =
 	{
 		.ctl_name	= NET_IPV6_BINDV6ONLY,
 		.procname	= "bindv6only",
-		.data		= &sysctl_ipv6_bindv6only,
+		.data		= &init_net.ipv6.sysctl.bindv6only,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -116,6 +116,8 @@ static int ipv6_sysctl_net_init(struct n
      	ipv6_table[0].child = ipv6_route_table;
      	ipv6_table[1].child = ipv6_icmp_table;
 
+	ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
+
 	net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
 							   ipv6_table);
 	if (!net->ipv6.sysctl.table)

-- 

^ permalink raw reply

* [patch net-2.6.25 03/10][NETNS][IPV6] add ipv6 structure for netns
From: Daniel Lezcano @ 2008-01-09 16:45 UTC (permalink / raw)
  To: davem; +Cc: netdev, benjamin.thery
In-Reply-To: <20080109164533.695191040@localhost.localdomain>

[-- Attachment #1: add-ipv6-for-netns.patch --]
[-- Type: text/plain, Size: 1248 bytes --]

Like the ipv4 part, this patch adds an ipv6 structure in the net structure
to aggregate the different resources to make ipv6 per namespace.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
---
 include/net/net_namespace.h |    4 ++++
 include/net/netns/ipv6.h    |   10 ++++++++++
 2 files changed, 14 insertions(+)

Index: net-2.6.25/include/net/net_namespace.h
===================================================================
--- net-2.6.25.orig/include/net/net_namespace.h
+++ net-2.6.25/include/net/net_namespace.h
@@ -11,6 +11,7 @@
 #include <net/netns/unix.h>
 #include <net/netns/packet.h>
 #include <net/netns/ipv4.h>
+#include <net/netns/ipv6.h>
 
 struct proc_dir_entry;
 struct net_device;
@@ -48,6 +49,9 @@ struct net {
 	struct netns_packet	packet;
 	struct netns_unix	unx;
 	struct netns_ipv4	ipv4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct netns_ipv6	ipv6;
+#endif
 };
 
 #ifdef CONFIG_NET
Index: net-2.6.25/include/net/netns/ipv6.h
===================================================================
--- /dev/null
+++ net-2.6.25/include/net/netns/ipv6.h
@@ -0,0 +1,10 @@
+/*
+ * ipv6 in net namespaces
+ */
+
+#ifndef __NETNS_IPV6_H__
+#define __NETNS_IPV6_H__
+
+struct netns_ipv6 {
+};
+#endif

-- 

^ permalink raw reply

* [patch net-2.6.25 08/10][NETNS][IPV6] make mld_max_msf readonly in other namespaces
From: Daniel Lezcano @ 2008-01-09 16:45 UTC (permalink / raw)
  To: davem; +Cc: netdev, benjamin.thery
In-Reply-To: <20080109164533.695191040@localhost.localdomain>

[-- Attachment #1: make-mld_max_msf-readonly.patch --]
[-- Type: text/plain, Size: 1366 bytes --]

The mld_max_msf sysctl protects the system by setting a maximum number
of allowed multicast source filters. Making this variable per namespace
could be a problem: if someone inside a namespace sets it to a big
value, that will impact the whole system, including other namespaces.

I don't see any benefits to have it per namespace for now, so in order 
to keep a directory entry in a newly created namespace, I make it
read-only when we are not in the initial network namespace.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
---
 net/ipv6/sysctl_net_ipv6.c |    6 ++++++
 1 file changed, 6 insertions(+)

Index: net-2.6.25/net/ipv6/sysctl_net_ipv6.c
===================================================================
--- net-2.6.25.orig/net/ipv6/sysctl_net_ipv6.c
+++ net-2.6.25/net/ipv6/sysctl_net_ipv6.c
@@ -122,6 +122,12 @@ static int ipv6_sysctl_net_init(struct n
       	ipv6_table[5].data = &net->ipv6.sysctl.frags.timeout;
     	ipv6_table[6].data = &net->ipv6.sysctl.frags.secret_interval;
 
+ 	/* We don't want this value to be per namespace, it should be global
+	   to all namespaces, so make it read-only when we are not in the
+	   init network namespace */
+    	if (net != &init_net)
+    		ipv6_table[7].mode = 0444;
+
 	net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
 							   ipv6_table);
 	if (!net->ipv6.sysctl.table)

-- 

^ permalink raw reply

* [patch net-2.6.25 02/10][NETNS][IPV6] make a subsystem for af_inet6
From: Daniel Lezcano @ 2008-01-09 16:45 UTC (permalink / raw)
  To: davem; +Cc: netdev, benjamin.thery
In-Reply-To: <20080109164533.695191040@localhost.localdomain>

[-- Attachment #1: make-af-inet6-a-subsystem.patch --]
[-- Type: text/plain, Size: 1965 bytes --]

This patch adds a network namespace subsystem for the af_inet6 module.
It does nothing right now, but one of its purposes is to receive the
different variables for sysctl in order to initialize them.

When the sysctl variables are moved to the network namespace structure,
they will no longer be initialized as global static variables, so we must
find a place to initialize them. Because sysctl can be disabled, it
makes no sense to store them in the sysctl_net_ipv6 file.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
---
 net/ipv6/af_inet6.c |   22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

Index: net-2.6.25/net/ipv6/af_inet6.c
===================================================================
--- net-2.6.25.orig/net/ipv6/af_inet6.c
+++ net-2.6.25/net/ipv6/af_inet6.c
@@ -719,6 +719,21 @@ static void cleanup_ipv6_mibs(void)
 	snmp_mib_free((void **)udplite_stats_in6);
 }
 
+static int inet6_net_init(struct net *net)
+{
+	return 0;
+}
+
+static void inet6_net_exit(struct net *net)
+{
+	return;
+}
+
+static struct pernet_operations inet6_net_ops = {
+	.init = inet6_net_init,
+	.exit = inet6_net_exit,
+};
+
 static int __init inet6_init(void)
 {
 	struct sk_buff *dummy_skb;
@@ -782,6 +797,10 @@ static int __init inet6_init(void)
 	 *	able to communicate via both network protocols.
 	 */
 
+	err = register_pernet_subsys(&inet6_net_ops);
+	if (err)
+		goto register_pernet_fail;
+
 #ifdef CONFIG_SYSCTL
 	err = ipv6_sysctl_register();
 	if (err)
@@ -901,6 +920,8 @@ icmp_fail:
 	ipv6_sysctl_unregister();
 sysctl_fail:
 #endif
+	unregister_pernet_subsys(&inet6_net_ops);
+register_pernet_fail:
 	cleanup_ipv6_mibs();
 out_unregister_sock:
 	sock_unregister(PF_INET6);
@@ -956,6 +977,7 @@ static void __exit inet6_exit(void)
 #ifdef CONFIG_SYSCTL
 	ipv6_sysctl_unregister();
 #endif
+	unregister_pernet_subsys(&inet6_net_ops);
 	cleanup_ipv6_mibs();
 	proto_unregister(&rawv6_prot);
 	proto_unregister(&udplitev6_prot);

-- 

^ permalink raw reply

* [patch net-2.6.25 00/10][NETNS][IPV6] make sysctl per namespace - V3
From: Daniel Lezcano @ 2008-01-09 16:45 UTC (permalink / raw)
  To: davem; +Cc: netdev, benjamin.thery

The following patchset makes the ipv6 sysctl handle multiple
network namespaces. Each instance of a network namespace has its own
set of sysctl values, which means the behavior of the ipv6 stack can be
different depending on the sysctl values set up in the different
network namespaces.

Changelog:
	V3 : fixed compilation error when CONFIG_SYSCTL=n,
	     fixed missing initialization when CONFIG_SYSCTL=n

	V2 : make the mld_max_msf variable readonly when we are
	     not in the initial network namespace

	V1 : initial post

-- 

^ permalink raw reply

* Re: Linux IPv6 DAD not full conform to RFC 4862 ?
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2008-01-09 16:40 UTC (permalink / raw)
  To: kkeil; +Cc: netdev, yoshfuji
In-Reply-To: <20080110.013857.37616214.yoshfuji@linux-ipv6.org>

In article <20080110.013857.37616214.yoshfuji@linux-ipv6.org> (at Thu, 10 Jan 2008 01:38:57 +0900 (JST)), YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org> says:

> - we could have "dad_reaction" interface variable and
>  > 1: disable interface
>  = 1: disable IPv6
>  < 0: ignore (as we do now)

Argh, >0, 0 and <0, maybe.

--yoshfuji

^ permalink raw reply

* Re: Linux IPv6 DAD not full conform to RFC 4862 ?
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2008-01-09 16:38 UTC (permalink / raw)
  To: kkeil; +Cc: netdev, yoshfuji
In-Reply-To: <20080109153656.GA16962@pingi.kke.suse.de>

In article <20080109153656.GA16962@pingi.kke.suse.de> (at Wed, 9 Jan 2008 16:36:56 +0100), Karsten Keil <kkeil@suse.de> says:

> So I think we should disable the interface now, if DAD fails on a
> hardware based LLA.

I don't want to do this, at least, unconditionally.

Options (not exclusive):

- we could have "enable_ipv6" interface flag and check it in
  input/output paths
- we could have "dad_reaction" interface variable and
 > 1: disable interface
 = 1: disable IPv6
 < 0: ignore (as we do now)

--yoshfuji

^ permalink raw reply

* Re: Linux IPv6 DAD not full conform to RFC 4862 ?
From: Neil Horman @ 2008-01-09 16:17 UTC (permalink / raw)
  To: netdev
In-Reply-To: <20080109153656.GA16962@pingi.kke.suse.de>

On Wed, Jan 09, 2008 at 04:36:56PM +0100, Karsten Keil wrote:
> Hi,
> 
> I tried to run the 1.5.0 Beta2  TAHI Selftest on recent Linux kernel.
> It fails in the Stateless Address Autoconfiguration section with
> 6 tests.
> These tests are for Duplicate Address Detection (DAD).
> They are detect for the Link Local Address a duplicate address on the
> network. It seems that our current behavior is to log an message and
> do not assign this address.
> 
> But the RFC 4862 says:
> 
> 5.4.5.  When Duplicate Address Detection Fails
> 
>    A tentative address that is determined to be a duplicate as described
>    above MUST NOT be assigned to an interface, and the node SHOULD log a
>    system management error.
> 
>    If the address is a link-local address formed from an interface
>    identifier based on the hardware address, which is supposed to be
>    uniquely assigned (e.g., EUI-64 for an Ethernet interface), IP
>    operation on the interface SHOULD be disabled.  By disabling IP
>    operation, the node will then:
> 
>    -  not send any IP packets from the interface,
> 
>    -  silently drop any IP packets received on the interface, and
> 
>    -  not forward any IP packets to the interface (when acting as a
>       router or processing a packet with a Routing header).
> 
>    In this case, the IP address duplication probably means duplicate
>    hardware addresses are in use, and trying to recover from it by
>    configuring another IP address will not result in a usable network.
>    In fact, it probably makes things worse by creating problems that are
>    harder to diagnose than just disabling network operation on the
>    interface; the user will see a partially working network where some
>    things work, and other things do not.
> 
>    On the other hand, if the duplicate link-local address is not formed
>    from an interface identifier based on the hardware address, which is
>    supposed to be uniquely assigned, IP operation on the interface MAY
>    be continued.
> 
> 
> So I think we should disable the interface now, if DAD fails on a
> hardware based LLA.
> 

Not sure I agree with that.  I assume that by disable, you mean that we should
clear the IFF_UP flag?  If we do that, and another ip address is assigned to
that interface, then your proposal would discontinue the functionality of those
already established addresses, which would be bad.  I could see a DOS scenario
coming out of that as well.  Simply send ndisc na's for a recently advertised
address, and you could prevent network communication for an entire system.

Reading the section you reference, we do follow all the MUST requirements, and
we log an error.  Given that the disable section is a SHOULD, I think we can at
least be somewhat more restrictive in our implementation.  Perhaps we should
just disable the interface iff the failed address is link-local AND there are no
other functional addresses assigned to the interface.

Neil

> -- 
> Karsten Keil
> SuSE Labs
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 0/0]: Cassini bug fixes.
From: Laszlo Attila Toth @ 2008-01-09 16:13 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, bazsi, hidden
In-Reply-To: <20080104.003231.127196736.davem@davemloft.net>

David Miller wrote:
> Over the past day I've put together the following set of bug fixes for
> the Cassini driver.
> 
> At least with my setup it appears to basically work fine, not leak
> memory, and the SKB BUG messages go away too.
> 
> I'll be honest and say that I've devoted a couple days to this work,
> and therefore I have to turn my attention back to other tasks.  As a
> result, it means it will be some time before I can look seriously into
> any feedback folks provide.  And for that I apologize, but this
> already consumed too much of my time.
> 
> I'll be pushing these to Linus and -stable shortly.
> 
> Thanks.
> 

We tested the card, it works well, all previous bugs are gone (truesize
bug messages and memory consumption).

Thank you again.

--
Attila

^ permalink raw reply

* Linux IPv6 DAD not full conform to RFC 4862 ?
From: Karsten Keil @ 2008-01-09 15:36 UTC (permalink / raw)
  To: netdev

Hi,

I tried to run the 1.5.0 Beta2  TAHI Selftest on recent Linux kernel.
It fails in the Stateless Address Autoconfiguration section with
6 tests.
These tests are for Duplicate Address Detection (DAD).
In these tests, a duplicate of the Link Local Address is detected on the
network. It seems that our current behavior is to log a message and
not assign this address.

But the RFC 4862 says:

5.4.5.  When Duplicate Address Detection Fails

   A tentative address that is determined to be a duplicate as described
   above MUST NOT be assigned to an interface, and the node SHOULD log a
   system management error.

   If the address is a link-local address formed from an interface
   identifier based on the hardware address, which is supposed to be
   uniquely assigned (e.g., EUI-64 for an Ethernet interface), IP
   operation on the interface SHOULD be disabled.  By disabling IP
   operation, the node will then:

   -  not send any IP packets from the interface,

   -  silently drop any IP packets received on the interface, and

   -  not forward any IP packets to the interface (when acting as a
      router or processing a packet with a Routing header).

   In this case, the IP address duplication probably means duplicate
   hardware addresses are in use, and trying to recover from it by
   configuring another IP address will not result in a usable network.
   In fact, it probably makes things worse by creating problems that are
   harder to diagnose than just disabling network operation on the
   interface; the user will see a partially working network where some
   things work, and other things do not.

   On the other hand, if the duplicate link-local address is not formed
   from an interface identifier based on the hardware address, which is
   supposed to be uniquely assigned, IP operation on the interface MAY
   be continued.

So I think we should disable the interface now, if DAD fails on a
hardware based LLA.

-- 
Karsten Keil
SuSE Labs

^ permalink raw reply

* Re: [PATCH 0/3] bonding: 3 fixes for 2.6.24
From: Andy Gospodarek @ 2008-01-09 15:27 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: Krzysztof Oledzki, netdev, Jeff Garzik, David Miller,
	Andy Gospodarek, Herbert Xu
In-Reply-To: <17850.1199865514@death>

On Tue, Jan 08, 2008 at 11:58:34PM -0800, Jay Vosburgh wrote:
> Krzysztof Oledzki <olel@ans.pl> wrote:
> 
> >Fine. Just let you know that someone test your patches and everything
> >works, except mentioned problem.
> 
> 	And I appreciate it; I just wanted to make sure our many fans
> following along at home didn't misunderstand.
> 
> 	Could you let me know if the patch below make the lockdep
> warning go away?  This applies on top of the previous three, although it
> should be trivial to do by hand.
> 
> 	I'm still checking to make sure this is safe with regard to
> mutexing the bonding structures, but it would be good to know if it
> eliminates the warning.
> 
> 	-J
> 

Jay,

My initial concern was that a slave device could disappear out from
under us, but it seems like this certainly isn't the case since all
calls to bond_release are protected by rtnl-locks, so I think you are
correct that we are safe.  I'll test this on my setup here and let you
know if I see any problems.

-andy




^ permalink raw reply

* Re: Top 10 kernel oopses for the week ending January 5th, 2008
From: Arjan van de Ven @ 2008-01-09 15:28 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Linux Kernel Mailing List, Linus Torvalds, Andrew Morton, NetDev
In-Reply-To: <1199887950.6762.26.camel@johannes.berg>

Johannes Berg wrote:
>> Rank 1: __ieee80211_rx
>> 	Warning at net/mac80211/rx.c:1672
>> 	Reported 6 times (11 total reports)
>> 	Same issue that was ranked 2nd last week
>> 	Johannes has diagnosed this as a driver bug in the iwlwifi drivers
>> 	More info: http://www.kerneloops.org/search.php?search=__ieee80211_rx
> 
> Note that because we don't get the module list for WARN_ON, we don't
> actually know whether all of these instances are from the iwlwifi
> drivers. A few other drivers suffer from the same problem. In one of
> these cases, iwlwifi was contained in the stack trace, but in the common
> case that isn't happening because packet processing is delayed to a
> tasklet.
> 

and fwiw a patch to get this added to WARN_ON was posted by me last week to fix this;
once this goes into 2.6.25-rc this annoyance/hindrance in debugging will be fixed.

^ permalink raw reply

* Re: SACK scoreboard
From: John Heffner @ 2008-01-09 14:56 UTC (permalink / raw)
  To: David Miller; +Cc: andi, ilpo.jarvinen, lachlan.andrew, netdev, quetchen
In-Reply-To: <20080108.224144.234253941.davem@davemloft.net>

David Miller wrote:
> From: John Heffner <jheffner@psc.edu>
> Date: Tue, 08 Jan 2008 23:27:08 -0500
> 
>> I also wonder how much of a problem this is (for now, with window sizes 
>> of order 10000 packets.  My understanding is that the biggest problems 
>> arise from O(N^2) time for recovery because every ack was expensive. 
>> Have current tests shown the final ack to be a major source of problems?
> 
> Yes, several people have reported this.

I may have missed some of this.  Does anyone have a link to some recent 
data?

   -John

^ permalink raw reply

* Re: [NET] ROUTE: fix rcu_dereference() uses in /proc/net/rt_cache
From: Paul E. McKenney @ 2008-01-09 14:43 UTC (permalink / raw)
  To: David Miller; +Cc: dada1, herbert, dipankar, netdev, josh
In-Reply-To: <20080109.063126.68241252.davem@davemloft.net>

On Wed, Jan 09, 2008 at 06:31:26AM -0800, David Miller wrote:
> From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> Date: Wed, 9 Jan 2008 06:22:58 -0800
> 
> > On Wed, Jan 09, 2008 at 11:37:27AM +0100, Eric Dumazet wrote:
> > > On Wed, 9 Jan 2008 20:46:37 +1100
> > > Herbert Xu <herbert@gondor.apana.org.au> wrote:
> > > 
> > > diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> > > index d337706..28484f3 100644
> > > --- a/net/ipv4/route.c
> > > +++ b/net/ipv4/route.c
> > > @@ -283,12 +283,12 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
> > >  			break;
> > >  		rcu_read_unlock_bh();
> > >  	}
> > > -	return r;
> > > +	return rcu_dereference(r);
> > >  }
> > 
> > Would it be possible to tag rt_cache_get_first() with an __acquires(RCU)
> > to help out sparse?
> 
> Sparse can't handle conditional locking very well, as is done here.
> There is a separate thread where Eric reworks how all of this
> locking is done in order to pacify sparse and be able to add the
> __acquires() etc. tags and some of us found it too ugly to
> swallow :-)

Ah!  ;-)

							Thanx, Paul

^ permalink raw reply

* Re: [NET] ROUTE: fix rcu_dereference() uses in /proc/net/rt_cache
From: David Miller @ 2008-01-09 14:31 UTC (permalink / raw)
  To: paulmck; +Cc: dada1, herbert, dipankar, netdev
In-Reply-To: <20080109142258.GC13714@linux.vnet.ibm.com>

From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 9 Jan 2008 06:22:58 -0800

> On Wed, Jan 09, 2008 at 11:37:27AM +0100, Eric Dumazet wrote:
> > On Wed, 9 Jan 2008 20:46:37 +1100
> > Herbert Xu <herbert@gondor.apana.org.au> wrote:
> > 
> > diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> > index d337706..28484f3 100644
> > --- a/net/ipv4/route.c
> > +++ b/net/ipv4/route.c
> > @@ -283,12 +283,12 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
> >  			break;
> >  		rcu_read_unlock_bh();
> >  	}
> > -	return r;
> > +	return rcu_dereference(r);
> >  }
> 
> Would it be possible to tag rt_cache_get_first() with an __acquires(RCU)
> to help out sparse?

Sparse can't handle conditional locking very well, as is done here.
There is a separate thread where Eric reworks how all of this
locking is done in order to pacify sparse and be able to add the
__acquires() etc. tags and some of us found it too ugly to
swallow :-)

^ permalink raw reply

* Re: [NET] ROUTE: fix rcu_dereference() uses in /proc/net/rt_cache
From: Paul E. McKenney @ 2008-01-09 14:22 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Herbert Xu, davem, dipankar, netdev
In-Reply-To: <20080109113727.50eae500.dada1@cosmosbay.com>

On Wed, Jan 09, 2008 at 11:37:27AM +0100, Eric Dumazet wrote:
> On Wed, 9 Jan 2008 20:46:37 +1100
> Herbert Xu <herbert@gondor.apana.org.au> wrote:
> 
> > On Wed, Jan 09, 2008 at 08:38:56AM +0100, Eric Dumazet wrote:
> > > 
> > > I am not sure this is valid, since it will do this :
> > > 
> > > r = rt_hash_table[st->bucket].chain;
> > > if (r)
> > >     return rcu_dereference(r);
> > > 
> > > So compiler might be dumb enough do dereference 
> > > &rt_hash_table[st->bucket].chain two times.
> > 
> > That wouldn't be a problem at all.  The key is to add a barrier between
> > reading the pointer:
> > 
> > 	r = rt_hash_table[st->bucket].chain
> > 
> > and dereferencing it later, e.g.,
> > 
> > 	r->u.dst.rt_next
> > 
> > The barrier is there so that when we dereference r we don't read
> > stale cache that was there before the memory at r was initialised.
> > How many times you read the pointer value before the barrier is
> > irrelevant to the effectiveness of the barrier preceding the
> > dereference.

Agreed -- as long as you don't try to dereference the pointer before
passing it through rcu_dereference(), and as long as both the initial
fetch of the pointer, the rcu_dereference(), and the actual dereferencing
of the pointer are all within the same RCU read-side critical section.

> You are absolutely right Herbert, so I changed the patch to :
> 
> [NET] ROUTE: fix rcu_dereference() uses in /proc/net/rt_cache
> 
> In rt_cache_get_next(), no need to guard seq->private by a rcu_dereference()
> since seq is private to the thread running this function. Reading seq.private
> once (as guaranteed by rcu_dereference()) or several times if the compiler really is
> dumb enough won't change the result.
> 
> But we miss real spots where rcu_dereference() are needed, both in 
> rt_cache_get_first() and rt_cache_get_next()
> 
> Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
> 
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index d337706..28484f3 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -283,12 +283,12 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
>  			break;
>  		rcu_read_unlock_bh();
>  	}
> -	return r;
> +	return rcu_dereference(r);
>  }

Would it be possible to tag rt_cache_get_first() with an __acquires(RCU)
to help out sparse?

>  static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r)
>  {
> -	struct rt_cache_iter_state *st = rcu_dereference(seq->private);
> +	struct rt_cache_iter_state *st = seq->private;
> 
>  	r = r->u.dst.rt_next;
>  	while (!r) {
> @@ -298,7 +298,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r)
>  		rcu_read_lock_bh();
>  		r = rt_hash_table[st->bucket].chain;
>  	}
> -	return r;
> +	return rcu_dereference(r);
>  }

Ditto for rt_cache_get_next()?

>  static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)

There would need to be a __releases(RCU) somewhere -- possibly
in rt_cache_seq_stop(), but need to defer to you guys on this one.

						Thanx, Paul

^ permalink raw reply

* Re: FW:  ccid2/ccid3 oopses
From: Gerrit Renker @ 2008-01-09 14:17 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, devzero, dccp, netdev
In-Reply-To: <20080109140211.GA9857@ghostprotocols.net>

| > >> the easiest way to reproduce is:
| > >> 
| > >> while true;do modprobe dccp_ccid2/3;modprobe -r dccp_ccid2/3;done
| > >> after short time, the kernel oopses (messages below)
| > >> 
<snip>
| 
| Gerrit, the control socket isn't attached to any CCID module, so the
| CCID modules should be safe to remove, and IIRC they were safe to
| unload.
| 
Ah, right. I have misread the email. And can confirm the above: running
the for-loop at the top of the message (60 seconds uninterrupted for
CCID2,3 each) brought no oopses.
So maybe the cause triggering this oops is somewhere else.

^ permalink raw reply

* Re: Top 10 kernel oopses for the week ending January 5th, 2008
From: Johannes Berg @ 2008-01-09 14:12 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Linux Kernel Mailing List, Linus Torvalds, Andrew Morton, NetDev
In-Reply-To: <477FF149.4070609@linux.intel.com>

[-- Attachment #1: Type: text/plain, Size: 670 bytes --]

> Rank 1: __ieee80211_rx
> 	Warning at net/mac80211/rx.c:1672
> 	Reported 6 times (11 total reports)
> 	Same issue that was ranked 2nd last week
> 	Johannes has diagnosed this as a driver bug in the iwlwifi drivers
> 	More info: http://www.kerneloops.org/search.php?search=__ieee80211_rx

Note that because we don't get the module list for WARN_ON, we don't
actually know whether all of these instances are from the iwlwifi
drivers. A few other drivers suffer from the same problem. In one of
these cases, iwlwifi was contained in the stack trace, but in the common
case that isn't happening because packet processing is delayed to a
tasklet.

johannes

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 828 bytes --]

^ permalink raw reply

* Re: FW:  ccid2/ccid3 oopses
From: Arnaldo Carvalho de Melo @ 2008-01-09 14:02 UTC (permalink / raw)
  To: Gerrit Renker, devzero, dccp, netdev
In-Reply-To: <20080109122827.GC4461@gerrit.erg.abdn.ac.uk>

Em Wed, Jan 09, 2008 at 12:28:27PM +0000, Gerrit Renker escreveu:
> Roland, -
> 
> >> apparently, i got crashes when loading/unloading other driver modules just
> >> after ccid2 or ccid3 had been loaded/unloaded _once_ (have not used them at
> >> all, just modprobe module;modprobe -r module) >
> >> 
> <snip>
> >> the easiest way to reproduce is:
> >> 
> >> while true;do modprobe dccp_ccid2/3;modprobe -r dccp_ccid2/3;done
> >> after short time, the kernel oopses (messages below)
> >> 
> >> i`m not sure if this is worth to be filed at kernel bugzilla, so i`m contacting
> >> you personally first.
> >>
> The issue is known: once loaded, the DCCP modules can not be unloaded
> without causing a crash as the one you have observed. This is due to the
> fact that dccp_ipv{4,6} use control sockets which need to be released
> before the module can be unloaded.
> When the control sockets are not released then crashes will always
> result.
> In earlier versions of DCCP there was a kernel option known as "unload hack",
> which conditionally inserted 
> 	sock_release(dccp_v{4,6}_ctl_socket);
> in 
> 	dccp_v{4,6}_exit()
> 
> However, as the name says, it is a hack since there are other issues to 
> be considered:
> 	* sockets in timewait state
> 	* other wait states (e.g. half-open connections)
> 	* memory which has not been released
> 	* module dependencies
> 
> With regard to the latter, I am normally using the Unload Hack and
> release modules in the following order:
> 
> 	dccp_probe => dccp_ccid2 => dccp_ccid3 => dccp_tfrc_lib =>
>         dccp_ipv6  => dccp_ipv4  => dccp_diag  => dccp
> 
> Long story short
>  * the CCID/DCCP modules can currently not safely be unloaded
>  * maybe we should disable module unloading for the mainline kernel
>  * if anyone is interested to use the unload hack, here is the old patch
>    http://www.erg.abdn.ac.uk/users/gerrit/dccp/testing_dccp/Unload_Hack.diff

Gerrit, the control socket isn't attached to any CCID module, so the
CCID modules should be safe to remove, and IIRC they were safe to
unload.

The unload hack was for something else, for the core DCCP modules. We
can't unload because there are refcounts held by the control sock, so
the unload hack would just destroy the control sock and thus the module
refcount would reach zero and it could then be unloaded.

I've been consistently being sidetracked with work (huh :-)) and
couldn't look at this issue, but the CCID modules should be safe to
unload.

- Arnaldo

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox