Netdev List
 help / color / mirror / Atom feed
* [PATCH 05/19] netfilter: ecache: move initial codes out of pernet_operations
From: Gao feng @ 2012-12-28  2:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, canqunzhang, kaber, pablo, ebiederm, Gao feng
In-Reply-To: <1356662206-2260-1-git-send-email-gaofeng@cn.fujitsu.com>

Move the global initial codes to the module_init/exit context.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 include/net/netfilter/nf_conntrack_ecache.h | 19 +++++++++++----
 net/netfilter/nf_conntrack_core.c           | 14 +++++++----
 net/netfilter/nf_conntrack_ecache.c         | 37 ++++++++++-------------------
 3 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 5654d29..092dc65 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -207,9 +207,11 @@ nf_ct_expect_event(enum ip_conntrack_expect_events event,
 	nf_ct_expect_event_report(event, exp, 0, 0);
 }
 
-extern int nf_conntrack_ecache_init(struct net *net);
-extern void nf_conntrack_ecache_fini(struct net *net);
+extern int nf_conntrack_ecache_pernet_init(struct net *net);
+extern void nf_conntrack_ecache_pernet_fini(struct net *net);
 
+extern int nf_conntrack_ecache_init(void);
+extern void nf_conntrack_ecache_fini(void);
 #else /* CONFIG_NF_CONNTRACK_EVENTS */
 
 static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
@@ -232,12 +234,21 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
  					     u32 portid,
  					     int report) {}
 
-static inline int nf_conntrack_ecache_init(struct net *net)
+static inline int nf_conntrack_ecache_pernet_init(struct net *net)
 {
 	return 0;
 }
 
-static inline void nf_conntrack_ecache_fini(struct net *net)
+static inline void nf_conntrack_ecache_pernet_fini(struct net *net)
+{
+}
+
+static inline int nf_conntrack_ecache_init(void)
+{
+	return 0;
+}
+
+static inline void nf_conntrack_ecache_fini(void)
 {
 }
 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b8fb65a..b34bc9a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1360,7 +1360,7 @@ void nf_conntrack_cleanup_net(struct net *net)
 	nf_conntrack_proto_fini(net);
 	nf_conntrack_helper_fini(net);
 	nf_conntrack_timeout_fini(net);
-	nf_conntrack_ecache_fini(net);
+	nf_conntrack_ecache_pernet_fini(net);
 	nf_conntrack_tstamp_pernet_fini(net);
 	nf_conntrack_acct_pernet_fini(net);
 	nf_conntrack_expect_pernet_fini(net);
@@ -1378,6 +1378,7 @@ void nf_conntrack_cleanup_end(void)
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	nf_ct_extend_unregister(&nf_ct_zone_extend);
 #endif
+	nf_conntrack_ecache_fini();
 	nf_conntrack_tstamp_fini();
 	nf_conntrack_acct_fini();
 	nf_conntrack_expect_fini();
@@ -1512,6 +1513,9 @@ int nf_conntrack_init_start(void)
 	ret = nf_conntrack_tstamp_init();
 	if (ret < 0)
 		goto err_tstamp;
+	ret = nf_conntrack_ecache_init();
+	if (ret < 0)
+		goto err_ecache;
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	ret = nf_ct_extend_register(&nf_ct_zone_extend);
@@ -1530,8 +1534,10 @@ int nf_conntrack_init_start(void)
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 err_extend:
-	nf_conntrack_tstamp_fini();
+	nf_conntrack_ecache_fini();
 #endif
+err_ecache:
+	nf_conntrack_tstamp_fini();
 err_tstamp:
 	nf_conntrack_acct_fini();
 err_acct:
@@ -1590,7 +1596,7 @@ int nf_conntrack_init_net(struct net *net)
 	ret = nf_conntrack_tstamp_pernet_init(net);
 	if (ret < 0)
 		goto err_tstamp;
-	ret = nf_conntrack_ecache_init(net);
+	ret = nf_conntrack_ecache_pernet_init(net);
 	if (ret < 0)
 		goto err_ecache;
 	ret = nf_conntrack_timeout_init(net);
@@ -1608,7 +1614,7 @@ out_proto:
 err_helper:
 	nf_conntrack_timeout_fini(net);
 err_timeout:
-	nf_conntrack_ecache_fini(net);
+	nf_conntrack_ecache_pernet_fini(net);
 err_ecache:
 	nf_conntrack_tstamp_pernet_fini(net);
 err_tstamp:
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index faa978f..b5d2eb8 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -233,38 +233,27 @@ static void nf_conntrack_event_fini_sysctl(struct net *net)
 }
 #endif /* CONFIG_SYSCTL */
 
-int nf_conntrack_ecache_init(struct net *net)
+int nf_conntrack_ecache_pernet_init(struct net *net)
 {
-	int ret;
-
 	net->ct.sysctl_events = nf_ct_events;
 	net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout;
+	return nf_conntrack_event_init_sysctl(net);
+}
 
-	if (net_eq(net, &init_net)) {
-		ret = nf_ct_extend_register(&event_extend);
-		if (ret < 0) {
-			printk(KERN_ERR "nf_ct_event: Unable to register "
-					"event extension.\n");
-			goto out_extend_register;
-		}
-	}
+void nf_conntrack_ecache_pernet_fini(struct net *net)
+{
+	nf_conntrack_event_fini_sysctl(net);
+}
 
-	ret = nf_conntrack_event_init_sysctl(net);
+int nf_conntrack_ecache_init(void)
+{
+	int ret = nf_ct_extend_register(&event_extend);
 	if (ret < 0)
-		goto out_sysctl;
-
-	return 0;
-
-out_sysctl:
-	if (net_eq(net, &init_net))
-		nf_ct_extend_unregister(&event_extend);
-out_extend_register:
+		pr_err("nf_ct_event: Unable to register event extension.\n");
 	return ret;
 }
 
-void nf_conntrack_ecache_fini(struct net *net)
+void nf_conntrack_ecache_fini(void)
 {
-	nf_conntrack_event_fini_sysctl(net);
-	if (net_eq(net, &init_net))
-		nf_ct_extend_unregister(&event_extend);
+	nf_ct_extend_unregister(&event_extend);
 }
-- 
1.7.11.7

^ permalink raw reply related

* [PATCH 02/19] netfilter: expect: move initial codes out of pernet_operations
From: Gao feng @ 2012-12-28  2:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, canqunzhang, kaber, pablo, ebiederm, Gao feng
In-Reply-To: <1356662206-2260-1-git-send-email-gaofeng@cn.fujitsu.com>

Move the global initial codes to the module_init/exit context.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 include/net/netfilter/nf_conntrack_expect.h |  7 ++--
 net/netfilter/nf_conntrack_core.c           | 14 ++++++--
 net/netfilter/nf_conntrack_expect.c         | 52 ++++++++++++++---------------
 3 files changed, 41 insertions(+), 32 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index cc13f37..cbbae76 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -69,8 +69,11 @@ struct nf_conntrack_expect_policy {
 
 #define NF_CT_EXPECT_CLASS_DEFAULT	0
 
-int nf_conntrack_expect_init(struct net *net);
-void nf_conntrack_expect_fini(struct net *net);
+int nf_conntrack_expect_pernet_init(struct net *net);
+void nf_conntrack_expect_pernet_fini(struct net *net);
+
+int nf_conntrack_expect_init(void);
+void nf_conntrack_expect_fini(void);
 
 struct nf_conntrack_expect *
 __nf_ct_expect_find(struct net *net, u16 zone,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ffb2463..96b281f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1363,7 +1363,7 @@ void nf_conntrack_cleanup_net(struct net *net)
 	nf_conntrack_ecache_fini(net);
 	nf_conntrack_tstamp_fini(net);
 	nf_conntrack_acct_fini(net);
-	nf_conntrack_expect_fini(net);
+	nf_conntrack_expect_pernet_fini(net);
 	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
 	kfree(net->ct.slabname);
 	free_percpu(net->ct.stat);
@@ -1378,6 +1378,7 @@ void nf_conntrack_cleanup_end(void)
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	nf_ct_extend_unregister(&nf_ct_zone_extend);
 #endif
+	nf_conntrack_expect_fini();
 }
 
 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
@@ -1497,6 +1498,11 @@ int nf_conntrack_init_start(void)
 	printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
 	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
 	       nf_conntrack_max);
+
+	ret = nf_conntrack_expect_init();
+	if (ret < 0)
+		goto err_expect;
+
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	ret = nf_ct_extend_register(&nf_ct_zone_extend);
 	if (ret < 0)
@@ -1514,7 +1520,9 @@ int nf_conntrack_init_start(void)
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 err_extend:
+	nf_conntrack_expect_fini();
 #endif
+err_expect:
 	return ret;
 }
 
@@ -1559,7 +1567,7 @@ int nf_conntrack_init_net(struct net *net)
 		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
 		goto err_hash;
 	}
-	ret = nf_conntrack_expect_init(net);
+	ret = nf_conntrack_expect_pernet_init(net);
 	if (ret < 0)
 		goto err_expect;
 	ret = nf_conntrack_acct_init(net);
@@ -1592,7 +1600,7 @@ err_ecache:
 err_tstamp:
 	nf_conntrack_acct_fini(net);
 err_acct:
-	nf_conntrack_expect_fini(net);
+	nf_conntrack_expect_pernet_fini(net);
 err_expect:
 	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
 err_hash:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 527651a..0557acc 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -587,53 +587,51 @@ static void exp_proc_remove(struct net *net)
 
 module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
 
-int nf_conntrack_expect_init(struct net *net)
+int nf_conntrack_expect_pernet_init(struct net *net)
 {
 	int err = -ENOMEM;
 
-	if (net_eq(net, &init_net)) {
-		if (!nf_ct_expect_hsize) {
-			nf_ct_expect_hsize = net->ct.htable_size / 256;
-			if (!nf_ct_expect_hsize)
-				nf_ct_expect_hsize = 1;
-		}
-		nf_ct_expect_max = nf_ct_expect_hsize * 4;
-	}
-
 	net->ct.expect_count = 0;
 	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
 	if (net->ct.expect_hash == NULL)
 		goto err1;
 
-	if (net_eq(net, &init_net)) {
-		nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
-					sizeof(struct nf_conntrack_expect),
-					0, 0, NULL);
-		if (!nf_ct_expect_cachep)
-			goto err2;
-	}
-
 	err = exp_proc_init(net);
 	if (err < 0)
-		goto err3;
+		goto err2;
 
 	return 0;
 
-err3:
-	if (net_eq(net, &init_net))
-		kmem_cache_destroy(nf_ct_expect_cachep);
 err2:
 	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
 err1:
 	return err;
 }
 
-void nf_conntrack_expect_fini(struct net *net)
+void nf_conntrack_expect_pernet_fini(struct net *net)
 {
 	exp_proc_remove(net);
-	if (net_eq(net, &init_net)) {
-		rcu_barrier(); /* Wait for call_rcu() before destroy */
-		kmem_cache_destroy(nf_ct_expect_cachep);
-	}
 	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
 }
+
+int nf_conntrack_expect_init(void)
+{
+	if (!nf_ct_expect_hsize) {
+		nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
+		if (!nf_ct_expect_hsize)
+			nf_ct_expect_hsize = 1;
+	}
+	nf_ct_expect_max = nf_ct_expect_hsize * 4;
+	nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
+				sizeof(struct nf_conntrack_expect),
+				0, 0, NULL);
+	if (!nf_ct_expect_cachep)
+		return -ENOMEM;
+	return 0;
+}
+
+void nf_conntrack_expect_fini(void)
+{
+	rcu_barrier(); /* Wait for call_rcu() before destroy */
+	kmem_cache_destroy(nf_ct_expect_cachep);
+}
-- 
1.7.11.7

^ permalink raw reply related

* [PATCH 06/19] netfilter: timeout: move initial codes out of pernet_operations
From: Gao feng @ 2012-12-28  2:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, canqunzhang, kaber, pablo, ebiederm, Gao feng
In-Reply-To: <1356662206-2260-1-git-send-email-gaofeng@cn.fujitsu.com>

Move the global initial codes to the module_init/exit context.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 include/net/netfilter/nf_conntrack_timeout.h |  8 ++++----
 net/netfilter/nf_conntrack_core.c            | 15 ++++++++-------
 net/netfilter/nf_conntrack_timeout.c         | 23 +++++++----------------
 3 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index e41e472..d23aceb 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -76,15 +76,15 @@ nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
 }
 
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-extern int nf_conntrack_timeout_init(struct net *net);
-extern void nf_conntrack_timeout_fini(struct net *net);
+extern int nf_conntrack_timeout_init(void);
+extern void nf_conntrack_timeout_fini(void);
 #else
-static inline int nf_conntrack_timeout_init(struct net *net)
+static inline int nf_conntrack_timeout_init(void)
 {
         return 0;
 }
 
-static inline void nf_conntrack_timeout_fini(struct net *net)
+static inline void nf_conntrack_timeout_fini(void)
 {
         return;
 }
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b34bc9a..a9a23c4 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1359,7 +1359,6 @@ void nf_conntrack_cleanup_net(struct net *net)
 	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
 	nf_conntrack_proto_fini(net);
 	nf_conntrack_helper_fini(net);
-	nf_conntrack_timeout_fini(net);
 	nf_conntrack_ecache_pernet_fini(net);
 	nf_conntrack_tstamp_pernet_fini(net);
 	nf_conntrack_acct_pernet_fini(net);
@@ -1378,6 +1377,7 @@ void nf_conntrack_cleanup_end(void)
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	nf_ct_extend_unregister(&nf_ct_zone_extend);
 #endif
+	nf_conntrack_timeout_fini();
 	nf_conntrack_ecache_fini();
 	nf_conntrack_tstamp_fini();
 	nf_conntrack_acct_fini();
@@ -1517,6 +1517,10 @@ int nf_conntrack_init_start(void)
 	if (ret < 0)
 		goto err_ecache;
 
+	ret = nf_conntrack_timeout_init();
+	if (ret < 0)
+		goto err_timeout;
+
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	ret = nf_ct_extend_register(&nf_ct_zone_extend);
 	if (ret < 0)
@@ -1534,8 +1538,10 @@ int nf_conntrack_init_start(void)
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 err_extend:
-	nf_conntrack_ecache_fini();
+	nf_conntrack_timeout_fini();
 #endif
+err_timeout:
+	nf_conntrack_ecache_fini();
 err_ecache:
 	nf_conntrack_tstamp_fini();
 err_tstamp:
@@ -1599,9 +1605,6 @@ int nf_conntrack_init_net(struct net *net)
 	ret = nf_conntrack_ecache_pernet_init(net);
 	if (ret < 0)
 		goto err_ecache;
-	ret = nf_conntrack_timeout_init(net);
-	if (ret < 0)
-		goto err_timeout;
 	ret = nf_conntrack_helper_init(net);
 	if (ret < 0)
 		goto err_helper;
@@ -1612,8 +1615,6 @@ int nf_conntrack_init_net(struct net *net)
 out_proto:
 	nf_conntrack_helper_fini(net);
 err_helper:
-	nf_conntrack_timeout_fini(net);
-err_timeout:
 	nf_conntrack_ecache_pernet_fini(net);
 err_ecache:
 	nf_conntrack_tstamp_pernet_fini(net);
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
index a878ce5..93da609 100644
--- a/net/netfilter/nf_conntrack_timeout.c
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -37,24 +37,15 @@ static struct nf_ct_ext_type timeout_extend __read_mostly = {
 	.id	= NF_CT_EXT_TIMEOUT,
 };
 
-int nf_conntrack_timeout_init(struct net *net)
+int nf_conntrack_timeout_init(void)
 {
-	int ret = 0;
-
-	if (net_eq(net, &init_net)) {
-		ret = nf_ct_extend_register(&timeout_extend);
-		if (ret < 0) {
-			printk(KERN_ERR "nf_ct_timeout: Unable to register "
-					"timeout extension.\n");
-			return ret;
-		}
-	}
-
-	return 0;
+	int ret = nf_ct_extend_register(&timeout_extend);
+	if (ret < 0)
+		pr_err("nf_ct_timeout: Unable to register timeout extension.\n");
+	return ret;
 }
 
-void nf_conntrack_timeout_fini(struct net *net)
+void nf_conntrack_timeout_fini(void)
 {
-	if (net_eq(net, &init_net))
-		nf_ct_extend_unregister(&timeout_extend);
+	nf_ct_extend_unregister(&timeout_extend);
 }
-- 
1.7.11.7

^ permalink raw reply related

* [PATCH 09/19] netfilter: l3proto: prepare reworking l3proto support for netns
From: Gao feng @ 2012-12-28  2:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, canqunzhang, kaber, pablo, ebiederm, Gao feng
In-Reply-To: <1356662206-2260-1-git-send-email-gaofeng@cn.fujitsu.com>

Prepare to move the code that register/unregister l3proto
to the module_init/exit context.

This patch deletes the codes that register/unregister l3proto,
this code will be added in next two patch.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 include/net/netfilter/nf_conntrack_l3proto.h   | 20 ++++++++++++-----
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  6 ++---
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  6 ++---
 net/netfilter/nf_conntrack_proto.c             | 31 ++++++++++----------------
 4 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 6f7c13f..f3834d5 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -76,11 +76,21 @@ struct nf_conntrack_l3proto {
 
 extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX];
 
-/* Protocol registration. */
-extern int nf_conntrack_l3proto_register(struct net *net,
-					 struct nf_conntrack_l3proto *proto);
-extern void nf_conntrack_l3proto_unregister(struct net *net,
-					    struct nf_conntrack_l3proto *proto);
+/* Protocol pernet registration. */
+extern int
+nf_conntrack_l3proto_pernet_register(struct net *net,
+				     struct nf_conntrack_l3proto *proto);
+extern void
+nf_conntrack_l3proto_pernet_unregister(struct net *net,
+				       struct nf_conntrack_l3proto *proto);
+
+/* Protocol global registration. */
+extern int
+nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
+
+extern void
+nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto);
+
 extern struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
 extern void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p);
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index fcdd0c2..afd3a96 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -438,8 +438,8 @@ static int ipv4_net_init(struct net *net)
 		pr_err("nf_conntrack_l4proto_icmp4 :protocol register failed\n");
 		goto out_icmp;
 	}
-	ret = nf_conntrack_l3proto_register(net,
-					    &nf_conntrack_l3proto_ipv4);
+	ret = nf_conntrack_l3proto_pernet_register(net,
+					&nf_conntrack_l3proto_ipv4);
 	if (ret < 0) {
 		pr_err("nf_conntrack_l3proto_ipv4 :protocol register failed\n");
 		goto out_ipv4;
@@ -460,7 +460,7 @@ out_tcp:
 
 static void ipv4_net_exit(struct net *net)
 {
-	nf_conntrack_l3proto_unregister(net,
+	nf_conntrack_l3proto_pernet_unregister(net,
 					&nf_conntrack_l3proto_ipv4);
 	nf_conntrack_l4proto_unregister(net,
 					&nf_conntrack_l4proto_icmp);
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 00ee17c..469a15f 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -439,8 +439,8 @@ static int ipv6_net_init(struct net *net)
 		printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n");
 		goto cleanup_udp6;
 	}
-	ret = nf_conntrack_l3proto_register(net,
-					    &nf_conntrack_l3proto_ipv6);
+	ret = nf_conntrack_l3proto_pernet_register(net,
+						&nf_conntrack_l3proto_ipv6);
 	if (ret < 0) {
 		printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n");
 		goto cleanup_icmpv6;
@@ -461,7 +461,7 @@ static int ipv6_net_init(struct net *net)
 
 static void ipv6_net_exit(struct net *net)
 {
-	nf_conntrack_l3proto_unregister(net,
+	nf_conntrack_l3proto_pernet_unregister(net,
 					&nf_conntrack_l3proto_ipv6);
 	nf_conntrack_l4proto_unregister(net,
 					&nf_conntrack_l4proto_icmpv6);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 3d01b90..5a625a6 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -212,8 +212,8 @@ static void nf_ct_l3proto_unregister_sysctl(struct net *net,
 #endif
 }
 
-static int
-nf_conntrack_l3proto_register_net(struct nf_conntrack_l3proto *proto)
+int
+nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
 {
 	int ret = 0;
 	struct nf_conntrack_l3proto *old;
@@ -242,9 +242,10 @@ out_unlock:
 	return ret;
 
 }
+EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
 
-int nf_conntrack_l3proto_register(struct net *net,
-				  struct nf_conntrack_l3proto *proto)
+int nf_conntrack_l3proto_pernet_register(struct net *net,
+					 struct nf_conntrack_l3proto *proto)
 {
 	int ret = 0;
 
@@ -258,18 +259,12 @@ int nf_conntrack_l3proto_register(struct net *net,
 	if (ret < 0)
 		return ret;
 
-	if (net == &init_net) {
-		ret = nf_conntrack_l3proto_register_net(proto);
-		if (ret < 0)
-			nf_ct_l3proto_unregister_sysctl(net, proto);
-	}
-
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
+EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_pernet_register);
 
-static void
-nf_conntrack_l3proto_unregister_net(struct nf_conntrack_l3proto *proto)
+void
+nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 {
 	BUG_ON(proto->l3proto >= AF_MAX);
 
@@ -283,19 +278,17 @@ nf_conntrack_l3proto_unregister_net(struct nf_conntrack_l3proto *proto)
 
 	synchronize_rcu();
 }
+EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
 
-void nf_conntrack_l3proto_unregister(struct net *net,
-				     struct nf_conntrack_l3proto *proto)
+void nf_conntrack_l3proto_pernet_unregister(struct net *net,
+					struct nf_conntrack_l3proto *proto)
 {
-	if (net == &init_net)
-		nf_conntrack_l3proto_unregister_net(proto);
-
 	nf_ct_l3proto_unregister_sysctl(net, proto);
 
 	/* Remove all contrack entries for this protocol */
 	nf_ct_iterate_cleanup(net, kill_l3proto, proto);
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
+EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_pernet_unregister);
 
 static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
 					      struct nf_conntrack_l4proto *l4proto)
-- 
1.7.11.7

^ permalink raw reply related

* [PATCH 03/19] netfilter: acct: move initial codes out of pernet_operations
From: Gao feng @ 2012-12-28  2:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, canqunzhang, kaber, pablo, ebiederm, Gao feng
In-Reply-To: <1356662206-2260-1-git-send-email-gaofeng@cn.fujitsu.com>

Move the global initial codes to the module_init/exit context.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 include/net/netfilter/nf_conntrack_acct.h |  6 +++--
 net/netfilter/nf_conntrack_acct.c         | 37 ++++++++++++-------------------
 net/netfilter/nf_conntrack_core.c         | 15 +++++++++----
 3 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index 463ae8e..2bdb7a1 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -57,7 +57,9 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
 	net->ct.sysctl_acct = enable;
 }
 
-extern int nf_conntrack_acct_init(struct net *net);
-extern void nf_conntrack_acct_fini(struct net *net);
+extern int nf_conntrack_acct_pernet_init(struct net *net);
+extern void nf_conntrack_acct_pernet_fini(struct net *net);
 
+extern int nf_conntrack_acct_init(void);
+extern void nf_conntrack_acct_fini(void);
 #endif /* _NF_CONNTRACK_ACCT_H */
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 7df424e..8138555 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -106,36 +106,27 @@ static void nf_conntrack_acct_fini_sysctl(struct net *net)
 }
 #endif
 
-int nf_conntrack_acct_init(struct net *net)
+int nf_conntrack_acct_pernet_init(struct net *net)
 {
-	int ret;
-
 	net->ct.sysctl_acct = nf_ct_acct;
 
-	if (net_eq(net, &init_net)) {
-		ret = nf_ct_extend_register(&acct_extend);
-		if (ret < 0) {
-			printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
-			goto out_extend_register;
-		}
-	}
-
-	ret = nf_conntrack_acct_init_sysctl(net);
-	if (ret < 0)
-		goto out_sysctl;
+	return nf_conntrack_acct_init_sysctl(net);
+}
 
-	return 0;
+void nf_conntrack_acct_pernet_fini(struct net *net)
+{
+	nf_conntrack_acct_fini_sysctl(net);
+}
 
-out_sysctl:
-	if (net_eq(net, &init_net))
-		nf_ct_extend_unregister(&acct_extend);
-out_extend_register:
+int nf_conntrack_acct_init(void)
+{
+	int ret = nf_ct_extend_register(&acct_extend);
+	if (ret < 0)
+		pr_err("nf_conntrack_acct: Unable to register extension\n");
 	return ret;
 }
 
-void nf_conntrack_acct_fini(struct net *net)
+void nf_conntrack_acct_fini(void)
 {
-	nf_conntrack_acct_fini_sysctl(net);
-	if (net_eq(net, &init_net))
-		nf_ct_extend_unregister(&acct_extend);
+	nf_ct_extend_unregister(&acct_extend);
 }
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 96b281f..feb08d0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1362,7 +1362,7 @@ void nf_conntrack_cleanup_net(struct net *net)
 	nf_conntrack_timeout_fini(net);
 	nf_conntrack_ecache_fini(net);
 	nf_conntrack_tstamp_fini(net);
-	nf_conntrack_acct_fini(net);
+	nf_conntrack_acct_pernet_fini(net);
 	nf_conntrack_expect_pernet_fini(net);
 	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
 	kfree(net->ct.slabname);
@@ -1378,6 +1378,7 @@ void nf_conntrack_cleanup_end(void)
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	nf_ct_extend_unregister(&nf_ct_zone_extend);
 #endif
+	nf_conntrack_acct_fini();
 	nf_conntrack_expect_fini();
 }
 
@@ -1503,6 +1504,10 @@ int nf_conntrack_init_start(void)
 	if (ret < 0)
 		goto err_expect;
 
+	ret = nf_conntrack_acct_init();
+	if (ret < 0)
+		goto err_acct;
+
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	ret = nf_ct_extend_register(&nf_ct_zone_extend);
 	if (ret < 0)
@@ -1520,8 +1525,10 @@ int nf_conntrack_init_start(void)
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 err_extend:
-	nf_conntrack_expect_fini();
+	nf_conntrack_acct_fini();
 #endif
+err_acct:
+	nf_conntrack_expect_fini();
 err_expect:
 	return ret;
 }
@@ -1570,7 +1577,7 @@ int nf_conntrack_init_net(struct net *net)
 	ret = nf_conntrack_expect_pernet_init(net);
 	if (ret < 0)
 		goto err_expect;
-	ret = nf_conntrack_acct_init(net);
+	ret = nf_conntrack_acct_pernet_init(net);
 	if (ret < 0)
 		goto err_acct;
 	ret = nf_conntrack_tstamp_init(net);
@@ -1598,7 +1605,7 @@ err_timeout:
 err_ecache:
 	nf_conntrack_tstamp_fini(net);
 err_tstamp:
-	nf_conntrack_acct_fini(net);
+	nf_conntrack_acct_pernet_fini(net);
 err_acct:
 	nf_conntrack_expect_pernet_fini(net);
 err_expect:
-- 
1.7.11.7

^ permalink raw reply related

* [PATCH 07/19] netfilter: helper: move initial codes out of pernet_operations
From: Gao feng @ 2012-12-28  2:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, canqunzhang, kaber, pablo, ebiederm, Gao feng
In-Reply-To: <1356662206-2260-1-git-send-email-gaofeng@cn.fujitsu.com>

Move the global initial codes to the module_init/exit context.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 include/net/netfilter/nf_conntrack_helper.h |  7 ++--
 net/netfilter/nf_conntrack_core.c           | 15 +++++---
 net/netfilter/nf_conntrack_helper.c         | 53 ++++++++++++++---------------
 3 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 9aad956..ce27edf 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -82,8 +82,11 @@ static inline void *nfct_help_data(const struct nf_conn *ct)
 	return (void *)help->data;
 }
 
-extern int nf_conntrack_helper_init(struct net *net);
-extern void nf_conntrack_helper_fini(struct net *net);
+extern int nf_conntrack_helper_pernet_init(struct net *net);
+extern void nf_conntrack_helper_pernet_fini(struct net *net);
+
+extern int nf_conntrack_helper_init(void);
+extern void nf_conntrack_helper_fini(void);
 
 extern int nf_conntrack_broadcast_help(struct sk_buff *skb,
 				       unsigned int protoff,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index a9a23c4..06ed86f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1358,7 +1358,7 @@ void nf_conntrack_cleanup_net(struct net *net)
 
 	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
 	nf_conntrack_proto_fini(net);
-	nf_conntrack_helper_fini(net);
+	nf_conntrack_helper_pernet_fini(net);
 	nf_conntrack_ecache_pernet_fini(net);
 	nf_conntrack_tstamp_pernet_fini(net);
 	nf_conntrack_acct_pernet_fini(net);
@@ -1377,6 +1377,7 @@ void nf_conntrack_cleanup_end(void)
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	nf_ct_extend_unregister(&nf_ct_zone_extend);
 #endif
+	nf_conntrack_helper_fini();
 	nf_conntrack_timeout_fini();
 	nf_conntrack_ecache_fini();
 	nf_conntrack_tstamp_fini();
@@ -1521,6 +1522,10 @@ int nf_conntrack_init_start(void)
 	if (ret < 0)
 		goto err_timeout;
 
+	ret = nf_conntrack_helper_init();
+	if (ret < 0)
+		goto err_helper;
+
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	ret = nf_ct_extend_register(&nf_ct_zone_extend);
 	if (ret < 0)
@@ -1538,8 +1543,10 @@ int nf_conntrack_init_start(void)
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 err_extend:
-	nf_conntrack_timeout_fini();
+	nf_conntrack_helper_fini();
 #endif
+err_helper:
+	nf_conntrack_timeout_fini();
 err_timeout:
 	nf_conntrack_ecache_fini();
 err_ecache:
@@ -1605,7 +1612,7 @@ int nf_conntrack_init_net(struct net *net)
 	ret = nf_conntrack_ecache_pernet_init(net);
 	if (ret < 0)
 		goto err_ecache;
-	ret = nf_conntrack_helper_init(net);
+	ret = nf_conntrack_helper_pernet_init(net);
 	if (ret < 0)
 		goto err_helper;
 	ret = nf_conntrack_proto_init(net);
@@ -1613,7 +1620,7 @@ int nf_conntrack_init_net(struct net *net)
 		goto out_proto;
 	return 0;
 out_proto:
-	nf_conntrack_helper_fini(net);
+	nf_conntrack_helper_pernet_fini(net);
 err_helper:
 	nf_conntrack_ecache_pernet_fini(net);
 err_ecache:
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 884f2b3..ad4c747 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -423,44 +423,41 @@ static struct nf_ct_ext_type helper_extend __read_mostly = {
 	.id	= NF_CT_EXT_HELPER,
 };
 
-int nf_conntrack_helper_init(struct net *net)
+int nf_conntrack_helper_pernet_init(struct net *net)
 {
-	int err;
-
 	net->ct.auto_assign_helper_warned = false;
 	net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
+	return nf_conntrack_helper_init_sysctl(net);
+}
 
-	if (net_eq(net, &init_net)) {
-		nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
-		nf_ct_helper_hash =
-			nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
-		if (!nf_ct_helper_hash)
-			return -ENOMEM;
+void nf_conntrack_helper_pernet_fini(struct net *net)
+{
+	nf_conntrack_helper_fini_sysctl(net);
+}
 
-		err = nf_ct_extend_register(&helper_extend);
-		if (err < 0)
-			goto err1;
+int nf_conntrack_helper_init(void)
+{
+	int ret;
+	nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
+	nf_ct_helper_hash =
+		nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
+	if (!nf_ct_helper_hash)
+		return -ENOMEM;
+
+	ret = nf_ct_extend_register(&helper_extend);
+	if (ret < 0) {
+		pr_err("nf_ct_helper: Unable to register helper extension.\n");
+		goto out_extend;
 	}
-
-	err = nf_conntrack_helper_init_sysctl(net);
-	if (err < 0)
-		goto out_sysctl;
-
 	return 0;
 
-out_sysctl:
-	if (net_eq(net, &init_net))
-		nf_ct_extend_unregister(&helper_extend);
-err1:
+out_extend:
 	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
-	return err;
+	return ret;
 }
 
-void nf_conntrack_helper_fini(struct net *net)
+void nf_conntrack_helper_fini(void)
 {
-	nf_conntrack_helper_fini_sysctl(net);
-	if (net_eq(net, &init_net)) {
-		nf_ct_extend_unregister(&helper_extend);
-		nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
-	}
+	nf_ct_extend_unregister(&helper_extend);
+	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
 }
-- 
1.7.11.7

^ permalink raw reply related

* [PATCH 08/19] netfilter: proto: move initial codes out of pernet_operations
From: Gao feng @ 2012-12-28  2:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, canqunzhang, kaber, pablo, ebiederm, Gao feng
In-Reply-To: <1356662206-2260-1-git-send-email-gaofeng@cn.fujitsu.com>

Move the global initial codes to the module_init/exit context.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 include/net/netfilter/nf_conntrack_core.h |  7 +++++--
 net/netfilter/nf_conntrack_core.c         | 13 +++++++++---
 net/netfilter/nf_conntrack_proto.c        | 35 +++++++++++++++++--------------
 3 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index ec51a3c..5ea7518 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -34,8 +34,11 @@ extern void nf_conntrack_cleanup_start(void);
 extern void nf_conntrack_init_end(void);
 extern void nf_conntrack_cleanup_end(void);
 
-extern int nf_conntrack_proto_init(struct net *net);
-extern void nf_conntrack_proto_fini(struct net *net);
+extern int nf_conntrack_proto_pernet_init(struct net *net);
+extern void nf_conntrack_proto_pernet_fini(struct net *net);
+
+extern int nf_conntrack_proto_init(void);
+extern void nf_conntrack_proto_fini(void);
 
 extern bool
 nf_ct_get_tuple(const struct sk_buff *skb,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 06ed86f..fc0805e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1357,7 +1357,7 @@ void nf_conntrack_cleanup_net(struct net *net)
 	}
 
 	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
-	nf_conntrack_proto_fini(net);
+	nf_conntrack_proto_pernet_fini(net);
 	nf_conntrack_helper_pernet_fini(net);
 	nf_conntrack_ecache_pernet_fini(net);
 	nf_conntrack_tstamp_pernet_fini(net);
@@ -1377,6 +1377,7 @@ void nf_conntrack_cleanup_end(void)
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	nf_ct_extend_unregister(&nf_ct_zone_extend);
 #endif
+	nf_conntrack_proto_fini();
 	nf_conntrack_helper_fini();
 	nf_conntrack_timeout_fini();
 	nf_conntrack_ecache_fini();
@@ -1531,6 +1532,10 @@ int nf_conntrack_init_start(void)
 	if (ret < 0)
 		goto err_extend;
 #endif
+	ret = nf_conntrack_proto_init();
+	if (ret < 0)
+		goto err_proto;
+
 	/* Set up fake conntrack: to never be deleted, not in any hashes */
 	for_each_possible_cpu(cpu) {
 		struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
@@ -1541,10 +1546,12 @@ int nf_conntrack_init_start(void)
 	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
 	return 0;
 
+err_proto:
 #ifdef CONFIG_NF_CONNTRACK_ZONES
+	nf_ct_extend_unregister(&nf_ct_zone_extend);
 err_extend:
-	nf_conntrack_helper_fini();
 #endif
+	nf_conntrack_helper_fini();
 err_helper:
 	nf_conntrack_timeout_fini();
 err_timeout:
@@ -1615,7 +1622,7 @@ int nf_conntrack_init_net(struct net *net)
 	ret = nf_conntrack_helper_pernet_init(net);
 	if (ret < 0)
 		goto err_helper;
-	ret = nf_conntrack_proto_init(net);
+	ret = nf_conntrack_proto_pernet_init(net);
 	if (ret < 0)
 		goto out_proto;
 	return 0;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 51e928d..3d01b90 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -503,9 +503,8 @@ void nf_conntrack_l4proto_unregister(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
 
-int nf_conntrack_proto_init(struct net *net)
+int nf_conntrack_proto_pernet_init(struct net *net)
 {
-	unsigned int i;
 	int err;
 	struct nf_proto_net *pn = nf_ct_l4proto_net(net,
 					&nf_conntrack_l4proto_generic);
@@ -520,29 +519,33 @@ int nf_conntrack_proto_init(struct net *net)
 	if (err < 0)
 		return err;
 
-	if (net == &init_net) {
-		for (i = 0; i < AF_MAX; i++)
-			rcu_assign_pointer(nf_ct_l3protos[i],
-					   &nf_conntrack_l3proto_generic);
-	}
-
 	pn->users++;
 	return 0;
 }
 
-void nf_conntrack_proto_fini(struct net *net)
+void nf_conntrack_proto_pernet_fini(struct net *net)
 {
-	unsigned int i;
 	struct nf_proto_net *pn = nf_ct_l4proto_net(net,
 					&nf_conntrack_l4proto_generic);
-
 	pn->users--;
 	nf_ct_l4proto_unregister_sysctl(net,
 					pn,
 					&nf_conntrack_l4proto_generic);
-	if (net == &init_net) {
-		/* free l3proto protocol tables */
-		for (i = 0; i < PF_MAX; i++)
-			kfree(nf_ct_protos[i]);
-	}
+}
+
+int nf_conntrack_proto_init(void)
+{
+	unsigned int i;
+	for (i = 0; i < AF_MAX; i++)
+		rcu_assign_pointer(nf_ct_l3protos[i],
+				   &nf_conntrack_l3proto_generic);
+	return 0;
+}
+
+void nf_conntrack_proto_fini(void)
+{
+	unsigned int i;
+	/* free l3proto protocol tables */
+	for (i = 0; i < PF_MAX; i++)
+		kfree(nf_ct_protos[i]);
 }
-- 
1.7.11.7

^ permalink raw reply related

* [PATCH 10/19] netfilter: ipv4: register ipv4 in module_init
From: Gao feng @ 2012-12-28  2:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, canqunzhang, kaber, pablo, ebiederm, Gao feng
In-Reply-To: <1356662206-2260-1-git-send-email-gaofeng@cn.fujitsu.com>

register nf_conntrack_l3proto_ipv4 in module_init,
and unregister it in module_exit.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index afd3a96..a942add 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -500,6 +500,10 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
 		pr_err("nf_conntrack_ipv4: can't register hooks.\n");
 		goto cleanup_pernet;
 	}
+
+	ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
+	if (ret < 0)
+		goto cleanup_proto;
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 	ret = nf_conntrack_ipv4_compat_init();
 	if (ret < 0)
@@ -508,8 +512,10 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
 	return ret;
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
  cleanup_hooks:
-	nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
+	nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
 #endif
+ cleanup_proto:
+	nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
  cleanup_pernet:
 	unregister_pernet_subsys(&ipv4_net_ops);
  cleanup_sockopt:
@@ -523,6 +529,7 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 	nf_conntrack_ipv4_compat_fini();
 #endif
+	nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
 	nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
 	unregister_pernet_subsys(&ipv4_net_ops);
 	nf_unregister_sockopt(&so_getorigdst);
-- 
1.7.11.7

^ permalink raw reply related

* [PATCH 11/19] netfilter: ipv6: register l3proto ipv6 in module_init
From: Gao feng @ 2012-12-28  2:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, canqunzhang, kaber, pablo, ebiederm, Gao feng
In-Reply-To: <1356662206-2260-1-git-send-email-gaofeng@cn.fujitsu.com>

register nf_conntrack_l3proto_ipv6 in module_init,
and unregister it in module_exit.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 469a15f..07ec50b 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -499,8 +499,13 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
 		       "hook.\n");
 		goto cleanup_ipv6;
 	}
+	ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6);
+	if (ret < 0)
+		goto cleanup_proto;
 	return ret;
 
+ cleanup_proto:
+	nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
  cleanup_ipv6:
 	unregister_pernet_subsys(&ipv6_net_ops);
  cleanup_pernet:
@@ -511,6 +516,7 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
 static void __exit nf_conntrack_l3proto_ipv6_fini(void)
 {
 	synchronize_net();
+	nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
 	nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
 	unregister_pernet_subsys(&ipv6_net_ops);
 	nf_unregister_sockopt(&so_getorigdst6);
-- 
1.7.11.7

^ permalink raw reply related

* [PATCH 10/19] netfilter: ipv4: register l3proto ipv4 in module_init
From: Gao feng @ 2012-12-28  2:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, canqunzhang, kaber, pablo, ebiederm, Gao feng
In-Reply-To: <1356662206-2260-1-git-send-email-gaofeng@cn.fujitsu.com>

register nf_conntrack_l3proto_ipv4 in module_init,
and unregister it in module_exit.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index afd3a96..a942add 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -500,6 +500,10 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
 		pr_err("nf_conntrack_ipv4: can't register hooks.\n");
 		goto cleanup_pernet;
 	}
+
+	ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
+	if (ret < 0)
+		goto cleanup_proto;
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 	ret = nf_conntrack_ipv4_compat_init();
 	if (ret < 0)
@@ -508,8 +512,10 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
 	return ret;
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
  cleanup_hooks:
-	nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
+	nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
 #endif
+ cleanup_proto:
+	nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
  cleanup_pernet:
 	unregister_pernet_subsys(&ipv4_net_ops);
  cleanup_sockopt:
@@ -523,6 +529,7 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 	nf_conntrack_ipv4_compat_fini();
 #endif
+	nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
 	nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
 	unregister_pernet_subsys(&ipv4_net_ops);
 	nf_unregister_sockopt(&so_getorigdst);
-- 
1.7.11.7

^ permalink raw reply related

* [PATCH 15/19] netfilter: sctp: move registration codes out of pernet_operations
From: Gao feng @ 2012-12-28  2:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, canqunzhang, kaber, pablo, ebiederm, Gao feng
In-Reply-To: <1356662206-2260-1-git-send-email-gaofeng@cn.fujitsu.com>

Move the proto (un)registration codes to the module_init/exit context.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 0aa91dd..62ef799 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -891,11 +891,32 @@ static struct pernet_operations sctp_net_ops = {
 
 static int __init nf_conntrack_proto_sctp_init(void)
 {
-	return register_pernet_subsys(&sctp_net_ops);
+	int ret;
+	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp4);
+	if (ret < 0)
+		goto out_sctp4;
+
+	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp6);
+	if (ret < 0)
+		goto out_sctp6;
+
+	ret = register_pernet_subsys(&sctp_net_ops);
+	if (ret < 0)
+		goto out_pernet;
+
+	return 0;
+out_pernet:
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp6);
+out_sctp6:
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
+out_sctp4:
+	return ret;
 }
 
 static void __exit nf_conntrack_proto_sctp_fini(void)
 {
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp6);
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
 	unregister_pernet_subsys(&sctp_net_ops);
 }
 
-- 
1.7.11.7

^ permalink raw reply related

* [PATCH 16/19] netfilter: udplite: move registration codes out of pernet_operations
From: Gao feng @ 2012-12-28  2:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, canqunzhang, kaber, pablo, ebiederm, Gao feng
In-Reply-To: <1356662206-2260-1-git-send-email-gaofeng@cn.fujitsu.com>

Move the proto (un)registration codes to the module_init/exit context.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 net/netfilter/nf_conntrack_proto_udplite.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 56e53c0..6ad1e5c 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -374,11 +374,32 @@ static struct pernet_operations udplite_net_ops = {
 
 static int __init nf_conntrack_proto_udplite_init(void)
 {
-	return register_pernet_subsys(&udplite_net_ops);
+	int ret;
+	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udplite4);
+	if (ret < 0)
+		goto out_udplite4;
+
+	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udplite6);
+	if (ret < 0)
+		goto out_udplite6;
+
+	ret = register_pernet_subsys(&udplite_net_ops);
+	if (ret < 0)
+		goto out_pernet;
+
+	return 0;
+out_pernet:
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite6);
+out_udplite6:
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
+out_udplite4:
+	return ret;
 }
 
 static void __exit nf_conntrack_proto_udplite_exit(void)
 {
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite6);
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
 	unregister_pernet_subsys(&udplite_net_ops);
 }
 
-- 
1.7.11.7

^ permalink raw reply related

* [PATCH 18/19] netfilter: gre: move registration codes out of pernet_operations
From: Gao feng @ 2012-12-28  2:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, canqunzhang, kaber, pablo, ebiederm, Gao feng
In-Reply-To: <1356662206-2260-1-git-send-email-gaofeng@cn.fujitsu.com>

Move the proto (un)registration codes to the module_init/exit context.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 net/netfilter/nf_conntrack_proto_gre.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index f5f14c2..ea1f651 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -420,11 +420,25 @@ static struct pernet_operations proto_gre_net_ops = {
 
 static int __init nf_ct_proto_gre_init(void)
 {
-	return register_pernet_subsys(&proto_gre_net_ops);
+	int ret;
+	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4);
+	if (ret < 0)
+		goto out_gre4;
+
+	ret = register_pernet_subsys(&proto_gre_net_ops);
+	if (ret < 0)
+		goto out_pernet;
+
+	return 0;
+out_pernet:
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4);
+out_gre4:
+	return ret;
 }
 
 static void __exit nf_ct_proto_gre_fini(void)
 {
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4);
 	unregister_pernet_subsys(&proto_gre_net_ops);
 }
 
-- 
1.7.11.7

^ permalink raw reply related

* [PATCH V2] forcedeth: Fix WARNINGS that result when DMA mapping is not checked
From: Larry Finger @ 2012-12-28  3:25 UTC (permalink / raw)
  To: linville, davem; +Cc: linux-wireless, Larry Finger, netdev, linux-kernel

With 3.8-rc1, the first call of pci_map_single() that is not checked
with a corresponding pci_dma_mapping_error() call results in a warning
with a splat as follows:

WARNING: at lib/dma-debug.c:933 check_unmap+0x480/0x950()
Hardware name: HP Pavilion dv2700 Notebook PC
forcedeth 0000:00:0a.0: DMA-API: device driver failed to check
 map error[device address=0x00000000b176e002] [size=90 bytes] [mapped as single]

Reviewed-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
---

V1 => V2: Replaced dev_kfree_skb_any() calls with kfree_skb() in RX
	  Changed TX mapping errors to drop packet, update drop count
	  and return NETDEV_TX_OK
---

 drivers/net/ethernet/nvidia/forcedeth.c |   35 +++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 653487d..87fa591 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -1821,6 +1821,11 @@ static int nv_alloc_rx(struct net_device *dev)
 							     skb->data,
 							     skb_tailroom(skb),
 							     PCI_DMA_FROMDEVICE);
+			if (pci_dma_mapping_error(np->pci_dev,
+						  np->put_rx_ctx->dma)) {
+				kfree_skb(skb);
+				goto packet_dropped;
+			}
 			np->put_rx_ctx->dma_len = skb_tailroom(skb);
 			np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma);
 			wmb();
@@ -1830,6 +1835,7 @@ static int nv_alloc_rx(struct net_device *dev)
 			if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
 				np->put_rx_ctx = np->first_rx_ctx;
 		} else {
+packet_dropped:
 			u64_stats_update_begin(&np->swstats_rx_syncp);
 			np->stat_rx_dropped++;
 			u64_stats_update_end(&np->swstats_rx_syncp);
@@ -1856,6 +1862,11 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
 							     skb->data,
 							     skb_tailroom(skb),
 							     PCI_DMA_FROMDEVICE);
+			if (pci_dma_mapping_error(np->pci_dev,
+						  np->put_rx_ctx->dma)) {
+				kfree_skb(skb);
+				goto packet_dropped;
+			}
 			np->put_rx_ctx->dma_len = skb_tailroom(skb);
 			np->put_rx.ex->bufhigh = cpu_to_le32(dma_high(np->put_rx_ctx->dma));
 			np->put_rx.ex->buflow = cpu_to_le32(dma_low(np->put_rx_ctx->dma));
@@ -1866,6 +1877,7 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
 			if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
 				np->put_rx_ctx = np->first_rx_ctx;
 		} else {
+packet_dropped:
 			u64_stats_update_begin(&np->swstats_rx_syncp);
 			np->stat_rx_dropped++;
 			u64_stats_update_end(&np->swstats_rx_syncp);
@@ -2217,6 +2229,15 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
 		np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
 						PCI_DMA_TODEVICE);
+		if (pci_dma_mapping_error(np->pci_dev,
+					  np->put_tx_ctx->dma)) {
+			/* on DMA mapping error - drop the packet */
+			kfree_skb(skb);
+			u64_stats_update_begin(&np->swstats_tx_syncp);
+			np->stat_tx_dropped++;
+			u64_stats_update_end(&np->swstats_tx_syncp);
+			return NETDEV_TX_OK;
+		}
 		np->put_tx_ctx->dma_len = bcnt;
 		np->put_tx_ctx->dma_single = 1;
 		put_tx->buf = cpu_to_le32(np->put_tx_ctx->dma);
@@ -2337,6 +2358,15 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 		bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
 		np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
 						PCI_DMA_TODEVICE);
+		if (pci_dma_mapping_error(np->pci_dev,
+					  np->put_tx_ctx->dma)) {
+			/* on DMA mapping error - drop the packet */
+			kfree_skb(skb);
+			u64_stats_update_begin(&np->swstats_tx_syncp);
+			np->stat_tx_dropped++;
+			u64_stats_update_end(&np->swstats_tx_syncp);
+			return NETDEV_TX_OK;
+		}
 		np->put_tx_ctx->dma_len = bcnt;
 		np->put_tx_ctx->dma_single = 1;
 		put_tx->bufhigh = cpu_to_le32(dma_high(np->put_tx_ctx->dma));
@@ -5003,6 +5033,11 @@ static int nv_loopback_test(struct net_device *dev)
 	test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data,
 				       skb_tailroom(tx_skb),
 				       PCI_DMA_FROMDEVICE);
+	if (pci_dma_mapping_error(np->pci_dev,
+				  test_dma_addr)) {
+		dev_kfree_skb_any(tx_skb);
+		goto out;
+	}
 	pkt_data = skb_put(tx_skb, pkt_len);
 	for (i = 0; i < pkt_len; i++)
 		pkt_data[i] = (u8)(i & 0xff);
-- 
1.7.10.4

^ permalink raw reply related

* Re: kernel panic when running /etc/init.d/iptables restart
From: canqun zhang @ 2012-12-28  3:27 UTC (permalink / raw)
  To: Gao feng
  Cc: Patrick McHardy, netfilter-devel, netfilter, linux-kernel,
	netdev@vger.kernel.org
In-Reply-To: <CAFFEFTWWNQ_NKw840mQ3gTfB9XR44EXb_=Ft4o+UoHwxv13VRg@mail.gmail.com>

Hi all
As discussed above,if the host machine create several linux
containers, there will be several  net namespaces.Resources with "nf
conntrack" are registered or unregistered on the first net
namespace(init_net),But init_net is not unregistered lastly,so
cleanuping other net namespaces  will triger painic.
If net namespaces are created  with the order of 1,2,...n,they should
be cleaned with the order of n,...2,1,so in this case init_net will be
unregistered lastly.
I fixed it up (see below)

diff -r 6a1a258923f5 -r 2667e89e6f50 net/core/net_namespace.c
--- a/net/core/net_namespace.c  Fri Dec 28 11:01:17 2012 +0800
+++ b/net/core/net_namespace.c  Fri Dec 28 11:05:12 2012 +0800
@@ -450,7 +450,7 @@

        list_del(&ops->list);
        for_each_net(net)
-               list_add_tail(&net->exit_list, &net_exit_list);
+              list_add(&net->exit_list, &net_exit_list);
        ops_exit_list(ops, &net_exit_list);
        ops_free_list(ops, &net_exit_list);
 }

2012/12/25 canqun zhang <canqunzhang@gmail.com>:
> Thanks for your suggestion,i will modify this patch and take tests.
>
> 2012/12/25 Gao feng <gaofeng@cn.fujitsu.com>:
>> On 2012/12/25 15:25, canqun zhang wrote:
>>> Hi Gao feng
>>> The stack information is as follows. The kenel will panic because the
>>> nf_ct_destroy is NULL.
>>>
>>> Reproduction:
>>> (1) starting a lxc container
>>> (2) iptables -t nat -A POSTROUTING -s 10.48.254.18 -o eth1 -j
>>> MASQUERADE (run it on host machine)
>>> (3) /etc/ini.d/iptables save (run it on host machine)
>>> (4)/etc/init.d/iptables restart (run it on host machine)
>>>
>>
>> Thanks!
>> It seems that nf_conntrack_l[3,4]proto_unregister doesn't make sure
>> nf_conns of the proto being destroyed.
>>
>> If I'm right, there is another problem even your fix this panic problem.
>> the l3,14proto will be unregistered before all of it's nf_conns being destroyed.
>> So even nf_ct_destroy is not NULL,in destroy_conntrack we are not able to
>> find the right l4proto,the l4proto->destroy will be incorrect.resources will
>> not be released correctly.
>>
>> So I think the root problem is we do register/unregister, set/unset both on the
>> first net (init_net), Maybe it's better to do register set on the first net, and
>> do unregister unset on the last net.

^ permalink raw reply

* Re: kernel panic when running /etc/init.d/iptables restart
From: canqun zhang @ 2012-12-28  3:39 UTC (permalink / raw)
  To: Gao feng
  Cc: Patrick McHardy, netfilter-devel, netfilter, linux-kernel,
	netdev@vger.kernel.org
In-Reply-To: <50D965F9.7090007@cn.fujitsu.com>

Hi all
As discussed above,if the host machine create several linux
containers, there will be several  net namespaces.Resources with "nf
conntrack" are registered or unregistered on the first net
namespace(init_net),But init_net is not unregistered lastly,so
cleanuping other net namespaces  will triger painic.
If net namespaces are created  with the order of 1,2,...n,they should
be cleaned with the order of n,...2,1,so in this case init_net will be
unregistered lastly.
I fixed it up (see below)

diff -r 6a1a258923f5 -r 2667e89e6f50 net/core/net_namespace.c
--- a/net/core/net_namespace.c  Fri Dec 28 11:01:17 2012 +0800
+++ b/net/core/net_namespace.c  Fri Dec 28 11:05:12 2012 +0800
@@ -450,7 +450,7 @@

        list_del(&ops->list);
        for_each_net(net)
-               list_add_tail(&net->exit_list, &net_exit_list);
+              list_add(&net->exit_list, &net_exit_list);
        ops_exit_list(ops, &net_exit_list);
        ops_free_list(ops, &net_exit_list);
 }

2012/12/25 Gao feng <gaofeng@cn.fujitsu.com>:
> On 2012/12/25 15:25, canqun zhang wrote:
>> Hi Gao feng
>> The stack information is as follows. The kenel will panic because the
>> nf_ct_destroy is NULL.
>>
>> Reproduction:
>> (1) starting a lxc container
>> (2) iptables -t nat -A POSTROUTING -s 10.48.254.18 -o eth1 -j
>> MASQUERADE (run it on host machine)
>> (3) /etc/ini.d/iptables save (run it on host machine)
>> (4)/etc/init.d/iptables restart (run it on host machine)
>>
>
> Thanks!
> It seems that nf_conntrack_l[3,4]proto_unregister doesn't make sure
> nf_conns of the proto being destroyed.
>
> If I'm right, there is another problem even your fix this panic problem.
> the l3,14proto will be unregistered before all of it's nf_conns being destroyed.
> So even nf_ct_destroy is not NULL,in destroy_conntrack we are not able to
> find the right l4proto,the l4proto->destroy will be incorrect.resources will
> not be released correctly.
>
> So I think the root problem is we do register/unregister, set/unset both on the
> first net (init_net), Maybe it's better to do register set on the first net, and
> do unregister unset on the last net.

^ permalink raw reply

* Re: [PATCH 01/19] netfilter: move nf_conntrack initialize out of pernet operations
From: canqun zhang @ 2012-12-28  3:52 UTC (permalink / raw)
  To: Gao feng
  Cc: netfilter-devel, netdev@vger.kernel.org, Patrick McHardy, pablo,
	ebiederm
In-Reply-To: <1356662206-2260-1-git-send-email-gaofeng@cn.fujitsu.com>

Hi all
As discussed above,if the host machine create several linux
containers, there will be several  net namespaces.Resources with "nf
conntrack" are registered or unregistered on the first net
namespace(init_net),But init_net is not unregistered lastly,so
cleanuping other net namespaces  will triger painic.
If net namespaces are created  with the order of 1,2,...n,they should
be cleaned with the order of n,...2,1,so in this case init_net will be
unregistered lastly.
I fixed it up (see below). I have taken a lot of test!

diff -r 6a1a258923f5 -r 2667e89e6f50 net/core/net_namespace.c
--- a/net/core/net_namespace.c  Fri Dec 28 11:01:17 2012 +0800
+++ b/net/core/net_namespace.c  Fri Dec 28 11:05:12 2012 +0800
@@ -450,7 +450,7 @@

        list_del(&ops->list);
        for_each_net(net)
-               list_add_tail(&net->exit_list, &net_exit_list);
+              list_add(&net->exit_list, &net_exit_list);
        ops_exit_list(ops, &net_exit_list);
        ops_free_list(ops, &net_exit_lis

2012/12/28 Gao feng <gaofeng@cn.fujitsu.com>:
> canqun zhang reported a panic BUG,kernel may panic when
> unloading nf_conntrack module.
>
> It's because we reset nf_ct_destroy to NULL when we deal
> with init_net,it's too early.Some packets belongs to other
> netns still refers to the conntrack.when these packets need
> to be freed, kfree_skb will call nf_ct_destroy which is
> NULL.
>
> fix this bug by moving the nf_conntrack initialize and cleanup
> codes out of the pernet operations,this job should be done
> in module_init/exit.We can't use init_net to identify if
> it's the right time.
>
> Reported-by: canqun zhang <canqunzhang@gmail.com>
> Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
> ---
>  include/net/netfilter/nf_conntrack_core.h | 10 +++-
>  net/netfilter/nf_conntrack_core.c         | 99 ++++++++++++-------------------
>  net/netfilter/nf_conntrack_standalone.c   | 29 ++++++---
>  3 files changed, 67 insertions(+), 71 deletions(-)
>
> diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
> index d8f5b9f..ec51a3c 100644
> --- a/include/net/netfilter/nf_conntrack_core.h
> +++ b/include/net/netfilter/nf_conntrack_core.h
> @@ -25,8 +25,14 @@ extern unsigned int nf_conntrack_in(struct net *net,
>                                     unsigned int hooknum,
>                                     struct sk_buff *skb);
>
> -extern int nf_conntrack_init(struct net *net);
> -extern void nf_conntrack_cleanup(struct net *net);
> +extern int nf_conntrack_init_net(struct net *net);
> +extern void nf_conntrack_cleanup_net(struct net *net);
> +
> +extern int nf_conntrack_init_start(void);
> +extern void nf_conntrack_cleanup_start(void);
> +
> +extern void nf_conntrack_init_end(void);
> +extern void nf_conntrack_cleanup_end(void);
>
>  extern int nf_conntrack_proto_init(struct net *net);
>  extern void nf_conntrack_proto_fini(struct net *net);
> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
> index 08cdc71..ffb2463 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -1331,18 +1331,23 @@ static int untrack_refs(void)
>         return cnt;
>  }
>
> -static void nf_conntrack_cleanup_init_net(void)
> +void nf_conntrack_cleanup_start(void)
>  {
> -       while (untrack_refs() > 0)
> -               schedule();
> -
> -#ifdef CONFIG_NF_CONNTRACK_ZONES
> -       nf_ct_extend_unregister(&nf_ct_zone_extend);
> -#endif
> +       RCU_INIT_POINTER(ip_ct_attach, NULL);
>  }
>
> -static void nf_conntrack_cleanup_net(struct net *net)
> +/*
> + * Mishearing the voices in his head, our hero wonders how he's
> + * supposed to kill the mall.
> + */
> +void nf_conntrack_cleanup_net(struct net *net)
>  {
> +       /*
> +        * This makes sure all current packets have passed through
> +        * netfilter framework.  Roll on, two-stage module
> +        * delete...
> +        */
> +       synchronize_net();
>   i_see_dead_people:
>         nf_ct_iterate_cleanup(net, kill_all, NULL);
>         nf_ct_release_dying_list(net);
> @@ -1352,6 +1357,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
>         }
>
>         nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
> +       nf_conntrack_proto_fini(net);
>         nf_conntrack_helper_fini(net);
>         nf_conntrack_timeout_fini(net);
>         nf_conntrack_ecache_fini(net);
> @@ -1363,24 +1369,15 @@ static void nf_conntrack_cleanup_net(struct net *net)
>         free_percpu(net->ct.stat);
>  }
>
> -/* Mishearing the voices in his head, our hero wonders how he's
> -   supposed to kill the mall. */
> -void nf_conntrack_cleanup(struct net *net)
> +void nf_conntrack_cleanup_end(void)
>  {
> -       if (net_eq(net, &init_net))
> -               RCU_INIT_POINTER(ip_ct_attach, NULL);
> -
> -       /* This makes sure all current packets have passed through
> -          netfilter framework.  Roll on, two-stage module
> -          delete... */
> -       synchronize_net();
> -       nf_conntrack_proto_fini(net);
> -       nf_conntrack_cleanup_net(net);
> +       RCU_INIT_POINTER(nf_ct_destroy, NULL);
> +       while (untrack_refs() > 0)
> +               schedule();
>
> -       if (net_eq(net, &init_net)) {
> -               RCU_INIT_POINTER(nf_ct_destroy, NULL);
> -               nf_conntrack_cleanup_init_net();
> -       }
> +#ifdef CONFIG_NF_CONNTRACK_ZONES
> +       nf_ct_extend_unregister(&nf_ct_zone_extend);
> +#endif
>  }
>
>  void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
> @@ -1473,7 +1470,7 @@ void nf_ct_untracked_status_or(unsigned long bits)
>  }
>  EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
>
> -static int nf_conntrack_init_init_net(void)
> +int nf_conntrack_init_start(void)
>  {
>         int max_factor = 8;
>         int ret, cpu;
> @@ -1527,7 +1524,7 @@ err_extend:
>  #define UNCONFIRMED_NULLS_VAL  ((1<<30)+0)
>  #define DYING_NULLS_VAL                ((1<<30)+1)
>
> -static int nf_conntrack_init_net(struct net *net)
> +int nf_conntrack_init_net(struct net *net)
>  {
>         int ret;
>
> @@ -1580,7 +1577,12 @@ static int nf_conntrack_init_net(struct net *net)
>         ret = nf_conntrack_helper_init(net);
>         if (ret < 0)
>                 goto err_helper;
> +       ret = nf_conntrack_proto_init(net);
> +       if (ret < 0)
> +               goto out_proto;
>         return 0;
> +out_proto:
> +       nf_conntrack_helper_fini(net);
>  err_helper:
>         nf_conntrack_timeout_fini(net);
>  err_timeout:
> @@ -1603,42 +1605,17 @@ err_stat:
>         return ret;
>  }
>
> +void nf_conntrack_init_end(void)
> +{
> +       /* For use by REJECT target */
> +       RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
> +       RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
> +
> +       /* Howto get NAT offsets */
> +       RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
> +}
> +
>  s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
>                         enum ip_conntrack_dir dir,
>                         u32 seq);
>  EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
> -
> -int nf_conntrack_init(struct net *net)
> -{
> -       int ret;
> -
> -       if (net_eq(net, &init_net)) {
> -               ret = nf_conntrack_init_init_net();
> -               if (ret < 0)
> -                       goto out_init_net;
> -       }
> -       ret = nf_conntrack_proto_init(net);
> -       if (ret < 0)
> -               goto out_proto;
> -       ret = nf_conntrack_init_net(net);
> -       if (ret < 0)
> -               goto out_net;
> -
> -       if (net_eq(net, &init_net)) {
> -               /* For use by REJECT target */
> -               RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
> -               RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
> -
> -               /* Howto get NAT offsets */
> -               RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
> -       }
> -       return 0;
> -
> -out_net:
> -       nf_conntrack_proto_fini(net);
> -out_proto:
> -       if (net_eq(net, &init_net))
> -               nf_conntrack_cleanup_init_net();
> -out_init_net:
> -       return ret;
> -}
> diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
> index 363285d..00bf93c 100644
> --- a/net/netfilter/nf_conntrack_standalone.c
> +++ b/net/netfilter/nf_conntrack_standalone.c
> @@ -530,11 +530,11 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net)
>  }
>  #endif /* CONFIG_SYSCTL */
>
> -static int nf_conntrack_net_init(struct net *net)
> +static int nf_conntrack_pernet_init(struct net *net)
>  {
>         int ret;
>
> -       ret = nf_conntrack_init(net);
> +       ret = nf_conntrack_init_net(net);
>         if (ret < 0)
>                 goto out_init;
>         ret = nf_conntrack_standalone_init_proc(net);
> @@ -550,31 +550,44 @@ static int nf_conntrack_net_init(struct net *net)
>  out_sysctl:
>         nf_conntrack_standalone_fini_proc(net);
>  out_proc:
> -       nf_conntrack_cleanup(net);
> +       nf_conntrack_cleanup_net(net);
>  out_init:
>         return ret;
>  }
>
> -static void nf_conntrack_net_exit(struct net *net)
> +static void nf_conntrack_pernet_exit(struct net *net)
>  {
>         nf_conntrack_standalone_fini_sysctl(net);
>         nf_conntrack_standalone_fini_proc(net);
> -       nf_conntrack_cleanup(net);
> +       nf_conntrack_cleanup_net(net);
>  }
>
>  static struct pernet_operations nf_conntrack_net_ops = {
> -       .init = nf_conntrack_net_init,
> -       .exit = nf_conntrack_net_exit,
> +       .init = nf_conntrack_pernet_init,
> +       .exit = nf_conntrack_pernet_exit,
>  };
>
>  static int __init nf_conntrack_standalone_init(void)
>  {
> -       return register_pernet_subsys(&nf_conntrack_net_ops);
> +       int ret = nf_conntrack_init_start();
> +       if (ret < 0)
> +               goto out_start;
> +       ret = register_pernet_subsys(&nf_conntrack_net_ops);
> +       if (ret < 0)
> +               goto out_pernet;
> +       nf_conntrack_init_end();
> +       return 0;
> +out_pernet:
> +       nf_conntrack_cleanup_end();
> +out_start:
> +       return ret;
>  }
>
>  static void __exit nf_conntrack_standalone_fini(void)
>  {
> +       nf_conntrack_cleanup_start();
>         unregister_pernet_subsys(&nf_conntrack_net_ops);
> +       nf_conntrack_cleanup_end();
>  }
>
>  module_init(nf_conntrack_standalone_init);
> --
> 1.7.11.7
>

^ permalink raw reply

* Re: [PATCH 2/2] vhost: handle polling failure
From: Jason Wang @ 2012-12-28  4:29 UTC (permalink / raw)
  To: gaowanlong; +Cc: netdev, virtualization, linux-kernel, kvm, mst
In-Reply-To: <50DC1C8F.3020008@cn.fujitsu.com>

On 12/27/2012 06:01 PM, Wanlong Gao wrote:
> On 12/27/2012 02:39 PM, Jason Wang wrote:
>> > Currently, polling error were ignored in vhost. This may lead some issues (e.g
>> > kenrel crash when passing a tap fd to vhost before calling TUNSETIFF). Fix this
>> > by:
> Can this kernel crash be reproduced by hand?
>
> Thanks,
> Wanlong Gao
>
>> > 
Yes, it could be simply reproduced by: open a tap fd but does not cal
TUNSETIFF, then pass it to qemu and enable vhost.

^ permalink raw reply

* Re: [PATCH] bnx2x: use ARRAY_SIZE where possible
From: David Miller @ 2012-12-28  4:30 UTC (permalink / raw)
  To: eilong; +Cc: sasha.levin, netdev, linux-kernel
In-Reply-To: <1356252567.15507.1.camel@lb-tlvb-eilong.il.broadcom.com>

From: "Eilon Greenstein" <eilong@broadcom.com>
Date: Sun, 23 Dec 2012 10:49:27 +0200

> On Thu, 2012-12-20 at 14:11 -0500, Sasha Levin wrote:
>> Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
> 
> Acked-by Eilon Greenstein <eilong@broadcom.com>

Applied.

^ permalink raw reply

* net-next is OPEN
From: David Miller @ 2012-12-28  4:31 UTC (permalink / raw)
  To: netdev; +Cc: netfilter-devel, linux-wireless


The net-next tree is now open for submissions.

Thanks.

^ permalink raw reply

* Re: [PATCH 01/19] netfilter: move nf_conntrack initialize out of pernet operations
From: Eric W. Biederman @ 2012-12-28  4:48 UTC (permalink / raw)
  To: canqun zhang
  Cc: Gao feng, netfilter-devel, netdev@vger.kernel.org,
	Patrick McHardy, pablo
In-Reply-To: <CAFFEFTXT_fkF2pPSxDEEgic80NVWLqBWtFuvs6W9uDUW2aCnqw@mail.gmail.com>

canqun zhang <canqunzhang@gmail.com> writes:

> Hi all
> As discussed above,if the host machine create several linux
> containers, there will be several  net namespaces.Resources with "nf
> conntrack" are registered or unregistered on the first net
> namespace(init_net),But init_net is not unregistered lastly,so
> cleanuping other net namespaces  will triger painic.
> If net namespaces are created  with the order of 1,2,...n,they should
> be cleaned with the order of n,...2,1,so in this case init_net will be
> unregistered lastly.

No.  Network namespaces in general can be cleaned up in any order.

In particular you should never ever expect to see the order
n,n-1,n-2,...,2,1.

It may make sense to special case init_net in the cleanup order
but I would really rather not.

Now init_net is special and really should never be cleaned up
for non-modular code.  So it almost makes sense to special
case init_net.

Does anyone know why Alexy decided to do this only for init_net?

My inclination is that Gao Feng is on the rigt path by just removing
the strange init_net special case and performing the work once
per module load, and once per module unload.

> I fixed it up (see below). I have taken a lot of test!

Thank you.

It is nice to see that we have exposed this mis-assumption.

I am inclined to leave the order of this list as is so that
other assumptions of network namespace unregistration order
are exposed.

Unless there is a truly good reason to perform magic on init_net.

Eric

> diff -r 6a1a258923f5 -r 2667e89e6f50 net/core/net_namespace.c
> --- a/net/core/net_namespace.c  Fri Dec 28 11:01:17 2012 +0800
> +++ b/net/core/net_namespace.c  Fri Dec 28 11:05:12 2012 +0800
> @@ -450,7 +450,7 @@
>
>         list_del(&ops->list);
>         for_each_net(net)
> -               list_add_tail(&net->exit_list, &net_exit_list);
> +              list_add(&net->exit_list, &net_exit_list);
>         ops_exit_list(ops, &net_exit_list);
>         ops_free_list(ops, &net_exit_lis
>

^ permalink raw reply

* Re: [PATCH 1/2] vhost_net: correct error hanlding in vhost_net_set_backend()
From: Jason Wang @ 2012-12-28  4:58 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization
In-Reply-To: <20121227130305.GE20595@redhat.com>

On 12/27/2012 09:03 PM, Michael S. Tsirkin wrote:
> On Thu, Dec 27, 2012 at 02:39:20PM +0800, Jason Wang wrote:
>> Currently, polling error were ignored in vhost. This may lead some issues (e.g
>> kenrel crash when passing a tap fd to vhost before calling TUNSETIFF). Fix this
>> by:
>>
>> - extend the idea of vhost_net_poll_state to all vhost_polls
>> - change the state only when polling is succeed
>> - make vhost_poll_start() report errors to the caller, which could be used
>>   caller or userspace.
> Maybe it could but this patch just ignores these errors.
> And it's not clear how would userspace handle these errors.

Not all were ignored, one example is vhost_net_enable_vq(), this could
be used to let userspace know the fd were not setup correctly.
> Also, since we have a reference on the fd, it would seem
> that once poll succeeds it can't fail in the future.

Right.
>
> So two other options would make more sense to me:
> - if vhost is bound to tun without SETIFF, fail this immediately
> - if vhost is bound to tun without SETIFF, defer polling
>   until SETIFF
>
> Option 1 would seem much easier to implement, I think it's
> preferable.

Option 1 seems better, since userspace may also disable a queue in the
meantime. Will add a vq_err() and break out of the loop when fails to
start the polling.
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>  drivers/vhost/net.c   |   75 +++++++++++++++++--------------------------------
>>  drivers/vhost/vhost.c |   16 +++++++++-
>>  drivers/vhost/vhost.h |   11 ++++++-
>>  3 files changed, 50 insertions(+), 52 deletions(-)
>>
>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
>> index 629d6b5..56e7f5a 100644
>> --- a/drivers/vhost/net.c
>> +++ b/drivers/vhost/net.c
>> @@ -64,20 +64,10 @@ enum {
>>  	VHOST_NET_VQ_MAX = 2,
>>  };
>>  
>> -enum vhost_net_poll_state {
>> -	VHOST_NET_POLL_DISABLED = 0,
>> -	VHOST_NET_POLL_STARTED = 1,
>> -	VHOST_NET_POLL_STOPPED = 2,
>> -};
>> -
>>  struct vhost_net {
>>  	struct vhost_dev dev;
>>  	struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
>>  	struct vhost_poll poll[VHOST_NET_VQ_MAX];
>> -	/* Tells us whether we are polling a socket for TX.
>> -	 * We only do this when socket buffer fills up.
>> -	 * Protected by tx vq lock. */
>> -	enum vhost_net_poll_state tx_poll_state;
>>  	/* Number of TX recently submitted.
>>  	 * Protected by tx vq lock. */
>>  	unsigned tx_packets;
>> @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
>>  	}
>>  }
>>  
>> -/* Caller must have TX VQ lock */
>> -static void tx_poll_stop(struct vhost_net *net)
>> -{
>> -	if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
>> -		return;
>> -	vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
>> -	net->tx_poll_state = VHOST_NET_POLL_STOPPED;
>> -}
>> -
>> -/* Caller must have TX VQ lock */
>> -static void tx_poll_start(struct vhost_net *net, struct socket *sock)
>> -{
>> -	if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
>> -		return;
>> -	vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
>> -	net->tx_poll_state = VHOST_NET_POLL_STARTED;
>> -}
>> -
>>  /* In case of DMA done not in order in lower device driver for some reason.
>>   * upend_idx is used to track end of used idx, done_idx is used to track head
>>   * of used idx. Once lower device DMA done contiguously, we will signal KVM
>> @@ -252,7 +224,7 @@ static void handle_tx(struct vhost_net *net)
>>  	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
>>  	if (wmem >= sock->sk->sk_sndbuf) {
>>  		mutex_lock(&vq->mutex);
>> -		tx_poll_start(net, sock);
>> +		vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
>>  		mutex_unlock(&vq->mutex);
>>  		return;
>>  	}
>> @@ -261,7 +233,7 @@ static void handle_tx(struct vhost_net *net)
>>  	vhost_disable_notify(&net->dev, vq);
>>  
>>  	if (wmem < sock->sk->sk_sndbuf / 2)
>> -		tx_poll_stop(net);
>> +		vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
>>  	hdr_size = vq->vhost_hlen;
>>  	zcopy = vq->ubufs;
>>  
>> @@ -283,7 +255,8 @@ static void handle_tx(struct vhost_net *net)
>>  
>>  			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
>>  			if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
>> -				tx_poll_start(net, sock);
>> +				vhost_poll_start(net->poll + VHOST_NET_VQ_TX,
>> +						 sock->file);
>>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
>>  				break;
>>  			}
>> @@ -294,7 +267,8 @@ static void handle_tx(struct vhost_net *net)
>>  				    (vq->upend_idx - vq->done_idx) :
>>  				    (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
>>  			if (unlikely(num_pends > VHOST_MAX_PEND)) {
>> -				tx_poll_start(net, sock);
>> +				vhost_poll_start(net->poll + VHOST_NET_VQ_TX,
>> +						 sock->file);
>>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
>>  				break;
>>  			}
>> @@ -360,7 +334,8 @@ static void handle_tx(struct vhost_net *net)
>>  			}
>>  			vhost_discard_vq_desc(vq, 1);
>>  			if (err == -EAGAIN || err == -ENOBUFS)
>> -				tx_poll_start(net, sock);
>> +				vhost_poll_start(net->poll + VHOST_NET_VQ_TX,
>> +						 sock->file);
>>  			break;
>>  		}
>>  		if (err != len)
>> @@ -623,7 +598,6 @@ static int vhost_net_open(struct inode *inode, struct file *f)
>>  
>>  	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
>>  	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
>> -	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
>>  
>>  	f->private_data = n;
>>  
>> @@ -635,27 +609,26 @@ static void vhost_net_disable_vq(struct vhost_net *n,
>>  {
>>  	if (!vq->private_data)
>>  		return;
>> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
>> -		tx_poll_stop(n);
>> -		n->tx_poll_state = VHOST_NET_POLL_DISABLED;
>> -	} else
>> +	if (vq == n->vqs + VHOST_NET_VQ_TX)
>> +		vhost_poll_stop(n->poll + VHOST_NET_VQ_TX);
>> +	else
>>  		vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
>>  }
>>  
>> -static void vhost_net_enable_vq(struct vhost_net *n,
>> -				struct vhost_virtqueue *vq)
>> +static int vhost_net_enable_vq(struct vhost_net *n,
>> +			       struct vhost_virtqueue *vq)
>>  {
>> +	int err, index = vq - n->vqs;
>>  	struct socket *sock;
>>  
>>  	sock = rcu_dereference_protected(vq->private_data,
>>  					 lockdep_is_held(&vq->mutex));
>>  	if (!sock)
>> -		return;
>> -	if (vq == n->vqs + VHOST_NET_VQ_TX) {
>> -		n->tx_poll_state = VHOST_NET_POLL_STOPPED;
>> -		tx_poll_start(n, sock);
>> -	} else
>> -		vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
>> +		return 0;
>> +
>> +	n->poll[index].state = VHOST_POLL_STOPPED;
>> +	err = vhost_poll_start(n->poll + index, sock->file);
>> +	return err;
>>  }
>>  
>>  static struct socket *vhost_net_stop_vq(struct vhost_net *n,
>> @@ -831,12 +804,16 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
>>  		vq->ubufs = ubufs;
>>  		vhost_net_disable_vq(n, vq);
>>  		rcu_assign_pointer(vq->private_data, sock);
>> -		vhost_net_enable_vq(n, vq);
>> +		r = vhost_net_enable_vq(n, vq);
>> +		if (r) {
>> +			sock = NULL;
>> +			goto err_enable;
>> +		}
>>  
>>  		r = vhost_init_used(vq);
>>  		if (r) {
>>  			sock = NULL;
>> -			goto err_used;
>> +			goto err_enable;
>>  		}
>>  
>>  		n->tx_packets = 0;
>> @@ -861,7 +838,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
>>  	mutex_unlock(&n->dev.mutex);
>>  	return 0;
>>  
>> -err_used:
>> +err_enable:
>>  	if (oldubufs)
>>  		vhost_ubuf_put_and_wait(oldubufs);
>>  	if (oldsock)
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index 34389f7..1cb2604 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -77,26 +77,36 @@ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
>>  	init_poll_funcptr(&poll->table, vhost_poll_func);
>>  	poll->mask = mask;
>>  	poll->dev = dev;
>> +	poll->state = VHOST_POLL_DISABLED;
>>  
>>  	vhost_work_init(&poll->work, fn);
>>  }
>>  
>>  /* Start polling a file. We add ourselves to file's wait queue. The caller must
>>   * keep a reference to a file until after vhost_poll_stop is called. */
>> -void vhost_poll_start(struct vhost_poll *poll, struct file *file)
>> +int vhost_poll_start(struct vhost_poll *poll, struct file *file)
>>  {
>>  	unsigned long mask;
>> +	if (unlikely(poll->state != VHOST_POLL_STOPPED))
>> +		return 0;
>>  
>>  	mask = file->f_op->poll(file, &poll->table);
>> +	if (mask & POLLERR)
>> +		return -EINVAL;
>>  	if (mask)
>>  		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
>> +	poll->state = VHOST_POLL_STARTED;
>> +	return 0;
>>  }
>>  
> Hmm, interesting. I note that tun has this:
>
>        if (tun->dev->reg_state != NETREG_REGISTERED)
>                 mask = POLLERR;
>
> So apparently we sometimes return POLLERR when poll
> did succeed, then test below wouldn't remove
> from wqh in this case. Maybe it's a bug in tun,
> need to look into this.

Looks a bug of tun to me, looks like a POLLHUP is better here.
>
>>  /* Stop polling a file. After this function returns, it becomes safe to drop the
>>   * file reference. You must also flush afterwards. */
>>  void vhost_poll_stop(struct vhost_poll *poll)
>>  {
>> +	if (likely(poll->state != VHOST_POLL_STARTED))
>> +		return;
>>  	remove_wait_queue(poll->wqh, &poll->wait);
>> +	poll->state = VHOST_POLL_STOPPED;
>>  }
>>  
>>  static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
>> @@ -791,8 +801,10 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
>>  	if (filep)
>>  		fput(filep);
>>  
>> -	if (pollstart && vq->handle_kick)
>> +	if (pollstart && vq->handle_kick) {
>> +		vq->poll.state = VHOST_POLL_STOPPED;
>>  		vhost_poll_start(&vq->poll, vq->kick);
>> +	}
>>  
>>  	mutex_unlock(&vq->mutex);
>>  
>> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
>> index 2639c58..98861d9 100644
>> --- a/drivers/vhost/vhost.h
>> +++ b/drivers/vhost/vhost.h
>> @@ -26,6 +26,12 @@ struct vhost_work {
>>  	unsigned		  done_seq;
>>  };
>>  
>> +enum vhost_poll_state {
>> +	VHOST_POLL_DISABLED = 0,
>> +	VHOST_POLL_STARTED = 1,
>> +	VHOST_POLL_STOPPED = 2,
>> +};
>> +
>>  /* Poll a file (eventfd or socket) */
>>  /* Note: there's nothing vhost specific about this structure. */
>>  struct vhost_poll {
>> @@ -35,6 +41,9 @@ struct vhost_poll {
>>  	struct vhost_work	  work;
>>  	unsigned long		  mask;
>>  	struct vhost_dev	 *dev;
>> +	/* Tells us whether we are polling a file.
>> +	 * Protected by tx vq lock. */
> tx vq lock does not make sense in this context.

Yes, thanks for pointing this out.
>> +	enum vhost_poll_state	  state;
>>  };
>>  
>>  void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
>> @@ -42,7 +51,7 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work);
>>  
>>  void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
>>  		     unsigned long mask, struct vhost_dev *dev);
>> -void vhost_poll_start(struct vhost_poll *poll, struct file *file);
>> +int vhost_poll_start(struct vhost_poll *poll, struct file *file);
>>  void vhost_poll_stop(struct vhost_poll *poll);
>>  void vhost_poll_flush(struct vhost_poll *poll);
>>  void vhost_poll_queue(struct vhost_poll *poll);
>> -- 
>> 1.7.1

^ permalink raw reply

* Re: [PATCH 08/14] xen: netback: Remove redundant check on unsigned variable
From: Tushar Behera @ 2012-12-28  5:15 UTC (permalink / raw)
  To: Ian Campbell
  Cc: linux-kernel@vger.kernel.org, patches@linaro.org,
	xen-devel@lists.xensource.com, netdev@vger.kernel.org
In-Reply-To: <1353057394.3499.159.camel@zakaz.uk.xensource.com>

On 11/16/2012 02:46 PM, Ian Campbell wrote:
> On Fri, 2012-11-16 at 06:50 +0000, Tushar Behera wrote:
>> No need to check whether unsigned variable is less than 0.
>>
>> CC: Ian Campbell <ian.campbell@citrix.com>
>> CC: xen-devel@lists.xensource.com
>> CC: netdev@vger.kernel.org
>> Signed-off-by: Tushar Behera <tushar.behera@linaro.org>
> 
> Acked-by: Ian Campbell <ian.campbell@citrix.com>
> 
> Thanks.
> 

This patch was not picked up for 3.8-rc1. Any idea, who should pick this up?

>> ---
>>  drivers/net/xen-netback/netback.c |    4 ++--
>>  1 files changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
>> index aab8677..515e10c 100644
>> --- a/drivers/net/xen-netback/netback.c
>> +++ b/drivers/net/xen-netback/netback.c
>> @@ -190,14 +190,14 @@ static int get_page_ext(struct page *pg,
>>  
>>  	group = ext.e.group - 1;
>>  
>> -	if (group < 0 || group >= xen_netbk_group_nr)
>> +	if (group >= xen_netbk_group_nr)
>>  		return 0;
>>  
>>  	netbk = &xen_netbk[group];
>>  
>>  	idx = ext.e.idx;
>>  
>> -	if ((idx < 0) || (idx >= MAX_PENDING_REQS))
>> +	if (idx >= MAX_PENDING_REQS)
>>  		return 0;
>>  
>>  	if (netbk->mmap_pages[idx] != pg)
> 
> 


-- 
Tushar Behera

^ permalink raw reply

* Re: [PATCH 10/14] atm: Removed redundant check on unsigned variable
From: Tushar Behera @ 2012-12-28  5:16 UTC (permalink / raw)
  To: linux-kernel; +Cc: patches, Chas Williams, linux-atm-general, netdev
In-Reply-To: <1353048646-10935-11-git-send-email-tushar.behera@linaro.org>

Ping.

On 11/16/2012 12:20 PM, Tushar Behera wrote:
> No need to check whether unsigned variable is less than 0.
> 
> CC: Chas Williams <chas@cmf.nrl.navy.mil>
> CC: linux-atm-general@lists.sourceforge.net
> CC: netdev@vger.kernel.org
> Signed-off-by: Tushar Behera <tushar.behera@linaro.org>
> ---
>  drivers/atm/fore200e.c |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
> index 361f5ae..fdd3fe7 100644
> --- a/drivers/atm/fore200e.c
> +++ b/drivers/atm/fore200e.c
> @@ -972,7 +972,7 @@ int bsq_audit(int where, struct host_bsq* bsq, int scheme, int magn)
>  		   where, scheme, magn, buffer->index, buffer->scheme);
>  	}
>  
> -	if ((buffer->index < 0) || (buffer->index >= fore200e_rx_buf_nbr[ scheme ][ magn ])) {
> +	if (buffer->index >= fore200e_rx_buf_nbr[ scheme ][ magn ]) {
>  	    printk(FORE200E "bsq_audit(%d): queue %d.%d, out of range buffer index = %ld !\n",
>  		   where, scheme, magn, buffer->index);
>  	}
> 


-- 
Tushar Behera

^ permalink raw reply

* Re: [PATCH 1/2] vhost_net: correct error hanlding in vhost_net_set_backend()
From: Jason Wang @ 2012-12-28  5:31 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization
In-Reply-To: <20121227131442.GF20595@redhat.com>

On 12/27/2012 09:14 PM, Michael S. Tsirkin wrote:
> On Thu, Dec 27, 2012 at 02:39:19PM +0800, Jason Wang wrote:
>> Fix the leaking of oldubufs and fd refcnt when fail to initialized used ring.
>>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>  drivers/vhost/net.c |   14 +++++++++++---
>>  1 files changed, 11 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
>> index ebd08b2..629d6b5 100644
>> --- a/drivers/vhost/net.c
>> +++ b/drivers/vhost/net.c
>> @@ -834,8 +834,10 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
>>  		vhost_net_enable_vq(n, vq);
>>  
>>  		r = vhost_init_used(vq);
>> -		if (r)
>> -			goto err_vq;
>> +		if (r) {
>> +			sock = NULL;
>> +			goto err_used;
>> +		}
>>  
>>  		n->tx_packets = 0;
>>  		n->tx_zcopy_err = 0;
>> @@ -859,8 +861,14 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
>>  	mutex_unlock(&n->dev.mutex);
>>  	return 0;
>>  
>> +err_used:
>> +	if (oldubufs)
>> +		vhost_ubuf_put_and_wait(oldubufs);
>> +	if (oldsock)
>> +		fput(oldsock->file);
>>  err_ubufs:
>> -	fput(sock->file);
>> +	if (sock)
>> +		fput(sock->file);
>>  err_vq:
>>  	mutex_unlock(&vq->mutex);
>>  err:
> I think it's a real bug, but I don't see how the fix
> makes sense.
> We are returning an error, so we ideally
> revert to the state before the faulty
> operation. So this should put sock and ubufs,
> not oldsock/oldubufs.

Agree.
>
> The best way is probably to change
> vhost_init_used so that it gets private data
> pointer as a parameter.
>
> We can then call it before ubuf alloc.
> You can then add err_used right after err_ubufs
> with no extra logic.
>

Make more sense, thanks.
>
>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox