* [PATCH 10/38] netns ct: per-netns expectations
@ 2008-08-21 22:00 adobriyan
2008-09-04 16:43 ` Patrick McHardy
0 siblings, 1 reply; 88+ messages in thread
From: adobriyan @ 2008-08-21 22:00 UTC (permalink / raw)
To: kaber; +Cc: netfilter-devel, netdev, containers
Make the expectation hash and the expectation count per-netns.
An expectation always belongs to the netns to which its master conntrack belongs.
This is natural and avoids bloating expectations with a netns pointer of their own.
Proc files and leaf users in protocol modules are stubbed to init_net;
this is temporary.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
include/net/netfilter/nf_conntrack_expect.h | 20 ++++--
include/net/netns/conntrack.h | 3 +
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 -
net/ipv4/netfilter/nf_nat_pptp.c | 2
net/netfilter/nf_conntrack_core.c | 8 +-
net/netfilter/nf_conntrack_expect.c | 53 ++++++++----------
net/netfilter/nf_conntrack_h323_main.c | 2
net/netfilter/nf_conntrack_helper.c | 2
net/netfilter/nf_conntrack_netlink.c | 12 ++--
net/netfilter/nf_conntrack_pptp.c | 4 -
net/netfilter/nf_conntrack_sip.c | 3 -
11 files changed, 61 insertions(+), 52 deletions(-)
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -6,7 +6,6 @@
#define _NF_CONNTRACK_EXPECT_H
#include <net/netfilter/nf_conntrack.h>
-extern struct hlist_head *nf_ct_expect_hash;
extern unsigned int nf_ct_expect_hsize;
extern unsigned int nf_ct_expect_max;
@@ -56,6 +55,15 @@ struct nf_conntrack_expect
struct rcu_head rcu;
};
+static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp)
+{
+#ifdef CONFIG_NET_NS
+ return exp->master->ct_net; /* by definition */
+#else
+ return &init_net;
+#endif
+}
+
struct nf_conntrack_expect_policy
{
unsigned int max_expected;
@@ -67,17 +75,17 @@ struct nf_conntrack_expect_policy
#define NF_CT_EXPECT_PERMANENT 0x1
#define NF_CT_EXPECT_INACTIVE 0x2
-int nf_conntrack_expect_init(void);
-void nf_conntrack_expect_fini(void);
+int nf_conntrack_expect_init(struct net *net);
+void nf_conntrack_expect_fini(struct net *net);
struct nf_conntrack_expect *
-__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple);
+__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple);
struct nf_conntrack_expect *
-nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple);
+nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple);
struct nf_conntrack_expect *
-nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple);
+nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple);
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp);
void nf_ct_remove_expectations(struct nf_conn *ct);
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -7,5 +7,8 @@ struct netns_ct {
atomic_t count;
struct hlist_head *hash;
int hash_vmalloc;
+ unsigned int expect_count;
+ struct hlist_head *expect_hash;
+ int expect_vmalloc;
};
#endif
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -181,7 +181,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
- n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+ n = rcu_dereference(init_net.ct.expect_hash[st->bucket].first);
if (n)
return n;
}
@@ -197,7 +197,7 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
- head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+ head = rcu_dereference(init_net.ct.expect_hash[st->bucket].first);
}
return head;
}
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -73,7 +73,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
pr_debug("trying to unexpect other dir: ");
nf_ct_dump_tuple_ip(&t);
- other_exp = nf_ct_expect_find_get(&t);
+ other_exp = nf_ct_expect_find_get(&init_net, &t);
if (other_exp) {
nf_ct_unexpect_related(other_exp);
nf_ct_expect_put(other_exp);
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -562,7 +562,7 @@ init_conntrack(struct net *net,
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
spin_lock_bh(&nf_conntrack_lock);
- exp = nf_ct_find_expectation(tuple);
+ exp = nf_ct_find_expectation(net, tuple);
if (exp) {
pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
ct, exp);
@@ -1036,7 +1036,7 @@ void nf_conntrack_cleanup(struct net *net)
nf_conntrack_htable_size);
nf_conntrack_acct_fini();
- nf_conntrack_expect_fini();
+ nf_conntrack_expect_fini(net);
nf_conntrack_helper_fini();
nf_conntrack_proto_fini();
}
@@ -1171,7 +1171,7 @@ int nf_conntrack_init(struct net *net)
if (ret < 0)
goto err_free_conntrack_slab;
- ret = nf_conntrack_expect_init();
+ ret = nf_conntrack_expect_init(net);
if (ret < 0)
goto out_fini_proto;
@@ -1201,7 +1201,7 @@ int nf_conntrack_init(struct net *net)
out_fini_helper:
nf_conntrack_helper_fini();
out_fini_expect:
- nf_conntrack_expect_fini();
+ nf_conntrack_expect_fini(net);
out_fini_proto:
nf_conntrack_proto_fini();
err_free_conntrack_slab:
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -28,17 +28,12 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>
-struct hlist_head *nf_ct_expect_hash __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
-
unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
static unsigned int nf_ct_expect_hash_rnd __read_mostly;
-static unsigned int nf_ct_expect_count;
unsigned int nf_ct_expect_max __read_mostly;
static int nf_ct_expect_hash_rnd_initted __read_mostly;
-static int nf_ct_expect_vmalloc;
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
@@ -46,12 +41,13 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
+ struct net *net = nf_ct_exp_net(exp);
NF_CT_ASSERT(master_help);
NF_CT_ASSERT(!timer_pending(&exp->timeout));
hlist_del_rcu(&exp->hnode);
- nf_ct_expect_count--;
+ net->ct.expect_count--;
hlist_del(&exp->lnode);
master_help->expecting[exp->class]--;
@@ -87,17 +83,17 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
}
struct nf_conntrack_expect *
-__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
+__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
struct hlist_node *n;
unsigned int h;
- if (!nf_ct_expect_count)
+ if (!net->ct.expect_count)
return NULL;
h = nf_ct_expect_dst_hash(tuple);
- hlist_for_each_entry_rcu(i, n, &nf_ct_expect_hash[h], hnode) {
+ hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
return i;
}
@@ -107,12 +103,12 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
/* Just find a expectation corresponding to a tuple. */
struct nf_conntrack_expect *
-nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple)
+nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
rcu_read_lock();
- i = __nf_ct_expect_find(tuple);
+ i = __nf_ct_expect_find(net, tuple);
if (i && !atomic_inc_not_zero(&i->use))
i = NULL;
rcu_read_unlock();
@@ -124,17 +120,17 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
/* If an expectation for this connection is found, it gets delete from
* global list then returned. */
struct nf_conntrack_expect *
-nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple)
+nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i, *exp = NULL;
struct hlist_node *n;
unsigned int h;
- if (!nf_ct_expect_count)
+ if (!net->ct.expect_count)
return NULL;
h = nf_ct_expect_dst_hash(tuple);
- hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
+ hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
exp = i;
@@ -311,6 +307,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_put);
static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
+ struct net *net = nf_ct_exp_net(exp);
const struct nf_conntrack_expect_policy *p;
unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
@@ -319,8 +316,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
hlist_add_head(&exp->lnode, &master_help->expectations);
master_help->expecting[exp->class]++;
- hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
- nf_ct_expect_count++;
+ hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
+ net->ct.expect_count++;
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
@@ -371,6 +368,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
struct nf_conntrack_expect *i;
struct nf_conn *master = expect->master;
struct nf_conn_help *master_help = nfct_help(master);
+ struct net *net = nf_ct_exp_net(expect);
struct hlist_node *n;
unsigned int h;
int ret;
@@ -383,7 +381,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
goto out;
}
h = nf_ct_expect_dst_hash(&expect->tuple);
- hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
+ hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
if (expect_matches(i, expect)) {
/* Refresh timer: if it's dying, ignore.. */
if (refresh_timer(i)) {
@@ -406,7 +404,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
}
}
- if (nf_ct_expect_count >= nf_ct_expect_max) {
+ if (net->ct.expect_count >= nf_ct_expect_max) {
if (net_ratelimit())
printk(KERN_WARNING
"nf_conntrack: expectation table full\n");
@@ -434,7 +432,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
- n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+ n = rcu_dereference(init_net.ct.expect_hash[st->bucket].first);
if (n)
return n;
}
@@ -450,7 +448,7 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
- head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+ head = rcu_dereference(init_net.ct.expect_hash[st->bucket].first);
}
return head;
}
@@ -558,7 +556,7 @@ static void exp_proc_remove(void)
module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
-int nf_conntrack_expect_init(void)
+int nf_conntrack_expect_init(struct net *net)
{
int err = -ENOMEM;
@@ -569,9 +567,10 @@ int nf_conntrack_expect_init(void)
}
nf_ct_expect_max = nf_ct_expect_hsize * 4;
- nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
- &nf_ct_expect_vmalloc);
- if (nf_ct_expect_hash == NULL)
+ net->ct.expect_count = 0;
+ net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
+ &net->ct.expect_vmalloc);
+ if (net->ct.expect_hash == NULL)
goto err1;
nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
@@ -589,16 +588,16 @@ int nf_conntrack_expect_init(void)
err3:
kmem_cache_destroy(nf_ct_expect_cachep);
err2:
- nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
+ nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
nf_ct_expect_hsize);
err1:
return err;
}
-void nf_conntrack_expect_fini(void)
+void nf_conntrack_expect_fini(struct net *net)
{
exp_proc_remove();
kmem_cache_destroy(nf_ct_expect_cachep);
- nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
+ nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
nf_ct_expect_hsize);
}
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1218,7 +1218,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
tuple.dst.u.tcp.port = port;
tuple.dst.protonum = IPPROTO_TCP;
- exp = __nf_ct_expect_find(&tuple);
+ exp = __nf_ct_expect_find(&init_net, &tuple);
if (exp && exp->master == ct)
return exp;
return NULL;
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -145,7 +145,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
/* Get rid of expectations */
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, n, next,
- &nf_ct_expect_hash[i], hnode) {
+ &init_net.ct.expect_hash[i], hnode) {
struct nf_conn_help *help = nfct_help(exp->master);
if ((help->helper == me || exp->helper == me) &&
del_timer(&exp->timeout)) {
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1467,7 +1467,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
last = (struct nf_conntrack_expect *)cb->args[1];
for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
restart:
- hlist_for_each_entry(exp, n, &nf_ct_expect_hash[cb->args[0]],
+ hlist_for_each_entry(exp, n, &init_net.ct.expect_hash[cb->args[0]],
hnode) {
if (l3proto && exp->tuple.src.l3num != l3proto)
continue;
@@ -1529,7 +1529,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- exp = nf_ct_expect_find_get(&tuple);
+ exp = nf_ct_expect_find_get(&init_net, &tuple);
if (!exp)
return -ENOENT;
@@ -1583,7 +1583,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
return err;
/* bump usage count to 2 */
- exp = nf_ct_expect_find_get(&tuple);
+ exp = nf_ct_expect_find_get(&init_net, &tuple);
if (!exp)
return -ENOENT;
@@ -1613,7 +1613,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
}
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, n, next,
- &nf_ct_expect_hash[i],
+ &init_net.ct.expect_hash[i],
hnode) {
m_help = nfct_help(exp->master);
if (m_help->helper == h
@@ -1629,7 +1629,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
spin_lock_bh(&nf_conntrack_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, n, next,
- &nf_ct_expect_hash[i],
+ &init_net.ct.expect_hash[i],
hnode) {
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
@@ -1724,7 +1724,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
return err;
spin_lock_bh(&nf_conntrack_lock);
- exp = __nf_ct_expect_find(&tuple);
+ exp = __nf_ct_expect_find(&init_net, &tuple);
if (!exp) {
spin_unlock_bh(&nf_conntrack_lock);
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -121,7 +121,7 @@ static void pptp_expectfn(struct nf_conn *ct,
pr_debug("trying to unexpect other dir: ");
nf_ct_dump_tuple(&inv_t);
- exp_other = nf_ct_expect_find_get(&inv_t);
+ exp_other = nf_ct_expect_find_get(&init_net, &inv_t);
if (exp_other) {
/* delete other expectation. */
pr_debug("found\n");
@@ -154,7 +154,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
nf_ct_put(sibling);
return 1;
} else {
- exp = nf_ct_expect_find_get(t);
+ exp = nf_ct_expect_find_get(&init_net, t);
if (exp) {
pr_debug("unexpect_related of expect %p\n", exp);
nf_ct_unexpect_related(exp);
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -775,7 +775,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
rcu_read_lock();
do {
- exp = __nf_ct_expect_find(&tuple);
+ exp = __nf_ct_expect_find(&init_net, &tuple);
if (!exp || exp->master == ct ||
nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
--
1.5.6.3
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 10/38] netns ct: per-netns expectations
2008-08-21 22:00 [PATCH 10/38] netns ct: per-netns expectations adobriyan
@ 2008-09-04 16:43 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 01/33] nf_conntrack_sip: de-static helper pointers Alexey Dobriyan
` (32 more replies)
0 siblings, 33 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-04 16:43 UTC (permalink / raw)
To: adobriyan; +Cc: netfilter-devel, netdev, containers
adobriyan@gmail.com wrote:
> Make per-netns expectation hash and expectation count.
>
> Expectation always belongs to netns to which it's master conntrack belongs.
> This is natural and allows to not bloat expectations.
>
> Proc files and leaf users in protocol modules are stubbed to init_net,
> this is temporary.
This one again introduces overly long lines, please fix up
the remaining patches yourself and resend.
I'll upload my current tree to kernel.org so you can use it
as a base (will take a couple of minutes):
git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6.git
^ permalink raw reply [flat|nested] 88+ messages in thread
* [PATCH 01/33] nf_conntrack_sip: de-static helper pointers
2008-09-04 16:43 ` Patrick McHardy
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-08 3:02 ` [PATCH 02/33] nf_conntrack_gre: more locking around keymap list Alexey Dobriyan
` (31 subsequent siblings)
32 siblings, 0 replies; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
The ->help hook can run concurrently with itself, so iterating over SIP handlers
with a static pointer can't work reliably.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 2f9bbc0..1fa306b 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1193,7 +1193,6 @@ static const struct sip_handler sip_handlers[] = {
static int process_sip_response(struct sk_buff *skb,
const char **dptr, unsigned int *datalen)
{
- static const struct sip_handler *handler;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
unsigned int matchoff, matchlen;
@@ -1214,6 +1213,8 @@ static int process_sip_response(struct sk_buff *skb,
dataoff = matchoff + matchlen + 1;
for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) {
+ const struct sip_handler *handler;
+
handler = &sip_handlers[i];
if (handler->response == NULL)
continue;
@@ -1228,13 +1229,14 @@ static int process_sip_response(struct sk_buff *skb,
static int process_sip_request(struct sk_buff *skb,
const char **dptr, unsigned int *datalen)
{
- static const struct sip_handler *handler;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
unsigned int matchoff, matchlen;
unsigned int cseq, i;
for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) {
+ const struct sip_handler *handler;
+
handler = &sip_handlers[i];
if (handler->request == NULL)
continue;
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 02/33] nf_conntrack_gre: more locking around keymap list
2008-09-04 16:43 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 01/33] nf_conntrack_sip: de-static helper pointers Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-08 3:02 ` [PATCH 03/33] nf_conntrack_gre: nf_ct_gre_keymap_flush() fixlet Alexey Dobriyan
` (30 subsequent siblings)
32 siblings, 0 replies; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
gre_keymap_list should be protected in all places.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 0e3d124..2752b74 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -97,10 +97,14 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
kmp = &help->help.ct_pptp_info.keymap[dir];
if (*kmp) {
/* check whether it's a retransmission */
+ read_lock_bh(&nf_ct_gre_lock);
list_for_each_entry(km, &gre_keymap_list, list) {
- if (gre_key_cmpfn(km, t) && km == *kmp)
+ if (gre_key_cmpfn(km, t) && km == *kmp) {
+ read_unlock_bh(&nf_ct_gre_lock);
return 0;
+ }
}
+ read_unlock_bh(&nf_ct_gre_lock);
pr_debug("trying to override keymap_%s for ct %p\n",
dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
return -EEXIST;
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 03/33] nf_conntrack_gre: nf_ct_gre_keymap_flush() fixlet
2008-09-04 16:43 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 01/33] nf_conntrack_sip: de-static helper pointers Alexey Dobriyan
2008-09-08 3:02 ` [PATCH 02/33] nf_conntrack_gre: more locking around keymap list Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-09 5:39 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 04/33] Fix {ip,6}_route_me_harder() in netns Alexey Dobriyan
` (29 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
It does "kfree(list_head)", which looks wrong because the entity that was
allocated is definitely not a list_head.
However, this all works because the list_head is the first item in
struct nf_ct_gre_keymap.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 2752b74..c5a7822 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -45,12 +45,12 @@ static LIST_HEAD(gre_keymap_list);
void nf_ct_gre_keymap_flush(void)
{
- struct list_head *pos, *n;
+ struct nf_ct_gre_keymap *km, *tmp;
write_lock_bh(&nf_ct_gre_lock);
- list_for_each_safe(pos, n, &gre_keymap_list) {
- list_del(pos);
- kfree(pos);
+ list_for_each_entry_safe(km, tmp, &gre_keymap_list, list) {
+ list_del(&km->list);
+ kfree(km);
}
write_unlock_bh(&nf_ct_gre_lock);
}
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 04/33] Fix {ip,6}_route_me_harder() in netns
2008-09-04 16:43 ` Patrick McHardy
` (2 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 03/33] nf_conntrack_gre: nf_ct_gre_keymap_flush() fixlet Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-09 5:44 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 05/33] netns ct: per-netns expectations Alexey Dobriyan
` (28 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Take the netns from skb->dst->dev. This should be safe because they are called
from the LOCAL_OUT hook, where dst is valid (though I'm not exactly sure about
IPVS and queueing packets to userspace).
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index f8edacd..9c54024 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -12,6 +12,7 @@
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
{
+ struct net *net = dev_net(skb->dst->dev);
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct flowi fl = {};
@@ -19,7 +20,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
unsigned int hh_len;
unsigned int type;
- type = inet_addr_type(&init_net, iph->saddr);
+ type = inet_addr_type(net, iph->saddr);
if (addr_type == RTN_UNSPEC)
addr_type = type;
@@ -33,7 +34,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
fl.mark = skb->mark;
- if (ip_route_output_key(&init_net, &rt, &fl) != 0)
+ if (ip_route_output_key(net, &rt, &fl) != 0)
return -1;
/* Drop old route. */
@@ -43,7 +44,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
/* non-local src, find valid iif to satisfy
* rp-filter when calling ip_route_input. */
fl.nl_u.ip4_u.daddr = iph->saddr;
- if (ip_route_output_key(&init_net, &rt, &fl) != 0)
+ if (ip_route_output_key(net, &rt, &fl) != 0)
return -1;
odst = skb->dst;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 8c6c5e7..4cb4844 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -23,7 +23,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
.saddr = iph->saddr, } },
};
- dst = ip6_route_output(&init_net, skb->sk, &fl);
+ dst = ip6_route_output(dev_net(skb->dst->dev), skb->sk, &fl);
#ifdef CONFIG_XFRM
if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 05/33] netns ct: per-netns expectations
2008-09-04 16:43 ` Patrick McHardy
` (3 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 04/33] Fix {ip,6}_route_me_harder() in netns Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-09 5:49 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 06/33] netns ct: per-netns unconfirmed list Alexey Dobriyan
` (27 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Make per-netns a) the expectation hash and b) the expectation count.
An expectation always belongs to the netns to which its master conntrack belongs.
This is natural and doesn't bloat the expectation.
Proc files and leaf users are stubbed to init_net; this is temporary.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 4c4d894..37a7fc1 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -6,7 +6,6 @@
#define _NF_CONNTRACK_EXPECT_H
#include <net/netfilter/nf_conntrack.h>
-extern struct hlist_head *nf_ct_expect_hash;
extern unsigned int nf_ct_expect_hsize;
extern unsigned int nf_ct_expect_max;
@@ -56,6 +55,15 @@ struct nf_conntrack_expect
struct rcu_head rcu;
};
+static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp)
+{
+#ifdef CONFIG_NET_NS
+ return exp->master->ct_net; /* by definition */
+#else
+ return &init_net;
+#endif
+}
+
struct nf_conntrack_expect_policy
{
unsigned int max_expected;
@@ -67,17 +75,17 @@ struct nf_conntrack_expect_policy
#define NF_CT_EXPECT_PERMANENT 0x1
#define NF_CT_EXPECT_INACTIVE 0x2
-int nf_conntrack_expect_init(void);
-void nf_conntrack_expect_fini(void);
+int nf_conntrack_expect_init(struct net *net);
+void nf_conntrack_expect_fini(struct net *net);
struct nf_conntrack_expect *
-__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple);
+__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple);
struct nf_conntrack_expect *
-nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple);
+nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple);
struct nf_conntrack_expect *
-nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple);
+nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple);
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp);
void nf_ct_remove_expectations(struct nf_conn *ct);
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index b767683..e453a33 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -5,7 +5,10 @@
struct netns_ct {
atomic_t count;
+ unsigned int expect_count;
struct hlist_head *hash;
+ struct hlist_head *expect_hash;
int hash_vmalloc;
+ int expect_vmalloc;
};
#endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 8e0afdc..f8636a5 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -177,11 +177,12 @@ struct ct_expect_iter_state {
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
+ struct net *net = &init_net;
struct ct_expect_iter_state *st = seq->private;
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
- n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+ n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
if (n)
return n;
}
@@ -191,13 +192,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
struct hlist_node *head)
{
+ struct net *net = &init_net;
struct ct_expect_iter_state *st = seq->private;
head = rcu_dereference(head->next);
while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
- head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+ head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
}
return head;
}
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index da3d91a..e4bdddc 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -73,7 +73,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
pr_debug("trying to unexpect other dir: ");
nf_ct_dump_tuple_ip(&t);
- other_exp = nf_ct_expect_find_get(&t);
+ other_exp = nf_ct_expect_find_get(&init_net, &t);
if (other_exp) {
nf_ct_unexpect_related(other_exp);
nf_ct_expect_put(other_exp);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index da56b26..c188ede 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -562,7 +562,7 @@ init_conntrack(struct net *net,
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
spin_lock_bh(&nf_conntrack_lock);
- exp = nf_ct_find_expectation(tuple);
+ exp = nf_ct_find_expectation(net, tuple);
if (exp) {
pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
ct, exp);
@@ -1038,7 +1038,7 @@ void nf_conntrack_cleanup(struct net *net)
nf_conntrack_htable_size);
nf_conntrack_acct_fini();
- nf_conntrack_expect_fini();
+ nf_conntrack_expect_fini(net);
nf_conntrack_helper_fini();
nf_conntrack_proto_fini();
}
@@ -1173,7 +1173,7 @@ int nf_conntrack_init(struct net *net)
if (ret < 0)
goto err_free_conntrack_slab;
- ret = nf_conntrack_expect_init();
+ ret = nf_conntrack_expect_init(net);
if (ret < 0)
goto out_fini_proto;
@@ -1203,7 +1203,7 @@ int nf_conntrack_init(struct net *net)
out_fini_helper:
nf_conntrack_helper_fini();
out_fini_expect:
- nf_conntrack_expect_fini();
+ nf_conntrack_expect_fini(net);
out_fini_proto:
nf_conntrack_proto_fini();
err_free_conntrack_slab:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index e6a79f2..5307316 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -28,17 +28,12 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>
-struct hlist_head *nf_ct_expect_hash __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
-
unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
static unsigned int nf_ct_expect_hash_rnd __read_mostly;
-static unsigned int nf_ct_expect_count;
unsigned int nf_ct_expect_max __read_mostly;
static int nf_ct_expect_hash_rnd_initted __read_mostly;
-static int nf_ct_expect_vmalloc;
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
@@ -46,12 +41,13 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
+ struct net *net = nf_ct_exp_net(exp);
NF_CT_ASSERT(master_help);
NF_CT_ASSERT(!timer_pending(&exp->timeout));
hlist_del_rcu(&exp->hnode);
- nf_ct_expect_count--;
+ net->ct.expect_count--;
hlist_del(&exp->lnode);
master_help->expecting[exp->class]--;
@@ -87,17 +83,17 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
}
struct nf_conntrack_expect *
-__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
+__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
struct hlist_node *n;
unsigned int h;
- if (!nf_ct_expect_count)
+ if (!net->ct.expect_count)
return NULL;
h = nf_ct_expect_dst_hash(tuple);
- hlist_for_each_entry_rcu(i, n, &nf_ct_expect_hash[h], hnode) {
+ hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
return i;
}
@@ -107,12 +103,12 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
/* Just find a expectation corresponding to a tuple. */
struct nf_conntrack_expect *
-nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple)
+nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
rcu_read_lock();
- i = __nf_ct_expect_find(tuple);
+ i = __nf_ct_expect_find(net, tuple);
if (i && !atomic_inc_not_zero(&i->use))
i = NULL;
rcu_read_unlock();
@@ -124,17 +120,17 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
/* If an expectation for this connection is found, it gets delete from
* global list then returned. */
struct nf_conntrack_expect *
-nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple)
+nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i, *exp = NULL;
struct hlist_node *n;
unsigned int h;
- if (!nf_ct_expect_count)
+ if (!net->ct.expect_count)
return NULL;
h = nf_ct_expect_dst_hash(tuple);
- hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
+ hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
exp = i;
@@ -311,6 +307,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_put);
static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
+ struct net *net = nf_ct_exp_net(exp);
const struct nf_conntrack_expect_policy *p;
unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
@@ -319,8 +316,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
hlist_add_head(&exp->lnode, &master_help->expectations);
master_help->expecting[exp->class]++;
- hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
- nf_ct_expect_count++;
+ hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
+ net->ct.expect_count++;
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
@@ -371,6 +368,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
struct nf_conntrack_expect *i;
struct nf_conn *master = expect->master;
struct nf_conn_help *master_help = nfct_help(master);
+ struct net *net = nf_ct_exp_net(expect);
struct hlist_node *n;
unsigned int h;
int ret;
@@ -383,7 +381,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
goto out;
}
h = nf_ct_expect_dst_hash(&expect->tuple);
- hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
+ hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
if (expect_matches(i, expect)) {
/* Refresh timer: if it's dying, ignore.. */
if (refresh_timer(i)) {
@@ -406,7 +404,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
}
}
- if (nf_ct_expect_count >= nf_ct_expect_max) {
+ if (net->ct.expect_count >= nf_ct_expect_max) {
if (net_ratelimit())
printk(KERN_WARNING
"nf_conntrack: expectation table full\n");
@@ -430,11 +428,12 @@ struct ct_expect_iter_state {
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
+ struct net *net = &init_net;
struct ct_expect_iter_state *st = seq->private;
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
- n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+ n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
if (n)
return n;
}
@@ -444,13 +443,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
struct hlist_node *head)
{
+ struct net *net = &init_net;
struct ct_expect_iter_state *st = seq->private;
head = rcu_dereference(head->next);
while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
- head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+ head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
}
return head;
}
@@ -558,7 +558,7 @@ static void exp_proc_remove(void)
module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
-int nf_conntrack_expect_init(void)
+int nf_conntrack_expect_init(struct net *net)
{
int err = -ENOMEM;
@@ -569,9 +569,10 @@ int nf_conntrack_expect_init(void)
}
nf_ct_expect_max = nf_ct_expect_hsize * 4;
- nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
- &nf_ct_expect_vmalloc);
- if (nf_ct_expect_hash == NULL)
+ net->ct.expect_count = 0;
+ net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
+ &net->ct.expect_vmalloc);
+ if (net->ct.expect_hash == NULL)
goto err1;
nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
@@ -589,16 +590,16 @@ int nf_conntrack_expect_init(void)
err3:
kmem_cache_destroy(nf_ct_expect_cachep);
err2:
- nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
+ nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
nf_ct_expect_hsize);
err1:
return err;
}
-void nf_conntrack_expect_fini(void)
+void nf_conntrack_expect_fini(struct net *net)
{
exp_proc_remove();
kmem_cache_destroy(nf_ct_expect_cachep);
- nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
+ nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
nf_ct_expect_hsize);
}
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 5dc0478..dfb826c 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1219,7 +1219,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
tuple.dst.u.tcp.port = port;
tuple.dst.protonum = IPPROTO_TCP;
- exp = __nf_ct_expect_find(&tuple);
+ exp = __nf_ct_expect_find(&init_net, &tuple);
if (exp && exp->master == ct)
return exp;
return NULL;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index d91278d..c793db8 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -145,7 +145,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
/* Get rid of expectations */
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, n, next,
- &nf_ct_expect_hash[i], hnode) {
+ &init_net.ct.expect_hash[i], hnode) {
struct nf_conn_help *help = nfct_help(exp->master);
if ((help->helper == me || exp->helper == me) &&
del_timer(&exp->timeout)) {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 918a335..cadfd15 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1458,6 +1458,7 @@ static int ctnetlink_exp_done(struct netlink_callback *cb)
static int
ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = &init_net;
struct nf_conntrack_expect *exp, *last;
struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
struct hlist_node *n;
@@ -1467,7 +1468,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
last = (struct nf_conntrack_expect *)cb->args[1];
for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
restart:
- hlist_for_each_entry(exp, n, &nf_ct_expect_hash[cb->args[0]],
+ hlist_for_each_entry(exp, n, &net->ct.expect_hash[cb->args[0]],
hnode) {
if (l3proto && exp->tuple.src.l3num != l3proto)
continue;
@@ -1529,7 +1530,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- exp = nf_ct_expect_find_get(&tuple);
+ exp = nf_ct_expect_find_get(&init_net, &tuple);
if (!exp)
return -ENOENT;
@@ -1583,7 +1584,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
return err;
/* bump usage count to 2 */
- exp = nf_ct_expect_find_get(&tuple);
+ exp = nf_ct_expect_find_get(&init_net, &tuple);
if (!exp)
return -ENOENT;
@@ -1613,7 +1614,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
}
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, n, next,
- &nf_ct_expect_hash[i],
+ &init_net.ct.expect_hash[i],
hnode) {
m_help = nfct_help(exp->master);
if (m_help->helper == h
@@ -1629,7 +1630,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
spin_lock_bh(&nf_conntrack_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, n, next,
- &nf_ct_expect_hash[i],
+ &init_net.ct.expect_hash[i],
hnode) {
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
@@ -1724,7 +1725,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
return err;
spin_lock_bh(&nf_conntrack_lock);
- exp = __nf_ct_expect_find(&tuple);
+ exp = __nf_ct_expect_find(&init_net, &tuple);
if (!exp) {
spin_unlock_bh(&nf_conntrack_lock);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 7caf45b..5db7df5 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -121,7 +121,7 @@ static void pptp_expectfn(struct nf_conn *ct,
pr_debug("trying to unexpect other dir: ");
nf_ct_dump_tuple(&inv_t);
- exp_other = nf_ct_expect_find_get(&inv_t);
+ exp_other = nf_ct_expect_find_get(&init_net, &inv_t);
if (exp_other) {
/* delete other expectation. */
pr_debug("found\n");
@@ -154,7 +154,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
nf_ct_put(sibling);
return 1;
} else {
- exp = nf_ct_expect_find_get(t);
+ exp = nf_ct_expect_find_get(&init_net, t);
if (exp) {
pr_debug("unexpect_related of expect %p\n", exp);
nf_ct_unexpect_related(exp);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 1fa306b..a006080 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -775,7 +775,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
rcu_read_lock();
do {
- exp = __nf_ct_expect_find(&tuple);
+ exp = __nf_ct_expect_find(&init_net, &tuple);
if (!exp || exp->master == ct ||
nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 06/33] netns ct: per-netns unconfirmed list
2008-09-04 16:43 ` Patrick McHardy
` (4 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 05/33] netns ct: per-netns expectations Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-09 5:50 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 07/33] netns ct: pass netns pointer to nf_conntrack_in() Alexey Dobriyan
` (26 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
What is a confirmed connection in one netns can very well be unconfirmed
in another one, so make the unconfirmed list per-netns.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 1c37356..b4b45c5 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -72,6 +72,5 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l4proto *proto);
extern spinlock_t nf_conntrack_lock ;
-extern struct hlist_head unconfirmed;
#endif /* _NF_CONNTRACK_CORE_H */
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index e453a33..6ddf58e 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -1,6 +1,7 @@
#ifndef __NETNS_CONNTRACK_H
#define __NETNS_CONNTRACK_H
+#include <linux/list.h>
#include <asm/atomic.h>
struct netns_ct {
@@ -8,6 +9,7 @@ struct netns_ct {
unsigned int expect_count;
struct hlist_head *hash;
struct hlist_head *expect_hash;
+ struct hlist_head unconfirmed;
int hash_vmalloc;
int expect_vmalloc;
};
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c188ede..2a105db 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -54,7 +54,6 @@ struct nf_conn nf_conntrack_untracked __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
unsigned int nf_ct_log_invalid __read_mostly;
-HLIST_HEAD(unconfirmed);
static struct kmem_cache *nf_conntrack_cachep __read_mostly;
DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
@@ -596,7 +595,8 @@ init_conntrack(struct net *net,
}
/* Overload tuple linked list to put us in unconfirmed list. */
- hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &unconfirmed);
+ hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
+ &net->ct.unconfirmed);
spin_unlock_bh(&nf_conntrack_lock);
@@ -957,7 +957,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
goto found;
}
}
- hlist_for_each_entry(h, n, &unconfirmed, hnode) {
+ hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (iter(ct, data))
set_bit(IPS_DYING_BIT, &ct->status);
@@ -1154,6 +1154,7 @@ int nf_conntrack_init(struct net *net)
printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
goto err_out;
}
+ INIT_HLIST_HEAD(&net->ct.unconfirmed);
nf_conntrack_max = max_factor * nf_conntrack_htable_size;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index c793db8..920e778 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -156,7 +156,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
}
/* Get rid of expecteds, set helpers to NULL. */
- hlist_for_each_entry(h, n, &unconfirmed, hnode)
+ hlist_for_each_entry(h, n, &init_net.ct.unconfirmed, hnode)
unhelp(h, me);
for (i = 0; i < nf_conntrack_htable_size; i++) {
hlist_for_each_entry(h, n, &init_net.ct.hash[i], hnode)
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 07/33] netns ct: pass netns pointer to nf_conntrack_in()
2008-09-04 16:43 ` Patrick McHardy
` (5 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 06/33] netns ct: per-netns unconfirmed list Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-09 5:52 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 08/33] netns ct: pass netns pointer to L4 protocol's ->error hook Alexey Dobriyan
` (25 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
It's deducible from skb->dev or skb->dst->dev, but we know the netns at
the time of the call, so pass it down and use it for finding and creating
conntracks.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index b4b45c5..e78afe7 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -20,7 +20,8 @@
/* This header is used to share core functionality between the
standalone connection tracking module, and the compatibility layer's use
of connection tracking. */
-extern unsigned int nf_conntrack_in(u_int8_t pf,
+extern unsigned int nf_conntrack_in(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
struct sk_buff *skb);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 03dd108..2e4dd3f 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -172,7 +172,7 @@ static unsigned int ipv4_conntrack_in(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return nf_conntrack_in(PF_INET, hooknum, skb);
+ return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb);
}
static unsigned int ipv4_conntrack_local(unsigned int hooknum,
@@ -188,7 +188,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
printk("ipt_hook: happy cracking.\n");
return NF_ACCEPT;
}
- return nf_conntrack_in(PF_INET, hooknum, skb);
+ return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb);
}
/* Connection tracking may drop packets, but never alters them, so
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 85050c0..e91db16 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -211,11 +211,10 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
return NF_STOLEN;
}
-static unsigned int ipv6_conntrack_in(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int __ipv6_conntrack_in(struct net *net,
+ unsigned int hooknum,
+ struct sk_buff *skb,
+ int (*okfn)(struct sk_buff *))
{
struct sk_buff *reasm = skb->nfct_reasm;
@@ -225,7 +224,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum,
if (!reasm->nfct) {
unsigned int ret;
- ret = nf_conntrack_in(PF_INET6, hooknum, reasm);
+ ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm);
if (ret != NF_ACCEPT)
return ret;
}
@@ -235,7 +234,16 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum,
return NF_ACCEPT;
}
- return nf_conntrack_in(PF_INET6, hooknum, skb);
+ return nf_conntrack_in(net, PF_INET6, hooknum, skb);
+}
+
+static unsigned int ipv6_conntrack_in(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn);
}
static unsigned int ipv6_conntrack_local(unsigned int hooknum,
@@ -250,7 +258,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
printk("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT;
}
- return ipv6_conntrack_in(hooknum, skb, in, out, okfn);
+ return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn);
}
static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2a105db..5c96d97 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -611,7 +611,8 @@ init_conntrack(struct net *net,
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct nf_conn *
-resolve_normal_ct(struct sk_buff *skb,
+resolve_normal_ct(struct net *net,
+ struct sk_buff *skb,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
@@ -632,10 +633,9 @@ resolve_normal_ct(struct sk_buff *skb,
}
/* look for tuple match */
- h = nf_conntrack_find_get(&init_net, &tuple);
+ h = nf_conntrack_find_get(net, &tuple);
if (!h) {
- h = init_conntrack(&init_net, &tuple, l3proto, l4proto, skb,
- dataoff);
+ h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff);
if (!h)
return NULL;
if (IS_ERR(h))
@@ -669,7 +669,8 @@ resolve_normal_ct(struct sk_buff *skb,
}
unsigned int
-nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb)
+nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
+ struct sk_buff *skb)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
@@ -709,8 +710,8 @@ nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb)
return -ret;
}
- ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto,
- &set_reply, &ctinfo);
+ ct = resolve_normal_ct(net, skb, dataoff, pf, protonum,
+ l3proto, l4proto, &set_reply, &ctinfo);
if (!ct) {
/* Not valid part of a connection */
NF_CT_STAT_INC_ATOMIC(invalid);
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 08/33] netns ct: pass netns pointer to L4 protocol's ->error hook
2008-09-04 16:43 ` Patrick McHardy
` (6 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 07/33] netns ct: pass netns pointer to nf_conntrack_in() Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-09 5:54 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 09/33] netns ct: per-netns /proc/net/nf_conntrack, /proc/net/stat/nf_conntrack Alexey Dobriyan
` (24 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Again, it's deducible from the skb, but we're going to use it for
nf_conntrack_checksum and statistics, so just pass it from the upper layer.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index d4376e9..97723d3 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -50,7 +50,7 @@ struct nf_conntrack_l4proto
/* Called when a conntrack entry is destroyed */
void (*destroy)(struct nf_conn *ct);
- int (*error)(struct sk_buff *skb, unsigned int dataoff,
+ int (*error)(struct net *net, struct sk_buff *skb, unsigned int dataoff,
enum ip_conntrack_info *ctinfo,
u_int8_t pf, unsigned int hooknum);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index daf3463..8c7ed5b 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -123,7 +123,7 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
static int
-icmp_error_message(struct sk_buff *skb,
+icmp_error_message(struct net *net, struct sk_buff *skb,
enum ip_conntrack_info *ctinfo,
unsigned int hooknum)
{
@@ -155,7 +155,7 @@ icmp_error_message(struct sk_buff *skb,
*ctinfo = IP_CT_RELATED;
- h = nf_conntrack_find_get(&init_net, &innertuple);
+ h = nf_conntrack_find_get(net, &innertuple);
if (!h) {
pr_debug("icmp_error_message: no match\n");
return -NF_ACCEPT;
@@ -172,7 +172,7 @@ icmp_error_message(struct sk_buff *skb,
/* Small and modified version of icmp_rcv */
static int
-icmp_error(struct sk_buff *skb, unsigned int dataoff,
+icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum)
{
const struct icmphdr *icmph;
@@ -217,7 +217,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff,
&& icmph->type != ICMP_REDIRECT)
return NF_ACCEPT;
- return icmp_error_message(skb, ctinfo, hooknum);
+ return icmp_error_message(net, skb, ctinfo, hooknum);
}
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 548cf4f..aabddfe 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -122,7 +122,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
}
static int
-icmpv6_error_message(struct sk_buff *skb,
+icmpv6_error_message(struct net *net,
+ struct sk_buff *skb,
unsigned int icmp6off,
enum ip_conntrack_info *ctinfo,
unsigned int hooknum)
@@ -156,7 +157,7 @@ icmpv6_error_message(struct sk_buff *skb,
*ctinfo = IP_CT_RELATED;
- h = nf_conntrack_find_get(&init_net, &intuple);
+ h = nf_conntrack_find_get(net, &intuple);
if (!h) {
pr_debug("icmpv6_error: no match\n");
return -NF_ACCEPT;
@@ -172,7 +173,7 @@ icmpv6_error_message(struct sk_buff *skb,
}
static int
-icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
+icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum)
{
const struct icmp6hdr *icmp6h;
@@ -197,7 +198,7 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
if (icmp6h->icmp6_type >= 128)
return NF_ACCEPT;
- return icmpv6_error_message(skb, dataoff, ctinfo, hooknum);
+ return icmpv6_error_message(net, skb, dataoff, ctinfo, hooknum);
}
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5c96d97..251f020 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -703,11 +703,13 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
/* It may be an special packet, error, unclean...
* inverse of the return code tells to the netfilter
* core what to do with the packet. */
- if (l4proto->error != NULL &&
- (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
- NF_CT_STAT_INC_ATOMIC(error);
- NF_CT_STAT_INC_ATOMIC(invalid);
- return -ret;
+ if (l4proto->error != NULL) {
+ ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum);
+ if (ret <= 0) {
+ NF_CT_STAT_INC_ATOMIC(error);
+ NF_CT_STAT_INC_ATOMIC(invalid);
+ return -ret;
+ }
}
ct = resolve_normal_ct(net, skb, dataoff, pf, protonum,
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index edc3035..6ead8da 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -545,9 +545,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
return NF_ACCEPT;
}
-static int dccp_error(struct sk_buff *skb, unsigned int dataoff,
- enum ip_conntrack_info *ctinfo, u_int8_t pf,
- unsigned int hooknum)
+static int dccp_error(struct net *net, struct sk_buff *skb,
+ unsigned int dataoff, enum ip_conntrack_info *ctinfo,
+ u_int8_t pf, unsigned int hooknum)
{
struct dccp_hdr _dh, *dh;
unsigned int dccp_len = skb->len - dataoff;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 539a820..4e71de2 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -746,7 +746,8 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
};
/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
-static int tcp_error(struct sk_buff *skb,
+static int tcp_error(struct net *net,
+ struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info *ctinfo,
u_int8_t pf,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 2a965c4..8a245be 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -89,7 +89,7 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
}
-static int udp_error(struct sk_buff *skb, unsigned int dataoff,
+static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
enum ip_conntrack_info *ctinfo,
u_int8_t pf,
unsigned int hooknum)
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 4fb6c8d..9817019 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -89,7 +89,9 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
}
-static int udplite_error(struct sk_buff *skb, unsigned int dataoff,
+static int udplite_error(struct net *net,
+ struct sk_buff *skb,
+ unsigned int dataoff,
enum ip_conntrack_info *ctinfo,
u_int8_t pf,
unsigned int hooknum)
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 09/33] netns ct: per-netns /proc/net/nf_conntrack, /proc/net/stat/nf_conntrack
2008-09-04 16:43 ` Patrick McHardy
` (7 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 08/33] netns ct: pass netns pointer to L4 protocol's ->error hook Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-09 5:56 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 10/33] netns ct: per-netns /proc/net/nf_conntrack_expect Alexey Dobriyan
` (23 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5456e4b..02eaf87 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -40,18 +40,20 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
EXPORT_SYMBOL_GPL(print_tuple);
struct ct_iter_state {
+ struct seq_net_private p;
unsigned int bucket;
};
static struct hlist_node *ct_get_first(struct seq_file *seq)
{
+ struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
struct hlist_node *n;
for (st->bucket = 0;
st->bucket < nf_conntrack_htable_size;
st->bucket++) {
- n = rcu_dereference(init_net.ct.hash[st->bucket].first);
+ n = rcu_dereference(net->ct.hash[st->bucket].first);
if (n)
return n;
}
@@ -61,13 +63,14 @@ static struct hlist_node *ct_get_first(struct seq_file *seq)
static struct hlist_node *ct_get_next(struct seq_file *seq,
struct hlist_node *head)
{
+ struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
head = rcu_dereference(head->next);
while (head == NULL) {
if (++st->bucket >= nf_conntrack_htable_size)
return NULL;
- head = rcu_dereference(init_net.ct.hash[st->bucket].first);
+ head = rcu_dereference(net->ct.hash[st->bucket].first);
}
return head;
}
@@ -177,7 +180,7 @@ static const struct seq_operations ct_seq_ops = {
static int ct_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &ct_seq_ops,
+ return seq_open_net(inode, file, &ct_seq_ops,
sizeof(struct ct_iter_state));
}
@@ -186,7 +189,7 @@ static const struct file_operations ct_file_ops = {
.open = ct_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
@@ -277,38 +280,38 @@ static const struct file_operations ct_cpu_seq_fops = {
.release = seq_release,
};
-static int nf_conntrack_standalone_init_proc(void)
+static int nf_conntrack_standalone_init_proc(struct net *net)
{
struct proc_dir_entry *pde;
- pde = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops);
+ pde = proc_net_fops_create(net, "nf_conntrack", 0440, &ct_file_ops);
if (!pde)
goto out_nf_conntrack;
- pde = proc_create("nf_conntrack", S_IRUGO, init_net.proc_net_stat,
+ pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
&ct_cpu_seq_fops);
if (!pde)
goto out_stat_nf_conntrack;
return 0;
out_stat_nf_conntrack:
- proc_net_remove(&init_net, "nf_conntrack");
+ proc_net_remove(net, "nf_conntrack");
out_nf_conntrack:
return -ENOMEM;
}
-static void nf_conntrack_standalone_fini_proc(void)
+static void nf_conntrack_standalone_fini_proc(struct net *net)
{
- remove_proc_entry("nf_conntrack", init_net.proc_net_stat);
- proc_net_remove(&init_net, "nf_conntrack");
+ remove_proc_entry("nf_conntrack", net->proc_net_stat);
+ proc_net_remove(net, "nf_conntrack");
}
#else
-static int nf_conntrack_standalone_init_proc(void)
+static int nf_conntrack_standalone_init_proc(struct net *net)
{
return 0;
}
-static void nf_conntrack_standalone_fini_proc(void)
+static void nf_conntrack_standalone_fini_proc(struct net *net)
{
}
#endif /* CONFIG_PROC_FS */
@@ -442,11 +445,25 @@ static void nf_conntrack_standalone_fini_sysctl(void)
static int nf_conntrack_net_init(struct net *net)
{
- return nf_conntrack_init(net);
+ int ret;
+
+ ret = nf_conntrack_init(net);
+ if (ret < 0)
+ goto out_init;
+ ret = nf_conntrack_standalone_init_proc(net);
+ if (ret < 0)
+ goto out_proc;
+ return 0;
+
+out_proc:
+ nf_conntrack_cleanup(net);
+out_init:
+ return ret;
}
static void nf_conntrack_net_exit(struct net *net)
{
+ nf_conntrack_standalone_fini_proc(net);
nf_conntrack_cleanup(net);
}
@@ -462,17 +479,12 @@ static int __init nf_conntrack_standalone_init(void)
ret = register_pernet_subsys(&nf_conntrack_net_ops);
if (ret < 0)
goto out;
- ret = nf_conntrack_standalone_init_proc();
- if (ret < 0)
- goto out_proc;
ret = nf_conntrack_standalone_init_sysctl();
if (ret < 0)
goto out_sysctl;
return 0;
out_sysctl:
- nf_conntrack_standalone_fini_proc();
-out_proc:
unregister_pernet_subsys(&nf_conntrack_net_ops);
out:
return ret;
@@ -481,7 +493,6 @@ out:
static void __exit nf_conntrack_standalone_fini(void)
{
nf_conntrack_standalone_fini_sysctl();
- nf_conntrack_standalone_fini_proc();
unregister_pernet_subsys(&nf_conntrack_net_ops);
}
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 10/33] netns ct: per-netns /proc/net/nf_conntrack_expect
2008-09-04 16:43 ` Patrick McHardy
` (8 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 09/33] netns ct: per-netns /proc/net/nf_conntrack, /proc/net/stat/nf_conntrack Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-09 5:57 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 11/33] netns ct: per-netns /proc/net/ip_conntrack, /proc/net/stat/ip_conntrack, /proc/net/ip_conntrack_expect Alexey Dobriyan
` (22 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 5307316..6a09200 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -423,12 +423,13 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_related);
#ifdef CONFIG_PROC_FS
struct ct_expect_iter_state {
+ struct seq_net_private p;
unsigned int bucket;
};
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
- struct net *net = &init_net;
+ struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
struct hlist_node *n;
@@ -443,7 +444,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
struct hlist_node *head)
{
- struct net *net = &init_net;
+ struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
head = rcu_dereference(head->next);
@@ -524,7 +525,7 @@ static const struct seq_operations exp_seq_ops = {
static int exp_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &exp_seq_ops,
+ return seq_open_net(inode, file, &exp_seq_ops,
sizeof(struct ct_expect_iter_state));
}
@@ -533,26 +534,26 @@ static const struct file_operations exp_file_ops = {
.open = exp_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
#endif /* CONFIG_PROC_FS */
-static int exp_proc_init(void)
+static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc;
- proc = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, &exp_file_ops);
+ proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
if (!proc)
return -ENOMEM;
#endif /* CONFIG_PROC_FS */
return 0;
}
-static void exp_proc_remove(void)
+static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_PROC_FS
- proc_net_remove(&init_net, "nf_conntrack_expect");
+ proc_net_remove(net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}
@@ -581,7 +582,7 @@ int nf_conntrack_expect_init(struct net *net)
if (!nf_ct_expect_cachep)
goto err2;
- err = exp_proc_init();
+ err = exp_proc_init(net);
if (err < 0)
goto err3;
@@ -598,7 +599,7 @@ err1:
void nf_conntrack_expect_fini(struct net *net)
{
- exp_proc_remove();
+ exp_proc_remove(net);
kmem_cache_destroy(nf_ct_expect_cachep);
nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
nf_ct_expect_hsize);
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 11/33] netns ct: per-netns /proc/net/ip_conntrack, /proc/net/stat/ip_conntrack, /proc/net/ip_conntrack_expect
2008-09-04 16:43 ` Patrick McHardy
` (9 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 10/33] netns ct: per-netns /proc/net/nf_conntrack_expect Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-09 5:59 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 12/33] netns ct: export netns list Alexey Dobriyan
` (21 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index f8636a5..b294083 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -21,18 +21,20 @@
#include <net/netfilter/nf_conntrack_acct.h>
struct ct_iter_state {
+ struct seq_net_private p;
unsigned int bucket;
};
static struct hlist_node *ct_get_first(struct seq_file *seq)
{
+ struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
struct hlist_node *n;
for (st->bucket = 0;
st->bucket < nf_conntrack_htable_size;
st->bucket++) {
- n = rcu_dereference(init_net.ct.hash[st->bucket].first);
+ n = rcu_dereference(net->ct.hash[st->bucket].first);
if (n)
return n;
}
@@ -42,13 +44,14 @@ static struct hlist_node *ct_get_first(struct seq_file *seq)
static struct hlist_node *ct_get_next(struct seq_file *seq,
struct hlist_node *head)
{
+ struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
head = rcu_dereference(head->next);
while (head == NULL) {
if (++st->bucket >= nf_conntrack_htable_size)
return NULL;
- head = rcu_dereference(init_net.ct.hash[st->bucket].first);
+ head = rcu_dereference(net->ct.hash[st->bucket].first);
}
return head;
}
@@ -158,8 +161,8 @@ static const struct seq_operations ct_seq_ops = {
static int ct_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &ct_seq_ops,
- sizeof(struct ct_iter_state));
+ return seq_open_net(inode, file, &ct_seq_ops,
+ sizeof(struct ct_iter_state));
}
static const struct file_operations ct_file_ops = {
@@ -167,17 +170,18 @@ static const struct file_operations ct_file_ops = {
.open = ct_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
/* expects */
struct ct_expect_iter_state {
+ struct seq_net_private p;
unsigned int bucket;
};
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
- struct net *net = &init_net;
+ struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
struct hlist_node *n;
@@ -192,7 +196,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
struct hlist_node *head)
{
- struct net *net = &init_net;
+ struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
head = rcu_dereference(head->next);
@@ -267,8 +271,8 @@ static const struct seq_operations exp_seq_ops = {
static int exp_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &exp_seq_ops,
- sizeof(struct ct_expect_iter_state));
+ return seq_open_net(inode, file, &exp_seq_ops,
+ sizeof(struct ct_expect_iter_state));
}
static const struct file_operations ip_exp_file_ops = {
@@ -276,7 +280,7 @@ static const struct file_operations ip_exp_file_ops = {
.open = exp_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
@@ -367,36 +371,51 @@ static const struct file_operations ct_cpu_seq_fops = {
.release = seq_release,
};
-int __init nf_conntrack_ipv4_compat_init(void)
+static int __net_init ip_conntrack_net_init(struct net *net)
{
struct proc_dir_entry *proc, *proc_exp, *proc_stat;
- proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops);
+ proc = proc_net_fops_create(net, "ip_conntrack", 0440, &ct_file_ops);
if (!proc)
goto err1;
- proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440,
+ proc_exp = proc_net_fops_create(net, "ip_conntrack_expect", 0440,
&ip_exp_file_ops);
if (!proc_exp)
goto err2;
proc_stat = proc_create("ip_conntrack", S_IRUGO,
- init_net.proc_net_stat, &ct_cpu_seq_fops);
+ net->proc_net_stat, &ct_cpu_seq_fops);
if (!proc_stat)
goto err3;
return 0;
err3:
- proc_net_remove(&init_net, "ip_conntrack_expect");
+ proc_net_remove(net, "ip_conntrack_expect");
err2:
- proc_net_remove(&init_net, "ip_conntrack");
+ proc_net_remove(net, "ip_conntrack");
err1:
return -ENOMEM;
}
+static void __net_exit ip_conntrack_net_exit(struct net *net)
+{
+ remove_proc_entry("ip_conntrack", net->proc_net_stat);
+ proc_net_remove(net, "ip_conntrack_expect");
+ proc_net_remove(net, "ip_conntrack");
+}
+
+static struct pernet_operations ip_conntrack_net_ops = {
+ .init = ip_conntrack_net_init,
+ .exit = ip_conntrack_net_exit,
+};
+
+int __init nf_conntrack_ipv4_compat_init(void)
+{
+ return register_pernet_subsys(&ip_conntrack_net_ops);
+}
+
void __exit nf_conntrack_ipv4_compat_fini(void)
{
- remove_proc_entry("ip_conntrack", init_net.proc_net_stat);
- proc_net_remove(&init_net, "ip_conntrack_expect");
- proc_net_remove(&init_net, "ip_conntrack");
+ unregister_pernet_subsys(&ip_conntrack_net_ops);
}
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 12/33] netns ct: export netns list
2008-09-04 16:43 ` Patrick McHardy
` (10 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 11/33] netns ct: per-netns /proc/net/ip_conntrack, /proc/net/stat/ip_conntrack, /proc/net/ip_conntrack_expect Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-09 5:59 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 13/33] netns ct: unregister helper in every netns Alexey Dobriyan
` (20 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Conntrack code will use it for
a) removing expectations and helpers when corresponding module is removed, and
b) removing conntracks when L3 protocol conntrack module is removed.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7c52fe2..b0dc818 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -18,6 +18,7 @@ static struct list_head *first_device = &pernet_list;
static DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
+EXPORT_SYMBOL_GPL(net_namespace_list);
struct net init_net;
EXPORT_SYMBOL(init_net);
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 13/33] netns ct: unregister helper in every netns
2008-09-04 16:43 ` Patrick McHardy
` (11 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 12/33] netns ct: export netns list Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-09 6:01 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 14/33] netns ct: cleanup after L3 and L4 proto unregister " Alexey Dobriyan
` (19 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 920e778..9c06b9f 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -123,29 +123,18 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
+ struct net *net)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_expect *exp;
const struct hlist_node *n, *next;
unsigned int i;
- mutex_lock(&nf_ct_helper_mutex);
- hlist_del_rcu(&me->hnode);
- nf_ct_helper_count--;
- mutex_unlock(&nf_ct_helper_mutex);
-
- /* Make sure every nothing is still using the helper unless its a
- * connection in the hash.
- */
- synchronize_rcu();
-
- spin_lock_bh(&nf_conntrack_lock);
-
/* Get rid of expectations */
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, n, next,
- &init_net.ct.expect_hash[i], hnode) {
+ &net->ct.expect_hash[i], hnode) {
struct nf_conn_help *help = nfct_help(exp->master);
if ((help->helper == me || exp->helper == me) &&
del_timer(&exp->timeout)) {
@@ -156,12 +145,31 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
}
/* Get rid of expecteds, set helpers to NULL. */
- hlist_for_each_entry(h, n, &init_net.ct.unconfirmed, hnode)
+ hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode)
unhelp(h, me);
for (i = 0; i < nf_conntrack_htable_size; i++) {
- hlist_for_each_entry(h, n, &init_net.ct.hash[i], hnode)
+ hlist_for_each_entry(h, n, &net->ct.hash[i], hnode)
unhelp(h, me);
}
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
+ struct net *net;
+
+ mutex_lock(&nf_ct_helper_mutex);
+ hlist_del_rcu(&me->hnode);
+ nf_ct_helper_count--;
+ mutex_unlock(&nf_ct_helper_mutex);
+
+ /* Make sure nothing is still using the helper unless it's a
+ * connection in the hash.
+ */
+ synchronize_rcu();
+
+ spin_lock_bh(&nf_conntrack_lock);
+ for_each_net(net)
+ __nf_conntrack_helper_unregister(me, net);
spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 14/33] netns ct: cleanup after L3 and L4 proto unregister in every netns
2008-09-04 16:43 ` Patrick McHardy
` (12 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 13/33] netns ct: unregister helper in every netns Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-09 6:03 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 15/33] netns ct: pass conntrack to nf_conntrack_event_cache() not skb Alexey Dobriyan
` (18 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 3a2f7ef..a59a307 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -207,6 +207,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
{
+ struct net *net;
+
BUG_ON(proto->l3proto >= AF_MAX);
mutex_lock(&nf_ct_proto_mutex);
@@ -219,7 +221,8 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
synchronize_rcu();
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_cleanup(&init_net, kill_l3proto, proto);
+ for_each_net(net)
+ nf_ct_iterate_cleanup(net, kill_l3proto, proto);
}
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
@@ -316,6 +319,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
{
+ struct net *net;
+
BUG_ON(l4proto->l3proto >= PF_MAX);
mutex_lock(&nf_ct_proto_mutex);
@@ -328,7 +333,8 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
synchronize_rcu();
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_cleanup(&init_net, kill_l4proto, l4proto);
+ for_each_net(net)
+ nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
}
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 15/33] netns ct: pass conntrack to nf_conntrack_event_cache() not skb
2008-09-04 16:43 ` Patrick McHardy
` (13 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 14/33] netns ct: cleanup after L3 and L4 proto unregister " Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-09 6:04 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 16/33] netns ct: per-netns event cache Alexey Dobriyan
` (17 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
This is cleaner: we already know the conntrack to which the event is relevant.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index f0b9078..c1b406c 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -28,10 +28,8 @@ extern void __nf_ct_event_cache_init(struct nf_conn *ct);
extern void nf_ct_event_cache_flush(void);
static inline void
-nf_conntrack_event_cache(enum ip_conntrack_events event,
- const struct sk_buff *skb)
+nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
{
- struct nf_conn *ct = (struct nf_conn *)skb->nfct;
struct nf_conntrack_ecache *ecache;
local_bh_disable();
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 8c7ed5b..205ba39 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -91,7 +91,7 @@ static int icmp_packet(struct nf_conn *ct,
nf_ct_kill_acct(ct, ctinfo, skb);
} else {
atomic_inc(&ct->proto.icmp.count);
- nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+ nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
}
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 11976ea..7eed1fb 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -192,7 +192,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
ct, CTINFO2DIR(ctinfo));
- nf_conntrack_event_cache(IPCT_NATSEQADJ, skb);
+ nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
}
return 1;
}
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index aabddfe..df04de9 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -93,7 +93,7 @@ static int icmpv6_packet(struct nf_conn *ct,
nf_ct_kill_acct(ct, ctinfo, skb);
} else {
atomic_inc(&ct->proto.icmp.count);
- nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+ nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 251f020..01f59c5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -370,14 +370,14 @@ __nf_conntrack_confirm(struct sk_buff *skb)
spin_unlock_bh(&nf_conntrack_lock);
help = nfct_help(ct);
if (help && help->helper)
- nf_conntrack_event_cache(IPCT_HELPER, skb);
+ nf_conntrack_event_cache(IPCT_HELPER, ct);
#ifdef CONFIG_NF_NAT_NEEDED
if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_NATINFO, skb);
+ nf_conntrack_event_cache(IPCT_NATINFO, ct);
#endif
nf_conntrack_event_cache(master_ct(ct) ?
- IPCT_RELATED : IPCT_NEW, skb);
+ IPCT_RELATED : IPCT_NEW, ct);
return NF_ACCEPT;
out:
@@ -740,7 +740,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
}
if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_STATUS, skb);
+ nf_conntrack_event_cache(IPCT_STATUS, ct);
return ret;
}
@@ -853,7 +853,7 @@ acct:
/* must be unlocked when calling event cache */
if (event)
- nf_conntrack_event_cache(event, skb);
+ nf_conntrack_event_cache(event, ct);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index bb20672..4f71071 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -318,7 +318,8 @@ static int find_nl_seq(u32 seq, const struct nf_ct_ftp_master *info, int dir)
}
/* We don't update if it's older than what we have. */
-static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir,
+static void update_nl_seq(struct nf_conn *ct, u32 nl_seq,
+ struct nf_ct_ftp_master *info, int dir,
struct sk_buff *skb)
{
unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
@@ -336,11 +337,11 @@ static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir,
if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
- nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+ nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
} else if (oldest != NUM_SEQ_TO_REMEMBER &&
after(nl_seq, info->seq_aft_nl[dir][oldest])) {
info->seq_aft_nl[dir][oldest] = nl_seq;
- nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+ nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
}
}
@@ -509,7 +510,7 @@ out_update_nl:
/* Now if this ends in \n, update ftp info. Seq may have been
* adjusted by NAT code. */
if (ends_in_nl)
- update_nl_seq(seq, ct_ftp_info, dir, skb);
+ update_nl_seq(ct, seq, ct_ftp_info, dir, skb);
out:
spin_unlock_bh(&nf_ftp_lock);
return ret;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index c5a7822..5b1273a 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -229,7 +229,7 @@ static int gre_packet(struct nf_conn *ct,
ct->proto.gre.stream_timeout);
/* Also, more likely to be important, and not a probe. */
set_bit(IPS_ASSURED_BIT, &ct->status);
- nf_conntrack_event_cache(IPCT_STATUS, skb);
+ nf_conntrack_event_cache(IPCT_STATUS, ct);
} else
nf_ct_refresh_acct(ct, ctinfo, skb,
ct->proto.gre.timeout);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index b5a9059..ae8c260 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -369,7 +369,7 @@ static int sctp_packet(struct nf_conn *ct,
ct->proto.sctp.state = new_state;
if (old_state != new_state)
- nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
+ nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
}
write_unlock_bh(&sctp_lock);
@@ -380,7 +380,7 @@ static int sctp_packet(struct nf_conn *ct,
new_state == SCTP_CONNTRACK_ESTABLISHED) {
pr_debug("Setting assured bit\n");
set_bit(IPS_ASSURED_BIT, &ct->status);
- nf_conntrack_event_cache(IPCT_STATUS, skb);
+ nf_conntrack_event_cache(IPCT_STATUS, ct);
}
return NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 4e71de2..b5d62d6 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -969,9 +969,9 @@ static int tcp_packet(struct nf_conn *ct,
timeout = tcp_timeouts[new_state];
write_unlock_bh(&tcp_lock);
- nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+ nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
if (new_state != old_state)
- nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
+ nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
/* If only reply is a RST, we can consider ourselves not to
@@ -990,7 +990,7 @@ static int tcp_packet(struct nf_conn *ct,
after SYN_RECV or a valid answer for a picked up
connection. */
set_bit(IPS_ASSURED_BIT, &ct->status);
- nf_conntrack_event_cache(IPCT_STATUS, skb);
+ nf_conntrack_event_cache(IPCT_STATUS, ct);
}
nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 8a245be..e0ee89e 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -75,7 +75,7 @@ static int udp_packet(struct nf_conn *ct,
nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream);
/* Also, more likely to be important, and not a probe */
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_STATUS, skb);
+ nf_conntrack_event_cache(IPCT_STATUS, ct);
} else
nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout);
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 9817019..c5b77c8 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -75,7 +75,7 @@ static int udplite_packet(struct nf_conn *ct,
nf_ct_udplite_timeout_stream);
/* Also, more likely to be important, and not a probe */
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_STATUS, skb);
+ nf_conntrack_event_cache(IPCT_STATUS, ct);
} else
nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout);
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index e72e5d0..e1415c3 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -54,7 +54,7 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in,
newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
if (newmark != ct->mark) {
ct->mark = newmark;
- nf_conntrack_event_cache(IPCT_MARK, skb);
+ nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
case XT_CONNMARK_SAVE:
@@ -62,7 +62,7 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in,
(skb->mark & markinfo->mask);
if (ct->mark != newmark) {
ct->mark = newmark;
- nf_conntrack_event_cache(IPCT_MARK, skb);
+ nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
case XT_CONNMARK_RESTORE:
@@ -95,7 +95,7 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in,
newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
if (ct->mark != newmark) {
ct->mark = newmark;
- nf_conntrack_event_cache(IPCT_MARK, skb);
+ nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
case XT_CONNMARK_SAVE:
@@ -103,7 +103,7 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in,
(skb->mark & info->nfmask);
if (ct->mark != newmark) {
ct->mark = newmark;
- nf_conntrack_event_cache(IPCT_MARK, skb);
+ nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
case XT_CONNMARK_RESTORE:
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index ae939e5..5f221c3 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -43,7 +43,7 @@ static void secmark_save(const struct sk_buff *skb)
ct = nf_ct_get(skb, &ctinfo);
if (ct && !ct->secmark) {
ct->secmark = skb->secmark;
- nf_conntrack_event_cache(IPCT_SECMARK, skb);
+ nf_conntrack_event_cache(IPCT_SECMARK, ct);
}
}
}
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 16/33] netns ct: per-netns event cache
2008-09-04 16:43 ` Patrick McHardy
` (14 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 15/33] netns ct: pass conntrack to nf_conntrack_event_cache() not skb Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
[not found] ` <1220842990-30500-16-git-send-email-adobriyan-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2008-09-08 3:02 ` [PATCH 17/33] netns ct: final init_net tweaks Alexey Dobriyan
` (16 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Heh, last-minute proofreading of this patch made me think
that this is actually unneeded, simply because "ct" pointers will be
different for different conntracks in different netns, just like they
are different in one netns.
Not so sure anymore.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index c1b406c..35f814c 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -8,6 +8,7 @@
#include <linux/notifier.h>
#include <linux/interrupt.h>
+#include <net/net_namespace.h>
#include <net/netfilter/nf_conntrack_expect.h>
#ifdef CONFIG_NF_CONNTRACK_EVENTS
@@ -15,9 +16,6 @@ struct nf_conntrack_ecache {
struct nf_conn *ct;
unsigned int events;
};
-DECLARE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
-
-#define CONNTRACK_ECACHE(x) (__get_cpu_var(nf_conntrack_ecache).x)
extern struct atomic_notifier_head nf_conntrack_chain;
extern int nf_conntrack_register_notifier(struct notifier_block *nb);
@@ -25,15 +23,16 @@ extern int nf_conntrack_unregister_notifier(struct notifier_block *nb);
extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
extern void __nf_ct_event_cache_init(struct nf_conn *ct);
-extern void nf_ct_event_cache_flush(void);
+extern void nf_ct_event_cache_flush(struct net *net);
static inline void
nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *ecache;
local_bh_disable();
- ecache = &__get_cpu_var(nf_conntrack_ecache);
+ ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
if (ct != ecache->ct)
__nf_ct_event_cache_init(ct);
ecache->events |= event;
@@ -58,6 +57,9 @@ nf_ct_expect_event(enum ip_conntrack_expect_events event,
atomic_notifier_call_chain(&nf_ct_expect_chain, event, exp);
}
+extern int nf_conntrack_ecache_init(struct net *net);
+extern void nf_conntrack_ecache_fini(struct net *net);
+
#else /* CONFIG_NF_CONNTRACK_EVENTS */
static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
@@ -67,7 +69,15 @@ static inline void nf_conntrack_event(enum ip_conntrack_events event,
static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {}
static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp) {}
-static inline void nf_ct_event_cache_flush(void) {}
+static inline void nf_ct_event_cache_flush(struct net *net) {}
+
+static inline int nf_conntrack_ecache_init(struct net *net)
+{
+ return 0; /* events compiled out: no per-netns cache to allocate */
+}
+static inline void nf_conntrack_ecache_fini(struct net *net)
+{
+}
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
#endif /*_NF_CONNTRACK_ECACHE_H*/
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 6ddf58e..9d5c162 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -4,12 +4,17 @@
#include <linux/list.h>
#include <asm/atomic.h>
+struct nf_conntrack_ecache;
+
struct netns_ct {
atomic_t count;
unsigned int expect_count;
struct hlist_head *hash;
struct hlist_head *expect_hash;
struct hlist_head unconfirmed;
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ struct nf_conntrack_ecache *ecache;
+#endif
int hash_vmalloc;
int expect_vmalloc;
};
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 01f59c5..b55944e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1023,7 +1023,8 @@ void nf_conntrack_cleanup(struct net *net)
delete... */
synchronize_net();
- nf_ct_event_cache_flush();
+ nf_ct_event_cache_flush(net);
+ nf_conntrack_ecache_fini(net);
i_see_dead_people:
nf_conntrack_flush(net);
if (atomic_read(&net->ct.count) != 0) {
@@ -1151,11 +1152,14 @@ int nf_conntrack_init(struct net *net)
max_factor = 4;
}
atomic_set(&net->ct.count, 0);
+ ret = nf_conntrack_ecache_init(net);
+ if (ret < 0)
+ goto err_ecache;
net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
&net->ct.hash_vmalloc);
if (!net->ct.hash) {
printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
- goto err_out;
+ goto err_hash;
}
INIT_HLIST_HEAD(&net->ct.unconfirmed);
@@ -1215,6 +1219,8 @@ err_free_conntrack_slab:
err_free_hash:
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
-err_out:
+err_hash:
+ nf_conntrack_ecache_fini(net);
+err_ecache:
return -ENOMEM;
}
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 83c41ac..a5f5e2e 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -29,9 +29,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_chain);
ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain);
EXPORT_SYMBOL_GPL(nf_ct_expect_chain);
-DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
-EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache);
-
/* deliver cached events and clear cache entry - must be called with locally
* disabled softirqs */
static inline void
@@ -51,10 +48,11 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
* by code prior to async packet handling for freeing the skb */
void nf_ct_deliver_cached_events(const struct nf_conn *ct)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *ecache;
local_bh_disable();
- ecache = &__get_cpu_var(nf_conntrack_ecache);
+ ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
if (ecache->ct == ct)
__nf_ct_deliver_cached_events(ecache);
local_bh_enable();
@@ -64,10 +62,11 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
/* Deliver cached events for old pending events, if current conntrack != old */
void __nf_ct_event_cache_init(struct nf_conn *ct)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *ecache;
/* take care of delivering potentially old events */
- ecache = &__get_cpu_var(nf_conntrack_ecache);
+ ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
BUG_ON(ecache->ct == ct);
if (ecache->ct)
__nf_ct_deliver_cached_events(ecache);
@@ -79,18 +78,31 @@ EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
/* flush the event cache - touches other CPU's data and must not be called
* while packets are still passing through the code */
-void nf_ct_event_cache_flush(void)
+void nf_ct_event_cache_flush(struct net *net)
{
struct nf_conntrack_ecache *ecache;
int cpu;
for_each_possible_cpu(cpu) {
- ecache = &per_cpu(nf_conntrack_ecache, cpu);
+ ecache = per_cpu_ptr(net->ct.ecache, cpu);
if (ecache->ct)
nf_ct_put(ecache->ct);
}
}
+int nf_conntrack_ecache_init(struct net *net)
+{
+ net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache);
+ if (!net->ct.ecache)
+ return -ENOMEM;
+ return 0;
+}
+
+void nf_conntrack_ecache_fini(struct net *net)
+{
+ free_percpu(net->ct.ecache);
+}
+
int nf_conntrack_register_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 17/33] netns ct: final init_net tweaks
2008-09-04 16:43 ` Patrick McHardy
` (15 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 16/33] netns ct: per-netns event cache Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-09 7:20 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 17/33] netns ct: final init_net tweaks Alexey Dobriyan
` (15 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Add init_net checks so that global state (kmem caches, protocol/helper/accounting setup, global hooks) is created and destroyed only once.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b55944e..52d0663 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1016,7 +1016,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_flush);
supposed to kill the mall. */
void nf_conntrack_cleanup(struct net *net)
{
- rcu_assign_pointer(ip_ct_attach, NULL);
+ if (net_eq(net, &init_net))
+ rcu_assign_pointer(ip_ct_attach, NULL);
/* This makes sure all current packets have passed through
netfilter framework. Roll on, two-stage module
@@ -1035,16 +1036,21 @@ void nf_conntrack_cleanup(struct net *net)
while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
schedule();
- rcu_assign_pointer(nf_ct_destroy, NULL);
+ if (net_eq(net, &init_net)) {
+ rcu_assign_pointer(nf_ct_destroy, NULL);
- kmem_cache_destroy(nf_conntrack_cachep);
+ kmem_cache_destroy(nf_conntrack_cachep);
+ }
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
- nf_conntrack_acct_fini();
+ if (net_eq(net, &init_net))
+ nf_conntrack_acct_fini();
nf_conntrack_expect_fini(net);
- nf_conntrack_helper_fini();
- nf_conntrack_proto_fini();
+ if (net_eq(net, &init_net)) {
+ nf_conntrack_helper_fini();
+ nf_conntrack_proto_fini();
+ }
}
struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced)
@@ -1134,22 +1140,28 @@ int nf_conntrack_init(struct net *net)
int max_factor = 8;
int ret;
- /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
- * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
- if (!nf_conntrack_htable_size) {
- nf_conntrack_htable_size
- = (((num_physpages << PAGE_SHIFT) / 16384)
- / sizeof(struct hlist_head));
- if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
- nf_conntrack_htable_size = 16384;
- if (nf_conntrack_htable_size < 32)
- nf_conntrack_htable_size = 32;
-
- /* Use a max. factor of four by default to get the same max as
- * with the old struct list_heads. When a table size is given
- * we use the old value of 8 to avoid reducing the max.
- * entries. */
- max_factor = 4;
+ if (net_eq(net, &init_net)) {
+ /*
+ * Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
+ * machine has 512 buckets. >= 1GB machines have 16384 buckets.
+ */
+ if (!nf_conntrack_htable_size) {
+ nf_conntrack_htable_size
+ = (((num_physpages << PAGE_SHIFT) / 16384)
+ / sizeof(struct hlist_head));
+ if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
+ nf_conntrack_htable_size = 16384;
+ if (nf_conntrack_htable_size < 32)
+ nf_conntrack_htable_size = 32;
+
+ /*
+ * Use a max. factor of four by default to get the same
+ * max as with the old struct list_heads. When a table
+ * size is given we use the old value of 8 to avoid
+ * reducing the max. entries.
+ */
+ max_factor = 4;
+ }
}
atomic_set(&net->ct.count, 0);
ret = nf_conntrack_ecache_init(net);
@@ -1163,59 +1175,66 @@ int nf_conntrack_init(struct net *net)
}
INIT_HLIST_HEAD(&net->ct.unconfirmed);
- nf_conntrack_max = max_factor * nf_conntrack_htable_size;
+ if (net_eq(net, &init_net)) {
+ nf_conntrack_max = max_factor * nf_conntrack_htable_size;
- printk("nf_conntrack version %s (%u buckets, %d max)\n",
- NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
- nf_conntrack_max);
+ printk("nf_conntrack version %s (%u buckets, %d max)\n",
+ NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
+ nf_conntrack_max);
- nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
+ nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
sizeof(struct nf_conn),
0, 0, NULL);
- if (!nf_conntrack_cachep) {
- printk(KERN_ERR "Unable to create nf_conn slab cache\n");
- goto err_free_hash;
- }
+ if (!nf_conntrack_cachep) {
+ printk(KERN_ERR "Unable to create nf_conn slab cache\n");
+ goto err_free_hash;
+ }
- ret = nf_conntrack_proto_init();
- if (ret < 0)
- goto err_free_conntrack_slab;
+ ret = nf_conntrack_proto_init();
+ if (ret < 0)
+ goto err_free_conntrack_slab;
+ }
ret = nf_conntrack_expect_init(net);
if (ret < 0)
goto out_fini_proto;
- ret = nf_conntrack_helper_init();
- if (ret < 0)
- goto out_fini_expect;
+ if (net_eq(net, &init_net)) {
+ ret = nf_conntrack_helper_init();
+ if (ret < 0)
+ goto out_fini_expect;
- ret = nf_conntrack_acct_init();
- if (ret < 0)
- goto out_fini_helper;
+ ret = nf_conntrack_acct_init();
+ if (ret < 0)
+ goto out_fini_helper;
- /* For use by REJECT target */
- rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
- rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
+ /* For use by REJECT target */
+ rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
+ rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
- /* Set up fake conntrack:
- - to never be deleted, not in any hashes */
+ /* Set up fake conntrack:
+ - to never be deleted, not in any hashes */
#ifdef CONFIG_NET_NS
- nf_conntrack_untracked.ct_net = &init_net;
+ nf_conntrack_untracked.ct_net = &init_net;
#endif
- atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
- /* - and look it like as a confirmed connection */
- set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
+ atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
+ /* - and look it like as a confirmed connection */
+ set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
+ }
return ret;
out_fini_helper:
- nf_conntrack_helper_fini();
+ if (net_eq(net, &init_net))
+ nf_conntrack_helper_fini();
out_fini_expect:
nf_conntrack_expect_fini(net);
out_fini_proto:
- nf_conntrack_proto_fini();
+ if (net_eq(net, &init_net))
+ nf_conntrack_proto_fini();
err_free_conntrack_slab:
- kmem_cache_destroy(nf_conntrack_cachep);
+ if (net_eq(net, &init_net))
+ kmem_cache_destroy(nf_conntrack_cachep);
err_free_hash:
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 6a09200..be08c87 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -563,12 +563,14 @@ int nf_conntrack_expect_init(struct net *net)
{
int err = -ENOMEM;
- if (!nf_ct_expect_hsize) {
- nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
- if (!nf_ct_expect_hsize)
- nf_ct_expect_hsize = 1;
+ if (net_eq(net, &init_net)) {
+ if (!nf_ct_expect_hsize) {
+ nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
+ if (!nf_ct_expect_hsize)
+ nf_ct_expect_hsize = 1;
+ }
+ nf_ct_expect_max = nf_ct_expect_hsize * 4;
}
- nf_ct_expect_max = nf_ct_expect_hsize * 4;
net->ct.expect_count = 0;
net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
@@ -576,11 +578,13 @@ int nf_conntrack_expect_init(struct net *net)
if (net->ct.expect_hash == NULL)
goto err1;
- nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
+ if (net_eq(net, &init_net)) {
+ nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
sizeof(struct nf_conntrack_expect),
0, 0, NULL);
- if (!nf_ct_expect_cachep)
- goto err2;
+ if (!nf_ct_expect_cachep)
+ goto err2;
+ }
err = exp_proc_init(net);
if (err < 0)
@@ -589,7 +593,8 @@ int nf_conntrack_expect_init(struct net *net)
return 0;
err3:
- kmem_cache_destroy(nf_ct_expect_cachep);
+ if (net_eq(net, &init_net))
+ kmem_cache_destroy(nf_ct_expect_cachep);
err2:
nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
nf_ct_expect_hsize);
@@ -600,7 +605,8 @@ err1:
void nf_conntrack_expect_fini(struct net *net)
{
exp_proc_remove(net);
- kmem_cache_destroy(nf_ct_expect_cachep);
+ if (net_eq(net, &init_net))
+ kmem_cache_destroy(nf_ct_expect_cachep);
nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
nf_ct_expect_hsize);
}
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 17/33] netns ct: final init_net tweaks
2008-09-04 16:43 ` Patrick McHardy
` (16 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 17/33] netns ct: final init_net tweaks Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-08 3:02 ` [PATCH 19/33] netns ct: per-netns /proc/net/stat/nf_conntrack, /proc/net/stat/ip_conntrack Alexey Dobriyan
` (14 subsequent siblings)
32 siblings, 0 replies; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
For now the statistics are just counted separately per netns, not shown.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index f5447f1..c955610 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -290,12 +290,12 @@ extern unsigned int nf_conntrack_htable_size;
extern int nf_conntrack_checksum;
extern int nf_conntrack_max;
-DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
-#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++)
-#define NF_CT_STAT_INC_ATOMIC(count) \
+#define NF_CT_STAT_INC(net, count) \
+ (per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++)
+#define NF_CT_STAT_INC_ATOMIC(net, count) \
do { \
local_bh_disable(); \
- __get_cpu_var(nf_conntrack_stat).count++; \
+ per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++; \
local_bh_enable(); \
} while (0)
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 9d5c162..fc0a46d 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -12,6 +12,7 @@ struct netns_ct {
struct hlist_head *hash;
struct hlist_head *expect_hash;
struct hlist_head unconfirmed;
+ struct ip_conntrack_stat *stat;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index b294083..fdc85b3 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -294,7 +294,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return &per_cpu(nf_conntrack_stat, cpu);
+ return per_cpu_ptr(init_net.ct.stat, cpu);
}
return NULL;
@@ -308,7 +308,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return &per_cpu(nf_conntrack_stat, cpu);
+ return per_cpu_ptr(init_net.ct.stat, cpu);
}
return NULL;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 52d0663..342a1f3 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -56,9 +56,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
unsigned int nf_ct_log_invalid __read_mostly;
static struct kmem_cache *nf_conntrack_cachep __read_mostly;
-DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
-EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
-
static int nf_conntrack_hash_rnd_initted;
static unsigned int nf_conntrack_hash_rnd;
@@ -203,7 +200,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
}
- NF_CT_STAT_INC(delete);
+ NF_CT_STAT_INC(nf_ct_net(ct), delete);
spin_unlock_bh(&nf_conntrack_lock);
if (ct->master)
@@ -230,7 +227,7 @@ static void death_by_timeout(unsigned long ul_conntrack)
spin_lock_bh(&nf_conntrack_lock);
/* Inside lock so preempt is disabled on module removal path.
* Otherwise we can get spurious warnings. */
- NF_CT_STAT_INC(delete_list);
+ NF_CT_STAT_INC(nf_ct_net(ct), delete_list);
clean_from_lists(ct);
spin_unlock_bh(&nf_conntrack_lock);
nf_ct_put(ct);
@@ -249,11 +246,11 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
local_bh_disable();
hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) {
if (nf_ct_tuple_equal(tuple, &h->tuple)) {
- NF_CT_STAT_INC(found);
+ NF_CT_STAT_INC(net, found);
local_bh_enable();
return h;
}
- NF_CT_STAT_INC(searched);
+ NF_CT_STAT_INC(net, searched);
}
local_bh_enable();
@@ -366,7 +363,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
add_timer(&ct->timeout);
atomic_inc(&ct->ct_general.use);
set_bit(IPS_CONFIRMED_BIT, &ct->status);
- NF_CT_STAT_INC(insert);
+ NF_CT_STAT_INC(net, insert);
spin_unlock_bh(&nf_conntrack_lock);
help = nfct_help(ct);
if (help && help->helper)
@@ -381,7 +378,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_ACCEPT;
out:
- NF_CT_STAT_INC(insert_failed);
+ NF_CT_STAT_INC(net, insert_failed);
spin_unlock_bh(&nf_conntrack_lock);
return NF_DROP;
}
@@ -405,11 +402,11 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) {
if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
nf_ct_tuple_equal(tuple, &h->tuple)) {
- NF_CT_STAT_INC(found);
+ NF_CT_STAT_INC(net, found);
rcu_read_unlock_bh();
return 1;
}
- NF_CT_STAT_INC(searched);
+ NF_CT_STAT_INC(net, searched);
}
rcu_read_unlock_bh();
@@ -454,7 +451,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
if (del_timer(&ct->timeout)) {
death_by_timeout((unsigned long)ct);
dropped = 1;
- NF_CT_STAT_INC_ATOMIC(early_drop);
+ NF_CT_STAT_INC_ATOMIC(net, early_drop);
}
nf_ct_put(ct);
return dropped;
@@ -581,7 +578,7 @@ init_conntrack(struct net *net,
ct->secmark = exp->master->secmark;
#endif
nf_conntrack_get(&ct->master->ct_general);
- NF_CT_STAT_INC(expect_new);
+ NF_CT_STAT_INC(net, expect_new);
} else {
struct nf_conntrack_helper *helper;
@@ -591,7 +588,7 @@ init_conntrack(struct net *net,
if (help)
rcu_assign_pointer(help->helper, helper);
}
- NF_CT_STAT_INC(new);
+ NF_CT_STAT_INC(net, new);
}
/* Overload tuple linked list to put us in unconfirmed list. */
@@ -683,7 +680,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
/* Previously seen (loopback or untracked)? Ignore. */
if (skb->nfct) {
- NF_CT_STAT_INC_ATOMIC(ignore);
+ NF_CT_STAT_INC_ATOMIC(net, ignore);
return NF_ACCEPT;
}
@@ -693,8 +690,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
&dataoff, &protonum);
if (ret <= 0) {
pr_debug("not prepared to track yet or error occured\n");
- NF_CT_STAT_INC_ATOMIC(error);
- NF_CT_STAT_INC_ATOMIC(invalid);
+ NF_CT_STAT_INC_ATOMIC(net, error);
+ NF_CT_STAT_INC_ATOMIC(net, invalid);
return -ret;
}
@@ -706,8 +703,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
if (l4proto->error != NULL) {
ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum);
if (ret <= 0) {
- NF_CT_STAT_INC_ATOMIC(error);
- NF_CT_STAT_INC_ATOMIC(invalid);
+ NF_CT_STAT_INC_ATOMIC(net, error);
+ NF_CT_STAT_INC_ATOMIC(net, invalid);
return -ret;
}
}
@@ -716,13 +713,13 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
l3proto, l4proto, &set_reply, &ctinfo);
if (!ct) {
/* Not valid part of a connection */
- NF_CT_STAT_INC_ATOMIC(invalid);
+ NF_CT_STAT_INC_ATOMIC(net, invalid);
return NF_ACCEPT;
}
if (IS_ERR(ct)) {
/* Too stressed to deal. */
- NF_CT_STAT_INC_ATOMIC(drop);
+ NF_CT_STAT_INC_ATOMIC(net, drop);
return NF_DROP;
}
@@ -735,7 +732,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
pr_debug("nf_conntrack_in: Can't track with proto module\n");
nf_conntrack_put(skb->nfct);
skb->nfct = NULL;
- NF_CT_STAT_INC_ATOMIC(invalid);
+ NF_CT_STAT_INC_ATOMIC(net, invalid);
return -ret;
}
@@ -1047,6 +1044,7 @@ void nf_conntrack_cleanup(struct net *net)
if (net_eq(net, &init_net))
nf_conntrack_acct_fini();
nf_conntrack_expect_fini(net);
+ free_percpu(net->ct.stat);
if (net_eq(net, &init_net)) {
nf_conntrack_helper_fini();
nf_conntrack_proto_fini();
@@ -1164,6 +1162,9 @@ int nf_conntrack_init(struct net *net)
}
}
atomic_set(&net->ct.count, 0);
+ net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
+ if (!net->ct.stat)
+ goto err_stat;
ret = nf_conntrack_ecache_init(net);
if (ret < 0)
goto err_ecache;
@@ -1241,5 +1242,7 @@ err_free_hash:
err_hash:
nf_conntrack_ecache_fini(net);
err_ecache:
+ free_percpu(net->ct.stat);
+err_stat:
return -ENOMEM;
}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index be08c87..37a703b 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -53,7 +53,7 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
master_help->expecting[exp->class]--;
nf_ct_expect_put(exp);
- NF_CT_STAT_INC(expect_delete);
+ NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
@@ -326,7 +326,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
add_timer(&exp->timeout);
atomic_inc(&exp->use);
- NF_CT_STAT_INC(expect_create);
+ NF_CT_STAT_INC(net, expect_create);
}
/* Race with expectations being used means we could have none to find; OK. */
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 02eaf87..a4fdbbf 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -203,7 +203,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu + 1;
- return &per_cpu(nf_conntrack_stat, cpu);
+ return per_cpu_ptr(init_net.ct.stat, cpu);
}
return NULL;
@@ -217,7 +217,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu + 1;
- return &per_cpu(nf_conntrack_stat, cpu);
+ return per_cpu_ptr(init_net.ct.stat, cpu);
}
return NULL;
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 19/33] netns ct: per-netns /proc/net/stat/nf_conntrack, /proc/net/stat/ip_conntrack
2008-09-04 16:43 ` Patrick McHardy
` (17 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 17/33] netns ct: final init_net tweaks Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-08 3:02 ` [PATCH 20/33] netns ct: per-netns net.netfilter.nf_conntrack_count sysctl Alexey Dobriyan
` (13 subsequent siblings)
32 siblings, 0 replies; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Show correct conntrack count while I'm at it.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index fdc85b3..313ebf0 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -285,6 +285,7 @@ static const struct file_operations ip_exp_file_ops = {
static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
int cpu;
if (*pos == 0)
@@ -294,7 +295,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return per_cpu_ptr(init_net.ct.stat, cpu);
+ return per_cpu_ptr(net->ct.stat, cpu);
}
return NULL;
@@ -302,13 +303,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
int cpu;
for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return per_cpu_ptr(init_net.ct.stat, cpu);
+ return per_cpu_ptr(net->ct.stat, cpu);
}
return NULL;
@@ -320,7 +322,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
static int ct_cpu_seq_show(struct seq_file *seq, void *v)
{
- unsigned int nr_conntracks = atomic_read(&init_net.ct.count);
+ struct net *net = seq_file_net(seq);
+ unsigned int nr_conntracks = atomic_read(&net->ct.count);
const struct ip_conntrack_stat *st = v;
if (v == SEQ_START_TOKEN) {
@@ -360,7 +363,8 @@ static const struct seq_operations ct_cpu_seq_ops = {
static int ct_cpu_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ct_cpu_seq_ops);
+ return seq_open_net(inode, file, &ct_cpu_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations ct_cpu_seq_fops = {
@@ -368,7 +372,7 @@ static const struct file_operations ct_cpu_seq_fops = {
.open = ct_cpu_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
};
static int __net_init ip_conntrack_net_init(struct net *net)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index a4fdbbf..169760d 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -194,6 +194,7 @@ static const struct file_operations ct_file_ops = {
static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
int cpu;
if (*pos == 0)
@@ -203,7 +204,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu + 1;
- return per_cpu_ptr(init_net.ct.stat, cpu);
+ return per_cpu_ptr(net->ct.stat, cpu);
}
return NULL;
@@ -211,13 +212,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
int cpu;
for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
if (!cpu_possible(cpu))
continue;
*pos = cpu + 1;
- return per_cpu_ptr(init_net.ct.stat, cpu);
+ return per_cpu_ptr(net->ct.stat, cpu);
}
return NULL;
@@ -229,7 +231,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
static int ct_cpu_seq_show(struct seq_file *seq, void *v)
{
- unsigned int nr_conntracks = atomic_read(&init_net.ct.count);
+ struct net *net = seq_file_net(seq);
+ unsigned int nr_conntracks = atomic_read(&net->ct.count);
const struct ip_conntrack_stat *st = v;
if (v == SEQ_START_TOKEN) {
@@ -269,7 +272,8 @@ static const struct seq_operations ct_cpu_seq_ops = {
static int ct_cpu_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ct_cpu_seq_ops);
+ return seq_open_net(inode, file, &ct_cpu_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations ct_cpu_seq_fops = {
@@ -277,7 +281,7 @@ static const struct file_operations ct_cpu_seq_fops = {
.open = ct_cpu_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
};
static int nf_conntrack_standalone_init_proc(struct net *net)
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 20/33] netns ct: per-netns net.netfilter.nf_conntrack_count sysctl
2008-09-04 16:43 ` Patrick McHardy
` (18 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 19/33] netns ct: per-netns /proc/net/stat/nf_conntrack, /proc/net/stat/ip_conntrack Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-08 3:02 ` [PATCH 21/33] netns ct: per-netns net.netfilter.nf_conntrack_checksum sysctl Alexey Dobriyan
` (12 subsequent siblings)
32 siblings, 0 replies; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Note: the sysctl table is always duplicated. This is simpler, less special-cased,
and less error-prone (I made one such mistake in the first version of this patch).
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index fc0a46d..2b50758 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <asm/atomic.h>
+struct ctl_table_header;
struct nf_conntrack_ecache;
struct netns_ct {
@@ -16,6 +17,9 @@ struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
+#ifdef CONFIG_SYSCTL
+ struct ctl_table_header *sysctl_header;
+#endif
int hash_vmalloc;
int expect_vmalloc;
};
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 169760d..64b4f95 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -330,7 +330,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_checksum);
static int log_invalid_proto_min = 0;
static int log_invalid_proto_max = 255;
-static struct ctl_table_header *nf_ct_sysctl_header;
static struct ctl_table_header *nf_ct_netfilter_header;
static ctl_table nf_ct_sysctl_table[] = {
@@ -409,40 +408,58 @@ static struct ctl_path nf_ct_path[] = {
EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
-static int nf_conntrack_standalone_init_sysctl(void)
+static int nf_conntrack_standalone_init_sysctl(struct net *net)
{
- nf_ct_netfilter_header =
- register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table);
- if (!nf_ct_netfilter_header)
- goto out;
-
- nf_ct_sysctl_header =
- register_sysctl_paths(nf_net_netfilter_sysctl_path,
- nf_ct_sysctl_table);
- if (!nf_ct_sysctl_header)
+ struct ctl_table *table;
+
+ if (net_eq(net, &init_net)) {
+ nf_ct_netfilter_header =
+ register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table);
+ if (!nf_ct_netfilter_header)
+ goto out;
+ }
+
+ table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto out_kmemdup;
+
+ table[1].data = &net->ct.count;
+
+ net->ct.sysctl_header = register_net_sysctl_table(net,
+ nf_net_netfilter_sysctl_path, table);
+ if (!net->ct.sysctl_header)
goto out_unregister_netfilter;
return 0;
out_unregister_netfilter:
- unregister_sysctl_table(nf_ct_netfilter_header);
+ kfree(table);
+out_kmemdup:
+ if (net_eq(net, &init_net))
+ unregister_sysctl_table(nf_ct_netfilter_header);
out:
printk("nf_conntrack: can't register to sysctl.\n");
return -ENOMEM;
}
-static void nf_conntrack_standalone_fini_sysctl(void)
+static void nf_conntrack_standalone_fini_sysctl(struct net *net)
{
- unregister_sysctl_table(nf_ct_netfilter_header);
- unregister_sysctl_table(nf_ct_sysctl_header);
+ struct ctl_table *table;
+
+ if (net_eq(net, &init_net))
+ unregister_sysctl_table(nf_ct_netfilter_header);
+ table = net->ct.sysctl_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->ct.sysctl_header);
+ kfree(table);
}
#else
-static int nf_conntrack_standalone_init_sysctl(void)
+static int nf_conntrack_standalone_init_sysctl(struct net *net)
{
return 0;
}
-static void nf_conntrack_standalone_fini_sysctl(void)
+static void nf_conntrack_standalone_fini_sysctl(struct net *net)
{
}
#endif /* CONFIG_SYSCTL */
@@ -457,8 +474,13 @@ static int nf_conntrack_net_init(struct net *net)
ret = nf_conntrack_standalone_init_proc(net);
if (ret < 0)
goto out_proc;
+ ret = nf_conntrack_standalone_init_sysctl(net);
+ if (ret < 0)
+ goto out_sysctl;
return 0;
+out_sysctl:
+ nf_conntrack_standalone_fini_proc(net);
out_proc:
nf_conntrack_cleanup(net);
out_init:
@@ -467,6 +489,7 @@ out_init:
static void nf_conntrack_net_exit(struct net *net)
{
+ nf_conntrack_standalone_fini_sysctl(net);
nf_conntrack_standalone_fini_proc(net);
nf_conntrack_cleanup(net);
}
@@ -478,25 +501,11 @@ static struct pernet_operations nf_conntrack_net_ops = {
static int __init nf_conntrack_standalone_init(void)
{
- int ret;
-
- ret = register_pernet_subsys(&nf_conntrack_net_ops);
- if (ret < 0)
- goto out;
- ret = nf_conntrack_standalone_init_sysctl();
- if (ret < 0)
- goto out_sysctl;
- return 0;
-
-out_sysctl:
- unregister_pernet_subsys(&nf_conntrack_net_ops);
-out:
- return ret;
+ return register_pernet_subsys(&nf_conntrack_net_ops);
}
static void __exit nf_conntrack_standalone_fini(void)
{
- nf_conntrack_standalone_fini_sysctl();
unregister_pernet_subsys(&nf_conntrack_net_ops);
}
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 21/33] netns ct: per-netns net.netfilter.nf_conntrack_checksum sysctl
2008-09-04 16:43 ` Patrick McHardy
` (19 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 20/33] netns ct: per-netns net.netfilter.nf_conntrack_count sysctl Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-08 3:02 ` [PATCH 22/33] netns ct: per-netns net.netfilter.nf_conntrack_log_invalid sysctl Alexey Dobriyan
` (11 subsequent siblings)
32 siblings, 0 replies; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index c955610..b76a868 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -287,7 +287,6 @@ static inline int nf_ct_is_untracked(const struct sk_buff *skb)
extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
extern unsigned int nf_conntrack_htable_size;
-extern int nf_conntrack_checksum;
extern int nf_conntrack_max;
#define NF_CT_STAT_INC(net, count) \
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 2b50758..38b6dae 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -17,6 +17,7 @@ struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
+ int sysctl_checksum;
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
#endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2e4dd3f..75871b1 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -270,7 +270,7 @@ static ctl_table ip_ct_sysctl_table[] = {
{
.ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM,
.procname = "ip_conntrack_checksum",
- .data = &nf_conntrack_checksum,
+ .data = &init_net.ct.sysctl_checksum,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 205ba39..ace66cb 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -188,7 +188,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
}
/* See ip_conntrack_proto_tcp.c */
- if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, hooknum, dataoff, 0)) {
if (LOG_INVALID(IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index df04de9..fa12e57 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -187,7 +187,7 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
return -NF_ACCEPT;
}
- if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
"nf_ct_icmpv6: ICMPv6 checksum failed\n");
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 6ead8da..769680e 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -575,7 +575,7 @@ static int dccp_error(struct net *net, struct sk_buff *skb,
}
}
- if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP,
pf)) {
msg = "nf_ct_dccp: bad checksum ";
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b5d62d6..131c9be 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -780,7 +780,7 @@ static int tcp_error(struct net *net,
* because the checksum is assumed to be correct.
*/
/* FIXME: Source route IP option packets --RR */
- if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index e0ee89e..3d3fffe 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -123,7 +123,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
* We skip checking packets on the outgoing path
* because the checksum is assumed to be correct.
* FIXME: Source route IP option packets --RR */
- if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
if (LOG_INVALID(IPPROTO_UDP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index c5b77c8..3d1697c 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -129,7 +129,7 @@ static int udplite_error(struct net *net,
}
/* Checksum invalid? Ignore. */
- if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
pf)) {
if (LOG_INVALID(IPPROTO_UDPLITE))
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 64b4f95..5cd0663 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -322,9 +322,6 @@ static void nf_conntrack_standalone_fini_proc(struct net *net)
/* Sysctl support */
-int nf_conntrack_checksum __read_mostly = 1;
-EXPORT_SYMBOL_GPL(nf_conntrack_checksum);
-
#ifdef CONFIG_SYSCTL
/* Log invalid packets of a given protocol */
static int log_invalid_proto_min = 0;
@@ -360,7 +357,7 @@ static ctl_table nf_ct_sysctl_table[] = {
{
.ctl_name = NET_NF_CONNTRACK_CHECKSUM,
.procname = "nf_conntrack_checksum",
- .data = &nf_conntrack_checksum,
+ .data = &init_net.ct.sysctl_checksum,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
@@ -425,6 +422,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
goto out_kmemdup;
table[1].data = &net->ct.count;
+ table[3].data = &net->ct.sysctl_checksum;
net->ct.sysctl_header = register_net_sysctl_table(net,
nf_net_netfilter_sysctl_path, table);
@@ -474,6 +472,7 @@ static int nf_conntrack_net_init(struct net *net)
ret = nf_conntrack_standalone_init_proc(net);
if (ret < 0)
goto out_proc;
+ net->ct.sysctl_checksum = 1;
ret = nf_conntrack_standalone_init_sysctl(net);
if (ret < 0)
goto out_sysctl;
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 22/33] netns ct: per-netns net.netfilter.nf_conntrack_log_invalid sysctl
2008-09-04 16:43 ` Patrick McHardy
` (20 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 21/33] netns ct: per-netns net.netfilter.nf_conntrack_checksum sysctl Alexey Dobriyan
@ 2008-09-08 3:02 ` Alexey Dobriyan
2008-09-08 3:03 ` Alexey Dobriyan
` (10 subsequent siblings)
32 siblings, 0 replies; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:02 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 97723d3..7f2f43c 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -117,20 +117,19 @@ extern int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
extern const struct nla_policy nf_ct_port_nla_policy[];
-/* Log invalid packets */
-extern unsigned int nf_ct_log_invalid;
-
#ifdef CONFIG_SYSCTL
#ifdef DEBUG_INVALID_PACKETS
-#define LOG_INVALID(proto) \
- (nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW)
+#define LOG_INVALID(net, proto) \
+ ((net)->ct.sysctl_log_invalid == (proto) || \
+ (net)->ct.sysctl_log_invalid == IPPROTO_RAW)
#else
-#define LOG_INVALID(proto) \
- ((nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) \
+#define LOG_INVALID(net, proto) \
+ (((net)->ct.sysctl_log_invalid == (proto) || \
+ (net)->ct.sysctl_log_invalid == IPPROTO_RAW) \
&& net_ratelimit())
#endif
#else
-#define LOG_INVALID(proto) 0
+#define LOG_INVALID(net, proto) 0
#endif /* CONFIG_SYSCTL */
#endif /*_NF_CONNTRACK_PROTOCOL_H*/
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 38b6dae..503e375 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -18,6 +18,7 @@ struct netns_ct {
struct nf_conntrack_ecache *ecache;
#endif
int sysctl_checksum;
+ unsigned int sysctl_log_invalid; /* Log invalid packets */
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
#endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 75871b1..af69acc 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -278,7 +278,7 @@ static ctl_table ip_ct_sysctl_table[] = {
{
.ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
.procname = "ip_conntrack_log_invalid",
- .data = &nf_ct_log_invalid,
+ .data = &init_net.ct.sysctl_log_invalid,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index ace66cb..4e88792 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -181,7 +181,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
/* Not enough header? */
icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
if (icmph == NULL) {
- if (LOG_INVALID(IPPROTO_ICMP))
+ if (LOG_INVALID(net, IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: short packet ");
return -NF_ACCEPT;
@@ -190,7 +190,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
/* See ip_conntrack_proto_tcp.c */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, hooknum, dataoff, 0)) {
- if (LOG_INVALID(IPPROTO_ICMP))
+ if (LOG_INVALID(net, IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: bad HW ICMP checksum ");
return -NF_ACCEPT;
@@ -203,7 +203,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
* discarded.
*/
if (icmph->type > NR_ICMP_TYPES) {
- if (LOG_INVALID(IPPROTO_ICMP))
+ if (LOG_INVALID(net, IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: invalid ICMP type ");
return -NF_ACCEPT;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index fa12e57..0572617 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -181,7 +181,7 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
if (icmp6h == NULL) {
- if (LOG_INVALID(IPPROTO_ICMPV6))
+ if (LOG_INVALID(net, IPPROTO_ICMPV6))
nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
"nf_ct_icmpv6: short packet ");
return -NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 342a1f3..3d9a1d0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -53,7 +53,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
struct nf_conn nf_conntrack_untracked __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
-unsigned int nf_ct_log_invalid __read_mostly;
static struct kmem_cache *nf_conntrack_cachep __read_mostly;
static int nf_conntrack_hash_rnd_initted;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 769680e..8fcf176 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -418,6 +418,7 @@ static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv,
static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff)
{
+ struct net *net = nf_ct_net(ct);
struct dccp_hdr _dh, *dh;
const char *msg;
u_int8_t state;
@@ -445,7 +446,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
out_invalid:
- if (LOG_INVALID(IPPROTO_DCCP))
+ if (LOG_INVALID(net, IPPROTO_DCCP))
nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg);
return false;
}
@@ -463,6 +464,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, enum ip_conntrack_info ctinfo,
u_int8_t pf, unsigned int hooknum)
{
+ struct net *net = nf_ct_net(ct);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
struct dccp_hdr _dh, *dh;
u_int8_t type, old_state, new_state;
@@ -524,13 +526,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.dccp.last_pkt = type;
write_unlock_bh(&dccp_lock);
- if (LOG_INVALID(IPPROTO_DCCP))
+ if (LOG_INVALID(net, IPPROTO_DCCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_dccp: invalid packet ignored ");
return NF_ACCEPT;
case CT_DCCP_INVALID:
write_unlock_bh(&dccp_lock);
- if (LOG_INVALID(IPPROTO_DCCP))
+ if (LOG_INVALID(net, IPPROTO_DCCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_dccp: invalid state transition ");
return -NF_ACCEPT;
@@ -590,7 +592,7 @@ static int dccp_error(struct net *net, struct sk_buff *skb,
return NF_ACCEPT;
out_invalid:
- if (LOG_INVALID(IPPROTO_DCCP))
+ if (LOG_INVALID(net, IPPROTO_DCCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg);
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 131c9be..f947ec4 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -488,6 +488,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
const struct tcphdr *tcph,
u_int8_t pf)
{
+ struct net *net = nf_ct_net(ct);
struct ip_ct_tcp_state *sender = &state->seen[dir];
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
@@ -668,7 +669,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
nf_ct_tcp_be_liberal)
res = true;
- if (!res && LOG_INVALID(IPPROTO_TCP))
+ if (!res && LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: %s ",
before(seq, sender->td_maxend + 1) ?
@@ -761,7 +762,7 @@ static int tcp_error(struct net *net,
/* Smaller that minimal TCP header? */
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
if (th == NULL) {
- if (LOG_INVALID(IPPROTO_TCP))
+ if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: short packet ");
return -NF_ACCEPT;
@@ -769,7 +770,7 @@ static int tcp_error(struct net *net,
/* Not whole TCP header or malformed packet */
if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
- if (LOG_INVALID(IPPROTO_TCP))
+ if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: truncated/malformed packet ");
return -NF_ACCEPT;
@@ -782,7 +783,7 @@ static int tcp_error(struct net *net,
/* FIXME: Source route IP option packets --RR */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
- if (LOG_INVALID(IPPROTO_TCP))
+ if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: bad TCP checksum ");
return -NF_ACCEPT;
@@ -791,7 +792,7 @@ static int tcp_error(struct net *net,
/* Check TCP flags. */
tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
if (!tcp_valid_flags[tcpflags]) {
- if (LOG_INVALID(IPPROTO_TCP))
+ if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid TCP flag combination ");
return -NF_ACCEPT;
@@ -808,6 +809,7 @@ static int tcp_packet(struct nf_conn *ct,
u_int8_t pf,
unsigned int hooknum)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_tuple *tuple;
enum tcp_conntrack new_state, old_state;
enum ip_conntrack_dir dir;
@@ -886,7 +888,7 @@ static int tcp_packet(struct nf_conn *ct,
* thus initiate a clean new session.
*/
write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
+ if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: killing out of sync session ");
nf_ct_kill(ct);
@@ -899,7 +901,7 @@ static int tcp_packet(struct nf_conn *ct,
segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
+ if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid packet ignored ");
return NF_ACCEPT;
@@ -908,7 +910,7 @@ static int tcp_packet(struct nf_conn *ct,
pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(th), old_state);
write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
+ if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid state ");
return -NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 3d3fffe..7c2ca48 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -101,7 +101,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hdr == NULL) {
- if (LOG_INVALID(IPPROTO_UDP))
+ if (LOG_INVALID(net, IPPROTO_UDP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: short packet ");
return -NF_ACCEPT;
@@ -109,7 +109,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
/* Truncated/malformed packets */
if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
- if (LOG_INVALID(IPPROTO_UDP))
+ if (LOG_INVALID(net, IPPROTO_UDP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: truncated/malformed packet ");
return -NF_ACCEPT;
@@ -125,7 +125,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
* FIXME: Source route IP option packets --RR */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
- if (LOG_INVALID(IPPROTO_UDP))
+ if (LOG_INVALID(net, IPPROTO_UDP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: bad UDP checksum ");
return -NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 3d1697c..d22d839 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -104,7 +104,7 @@ static int udplite_error(struct net *net,
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hdr == NULL) {
- if (LOG_INVALID(IPPROTO_UDPLITE))
+ if (LOG_INVALID(net, IPPROTO_UDPLITE))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: short packet ");
return -NF_ACCEPT;
@@ -114,7 +114,7 @@ static int udplite_error(struct net *net,
if (cscov == 0)
cscov = udplen;
else if (cscov < sizeof(*hdr) || cscov > udplen) {
- if (LOG_INVALID(IPPROTO_UDPLITE))
+ if (LOG_INVALID(net, IPPROTO_UDPLITE))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: invalid checksum coverage ");
return -NF_ACCEPT;
@@ -122,7 +122,7 @@ static int udplite_error(struct net *net,
/* UDPLITE mandates checksums */
if (!hdr->check) {
- if (LOG_INVALID(IPPROTO_UDPLITE))
+ if (LOG_INVALID(net, IPPROTO_UDPLITE))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: checksum missing ");
return -NF_ACCEPT;
@@ -132,7 +132,7 @@ static int udplite_error(struct net *net,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
pf)) {
- if (LOG_INVALID(IPPROTO_UDPLITE))
+ if (LOG_INVALID(net, IPPROTO_UDPLITE))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: bad UDPLite checksum ");
return -NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5cd0663..98106d4 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -365,7 +365,7 @@ static ctl_table nf_ct_sysctl_table[] = {
{
.ctl_name = NET_NF_CONNTRACK_LOG_INVALID,
.procname = "nf_conntrack_log_invalid",
- .data = &nf_ct_log_invalid,
+ .data = &init_net.ct.sysctl_log_invalid,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
@@ -403,8 +403,6 @@ static struct ctl_path nf_ct_path[] = {
{ }
};
-EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
-
static int nf_conntrack_standalone_init_sysctl(struct net *net)
{
struct ctl_table *table;
@@ -423,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[1].data = &net->ct.count;
table[3].data = &net->ct.sysctl_checksum;
+ table[4].data = &net->ct.sysctl_log_invalid;
net->ct.sysctl_header = register_net_sysctl_table(net,
nf_net_netfilter_sysctl_path, table);
@@ -473,6 +472,7 @@ static int nf_conntrack_net_init(struct net *net)
if (ret < 0)
goto out_proc;
net->ct.sysctl_checksum = 1;
+ net->ct.sysctl_log_invalid = 0;
ret = nf_conntrack_standalone_init_sysctl(net);
if (ret < 0)
goto out_sysctl;
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 23/33] netns ct: per-netns net.netfilter.nf_conntrack_acct sysctl
2008-09-04 16:43 ` Patrick McHardy
` (21 preceding siblings ...)
2008-09-08 3:02 ` [PATCH 22/33] netns ct: per-netns net.netfilter.nf_conntrack_log_invalid sysctl Alexey Dobriyan
@ 2008-09-08 3:03 ` Alexey Dobriyan
2008-09-08 3:03 ` [PATCH 24/33] netns ct: SIP conntracking in netns Alexey Dobriyan
` (9 subsequent siblings)
32 siblings, 0 replies; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:03 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index 5d5ae55..03e218f 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -8,6 +8,7 @@
#ifndef _NF_CONNTRACK_ACCT_H
#define _NF_CONNTRACK_ACCT_H
+#include <net/net_namespace.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/netfilter/nf_conntrack.h>
@@ -18,8 +19,6 @@ struct nf_conn_counter {
u_int64_t bytes;
};
-extern int nf_ct_acct;
-
static inline
struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct)
{
@@ -29,9 +28,10 @@ struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct)
static inline
struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conn_counter *acct;
- if (!nf_ct_acct)
+ if (!net->ct.sysctl_acct)
return NULL;
acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, gfp);
@@ -45,7 +45,7 @@ struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
extern unsigned int
seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir);
-extern int nf_conntrack_acct_init(void);
-extern void nf_conntrack_acct_fini(void);
+extern int nf_conntrack_acct_init(struct net *net);
+extern void nf_conntrack_acct_fini(struct net *net);
#endif /* _NF_CONNTRACK_ACCT_H */
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 503e375..f4498a6 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -17,10 +17,12 @@ struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
+ int sysctl_acct;
int sysctl_checksum;
unsigned int sysctl_log_invalid; /* Log invalid packets */
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
+ struct ctl_table_header *acct_sysctl_header;
#endif
int hash_vmalloc;
int expect_vmalloc;
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 59bd8b9..03591d3 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -22,19 +22,17 @@
#define NF_CT_ACCT_DEFAULT 0
#endif
-int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT;
-EXPORT_SYMBOL_GPL(nf_ct_acct);
+static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT;
module_param_named(acct, nf_ct_acct, bool, 0644);
MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *acct_sysctl_header;
static struct ctl_table acct_sysctl_table[] = {
{
.ctl_name = CTL_UNNUMBERED,
.procname = "nf_conntrack_acct",
- .data = &nf_ct_acct,
+ .data = &init_net.ct.sysctl_acct,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
@@ -64,41 +62,87 @@ static struct nf_ct_ext_type acct_extend __read_mostly = {
.id = NF_CT_EXT_ACCT,
};
-int nf_conntrack_acct_init(void)
+#ifdef CONFIG_SYSCTL
+static int nf_conntrack_acct_init_sysctl(struct net *net)
{
- int ret;
+ struct ctl_table *table;
-#ifdef CONFIG_NF_CT_ACCT
- printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n");
- printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n");
- printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
-#endif
+ table = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto out;
+
+ table[0].data = &net->ct.sysctl_acct;
- ret = nf_ct_extend_register(&acct_extend);
- if (ret < 0) {
- printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
- return ret;
+ net->ct.acct_sysctl_header = register_net_sysctl_table(net,
+ nf_net_netfilter_sysctl_path, table);
+ if (!net->ct.acct_sysctl_header) {
+ printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n");
+ goto out_register;
}
+ return 0;
-#ifdef CONFIG_SYSCTL
- acct_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
- acct_sysctl_table);
+out_register:
+ kfree(table);
+out:
+ return -ENOMEM;
+}
- if (!acct_sysctl_header) {
- nf_ct_extend_unregister(&acct_extend);
+static void nf_conntrack_acct_fini_sysctl(struct net *net)
+{
+ struct ctl_table *table;
- printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n");
- return -ENOMEM;
- }
+ table = net->ct.acct_sysctl_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->ct.acct_sysctl_header);
+ kfree(table);
+}
+#else
+static int nf_conntrack_acct_init_sysctl(struct net *net)
+{
+ return 0;
+}
+
+static void nf_conntrack_acct_fini_sysctl(struct net *net)
+{
+}
+#endif
+
+int nf_conntrack_acct_init(struct net *net)
+{
+ int ret;
+
+ net->ct.sysctl_acct = nf_ct_acct;
+
+ if (net_eq(net, &init_net)) {
+#ifdef CONFIG_NF_CT_ACCT
+ printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n");
+ printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n");
+ printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
#endif
+ ret = nf_ct_extend_register(&acct_extend);
+ if (ret < 0) {
+ printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
+ goto out_extend_register;
+ }
+ }
+
+ ret = nf_conntrack_acct_init_sysctl(net);
+ if (ret < 0)
+ goto out_sysctl;
+
return 0;
+
+out_sysctl:
+ if (net_eq(net, &init_net))
+ nf_ct_extend_unregister(&acct_extend);
+out_extend_register:
+ return ret;
}
-void nf_conntrack_acct_fini(void)
+void nf_conntrack_acct_fini(struct net *net)
{
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(acct_sysctl_header);
-#endif
- nf_ct_extend_unregister(&acct_extend);
+ nf_conntrack_acct_fini_sysctl(net);
+ if (net_eq(net, &init_net))
+ nf_ct_extend_unregister(&acct_extend);
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3d9a1d0..521c0e6 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1040,8 +1040,7 @@ void nf_conntrack_cleanup(struct net *net)
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
- if (net_eq(net, &init_net))
- nf_conntrack_acct_fini();
+ nf_conntrack_acct_fini(net);
nf_conntrack_expect_fini(net);
free_percpu(net->ct.stat);
if (net_eq(net, &init_net)) {
@@ -1203,11 +1202,13 @@ int nf_conntrack_init(struct net *net)
ret = nf_conntrack_helper_init();
if (ret < 0)
goto out_fini_expect;
+ }
- ret = nf_conntrack_acct_init();
- if (ret < 0)
- goto out_fini_helper;
+ ret = nf_conntrack_acct_init(net);
+ if (ret < 0)
+ goto out_fini_helper;
+ if (net_eq(net, &init_net)) {
/* For use by REJECT target */
rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 24/33] netns ct: SIP conntracking in netns
2008-09-04 16:43 ` Patrick McHardy
` (22 preceding siblings ...)
2008-09-08 3:03 ` Alexey Dobriyan
@ 2008-09-08 3:03 ` Alexey Dobriyan
2008-10-02 8:52 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 25/33] netns ct: H323 " Alexey Dobriyan
` (8 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:03 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index a006080..6813f1c 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -736,6 +736,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
struct nf_conntrack_expect *exp, *rtp_exp, *rtcp_exp;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct net *net = nf_ct_net(ct);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
union nf_inet_addr *saddr;
struct nf_conntrack_tuple tuple;
@@ -775,7 +776,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
rcu_read_lock();
do {
- exp = __nf_ct_expect_find(&init_net, &tuple);
+ exp = __nf_ct_expect_find(net, &tuple);
if (!exp || exp->master == ct ||
nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 25/33] netns ct: H323 conntracking in netns
2008-09-04 16:43 ` Patrick McHardy
` (23 preceding siblings ...)
2008-09-08 3:03 ` [PATCH 24/33] netns ct: SIP conntracking in netns Alexey Dobriyan
@ 2008-09-08 3:03 ` Alexey Dobriyan
2008-10-02 8:52 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 26/33] netns ct: GRE " Alexey Dobriyan
` (7 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:03 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index dfb826c..c1504f7 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1210,6 +1210,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
union nf_inet_addr *addr,
__be16 port)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
@@ -1219,7 +1220,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
tuple.dst.u.tcp.port = port;
tuple.dst.protonum = IPPROTO_TCP;
- exp = __nf_ct_expect_find(&init_net, &tuple);
+ exp = __nf_ct_expect_find(net, &tuple);
if (exp && exp->master == ct)
return exp;
return NULL;
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 26/33] netns ct: GRE conntracking in netns
2008-09-04 16:43 ` Patrick McHardy
` (24 preceding siblings ...)
2008-09-08 3:03 ` [PATCH 25/33] netns ct: H323 " Alexey Dobriyan
@ 2008-09-08 3:03 ` Alexey Dobriyan
2008-10-02 8:53 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 27/33] netns ct: PPTP " Alexey Dobriyan
` (6 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:03 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
* make keymap list per-netns
* per-netns keymap lock (not strictly necessary)
* flush keymap at netns stop and module unload.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index 535e421..2a10efd 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -87,7 +87,7 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
/* delete keymap entries */
void nf_ct_gre_keymap_destroy(struct nf_conn *ct);
-extern void nf_ct_gre_keymap_flush(void);
+extern void nf_ct_gre_keymap_flush(struct net *net);
extern void nf_nat_need_gre(void);
#endif /* __KERNEL__ */
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 5db7df5..e47d5de 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -602,7 +602,7 @@ static int __init nf_conntrack_pptp_init(void)
static void __exit nf_conntrack_pptp_fini(void)
{
nf_conntrack_helper_unregister(&pptp);
- nf_ct_gre_keymap_flush();
+ nf_ct_gre_keymap_flush(&init_net);
}
module_init(nf_conntrack_pptp_init);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 5b1273a..a2cdbcb 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -29,8 +29,11 @@
#include <linux/list.h>
#include <linux/seq_file.h>
#include <linux/in.h>
+#include <linux/netdevice.h>
#include <linux/skbuff.h>
-
+#include <net/dst.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -40,19 +43,23 @@
#define GRE_TIMEOUT (30 * HZ)
#define GRE_STREAM_TIMEOUT (180 * HZ)
-static DEFINE_RWLOCK(nf_ct_gre_lock);
-static LIST_HEAD(gre_keymap_list);
+static int proto_gre_net_id;
+struct netns_proto_gre {
+ rwlock_t keymap_lock;
+ struct list_head keymap_list;
+};
-void nf_ct_gre_keymap_flush(void)
+void nf_ct_gre_keymap_flush(struct net *net)
{
+ struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
struct nf_ct_gre_keymap *km, *tmp;
- write_lock_bh(&nf_ct_gre_lock);
- list_for_each_entry_safe(km, tmp, &gre_keymap_list, list) {
+ write_lock_bh(&net_gre->keymap_lock);
+ list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) {
list_del(&km->list);
kfree(km);
}
- write_unlock_bh(&nf_ct_gre_lock);
+ write_unlock_bh(&net_gre->keymap_lock);
}
EXPORT_SYMBOL(nf_ct_gre_keymap_flush);
@@ -67,19 +74,20 @@ static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
}
/* look up the source key for a given tuple */
-static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t)
+static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t)
{
+ struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
struct nf_ct_gre_keymap *km;
__be16 key = 0;
- read_lock_bh(&nf_ct_gre_lock);
- list_for_each_entry(km, &gre_keymap_list, list) {
+ read_lock_bh(&net_gre->keymap_lock);
+ list_for_each_entry(km, &net_gre->keymap_list, list) {
if (gre_key_cmpfn(km, t)) {
key = km->tuple.src.u.gre.key;
break;
}
}
- read_unlock_bh(&nf_ct_gre_lock);
+ read_unlock_bh(&net_gre->keymap_lock);
pr_debug("lookup src key 0x%x for ", key);
nf_ct_dump_tuple(t);
@@ -91,20 +99,22 @@ static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t)
int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
struct nf_conntrack_tuple *t)
{
+ struct net *net = nf_ct_net(ct);
+ struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
struct nf_conn_help *help = nfct_help(ct);
struct nf_ct_gre_keymap **kmp, *km;
kmp = &help->help.ct_pptp_info.keymap[dir];
if (*kmp) {
/* check whether it's a retransmission */
- read_lock_bh(&nf_ct_gre_lock);
- list_for_each_entry(km, &gre_keymap_list, list) {
+ read_lock_bh(&net_gre->keymap_lock);
+ list_for_each_entry(km, &net_gre->keymap_list, list) {
if (gre_key_cmpfn(km, t) && km == *kmp) {
- read_unlock_bh(&nf_ct_gre_lock);
+ read_unlock_bh(&net_gre->keymap_lock);
return 0;
}
}
- read_unlock_bh(&nf_ct_gre_lock);
+ read_unlock_bh(&net_gre->keymap_lock);
pr_debug("trying to override keymap_%s for ct %p\n",
dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
return -EEXIST;
@@ -119,9 +129,9 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
pr_debug("adding new entry %p: ", km);
nf_ct_dump_tuple(&km->tuple);
- write_lock_bh(&nf_ct_gre_lock);
- list_add_tail(&km->list, &gre_keymap_list);
- write_unlock_bh(&nf_ct_gre_lock);
+ write_lock_bh(&net_gre->keymap_lock);
+ list_add_tail(&km->list, &net_gre->keymap_list);
+ write_unlock_bh(&net_gre->keymap_lock);
return 0;
}
@@ -130,12 +140,14 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add);
/* destroy the keymap entries associated with specified master ct */
void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
{
+ struct net *net = nf_ct_net(ct);
+ struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
struct nf_conn_help *help = nfct_help(ct);
enum ip_conntrack_dir dir;
pr_debug("entering for ct %p\n", ct);
- write_lock_bh(&nf_ct_gre_lock);
+ write_lock_bh(&net_gre->keymap_lock);
for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) {
if (help->help.ct_pptp_info.keymap[dir]) {
pr_debug("removing %p from list\n",
@@ -145,7 +157,7 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
help->help.ct_pptp_info.keymap[dir] = NULL;
}
}
- write_unlock_bh(&nf_ct_gre_lock);
+ write_unlock_bh(&net_gre->keymap_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
@@ -164,6 +176,7 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
{
+ struct net *net = dev_net(skb->dev ? skb->dev : skb->dst->dev);
const struct gre_hdr_pptp *pgrehdr;
struct gre_hdr_pptp _pgrehdr;
__be16 srckey;
@@ -190,7 +203,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
}
tuple->dst.u.gre.key = pgrehdr->call_id;
- srckey = gre_keymap_lookup(tuple);
+ srckey = gre_keymap_lookup(net, tuple);
tuple->src.u.gre.key = srckey;
return true;
@@ -285,15 +298,53 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
#endif
};
+static int proto_gre_net_init(struct net *net)
+{
+ struct netns_proto_gre *net_gre;
+ int rv;
+
+ net_gre = kmalloc(sizeof(struct netns_proto_gre), GFP_KERNEL);
+ if (!net_gre)
+ return -ENOMEM;
+ rwlock_init(&net_gre->keymap_lock);
+ INIT_LIST_HEAD(&net_gre->keymap_list);
+
+ rv = net_assign_generic(net, proto_gre_net_id, net_gre);
+ if (rv < 0)
+ kfree(net_gre);
+ return rv;
+}
+
+static void proto_gre_net_exit(struct net *net)
+{
+ struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
+
+ nf_ct_gre_keymap_flush(net);
+ kfree(net_gre);
+}
+
+static struct pernet_operations proto_gre_net_ops = {
+ .init = proto_gre_net_init,
+ .exit = proto_gre_net_exit,
+};
+
static int __init nf_ct_proto_gre_init(void)
{
- return nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4);
+ int rv;
+
+ rv = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4);
+ if (rv < 0)
+ return rv;
+ rv = register_pernet_gen_device(&proto_gre_net_id, &proto_gre_net_ops);
+ if (rv < 0)
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4);
+ return rv;
}
static void nf_ct_proto_gre_fini(void)
{
nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4);
- nf_ct_gre_keymap_flush();
+ unregister_pernet_gen_device(proto_gre_net_id, &proto_gre_net_ops);
}
module_init(nf_ct_proto_gre_init);
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 27/33] netns ct: PPTP conntracking in netns
2008-09-04 16:43 ` Patrick McHardy
` (25 preceding siblings ...)
2008-09-08 3:03 ` [PATCH 26/33] netns ct: GRE " Alexey Dobriyan
@ 2008-09-08 3:03 ` Alexey Dobriyan
2008-10-02 8:54 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 28/33] netns nat: fix ipt_MASQUERADE " Alexey Dobriyan
` (5 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:03 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index e47d5de..373e51e 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -98,6 +98,7 @@ EXPORT_SYMBOL(pptp_msg_name);
static void pptp_expectfn(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
+ struct net *net = nf_ct_net(ct);
typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn;
pr_debug("increasing timeouts\n");
@@ -121,7 +122,7 @@ static void pptp_expectfn(struct nf_conn *ct,
pr_debug("trying to unexpect other dir: ");
nf_ct_dump_tuple(&inv_t);
- exp_other = nf_ct_expect_find_get(&init_net, &inv_t);
+ exp_other = nf_ct_expect_find_get(net, &inv_t);
if (exp_other) {
/* delete other expectation. */
pr_debug("found\n");
@@ -134,7 +135,8 @@ static void pptp_expectfn(struct nf_conn *ct,
rcu_read_unlock();
}
-static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
+static int destroy_sibling_or_exp(struct net *net,
+ const struct nf_conntrack_tuple *t)
{
const struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_expect *exp;
@@ -143,7 +145,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
pr_debug("trying to timeout ct or exp for tuple ");
nf_ct_dump_tuple(t);
- h = nf_conntrack_find_get(&init_net, t);
+ h = nf_conntrack_find_get(net, t);
if (h) {
sibling = nf_ct_tuplehash_to_ctrack(h);
pr_debug("setting timeout of conntrack %p to 0\n", sibling);
@@ -154,7 +156,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
nf_ct_put(sibling);
return 1;
} else {
- exp = nf_ct_expect_find_get(&init_net, t);
+ exp = nf_ct_expect_find_get(net, t);
if (exp) {
pr_debug("unexpect_related of expect %p\n", exp);
nf_ct_unexpect_related(exp);
@@ -168,6 +170,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
/* timeout GRE data connections */
static void pptp_destroy_siblings(struct nf_conn *ct)
{
+ struct net *net = nf_ct_net(ct);
const struct nf_conn_help *help = nfct_help(ct);
struct nf_conntrack_tuple t;
@@ -178,7 +181,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
t.dst.protonum = IPPROTO_GRE;
t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id;
t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id;
- if (!destroy_sibling_or_exp(&t))
+ if (!destroy_sibling_or_exp(net, &t))
pr_debug("failed to timeout original pns->pac ct/exp\n");
/* try reply (pac->pns) tuple */
@@ -186,7 +189,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
t.dst.protonum = IPPROTO_GRE;
t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id;
t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id;
- if (!destroy_sibling_or_exp(&t))
+ if (!destroy_sibling_or_exp(net, &t))
pr_debug("failed to timeout reply pac->pns ct/exp\n");
}
@@ -594,15 +597,32 @@ static struct nf_conntrack_helper pptp __read_mostly = {
.expect_policy = &pptp_exp_policy,
};
+static void nf_conntrack_pptp_net_exit(struct net *net)
+{
+ nf_ct_gre_keymap_flush(net);
+}
+
+static struct pernet_operations nf_conntrack_pptp_net_ops = {
+ .exit = nf_conntrack_pptp_net_exit,
+};
+
static int __init nf_conntrack_pptp_init(void)
{
- return nf_conntrack_helper_register(&pptp);
+ int rv;
+
+ rv = nf_conntrack_helper_register(&pptp);
+ if (rv < 0)
+ return rv;
+ rv = register_pernet_subsys(&nf_conntrack_pptp_net_ops);
+ if (rv < 0)
+ nf_conntrack_helper_unregister(&pptp);
+ return rv;
}
static void __exit nf_conntrack_pptp_fini(void)
{
nf_conntrack_helper_unregister(&pptp);
- nf_ct_gre_keymap_flush(&init_net);
+ unregister_pernet_subsys(&nf_conntrack_pptp_net_ops);
}
module_init(nf_conntrack_pptp_init);
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 28/33] netns nat: fix ipt_MASQUERADE in netns
2008-09-04 16:43 ` Patrick McHardy
` (26 preceding siblings ...)
2008-09-08 3:03 ` [PATCH 27/33] netns ct: PPTP " Alexey Dobriyan
@ 2008-09-08 3:03 ` Alexey Dobriyan
2008-10-02 9:06 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 29/33] netns nat: per-netns NAT table Alexey Dobriyan
` (4 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:03 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
First, allow entry in notifier hook.
Second, start conntrack cleanup in netns to which netdevice belongs.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 5e1c817..65c811b 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -119,9 +119,7 @@ static int masq_device_event(struct notifier_block *this,
void *ptr)
{
const struct net_device *dev = ptr;
-
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
+ struct net *net = dev_net(dev);
if (event == NETDEV_DOWN) {
/* Device was downed. Search entire table for
@@ -129,7 +127,7 @@ static int masq_device_event(struct notifier_block *this,
and forget them. */
NF_CT_ASSERT(dev->ifindex != 0);
- nf_ct_iterate_cleanup(&init_net, device_cmp,
+ nf_ct_iterate_cleanup(net, device_cmp,
(void *)(long)dev->ifindex);
}
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 29/33] netns nat: per-netns NAT table
2008-09-04 16:43 ` Patrick McHardy
` (27 preceding siblings ...)
2008-09-08 3:03 ` [PATCH 28/33] netns nat: fix ipt_MASQUERADE " Alexey Dobriyan
@ 2008-09-08 3:03 ` Alexey Dobriyan
2008-10-02 9:08 ` Patrick McHardy
2008-10-02 9:09 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 30/33] netns nat: per-netns bysource hash Alexey Dobriyan
` (3 subsequent siblings)
32 siblings, 2 replies; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:03 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Same story as with iptable_filter, iptables_raw tables.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index a6ed838..b286b84 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -38,6 +38,7 @@ struct netns_ipv4 {
struct xt_table *iptable_raw;
struct xt_table *arptable_filter;
struct xt_table *iptable_security;
+ struct xt_table *nat_table;
#endif
int sysctl_icmp_echo_ignore_all;
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index e8b4d0d..0a02a8c 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -33,7 +33,7 @@ static struct
struct ipt_replace repl;
struct ipt_standard entries[3];
struct ipt_error term;
-} nat_initial_table __initdata = {
+} nat_initial_table __net_initdata = {
.repl = {
.name = "nat",
.valid_hooks = NAT_VALID_HOOKS,
@@ -58,14 +58,13 @@ static struct
.term = IPT_ERROR_INIT, /* ERROR */
};
-static struct xt_table __nat_table = {
+static struct xt_table nat_table = {
.name = "nat",
.valid_hooks = NAT_VALID_HOOKS,
.lock = __RW_LOCK_UNLOCKED(__nat_table.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
-static struct xt_table *nat_table;
/* Source NAT */
static unsigned int ipt_snat_target(struct sk_buff *skb,
@@ -194,9 +193,10 @@ int nf_nat_rule_find(struct sk_buff *skb,
const struct net_device *out,
struct nf_conn *ct)
{
+ struct net *net = nf_ct_net(ct);
int ret;
- ret = ipt_do_table(skb, hooknum, in, out, nat_table);
+ ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
if (ret == NF_ACCEPT) {
if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
@@ -226,14 +226,32 @@ static struct xt_target ipt_dnat_reg __read_mostly = {
.family = AF_INET,
};
+static int __net_init nf_nat_rule_net_init(struct net *net)
+{
+ net->ipv4.nat_table = ipt_register_table(net, &nat_table,
+ &nat_initial_table.repl);
+ if (IS_ERR(net->ipv4.nat_table))
+ return PTR_ERR(net->ipv4.nat_table);
+ return 0;
+}
+
+static void __net_exit nf_nat_rule_net_exit(struct net *net)
+{
+ ipt_unregister_table(net->ipv4.nat_table);
+}
+
+static struct pernet_operations nf_nat_rule_net_ops = {
+ .init = nf_nat_rule_net_init,
+ .exit = nf_nat_rule_net_exit,
+};
+
int __init nf_nat_rule_init(void)
{
int ret;
- nat_table = ipt_register_table(&init_net, &__nat_table,
- &nat_initial_table.repl);
- if (IS_ERR(nat_table))
- return PTR_ERR(nat_table);
+ ret = register_pernet_subsys(&nf_nat_rule_net_ops);
+ if (ret != 0)
+ goto out;
ret = xt_register_target(&ipt_snat_reg);
if (ret != 0)
goto unregister_table;
@@ -247,8 +265,8 @@ int __init nf_nat_rule_init(void)
unregister_snat:
xt_unregister_target(&ipt_snat_reg);
unregister_table:
- ipt_unregister_table(nat_table);
-
+ unregister_pernet_subsys(&nf_nat_rule_net_ops);
+ out:
return ret;
}
@@ -256,5 +274,5 @@ void nf_nat_rule_cleanup(void)
{
xt_unregister_target(&ipt_dnat_reg);
xt_unregister_target(&ipt_snat_reg);
- ipt_unregister_table(nat_table);
+ unregister_pernet_subsys(&nf_nat_rule_net_ops);
}
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 30/33] netns nat: per-netns bysource hash
2008-09-04 16:43 ` Patrick McHardy
` (28 preceding siblings ...)
2008-09-08 3:03 ` [PATCH 29/33] netns nat: per-netns NAT table Alexey Dobriyan
@ 2008-09-08 3:03 ` Alexey Dobriyan
2008-10-02 9:09 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 31/33] netns ct: fixup DNAT in netns Alexey Dobriyan
` (2 subsequent siblings)
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:03 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index b286b84..ece1c92 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -39,6 +39,8 @@ struct netns_ipv4 {
struct xt_table *arptable_filter;
struct xt_table *iptable_security;
struct xt_table *nat_table;
+ struct hlist_head *nat_bysource;
+ int nat_vmalloced;
#endif
int sysctl_icmp_echo_ignore_all;
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 5d4a5b7..2ac9eaf 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -37,9 +37,6 @@ static struct nf_conntrack_l3proto *l3proto __read_mostly;
/* Calculated at init based on memory size */
static unsigned int nf_nat_htable_size __read_mostly;
-static int nf_nat_vmalloced;
-
-static struct hlist_head *bysource __read_mostly;
#define MAX_IP_NAT_PROTO 256
static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
@@ -145,7 +142,8 @@ same_src(const struct nf_conn *ct,
/* Only called for SRC manip */
static int
-find_appropriate_src(const struct nf_conntrack_tuple *tuple,
+find_appropriate_src(struct net *net,
+ const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
@@ -155,7 +153,7 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple,
const struct hlist_node *n;
rcu_read_lock();
- hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) {
+ hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) {
ct = nat->ct;
if (same_src(ct, tuple)) {
/* Copy source part from reply tuple. */
@@ -231,6 +229,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
+ struct net *net = nf_ct_net(ct);
const struct nf_nat_protocol *proto;
/* 1) If this srcip/proto/src-proto-part is currently mapped,
@@ -242,7 +241,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
manips not an issue. */
if (maniptype == IP_NAT_MANIP_SRC &&
!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
- if (find_appropriate_src(orig_tuple, tuple, range)) {
+ if (find_appropriate_src(net, orig_tuple, tuple, range)) {
pr_debug("get_unique_tuple: Found current src map\n");
if (!nf_nat_used_tuple(tuple, ct))
return;
@@ -283,6 +282,7 @@ nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_tuple curr_tuple, new_tuple;
struct nf_conn_nat *nat;
int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
@@ -334,7 +334,8 @@ nf_nat_setup_info(struct nf_conn *ct,
/* nf_conntrack_alter_reply might re-allocate exntension aera */
nat = nfct_nat(ct);
nat->ct = ct;
- hlist_add_head_rcu(&nat->bysource, &bysource[srchash]);
+ hlist_add_head_rcu(&nat->bysource,
+ &net->ipv4.nat_bysource[srchash]);
spin_unlock_bh(&nf_nat_lock);
}
@@ -583,6 +584,40 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
.flags = NF_CT_EXT_F_PREALLOC,
};
+static int __net_init nf_nat_net_init(struct net *net)
+{
+ net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
+ &net->ipv4.nat_vmalloced);
+ if (!net->ipv4.nat_bysource)
+ return -ENOMEM;
+ return 0;
+}
+
+/* Clear NAT section of all conntracks, in case we're loaded again. */
+static int clean_nat(struct nf_conn *i, void *data)
+{
+ struct nf_conn_nat *nat = nfct_nat(i);
+
+ if (!nat)
+ return 0;
+ memset(nat, 0, sizeof(*nat));
+ i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
+ return 0;
+}
+
+static void __net_exit nf_nat_net_exit(struct net *net)
+{
+ nf_ct_iterate_cleanup(net, &clean_nat, NULL);
+ synchronize_rcu();
+ nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
+ nf_nat_htable_size);
+}
+
+static struct pernet_operations nf_nat_net_ops = {
+ .init = nf_nat_net_init,
+ .exit = nf_nat_net_exit,
+};
+
static int __init nf_nat_init(void)
{
size_t i;
@@ -599,12 +634,9 @@ static int __init nf_nat_init(void)
/* Leave them the same for the moment. */
nf_nat_htable_size = nf_conntrack_htable_size;
- bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
- &nf_nat_vmalloced);
- if (!bysource) {
- ret = -ENOMEM;
+ ret = register_pernet_subsys(&nf_nat_net_ops);
+ if (ret < 0)
goto cleanup_extend;
- }
/* Sew in builtin protocols. */
spin_lock_bh(&nf_nat_lock);
@@ -629,23 +661,9 @@ static int __init nf_nat_init(void)
return ret;
}
-/* Clear NAT section of all conntracks, in case we're loaded again. */
-static int clean_nat(struct nf_conn *i, void *data)
-{
- struct nf_conn_nat *nat = nfct_nat(i);
-
- if (!nat)
- return 0;
- memset(nat, 0, sizeof(*nat));
- i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
- return 0;
-}
-
static void __exit nf_nat_cleanup(void)
{
- nf_ct_iterate_cleanup(&init_net, &clean_nat, NULL);
- synchronize_rcu();
- nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size);
+ unregister_pernet_subsys(&nf_nat_net_ops);
nf_ct_l3proto_put(l3proto);
nf_ct_extend_unregister(&nat_extend);
rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 31/33] netns ct: fixup DNAT in netns
2008-09-04 16:43 ` Patrick McHardy
` (29 preceding siblings ...)
2008-09-08 3:03 ` [PATCH 30/33] netns nat: per-netns bysource hash Alexey Dobriyan
@ 2008-09-08 3:03 ` Alexey Dobriyan
2008-10-02 9:10 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 32/33] netns nat: PPTP NAT " Alexey Dobriyan
2008-09-08 3:03 ` [PATCH 33/33] Enable netfilter " Alexey Dobriyan
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:03 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 0a02a8c..f929352 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -91,13 +91,13 @@ static unsigned int ipt_snat_target(struct sk_buff *skb,
}
/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
-static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
+static void warn_if_extra_mangle(struct net *net, __be32 dstip, __be32 srcip)
{
static int warned = 0;
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
struct rtable *rt;
- if (ip_route_output_key(&init_net, &rt, &fl) != 0)
+ if (ip_route_output_key(net, &rt, &fl) != 0)
return;
if (rt->rt_src != srcip && !warned) {
@@ -130,7 +130,7 @@ static unsigned int ipt_dnat_target(struct sk_buff *skb,
if (hooknum == NF_INET_LOCAL_OUT &&
mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
- warn_if_extra_mangle(ip_hdr(skb)->daddr,
+ warn_if_extra_mangle(dev_net(out), ip_hdr(skb)->daddr,
mr->range[0].min_ip);
return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 32/33] netns nat: PPTP NAT in netns
2008-09-04 16:43 ` Patrick McHardy
` (30 preceding siblings ...)
2008-09-08 3:03 ` [PATCH 31/33] netns ct: fixup DNAT in netns Alexey Dobriyan
@ 2008-09-08 3:03 ` Alexey Dobriyan
2008-10-02 9:11 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 33/33] Enable netfilter " Alexey Dobriyan
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:03 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index e4bdddc..9eb1710 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -40,6 +40,7 @@ MODULE_ALIAS("ip_nat_pptp");
static void pptp_nat_expected(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
+ struct net *net = nf_ct_net(ct);
const struct nf_conn *master = ct->master;
struct nf_conntrack_expect *other_exp;
struct nf_conntrack_tuple t;
@@ -73,7 +74,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
pr_debug("trying to unexpect other dir: ");
nf_ct_dump_tuple_ip(&t);
- other_exp = nf_ct_expect_find_get(&init_net, &t);
+ other_exp = nf_ct_expect_find_get(net, &t);
if (other_exp) {
nf_ct_unexpect_related(other_exp);
nf_ct_expect_put(other_exp);
^ permalink raw reply related [flat|nested] 88+ messages in thread
* [PATCH 33/33] Enable netfilter in netns
2008-09-04 16:43 ` Patrick McHardy
` (31 preceding siblings ...)
2008-09-08 3:03 ` [PATCH 32/33] netns nat: PPTP NAT " Alexey Dobriyan
@ 2008-09-08 3:03 ` Alexey Dobriyan
2008-10-02 9:12 ` Patrick McHardy
32 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-08 3:03 UTC (permalink / raw)
To: kaber; +Cc: netdev, netfilter-devel, containers
From kernel perspective, allow entrance in nf_hook_slow().
Stuff which uses nf_register_hook/nf_register_hooks, but otherwise not netns-ready:
DECnet netfilter
ipt_CLUSTERIP
nf_nat_standalone.c together with XFRM (?)
IPVS
several individual match modules (like hashlimit)
ctnetlink
NOTRACK
all sorts of queueing and reporting to userspace
L3 and L4 protocol sysctls, bridge sysctls
probably something else
Anyway critical mass has been achieved, there is no reason to hide netfilter any longer.
From userspace perspective, allow to manipulate all sorts of
iptables/ip6tables/arptables rules.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index b16cd79..a90ac83 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -165,14 +165,6 @@ int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
unsigned int verdict;
int ret = 0;
-#ifdef CONFIG_NET_NS
- struct net *net;
-
- net = indev == NULL ? dev_net(outdev) : dev_net(indev);
- if (net != &init_net)
- return 1;
-#endif
-
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index f9b46de..8ab829f 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -65,9 +65,6 @@ static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf,
{
struct nf_sockopt_ops *ops;
- if (!net_eq(sock_net(sk), &init_net))
- return ERR_PTR(-ENOPROTOOPT);
-
if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
return ERR_PTR(-EINTR);
^ permalink raw reply related [flat|nested] 88+ messages in thread
* Re: [PATCH 03/33] nf_conntrack_gre: nf_ct_gre_keymap_flush() fixlet
2008-09-08 3:02 ` [PATCH 03/33] nf_conntrack_gre: nf_ct_gre_keymap_flush() fixlet Alexey Dobriyan
@ 2008-09-09 5:39 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 5:39 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> It does "kfree(list_head)" which looks wrong because entity that was
> allocated is definitely not list_head.
>
> However, this all works because list_head is first item in
> struct nf_ct_gre_keymap .
The first three patches are already in Linus' tree.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 04/33] Fix {ip,6}_route_me_harder() in netns
2008-09-08 3:02 ` [PATCH 04/33] Fix {ip,6}_route_me_harder() in netns Alexey Dobriyan
@ 2008-09-09 5:44 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 5:44 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Take netns from skb->dst->dev. It should be safe because, they are called
> from LOCAL_OUT hook where dst is valid (though, I'm not exactly sure about
> IPVS and queueing packets to userspace).
It's safe in all cases since they already expect to only get
called when skb->dst is valid.
Applied, thanks.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 05/33] netns ct: per-netns expectations
2008-09-08 3:02 ` [PATCH 05/33] netns ct: per-netns expectations Alexey Dobriyan
@ 2008-09-09 5:49 ` Patrick McHardy
2008-09-09 7:07 ` Alexey Dobriyan
0 siblings, 1 reply; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 5:49 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Make per-netns a) expectation hash and b) expectations count.
>
> Expectations always belongs to netns to which it's master conntrack belong.
> This is natural and doesn't bloat expectation.
>
> Proc files and leaf users are stubbed to init_net, this is temporary.
Looks fine, applied.
> @@ -406,7 +404,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
> }
> }
>
> - if (nf_ct_expect_count >= nf_ct_expect_max) {
> + if (net->ct.expect_count >= nf_ct_expect_max) {
> if (net_ratelimit())
> printk(KERN_WARNING
> "nf_conntrack: expectation table full\n");
I assume these messages are globally visible even with namespaces?
Can we make this (and the corresponding ct hash message) refer to
the namespace? Otherwise it might be a bit confusing.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 06/33] netns ct: per-netns unconfirmed list
2008-09-08 3:02 ` [PATCH 06/33] netns ct: per-netns unconfirmed list Alexey Dobriyan
@ 2008-09-09 5:50 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 5:50 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> What is confirmed connection in one netns can very well be unconfirmed
> in another one.
Applied, thanks.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 07/33] netns ct: pass netns pointer to nf_conntrack_in()
2008-09-08 3:02 ` [PATCH 07/33] netns ct: pass netns pointer to nf_conntrack_in() Alexey Dobriyan
@ 2008-09-09 5:52 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 5:52 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> It's deducible from skb->dev or skb->dst->dev, but we know netns at
> the moment of call, so pass it down and use for finding and creating
> conntracks.
Applied, thanks.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 08/33] netns ct: pass netns pointer to L4 protocol's ->error hook
2008-09-08 3:02 ` [PATCH 08/33] netns ct: pass netns pointer to L4 protocol's ->error hook Alexey Dobriyan
@ 2008-09-09 5:54 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 5:54 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Again, it's deducible from skb, but we're going to use it for
> nf_conntrack_checksum and statistics, so just pass it from upper layer.
Applied, thanks.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 09/33] netns ct: per-netns /proc/net/nf_conntrack, /proc/net/stat/nf_conntrack
2008-09-08 3:02 ` [PATCH 09/33] netns ct: per-netns /proc/net/nf_conntrack, /proc/net/stat/nf_conntrack Alexey Dobriyan
@ 2008-09-09 5:56 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 5:56 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
Applied, thanks.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 10/33] netns ct: per-netns /proc/net/nf_conntrack_expect
2008-09-08 3:02 ` [PATCH 10/33] netns ct: per-netns /proc/net/nf_conntrack_expect Alexey Dobriyan
@ 2008-09-09 5:57 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 5:57 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
Applied, thanks.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 11/33] netns ct: per-netns /proc/net/ip_conntrack, /proc/net/stat/ip_conntrack, /proc/net/ip_conntrack_expect
2008-09-08 3:02 ` [PATCH 11/33] netns ct: per-netns /proc/net/ip_conntrack, /proc/net/stat/ip_conntrack, /proc/net/ip_conntrack_expect Alexey Dobriyan
@ 2008-09-09 5:59 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 5:59 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
Applied, thanks.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 12/33] netns ct: export netns list
2008-09-08 3:02 ` [PATCH 12/33] netns ct: export netns list Alexey Dobriyan
@ 2008-09-09 5:59 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 5:59 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Conntrack code will use it for
> a) removing expectations and helpers when corresponding module is removed, and
> b) removing conntracks when L3 protocol conntrack module is removed.
Applied, thanks.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 13/33] netns ct: unregister helper in every netns
2008-09-08 3:02 ` [PATCH 13/33] netns ct: unregister helper in every netns Alexey Dobriyan
@ 2008-09-09 6:01 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 6:01 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
Applied, thanks.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 14/33] netns ct: cleanup after L3 and L4 proto unregister in every netns
2008-09-08 3:02 ` [PATCH 14/33] netns ct: cleanup after L3 and L4 proto unregister " Alexey Dobriyan
@ 2008-09-09 6:03 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 6:03 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
Applied, thanks.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 15/33] netns ct: pass conntrack to nf_conntrack_event_cache() not skb
2008-09-08 3:02 ` [PATCH 15/33] netns ct: pass conntrack to nf_conntrack_event_cache() not skb Alexey Dobriyan
@ 2008-09-09 6:04 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 6:04 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> This is cleaner, we already know conntrack to which event is relevant.
Applied, thanks.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 16/33] netns ct: per-netns event cache
[not found] ` <1220842990-30500-16-git-send-email-adobriyan-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2008-09-09 6:12 ` Patrick McHardy
2008-09-09 7:07 ` Alexey Dobriyan
0 siblings, 1 reply; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 6:12 UTC (permalink / raw)
To: Alexey Dobriyan
Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
netfilter-devel-u79uwXL29TY76Z2rM5mHXA
Alexey Dobriyan wrote:
> Heh, last minute proof-reading of this patch made me think,
> that this is actually unneeded, simply because "ct" pointers will be
> different for different conntracks in different netns, just like they
> are different in one netns.
>
> Not so sure anymore.
It's necessary because the cache needs to be flushed on netns exit
and this is only allowed while it's not in use anymore.
I don't see anything in this series actually making sure nothing
hits the cache on exit though. Am I missing something?
Additionally (I might have missed a following patch moving it
out though) this doesn't belong in the netns exit path:
void nf_conntrack_cleanup(struct net *net)
{
rcu_assign_pointer(ip_ct_attach, NULL);
...
rcu_assign_pointer(nf_ct_destroy, NULL);
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 16/33] netns ct: per-netns event cache
2008-09-09 7:07 ` Alexey Dobriyan
@ 2008-09-09 7:07 ` Patrick McHardy
2008-09-09 7:16 ` Patrick McHardy
0 siblings, 1 reply; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 7:07 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> On Tue, Sep 09, 2008 at 08:12:27AM +0200, Patrick McHardy wrote:
>> Alexey Dobriyan wrote:
>>> Heh, last minute proof-reading of this patch made me think,
>>> that this is actually unneeded, simply because "ct" pointers will be
>>> different for different conntracks in different netns, just like they
>>> are different in one netns.
>>>
>>> Not so sure anymore.
>> Its necessary because the cache needs to be flushed on netns exit
>> and this is only allowed while its not in use anymore.
>>
>> I don't see anything in this series actually making sure nothing
>> hits the cache on exit though. Am I missing something?
>
> When netns refcount hits zero, netdevices in it will start dropping packets.
> And there is synchronize_net() call before cache flush.
>
> I think this is enough.
Thanks for the explanation, I'll have a closer look at this.
>> Additionally (I might have missed a following patch moving it
>> out though) this doesn't belong in the netns exit path:
>>
>> void nf_conntrack_cleanup(struct net *net)
>> {
>> rcu_assign_pointer(ip_ct_attach, NULL);
>> ...
>> rcu_assign_pointer(nf_ct_destroy, NULL);
>
> This is dealt with in 17/33
>
> Have you got 18/33, archives show it's missing?
There are two patches labeled 17/33, I assume the second one is
actually 18/33.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 05/33] netns ct: per-netns expectations
2008-09-09 5:49 ` Patrick McHardy
@ 2008-09-09 7:07 ` Alexey Dobriyan
2008-09-09 7:10 ` Patrick McHardy
0 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-09 7:07 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev, netfilter-devel, containers
On Tue, Sep 09, 2008 at 07:49:34AM +0200, Patrick McHardy wrote:
> Alexey Dobriyan wrote:
>> Make per-netns a) expectation hash and b) expectations count.
>>
>> Expectations always belongs to netns to which it's master conntrack belong.
>> This is natural and doesn't bloat expectation.
>>
>> Proc files and leaf users are stubbed to init_net, this is temporary.
>
> Looks fine, applied.
>
>> @@ -406,7 +404,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
>> }
>> }
>> - if (nf_ct_expect_count >= nf_ct_expect_max) {
>> + if (net->ct.expect_count >= nf_ct_expect_max) {
>> if (net_ratelimit())
>> printk(KERN_WARNING
>> "nf_conntrack: expectation table full\n");
>
> I assume these message are globally visible even with namespaces?
> Can we make this (and the corresponding ct hash message) refer to
> the namespace? Otherwise it might be a bit confusing.
This is somewhat peculiar situation.
netns doesn't have unique ID like, say, ifindex.
The only number related to netns is "struct net *". Namespaces can be
distinguished by pointer value, but userspace, when it does clone(CLONE_NEWNET),
obviously does not control it, and after creation doesn't have a way to find
it out.
And if we print it with "%p", net, a kernelspace pointer gets exposed, which is
not nice.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 16/33] netns ct: per-netns event cache
2008-09-09 6:12 ` Patrick McHardy
@ 2008-09-09 7:07 ` Alexey Dobriyan
2008-09-09 7:07 ` Patrick McHardy
0 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-09 7:07 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev, netfilter-devel, containers
On Tue, Sep 09, 2008 at 08:12:27AM +0200, Patrick McHardy wrote:
> Alexey Dobriyan wrote:
>> Heh, last minute proof-reading of this patch made me think,
>> that this is actually unneeded, simply because "ct" pointers will be
>> different for different conntracks in different netns, just like they
>> are different in one netns.
>>
>> Not so sure anymore.
>
> Its necessary because the cache needs to be flushed on netns exit
> and this is only allowed while its not in use anymore.
>
> I don't see anything in this series actually making sure nothing
> hits the cache on exit though. Am I missing something?
When netns refcount hits zero, netdevices in it will start dropping packets.
And there is synchronize_net() call before cache flush.
I think this is enough.
> Additionally (I might have missed a following patch moving it
> out though) this doesn't belong in the netns exit path:
>
> void nf_conntrack_cleanup(struct net *net)
> {
> rcu_assign_pointer(ip_ct_attach, NULL);
> ...
> rcu_assign_pointer(nf_ct_destroy, NULL);
This is dealt with in 17/33
Have you got 18/33, archives show it's missing?
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 05/33] netns ct: per-netns expectations
2008-09-09 7:07 ` Alexey Dobriyan
@ 2008-09-09 7:10 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 7:10 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> On Tue, Sep 09, 2008 at 07:49:34AM +0200, Patrick McHardy wrote:
>>> @@ -406,7 +404,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
>>> }
>>> }
>>> - if (nf_ct_expect_count >= nf_ct_expect_max) {
>>> + if (net->ct.expect_count >= nf_ct_expect_max) {
>>> if (net_ratelimit())
>>> printk(KERN_WARNING
>>> "nf_conntrack: expectation table full\n");
>> I assume these message are globally visible even with namespaces?
>> Can we make this (and the corresponding ct hash message) refer to
>> the namespace? Otherwise it might be a bit confusing.
>
> This is somewhat peculiar situation.
>
> netns doesn't have unique ID like, say, ifindex.
>
> The only number related to netns is "struct net *". They can be
> distinguised by pointer value, but userspace when does clone(CLONE_NEWNET)
> do not, obviously, control it and after creation doesn't have a way to find
> it out.
>
> And if we print with "%p, net" kernelspace pointer get exposed which is
> not nice.
No, that also wouldn't solve the confusion. I guess conntrack is not
the only subsystem which prints netns related messages, so how about
adding a unique identifier that can be associated by userspace?
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 16/33] netns ct: per-netns event cache
2008-09-09 7:07 ` Patrick McHardy
@ 2008-09-09 7:16 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 7:16 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Patrick McHardy wrote:
> Alexey Dobriyan wrote:
>> On Tue, Sep 09, 2008 at 08:12:27AM +0200, Patrick McHardy wrote:
>>> Alexey Dobriyan wrote:
>>>> Heh, last minute proof-reading of this patch made me think,
>>>> that this is actually unneeded, simply because "ct" pointers will be
>>>> different for different conntracks in different netns, just like they
>>>> are different in one netns.
>>>>
>>>> Not so sure anymore.
>>> Its necessary because the cache needs to be flushed on netns exit
>>> and this is only allowed while its not in use anymore.
>>>
>>> I don't see anything in this series actually making sure nothing
>>> hits the cache on exit though. Am I missing something?
>>
>> When netns refcount hits zero, netdevices in it will start dropping
>> packets.
>> And there is synchronize_net() call before cache flush.
>>
>> I think this is enough.
>
> Thanks for the explanation, I have a closer look at this.
Yes, that looks fine. Applied, thanks.
BTW, doesn't __vlan_hwaccel_rx() also need a netns_alive() check
to avoid passing packets to AF_PACKET sockets in dead namespaces?
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 17/33] netns ct: final init_net tweaks
2008-09-08 3:02 ` [PATCH 17/33] netns ct: final init_net tweaks Alexey Dobriyan
@ 2008-09-09 7:20 ` Patrick McHardy
2008-09-09 7:32 ` Alexey Dobriyan
0 siblings, 1 reply; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 7:20 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Add checks for init_net to not create kmem caches twice and so on.
>
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
>
> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
> index b55944e..52d0663 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -1016,7 +1016,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_flush);
> supposed to kill the mall. */
> void nf_conntrack_cleanup(struct net *net)
> {
> - rcu_assign_pointer(ip_ct_attach, NULL);
> + if (net_eq(net, &init_net))
> + rcu_assign_pointer(ip_ct_attach, NULL);
Having multiple of these net_eq checks per function (14 total) is
not a very nice way to do this. How about splitting the code into
a netns and a global part instead?
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 17/33] netns ct: final init_net tweaks
2008-09-09 7:20 ` Patrick McHardy
@ 2008-09-09 7:32 ` Alexey Dobriyan
2008-09-09 7:51 ` Patrick McHardy
0 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-09 7:32 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev, netfilter-devel, containers
On Tue, Sep 09, 2008 at 09:20:42AM +0200, Patrick McHardy wrote:
> Alexey Dobriyan wrote:
>> Add checks for init_net to not create kmem caches twice and so on.
>>
>> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
>>
>> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
>> index b55944e..52d0663 100644
>> --- a/net/netfilter/nf_conntrack_core.c
>> +++ b/net/netfilter/nf_conntrack_core.c
>> @@ -1016,7 +1016,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_flush);
>> supposed to kill the mall. */
>> void nf_conntrack_cleanup(struct net *net)
>> {
>> - rcu_assign_pointer(ip_ct_attach, NULL);
>> + if (net_eq(net, &init_net))
>> + rcu_assign_pointer(ip_ct_attach, NULL);
>
> Having multiple of these net_eq checks per function (14 total) is
> not a very nice way to do this.
Yep, I was just afraid of some subtle ordering rules and to keep
potential init_net breakage to minimum.
> How about splitting the code into a netns and a global part instead?
Probably they aren't strict at all.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 17/33] netns ct: final init_net tweaks
2008-09-09 7:32 ` Alexey Dobriyan
@ 2008-09-09 7:51 ` Patrick McHardy
2008-09-13 10:45 ` Alexey Dobriyan
` (6 more replies)
0 siblings, 7 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-09 7:51 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> On Tue, Sep 09, 2008 at 09:20:42AM +0200, Patrick McHardy wrote:
>> Having multiple of these net_eq checks per function (14 total) is
>> not a very nice way to do this.
>
> Yep, I was just afraid of some subtle ordering rules and to keep
> potential init_net breakage to minimum.
Me too, but I still prefer to do it properly once.
>> How about splitting the code into a netns and a global part instead?
>
> Probably they aren't strict at all.
Not particularly. For cleanup a three stage approach with
1. init_net deactivation (ip_ct_attach = NULL)
2. generic netns cleanup
3. init_net specific final cleanup (slab cache, nf_conntrack_cachep,
accounting, helpers, protocols, ...)
should work fine.
The initialization should be OK with just a init_net part
and a generic netns part.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 17/33] netns ct: final init_net tweaks
2008-09-09 7:51 ` Patrick McHardy
@ 2008-09-13 10:45 ` Alexey Dobriyan
2008-09-27 0:00 ` Alexey Dobriyan
2008-09-13 10:48 ` [PATCH v2 1/6] netns ct: per-netns statistics Alexey Dobriyan
` (5 subsequent siblings)
6 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-13 10:45 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev, netfilter-devel, containers
On Tue, Sep 09, 2008 at 09:51:56AM +0200, Patrick McHardy wrote:
> Alexey Dobriyan wrote:
>> On Tue, Sep 09, 2008 at 09:20:42AM +0200, Patrick McHardy wrote:
>>> Having multiple of these net_eq checks per function (14 total) is
>>> not a very nice way to do this.
>>
>> Yep, I was just afraid of some subtle ordering rules and to keep
>> potential init_net breakage to minimum.
>
> Me too, but I still prefer to do it properly once.
>
>>> How about splitting the code into a netns and a global part instead?
>>
>> Probably they aren't strict at all.
>
> Not particularly. For cleanup a three stage approach with
>
> 1. init_net deactivation (ip_ct_attach = NULL)
> 2. generic netns cleanup
> 3. init_net specific final cleanup (slab cache, nf_conntrack_cachep,
> accounting, helpers, protocols, ...)
>
> should work fine.
>
> The initialization should be OK with just a init_net part
> and a generic netns part.
Ugh, I'm still finding the least ugly way to put init_net checks, and
it's better to do it at the very end.
So, slight reordering.
See per-netns statistics, nf_conntrack_count, nf_conntrack_checksum,
nf_conntrack_log_invalid and accounting.
The rest (SIP, H323, GRE, PPTP, per-netns NAT) remains the same and can
be applied independently of init_net checks.
^ permalink raw reply [flat|nested] 88+ messages in thread
* [PATCH v2 1/6] netns ct: per-netns statistics
2008-09-09 7:51 ` Patrick McHardy
2008-09-13 10:45 ` Alexey Dobriyan
@ 2008-09-13 10:48 ` Alexey Dobriyan
2008-10-02 7:58 ` Patrick McHardy
2008-09-13 10:49 ` [PATCH v2 2/6] netns ct: per-netns /proc/net/stat/nf_conntrack , /proc/net/stat/ip_conntrack Alexey Dobriyan
` (4 subsequent siblings)
6 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-13 10:48 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
include/net/netfilter/nf_conntrack.h | 8 +-
include/net/netns/conntrack.h | 1
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 -
net/netfilter/nf_conntrack_core.c | 49 +++++++++---------
net/netfilter/nf_conntrack_expect.c | 4 -
net/netfilter/nf_conntrack_standalone.c | 4 -
6 files changed, 38 insertions(+), 32 deletions(-)
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -290,12 +290,12 @@ extern unsigned int nf_conntrack_htable_size;
extern int nf_conntrack_checksum;
extern int nf_conntrack_max;
-DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
-#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++)
-#define NF_CT_STAT_INC_ATOMIC(count) \
+#define NF_CT_STAT_INC(net, count) \
+ (per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++)
+#define NF_CT_STAT_INC_ATOMIC(net, count) \
do { \
local_bh_disable(); \
- __get_cpu_var(nf_conntrack_stat).count++; \
+ per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++; \
local_bh_enable(); \
} while (0)
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -12,6 +12,7 @@ struct netns_ct {
struct hlist_head *hash;
struct hlist_head *expect_hash;
struct hlist_head unconfirmed;
+ struct ip_conntrack_stat *stat;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -294,7 +294,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return &per_cpu(nf_conntrack_stat, cpu);
+ return per_cpu_ptr(init_net.ct.stat, cpu);
}
return NULL;
@@ -308,7 +308,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return &per_cpu(nf_conntrack_stat, cpu);
+ return per_cpu_ptr(init_net.ct.stat, cpu);
}
return NULL;
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -56,9 +56,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
unsigned int nf_ct_log_invalid __read_mostly;
static struct kmem_cache *nf_conntrack_cachep __read_mostly;
-DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
-EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
-
static int nf_conntrack_hash_rnd_initted;
static unsigned int nf_conntrack_hash_rnd;
@@ -171,6 +168,7 @@ static void
destroy_conntrack(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_l4proto *l4proto;
pr_debug("destroy_conntrack(%p)\n", ct);
@@ -203,7 +201,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
}
- NF_CT_STAT_INC(delete);
+ NF_CT_STAT_INC(net, delete);
spin_unlock_bh(&nf_conntrack_lock);
if (ct->master)
@@ -216,6 +214,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
static void death_by_timeout(unsigned long ul_conntrack)
{
struct nf_conn *ct = (void *)ul_conntrack;
+ struct net *net = nf_ct_net(ct);
struct nf_conn_help *help = nfct_help(ct);
struct nf_conntrack_helper *helper;
@@ -230,7 +229,7 @@ static void death_by_timeout(unsigned long ul_conntrack)
spin_lock_bh(&nf_conntrack_lock);
/* Inside lock so preempt is disabled on module removal path.
* Otherwise we can get spurious warnings. */
- NF_CT_STAT_INC(delete_list);
+ NF_CT_STAT_INC(net, delete_list);
clean_from_lists(ct);
spin_unlock_bh(&nf_conntrack_lock);
nf_ct_put(ct);
@@ -249,11 +248,11 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
local_bh_disable();
hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) {
if (nf_ct_tuple_equal(tuple, &h->tuple)) {
- NF_CT_STAT_INC(found);
+ NF_CT_STAT_INC(net, found);
local_bh_enable();
return h;
}
- NF_CT_STAT_INC(searched);
+ NF_CT_STAT_INC(net, searched);
}
local_bh_enable();
@@ -366,7 +365,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
add_timer(&ct->timeout);
atomic_inc(&ct->ct_general.use);
set_bit(IPS_CONFIRMED_BIT, &ct->status);
- NF_CT_STAT_INC(insert);
+ NF_CT_STAT_INC(net, insert);
spin_unlock_bh(&nf_conntrack_lock);
help = nfct_help(ct);
if (help && help->helper)
@@ -381,7 +380,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_ACCEPT;
out:
- NF_CT_STAT_INC(insert_failed);
+ NF_CT_STAT_INC(net, insert_failed);
spin_unlock_bh(&nf_conntrack_lock);
return NF_DROP;
}
@@ -405,11 +404,11 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) {
if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
nf_ct_tuple_equal(tuple, &h->tuple)) {
- NF_CT_STAT_INC(found);
+ NF_CT_STAT_INC(net, found);
rcu_read_unlock_bh();
return 1;
}
- NF_CT_STAT_INC(searched);
+ NF_CT_STAT_INC(net, searched);
}
rcu_read_unlock_bh();
@@ -454,7 +453,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
if (del_timer(&ct->timeout)) {
death_by_timeout((unsigned long)ct);
dropped = 1;
- NF_CT_STAT_INC_ATOMIC(early_drop);
+ NF_CT_STAT_INC_ATOMIC(net, early_drop);
}
nf_ct_put(ct);
return dropped;
@@ -581,7 +580,7 @@ init_conntrack(struct net *net,
ct->secmark = exp->master->secmark;
#endif
nf_conntrack_get(&ct->master->ct_general);
- NF_CT_STAT_INC(expect_new);
+ NF_CT_STAT_INC(net, expect_new);
} else {
struct nf_conntrack_helper *helper;
@@ -591,7 +590,7 @@ init_conntrack(struct net *net,
if (help)
rcu_assign_pointer(help->helper, helper);
}
- NF_CT_STAT_INC(new);
+ NF_CT_STAT_INC(net, new);
}
/* Overload tuple linked list to put us in unconfirmed list. */
@@ -683,7 +682,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
/* Previously seen (loopback or untracked)? Ignore. */
if (skb->nfct) {
- NF_CT_STAT_INC_ATOMIC(ignore);
+ NF_CT_STAT_INC_ATOMIC(net, ignore);
return NF_ACCEPT;
}
@@ -693,8 +692,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
&dataoff, &protonum);
if (ret <= 0) {
pr_debug("not prepared to track yet or error occured\n");
- NF_CT_STAT_INC_ATOMIC(error);
- NF_CT_STAT_INC_ATOMIC(invalid);
+ NF_CT_STAT_INC_ATOMIC(net, error);
+ NF_CT_STAT_INC_ATOMIC(net, invalid);
return -ret;
}
@@ -706,8 +705,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
if (l4proto->error != NULL) {
ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum);
if (ret <= 0) {
- NF_CT_STAT_INC_ATOMIC(error);
- NF_CT_STAT_INC_ATOMIC(invalid);
+ NF_CT_STAT_INC_ATOMIC(net, error);
+ NF_CT_STAT_INC_ATOMIC(net, invalid);
return -ret;
}
}
@@ -716,13 +715,13 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
l3proto, l4proto, &set_reply, &ctinfo);
if (!ct) {
/* Not valid part of a connection */
- NF_CT_STAT_INC_ATOMIC(invalid);
+ NF_CT_STAT_INC_ATOMIC(net, invalid);
return NF_ACCEPT;
}
if (IS_ERR(ct)) {
/* Too stressed to deal. */
- NF_CT_STAT_INC_ATOMIC(drop);
+ NF_CT_STAT_INC_ATOMIC(net, drop);
return NF_DROP;
}
@@ -735,7 +734,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
pr_debug("nf_conntrack_in: Can't track with proto module\n");
nf_conntrack_put(skb->nfct);
skb->nfct = NULL;
- NF_CT_STAT_INC_ATOMIC(invalid);
+ NF_CT_STAT_INC_ATOMIC(net, invalid);
return -ret;
}
@@ -1043,6 +1042,7 @@ void nf_conntrack_cleanup(struct net *net)
nf_conntrack_acct_fini();
nf_conntrack_expect_fini(net);
+ free_percpu(net->ct.stat);
nf_conntrack_helper_fini();
nf_conntrack_proto_fini();
}
@@ -1152,6 +1152,9 @@ int nf_conntrack_init(struct net *net)
max_factor = 4;
}
atomic_set(&net->ct.count, 0);
+ net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
+ if (!net->ct.stat)
+ goto err_stat;
ret = nf_conntrack_ecache_init(net);
if (ret < 0)
goto err_ecache;
@@ -1222,5 +1225,7 @@ err_free_hash:
err_hash:
nf_conntrack_ecache_fini(net);
err_ecache:
+ free_percpu(net->ct.stat);
+err_stat:
return -ENOMEM;
}
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -53,7 +53,7 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
master_help->expecting[exp->class]--;
nf_ct_expect_put(exp);
- NF_CT_STAT_INC(expect_delete);
+ NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
@@ -326,7 +326,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
add_timer(&exp->timeout);
atomic_inc(&exp->use);
- NF_CT_STAT_INC(expect_create);
+ NF_CT_STAT_INC(net, expect_create);
}
/* Race with expectations being used means we could have none to find; OK. */
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -203,7 +203,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu + 1;
- return &per_cpu(nf_conntrack_stat, cpu);
+ return per_cpu_ptr(init_net.ct.stat, cpu);
}
return NULL;
@@ -217,7 +217,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu + 1;
- return &per_cpu(nf_conntrack_stat, cpu);
+ return per_cpu_ptr(init_net.ct.stat, cpu);
}
return NULL;
^ permalink raw reply [flat|nested] 88+ messages in thread
* [PATCH v2 2/6] netns ct: per-netns /proc/net/stat/nf_conntrack , /proc/net/stat/ip_conntrack
2008-09-09 7:51 ` Patrick McHardy
2008-09-13 10:45 ` Alexey Dobriyan
2008-09-13 10:48 ` [PATCH v2 1/6] netns ct: per-netns statistics Alexey Dobriyan
@ 2008-09-13 10:49 ` Alexey Dobriyan
2008-10-02 7:59 ` Patrick McHardy
2008-09-13 10:51 ` [PATCH v2 3/6] netns ct: per-netns net.netfilter.nf_conntrack_count sysctl Alexey Dobriyan
` (3 subsequent siblings)
6 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-13 10:49 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev, netfilter-devel, containers
Show correct conntrack count, while I'm at it.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 14 +++++++++-----
net/netfilter/nf_conntrack_standalone.c | 14 +++++++++-----
2 files changed, 18 insertions(+), 10 deletions(-)
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -285,6 +285,7 @@ static const struct file_operations ip_exp_file_ops = {
static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
int cpu;
if (*pos == 0)
@@ -294,7 +295,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return per_cpu_ptr(init_net.ct.stat, cpu);
+ return per_cpu_ptr(net->ct.stat, cpu);
}
return NULL;
@@ -302,13 +303,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
int cpu;
for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return per_cpu_ptr(init_net.ct.stat, cpu);
+ return per_cpu_ptr(net->ct.stat, cpu);
}
return NULL;
@@ -320,7 +322,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
static int ct_cpu_seq_show(struct seq_file *seq, void *v)
{
- unsigned int nr_conntracks = atomic_read(&init_net.ct.count);
+ struct net *net = seq_file_net(seq);
+ unsigned int nr_conntracks = atomic_read(&net->ct.count);
const struct ip_conntrack_stat *st = v;
if (v == SEQ_START_TOKEN) {
@@ -360,7 +363,8 @@ static const struct seq_operations ct_cpu_seq_ops = {
static int ct_cpu_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ct_cpu_seq_ops);
+ return seq_open_net(inode, file, &ct_cpu_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations ct_cpu_seq_fops = {
@@ -368,7 +372,7 @@ static const struct file_operations ct_cpu_seq_fops = {
.open = ct_cpu_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
};
static int __net_init ip_conntrack_net_init(struct net *net)
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -194,6 +194,7 @@ static const struct file_operations ct_file_ops = {
static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
int cpu;
if (*pos == 0)
@@ -203,7 +204,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu + 1;
- return per_cpu_ptr(init_net.ct.stat, cpu);
+ return per_cpu_ptr(net->ct.stat, cpu);
}
return NULL;
@@ -211,13 +212,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
int cpu;
for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
if (!cpu_possible(cpu))
continue;
*pos = cpu + 1;
- return per_cpu_ptr(init_net.ct.stat, cpu);
+ return per_cpu_ptr(net->ct.stat, cpu);
}
return NULL;
@@ -229,7 +231,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
static int ct_cpu_seq_show(struct seq_file *seq, void *v)
{
- unsigned int nr_conntracks = atomic_read(&init_net.ct.count);
+ struct net *net = seq_file_net(seq);
+ unsigned int nr_conntracks = atomic_read(&net->ct.count);
const struct ip_conntrack_stat *st = v;
if (v == SEQ_START_TOKEN) {
@@ -269,7 +272,8 @@ static const struct seq_operations ct_cpu_seq_ops = {
static int ct_cpu_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ct_cpu_seq_ops);
+ return seq_open_net(inode, file, &ct_cpu_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations ct_cpu_seq_fops = {
@@ -277,7 +281,7 @@ static const struct file_operations ct_cpu_seq_fops = {
.open = ct_cpu_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
};
static int nf_conntrack_standalone_init_proc(struct net *net)
^ permalink raw reply [flat|nested] 88+ messages in thread
* [PATCH v2 3/6] netns ct: per-netns net.netfilter.nf_conntrack_count sysctl
2008-09-09 7:51 ` Patrick McHardy
` (2 preceding siblings ...)
2008-09-13 10:49 ` [PATCH v2 2/6] netns ct: per-netns /proc/net/stat/nf_conntrack , /proc/net/stat/ip_conntrack Alexey Dobriyan
@ 2008-09-13 10:51 ` Alexey Dobriyan
2008-10-02 8:00 ` Patrick McHardy
2008-09-13 10:52 ` [PATCH v2 4/6] netns ct: per-netns net.netfilter.nf_conntrack_checksum sysctl Alexey Dobriyan
` (2 subsequent siblings)
6 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-13 10:51 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev, netfilter-devel, containers
Note, sysctl table is always duplicated, this is simpler and less
special-cased.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
include/net/netns/conntrack.h | 4 +
net/netfilter/nf_conntrack_standalone.c | 73 +++++++++++++++++---------------
2 files changed, 45 insertions(+), 32 deletions(-)
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <asm/atomic.h>
+struct ctl_table_header;
struct nf_conntrack_ecache;
struct netns_ct {
@@ -16,6 +17,9 @@ struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
+#ifdef CONFIG_SYSCTL
+ struct ctl_table_header *sysctl_header;
+#endif
int hash_vmalloc;
int expect_vmalloc;
};
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -330,7 +330,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_checksum);
static int log_invalid_proto_min = 0;
static int log_invalid_proto_max = 255;
-static struct ctl_table_header *nf_ct_sysctl_header;
static struct ctl_table_header *nf_ct_netfilter_header;
static ctl_table nf_ct_sysctl_table[] = {
@@ -409,40 +408,58 @@ static struct ctl_path nf_ct_path[] = {
EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
-static int nf_conntrack_standalone_init_sysctl(void)
+static int nf_conntrack_standalone_init_sysctl(struct net *net)
{
- nf_ct_netfilter_header =
- register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table);
- if (!nf_ct_netfilter_header)
- goto out;
-
- nf_ct_sysctl_header =
- register_sysctl_paths(nf_net_netfilter_sysctl_path,
- nf_ct_sysctl_table);
- if (!nf_ct_sysctl_header)
+ struct ctl_table *table;
+
+ if (net_eq(net, &init_net)) {
+ nf_ct_netfilter_header =
+ register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table);
+ if (!nf_ct_netfilter_header)
+ goto out;
+ }
+
+ table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto out_kmemdup;
+
+ table[1].data = &net->ct.count;
+
+ net->ct.sysctl_header = register_net_sysctl_table(net,
+ nf_net_netfilter_sysctl_path, table);
+ if (!net->ct.sysctl_header)
goto out_unregister_netfilter;
return 0;
out_unregister_netfilter:
- unregister_sysctl_table(nf_ct_netfilter_header);
+ kfree(table);
+out_kmemdup:
+ if (net_eq(net, &init_net))
+ unregister_sysctl_table(nf_ct_netfilter_header);
out:
printk("nf_conntrack: can't register to sysctl.\n");
return -ENOMEM;
}
-static void nf_conntrack_standalone_fini_sysctl(void)
+static void nf_conntrack_standalone_fini_sysctl(struct net *net)
{
- unregister_sysctl_table(nf_ct_netfilter_header);
- unregister_sysctl_table(nf_ct_sysctl_header);
+ struct ctl_table *table;
+
+ if (net_eq(net, &init_net))
+ unregister_sysctl_table(nf_ct_netfilter_header);
+ table = net->ct.sysctl_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->ct.sysctl_header);
+ kfree(table);
}
#else
-static int nf_conntrack_standalone_init_sysctl(void)
+static int nf_conntrack_standalone_init_sysctl(struct net *net)
{
return 0;
}
-static void nf_conntrack_standalone_fini_sysctl(void)
+static void nf_conntrack_standalone_fini_sysctl(struct net *net)
{
}
#endif /* CONFIG_SYSCTL */
@@ -457,8 +474,13 @@ static int nf_conntrack_net_init(struct net *net)
ret = nf_conntrack_standalone_init_proc(net);
if (ret < 0)
goto out_proc;
+ ret = nf_conntrack_standalone_init_sysctl(net);
+ if (ret < 0)
+ goto out_sysctl;
return 0;
+out_sysctl:
+ nf_conntrack_standalone_fini_proc(net);
out_proc:
nf_conntrack_cleanup(net);
out_init:
@@ -467,6 +489,7 @@ out_init:
static void nf_conntrack_net_exit(struct net *net)
{
+ nf_conntrack_standalone_fini_sysctl(net);
nf_conntrack_standalone_fini_proc(net);
nf_conntrack_cleanup(net);
}
@@ -478,25 +501,11 @@ static struct pernet_operations nf_conntrack_net_ops = {
static int __init nf_conntrack_standalone_init(void)
{
- int ret;
-
- ret = register_pernet_subsys(&nf_conntrack_net_ops);
- if (ret < 0)
- goto out;
- ret = nf_conntrack_standalone_init_sysctl();
- if (ret < 0)
- goto out_sysctl;
- return 0;
-
-out_sysctl:
- unregister_pernet_subsys(&nf_conntrack_net_ops);
-out:
- return ret;
+ return register_pernet_subsys(&nf_conntrack_net_ops);
}
static void __exit nf_conntrack_standalone_fini(void)
{
- nf_conntrack_standalone_fini_sysctl();
unregister_pernet_subsys(&nf_conntrack_net_ops);
}
^ permalink raw reply [flat|nested] 88+ messages in thread
* [PATCH v2 4/6] netns ct: per-netns net.netfilter.nf_conntrack_checksum sysctl
2008-09-09 7:51 ` Patrick McHardy
` (3 preceding siblings ...)
2008-09-13 10:51 ` [PATCH v2 3/6] netns ct: per-netns net.netfilter.nf_conntrack_count sysctl Alexey Dobriyan
@ 2008-09-13 10:52 ` Alexey Dobriyan
2008-10-02 8:02 ` Patrick McHardy
2008-09-13 10:53 ` [PATCH v2 5/6] netns ct: per-netns net.netfilter.nf_conntrack_log_invalid sysctl Alexey Dobriyan
2008-09-13 10:55 ` [PATCH v2 6/6] netns ct: per-netns conntrack accounting Alexey Dobriyan
6 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-13 10:52 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
include/net/netfilter/nf_conntrack.h | 1 -
include/net/netns/conntrack.h | 1 +
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +-
net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +-
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +-
net/netfilter/nf_conntrack_proto_dccp.c | 2 +-
net/netfilter/nf_conntrack_proto_tcp.c | 2 +-
net/netfilter/nf_conntrack_proto_udp.c | 2 +-
net/netfilter/nf_conntrack_proto_udplite.c | 2 +-
net/netfilter/nf_conntrack_standalone.c | 7 +++----
10 files changed, 11 insertions(+), 12 deletions(-)
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -287,7 +287,6 @@ static inline int nf_ct_is_untracked(const struct sk_buff *skb)
extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
extern unsigned int nf_conntrack_htable_size;
-extern int nf_conntrack_checksum;
extern int nf_conntrack_max;
#define NF_CT_STAT_INC(net, count) \
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -17,6 +17,7 @@ struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
+ int sysctl_checksum;
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
#endif
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -270,7 +270,7 @@ static ctl_table ip_ct_sysctl_table[] = {
{
.ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM,
.procname = "ip_conntrack_checksum",
- .data = &nf_conntrack_checksum,
+ .data = &init_net.ct.sysctl_checksum,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -188,7 +188,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
}
/* See ip_conntrack_proto_tcp.c */
- if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, hooknum, dataoff, 0)) {
if (LOG_INVALID(IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -187,7 +187,7 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
return -NF_ACCEPT;
}
- if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
"nf_ct_icmpv6: ICMPv6 checksum failed\n");
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -575,7 +575,7 @@ static int dccp_error(struct net *net, struct sk_buff *skb,
}
}
- if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP,
pf)) {
msg = "nf_ct_dccp: bad checksum ";
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -780,7 +780,7 @@ static int tcp_error(struct net *net,
* because the checksum is assumed to be correct.
*/
/* FIXME: Source route IP option packets --RR */
- if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -123,7 +123,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
* We skip checking packets on the outgoing path
* because the checksum is assumed to be correct.
* FIXME: Source route IP option packets --RR */
- if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
if (LOG_INVALID(IPPROTO_UDP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -129,7 +129,7 @@ static int udplite_error(struct net *net,
}
/* Checksum invalid? Ignore. */
- if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
pf)) {
if (LOG_INVALID(IPPROTO_UDPLITE))
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -322,9 +322,6 @@ static void nf_conntrack_standalone_fini_proc(struct net *net)
/* Sysctl support */
-int nf_conntrack_checksum __read_mostly = 1;
-EXPORT_SYMBOL_GPL(nf_conntrack_checksum);
-
#ifdef CONFIG_SYSCTL
/* Log invalid packets of a given protocol */
static int log_invalid_proto_min = 0;
@@ -360,7 +357,7 @@ static ctl_table nf_ct_sysctl_table[] = {
{
.ctl_name = NET_NF_CONNTRACK_CHECKSUM,
.procname = "nf_conntrack_checksum",
- .data = &nf_conntrack_checksum,
+ .data = &init_net.ct.sysctl_checksum,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
@@ -425,6 +422,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
goto out_kmemdup;
table[1].data = &net->ct.count;
+ table[3].data = &net->ct.sysctl_checksum;
net->ct.sysctl_header = register_net_sysctl_table(net,
nf_net_netfilter_sysctl_path, table);
@@ -474,6 +472,7 @@ static int nf_conntrack_net_init(struct net *net)
ret = nf_conntrack_standalone_init_proc(net);
if (ret < 0)
goto out_proc;
+ net->ct.sysctl_checksum = 1;
ret = nf_conntrack_standalone_init_sysctl(net);
if (ret < 0)
goto out_sysctl;
^ permalink raw reply [flat|nested] 88+ messages in thread
* [PATCH v2 5/6] netns ct: per-netns net.netfilter.nf_conntrack_log_invalid sysctl
2008-09-09 7:51 ` Patrick McHardy
` (4 preceding siblings ...)
2008-09-13 10:52 ` [PATCH v2 4/6] netns ct: per-netns net.netfilter.nf_conntrack_checksum sysctl Alexey Dobriyan
@ 2008-09-13 10:53 ` Alexey Dobriyan
2008-10-02 8:04 ` Patrick McHardy
2008-09-13 10:55 ` [PATCH v2 6/6] netns ct: per-netns conntrack accounting Alexey Dobriyan
6 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-13 10:53 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
include/net/netfilter/nf_conntrack_l4proto.h | 15 +++++++--------
include/net/netns/conntrack.h | 1 +
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +-
net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 6 +++---
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +-
net/netfilter/nf_conntrack_core.c | 1 -
net/netfilter/nf_conntrack_proto_dccp.c | 10 ++++++----
net/netfilter/nf_conntrack_proto_tcp.c | 18 ++++++++++--------
net/netfilter/nf_conntrack_proto_udp.c | 6 +++---
net/netfilter/nf_conntrack_proto_udplite.c | 8 ++++----
net/netfilter/nf_conntrack_standalone.c | 6 +++---
11 files changed, 39 insertions(+), 36 deletions(-)
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -117,20 +117,19 @@ extern int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
extern const struct nla_policy nf_ct_port_nla_policy[];
-/* Log invalid packets */
-extern unsigned int nf_ct_log_invalid;
-
#ifdef CONFIG_SYSCTL
#ifdef DEBUG_INVALID_PACKETS
-#define LOG_INVALID(proto) \
- (nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW)
+#define LOG_INVALID(net, proto) \
+ ((net)->ct.sysctl_log_invalid == (proto) || \
+ (net)->ct.sysctl_log_invalid == IPPROTO_RAW)
#else
-#define LOG_INVALID(proto) \
- ((nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) \
+#define LOG_INVALID(net, proto) \
+ (((net)->ct.sysctl_log_invalid == (proto) || \
+ (net)->ct.sysctl_log_invalid == IPPROTO_RAW) \
&& net_ratelimit())
#endif
#else
-#define LOG_INVALID(proto) 0
+#define LOG_INVALID(net, proto) 0
#endif /* CONFIG_SYSCTL */
#endif /*_NF_CONNTRACK_PROTOCOL_H*/
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -18,6 +18,7 @@ struct netns_ct {
struct nf_conntrack_ecache *ecache;
#endif
int sysctl_checksum;
+ unsigned int sysctl_log_invalid; /* Log invalid packets */
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
#endif
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -278,7 +278,7 @@ static ctl_table ip_ct_sysctl_table[] = {
{
.ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
.procname = "ip_conntrack_log_invalid",
- .data = &nf_ct_log_invalid,
+ .data = &init_net.ct.sysctl_log_invalid,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -181,7 +181,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
/* Not enough header? */
icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
if (icmph == NULL) {
- if (LOG_INVALID(IPPROTO_ICMP))
+ if (LOG_INVALID(net, IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: short packet ");
return -NF_ACCEPT;
@@ -190,7 +190,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
/* See ip_conntrack_proto_tcp.c */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, hooknum, dataoff, 0)) {
- if (LOG_INVALID(IPPROTO_ICMP))
+ if (LOG_INVALID(net, IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: bad HW ICMP checksum ");
return -NF_ACCEPT;
@@ -203,7 +203,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
* discarded.
*/
if (icmph->type > NR_ICMP_TYPES) {
- if (LOG_INVALID(IPPROTO_ICMP))
+ if (LOG_INVALID(net, IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: invalid ICMP type ");
return -NF_ACCEPT;
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -181,7 +181,7 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
if (icmp6h == NULL) {
- if (LOG_INVALID(IPPROTO_ICMPV6))
+ if (LOG_INVALID(net, IPPROTO_ICMPV6))
nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
"nf_ct_icmpv6: short packet ");
return -NF_ACCEPT;
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -53,7 +53,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
struct nf_conn nf_conntrack_untracked __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
-unsigned int nf_ct_log_invalid __read_mostly;
static struct kmem_cache *nf_conntrack_cachep __read_mostly;
static int nf_conntrack_hash_rnd_initted;
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -418,6 +418,7 @@ static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv,
static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff)
{
+ struct net *net = nf_ct_net(ct);
struct dccp_hdr _dh, *dh;
const char *msg;
u_int8_t state;
@@ -445,7 +446,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
out_invalid:
- if (LOG_INVALID(IPPROTO_DCCP))
+ if (LOG_INVALID(net, IPPROTO_DCCP))
nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg);
return false;
}
@@ -463,6 +464,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, enum ip_conntrack_info ctinfo,
u_int8_t pf, unsigned int hooknum)
{
+ struct net *net = nf_ct_net(ct);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
struct dccp_hdr _dh, *dh;
u_int8_t type, old_state, new_state;
@@ -524,13 +526,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.dccp.last_pkt = type;
write_unlock_bh(&dccp_lock);
- if (LOG_INVALID(IPPROTO_DCCP))
+ if (LOG_INVALID(net, IPPROTO_DCCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_dccp: invalid packet ignored ");
return NF_ACCEPT;
case CT_DCCP_INVALID:
write_unlock_bh(&dccp_lock);
- if (LOG_INVALID(IPPROTO_DCCP))
+ if (LOG_INVALID(net, IPPROTO_DCCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_dccp: invalid state transition ");
return -NF_ACCEPT;
@@ -590,7 +592,7 @@ static int dccp_error(struct net *net, struct sk_buff *skb,
return NF_ACCEPT;
out_invalid:
- if (LOG_INVALID(IPPROTO_DCCP))
+ if (LOG_INVALID(net, IPPROTO_DCCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg);
return -NF_ACCEPT;
}
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -488,6 +488,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
const struct tcphdr *tcph,
u_int8_t pf)
{
+ struct net *net = nf_ct_net(ct);
struct ip_ct_tcp_state *sender = &state->seen[dir];
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
@@ -668,7 +669,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
nf_ct_tcp_be_liberal)
res = true;
- if (!res && LOG_INVALID(IPPROTO_TCP))
+ if (!res && LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: %s ",
before(seq, sender->td_maxend + 1) ?
@@ -761,7 +762,7 @@ static int tcp_error(struct net *net,
/* Smaller that minimal TCP header? */
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
if (th == NULL) {
- if (LOG_INVALID(IPPROTO_TCP))
+ if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: short packet ");
return -NF_ACCEPT;
@@ -769,7 +770,7 @@ static int tcp_error(struct net *net,
/* Not whole TCP header or malformed packet */
if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
- if (LOG_INVALID(IPPROTO_TCP))
+ if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: truncated/malformed packet ");
return -NF_ACCEPT;
@@ -782,7 +783,7 @@ static int tcp_error(struct net *net,
/* FIXME: Source route IP option packets --RR */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
- if (LOG_INVALID(IPPROTO_TCP))
+ if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: bad TCP checksum ");
return -NF_ACCEPT;
@@ -791,7 +792,7 @@ static int tcp_error(struct net *net,
/* Check TCP flags. */
tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
if (!tcp_valid_flags[tcpflags]) {
- if (LOG_INVALID(IPPROTO_TCP))
+ if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid TCP flag combination ");
return -NF_ACCEPT;
@@ -808,6 +809,7 @@ static int tcp_packet(struct nf_conn *ct,
u_int8_t pf,
unsigned int hooknum)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_tuple *tuple;
enum tcp_conntrack new_state, old_state;
enum ip_conntrack_dir dir;
@@ -886,7 +888,7 @@ static int tcp_packet(struct nf_conn *ct,
* thus initiate a clean new session.
*/
write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
+ if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: killing out of sync session ");
nf_ct_kill(ct);
@@ -899,7 +901,7 @@ static int tcp_packet(struct nf_conn *ct,
segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
+ if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid packet ignored ");
return NF_ACCEPT;
@@ -908,7 +910,7 @@ static int tcp_packet(struct nf_conn *ct,
pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(th), old_state);
write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
+ if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid state ");
return -NF_ACCEPT;
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -101,7 +101,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hdr == NULL) {
- if (LOG_INVALID(IPPROTO_UDP))
+ if (LOG_INVALID(net, IPPROTO_UDP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: short packet ");
return -NF_ACCEPT;
@@ -109,7 +109,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
/* Truncated/malformed packets */
if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
- if (LOG_INVALID(IPPROTO_UDP))
+ if (LOG_INVALID(net, IPPROTO_UDP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: truncated/malformed packet ");
return -NF_ACCEPT;
@@ -125,7 +125,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
* FIXME: Source route IP option packets --RR */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
- if (LOG_INVALID(IPPROTO_UDP))
+ if (LOG_INVALID(net, IPPROTO_UDP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: bad UDP checksum ");
return -NF_ACCEPT;
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -104,7 +104,7 @@ static int udplite_error(struct net *net,
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hdr == NULL) {
- if (LOG_INVALID(IPPROTO_UDPLITE))
+ if (LOG_INVALID(net, IPPROTO_UDPLITE))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: short packet ");
return -NF_ACCEPT;
@@ -114,7 +114,7 @@ static int udplite_error(struct net *net,
if (cscov == 0)
cscov = udplen;
else if (cscov < sizeof(*hdr) || cscov > udplen) {
- if (LOG_INVALID(IPPROTO_UDPLITE))
+ if (LOG_INVALID(net, IPPROTO_UDPLITE))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: invalid checksum coverage ");
return -NF_ACCEPT;
@@ -122,7 +122,7 @@ static int udplite_error(struct net *net,
/* UDPLITE mandates checksums */
if (!hdr->check) {
- if (LOG_INVALID(IPPROTO_UDPLITE))
+ if (LOG_INVALID(net, IPPROTO_UDPLITE))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: checksum missing ");
return -NF_ACCEPT;
@@ -132,7 +132,7 @@ static int udplite_error(struct net *net,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
pf)) {
- if (LOG_INVALID(IPPROTO_UDPLITE))
+ if (LOG_INVALID(net, IPPROTO_UDPLITE))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: bad UDPLite checksum ");
return -NF_ACCEPT;
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -365,7 +365,7 @@ static ctl_table nf_ct_sysctl_table[] = {
{
.ctl_name = NET_NF_CONNTRACK_LOG_INVALID,
.procname = "nf_conntrack_log_invalid",
- .data = &nf_ct_log_invalid,
+ .data = &init_net.ct.sysctl_log_invalid,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
@@ -403,8 +403,6 @@ static struct ctl_path nf_ct_path[] = {
{ }
};
-EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
-
static int nf_conntrack_standalone_init_sysctl(struct net *net)
{
struct ctl_table *table;
@@ -423,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[1].data = &net->ct.count;
table[3].data = &net->ct.sysctl_checksum;
+ table[4].data = &net->ct.sysctl_log_invalid;
net->ct.sysctl_header = register_net_sysctl_table(net,
nf_net_netfilter_sysctl_path, table);
@@ -473,6 +472,7 @@ static int nf_conntrack_net_init(struct net *net)
if (ret < 0)
goto out_proc;
net->ct.sysctl_checksum = 1;
+ net->ct.sysctl_log_invalid = 0;
ret = nf_conntrack_standalone_init_sysctl(net);
if (ret < 0)
goto out_sysctl;
^ permalink raw reply [flat|nested] 88+ messages in thread
* [PATCH v2 6/6] netns ct: per-netns conntrack accounting
2008-09-09 7:51 ` Patrick McHardy
` (5 preceding siblings ...)
2008-09-13 10:53 ` [PATCH v2 5/6] netns ct: per-netns net.netfilter.nf_conntrack_log_invalid sysctl Alexey Dobriyan
@ 2008-09-13 10:55 ` Alexey Dobriyan
2008-09-26 23:59 ` [PATCH v2 7/6] netns ct: final netns tweaks Alexey Dobriyan
2008-10-02 8:06 ` [PATCH v2 6/6] netns ct: per-netns conntrack accounting Patrick McHardy
6 siblings, 2 replies; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-13 10:55 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev, netfilter-devel, containers
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
include/net/netfilter/nf_conntrack_acct.h | 10 +--
include/net/netns/conntrack.h | 2
net/netfilter/nf_conntrack_acct.c | 100 +++++++++++++++++++++---------
net/netfilter/nf_conntrack_core.c | 4 -
4 files changed, 81 insertions(+), 35 deletions(-)
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -8,6 +8,7 @@
#ifndef _NF_CONNTRACK_ACCT_H
#define _NF_CONNTRACK_ACCT_H
+#include <net/net_namespace.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/netfilter/nf_conntrack.h>
@@ -18,8 +19,6 @@ struct nf_conn_counter {
u_int64_t bytes;
};
-extern int nf_ct_acct;
-
static inline
struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct)
{
@@ -29,9 +28,10 @@ struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct)
static inline
struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conn_counter *acct;
- if (!nf_ct_acct)
+ if (!net->ct.sysctl_acct)
return NULL;
acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, gfp);
@@ -45,7 +45,7 @@ struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
extern unsigned int
seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir);
-extern int nf_conntrack_acct_init(void);
-extern void nf_conntrack_acct_fini(void);
+extern int nf_conntrack_acct_init(struct net *net);
+extern void nf_conntrack_acct_fini(struct net *net);
#endif /* _NF_CONNTRACK_ACCT_H */
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -17,10 +17,12 @@ struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
+ int sysctl_acct;
int sysctl_checksum;
unsigned int sysctl_log_invalid; /* Log invalid packets */
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
+ struct ctl_table_header *acct_sysctl_header;
#endif
int hash_vmalloc;
int expect_vmalloc;
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -22,19 +22,17 @@
#define NF_CT_ACCT_DEFAULT 0
#endif
-int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT;
-EXPORT_SYMBOL_GPL(nf_ct_acct);
+static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT;
module_param_named(acct, nf_ct_acct, bool, 0644);
MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *acct_sysctl_header;
static struct ctl_table acct_sysctl_table[] = {
{
.ctl_name = CTL_UNNUMBERED,
.procname = "nf_conntrack_acct",
- .data = &nf_ct_acct,
+ .data = &init_net.ct.sysctl_acct,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
@@ -64,41 +62,87 @@ static struct nf_ct_ext_type acct_extend __read_mostly = {
.id = NF_CT_EXT_ACCT,
};
-int nf_conntrack_acct_init(void)
+#ifdef CONFIG_SYSCTL
+static int nf_conntrack_acct_init_sysctl(struct net *net)
{
- int ret;
+ struct ctl_table *table;
-#ifdef CONFIG_NF_CT_ACCT
- printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n");
- printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n");
- printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
-#endif
+ table = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto out;
+
+ table[0].data = &net->ct.sysctl_acct;
- ret = nf_ct_extend_register(&acct_extend);
- if (ret < 0) {
- printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
- return ret;
+ net->ct.acct_sysctl_header = register_net_sysctl_table(net,
+ nf_net_netfilter_sysctl_path, table);
+ if (!net->ct.acct_sysctl_header) {
+ printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n");
+ goto out_register;
}
+ return 0;
-#ifdef CONFIG_SYSCTL
- acct_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
- acct_sysctl_table);
+out_register:
+ kfree(table);
+out:
+ return -ENOMEM;
+}
- if (!acct_sysctl_header) {
- nf_ct_extend_unregister(&acct_extend);
+static void nf_conntrack_acct_fini_sysctl(struct net *net)
+{
+ struct ctl_table *table;
- printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n");
- return -ENOMEM;
- }
+ table = net->ct.acct_sysctl_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->ct.acct_sysctl_header);
+ kfree(table);
+}
+#else
+static int nf_conntrack_acct_init_sysctl(struct net *net)
+{
+ return 0;
+}
+
+static void nf_conntrack_acct_fini_sysctl(struct net *net)
+{
+}
+#endif
+
+int nf_conntrack_acct_init(struct net *net)
+{
+ int ret;
+
+ net->ct.sysctl_acct = nf_ct_acct;
+
+ if (net_eq(net, &init_net)) {
+#ifdef CONFIG_NF_CT_ACCT
+ printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n");
+ printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n");
+ printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
#endif
+ ret = nf_ct_extend_register(&acct_extend);
+ if (ret < 0) {
+ printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
+ goto out_extend_register;
+ }
+ }
+
+ ret = nf_conntrack_acct_init_sysctl(net);
+ if (ret < 0)
+ goto out_sysctl;
+
return 0;
+
+out_sysctl:
+ if (net_eq(net, &init_net))
+ nf_ct_extend_unregister(&acct_extend);
+out_extend_register:
+ return ret;
}
-void nf_conntrack_acct_fini(void)
+void nf_conntrack_acct_fini(struct net *net)
{
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(acct_sysctl_header);
-#endif
- nf_ct_extend_unregister(&acct_extend);
+ nf_conntrack_acct_fini_sysctl(net);
+ if (net_eq(net, &init_net))
+ nf_ct_extend_unregister(&acct_extend);
}
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1039,7 +1039,7 @@ void nf_conntrack_cleanup(struct net *net)
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
- nf_conntrack_acct_fini();
+ nf_conntrack_acct_fini(net);
nf_conntrack_expect_fini(net);
free_percpu(net->ct.stat);
nf_conntrack_helper_fini();
@@ -1191,7 +1191,7 @@ int nf_conntrack_init(struct net *net)
if (ret < 0)
goto out_fini_expect;
- ret = nf_conntrack_acct_init();
+ ret = nf_conntrack_acct_init(net);
if (ret < 0)
goto out_fini_helper;
^ permalink raw reply [flat|nested] 88+ messages in thread
* [PATCH v2 7/6] netns ct: final netns tweaks
2008-09-13 10:55 ` [PATCH v2 6/6] netns ct: per-netns conntrack accounting Alexey Dobriyan
@ 2008-09-26 23:59 ` Alexey Dobriyan
2008-10-02 8:11 ` Patrick McHardy
2008-10-02 8:06 ` [PATCH v2 6/6] netns ct: per-netns conntrack accounting Patrick McHardy
1 sibling, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-26 23:59 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev, netfilter-devel, containers
Add init_net checks to not remove kmem_caches twice and so on.
Refactor functions to split code which should be executed only for
init_net into one place.
ip_ct_attach and ip_ct_destroy assignments remain separate, because
they're separate stages in setup and teardown.
NOTE: NOTRACK code is in for-every-net part. It will be made per-netns
after we decide how to do it correctly.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
net/netfilter/nf_conntrack_core.c | 151 +++++++++++++++++++++++-------------
net/netfilter/nf_conntrack_expect.c | 26 +++---
2 files changed, 114 insertions(+), 63 deletions(-)
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1010,17 +1010,15 @@ void nf_conntrack_flush(struct net *net)
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush);
-/* Mishearing the voices in his head, our hero wonders how he's
- supposed to kill the mall. */
-void nf_conntrack_cleanup(struct net *net)
+static void nf_conntrack_cleanup_init_net(void)
{
- rcu_assign_pointer(ip_ct_attach, NULL);
-
- /* This makes sure all current packets have passed through
- netfilter framework. Roll on, two-stage module
- delete... */
- synchronize_net();
+ nf_conntrack_helper_fini();
+ nf_conntrack_proto_fini();
+ kmem_cache_destroy(nf_conntrack_cachep);
+}
+static void nf_conntrack_cleanup_net(struct net *net)
+{
nf_ct_event_cache_flush(net);
nf_conntrack_ecache_fini(net);
i_see_dead_people:
@@ -1033,17 +1031,31 @@ void nf_conntrack_cleanup(struct net *net)
while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
schedule();
- rcu_assign_pointer(nf_ct_destroy, NULL);
-
- kmem_cache_destroy(nf_conntrack_cachep);
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
-
nf_conntrack_acct_fini(net);
nf_conntrack_expect_fini(net);
free_percpu(net->ct.stat);
- nf_conntrack_helper_fini();
- nf_conntrack_proto_fini();
+}
+
+/* Mishearing the voices in his head, our hero wonders how he's
+ supposed to kill the mall. */
+void nf_conntrack_cleanup(struct net *net)
+{
+ if (net_eq(net, &init_net))
+ rcu_assign_pointer(ip_ct_attach, NULL);
+
+ /* This makes sure all current packets have passed through
+ netfilter framework. Roll on, two-stage module
+ delete... */
+ synchronize_net();
+
+ nf_conntrack_cleanup_net(net);
+
+ if (net_eq(net, &init_net)) {
+ rcu_assign_pointer(nf_ct_destroy, NULL);
+ nf_conntrack_cleanup_init_net();
+ }
}
struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced)
@@ -1128,7 +1140,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600);
-int nf_conntrack_init(struct net *net)
+static int nf_conntrack_init_init_net(void)
{
int max_factor = 8;
int ret;
@@ -1150,21 +1162,6 @@ int nf_conntrack_init(struct net *net)
* entries. */
max_factor = 4;
}
- atomic_set(&net->ct.count, 0);
- net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
- if (!net->ct.stat)
- goto err_stat;
- ret = nf_conntrack_ecache_init(net);
- if (ret < 0)
- goto err_ecache;
- net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
- &net->ct.hash_vmalloc);
- if (!net->ct.hash) {
- printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
- goto err_hash;
- }
- INIT_HLIST_HEAD(&net->ct.unconfirmed);
-
nf_conntrack_max = max_factor * nf_conntrack_htable_size;
printk("nf_conntrack version %s (%u buckets, %d max)\n",
@@ -1176,28 +1173,55 @@ int nf_conntrack_init(struct net *net)
0, 0, NULL);
if (!nf_conntrack_cachep) {
printk(KERN_ERR "Unable to create nf_conn slab cache\n");
- goto err_free_hash;
+ ret = -ENOMEM;
+ goto err_cache;
}
ret = nf_conntrack_proto_init();
if (ret < 0)
- goto err_free_conntrack_slab;
-
- ret = nf_conntrack_expect_init(net);
- if (ret < 0)
- goto out_fini_proto;
+ goto err_proto;
ret = nf_conntrack_helper_init();
if (ret < 0)
- goto out_fini_expect;
+ goto err_helper;
+
+ return 0;
+
+err_helper:
+ nf_conntrack_proto_fini();
+err_proto:
+ kmem_cache_destroy(nf_conntrack_cachep);
+err_cache:
+ return ret;
+}
+
+static int nf_conntrack_init_net(struct net *net)
+{
+ int ret;
+ atomic_set(&net->ct.count, 0);
+ INIT_HLIST_HEAD(&net->ct.unconfirmed);
+ net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
+ if (!net->ct.stat) {
+ ret = -ENOMEM;
+ goto err_stat;
+ }
+ ret = nf_conntrack_ecache_init(net);
+ if (ret < 0)
+ goto err_ecache;
+ net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
+ &net->ct.hash_vmalloc);
+ if (!net->ct.hash) {
+ ret = -ENOMEM;
+ printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
+ goto err_hash;
+ }
+ ret = nf_conntrack_expect_init(net);
+ if (ret < 0)
+ goto err_expect;
ret = nf_conntrack_acct_init(net);
if (ret < 0)
- goto out_fini_helper;
-
- /* For use by REJECT target */
- rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
- rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
+ goto err_acct;
/* Set up fake conntrack:
- to never be deleted, not in any hashes */
@@ -1208,17 +1232,11 @@ int nf_conntrack_init(struct net *net)
/* - and look it like as a confirmed connection */
set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
- return ret;
+ return 0;
-out_fini_helper:
- nf_conntrack_helper_fini();
-out_fini_expect:
+err_acct:
nf_conntrack_expect_fini(net);
-out_fini_proto:
- nf_conntrack_proto_fini();
-err_free_conntrack_slab:
- kmem_cache_destroy(nf_conntrack_cachep);
-err_free_hash:
+err_expect:
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
err_hash:
@@ -1226,5 +1244,32 @@ err_hash:
err_ecache:
free_percpu(net->ct.stat);
err_stat:
- return -ENOMEM;
+ return ret;
+}
+
+int nf_conntrack_init(struct net *net)
+{
+ int ret;
+
+ if (net_eq(net, &init_net)) {
+ ret = nf_conntrack_init_init_net();
+ if (ret < 0)
+ goto out_init_net;
+ }
+ ret = nf_conntrack_init_net(net);
+ if (ret < 0)
+ goto out_net;
+
+ if (net_eq(net, &init_net)) {
+ /* For use by REJECT target */
+ rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
+ rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
+ }
+ return 0;
+
+out_net:
+ if (net_eq(net, &init_net))
+ nf_conntrack_cleanup_init_net();
+out_init_net:
+ return ret;
}
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -563,12 +563,14 @@ int nf_conntrack_expect_init(struct net *net)
{
int err = -ENOMEM;
- if (!nf_ct_expect_hsize) {
- nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
- if (!nf_ct_expect_hsize)
- nf_ct_expect_hsize = 1;
+ if (net_eq(net, &init_net)) {
+ if (!nf_ct_expect_hsize) {
+ nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
+ if (!nf_ct_expect_hsize)
+ nf_ct_expect_hsize = 1;
+ }
+ nf_ct_expect_max = nf_ct_expect_hsize * 4;
}
- nf_ct_expect_max = nf_ct_expect_hsize * 4;
net->ct.expect_count = 0;
net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
@@ -576,11 +578,13 @@ int nf_conntrack_expect_init(struct net *net)
if (net->ct.expect_hash == NULL)
goto err1;
- nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
+ if (net_eq(net, &init_net)) {
+ nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
sizeof(struct nf_conntrack_expect),
0, 0, NULL);
- if (!nf_ct_expect_cachep)
- goto err2;
+ if (!nf_ct_expect_cachep)
+ goto err2;
+ }
err = exp_proc_init(net);
if (err < 0)
@@ -589,7 +593,8 @@ int nf_conntrack_expect_init(struct net *net)
return 0;
err3:
- kmem_cache_destroy(nf_ct_expect_cachep);
+ if (net_eq(net, &init_net))
+ kmem_cache_destroy(nf_ct_expect_cachep);
err2:
nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
nf_ct_expect_hsize);
@@ -600,7 +605,8 @@ err1:
void nf_conntrack_expect_fini(struct net *net)
{
exp_proc_remove(net);
- kmem_cache_destroy(nf_ct_expect_cachep);
+ if (net_eq(net, &init_net))
+ kmem_cache_destroy(nf_ct_expect_cachep);
nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
nf_ct_expect_hsize);
}
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 17/33] netns ct: final init_net tweaks
2008-09-13 10:45 ` Alexey Dobriyan
@ 2008-09-27 0:00 ` Alexey Dobriyan
2008-09-28 10:31 ` Patrick McHardy
0 siblings, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-09-27 0:00 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev, netfilter-devel, containers
On Sat, Sep 13, 2008 at 02:45:15PM +0400, Alexey Dobriyan wrote:
> On Tue, Sep 09, 2008 at 09:51:56AM +0200, Patrick McHardy wrote:
> > Alexey Dobriyan wrote:
> >> On Tue, Sep 09, 2008 at 09:20:42AM +0200, Patrick McHardy wrote:
> >>> Having multiple of these net_eq checks per function (14 total) is
> >>> not a very nice way to do this.
> >>
> >> Yep, I was just afraid of some subtle ordering rules and to keep
> >> potential init_net breakage to minimum.
> >
> > Me too, but I still prefer to do it properly once.
> >
> >>> How about splitting the code into a netns and a global part instead?
> >>
> >> Probably they aren't strict at all.
> >
> > Not particularly. For cleanup a three stage approach with
> >
> > 1. init_net deactivation (ip_ct_attach = NULL)
> > 2. generic netns cleanup
> > 3. init_net specific final cleanup (slab cache, nf_conntrack_cachep,
> > accounting, helpers, protocols, ...)
> >
> > should work fine.
> >
> > The initialization should be OK with just a init_net part
> > and a generic netns part.
>
> Ugh, I'm still finding the least ugly way to put init_net checks, and
> it's better to do it at the very end.
>
> So, slight reordering.
>
> See per-netns statistics, nf_conntrack_count, nf_conntrack_checksum,
> nf_conntrack_log_invalid and accounting.
>
> The rest (SIP, H323, GRE, PPTP, per-netns NAT) remains the same and can
> be applied independently of init_net checks.
Ping!
I've just sent patch which adds init_net checks in somewhat nicer way.
Please, review and apply the rest.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 17/33] netns ct: final init_net tweaks
2008-09-27 0:00 ` Alexey Dobriyan
@ 2008-09-28 10:31 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-09-28 10:31 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Ping!
>
> I've just sent patch which adds init_net checks in somewhat nicer way.
Thanks.
> Please, review and apply the rest.
I'll do that this week during the netfilter workshop.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH v2 1/6] netns ct: per-netns statistics
2008-09-13 10:48 ` [PATCH v2 1/6] netns ct: per-netns statistics Alexey Dobriyan
@ 2008-10-02 7:58 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 7:58 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> ---
>
> include/net/netfilter/nf_conntrack.h | 8 +-
> include/net/netns/conntrack.h | 1
> net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 -
> net/netfilter/nf_conntrack_core.c | 49 +++++++++---------
> net/netfilter/nf_conntrack_expect.c | 4 -
> net/netfilter/nf_conntrack_standalone.c | 4 -
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH v2 2/6] netns ct: per-netns /proc/net/stat/nf_conntrack , /proc/net/stat/ip_conntrack
2008-09-13 10:49 ` [PATCH v2 2/6] netns ct: per-netns /proc/net/stat/nf_conntrack , /proc/net/stat/ip_conntrack Alexey Dobriyan
@ 2008-10-02 7:59 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 7:59 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Show correct conntrack count, while I'm at it.
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH v2 3/6] netns ct: per-netns net.netfilter.nf_conntrack_count sysctl
2008-09-13 10:51 ` [PATCH v2 3/6] netns ct: per-netns net.netfilter.nf_conntrack_count sysctl Alexey Dobriyan
@ 2008-10-02 8:00 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 8:00 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Note, sysctl table is always duplicated, this is simpler and less
> special-cased.
>
>
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH v2 4/6] netns ct: per-netns net.netfilter.nf_conntrack_checksum sysctl
2008-09-13 10:52 ` [PATCH v2 4/6] netns ct: per-netns net.netfilter.nf_conntrack_checksum sysctl Alexey Dobriyan
@ 2008-10-02 8:02 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 8:02 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> ---
>
> include/net/netfilter/nf_conntrack.h | 1 -
> include/net/netns/conntrack.h | 1 +
> net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +-
> net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +-
> net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +-
> net/netfilter/nf_conntrack_proto_dccp.c | 2 +-
> net/netfilter/nf_conntrack_proto_tcp.c | 2 +-
> net/netfilter/nf_conntrack_proto_udp.c | 2 +-
> net/netfilter/nf_conntrack_proto_udplite.c | 2 +-
> net/netfilter/nf_conntrack_standalone.c | 7 +++----
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH v2 5/6] netns ct: per-netns net.netfilter.nf_conntrack_log_invalid sysctl
2008-09-13 10:53 ` [PATCH v2 5/6] netns ct: per-netns net.netfilter.nf_conntrack_log_invalid sysctl Alexey Dobriyan
@ 2008-10-02 8:04 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 8:04 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> ---
>
> include/net/netfilter/nf_conntrack_l4proto.h | 15 +++++++--------
> include/net/netns/conntrack.h | 1 +
> net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +-
> net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 6 +++---
> net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +-
> net/netfilter/nf_conntrack_core.c | 1 -
> net/netfilter/nf_conntrack_proto_dccp.c | 10 ++++++----
> net/netfilter/nf_conntrack_proto_tcp.c | 18 ++++++++++--------
> net/netfilter/nf_conntrack_proto_udp.c | 6 +++---
> net/netfilter/nf_conntrack_proto_udplite.c | 8 ++++----
> net/netfilter/nf_conntrack_standalone.c | 6 +++---
> 11 files changed, 39 insertions(+), 36 deletions(-)
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH v2 6/6] netns ct: per-netns conntrack accounting
2008-09-13 10:55 ` [PATCH v2 6/6] netns ct: per-netns conntrack accounting Alexey Dobriyan
2008-09-26 23:59 ` [PATCH v2 7/6] netns ct: final netns tweaks Alexey Dobriyan
@ 2008-10-02 8:06 ` Patrick McHardy
1 sibling, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 8:06 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> ---
>
> include/net/netfilter/nf_conntrack_acct.h | 10 +--
> include/net/netns/conntrack.h | 2
> net/netfilter/nf_conntrack_acct.c | 100 +++++++++++++++++++++---------
> net/netfilter/nf_conntrack_core.c | 4 -
> 4 files changed, 81 insertions(+), 35 deletions(-)
>
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH v2 7/6] netns ct: final netns tweaks
2008-09-26 23:59 ` [PATCH v2 7/6] netns ct: final netns tweaks Alexey Dobriyan
@ 2008-10-02 8:11 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 8:11 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Add init_net checks to not remove kmem_caches twice and so on.
>
> Refactor functions to split code which should be executed only for
> init_net into one place.
>
> ip_ct_attach and ip_ct_destroy assignments remain separate, because
> they're separate stages in setup and teardown.
>
> NOTE: NOTRACK code is in for-every-net part. It will be made per-netns
> after we decide how to do it correctly.
Also applied. thanks.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 24/33] netns ct: SIP conntracking in netns
2008-09-08 3:03 ` [PATCH 24/33] netns ct: SIP conntracking in netns Alexey Dobriyan
@ 2008-10-02 8:52 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 8:52 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 25/33] netns ct: H323 conntracking in netns
2008-09-08 3:03 ` [PATCH 25/33] netns ct: H323 " Alexey Dobriyan
@ 2008-10-02 8:52 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 8:52 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 26/33] netns ct: GRE conntracking in netns
2008-09-08 3:03 ` [PATCH 26/33] netns ct: GRE " Alexey Dobriyan
@ 2008-10-02 8:53 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 8:53 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> * make keymap list per-netns
> * per-netns keymap lock (not strictly necessary)
> * flush keymap at netns stop and module unload.
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 27/33] netns ct: PPTP conntracking in netns
2008-09-08 3:03 ` [PATCH 27/33] netns ct: PPTP " Alexey Dobriyan
@ 2008-10-02 8:54 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 8:54 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 28/33] netns nat: fix ipt_MASQUERADE in netns
2008-09-08 3:03 ` [PATCH 28/33] netns nat: fix ipt_MASQUERADE " Alexey Dobriyan
@ 2008-10-02 9:06 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 9:06 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> First, allow entry in notifier hook.
> Second, start conntrack cleanup in netns to which netdevice belongs.
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 29/33] netns nat: per-netns NAT table
2008-09-08 3:03 ` [PATCH 29/33] netns nat: per-netns NAT table Alexey Dobriyan
@ 2008-10-02 9:08 ` Patrick McHardy
2008-10-02 9:09 ` Patrick McHardy
1 sibling, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 9:08 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Same story as with iptable_filter, iptables_raw tables.
>
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 29/33] netns nat: per-netns NAT table
2008-09-08 3:03 ` [PATCH 29/33] netns nat: per-netns NAT table Alexey Dobriyan
2008-10-02 9:08 ` Patrick McHardy
@ 2008-10-02 9:09 ` Patrick McHardy
1 sibling, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 9:09 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> Same story as with iptable_filter, iptables_raw tables.
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 30/33] netns nat: per-netns bysource hash
2008-09-08 3:03 ` [PATCH 30/33] netns nat: per-netns bysource hash Alexey Dobriyan
@ 2008-10-02 9:09 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 9:09 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 31/33] netns ct: fixup DNAT in netns
2008-09-08 3:03 ` [PATCH 31/33] netns ct: fixup DNAT in netns Alexey Dobriyan
@ 2008-10-02 9:10 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 9:10 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 32/33] netns nat: PPTP NAT in netns
2008-09-08 3:03 ` [PATCH 32/33] netns nat: PPTP NAT " Alexey Dobriyan
@ 2008-10-02 9:11 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 9:11 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
Applied.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 33/33] Enable netfilter in netns
2008-09-08 3:03 ` [PATCH 33/33] Enable netfilter " Alexey Dobriyan
@ 2008-10-02 9:12 ` Patrick McHardy
2008-10-02 9:51 ` Alexey Dobriyan
2008-10-02 9:53 ` Alexey Dobriyan
0 siblings, 2 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 9:12 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> From kernel perspective, allow entrance in nf_hook_slow().
>
> Stuff which uses nf_register_hook/nf_register_hooks, but otherwise not netns-ready:
>
> DECnet netfilter
> ipt_CLUSTERIP
> nf_nat_standalone.c together with XFRM (?)
> IPVS
> several individual match modules (like hashlimit)
> ctnetlink
> NOTRACK
> all sorts of queueing and reporting to userspace
> L3 and L4 protocol sysctls, bridge sysctls
> probably something else
>
> Anyway critical mass has been achieved, there is no reason to hide netfilter any longer.
>
> From userspace perspective, allow to manipulate all sorts of
> iptables/ip6tables/arptables rules.
>
Applied. thanks Alexey.
Is there an easy way to test all this stuff?
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 33/33] Enable netfilter in netns
2008-10-02 9:12 ` Patrick McHardy
@ 2008-10-02 9:51 ` Alexey Dobriyan
2008-10-02 10:00 ` Patrick McHardy
2008-10-02 9:53 ` Alexey Dobriyan
1 sibling, 1 reply; 88+ messages in thread
From: Alexey Dobriyan @ 2008-10-02 9:51 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev, netfilter-devel, containers
On Thu, Oct 02, 2008 at 11:12:08AM +0200, Patrick McHardy wrote:
> Alexey Dobriyan wrote:
>> From kernel perspective, allow entrance in nf_hook_slow().
>>
>> Stuff which uses nf_register_hook/nf_register_hooks, but otherwise not netns-ready:
>>
>> DECnet netfilter
>> ipt_CLUSTERIP
>> nf_nat_standalone.c together with XFRM (?)
>> IPVS
>> several individual match modules (like hashlimit)
>> ctnetlink
>> NOTRACK
>> all sorts of queueing and reporting to userspace
>> L3 and L4 protocol sysctls, bridge sysctls
>> probably something else
>>
>> Anyway critical mass has been achieved, there is no reason to hide netfilter any longer.
>>
>> From userspace perspective, allow to manipulate all sorts of
>> iptables/ip6tables/arptables rules.
>>
>
> Applied. thanks Alexey.
>
> Is there an easy way to test all this stuff?
I used the following:
0) netns is currently mutually exclusive with sysfs, so depending on
sanity of distro initscripts booting sysfs-less kernel can be tricky.
In Gentoo, for example a) rm -rf /sys (sic!), b) RC_USE_FSTAB="yes",
c) RC_DEVICES="static" in /etc/conf.d/rc are needed.
1) netns creation tool (attached, some container guy posted it somewhere)
# ns_exec -n /bin/sh
2) shutdown network in init_net
sudo /etc/init.d/ntpd stop
sudo /etc/init.d/sshd stop
sudo /etc/init.d/iptables stop
sudo /etc/init.d/ip6tables stop
sudo /etc/init.d/net.eth1 stop
sudo /etc/init.d/net.eth0 stop
3) move netdevices to netns
ip l s dev eth0 netns "$1"
ip l s dev eth1 netns "$1"
where $1 is PID of shell from 1)
4) in netns start everything back
sudo /etc/init.d/net.eth0 start
sudo /etc/init.d/net.eth1 start
sudo /etc/init.d/iptables start
sudo /etc/init.d/ip6tables start
sudo /etc/init.d/sshd start
sudo /etc/init.d/ntpd start
5) at this point my usual NAT setup is back working for me and everything should
be like in init_net (modulo aforementioned exceptions) and
independent from init_net.
Leaked netns are in /proc/slabinfo under "net_namespace".
Some IPv6 printks can be annoying, so mute them.
Object poisoning with SLUB won't work for irrelevant reasons, so use SLAB.
Something like that.
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 33/33] Enable netfilter in netns
2008-10-02 9:12 ` Patrick McHardy
2008-10-02 9:51 ` Alexey Dobriyan
@ 2008-10-02 9:53 ` Alexey Dobriyan
1 sibling, 0 replies; 88+ messages in thread
From: Alexey Dobriyan @ 2008-10-02 9:53 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev, netfilter-devel, containers
^ permalink raw reply [flat|nested] 88+ messages in thread
* Re: [PATCH 33/33] Enable netfilter in netns
2008-10-02 9:51 ` Alexey Dobriyan
@ 2008-10-02 10:00 ` Patrick McHardy
0 siblings, 0 replies; 88+ messages in thread
From: Patrick McHardy @ 2008-10-02 10:00 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, containers
Alexey Dobriyan wrote:
> On Thu, Oct 02, 2008 at 11:12:08AM +0200, Patrick McHardy wrote:
>
>> Is there an easy way to test all this stuff?
>>
>
> I used the following:
>
> 0) netns is currently mutually exclusive with sysfs, so depending on
> sanity of distro initscripts booting sysfs-less kernel can be tricky.
>
> In Gentoo, for example a) rm -rf /sys (sic!), b) RC_USE_FSTAB="yes",
> c) RC_DEVICES="static" in /etc/conf.d/rc are needed.
>
> 1) netns creation tool (attached, some container guy posted it somewhere)
>
> # ns_exec -n /bin/sh
>
> 2) shutdown network in init_net
>
> sudo /etc/init.d/ntpd stop
> sudo /etc/init.d/sshd stop
> sudo /etc/init.d/iptables stop
> sudo /etc/init.d/ip6tables stop
> sudo /etc/init.d/net.eth1 stop
> sudo /etc/init.d/net.eth0 stop
>
> 3) move netdevices to netns
>
> ip l s dev eth0 netns "$1"
> ip l s dev eth1 netns "$1"
>
> where $1 is PID of shell from 1)
>
> 4) in netns start everything back
>
> sudo /etc/init.d/net.eth0 start
> sudo /etc/init.d/net.eth1 start
> sudo /etc/init.d/iptables start
> sudo /etc/init.d/ip6tables start
> sudo /etc/init.d/sshd start
> sudo /etc/init.d/ntpd start
>
> 5) at this point my usual NAT setup is back working for me and everything should
> be like in init_net (modulo aforementioned exceptions) and
> independent from init_net.
>
> Leaked netns are in /proc/slabinfo under "net_namespace".
>
> Some IPv6 printks can be annoying, so mute them.
>
> Object poisoning with SLUB won't work for irrelevant reasons, so use SLAB.
>
> Something like that.
>
Thanks, I'll give it a try.
^ permalink raw reply [flat|nested] 88+ messages in thread
end of thread, other threads:[~2008-10-02 10:01 UTC | newest]
Thread overview: 88+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-21 22:00 [PATCH 10/38] netns ct: per-netns expectations adobriyan
2008-09-04 16:43 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 01/33] nf_conntrack_sip: de-static helper pointers Alexey Dobriyan
2008-09-08 3:02 ` [PATCH 02/33] nf_conntrack_gre: more locking around keymap list Alexey Dobriyan
2008-09-08 3:02 ` [PATCH 03/33] nf_conntrack_gre: nf_ct_gre_keymap_flush() fixlet Alexey Dobriyan
2008-09-09 5:39 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 04/33] Fix {ip,6}_route_me_harder() in netns Alexey Dobriyan
2008-09-09 5:44 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 05/33] netns ct: per-netns expectations Alexey Dobriyan
2008-09-09 5:49 ` Patrick McHardy
2008-09-09 7:07 ` Alexey Dobriyan
2008-09-09 7:10 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 06/33] netns ct: per-netns unconfirmed list Alexey Dobriyan
2008-09-09 5:50 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 07/33] netns ct: pass netns pointer to nf_conntrack_in() Alexey Dobriyan
2008-09-09 5:52 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 08/33] netns ct: pass netns pointer to L4 protocol's ->error hook Alexey Dobriyan
2008-09-09 5:54 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 09/33] netns ct: per-netns /proc/net/nf_conntrack, /proc/net/stat/nf_conntrack Alexey Dobriyan
2008-09-09 5:56 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 10/33] netns ct: per-netns /proc/net/nf_conntrack_expect Alexey Dobriyan
2008-09-09 5:57 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 11/33] netns ct: per-netns /proc/net/ip_conntrack, /proc/net/stat/ip_conntrack, /proc/net/ip_conntrack_expect Alexey Dobriyan
2008-09-09 5:59 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 12/33] netns ct: export netns list Alexey Dobriyan
2008-09-09 5:59 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 13/33] netns ct: unregister helper in every netns Alexey Dobriyan
2008-09-09 6:01 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 14/33] netns ct: cleanup after L3 and L4 proto unregister " Alexey Dobriyan
2008-09-09 6:03 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 15/33] netns ct: pass conntrack to nf_conntrack_event_cache() not skb Alexey Dobriyan
2008-09-09 6:04 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 16/33] netns ct: per-netns event cache Alexey Dobriyan
[not found] ` <1220842990-30500-16-git-send-email-adobriyan-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2008-09-09 6:12 ` Patrick McHardy
2008-09-09 7:07 ` Alexey Dobriyan
2008-09-09 7:07 ` Patrick McHardy
2008-09-09 7:16 ` Patrick McHardy
2008-09-08 3:02 ` [PATCH 17/33] netns ct: final init_net tweaks Alexey Dobriyan
2008-09-09 7:20 ` Patrick McHardy
2008-09-09 7:32 ` Alexey Dobriyan
2008-09-09 7:51 ` Patrick McHardy
2008-09-13 10:45 ` Alexey Dobriyan
2008-09-27 0:00 ` Alexey Dobriyan
2008-09-28 10:31 ` Patrick McHardy
2008-09-13 10:48 ` [PATCH v2 1/6] netns ct: per-netns statistics Alexey Dobriyan
2008-10-02 7:58 ` Patrick McHardy
2008-09-13 10:49 ` [PATCH v2 2/6] netns ct: per-netns /proc/net/stat/nf_conntrack , /proc/net/stat/ip_conntrack Alexey Dobriyan
2008-10-02 7:59 ` Patrick McHardy
2008-09-13 10:51 ` [PATCH v2 3/6] netns ct: per-netns net.netfilter.nf_conntrack_count sysctl Alexey Dobriyan
2008-10-02 8:00 ` Patrick McHardy
2008-09-13 10:52 ` [PATCH v2 4/6] netns ct: per-netns net.netfilter.nf_conntrack_checksum sysctl Alexey Dobriyan
2008-10-02 8:02 ` Patrick McHardy
2008-09-13 10:53 ` [PATCH v2 5/6] netns ct: per-netns net.netfilter.nf_conntrack_log_invalid sysctl Alexey Dobriyan
2008-10-02 8:04 ` Patrick McHardy
2008-09-13 10:55 ` [PATCH v2 6/6] netns ct: per-netns conntrack accounting Alexey Dobriyan
2008-09-26 23:59 ` [PATCH v2 7/6] netns ct: final netns tweaks Alexey Dobriyan
2008-10-02 8:11 ` Patrick McHardy
2008-10-02 8:06 ` [PATCH v2 6/6] netns ct: per-netns conntrack accounting Patrick McHardy
2008-09-08 3:02 ` [PATCH 17/33] netns ct: final init_net tweaks Alexey Dobriyan
2008-09-08 3:02 ` [PATCH 19/33] netns ct: per-netns /proc/net/stat/nf_conntrack, /proc/net/stat/ip_conntrack Alexey Dobriyan
2008-09-08 3:02 ` [PATCH 20/33] netns ct: per-netns net.netfilter.nf_conntrack_count sysctl Alexey Dobriyan
2008-09-08 3:02 ` [PATCH 21/33] netns ct: per-netns net.netfilter.nf_conntrack_checksum sysctl Alexey Dobriyan
2008-09-08 3:02 ` [PATCH 22/33] netns ct: per-netns net.netfilter.nf_conntrack_log_invalid sysctl Alexey Dobriyan
2008-09-08 3:03 ` Alexey Dobriyan
2008-09-08 3:03 ` [PATCH 24/33] netns ct: SIP conntracking in netns Alexey Dobriyan
2008-10-02 8:52 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 25/33] netns ct: H323 " Alexey Dobriyan
2008-10-02 8:52 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 26/33] netns ct: GRE " Alexey Dobriyan
2008-10-02 8:53 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 27/33] netns ct: PPTP " Alexey Dobriyan
2008-10-02 8:54 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 28/33] netns nat: fix ipt_MASQUERADE " Alexey Dobriyan
2008-10-02 9:06 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 29/33] netns nat: per-netns NAT table Alexey Dobriyan
2008-10-02 9:08 ` Patrick McHardy
2008-10-02 9:09 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 30/33] netns nat: per-netns bysource hash Alexey Dobriyan
2008-10-02 9:09 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 31/33] netns ct: fixup DNAT in netns Alexey Dobriyan
2008-10-02 9:10 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 32/33] netns nat: PPTP NAT " Alexey Dobriyan
2008-10-02 9:11 ` Patrick McHardy
2008-09-08 3:03 ` [PATCH 33/33] Enable netfilter " Alexey Dobriyan
2008-10-02 9:12 ` Patrick McHardy
2008-10-02 9:51 ` Alexey Dobriyan
2008-10-02 10:00 ` Patrick McHardy
2008-10-02 9:53 ` Alexey Dobriyan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).