* [PATCH nf-next 1/3] netfilter: conntrack: make netns address part of nat bysrc hash
2016-05-09 14:24 [PATCH nf-next 0/3] netfilter: remove per-netns conntrack tables, part 3 Florian Westphal
@ 2016-05-09 14:24 ` Florian Westphal
2016-05-09 14:24 ` [PATCH nf-next 2/3] netfilter: conntrack: use a single nat bysource table for all namespaces Florian Westphal
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Florian Westphal @ 2016-05-09 14:24 UTC (permalink / raw)
To: netfilter-devel; +Cc: Florian Westphal
Will be needed soon when we place all in the same hash table.
Signed-off-by: Florian Westphal <fw@strlen.de>
---
net/netfilter/nf_nat_core.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index d74e716..069912c 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -118,7 +118,7 @@ EXPORT_SYMBOL(nf_xfrm_me_harder);
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
-hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
unsigned int hash;
@@ -126,9 +126,9 @@ hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
/* Original src, to ensure we map it consistently if poss. */
hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
- tuple->dst.protonum ^ nf_nat_hash_rnd);
+ tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
- return reciprocal_scale(hash, net->ct.nat_htable_size);
+ return reciprocal_scale(hash, n->ct.nat_htable_size);
}
/* Is this tuple already taken? (not by us) */
--
2.7.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH nf-next 2/3] netfilter: conntrack: use a single nat bysource table for all namespaces
2016-05-09 14:24 [PATCH nf-next 0/3] netfilter: remove per-netns conntrack tables, part 3 Florian Westphal
2016-05-09 14:24 ` [PATCH nf-next 1/3] netfilter: conntrack: make netns address part of nat bysrc hash Florian Westphal
@ 2016-05-09 14:24 ` Florian Westphal
2016-05-09 14:24 ` [PATCH nf-next 3/3] netfilter: conntrack: use single slab cache Florian Westphal
2016-05-09 14:45 ` [PATCH nf-next 0/3] netfilter: remove per-netns conntrack tables, part 3 Pablo Neira Ayuso
3 siblings, 0 replies; 5+ messages in thread
From: Florian Westphal @ 2016-05-09 14:24 UTC (permalink / raw)
To: netfilter-devel; +Cc: Florian Westphal
We already include netns address in the hash, so we only need to use
net_eq in find_appropriate_src and can then put all entries into
same table.
Signed-off-by: Florian Westphal <fw@strlen.de>
---
include/net/netns/conntrack.h | 4 ----
net/netfilter/nf_nat_core.c | 33 +++++++++++++++++----------------
2 files changed, 17 insertions(+), 20 deletions(-)
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 2811ddc..1e751bf 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -103,9 +103,5 @@ struct netns_ct {
unsigned int labels_used;
u8 label_words;
#endif
-#ifdef CONFIG_NF_NAT_NEEDED
- struct hlist_head *nat_bysource;
- unsigned int nat_htable_size;
-#endif
};
#endif
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 069912c..6877a39 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -37,6 +37,9 @@ static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
__read_mostly;
static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
__read_mostly;
+
+static struct hlist_head *nf_nat_bysource __read_mostly;
+static unsigned int nf_nat_htable_size __read_mostly;
static unsigned int nf_nat_hash_rnd __read_mostly;
inline const struct nf_nat_l3proto *
@@ -128,7 +131,7 @@ hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
- return reciprocal_scale(hash, n->ct.nat_htable_size);
+ return reciprocal_scale(hash, nf_nat_htable_size);
}
/* Is this tuple already taken? (not by us) */
@@ -198,9 +201,10 @@ find_appropriate_src(struct net *net,
const struct nf_conn_nat *nat;
const struct nf_conn *ct;
- hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) {
+ hlist_for_each_entry_rcu(nat, &nf_nat_bysource[h], bysource) {
ct = nat->ct;
if (same_src(ct, tuple) &&
+ net_eq(net, nf_ct_net(ct)) &&
nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
/* Copy source part from reply tuple. */
nf_ct_invert_tuplepr(result,
@@ -433,7 +437,7 @@ nf_nat_setup_info(struct nf_conn *ct,
nat = nfct_nat(ct);
nat->ct = ct;
hlist_add_head_rcu(&nat->bysource,
- &net->ct.nat_bysource[srchash]);
+ &nf_nat_bysource[srchash]);
spin_unlock_bh(&nf_nat_lock);
}
@@ -821,27 +825,14 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
}
#endif
-static int __net_init nf_nat_net_init(struct net *net)
-{
- /* Leave them the same for the moment. */
- net->ct.nat_htable_size = nf_conntrack_htable_size;
- net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0);
- if (!net->ct.nat_bysource)
- return -ENOMEM;
- return 0;
-}
-
static void __net_exit nf_nat_net_exit(struct net *net)
{
struct nf_nat_proto_clean clean = {};
nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0);
- synchronize_rcu();
- nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
}
static struct pernet_operations nf_nat_net_ops = {
- .init = nf_nat_net_init,
.exit = nf_nat_net_exit,
};
@@ -854,8 +845,16 @@ static int __init nf_nat_init(void)
{
int ret;
+ /* Leave them the same for the moment. */
+ nf_nat_htable_size = nf_conntrack_htable_size;
+
+ nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
+ if (!nf_nat_bysource)
+ return -ENOMEM;
+
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
return ret;
}
@@ -879,6 +878,7 @@ static int __init nf_nat_init(void)
return 0;
cleanup_extend:
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
nf_ct_extend_unregister(&nat_extend);
return ret;
}
@@ -897,6 +897,7 @@ static void __exit nf_nat_cleanup(void)
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
synchronize_net();
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
}
MODULE_LICENSE("GPL");
--
2.7.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH nf-next 3/3] netfilter: conntrack: use single slab cache
2016-05-09 14:24 [PATCH nf-next 0/3] netfilter: remove per-netns conntrack tables, part 3 Florian Westphal
2016-05-09 14:24 ` [PATCH nf-next 1/3] netfilter: conntrack: make netns address part of nat bysrc hash Florian Westphal
2016-05-09 14:24 ` [PATCH nf-next 2/3] netfilter: conntrack: use a single nat bysource table for all namespaces Florian Westphal
@ 2016-05-09 14:24 ` Florian Westphal
2016-05-09 14:45 ` [PATCH nf-next 0/3] netfilter: remove per-netns conntrack tables, part 3 Pablo Neira Ayuso
3 siblings, 0 replies; 5+ messages in thread
From: Florian Westphal @ 2016-05-09 14:24 UTC (permalink / raw)
To: netfilter-devel; +Cc: Florian Westphal
An earlier patch changed lookup side to also net_eq() namespaces after
obtaining a reference on the conntrack, so a single kmemcache can be used.
Signed-off-by: Florian Westphal <fw@strlen.de>
---
include/net/netns/conntrack.h | 2 --
net/netfilter/nf_conntrack_core.c | 36 ++++++++++++++----------------------
2 files changed, 14 insertions(+), 24 deletions(-)
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 1e751bf..38b1a80 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -84,7 +84,6 @@ struct netns_ct {
struct ctl_table_header *event_sysctl_header;
struct ctl_table_header *helper_sysctl_header;
#endif
- char *slabname;
unsigned int sysctl_log_invalid; /* Log invalid packets */
int sysctl_events;
int sysctl_acct;
@@ -93,7 +92,6 @@ struct netns_ct {
int sysctl_tstamp;
int sysctl_checksum;
- struct kmem_cache *nf_conntrack_cachep;
struct ct_pcpu __percpu *pcpu_lists;
struct ip_conntrack_stat __percpu *stat;
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f58a704..0cd2936 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -72,6 +72,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash);
+static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
static __read_mostly seqcount_t nf_conntrack_generation;
static __read_mostly bool nf_conntrack_locks_all;
@@ -910,7 +911,7 @@ __nf_conntrack_alloc(struct net *net,
* Do not use kmem_cache_zalloc(), as this cache uses
* SLAB_DESTROY_BY_RCU.
*/
- ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
+ ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
if (ct == NULL)
goto out;
@@ -937,7 +938,7 @@ __nf_conntrack_alloc(struct net *net,
atomic_set(&ct->ct_general.use, 0);
return ct;
out_free:
- kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+ kmem_cache_free(nf_conntrack_cachep, ct);
out:
atomic_dec(&net->ct.count);
return ERR_PTR(-ENOMEM);
@@ -964,7 +965,7 @@ void nf_conntrack_free(struct nf_conn *ct)
nf_ct_ext_destroy(ct);
nf_ct_ext_free(ct);
- kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+ kmem_cache_free(nf_conntrack_cachep, ct);
smp_mb__before_atomic();
atomic_dec(&net->ct.count);
}
@@ -1587,8 +1588,6 @@ i_see_dead_people:
nf_conntrack_tstamp_pernet_fini(net);
nf_conntrack_acct_pernet_fini(net);
nf_conntrack_expect_pernet_fini(net);
- kmem_cache_destroy(net->ct.nf_conntrack_cachep);
- kfree(net->ct.slabname);
free_percpu(net->ct.stat);
free_percpu(net->ct.pcpu_lists);
}
@@ -1693,7 +1692,8 @@ EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
int nf_conntrack_init_start(void)
{
int max_factor = 8;
- int i, ret, cpu;
+ int ret = -ENOMEM;
+ int i, cpu;
seqcount_init(&nf_conntrack_generation);
@@ -1729,6 +1729,12 @@ int nf_conntrack_init_start(void)
nf_conntrack_max = max_factor * nf_conntrack_htable_size;
+ nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
+ sizeof(struct nf_conn), 0,
+ SLAB_DESTROY_BY_RCU, NULL);
+ if (!nf_conntrack_cachep)
+ goto err_cachep;
+
printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
nf_conntrack_max);
@@ -1805,6 +1811,8 @@ err_tstamp:
err_acct:
nf_conntrack_expect_fini();
err_expect:
+ kmem_cache_destroy(nf_conntrack_cachep);
+err_cachep:
nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
return ret;
}
@@ -1846,18 +1854,6 @@ int nf_conntrack_init_net(struct net *net)
if (!net->ct.stat)
goto err_pcpu_lists;
- net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
- if (!net->ct.slabname)
- goto err_slabname;
-
- net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
- sizeof(struct nf_conn), 0,
- SLAB_DESTROY_BY_RCU, NULL);
- if (!net->ct.nf_conntrack_cachep) {
- printk(KERN_ERR "Unable to create nf_conn slab cache\n");
- goto err_cache;
- }
-
ret = nf_conntrack_expect_pernet_init(net);
if (ret < 0)
goto err_expect;
@@ -1889,10 +1885,6 @@ err_tstamp:
err_acct:
nf_conntrack_expect_pernet_fini(net);
err_expect:
- kmem_cache_destroy(net->ct.nf_conntrack_cachep);
-err_cache:
- kfree(net->ct.slabname);
-err_slabname:
free_percpu(net->ct.stat);
err_pcpu_lists:
free_percpu(net->ct.pcpu_lists);
--
2.7.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH nf-next 0/3] netfilter: remove per-netns conntrack tables, part 3
2016-05-09 14:24 [PATCH nf-next 0/3] netfilter: remove per-netns conntrack tables, part 3 Florian Westphal
` (2 preceding siblings ...)
2016-05-09 14:24 ` [PATCH nf-next 3/3] netfilter: conntrack: use single slab cache Florian Westphal
@ 2016-05-09 14:45 ` Pablo Neira Ayuso
3 siblings, 0 replies; 5+ messages in thread
From: Pablo Neira Ayuso @ 2016-05-09 14:45 UTC (permalink / raw)
To: Florian Westphal; +Cc: netfilter-devel
On Mon, May 09, 2016 at 04:24:29PM +0200, Florian Westphal wrote:
> This series removes the per-namespace duplication of the NAT bysource table.
[...]
> I have another patch queued to convert nat bysource table to an rhashtable.
> However; it seems better to wait until the next merge window so that bug
> reports can be more easily pointed to the de-duplication work or a rhashtable
> conversion, I will therefore hold this back for now.
Agreed, thanks Florian.
Series applied.
Will be sending a batch with pending nf-next updates today, including
these three patches.
^ permalink raw reply [flat|nested] 5+ messages in thread