* [PATCH v2] netfilter: save the hash of the tuple in the original direction for latter use
@ 2010-08-19 14:33 Changli Gao
2010-08-19 15:21 ` Mathieu Desnoyers
0 siblings, 1 reply; 8+ messages in thread
From: Changli Gao @ 2010-08-19 14:33 UTC (permalink / raw)
To: Patrick McHardy
Cc: David S. Miller, Eric Dumazet, Mathieu Desnoyers, netfilter-devel,
netdev, Changli Gao
Since we don't change the tuple in the original direction, we can save it
in ct->tuplehash[IP_CT_DIR_REPLY].hnode.pprev for __nf_conntrack_confirm()
use.
__hash_conntrack() is split into two steps: ____hash_conntrack() is used
to get the raw hash, and __hash_bucket() is used to get the bucket id.
In SYN-flood case, early_drop() doesn't need to recompute the hash again.
Signed-off-by: Changli Gao <xiaosuo@gmail.com>
---
v2: use cmpxchg() to save 2 variables.
net/netfilter/nf_conntrack_core.c | 114 ++++++++++++++++++++++++++------------
1 file changed, 79 insertions(+), 35 deletions(-)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index df3eedb..0e02205 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -65,14 +65,20 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
-static int nf_conntrack_hash_rnd_initted;
-static unsigned int nf_conntrack_hash_rnd;
-
-static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
- u16 zone, unsigned int size, unsigned int rnd)
+static u32 ____hash_conntrack(const struct nf_conntrack_tuple *tuple, u16 zone)
{
unsigned int n;
u_int32_t h;
+ static unsigned int rnd;
+
+ if (unlikely(!rnd)) {
+ unsigned int rand;
+
+ get_random_bytes(&rand, sizeof(rand));
+ if (!rand)
+ rand = 1;
+ cmpxchg(&rnd, 0, rand);
+ }
/* The direction must be ignored, so we hash everything up to the
* destination ports (which is a multiple of 4) and treat the last
@@ -83,14 +89,29 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
tuple->dst.protonum));
- return ((u64)h * size) >> 32;
+ return h;
+}
+
+static u32 __hash_bucket(u32 __hash, unsigned int size)
+{
+ return ((u64)__hash * size) >> 32;
+}
+
+static u32 hash_bucket(u32 __hash, const struct net *net)
+{
+ return __hash_bucket(__hash, net->ct.htable_size);
+}
+
+static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
+ u16 zone, unsigned int size)
+{
+ return __hash_bucket(____hash_conntrack(tuple, zone), size);
}
static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple)
{
- return __hash_conntrack(tuple, zone, net->ct.htable_size,
- nf_conntrack_hash_rnd);
+ return __hash_conntrack(tuple, zone, net->ct.htable_size);
}
bool
@@ -292,13 +313,13 @@ static void death_by_timeout(unsigned long ul_conntrack)
* OR
* - Caller must lock nf_conntrack_lock before calling this function
*/
-struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(struct net *net, u16 zone,
- const struct nf_conntrack_tuple *tuple)
+static struct nf_conntrack_tuple_hash *
+____nf_conntrack_find(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple, u32 __hash)
{
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- unsigned int hash = hash_conntrack(net, zone, tuple);
+ unsigned int hash = hash_bucket(__hash, net);
/* Disable BHs the entire time since we normally need to disable them
* at least once for the stats anyway.
@@ -327,19 +348,27 @@ begin:
return NULL;
}
+
+struct nf_conntrack_tuple_hash *
+__nf_conntrack_find(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return ____nf_conntrack_find(net, zone, tuple,
+ ____hash_conntrack(tuple, zone));
+}
EXPORT_SYMBOL_GPL(__nf_conntrack_find);
/* Find a connection corresponding to a tuple. */
-struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, u16 zone,
- const struct nf_conntrack_tuple *tuple)
+static struct nf_conntrack_tuple_hash *
+__nf_conntrack_find_get(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple, u32 __hash)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
rcu_read_lock();
begin:
- h = __nf_conntrack_find(net, zone, tuple);
+ h = ____nf_conntrack_find(net, zone, tuple, __hash);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (unlikely(nf_ct_is_dying(ct) ||
@@ -357,6 +386,14 @@ begin:
return h;
}
+
+struct nf_conntrack_tuple_hash *
+nf_conntrack_find_get(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return __nf_conntrack_find_get(net, zone, tuple,
+ ____hash_conntrack(tuple, zone));
+}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
@@ -409,7 +446,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_ACCEPT;
zone = nf_ct_zone(ct);
- hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ /* reuse the __hash saved before */
+ hash = hash_bucket(*(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev, net);
repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
/* We're not in hash table, and we refuse to set up related
@@ -567,25 +605,20 @@ static noinline int early_drop(struct net *net, unsigned int hash)
return dropped;
}
-struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
- const struct nf_conntrack_tuple *orig,
- const struct nf_conntrack_tuple *repl,
- gfp_t gfp)
+static struct nf_conn *
+__nf_conntrack_alloc(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *orig,
+ const struct nf_conntrack_tuple *repl,
+ gfp_t gfp, u32 __hash)
{
struct nf_conn *ct;
- if (unlikely(!nf_conntrack_hash_rnd_initted)) {
- get_random_bytes(&nf_conntrack_hash_rnd,
- sizeof(nf_conntrack_hash_rnd));
- nf_conntrack_hash_rnd_initted = 1;
- }
-
/* We don't want any race condition at early drop stage */
atomic_inc(&net->ct.count);
if (nf_conntrack_max &&
unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
- unsigned int hash = hash_conntrack(net, zone, orig);
+ unsigned int hash = hash_bucket(__hash, net);
if (!early_drop(net, hash)) {
atomic_dec(&net->ct.count);
if (net_ratelimit())
@@ -616,7 +649,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
- ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
+ /* save __hash for reusing when confirming */
+ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = __hash;
/* Don't set timer yet: wait for confirmation */
setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
write_pnet(&ct->ct_net, net);
@@ -643,6 +677,14 @@ out_free:
return ERR_PTR(-ENOMEM);
#endif
}
+
+struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *orig,
+ const struct nf_conntrack_tuple *repl,
+ gfp_t gfp)
+{
+ return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
+}
EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
void nf_conntrack_free(struct nf_conn *ct)
@@ -664,7 +706,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_l3proto *l3proto,
struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
- unsigned int dataoff)
+ unsigned int dataoff, u32 __hash)
{
struct nf_conn *ct;
struct nf_conn_help *help;
@@ -678,7 +720,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
return NULL;
}
- ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC);
+ ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
+ __hash);
if (IS_ERR(ct)) {
pr_debug("Can't allocate conntrack.\n");
return (struct nf_conntrack_tuple_hash *)ct;
@@ -755,6 +798,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+ u32 __hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, l3num, protonum, &tuple, l3proto,
@@ -764,10 +808,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
}
/* look for tuple match */
- h = nf_conntrack_find_get(net, zone, &tuple);
+ __hash = ____hash_conntrack(&tuple, zone);
+ h = __nf_conntrack_find_get(net, zone, &tuple, __hash);
if (!h) {
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
- skb, dataoff);
+ skb, dataoff, __hash);
if (!h)
return NULL;
if (IS_ERR(h))
@@ -1307,8 +1352,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
ct = nf_ct_tuplehash_to_ctrack(h);
hlist_nulls_del_rcu(&h->hnnode);
bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
- hashsize,
- nf_conntrack_hash_rnd);
+ hashsize);
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
}
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH v2] netfilter: save the hash of the tuple in the original direction for latter use
2010-08-19 14:33 [PATCH v2] netfilter: save the hash of the tuple in the original direction for latter use Changli Gao
@ 2010-08-19 15:21 ` Mathieu Desnoyers
2010-08-19 15:35 ` Changli Gao
0 siblings, 1 reply; 8+ messages in thread
From: Mathieu Desnoyers @ 2010-08-19 15:21 UTC (permalink / raw)
To: Changli Gao
Cc: Patrick McHardy, David S. Miller, Eric Dumazet, netfilter-devel,
netdev
* Changli Gao (xiaosuo@gmail.com) wrote:
> Since we don't change the tuple in the original direction, we can save it
> in ct->tuplehash[IP_CT_DIR_REPLY].hnode.pprev for __nf_conntrack_confirm()
> use.
>
> __hash_conntrack() is split into two steps: ____hash_conntrack() is used
> to get the raw hash, and __hash_bucket() is used to get the bucket id.
>
> In SYN-flood case, early_drop() doesn't need to recompute the hash again.
>
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ---
> v2: use cmpxchg() to save 2 variables.
> net/netfilter/nf_conntrack_core.c | 114 ++++++++++++++++++++++++++------------
> 1 file changed, 79 insertions(+), 35 deletions(-)
> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
> index df3eedb..0e02205 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -65,14 +65,20 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
> DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
> EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
>
> -static int nf_conntrack_hash_rnd_initted;
> -static unsigned int nf_conntrack_hash_rnd;
> -
> -static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
> - u16 zone, unsigned int size, unsigned int rnd)
> +static u32 ____hash_conntrack(const struct nf_conntrack_tuple *tuple, u16 zone)
> {
> unsigned int n;
> u_int32_t h;
> + static unsigned int rnd;
Why are you putting a hidden static variable here ? It should probably
be declared outside of the function scope.
> +
> + if (unlikely(!rnd)) {
> + unsigned int rand;
> +
> + get_random_bytes(&rand, sizeof(rand));
> + if (!rand)
> + rand = 1;
> + cmpxchg(&rnd, 0, rand);
This really belongs to an "init()" function, not in a dynamic check in
the __hash_conntrack hot path. If you do that at init time, then you
don't even need the cmpxchg.
Or maybe I completely misunderstand your goal here; maybe you are really
trying to re-calculate random bytes. But more explanation is called for,
because I really don't see where the value is brought back to 0.
Thanks,
Mathieu
> + }
>
> /* The direction must be ignored, so we hash everything up to the
> * destination ports (which is a multiple of 4) and treat the last
> @@ -83,14 +89,29 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
> zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
> tuple->dst.protonum));
>
> - return ((u64)h * size) >> 32;
> + return h;
> +}
> +
> +static u32 __hash_bucket(u32 __hash, unsigned int size)
> +{
> + return ((u64)__hash * size) >> 32;
> +}
> +
> +static u32 hash_bucket(u32 __hash, const struct net *net)
> +{
> + return __hash_bucket(__hash, net->ct.htable_size);
> +}
> +
> +static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
> + u16 zone, unsigned int size)
> +{
> + return __hash_bucket(____hash_conntrack(tuple, zone), size);
> }
>
> static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
> const struct nf_conntrack_tuple *tuple)
> {
> - return __hash_conntrack(tuple, zone, net->ct.htable_size,
> - nf_conntrack_hash_rnd);
> + return __hash_conntrack(tuple, zone, net->ct.htable_size);
> }
>
> bool
> @@ -292,13 +313,13 @@ static void death_by_timeout(unsigned long ul_conntrack)
> * OR
> * - Caller must lock nf_conntrack_lock before calling this function
> */
> -struct nf_conntrack_tuple_hash *
> -__nf_conntrack_find(struct net *net, u16 zone,
> - const struct nf_conntrack_tuple *tuple)
> +static struct nf_conntrack_tuple_hash *
> +____nf_conntrack_find(struct net *net, u16 zone,
> + const struct nf_conntrack_tuple *tuple, u32 __hash)
> {
> struct nf_conntrack_tuple_hash *h;
> struct hlist_nulls_node *n;
> - unsigned int hash = hash_conntrack(net, zone, tuple);
> + unsigned int hash = hash_bucket(__hash, net);
>
> /* Disable BHs the entire time since we normally need to disable them
> * at least once for the stats anyway.
> @@ -327,19 +348,27 @@ begin:
>
> return NULL;
> }
> +
> +struct nf_conntrack_tuple_hash *
> +__nf_conntrack_find(struct net *net, u16 zone,
> + const struct nf_conntrack_tuple *tuple)
> +{
> + return ____nf_conntrack_find(net, zone, tuple,
> + ____hash_conntrack(tuple, zone));
> +}
> EXPORT_SYMBOL_GPL(__nf_conntrack_find);
>
> /* Find a connection corresponding to a tuple. */
> -struct nf_conntrack_tuple_hash *
> -nf_conntrack_find_get(struct net *net, u16 zone,
> - const struct nf_conntrack_tuple *tuple)
> +static struct nf_conntrack_tuple_hash *
> +__nf_conntrack_find_get(struct net *net, u16 zone,
> + const struct nf_conntrack_tuple *tuple, u32 __hash)
> {
> struct nf_conntrack_tuple_hash *h;
> struct nf_conn *ct;
>
> rcu_read_lock();
> begin:
> - h = __nf_conntrack_find(net, zone, tuple);
> + h = ____nf_conntrack_find(net, zone, tuple, __hash);
> if (h) {
> ct = nf_ct_tuplehash_to_ctrack(h);
> if (unlikely(nf_ct_is_dying(ct) ||
> @@ -357,6 +386,14 @@ begin:
>
> return h;
> }
> +
> +struct nf_conntrack_tuple_hash *
> +nf_conntrack_find_get(struct net *net, u16 zone,
> + const struct nf_conntrack_tuple *tuple)
> +{
> + return __nf_conntrack_find_get(net, zone, tuple,
> + ____hash_conntrack(tuple, zone));
> +}
> EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
>
> static void __nf_conntrack_hash_insert(struct nf_conn *ct,
> @@ -409,7 +446,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
> return NF_ACCEPT;
>
> zone = nf_ct_zone(ct);
> - hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
> + /* reuse the __hash saved before */
> + hash = hash_bucket(*(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev, net);
> repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
>
> /* We're not in hash table, and we refuse to set up related
> @@ -567,25 +605,20 @@ static noinline int early_drop(struct net *net, unsigned int hash)
> return dropped;
> }
>
> -struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
> - const struct nf_conntrack_tuple *orig,
> - const struct nf_conntrack_tuple *repl,
> - gfp_t gfp)
> +static struct nf_conn *
> +__nf_conntrack_alloc(struct net *net, u16 zone,
> + const struct nf_conntrack_tuple *orig,
> + const struct nf_conntrack_tuple *repl,
> + gfp_t gfp, u32 __hash)
> {
> struct nf_conn *ct;
>
> - if (unlikely(!nf_conntrack_hash_rnd_initted)) {
> - get_random_bytes(&nf_conntrack_hash_rnd,
> - sizeof(nf_conntrack_hash_rnd));
> - nf_conntrack_hash_rnd_initted = 1;
> - }
> -
> /* We don't want any race condition at early drop stage */
> atomic_inc(&net->ct.count);
>
> if (nf_conntrack_max &&
> unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
> - unsigned int hash = hash_conntrack(net, zone, orig);
> + unsigned int hash = hash_bucket(__hash, net);
> if (!early_drop(net, hash)) {
> atomic_dec(&net->ct.count);
> if (net_ratelimit())
> @@ -616,7 +649,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
> ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
> ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
> ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
> - ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
> + /* save __hash for reusing when confirming */
> + *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = __hash;
> /* Don't set timer yet: wait for confirmation */
> setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
> write_pnet(&ct->ct_net, net);
> @@ -643,6 +677,14 @@ out_free:
> return ERR_PTR(-ENOMEM);
> #endif
> }
> +
> +struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
> + const struct nf_conntrack_tuple *orig,
> + const struct nf_conntrack_tuple *repl,
> + gfp_t gfp)
> +{
> + return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
> +}
> EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
>
> void nf_conntrack_free(struct nf_conn *ct)
> @@ -664,7 +706,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
> struct nf_conntrack_l3proto *l3proto,
> struct nf_conntrack_l4proto *l4proto,
> struct sk_buff *skb,
> - unsigned int dataoff)
> + unsigned int dataoff, u32 __hash)
> {
> struct nf_conn *ct;
> struct nf_conn_help *help;
> @@ -678,7 +720,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
> return NULL;
> }
>
> - ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC);
> + ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
> + __hash);
> if (IS_ERR(ct)) {
> pr_debug("Can't allocate conntrack.\n");
> return (struct nf_conntrack_tuple_hash *)ct;
> @@ -755,6 +798,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
> struct nf_conntrack_tuple_hash *h;
> struct nf_conn *ct;
> u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
> + u32 __hash;
>
> if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
> dataoff, l3num, protonum, &tuple, l3proto,
> @@ -764,10 +808,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
> }
>
> /* look for tuple match */
> - h = nf_conntrack_find_get(net, zone, &tuple);
> + __hash = ____hash_conntrack(&tuple, zone);
> + h = __nf_conntrack_find_get(net, zone, &tuple, __hash);
> if (!h) {
> h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
> - skb, dataoff);
> + skb, dataoff, __hash);
> if (!h)
> return NULL;
> if (IS_ERR(h))
> @@ -1307,8 +1352,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
> ct = nf_ct_tuplehash_to_ctrack(h);
> hlist_nulls_del_rcu(&h->hnnode);
> bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
> - hashsize,
> - nf_conntrack_hash_rnd);
> + hashsize);
> hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
> }
> }
--
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2] netfilter: save the hash of the tuple in the original direction for latter use
2010-08-19 15:21 ` Mathieu Desnoyers
@ 2010-08-19 15:35 ` Changli Gao
2010-08-19 15:41 ` yao zhao
2010-08-19 15:46 ` Eric Dumazet
0 siblings, 2 replies; 8+ messages in thread
From: Changli Gao @ 2010-08-19 15:35 UTC (permalink / raw)
To: Mathieu Desnoyers
Cc: Patrick McHardy, David S. Miller, Eric Dumazet, netfilter-devel,
netdev
On Thu, Aug 19, 2010 at 11:21 PM, Mathieu Desnoyers
<mathieu.desnoyers@polymtl.ca> wrote:
> * Changli Gao (xiaosuo@gmail.com) wrote:
>> Since we don't change the tuple in the original direction, we can save it
>> in ct->tuplehash[IP_CT_DIR_REPLY].hnode.pprev for __nf_conntrack_confirm()
>> use.
>>
>> __hash_conntrack() is split into two steps: ____hash_conntrack() is used
>> to get the raw hash, and __hash_bucket() is used to get the bucket id.
>>
>> In SYN-flood case, early_drop() doesn't need to recompute the hash again.
>>
>> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
>> ---
>> v2: use cmpxchg() to save 2 variables.
>> net/netfilter/nf_conntrack_core.c | 114 ++++++++++++++++++++++++++------------
>> 1 file changed, 79 insertions(+), 35 deletions(-)
>> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
>> index df3eedb..0e02205 100644
>> --- a/net/netfilter/nf_conntrack_core.c
>> +++ b/net/netfilter/nf_conntrack_core.c
>> @@ -65,14 +65,20 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
>> DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
>> EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
>>
>> -static int nf_conntrack_hash_rnd_initted;
>> -static unsigned int nf_conntrack_hash_rnd;
>> -
>> -static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
>> - u16 zone, unsigned int size, unsigned int rnd)
>> +static u32 ____hash_conntrack(const struct nf_conntrack_tuple *tuple, u16 zone)
>> {
>> unsigned int n;
>> u_int32_t h;
>> + static unsigned int rnd;
>
> Why are you putting a hidden static variable here ? It should probably
> be declared outside of the function scope.
It is only used in this function.
>
>> +
>> + if (unlikely(!rnd)) {
>> + unsigned int rand;
>> +
>> + get_random_bytes(&rand, sizeof(rand));
>> + if (!rand)
>> + rand = 1;
>> + cmpxchg(&rnd, 0, rand);
>
> This really belongs to an "init()" function, not in a dynamic check in
> the __hash_conntrack hot path. If you do that at init time, then you
> don't even need the cmpxchg.
>
> Or maybe I completely misunderstand your goal here; maybe you are really
> trying to re-calculate random bytes. But more explanation is called for,
> because I really don't see where the value is brought back to 0.
>
In fact, I don't know the reason clearly. This code is derived from
the older one. Maybe there isn't enough entropy when initializing.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2] netfilter: save the hash of the tuple in the original direction for latter use
2010-08-19 15:35 ` Changli Gao
@ 2010-08-19 15:41 ` yao zhao
2010-08-19 15:46 ` Eric Dumazet
1 sibling, 0 replies; 8+ messages in thread
From: yao zhao @ 2010-08-19 15:41 UTC (permalink / raw)
To: Changli Gao
Cc: Mathieu Desnoyers, Patrick McHardy, David S. Miller, Eric Dumazet,
netfilter-devel, netdev
On Thu, Aug 19, 2010 at 11:35 AM, Changli Gao <xiaosuo@gmail.com> wrote:
> On Thu, Aug 19, 2010 at 11:21 PM, Mathieu Desnoyers
> <mathieu.desnoyers@polymtl.ca> wrote:
>> * Changli Gao (xiaosuo@gmail.com) wrote:
>>> Since we don't change the tuple in the original direction, we can save it
>>> in ct->tuplehash[IP_CT_DIR_REPLY].hnode.pprev for __nf_conntrack_confirm()
>>> use.
>>>
>>> __hash_conntrack() is split into two steps: ____hash_conntrack() is used
>>> to get the raw hash, and __hash_bucket() is used to get the bucket id.
>>>
>>> In SYN-flood case, early_drop() doesn't need to recompute the hash again.
>>>
>>> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
>>> ---
>>> v2: use cmpxchg() to save 2 variables.
>>> net/netfilter/nf_conntrack_core.c | 114 ++++++++++++++++++++++++++------------
>>> 1 file changed, 79 insertions(+), 35 deletions(-)
>>> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
>>> index df3eedb..0e02205 100644
>>> --- a/net/netfilter/nf_conntrack_core.c
>>> +++ b/net/netfilter/nf_conntrack_core.c
>>> @@ -65,14 +65,20 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
>>> DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
>>> EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
>>>
>>> -static int nf_conntrack_hash_rnd_initted;
>>> -static unsigned int nf_conntrack_hash_rnd;
>>> -
>>> -static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
>>> - u16 zone, unsigned int size, unsigned int rnd)
>>> +static u32 ____hash_conntrack(const struct nf_conntrack_tuple *tuple, u16 zone)
>>> {
>>> unsigned int n;
>>> u_int32_t h;
>>> + static unsigned int rnd;
>>
>> Why are you putting a hidden static variable here ? It should probably
>> be declared outside of the function scope.
>
> It is only used in this function.
Looks like it is only initialized once to rnd; then it should go in an "init".
>
>>
>>> +
>>> + if (unlikely(!rnd)) {
>>> + unsigned int rand;
>>> +
>>> + get_random_bytes(&rand, sizeof(rand));
>>> + if (!rand)
>>> + rand = 1;
>>> + cmpxchg(&rnd, 0, rand);
>>
>> This really belongs to an "init()" function, not in a dynamic check in
>> the __hash_conntrack hot path. If you do that at init time, then you
>> don't even need the cmpxchg.
>>
>> Or maybe I completely misunderstand your goal here; maybe you are really
>> trying to re-calculate random bytes. But more explanation is called for,
>> because I really don't see where the value is brought back to 0.
>>
>
> In fact, I don't know the reason clearly. This code is derived from
> the older one. Maybe there isn't enough entropy when initializing.
>
> --
> Regards,
> Changli Gao(xiaosuo@gmail.com)
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
yao
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2] netfilter: save the hash of the tuple in the original direction for latter use
2010-08-19 15:35 ` Changli Gao
2010-08-19 15:41 ` yao zhao
@ 2010-08-19 15:46 ` Eric Dumazet
2010-08-19 16:11 ` Mathieu Desnoyers
1 sibling, 1 reply; 8+ messages in thread
From: Eric Dumazet @ 2010-08-19 15:46 UTC (permalink / raw)
To: Changli Gao
Cc: Mathieu Desnoyers, Patrick McHardy, David S. Miller,
netfilter-devel, netdev
Le jeudi 19 août 2010 à 23:35 +0800, Changli Gao a écrit :
>
> In fact, I don't know the reason clearly. This code is derived from
> the older one. Maybe there isn't enough entropy when initializing.
>
Yes, we wanted to initialize these random data as late as possible, that
is when the first entry is added in conntrack hash.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2] netfilter: save the hash of the tuple in the original direction for latter use
2010-08-19 15:46 ` Eric Dumazet
@ 2010-08-19 16:11 ` Mathieu Desnoyers
2010-08-19 23:10 ` Changli Gao
0 siblings, 1 reply; 8+ messages in thread
From: Mathieu Desnoyers @ 2010-08-19 16:11 UTC (permalink / raw)
To: Eric Dumazet
Cc: Changli Gao, Patrick McHardy, David S. Miller, netfilter-devel,
netdev
* Eric Dumazet (eric.dumazet@gmail.com) wrote:
> Le jeudi 19 août 2010 à 23:35 +0800, Changli Gao a écrit :
>
> >
> > In fact, I don't know the reason clearly. This code is derived from
> > the older one. Maybe there isn't enough entropy when initializing.
> >
>
> Yes, we wanted to initialize these random data as late as possible, that
> is when the first entry is added in conntrack hash.
Ah, I see. But I think the static variable should stay declared outside
of the function scope, with a nice comment explaining why it's not
initialized at init-time.
Hiding global state in function code is usually frowned upon.
Thanks,
Mathieu
--
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2] netfilter: save the hash of the tuple in the original direction for latter use
2010-08-19 16:11 ` Mathieu Desnoyers
@ 2010-08-19 23:10 ` Changli Gao
2010-08-20 13:43 ` Mathieu Desnoyers
0 siblings, 1 reply; 8+ messages in thread
From: Changli Gao @ 2010-08-19 23:10 UTC (permalink / raw)
To: Mathieu Desnoyers
Cc: Eric Dumazet, Patrick McHardy, David S. Miller, netfilter-devel,
netdev
On Fri, Aug 20, 2010 at 12:11 AM, Mathieu Desnoyers
<mathieu.desnoyers@polymtl.ca> wrote:
>
> Ah, I see. But I think the static variable should stay declared outside
> of the function scope, with a nice comment explaining why it's not
> initialized at init-time.
>
> Hiding global state in function code is usually frowned upon.
>
I don't agree with you. We'd better not expose the variable which
isn't expected to be used by others. If not, maybe someone will misuse
it. The user should only rely on the interface, but not the internal
implementation.
Thanks.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2] netfilter: save the hash of the tuple in the original direction for latter use
2010-08-19 23:10 ` Changli Gao
@ 2010-08-20 13:43 ` Mathieu Desnoyers
0 siblings, 0 replies; 8+ messages in thread
From: Mathieu Desnoyers @ 2010-08-20 13:43 UTC (permalink / raw)
To: Changli Gao
Cc: Eric Dumazet, Patrick McHardy, David S. Miller, netfilter-devel,
netdev, akpm, linux-kernel
[ executive summary: trying to explain why static variable declarations
within the body of functions are bad in kernel code. ]
* Changli Gao (xiaosuo@gmail.com) wrote:
> On Fri, Aug 20, 2010 at 12:11 AM, Mathieu Desnoyers
> <mathieu.desnoyers@polymtl.ca> wrote:
> >
> > Ah, I see. But I think the static variable should stay declared outside
> > of the function scope, with a nice comment explaining why it's not
> > initialized at init-time.
> >
> > Hiding global state in function code is usually frowned upon.
> >
>
> I don't agree with you. We'd better not expose the variable which
> isn't expected to be used by others. If not, maybe someone will misuse
> it.
We are talking about a static variable usable only within a single file,
where is the problem ? A nice comment can have this effect.
> The user should only rely on the interface, but not the internal
> implementation.
I'm talking about good practice to help code review. This is way more
important than the potential "encapsulation" benefit of hiding global
state (a static variable) within the body of a function.
I remember that Andrew Morton has strong opinions about this, and I
remember being in complete agreement with him on the topic.
Thanks,
Mathieu
>
> Thanks.
>
> --
> Regards,
> Changli Gao(xiaosuo@gmail.com)
--
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2010-08-20 13:43 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-08-19 14:33 [PATCH v2] netfilter: save the hash of the tuple in the original direction for latter use Changli Gao
2010-08-19 15:21 ` Mathieu Desnoyers
2010-08-19 15:35 ` Changli Gao
2010-08-19 15:41 ` yao zhao
2010-08-19 15:46 ` Eric Dumazet
2010-08-19 16:11 ` Mathieu Desnoyers
2010-08-19 23:10 ` Changli Gao
2010-08-20 13:43 ` Mathieu Desnoyers
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).