* [PATCH v5 2/2] netfilter: save the hash of the tuple in the original direction for later use
From: Changli Gao @ 2010-08-20 22:49 UTC
To: Patrick McHardy
Cc: David S. Miller, Eric Dumazet, Mathieu Desnoyers, akpm,
netfilter-devel, netdev, linux-kernel, Changli Gao
Since we don't change the tuple in the original direction, we can save
its hash in ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev for use by
__nf_conntrack_confirm().
__hash_conntrack() is split into two steps: ____hash_conntrack() is used
to get the raw hash, and __hash_bucket() is used to get the bucket id.
In the SYN-flood case, early_drop() no longer needs to recompute the hash.
Signed-off-by: Changli Gao <xiaosuo@gmail.com>
---
v5: respun due to the changes in patch 1/2.
v4: init rnd when allocating conntrack.
v3: define the static variable rnd outside the function ____hash_conntrack(),
and call get_random_bytes() until we get a non-zero random int.
v2: use cmpxchg() to save 2 variables.
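For reference, a minimal userspace sketch of the two-step scheme (an
illustration only, not the kernel code: mix32() stands in for jhash2(),
and the seed plays the role of nf_conntrack_hash_rnd, which the patch
initializes lazily):

#include <stdint.h>

/* Stand-in for jhash2(); the kernel seeds the real hash with the
 * per-boot random nf_conntrack_hash_rnd so that bucket placement is
 * unpredictable to an attacker. */
static uint32_t mix32(const uint32_t *words, unsigned int n, uint32_t seed)
{
        uint32_t h = seed;

        while (n--) {
                h ^= *words++;
                h *= 0x9e3779b9u;       /* golden-ratio multiplier */
        }
        return h;
}

/* Step 1 (____hash_conntrack()): one full 32-bit hash of the tuple,
 * computed once and carried around afterwards. */
static uint32_t hash_raw(const uint32_t *tuple, unsigned int n, uint32_t seed)
{
        return mix32(tuple, n, seed);
}

/* Step 2 (__hash_bucket()): map the raw hash onto [0, size).  The
 * multiply-shift ((u64)hash * size) >> 32 avoids a division and works
 * for non-power-of-two table sizes. */
static uint32_t hash_bucket_of(uint32_t hash, unsigned int size)
{
        return (uint32_t)(((uint64_t)hash * size) >> 32);
}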
net/netfilter/nf_conntrack_core.c | 104 +++++++++++++++++++++++++++-----------
1 file changed, 75 insertions(+), 29 deletions(-)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 4c0ad9b..1087161 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -67,8 +67,7 @@ EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
static unsigned int nf_conntrack_hash_rnd __read_mostly;
-static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
- u16 zone, unsigned int size, unsigned int rnd)
+static u32 ____hash_conntrack(const struct nf_conntrack_tuple *tuple, u16 zone)
{
unsigned int n;
u_int32_t h;
@@ -78,18 +77,33 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
* three bytes manually.
*/
n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
- h = jhash2((u32 *)tuple, n,
- zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
- tuple->dst.protonum));
+ h = jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^
+ (((__force __u16)tuple->dst.u.all << 16) |
+ tuple->dst.protonum));
+
+ return h;
+}
+
+static u32 __hash_bucket(u32 __hash, unsigned int size)
+{
+ return ((u64)__hash * size) >> 32;
+}
+
+static u32 hash_bucket(u32 __hash, const struct net *net)
+{
+ return __hash_bucket(__hash, net->ct.htable_size);
+}
- return ((u64)h * size) >> 32;
+static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
+ u16 zone, unsigned int size)
+{
+ return __hash_bucket(____hash_conntrack(tuple, zone), size);
}
static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple)
{
- return __hash_conntrack(tuple, zone, net->ct.htable_size,
- nf_conntrack_hash_rnd);
+ return __hash_conntrack(tuple, zone, net->ct.htable_size);
}
bool
@@ -291,13 +305,13 @@ static void death_by_timeout(unsigned long ul_conntrack)
* OR
* - Caller must lock nf_conntrack_lock before calling this function
*/
-struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(struct net *net, u16 zone,
- const struct nf_conntrack_tuple *tuple)
+static struct nf_conntrack_tuple_hash *
+____nf_conntrack_find(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple, u32 __hash)
{
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- unsigned int hash = hash_conntrack(net, zone, tuple);
+ unsigned int hash = hash_bucket(__hash, net);
/* Disable BHs the entire time since we normally need to disable them
* at least once for the stats anyway.
@@ -326,19 +340,27 @@ begin:
return NULL;
}
+
+struct nf_conntrack_tuple_hash *
+__nf_conntrack_find(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return ____nf_conntrack_find(net, zone, tuple,
+ ____hash_conntrack(tuple, zone));
+}
EXPORT_SYMBOL_GPL(__nf_conntrack_find);
/* Find a connection corresponding to a tuple. */
-struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, u16 zone,
- const struct nf_conntrack_tuple *tuple)
+static struct nf_conntrack_tuple_hash *
+__nf_conntrack_find_get(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple, u32 __hash)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
rcu_read_lock();
begin:
- h = __nf_conntrack_find(net, zone, tuple);
+ h = ____nf_conntrack_find(net, zone, tuple, __hash);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (unlikely(nf_ct_is_dying(ct) ||
@@ -356,6 +378,14 @@ begin:
return h;
}
+
+struct nf_conntrack_tuple_hash *
+nf_conntrack_find_get(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return __nf_conntrack_find_get(net, zone, tuple,
+ ____hash_conntrack(tuple, zone));
+}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
@@ -408,7 +438,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_ACCEPT;
zone = nf_ct_zone(ct);
- hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ /* reuse the __hash saved before */
+ hash = hash_bucket(*(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev, net);
repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
/* We're not in hash table, and we refuse to set up related
@@ -566,10 +597,11 @@ static noinline int early_drop(struct net *net, unsigned int hash)
return dropped;
}
-struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
- const struct nf_conntrack_tuple *orig,
- const struct nf_conntrack_tuple *repl,
- gfp_t gfp)
+static struct nf_conn *
+__nf_conntrack_alloc(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *orig,
+ const struct nf_conntrack_tuple *repl,
+ gfp_t gfp, u32 __hash)
{
struct nf_conn *ct;
@@ -585,6 +617,9 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
get_random_bytes(&rand, sizeof(rand));
} while (!rand);
cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
+
+ /* recompute the hash as nf_conntrack_hash_rnd is initialized */
+ __hash = ____hash_conntrack(orig, zone);
}
/* We don't want any race condition at early drop stage */
@@ -592,7 +627,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
if (nf_conntrack_max &&
unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
- unsigned int hash = hash_conntrack(net, zone, orig);
+ unsigned int hash = hash_bucket(__hash, net);
if (!early_drop(net, hash)) {
atomic_dec(&net->ct.count);
if (net_ratelimit())
@@ -623,7 +658,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
- ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
+ /* save __hash for reusing when confirming */
+ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = __hash;
/* Don't set timer yet: wait for confirmation */
setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
write_pnet(&ct->ct_net, net);
@@ -650,6 +686,14 @@ out_free:
return ERR_PTR(-ENOMEM);
#endif
}
+
+struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *orig,
+ const struct nf_conntrack_tuple *repl,
+ gfp_t gfp)
+{
+ return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
+}
EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
void nf_conntrack_free(struct nf_conn *ct)
@@ -671,7 +715,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_l3proto *l3proto,
struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
- unsigned int dataoff)
+ unsigned int dataoff, u32 __hash)
{
struct nf_conn *ct;
struct nf_conn_help *help;
@@ -685,7 +729,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
return NULL;
}
- ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC);
+ ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
+ __hash);
if (IS_ERR(ct)) {
pr_debug("Can't allocate conntrack.\n");
return (struct nf_conntrack_tuple_hash *)ct;
@@ -762,6 +807,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+ u32 __hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, l3num, protonum, &tuple, l3proto,
@@ -771,10 +817,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
}
/* look for tuple match */
- h = nf_conntrack_find_get(net, zone, &tuple);
+ __hash = ____hash_conntrack(&tuple, zone);
+ h = __nf_conntrack_find_get(net, zone, &tuple, __hash);
if (!h) {
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
- skb, dataoff);
+ skb, dataoff, __hash);
if (!h)
return NULL;
if (IS_ERR(h))
@@ -1314,8 +1361,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
ct = nf_ct_tuplehash_to_ctrack(h);
hlist_nulls_del_rcu(&h->hnnode);
bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
- hashsize,
- nf_conntrack_hash_rnd);
+ hashsize);
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
}
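The stash in __nf_conntrack_alloc() works because an unconfirmed
conntrack is not linked into the hash table yet, so the reply
direction's hnnode.pprev holds no pointer until __nf_conntrack_confirm()
inserts the entry (hlist_nulls_add_head_rcu() overwrites the slot at
that point). A compressed sketch of the trick, with simplified types
rather than the kernel's:

#include <stdint.h>

struct hlist_nulls_node {
        struct hlist_nulls_node *next, **pprev;
};

/* Park the 32-bit raw hash in the unused pprev slot.  The store
 * zero-extends to the pointer width, and list insertion later
 * overwrites pprev, so the stash lives exactly as long as needed. */
static void stash_hash(struct hlist_nulls_node *node, uint32_t hash)
{
        *(unsigned long *)&node->pprev = hash;
}

static uint32_t stashed_hash(const struct hlist_nulls_node *node)
{
        return (uint32_t)*(const unsigned long *)&node->pprev;
}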
* Re: [PATCH v5 2/2] netfilter: save the hash of the tuple in the original direction for later use
From: Patrick McHardy @ 2010-09-16 6:18 UTC
To: Changli Gao
Cc: David S. Miller, Eric Dumazet, Mathieu Desnoyers, akpm,
netfilter-devel, netdev, linux-kernel
On 21.08.2010 00:49, Changli Gao wrote:
> Since we don't change the tuple in the original direction, we can save
> its hash in ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev for use by
> __nf_conntrack_confirm().
I like this idea. We could actually do the same for the reply tuple
and invalidate the saved hash in case the reply tuple is changed
(nf_conntrack_alter_reply()), which only happens when NAT is used.
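A rough sketch of that invalidation idea (the struct and helpers below
are hypothetical, not code from this patch; note jhash2() can
legitimately return 0, so a real version would either tolerate a rare
spurious recompute or keep a separate valid bit):

#include <stdint.h>

struct reply_hash_cache {
        uint32_t hash;          /* 0 => not cached */
};

/* would be called from nf_conntrack_alter_reply() when NAT rewrites
 * the reply tuple */
static void invalidate_reply_hash(struct reply_hash_cache *c)
{
        c->hash = 0;
}

/* recompute lazily, only when someone actually needs the hash */
static uint32_t reply_hash(struct reply_hash_cache *c,
                           uint32_t (*recompute)(void *ctx), void *ctx)
{
        if (!c->hash)
                c->hash = recompute(ctx);
        return c->hash;
}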
> __hash_conntrack() is split into two steps: ____hash_conntrack() is used
> to get the raw hash, and __hash_bucket() is used to get the bucket id.
This patch uses underscores a bit excessively; how about renaming:
- ____hash_conntrack() => hash_conntrack_raw()
- __hash variables => hash
- hash variables => bucket
> @@ -408,7 +438,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
> return NF_ACCEPT;
>
> zone = nf_ct_zone(ct);
> - hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
> + /* reuse the __hash saved before */
> + hash = hash_bucket(*(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev, net);
Please try to stay at least close to the 80-character limit.
* Re: [PATCH v5 2/2] netfilter: save the hash of the tuple in the original direction for later use
From: Changli Gao @ 2010-09-20 15:04 UTC
To: Patrick McHardy
Cc: David S. Miller, Eric Dumazet, Mathieu Desnoyers, akpm,
netfilter-devel, netdev, linux-kernel
On Thu, Sep 16, 2010 at 2:18 PM, Patrick McHardy <kaber@trash.net> wrote:
> On 21.08.2010 00:49, Changli Gao wrote:
>> Since we don't change the tuple in the original direction, we can save
>> its hash in ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev for use by
>> __nf_conntrack_confirm().
>
> I like this idea. We could actually do the same for the reply tuple
> and invalidate the saved hash in case the reply tuple is changed
> (nf_conntrack_alter_reply()), which only happens when NAT is used.
>
We can't do that, as the unconfirmed ct may be dropped, and
pre-computing would waste CPU cycles in that case.
>> __hash_conntrack() is split into two steps: ____hash_conntrack() is used
>> to get the raw hash, and __hash_bucket() is used to get the bucket id.
>
> This patch uses underscores a bit excessively, how about renaming:
>
> - ____hash_conntrack() => hash_conntrack_raw()
> - __hash variables => hash
> - hash variables => bucket
OK. Thanks.
>
>> @@ -408,7 +438,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
>> return NF_ACCEPT;
>>
>> zone = nf_ct_zone(ct);
>> - hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
>> + /* reuse the __hash saved before */
>> + hash = hash_bucket(*(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev, net);
>
> Please try to stay at least close to the 80 characters limit.
>
OK. Thanks.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* Re: [PATCH v5 2/2] netfilter: save the hash of the tuple in the original direction for later use
From: Patrick McHardy @ 2010-09-20 17:08 UTC
To: Changli Gao
Cc: David S. Miller, Eric Dumazet, Mathieu Desnoyers, akpm,
netfilter-devel, netdev, linux-kernel
On 20.09.2010 17:04, Changli Gao wrote:
> On Thu, Sep 16, 2010 at 2:18 PM, Patrick McHardy <kaber@trash.net> wrote:
>> On 21.08.2010 00:49, Changli Gao wrote:
>>> Since we don't change the tuple in the original direction, we can save
>>> its hash in ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev for use by
>>> __nf_conntrack_confirm().
>>
>> I like this idea. We could actually do the same for the reply tuple
>> and invalidate the saved hash in case the reply tuple is changed
>> (nf_conntrack_alter_reply()), which only happens when NAT is used.
>>
>
> We can't do that, as the unconfirmed ct may be dropped, and
> pre-computing would waste CPU cycles in that case.
Sure we can, dropping unconfirmed conntracks is a rare exception,
not a common case. Even under DoS we usually drop *unassured*
conntracks, which have already entered the hash. If we're unable
to do that, we won't even allocate a new conntrack.
* Re: [PATCH v5 2/2] netfilter: save the hash of the tuple in the original direction for later use
From: Changli Gao @ 2010-09-21 0:02 UTC
To: Patrick McHardy
Cc: David S. Miller, Eric Dumazet, Mathieu Desnoyers, akpm,
netfilter-devel, netdev, linux-kernel
On Tue, Sep 21, 2010 at 1:08 AM, Patrick McHardy <kaber@trash.net> wrote:
>
> Sure we can, dropping unconfirmed conntracks is a rare exception,
> not a common case. Even under DoS we usually drop *unassured*
> conntracks, which have already enterered the hash. If we're unable
> to do that, we won't even allocate a new conntrack.
>
Even so, saving the hash of the reply tuple isn't a good idea.
If NAT is turned on, the current code is:
mangle the reply tuple -> compute the hash of the reply tuple ->
insert into the conntrack hash table.
The new code is:
compute the hash of the reply tuple -> mangle the reply tuple ->
recompute the hash of the reply tuple -> insert into the conntrack
hash table.
As you can see, the hash is computed twice, so we use more CPU
cycles than before.
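Schematically, as a toy model (every name below is invented for
illustration; it only makes the duplicated work countable):

struct toy_ct {
        unsigned int reply_tuple;
        unsigned int hash_runs;
};

static unsigned int toy_hash(struct toy_ct *ct)
{
        ct->hash_runs++;                        /* count the work */
        return ct->reply_tuple * 2654435761u;   /* Knuth multiplier */
}

static void toy_nat_mangle(struct toy_ct *ct)
{
        ct->reply_tuple ^= 0xffffu;             /* NAT rewrites the tuple */
}

/* current flow: mangle first, then hash once -> hash_runs == 1 */
static unsigned int flow_today(struct toy_ct *ct)
{
        toy_nat_mangle(ct);
        return toy_hash(ct);
}

/* precomputed flow: the hash taken at allocation time goes stale the
 * moment NAT mangles the tuple -> hash_runs == 2 */
static unsigned int flow_precomputed(struct toy_ct *ct)
{
        unsigned int h = toy_hash(ct);          /* at allocation */
        toy_nat_mangle(ct);
        h = toy_hash(ct);                       /* forced recompute */
        return h;
}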
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* Re: [PATCH v5 2/2] netfilter: save the hash of the tuple in the original direction for later use
From: Patrick McHardy @ 2010-09-21 15:32 UTC
To: Changli Gao
Cc: David S. Miller, Eric Dumazet, Mathieu Desnoyers, akpm,
netfilter-devel, netdev, linux-kernel
On 21.09.2010 02:02, Changli Gao wrote:
> On Tue, Sep 21, 2010 at 1:08 AM, Patrick McHardy <kaber@trash.net> wrote:
>>
>> Sure we can, dropping unconfirmed conntracks is a rare exception,
>> not a common case. Even under DoS we usually drop *unassured*
> conntracks, which have already entered the hash. If we're unable
>> to do that, we won't even allocate a new conntrack.
>>
>
> Even so, saving the hash of the reply tuple isn't a good idea.
>
> If NAT is turned on, the current code is:
>
> mangle the reply tuple -> compute the hash of the reply tuple ->
> insert into the conntrack hash table.
>
> The new code is:
>
> compute the hash of the reply tuple -> mangle the reply tuple ->
> recompute the hash of the reply tuple -> insert into the conntrack
> hash table.
>
> As you can see, the hash is computed twice, so we use more CPU
> cycles than before.
You're right of course, we actually don't compute the reply hash
before inserting the conntrack into the hash table (except in a
few NAT cases, but we can look at those later).