From: kaber@trash.net
To: davem@davemloft.net
Cc: netfilter-devel@vger.kernel.org, netdev@vger.kernel.org
Subject: [PATCH 09/72] netfilter: save the hash of the tuple in the original direction for latter use
Date: Thu, 21 Oct 2010 17:18:56 +0200 [thread overview]
Message-ID: <1287674399-31455-10-git-send-email-kaber@trash.net> (raw)
In-Reply-To: <1287674399-31455-1-git-send-email-kaber@trash.net>
From: Changli Gao <xiaosuo@gmail.com>
Since we don't change the tuple in the original direction, we can save it
in ct->tuplehash[IP_CT_DIR_REPLY].hnode.pprev for __nf_conntrack_confirm()
use.
__hash_conntrack() is split into two steps: hash_conntrack_raw() is used
to get the raw hash, and __hash_bucket() is used to get the bucket id.
In SYN-flood case, early_drop() doesn't need to recompute the hash again.
Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
net/netfilter/nf_conntrack_core.c | 112 +++++++++++++++++++++++++-----------
1 files changed, 78 insertions(+), 34 deletions(-)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 4c0ad9b..1eacf8d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -67,29 +67,40 @@ EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
static unsigned int nf_conntrack_hash_rnd __read_mostly;
-static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
- u16 zone, unsigned int size, unsigned int rnd)
+static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
{
unsigned int n;
- u_int32_t h;
/* The direction must be ignored, so we hash everything up to the
* destination ports (which is a multiple of 4) and treat the last
* three bytes manually.
*/
n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
- h = jhash2((u32 *)tuple, n,
- zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
- tuple->dst.protonum));
+ return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^
+ (((__force __u16)tuple->dst.u.all << 16) |
+ tuple->dst.protonum));
+}
+
+static u32 __hash_bucket(u32 hash, unsigned int size)
+{
+ return ((u64)hash * size) >> 32;
+}
+
+static u32 hash_bucket(u32 hash, const struct net *net)
+{
+ return __hash_bucket(hash, net->ct.htable_size);
+}
- return ((u64)h * size) >> 32;
+static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
+ u16 zone, unsigned int size)
+{
+ return __hash_bucket(hash_conntrack_raw(tuple, zone), size);
}
static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple)
{
- return __hash_conntrack(tuple, zone, net->ct.htable_size,
- nf_conntrack_hash_rnd);
+ return __hash_conntrack(tuple, zone, net->ct.htable_size);
}
bool
@@ -291,20 +302,20 @@ static void death_by_timeout(unsigned long ul_conntrack)
* OR
* - Caller must lock nf_conntrack_lock before calling this function
*/
-struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(struct net *net, u16 zone,
- const struct nf_conntrack_tuple *tuple)
+static struct nf_conntrack_tuple_hash *
+____nf_conntrack_find(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple, u32 hash)
{
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- unsigned int hash = hash_conntrack(net, zone, tuple);
+ unsigned int bucket = hash_bucket(hash, net);
/* Disable BHs the entire time since we normally need to disable them
* at least once for the stats anyway.
*/
local_bh_disable();
begin:
- hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
+ hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
if (nf_ct_tuple_equal(tuple, &h->tuple) &&
nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
NF_CT_STAT_INC(net, found);
@@ -318,7 +329,7 @@ begin:
* not the expected one, we must restart lookup.
* We probably met an item that was moved to another chain.
*/
- if (get_nulls_value(n) != hash) {
+ if (get_nulls_value(n) != bucket) {
NF_CT_STAT_INC(net, search_restart);
goto begin;
}
@@ -326,19 +337,27 @@ begin:
return NULL;
}
+
+struct nf_conntrack_tuple_hash *
+__nf_conntrack_find(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return ____nf_conntrack_find(net, zone, tuple,
+ hash_conntrack_raw(tuple, zone));
+}
EXPORT_SYMBOL_GPL(__nf_conntrack_find);
/* Find a connection corresponding to a tuple. */
-struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, u16 zone,
- const struct nf_conntrack_tuple *tuple)
+static struct nf_conntrack_tuple_hash *
+__nf_conntrack_find_get(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple, u32 hash)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
rcu_read_lock();
begin:
- h = __nf_conntrack_find(net, zone, tuple);
+ h = ____nf_conntrack_find(net, zone, tuple, hash);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (unlikely(nf_ct_is_dying(ct) ||
@@ -356,6 +375,14 @@ begin:
return h;
}
+
+struct nf_conntrack_tuple_hash *
+nf_conntrack_find_get(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return __nf_conntrack_find_get(net, zone, tuple,
+ hash_conntrack_raw(tuple, zone));
+}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
@@ -408,8 +435,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_ACCEPT;
zone = nf_ct_zone(ct);
- hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ /* reuse the hash saved before */
+ hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
+ hash = hash_bucket(hash, net);
+ repl_hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
/* We're not in hash table, and we refuse to set up related
connections for unconfirmed conns. But packet copies and
@@ -566,10 +596,11 @@ static noinline int early_drop(struct net *net, unsigned int hash)
return dropped;
}
-struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
- const struct nf_conntrack_tuple *orig,
- const struct nf_conntrack_tuple *repl,
- gfp_t gfp)
+static struct nf_conn *
+__nf_conntrack_alloc(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *orig,
+ const struct nf_conntrack_tuple *repl,
+ gfp_t gfp, u32 hash)
{
struct nf_conn *ct;
@@ -585,6 +616,9 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
get_random_bytes(&rand, sizeof(rand));
} while (!rand);
cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
+
+ /* recompute the hash as nf_conntrack_hash_rnd is initialized */
+ hash = hash_conntrack_raw(orig, zone);
}
/* We don't want any race condition at early drop stage */
@@ -592,8 +626,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
if (nf_conntrack_max &&
unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
- unsigned int hash = hash_conntrack(net, zone, orig);
- if (!early_drop(net, hash)) {
+ if (!early_drop(net, hash_bucket(hash, net))) {
atomic_dec(&net->ct.count);
if (net_ratelimit())
printk(KERN_WARNING
@@ -623,7 +656,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
- ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
+ /* save hash for reusing when confirming */
+ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
/* Don't set timer yet: wait for confirmation */
setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
write_pnet(&ct->ct_net, net);
@@ -650,6 +684,14 @@ out_free:
return ERR_PTR(-ENOMEM);
#endif
}
+
+struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *orig,
+ const struct nf_conntrack_tuple *repl,
+ gfp_t gfp)
+{
+ return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
+}
EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
void nf_conntrack_free(struct nf_conn *ct)
@@ -671,7 +713,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_l3proto *l3proto,
struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
- unsigned int dataoff)
+ unsigned int dataoff, u32 hash)
{
struct nf_conn *ct;
struct nf_conn_help *help;
@@ -685,7 +727,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
return NULL;
}
- ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC);
+ ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
+ hash);
if (IS_ERR(ct)) {
pr_debug("Can't allocate conntrack.\n");
return (struct nf_conntrack_tuple_hash *)ct;
@@ -762,6 +805,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+ u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, l3num, protonum, &tuple, l3proto,
@@ -771,10 +815,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
}
/* look for tuple match */
- h = nf_conntrack_find_get(net, zone, &tuple);
+ hash = hash_conntrack_raw(&tuple, zone);
+ h = __nf_conntrack_find_get(net, zone, &tuple, hash);
if (!h) {
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
- skb, dataoff);
+ skb, dataoff, hash);
if (!h)
return NULL;
if (IS_ERR(h))
@@ -1314,8 +1359,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
ct = nf_ct_tuplehash_to_ctrack(h);
hlist_nulls_del_rcu(&h->hnnode);
bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
- hashsize,
- nf_conntrack_hash_rnd);
+ hashsize);
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
}
--
1.7.1
next prev parent reply other threads:[~2010-10-21 15:18 UTC|newest]
Thread overview: 75+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-10-21 15:18 [PATCH 00/72] netfilter: netfilter update for 2.6.37 kaber
2010-10-21 15:18 ` [PATCH 01/72] netfilter: nf_nat: add nf_nat_csum() kaber
2010-10-21 15:18 ` [PATCH 02/72] netfilter: use NFPROTO_IPV4 instead of AF_INET kaber
2010-10-21 15:18 ` [PATCH 03/72] netfilter: nf_nat_core: don't check if the tuple is used if there is no other choice kaber
2010-10-21 15:18 ` [PATCH 04/72] netfilter: nf_nat: no IP_NAT_RANGE_MAP_IPS flags when alloc_null_binding() kaber
2010-10-21 15:18 ` [PATCH 05/72] netfilter: nf_conntrack: fix the hash random initializing race kaber
2010-10-21 15:18 ` [PATCH 06/72] ipvs: extend connection flags to 32 bits kaber
2010-10-21 15:18 ` [PATCH 07/72] ipvs: netfilter connection tracking changes kaber
2010-10-21 15:18 ` [PATCH 08/72] ipvs: make rerouting optional with snat_reroute kaber
2010-10-21 15:18 ` kaber [this message]
2010-10-21 15:18 ` [PATCH 10/72] ipvs: changes related to service usecnt kaber
2010-10-21 15:18 ` [PATCH 11/72] netfilter: nf_nat: better error handling of nf_ct_expect_related() in helpers kaber
2010-10-21 15:18 ` [PATCH 12/72] netfilter: ctnetlink: missing validation of CTA_EXPECT_ZONE attribute kaber
2010-10-21 15:19 ` [PATCH 13/72] netfilter: ctnetlink: allow to specify the expectation flags kaber
2010-10-21 15:19 ` [PATCH 14/72] netfilter: ctnetlink: add support for user-space expectation helpers kaber
2010-10-21 15:19 ` [PATCH 15/72] netfilter: nf_conntrack_sip: Allow ct_sip_get_header() to be called with a null ct argument kaber
2010-10-21 15:19 ` [PATCH 16/72] netfilter: nf_conntrack_sip: Add callid parser kaber
2010-10-21 15:19 ` [PATCH 17/72] IPVS: compact ip_vs_sched_persist() kaber
2010-10-21 15:19 ` [PATCH 18/72] IPVS: Add struct ip_vs_conn_param kaber
2010-10-21 15:19 ` [PATCH 19/72] IPVS: Allow null argument to ip_vs_scheduler_put() kaber
2010-10-21 15:19 ` [PATCH 20/72] IPVS: ip_vs_{un,}bind_scheduler NULL arguments kaber
2010-10-21 15:19 ` [PATCH 21/72] IPVS: Add struct ip_vs_pe kaber
2010-10-21 15:19 ` [PATCH 22/72] IPVS: Add persistence engine data to /proc/net/ip_vs_conn kaber
2010-10-21 15:19 ` [PATCH 23/72] IPVS: management of persistence engine modules kaber
2010-10-21 15:19 ` [PATCH 24/72] IPVS: Allow configuration of persistence engines kaber
2010-10-21 15:19 ` [PATCH 25/72] IPVS: Fallback if persistence engine fails kaber
2010-10-21 15:19 ` [PATCH 26/72] IPVS: sip persistence engine kaber
2010-10-21 15:19 ` [PATCH 27/72] netfilter: nf_nat: make find/put static kaber
2010-10-21 15:19 ` [PATCH 28/72] netfilter: ipt_LOG: add bufferisation to call printk() once kaber
2010-10-21 15:19 ` [PATCH 29/72] netfilter: remove duplicated include kaber
2010-10-21 15:19 ` [PATCH 30/72] netfilter: unregister nf hooks, matches and targets in the reverse order kaber
2010-10-21 15:19 ` [PATCH 31/72] netfilter: add missing xt_log.h file kaber
2010-10-21 15:19 ` [PATCH 32/72] netfilter: xtables: resolve indirect macros 1/3 kaber
2010-10-21 15:19 ` [PATCH 33/72] netfilter: xtables: resolve indirect macros 2/3 kaber
2010-10-21 15:19 ` [PATCH 34/72] netfilter: xtables: resolve indirect macros 3/3 kaber
2010-10-21 15:19 ` [PATCH 35/72] netfilter: xtables: unify {ip,ip6,arp}t_error_target kaber
2010-10-21 15:19 ` [PATCH 36/72] netfilter: xtables: remove unused defines kaber
2010-10-21 15:19 ` [PATCH 37/72] IPVS: ip_vs_dbg_callid() is only needed for debugging kaber
2010-10-21 15:19 ` [PATCH 38/72] netfilter: fix kconfig unmet dependency warning kaber
2010-10-21 15:19 ` [PATCH 39/72] netfilter: install missing ebtables headers for userspace kaber
2010-10-21 15:19 ` [PATCH 40/72] netfilter: ctnetlink: add expectation deletion events kaber
2010-10-21 15:19 ` [PATCH 41/72] ipvs: IPv6 tunnel mode kaber
2010-10-21 15:19 ` [PATCH 42/72] Fixed race condition at ip_vs.ko module init kaber
2010-10-21 15:19 ` [PATCH 43/72] ipvs: fix CHECKSUM_PARTIAL for TCP, UDP kaber
2010-10-21 15:19 ` [PATCH 44/72] ipvs: optimize checksums for apps kaber
2010-10-21 15:19 ` [PATCH 45/72] ipvs: switch to notrack mode kaber
2010-10-21 15:19 ` [PATCH 46/72] ipvs: do not schedule conns from real servers kaber
2010-10-21 15:19 ` [PATCH 47/72] ipvs: stop ICMP from FORWARD to local kaber
2010-10-21 15:19 ` [PATCH 48/72] ipvs: fix CHECKSUM_PARTIAL for TUN method kaber
2010-10-21 15:19 ` [PATCH 49/72] ipvs: create ip_vs_defrag_user kaber
2010-10-21 15:19 ` [PATCH 50/72] ipvs: move ip_route_me_harder for ICMP kaber
2010-10-21 15:19 ` [PATCH 51/72] ipvs: changes for local real server kaber
2010-10-21 15:19 ` [PATCH 52/72] ipvs: changes for local client kaber
2010-10-21 15:19 ` [PATCH 53/72] ipvs: inherit forwarding method in backup kaber
2010-10-21 15:19 ` [PATCH 54/72] ipvs: provide address family for debugging kaber
2010-10-21 15:19 ` [PATCH 55/72] tproxy: kick out TIME_WAIT sockets in case a new connection comes in with the same tuple kaber
2010-10-21 15:19 ` [PATCH 56/72] tproxy: add lookup type checks for UDP in nf_tproxy_get_sock_v4() kaber
2010-10-21 15:19 ` [PATCH 57/72] tproxy: fix hash locking issue when using port redirection in __inet_inherit_port() kaber
2010-10-21 15:19 ` [PATCH 58/72] nf_nat: restrict ICMP translation for embedded header kaber
2010-10-21 15:19 ` [PATCH 59/72] tproxy: split off ipv6 defragmentation to a separate module kaber
2010-10-21 15:19 ` [PATCH 60/72] tproxy: added const specifiers to udp lookup functions kaber
2010-10-21 15:19 ` [PATCH 61/72] tproxy: added udp6_lib_lookup function kaber
2010-10-21 15:19 ` [PATCH 62/72] tproxy: added tproxy sockopt interface in the IPV6 layer kaber
2010-10-21 15:19 ` [PATCH 63/72] tproxy: allow non-local binds of IPv6 sockets if IP_TRANSPARENT is enabled kaber
2010-10-21 21:07 ` YOSHIFUJI Hideaki
2010-10-21 15:19 ` [PATCH 64/72] tproxy: added IPv6 socket lookup function to nf_tproxy_core kaber
2010-10-21 15:19 ` [PATCH 65/72] tproxy: added IPv6 support to the TPROXY target kaber
2010-10-21 15:19 ` [PATCH 66/72] tproxy: added IPv6 support to the socket match kaber
2010-10-21 15:19 ` [PATCH 67/72] tproxy: use the interface primary IP address as a default value for --on-ip kaber
2010-10-21 15:19 ` [PATCH 68/72] netfilter: ebtables: remove unused definitions kaber
2010-10-21 15:19 ` [PATCH 69/72] netfilter: xtables: add a missing pair of parentheses kaber
2010-10-21 15:19 ` [PATCH 70/72] netfilter: ebtables: replace EBT_ENTRY_ITERATE macro kaber
2010-10-21 15:19 ` [PATCH 71/72] netfilter: ebtables: replace EBT_MATCH_ITERATE macro kaber
2010-10-21 15:19 ` [PATCH 72/72] netfilter: ebtables: replace EBT_WATCHER_ITERATE macro kaber
2010-10-21 15:40 ` [PATCH 00/72] netfilter: netfilter update for 2.6.37 David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1287674399-31455-10-git-send-email-kaber@trash.net \
--to=kaber@trash.net \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).