netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: kaber@trash.net
To: davem@davemloft.net
Cc: netfilter-devel@vger.kernel.org, netdev@vger.kernel.org
Subject: [PATCH 09/72] netfilter: save the hash of the tuple in the original direction for latter use
Date: Thu, 21 Oct 2010 17:18:56 +0200	[thread overview]
Message-ID: <1287674399-31455-10-git-send-email-kaber@trash.net> (raw)
In-Reply-To: <1287674399-31455-1-git-send-email-kaber@trash.net>

From: Changli Gao <xiaosuo@gmail.com>

Since we don't change the tuple in the original direction, we can save it
in ct->tuplehash[IP_CT_DIR_REPLY].hnode.pprev for __nf_conntrack_confirm()
use.

__hash_conntrack() is split into two steps: hash_conntrack_raw() is used
to get the raw hash, and __hash_bucket() is used to get the bucket id.

In SYN-flood case, early_drop() doesn't need to recompute the hash again.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_core.c |  112 +++++++++++++++++++++++++-----------
 1 files changed, 78 insertions(+), 34 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 4c0ad9b..1eacf8d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -67,29 +67,40 @@ EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 
 static unsigned int nf_conntrack_hash_rnd __read_mostly;
 
-static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
-				  u16 zone, unsigned int size, unsigned int rnd)
+static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
 {
 	unsigned int n;
-	u_int32_t h;
 
 	/* The direction must be ignored, so we hash everything up to the
 	 * destination ports (which is a multiple of 4) and treat the last
 	 * three bytes manually.
 	 */
 	n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
-	h = jhash2((u32 *)tuple, n,
-		   zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
-				 tuple->dst.protonum));
+	return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^
+		      (((__force __u16)tuple->dst.u.all << 16) |
+		      tuple->dst.protonum));
+}
+
+static u32 __hash_bucket(u32 hash, unsigned int size)
+{
+	return ((u64)hash * size) >> 32;
+}
+
+static u32 hash_bucket(u32 hash, const struct net *net)
+{
+	return __hash_bucket(hash, net->ct.htable_size);
+}
 
-	return ((u64)h * size) >> 32;
+static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
+				  u16 zone, unsigned int size)
+{
+	return __hash_bucket(hash_conntrack_raw(tuple, zone), size);
 }
 
 static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
 				       const struct nf_conntrack_tuple *tuple)
 {
-	return __hash_conntrack(tuple, zone, net->ct.htable_size,
-				nf_conntrack_hash_rnd);
+	return __hash_conntrack(tuple, zone, net->ct.htable_size);
 }
 
 bool
@@ -291,20 +302,20 @@ static void death_by_timeout(unsigned long ul_conntrack)
  * OR
  * - Caller must lock nf_conntrack_lock before calling this function
  */
-struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(struct net *net, u16 zone,
-		    const struct nf_conntrack_tuple *tuple)
+static struct nf_conntrack_tuple_hash *
+____nf_conntrack_find(struct net *net, u16 zone,
+		      const struct nf_conntrack_tuple *tuple, u32 hash)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
-	unsigned int hash = hash_conntrack(net, zone, tuple);
+	unsigned int bucket = hash_bucket(hash, net);
 
 	/* Disable BHs the entire time since we normally need to disable them
 	 * at least once for the stats anyway.
 	 */
 	local_bh_disable();
 begin:
-	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
+	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
 		if (nf_ct_tuple_equal(tuple, &h->tuple) &&
 		    nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
 			NF_CT_STAT_INC(net, found);
@@ -318,7 +329,7 @@ begin:
 	 * not the expected one, we must restart lookup.
 	 * We probably met an item that was moved to another chain.
 	 */
-	if (get_nulls_value(n) != hash) {
+	if (get_nulls_value(n) != bucket) {
 		NF_CT_STAT_INC(net, search_restart);
 		goto begin;
 	}
@@ -326,19 +337,27 @@ begin:
 
 	return NULL;
 }
+
+struct nf_conntrack_tuple_hash *
+__nf_conntrack_find(struct net *net, u16 zone,
+		    const struct nf_conntrack_tuple *tuple)
+{
+	return ____nf_conntrack_find(net, zone, tuple,
+				     hash_conntrack_raw(tuple, zone));
+}
 EXPORT_SYMBOL_GPL(__nf_conntrack_find);
 
 /* Find a connection corresponding to a tuple. */
-struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, u16 zone,
-		      const struct nf_conntrack_tuple *tuple)
+static struct nf_conntrack_tuple_hash *
+__nf_conntrack_find_get(struct net *net, u16 zone,
+			const struct nf_conntrack_tuple *tuple, u32 hash)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 
 	rcu_read_lock();
 begin:
-	h = __nf_conntrack_find(net, zone, tuple);
+	h = ____nf_conntrack_find(net, zone, tuple, hash);
 	if (h) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		if (unlikely(nf_ct_is_dying(ct) ||
@@ -356,6 +375,14 @@ begin:
 
 	return h;
 }
+
+struct nf_conntrack_tuple_hash *
+nf_conntrack_find_get(struct net *net, u16 zone,
+		      const struct nf_conntrack_tuple *tuple)
+{
+	return __nf_conntrack_find_get(net, zone, tuple,
+				       hash_conntrack_raw(tuple, zone));
+}
 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
 
 static void __nf_conntrack_hash_insert(struct nf_conn *ct,
@@ -408,8 +435,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 		return NF_ACCEPT;
 
 	zone = nf_ct_zone(ct);
-	hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	/* reuse the hash saved before */
+	hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
+	hash = hash_bucket(hash, net);
+	repl_hash = hash_conntrack(net, zone,
+				   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	/* We're not in hash table, and we refuse to set up related
 	   connections for unconfirmed conns.  But packet copies and
@@ -566,10 +596,11 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 	return dropped;
 }
 
-struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
-				   const struct nf_conntrack_tuple *orig,
-				   const struct nf_conntrack_tuple *repl,
-				   gfp_t gfp)
+static struct nf_conn *
+__nf_conntrack_alloc(struct net *net, u16 zone,
+		     const struct nf_conntrack_tuple *orig,
+		     const struct nf_conntrack_tuple *repl,
+		     gfp_t gfp, u32 hash)
 {
 	struct nf_conn *ct;
 
@@ -585,6 +616,9 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 			get_random_bytes(&rand, sizeof(rand));
 		} while (!rand);
 		cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
+
+		/* recompute the hash as nf_conntrack_hash_rnd is initialized */
+		hash = hash_conntrack_raw(orig, zone);
 	}
 
 	/* We don't want any race condition at early drop stage */
@@ -592,8 +626,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 
 	if (nf_conntrack_max &&
 	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
-		unsigned int hash = hash_conntrack(net, zone, orig);
-		if (!early_drop(net, hash)) {
+		if (!early_drop(net, hash_bucket(hash, net))) {
 			atomic_dec(&net->ct.count);
 			if (net_ratelimit())
 				printk(KERN_WARNING
@@ -623,7 +656,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 	ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
 	ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
 	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
-	ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
+	/* save hash for reusing when confirming */
+	*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
 	/* Don't set timer yet: wait for confirmation */
 	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
 	write_pnet(&ct->ct_net, net);
@@ -650,6 +684,14 @@ out_free:
 	return ERR_PTR(-ENOMEM);
 #endif
 }
+
+struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
+				   const struct nf_conntrack_tuple *orig,
+				   const struct nf_conntrack_tuple *repl,
+				   gfp_t gfp)
+{
+	return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
+}
 EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
 
 void nf_conntrack_free(struct nf_conn *ct)
@@ -671,7 +713,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	       struct nf_conntrack_l3proto *l3proto,
 	       struct nf_conntrack_l4proto *l4proto,
 	       struct sk_buff *skb,
-	       unsigned int dataoff)
+	       unsigned int dataoff, u32 hash)
 {
 	struct nf_conn *ct;
 	struct nf_conn_help *help;
@@ -685,7 +727,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 		return NULL;
 	}
 
-	ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC);
+	ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
+				  hash);
 	if (IS_ERR(ct)) {
 		pr_debug("Can't allocate conntrack.\n");
 		return (struct nf_conntrack_tuple_hash *)ct;
@@ -762,6 +805,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 	u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+	u32 hash;
 
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
 			     dataoff, l3num, protonum, &tuple, l3proto,
@@ -771,10 +815,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	}
 
 	/* look for tuple match */
-	h = nf_conntrack_find_get(net, zone, &tuple);
+	hash = hash_conntrack_raw(&tuple, zone);
+	h = __nf_conntrack_find_get(net, zone, &tuple, hash);
 	if (!h) {
 		h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
-				   skb, dataoff);
+				   skb, dataoff, hash);
 		if (!h)
 			return NULL;
 		if (IS_ERR(h))
@@ -1314,8 +1359,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 			ct = nf_ct_tuplehash_to_ctrack(h);
 			hlist_nulls_del_rcu(&h->hnnode);
 			bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
-						  hashsize,
-						  nf_conntrack_hash_rnd);
+						  hashsize);
 			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
 		}
 	}
-- 
1.7.1


  parent reply	other threads:[~2010-10-21 15:18 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-21 15:18 [PATCH 00/72] netfilter: netfilter update for 2.6.37 kaber
2010-10-21 15:18 ` [PATCH 01/72] netfilter: nf_nat: add nf_nat_csum() kaber
2010-10-21 15:18 ` [PATCH 02/72] netfilter: use NFPROTO_IPV4 instead of AF_INET kaber
2010-10-21 15:18 ` [PATCH 03/72] netfilter: nf_nat_core: don't check if the tuple is used if there is no other choice kaber
2010-10-21 15:18 ` [PATCH 04/72] netfilter: nf_nat: no IP_NAT_RANGE_MAP_IPS flags when alloc_null_binding() kaber
2010-10-21 15:18 ` [PATCH 05/72] netfilter: nf_conntrack: fix the hash random initializing race kaber
2010-10-21 15:18 ` [PATCH 06/72] ipvs: extend connection flags to 32 bits kaber
2010-10-21 15:18 ` [PATCH 07/72] ipvs: netfilter connection tracking changes kaber
2010-10-21 15:18 ` [PATCH 08/72] ipvs: make rerouting optional with snat_reroute kaber
2010-10-21 15:18 ` kaber [this message]
2010-10-21 15:18 ` [PATCH 10/72] ipvs: changes related to service usecnt kaber
2010-10-21 15:18 ` [PATCH 11/72] netfilter: nf_nat: better error handling of nf_ct_expect_related() in helpers kaber
2010-10-21 15:18 ` [PATCH 12/72] netfilter: ctnetlink: missing validation of CTA_EXPECT_ZONE attribute kaber
2010-10-21 15:19 ` [PATCH 13/72] netfilter: ctnetlink: allow to specify the expectation flags kaber
2010-10-21 15:19 ` [PATCH 14/72] netfilter: ctnetlink: add support for user-space expectation helpers kaber
2010-10-21 15:19 ` [PATCH 15/72] netfilter: nf_conntrack_sip: Allow ct_sip_get_header() to be called with a null ct argument kaber
2010-10-21 15:19 ` [PATCH 16/72] netfilter: nf_conntrack_sip: Add callid parser kaber
2010-10-21 15:19 ` [PATCH 17/72] IPVS: compact ip_vs_sched_persist() kaber
2010-10-21 15:19 ` [PATCH 18/72] IPVS: Add struct ip_vs_conn_param kaber
2010-10-21 15:19 ` [PATCH 19/72] IPVS: Allow null argument to ip_vs_scheduler_put() kaber
2010-10-21 15:19 ` [PATCH 20/72] IPVS: ip_vs_{un,}bind_scheduler NULL arguments kaber
2010-10-21 15:19 ` [PATCH 21/72] IPVS: Add struct ip_vs_pe kaber
2010-10-21 15:19 ` [PATCH 22/72] IPVS: Add persistence engine data to /proc/net/ip_vs_conn kaber
2010-10-21 15:19 ` [PATCH 23/72] IPVS: management of persistence engine modules kaber
2010-10-21 15:19 ` [PATCH 24/72] IPVS: Allow configuration of persistence engines kaber
2010-10-21 15:19 ` [PATCH 25/72] IPVS: Fallback if persistence engine fails kaber
2010-10-21 15:19 ` [PATCH 26/72] IPVS: sip persistence engine kaber
2010-10-21 15:19 ` [PATCH 27/72] netfilter: nf_nat: make find/put static kaber
2010-10-21 15:19 ` [PATCH 28/72] netfilter: ipt_LOG: add bufferisation to call printk() once kaber
2010-10-21 15:19 ` [PATCH 29/72] netfilter: remove duplicated include kaber
2010-10-21 15:19 ` [PATCH 30/72] netfilter: unregister nf hooks, matches and targets in the reverse order kaber
2010-10-21 15:19 ` [PATCH 31/72] netfilter: add missing xt_log.h file kaber
2010-10-21 15:19 ` [PATCH 32/72] netfilter: xtables: resolve indirect macros 1/3 kaber
2010-10-21 15:19 ` [PATCH 33/72] netfilter: xtables: resolve indirect macros 2/3 kaber
2010-10-21 15:19 ` [PATCH 34/72] netfilter: xtables: resolve indirect macros 3/3 kaber
2010-10-21 15:19 ` [PATCH 35/72] netfilter: xtables: unify {ip,ip6,arp}t_error_target kaber
2010-10-21 15:19 ` [PATCH 36/72] netfilter: xtables: remove unused defines kaber
2010-10-21 15:19 ` [PATCH 37/72] IPVS: ip_vs_dbg_callid() is only needed for debugging kaber
2010-10-21 15:19 ` [PATCH 38/72] netfilter: fix kconfig unmet dependency warning kaber
2010-10-21 15:19 ` [PATCH 39/72] netfilter: install missing ebtables headers for userspace kaber
2010-10-21 15:19 ` [PATCH 40/72] netfilter: ctnetlink: add expectation deletion events kaber
2010-10-21 15:19 ` [PATCH 41/72] ipvs: IPv6 tunnel mode kaber
2010-10-21 15:19 ` [PATCH 42/72] Fixed race condition at ip_vs.ko module init kaber
2010-10-21 15:19 ` [PATCH 43/72] ipvs: fix CHECKSUM_PARTIAL for TCP, UDP kaber
2010-10-21 15:19 ` [PATCH 44/72] ipvs: optimize checksums for apps kaber
2010-10-21 15:19 ` [PATCH 45/72] ipvs: switch to notrack mode kaber
2010-10-21 15:19 ` [PATCH 46/72] ipvs: do not schedule conns from real servers kaber
2010-10-21 15:19 ` [PATCH 47/72] ipvs: stop ICMP from FORWARD to local kaber
2010-10-21 15:19 ` [PATCH 48/72] ipvs: fix CHECKSUM_PARTIAL for TUN method kaber
2010-10-21 15:19 ` [PATCH 49/72] ipvs: create ip_vs_defrag_user kaber
2010-10-21 15:19 ` [PATCH 50/72] ipvs: move ip_route_me_harder for ICMP kaber
2010-10-21 15:19 ` [PATCH 51/72] ipvs: changes for local real server kaber
2010-10-21 15:19 ` [PATCH 52/72] ipvs: changes for local client kaber
2010-10-21 15:19 ` [PATCH 53/72] ipvs: inherit forwarding method in backup kaber
2010-10-21 15:19 ` [PATCH 54/72] ipvs: provide address family for debugging kaber
2010-10-21 15:19 ` [PATCH 55/72] tproxy: kick out TIME_WAIT sockets in case a new connection comes in with the same tuple kaber
2010-10-21 15:19 ` [PATCH 56/72] tproxy: add lookup type checks for UDP in nf_tproxy_get_sock_v4() kaber
2010-10-21 15:19 ` [PATCH 57/72] tproxy: fix hash locking issue when using port redirection in __inet_inherit_port() kaber
2010-10-21 15:19 ` [PATCH 58/72] nf_nat: restrict ICMP translation for embedded header kaber
2010-10-21 15:19 ` [PATCH 59/72] tproxy: split off ipv6 defragmentation to a separate module kaber
2010-10-21 15:19 ` [PATCH 60/72] tproxy: added const specifiers to udp lookup functions kaber
2010-10-21 15:19 ` [PATCH 61/72] tproxy: added udp6_lib_lookup function kaber
2010-10-21 15:19 ` [PATCH 62/72] tproxy: added tproxy sockopt interface in the IPV6 layer kaber
2010-10-21 15:19 ` [PATCH 63/72] tproxy: allow non-local binds of IPv6 sockets if IP_TRANSPARENT is enabled kaber
2010-10-21 21:07   ` YOSHIFUJI Hideaki
2010-10-21 15:19 ` [PATCH 64/72] tproxy: added IPv6 socket lookup function to nf_tproxy_core kaber
2010-10-21 15:19 ` [PATCH 65/72] tproxy: added IPv6 support to the TPROXY target kaber
2010-10-21 15:19 ` [PATCH 66/72] tproxy: added IPv6 support to the socket match kaber
2010-10-21 15:19 ` [PATCH 67/72] tproxy: use the interface primary IP address as a default value for --on-ip kaber
2010-10-21 15:19 ` [PATCH 68/72] netfilter: ebtables: remove unused definitions kaber
2010-10-21 15:19 ` [PATCH 69/72] netfilter: xtables: add a missing pair of parentheses kaber
2010-10-21 15:19 ` [PATCH 70/72] netfilter: ebtables: replace EBT_ENTRY_ITERATE macro kaber
2010-10-21 15:19 ` [PATCH 71/72] netfilter: ebtables: replace EBT_MATCH_ITERATE macro kaber
2010-10-21 15:19 ` [PATCH 72/72] netfilter: ebtables: replace EBT_WATCHER_ITERATE macro kaber
2010-10-21 15:40 ` [PATCH 00/72] netfilter: netfilter update for 2.6.37 David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1287674399-31455-10-git-send-email-kaber@trash.net \
    --to=kaber@trash.net \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).