From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org
Subject: [PATCH 6/9] netfilter: nf_conncount: merge lookup and add functions
Date: Sat, 29 Dec 2018 13:58:00 +0100 [thread overview]
Message-ID: <20181229125803.7415-7-pablo@netfilter.org> (raw)
In-Reply-To: <20181229125803.7415-1-pablo@netfilter.org>
From: Florian Westphal <fw@strlen.de>
'lookup' is always followed by 'add'.
Merge both and make the list-walk part of nf_conncount_add().
This also avoids one unneeded unlock/re-lock pair.
Extra care needs to be taken in count_tree, as we only hold rcu
read lock, i.e. we can only insert to an existing tree node after
acquiring its lock and making sure it has a nonzero count.
As a zero count should be rare, just fall back to insert_tree()
(which acquires tree lock).
This issue and its solution were pointed out by Shawn Bohrer
during patch review.
Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_conntrack_count.h | 18 +---
net/netfilter/nf_conncount.c | 146 +++++++++++++----------------
net/netfilter/nft_connlimit.c | 14 +--
3 files changed, 72 insertions(+), 106 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index 4b2b2baf8ab4..aa66775c15f4 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -5,12 +5,6 @@
struct nf_conncount_data;
-enum nf_conncount_list_add {
- NF_CONNCOUNT_ADDED, /* list add was ok */
- NF_CONNCOUNT_ERR, /* -ENOMEM, must drop skb */
- NF_CONNCOUNT_SKIP, /* list is already reclaimed by gc */
-};
-
struct nf_conncount_list {
spinlock_t list_lock;
struct list_head head; /* connections with the same filtering key */
@@ -29,18 +23,12 @@ unsigned int nf_conncount_count(struct net *net,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone);
-void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone,
- bool *addit);
+int nf_conncount_add(struct net *net, struct nf_conncount_list *list,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone);
void nf_conncount_list_init(struct nf_conncount_list *list);
-enum nf_conncount_list_add
-nf_conncount_add(struct nf_conncount_list *list,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone);
-
bool nf_conncount_gc_list(struct net *net,
struct nf_conncount_list *list);
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 0a83c694a8f1..ce7f7d1212a6 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -83,38 +83,6 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
return memcmp(a, b, klen * sizeof(u32));
}
-enum nf_conncount_list_add
-nf_conncount_add(struct nf_conncount_list *list,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
-{
- struct nf_conncount_tuple *conn;
-
- if (WARN_ON_ONCE(list->count > INT_MAX))
- return NF_CONNCOUNT_ERR;
-
- conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
- if (conn == NULL)
- return NF_CONNCOUNT_ERR;
-
- conn->tuple = *tuple;
- conn->zone = *zone;
- conn->cpu = raw_smp_processor_id();
- conn->jiffies32 = (u32)jiffies;
- conn->dead = false;
- spin_lock_bh(&list->list_lock);
- if (list->dead == true) {
- kmem_cache_free(conncount_conn_cachep, conn);
- spin_unlock_bh(&list->list_lock);
- return NF_CONNCOUNT_SKIP;
- }
- list_add_tail(&conn->node, &list->head);
- list->count++;
- spin_unlock_bh(&list->list_lock);
- return NF_CONNCOUNT_ADDED;
-}
-EXPORT_SYMBOL_GPL(nf_conncount_add);
-
static void __conn_free(struct rcu_head *h)
{
struct nf_conncount_tuple *conn;
@@ -177,11 +145,10 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
return ERR_PTR(-EAGAIN);
}
-void nf_conncount_lookup(struct net *net,
- struct nf_conncount_list *list,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone,
- bool *addit)
+static int __nf_conncount_add(struct net *net,
+ struct nf_conncount_list *list,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
{
const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn, *conn_n;
@@ -189,9 +156,6 @@ void nf_conncount_lookup(struct net *net,
unsigned int collect = 0;
bool free_entry = false;
- /* best effort only */
- *addit = tuple ? true : false;
-
/* check the saved connections */
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
if (collect > CONNCOUNT_GC_MAX_NODES)
@@ -201,21 +165,19 @@ void nf_conncount_lookup(struct net *net,
if (IS_ERR(found)) {
/* Not found, but might be about to be confirmed */
if (PTR_ERR(found) == -EAGAIN) {
- if (!tuple)
- continue;
-
if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
nf_ct_zone_id(zone, zone->dir))
- *addit = false;
- } else if (PTR_ERR(found) == -ENOENT)
+ return 0; /* already exists */
+ } else {
collect++;
+ }
continue;
}
found_ct = nf_ct_tuplehash_to_ctrack(found);
- if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
+ if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
nf_ct_zone_equal(found_ct, zone, zone->dir)) {
/*
* We should not see tuples twice unless someone hooks
@@ -223,7 +185,8 @@ void nf_conncount_lookup(struct net *net,
*
* Attempt to avoid a re-add in this case.
*/
- *addit = false;
+ nf_ct_put(found_ct);
+ return 0;
} else if (already_closed(found_ct)) {
/*
* we do not care about connections which are
@@ -237,8 +200,38 @@ void nf_conncount_lookup(struct net *net,
nf_ct_put(found_ct);
}
+
+ if (WARN_ON_ONCE(list->count > INT_MAX))
+ return -EOVERFLOW;
+
+ conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL)
+ return -ENOMEM;
+
+ conn->tuple = *tuple;
+ conn->zone = *zone;
+ conn->cpu = raw_smp_processor_id();
+ conn->jiffies32 = (u32)jiffies;
+ list_add_tail(&conn->node, &list->head);
+ list->count++;
+ return 0;
}
-EXPORT_SYMBOL_GPL(nf_conncount_lookup);
+
+int nf_conncount_add(struct net *net,
+ struct nf_conncount_list *list,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+{
+ int ret;
+
+ /* check the saved connections */
+ spin_lock_bh(&list->list_lock);
+ ret = __nf_conncount_add(net, list, tuple, zone);
+ spin_unlock_bh(&list->list_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_add);
void nf_conncount_list_init(struct nf_conncount_list *list)
{
@@ -339,13 +332,11 @@ insert_tree(struct net *net,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
- enum nf_conncount_list_add ret;
struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
struct rb_node **rbnode, *parent;
struct nf_conncount_rb *rbconn;
struct nf_conncount_tuple *conn;
unsigned int count = 0, gc_count = 0;
- bool node_found = false;
bool do_gc = true;
spin_lock_bh(&nf_conncount_locks[hash]);
@@ -363,20 +354,15 @@ insert_tree(struct net *net,
} else if (diff > 0) {
rbnode = &((*rbnode)->rb_right);
} else {
- /* unlikely: other cpu added node already */
- node_found = true;
- ret = nf_conncount_add(&rbconn->list, tuple, zone);
- if (ret == NF_CONNCOUNT_ERR) {
+ int ret;
+
+ ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
+ if (ret)
count = 0; /* hotdrop */
- } else if (ret == NF_CONNCOUNT_ADDED) {
+ else
count = rbconn->list.count;
- } else {
- /* NF_CONNCOUNT_SKIP, rbconn is already
- * reclaimed by gc, insert a new tree node
- */
- node_found = false;
- }
- break;
+ tree_nodes_free(root, gc_nodes, gc_count);
+ goto out_unlock;
}
if (gc_count >= ARRAY_SIZE(gc_nodes))
@@ -394,9 +380,6 @@ insert_tree(struct net *net,
goto restart;
}
- if (node_found)
- goto out_unlock;
-
/* expected case: match, insert new node */
rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
if (rbconn == NULL)
@@ -431,7 +414,6 @@ count_tree(struct net *net,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
- enum nf_conncount_list_add ret;
struct rb_root *root;
struct rb_node *parent;
struct nf_conncount_rb *rbconn;
@@ -444,7 +426,6 @@ count_tree(struct net *net,
parent = rcu_dereference_raw(root->rb_node);
while (parent) {
int diff;
- bool addit;
rbconn = rb_entry(parent, struct nf_conncount_rb, node);
@@ -454,24 +435,29 @@ count_tree(struct net *net,
} else if (diff > 0) {
parent = rcu_dereference_raw(parent->rb_right);
} else {
- /* same source network -> be counted! */
- nf_conncount_lookup(net, &rbconn->list, tuple, zone,
- &addit);
+ int ret;
- if (!addit)
+ if (!tuple) {
+ nf_conncount_gc_list(net, &rbconn->list);
return rbconn->list.count;
+ }
- ret = nf_conncount_add(&rbconn->list, tuple, zone);
- if (ret == NF_CONNCOUNT_ERR) {
- return 0; /* hotdrop */
- } else if (ret == NF_CONNCOUNT_ADDED) {
- return rbconn->list.count;
- } else {
- /* NF_CONNCOUNT_SKIP, rbconn is already
- * reclaimed by gc, insert a new tree node
- */
+ spin_lock_bh(&rbconn->list.list_lock);
+ /* Node might be about to be free'd.
+ * We need to defer to insert_tree() in this case.
+ */
+ if (rbconn->list.count == 0) {
+ spin_unlock_bh(&rbconn->list.list_lock);
break;
}
+
+ /* same source network -> be counted! */
+ ret = __nf_conncount_add(net, &rbconn->list, tuple, zone);
+ spin_unlock_bh(&rbconn->list.list_lock);
+ if (ret)
+ return 0; /* hotdrop */
+ else
+ return rbconn->list.count;
}
}
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index b90d96ba4a12..af1497ab9464 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -30,7 +30,6 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
unsigned int count;
- bool addit;
tuple_ptr = &tuple;
@@ -44,19 +43,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
return;
}
- nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
- &addit);
- count = priv->list.count;
-
- if (!addit)
- goto out;
-
- if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) {
+ if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) {
regs->verdict.code = NF_DROP;
return;
}
- count++;
-out:
+
+ count = priv->list.count;
if ((count > priv->limit) ^ priv->invert) {
regs->verdict.code = NFT_BREAK;
--
2.11.0
next prev parent reply other threads:[~2018-12-29 12:58 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-12-29 12:57 [PATCH 0/9] Netfilter fixes for net Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 1/9] netfilter: nf_tables: fix a missing check of nla_put_failure Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 2/9] netfilter: nf_conncount: replace CONNCOUNT_LOCK_SLOTS with CONNCOUNT_SLOTS Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 3/9] netfilter: nf_conncount: don't skip eviction when age is negative Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 4/9] netfilter: nf_conncount: split gc in two phases Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 5/9] netfilter: nf_conncount: restart search when nodes have been erased Pablo Neira Ayuso
2018-12-29 12:58 ` Pablo Neira Ayuso [this message]
2018-12-29 12:58 ` [PATCH 7/9] netfilter: nf_conncount: move all list iterations under spinlock Pablo Neira Ayuso
2018-12-29 12:58 ` [PATCH 8/9] netfilter: nf_conncount: speculative garbage collection on empty lists Pablo Neira Ayuso
2018-12-29 12:58 ` [PATCH 9/9] netfilter: nf_conncount: fix argument order to find_next_bit Pablo Neira Ayuso
2018-12-29 22:33 ` [PATCH 0/9] Netfilter fixes for net David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181229125803.7415-7-pablo@netfilter.org \
--to=pablo@netfilter.org \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.