* [PATCH 5.4 134/159] netfilter: nf_tables: pass context to nft_set_destroy()
[not found] <20231124171941.909624388@linuxfoundation.org>
@ 2023-11-24 17:55 ` Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 135/159] netfilter: nftables: rename set element data activation/deactivation functions Greg Kroah-Hartman
` (24 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:55 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit 0c2a85edd143162b3a698f31e94bf8cdc041da87 upstream.
The patch that adds support for stateful expressions in set definitions
requires this.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nf_tables_api.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3852,7 +3852,7 @@ err1:
return err;
}
-static void nft_set_destroy(struct nft_set *set)
+static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
{
if (WARN_ON(set->use > 0))
return;
@@ -4024,7 +4024,7 @@ EXPORT_SYMBOL_GPL(nf_tables_deactivate_s
void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
{
if (list_empty(&set->bindings) && nft_set_is_anonymous(set))
- nft_set_destroy(set);
+ nft_set_destroy(ctx, set);
}
EXPORT_SYMBOL_GPL(nf_tables_destroy_set);
@@ -6715,7 +6715,7 @@ static void nft_commit_release(struct nf
nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
break;
case NFT_MSG_DELSET:
- nft_set_destroy(nft_trans_set(trans));
+ nft_set_destroy(&trans->ctx, nft_trans_set(trans));
break;
case NFT_MSG_DELSETELEM:
nf_tables_set_elem_destroy(&trans->ctx,
@@ -7176,7 +7176,7 @@ static void nf_tables_abort_release(stru
nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
break;
case NFT_MSG_NEWSET:
- nft_set_destroy(nft_trans_set(trans));
+ nft_set_destroy(&trans->ctx, nft_trans_set(trans));
break;
case NFT_MSG_NEWSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
@@ -7951,7 +7951,7 @@ static void __nft_release_table(struct n
list_for_each_entry_safe(set, ns, &table->sets, list) {
list_del(&set->list);
nft_use_dec(&table->use);
- nft_set_destroy(set);
+ nft_set_destroy(&ctx, set);
}
list_for_each_entry_safe(obj, ne, &table->objects, list) {
nft_obj_del(obj);
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 135/159] netfilter: nftables: rename set element data activation/deactivation functions
[not found] <20231124171941.909624388@linuxfoundation.org>
2023-11-24 17:55 ` [PATCH 5.4 134/159] netfilter: nf_tables: pass context to nft_set_destroy() Greg Kroah-Hartman
@ 2023-11-24 17:55 ` Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 136/159] netfilter: nf_tables: drop map element references from preparation phase Greg Kroah-Hartman
` (23 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:55 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit f8bb7889af58d8e74d2d61c76b1418230f1610fa upstream.
Rename:
- nft_set_elem_activate() to nft_set_elem_data_activate().
- nft_set_elem_deactivate() to nft_set_elem_data_deactivate().
To prepare for updates in the set element infrastructure to add support
for the special catch-all element.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nf_tables_api.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4602,8 +4602,8 @@ void nft_set_elem_destroy(const struct n
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
-/* Only called from commit path, nft_set_elem_deactivate() already deals with
- * the refcounting from the preparation phase.
+/* Only called from commit path, nft_setelem_data_deactivate() already deals
+ * with the refcounting from the preparation phase.
*/
static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
const struct nft_set *set, void *elem)
@@ -4919,9 +4919,9 @@ void nft_data_hold(const struct nft_data
}
}
-static void nft_set_elem_activate(const struct net *net,
- const struct nft_set *set,
- struct nft_set_elem *elem)
+static void nft_setelem_data_activate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
@@ -4931,9 +4931,9 @@ static void nft_set_elem_activate(const
nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
}
-static void nft_set_elem_deactivate(const struct net *net,
- const struct nft_set *set,
- struct nft_set_elem *elem)
+static void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
@@ -5000,7 +5000,7 @@ static int nft_del_setelem(struct nft_ct
kfree(elem.priv);
elem.priv = priv;
- nft_set_elem_deactivate(ctx->net, set, &elem);
+ nft_setelem_data_deactivate(ctx->net, set, &elem);
nft_trans_elem(trans) = elem;
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -5034,7 +5034,7 @@ static int nft_flush_set(const struct nf
}
set->ndeact++;
- nft_set_elem_deactivate(ctx->net, set, elem);
+ nft_setelem_data_deactivate(ctx->net, set, elem);
nft_trans_elem_set(trans) = set;
nft_trans_elem(trans) = *elem;
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -7277,7 +7277,7 @@ static int __nf_tables_abort(struct net
case NFT_MSG_DELSETELEM:
te = (struct nft_trans_elem *)trans->data;
- nft_set_elem_activate(net, te->set, &te->elem);
+ nft_setelem_data_activate(net, te->set, &te->elem);
te->set->ops->activate(net, te->set, &te->elem);
te->set->ndeact--;
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 136/159] netfilter: nf_tables: drop map element references from preparation phase
[not found] <20231124171941.909624388@linuxfoundation.org>
2023-11-24 17:55 ` [PATCH 5.4 134/159] netfilter: nf_tables: pass context to nft_set_destroy() Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 135/159] netfilter: nftables: rename set element data activation/deactivation functions Greg Kroah-Hartman
@ 2023-11-24 17:55 ` Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 137/159] netfilter: nft_set_rbtree: Switch to node list walk for overlap detection Greg Kroah-Hartman
` (22 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:55 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit 628bd3e49cba1c066228e23d71a852c23e26da73 upstream.
The set .destroy callback releases the references to other objects in maps.
This is very late and results in spurious EBUSY errors. Drop the refcount
from the preparation phase instead, and update the set backends not to drop
the reference counter from the set .destroy path.
Exceptions: NFT_TRANS_PREPARE_ERROR does not require dropping the
reference counter because the transaction abort path releases the map
references for each element, since the set is unbound. The abort path
also deals with releasing the reference counter for new elements added to
unbound sets.
Fixes: 591054469b3e ("netfilter: nf_tables: revisit chain/object refcounting from elements")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/net/netfilter/nf_tables.h | 5 +-
net/netfilter/nf_tables_api.c | 89 ++++++++++++++++++++++++++++++++++----
net/netfilter/nft_set_bitmap.c | 5 +-
net/netfilter/nft_set_hash.c | 23 +++++++--
net/netfilter/nft_set_rbtree.c | 5 +-
5 files changed, 108 insertions(+), 19 deletions(-)
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -371,7 +371,8 @@ struct nft_set_ops {
int (*init)(const struct nft_set *set,
const struct nft_set_desc *desc,
const struct nlattr * const nla[]);
- void (*destroy)(const struct nft_set *set);
+ void (*destroy)(const struct nft_ctx *ctx,
+ const struct nft_set *set);
void (*gc_init)(const struct nft_set *set);
unsigned int elemsize;
@@ -665,6 +666,8 @@ void *nft_set_elem_init(const struct nft
u64 timeout, u64 expiration, gfp_t gfp);
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr);
+void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set, void *elem);
/**
* struct nft_set_gc_batch_head - nf_tables set garbage collection batch
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -403,6 +403,31 @@ static int nft_trans_set_add(const struc
return 0;
}
+static void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
+static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ nft_setelem_data_deactivate(ctx->net, set, elem);
+
+ return 0;
+}
+
+static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .fn = nft_mapelem_deactivate,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ WARN_ON_ONCE(iter.err);
+}
+
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
@@ -411,6 +436,9 @@ static int nft_delset(const struct nft_c
if (err < 0)
return err;
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
nft_deactivate_next(ctx->net, set);
nft_use_dec(&ctx->table->use);
@@ -3840,7 +3868,7 @@ static int nf_tables_newset(struct net *
return 0;
err4:
- ops->destroy(set);
+ ops->destroy(&ctx, set);
err3:
kfree(set->name);
err2:
@@ -3857,7 +3885,7 @@ static void nft_set_destroy(const struct
if (WARN_ON(set->use > 0))
return;
- set->ops->destroy(set);
+ set->ops->destroy(ctx, set);
module_put(to_set_type(set->ops)->owner);
kfree(set->name);
kvfree(set);
@@ -3981,10 +4009,39 @@ static void nf_tables_unbind_set(const s
}
}
+static void nft_setelem_data_activate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
+static int nft_mapelem_activate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ nft_setelem_data_activate(ctx->net, set, elem);
+
+ return 0;
+}
+
+static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .fn = nft_mapelem_activate,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ WARN_ON_ONCE(iter.err);
+}
+
void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
{
- if (nft_set_is_anonymous(set))
+ if (nft_set_is_anonymous(set)) {
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(ctx, set);
+
nft_clear(ctx->net, set);
+ }
nft_use_inc_restore(&set->use);
}
@@ -4005,13 +4062,20 @@ void nf_tables_deactivate_set(const stru
nft_use_dec(&set->use);
break;
case NFT_TRANS_PREPARE:
- if (nft_set_is_anonymous(set))
- nft_deactivate_next(ctx->net, set);
+ if (nft_set_is_anonymous(set)) {
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+ nft_deactivate_next(ctx->net, set);
+ }
nft_use_dec(&set->use);
return;
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
+ if (nft_set_is_anonymous(set) &&
+ set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
nft_use_dec(&set->use);
/* fall through */
default:
@@ -4574,6 +4638,7 @@ void *nft_set_elem_init(const struct nft
return elem;
}
+/* Drop references and destroy. Called from gc, dynset and abort path. */
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr)
{
@@ -4602,11 +4667,11 @@ void nft_set_elem_destroy(const struct n
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
-/* Only called from commit path, nft_setelem_data_deactivate() already deals
- * with the refcounting from the preparation phase.
+/* Destroy element. References have been already dropped in the preparation
+ * path via nft_setelem_data_deactivate().
*/
-static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
- const struct nft_set *set, void *elem)
+void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set, void *elem)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
@@ -4614,6 +4679,7 @@ static void nf_tables_set_elem_destroy(c
nf_tables_expr_destroy(ctx, nft_set_ext_expr(ext));
kfree(elem);
}
+EXPORT_SYMBOL_GPL(nf_tables_set_elem_destroy);
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr, u32 nlmsg_flags)
@@ -7263,6 +7329,8 @@ static int __nf_tables_abort(struct net
case NFT_MSG_DELSET:
nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_set(trans));
+ if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(&trans->ctx, nft_trans_set(trans));
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
@@ -7951,6 +8019,9 @@ static void __nft_release_table(struct n
list_for_each_entry_safe(set, ns, &table->sets, list) {
list_del(&set->list);
nft_use_dec(&table->use);
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(&ctx, set);
+
nft_set_destroy(&ctx, set);
}
list_for_each_entry_safe(obj, ne, &table->objects, list) {
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -270,13 +270,14 @@ static int nft_bitmap_init(const struct
return 0;
}
-static void nft_bitmap_destroy(const struct nft_set *set)
+static void nft_bitmap_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be, *n;
list_for_each_entry_safe(be, n, &priv->list, head)
- nft_set_elem_destroy(set, be, true);
+ nf_tables_set_elem_destroy(ctx, set, be);
}
static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -380,19 +380,31 @@ static int nft_rhash_init(const struct n
return 0;
}
+struct nft_rhash_ctx {
+ const struct nft_ctx ctx;
+ const struct nft_set *set;
+};
+
static void nft_rhash_elem_destroy(void *ptr, void *arg)
{
- nft_set_elem_destroy(arg, ptr, true);
+ struct nft_rhash_ctx *rhash_ctx = arg;
+
+ nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr);
}
-static void nft_rhash_destroy(const struct nft_set *set)
+static void nft_rhash_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_ctx rhash_ctx = {
+ .ctx = *ctx,
+ .set = set,
+ };
cancel_delayed_work_sync(&priv->gc_work);
rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
- (void *)set);
+ (void *)&rhash_ctx);
}
/* Number of buckets is stored in u32, so cap our result to 1U<<31 */
@@ -621,7 +633,8 @@ static int nft_hash_init(const struct nf
return 0;
}
-static void nft_hash_destroy(const struct nft_set *set)
+static void nft_hash_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he;
@@ -631,7 +644,7 @@ static void nft_hash_destroy(const struc
for (i = 0; i < priv->buckets; i++) {
hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
hlist_del_rcu(&he->node);
- nft_set_elem_destroy(set, he, true);
+ nf_tables_set_elem_destroy(ctx, set, he);
}
}
}
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -480,7 +480,8 @@ static int nft_rbtree_init(const struct
return 0;
}
-static void nft_rbtree_destroy(const struct nft_set *set)
+static void nft_rbtree_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
@@ -491,7 +492,7 @@ static void nft_rbtree_destroy(const str
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_set_elem_destroy(set, rbe, true);
+ nf_tables_set_elem_destroy(ctx, set, rbe);
}
}
^ permalink raw reply [flat|nested] 26+ messages in thread
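A condensed view of the mechanism may help while reading the hunks above: the
preparation phase now walks the map and drops each element's chain/object
reference, so the backend ->destroy path only has to free memory. The sketch
below merely restates the helpers added by this patch, with unrelated context
removed; it is not a drop-in replacement.

static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
				  struct nft_set *set,
				  const struct nft_set_iter *iter,
				  struct nft_set_elem *elem)
{
	/* drop the chain/object reference held by this map element */
	nft_setelem_data_deactivate(ctx->net, set, elem);

	return 0;
}

static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
{
	struct nft_set_iter iter = {
		.genmask = nft_genmask_next(ctx->net),
		.fn	= nft_mapelem_deactivate,
	};

	/* references are gone before ->destroy ever runs */
	set->ops->walk(ctx, set, &iter);
	WARN_ON_ONCE(iter.err);
}

nft_delset() invokes this walk for NFT_SET_MAP/NFT_SET_OBJECT sets, and the
abort path undoes it with the symmetric nft_map_activate() walk added by the
same patch.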
* [PATCH 5.4 137/159] netfilter: nft_set_rbtree: Switch to node list walk for overlap detection
[not found] <20231124171941.909624388@linuxfoundation.org>
` (2 preceding siblings ...)
2023-11-24 17:55 ` [PATCH 5.4 136/159] netfilter: nf_tables: drop map element references from preparation phase Greg Kroah-Hartman
@ 2023-11-24 17:55 ` Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 138/159] netfilter: nft_set_rbtree: fix null deref on element insertion Greg Kroah-Hartman
` (21 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:55 UTC (permalink / raw)
To: stable, netfilter-devel
Cc: Greg Kroah-Hartman, patches, Stefano Brivio, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit c9e6978e2725a7d4b6cd23b2facd3f11422c0643 upstream.
...instead of a tree descent, which became overly complicated in an
attempt to cover cases where expired or inactive elements would affect
comparisons with the new element being inserted.
Further, it turned out that it's probably impossible to cover all those
cases, as inactive nodes might entirely hide subtrees consisting of a
complete interval plus a node that makes the current insertion not
overlap.
To speed up the overlap check, descend the tree to find a greater
element that is closest to the key value to insert. Then walk down the
node list for overlap detection. Starting the overlap check from
rb_first() unconditionally is slow: it takes 10 times longer due to the
full linear traversal of the list.
Moreover, perform garbage collection of expired elements when walking
down the node list to avoid bogus overlap reports.
For the insertion operation itself, this essentially reverts to the
implementation before commit 7c84d41416d8 ("netfilter: nft_set_rbtree:
Detect partial overlaps on insertion"), except that cases of complete
overlap are already handled in the overlap detection phase itself, which
slightly simplifies the loop to find the insertion point.
Based on initial patch from Stefano Brivio, including text from the
original patch description too.
Fixes: 7c84d41416d8 ("netfilter: nft_set_rbtree: Detect partial overlaps on insertion")
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nft_set_rbtree.c | 223 ++++++++++++++++++++++++++++++++++++-----
1 file changed, 198 insertions(+), 25 deletions(-)
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -38,10 +38,12 @@ static bool nft_rbtree_interval_start(co
return !nft_rbtree_interval_end(rbe);
}
-static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
- const struct nft_rbtree_elem *interval)
+static int nft_rbtree_cmp(const struct nft_set *set,
+ const struct nft_rbtree_elem *e1,
+ const struct nft_rbtree_elem *e2)
{
- return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
+ return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext),
+ set->klen);
}
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
@@ -52,7 +54,6 @@ static bool __nft_rbtree_lookup(const st
const struct nft_rbtree_elem *rbe, *interval = NULL;
u8 genmask = nft_genmask_cur(net);
const struct rb_node *parent;
- const void *this;
int d;
parent = rcu_dereference_raw(priv->root.rb_node);
@@ -62,12 +63,11 @@ static bool __nft_rbtree_lookup(const st
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- this = nft_set_ext_key(&rbe->ext);
- d = memcmp(this, key, set->klen);
+ d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
if (d < 0) {
parent = rcu_dereference_raw(parent->rb_left);
if (interval &&
- nft_rbtree_equal(set, this, interval) &&
+ !nft_rbtree_cmp(set, rbe, interval) &&
nft_rbtree_interval_end(rbe) &&
nft_rbtree_interval_start(interval))
continue;
@@ -214,43 +214,216 @@ static void *nft_rbtree_get(const struct
return rbe;
}
+static int nft_rbtree_gc_elem(const struct nft_set *__set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ struct nft_set *set = (struct nft_set *)__set;
+ struct rb_node *prev = rb_prev(&rbe->node);
+ struct nft_rbtree_elem *rbe_prev;
+ struct nft_set_gc_batch *gcb;
+
+ gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
+ if (!gcb)
+ return -ENOMEM;
+
+ /* search for expired end interval coming before this element. */
+ do {
+ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+ if (nft_rbtree_interval_end(rbe_prev))
+ break;
+
+ prev = rb_prev(prev);
+ } while (prev != NULL);
+
+ rb_erase(&rbe_prev->node, &priv->root);
+ rb_erase(&rbe->node, &priv->root);
+ atomic_sub(2, &set->nelems);
+
+ nft_set_gc_batch_add(gcb, rbe);
+ nft_set_gc_batch_complete(gcb);
+
+ return 0;
+}
+
+static bool nft_rbtree_update_first(const struct nft_set *set,
+ struct nft_rbtree_elem *rbe,
+ struct rb_node *first)
+{
+ struct nft_rbtree_elem *first_elem;
+
+ first_elem = rb_entry(first, struct nft_rbtree_elem, node);
+ /* this element is closest to where the new element is to be inserted:
+ * update the first element for the node list path.
+ */
+ if (nft_rbtree_cmp(set, rbe, first_elem) < 0)
+ return true;
+
+ return false;
+}
+
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_set_ext **ext)
{
+ struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
+ struct rb_node *node, *parent, **p, *first = NULL;
struct nft_rbtree *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
- struct nft_rbtree_elem *rbe;
- struct rb_node *parent, **p;
- int d;
+ int d, err;
+ /* Descend the tree to search for an existing element greater than the
+ * key value to insert that is greater than the new element. This is the
+ * first element to walk the ordered elements to find possible overlap.
+ */
parent = NULL;
p = &priv->root.rb_node;
while (*p != NULL) {
parent = *p;
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = memcmp(nft_set_ext_key(&rbe->ext),
- nft_set_ext_key(&new->ext),
- set->klen);
- if (d < 0)
+ d = nft_rbtree_cmp(set, rbe, new);
+
+ if (d < 0) {
p = &parent->rb_left;
- else if (d > 0)
+ } else if (d > 0) {
+ if (!first ||
+ nft_rbtree_update_first(set, rbe, first))
+ first = &rbe->node;
+
p = &parent->rb_right;
- else {
- if (nft_rbtree_interval_end(rbe) &&
- nft_rbtree_interval_start(new)) {
+ } else {
+ if (nft_rbtree_interval_end(rbe))
p = &parent->rb_left;
- } else if (nft_rbtree_interval_start(rbe) &&
- nft_rbtree_interval_end(new)) {
+ else
p = &parent->rb_right;
- } else if (nft_set_elem_active(&rbe->ext, genmask)) {
- *ext = &rbe->ext;
- return -EEXIST;
- } else {
- p = &parent->rb_left;
+ }
+ }
+
+ if (!first)
+ first = rb_first(&priv->root);
+
+ /* Detect overlap by going through the list of valid tree nodes.
+ * Values stored in the tree are in reversed order, starting from
+ * highest to lowest value.
+ */
+ for (node = first; node != NULL; node = rb_next(node)) {
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ continue;
+
+ /* perform garbage collection to avoid bogus overlap reports. */
+ if (nft_set_elem_expired(&rbe->ext)) {
+ err = nft_rbtree_gc_elem(set, priv, rbe);
+ if (err < 0)
+ return err;
+
+ continue;
+ }
+
+ d = nft_rbtree_cmp(set, rbe, new);
+ if (d == 0) {
+ /* Matching end element: no need to look for an
+ * overlapping greater or equal element.
+ */
+ if (nft_rbtree_interval_end(rbe)) {
+ rbe_le = rbe;
+ break;
+ }
+
+ /* first element that is greater or equal to key value. */
+ if (!rbe_ge) {
+ rbe_ge = rbe;
+ continue;
+ }
+
+ /* this is a closer more or equal element, update it. */
+ if (nft_rbtree_cmp(set, rbe_ge, new) != 0) {
+ rbe_ge = rbe;
+ continue;
}
+
+ /* element is equal to key value, make sure flags are
+ * the same, an existing more or equal start element
+ * must not be replaced by more or equal end element.
+ */
+ if ((nft_rbtree_interval_start(new) &&
+ nft_rbtree_interval_start(rbe_ge)) ||
+ (nft_rbtree_interval_end(new) &&
+ nft_rbtree_interval_end(rbe_ge))) {
+ rbe_ge = rbe;
+ continue;
+ }
+ } else if (d > 0) {
+ /* annotate element greater than the new element. */
+ rbe_ge = rbe;
+ continue;
+ } else if (d < 0) {
+ /* annotate element less than the new element. */
+ rbe_le = rbe;
+ break;
}
}
+
+ /* - new start element matching existing start element: full overlap
+ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
+ */
+ if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
+ nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
+ *ext = &rbe_ge->ext;
+ return -EEXIST;
+ }
+
+ /* - new end element matching existing end element: full overlap
+ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
+ */
+ if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) &&
+ nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) {
+ *ext = &rbe_le->ext;
+ return -EEXIST;
+ }
+
+ /* - new start element with existing closest, less or equal key value
+ * being a start element: partial overlap, reported as -ENOTEMPTY.
+ * Anonymous sets allow for two consecutive start element since they
+ * are constant, skip them to avoid bogus overlap reports.
+ */
+ if (!nft_set_is_anonymous(set) && rbe_le &&
+ nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new))
+ return -ENOTEMPTY;
+
+ /* - new end element with existing closest, less or equal key value
+ * being a end element: partial overlap, reported as -ENOTEMPTY.
+ */
+ if (rbe_le &&
+ nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new))
+ return -ENOTEMPTY;
+
+ /* - new end element with existing closest, greater or equal key value
+ * being an end element: partial overlap, reported as -ENOTEMPTY
+ */
+ if (rbe_ge &&
+ nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new))
+ return -ENOTEMPTY;
+
+ /* Accepted element: pick insertion point depending on key value */
+ parent = NULL;
+ p = &priv->root.rb_node;
+ while (*p != NULL) {
+ parent = *p;
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+ d = nft_rbtree_cmp(set, rbe, new);
+
+ if (d < 0)
+ p = &parent->rb_left;
+ else if (d > 0)
+ p = &parent->rb_right;
+ else if (nft_rbtree_interval_end(rbe))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+
rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
^ permalink raw reply [flat|nested] 26+ messages in thread
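As a reading aid for the long __nft_rbtree_insert() hunk above, the list walk
that replaces the old tree-descent overlap check boils down to the loop below.
This is a simplified sketch of the code in the patch (the genmask check and
the equal-key flag preferences are trimmed), not a replacement for it.

	/* walk the ordered nodes starting from the closest greater element */
	for (node = first; node != NULL; node = rb_next(node)) {
		rbe = rb_entry(node, struct nft_rbtree_elem, node);

		/* lazy GC so stale entries cannot cause bogus overlap reports */
		if (nft_set_elem_expired(&rbe->ext)) {
			err = nft_rbtree_gc_elem(set, priv, rbe);
			if (err < 0)
				return err;

			continue;
		}

		d = nft_rbtree_cmp(set, rbe, new);
		if (d == 0 && nft_rbtree_interval_end(rbe)) {
			rbe_le = rbe;	/* matching end element: stop */
			break;
		} else if (d >= 0) {
			rbe_ge = rbe;	/* candidate greater-or-equal element */
		} else {
			rbe_le = rbe;	/* first smaller element: stop */
			break;
		}
	}

The decision that follows is then: a matching start/start or end/end element
is a full overlap (-EEXIST, cleared by the caller unless NLM_F_EXCL was
given), two consecutive start elements or two consecutive end elements are a
partial overlap (-ENOTEMPTY), and everything else is accepted and linked in
via a second, plain descent.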
* [PATCH 5.4 138/159] netfilter: nft_set_rbtree: fix null deref on element insertion
[not found] <20231124171941.909624388@linuxfoundation.org>
` (3 preceding siblings ...)
2023-11-24 17:55 ` [PATCH 5.4 137/159] netfilter: nft_set_rbtree: Switch to node list walk for overlap detection Greg Kroah-Hartman
@ 2023-11-24 17:55 ` Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 139/159] netfilter: nft_set_rbtree: fix overlap expiration walk Greg Kroah-Hartman
` (20 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:55 UTC (permalink / raw)
To: stable, netfilter-devel
Cc: Greg Kroah-Hartman, patches, Florian Westphal, Sasha Levin
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Florian Westphal <fw@strlen.de>
commit 61ae320a29b0540c16931816299eb86bf2b66c08 upstream.
There is no guarantee that rb_prev() will not return NULL in nft_rbtree_gc_elem():
general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN
KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f]
nft_add_set_elem+0x14b0/0x2990
nf_tables_newsetelem+0x528/0xb30
Furthermore, there is a possible use-after-free while iterating:
'node' can be freed, so we need to cache the next value to use.
Fixes: c9e6978e2725 ("netfilter: nft_set_rbtree: Switch to node list walk for overlap detection")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nft_set_rbtree.c | 20 +++++++++++++-------
1 file changed, 13 insertions(+), 7 deletions(-)
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -220,7 +220,7 @@ static int nft_rbtree_gc_elem(const stru
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
- struct nft_rbtree_elem *rbe_prev;
+ struct nft_rbtree_elem *rbe_prev = NULL;
struct nft_set_gc_batch *gcb;
gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
@@ -228,17 +228,21 @@ static int nft_rbtree_gc_elem(const stru
return -ENOMEM;
/* search for expired end interval coming before this element. */
- do {
+ while (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
if (nft_rbtree_interval_end(rbe_prev))
break;
prev = rb_prev(prev);
- } while (prev != NULL);
+ }
+
+ if (rbe_prev) {
+ rb_erase(&rbe_prev->node, &priv->root);
+ atomic_dec(&set->nelems);
+ }
- rb_erase(&rbe_prev->node, &priv->root);
rb_erase(&rbe->node, &priv->root);
- atomic_sub(2, &set->nelems);
+ atomic_dec(&set->nelems);
nft_set_gc_batch_add(gcb, rbe);
nft_set_gc_batch_complete(gcb);
@@ -267,7 +271,7 @@ static int __nft_rbtree_insert(const str
struct nft_set_ext **ext)
{
struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
- struct rb_node *node, *parent, **p, *first = NULL;
+ struct rb_node *node, *next, *parent, **p, *first = NULL;
struct nft_rbtree *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
int d, err;
@@ -306,7 +310,9 @@ static int __nft_rbtree_insert(const str
* Values stored in the tree are in reversed order, starting from
* highest to lowest value.
*/
- for (node = first; node != NULL; node = rb_next(node)) {
+ for (node = first; node != NULL; node = next) {
+ next = rb_next(node);
+
rbe = rb_entry(node, struct nft_rbtree_elem, node);
if (!nft_set_elem_active(&rbe->ext, genmask))
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 139/159] netfilter: nft_set_rbtree: fix overlap expiration walk
[not found] <20231124171941.909624388@linuxfoundation.org>
` (4 preceding siblings ...)
2023-11-24 17:55 ` [PATCH 5.4 138/159] netfilter: nft_set_rbtree: fix null deref on element insertion Greg Kroah-Hartman
@ 2023-11-24 17:55 ` Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 140/159] netfilter: nf_tables: dont skip expired elements during walk Greg Kroah-Hartman
` (19 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:55 UTC (permalink / raw)
To: stable, netfilter-devel
Cc: Greg Kroah-Hartman, patches, Florian Westphal, Sasha Levin
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Florian Westphal <fw@strlen.de>
commit f718863aca469a109895cb855e6b81fff4827d71 upstream.
The lazy gc on insert that should remove timed-out entries fails to release
the other half of the interval, if any.
Can be reproduced with tests/shell/testcases/sets/0044interval_overlap_0
in nftables.git and kmemleak enabled kernel.
The second bug is the use of rbe_prev vs. the prev pointer.
If rb_prev() returns NULL after at least one iteration, rbe_prev still points
to an element that is not an end interval, hence it should not be removed.
Lastly, check the genmask of the end interval to make sure it is active in the
current generation.
Fixes: c9e6978e2725 ("netfilter: nft_set_rbtree: Switch to node list walk for overlap detection")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nft_set_rbtree.c | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -216,29 +216,37 @@ static void *nft_rbtree_get(const struct
static int nft_rbtree_gc_elem(const struct nft_set *__set,
struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe)
+ struct nft_rbtree_elem *rbe,
+ u8 genmask)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
- struct nft_rbtree_elem *rbe_prev = NULL;
+ struct nft_rbtree_elem *rbe_prev;
struct nft_set_gc_batch *gcb;
gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
if (!gcb)
return -ENOMEM;
- /* search for expired end interval coming before this element. */
+ /* search for end interval coming before this element.
+ * end intervals don't carry a timeout extension, they
+ * are coupled with the interval start element.
+ */
while (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
- if (nft_rbtree_interval_end(rbe_prev))
+ if (nft_rbtree_interval_end(rbe_prev) &&
+ nft_set_elem_active(&rbe_prev->ext, genmask))
break;
prev = rb_prev(prev);
}
- if (rbe_prev) {
+ if (prev) {
+ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+
rb_erase(&rbe_prev->node, &priv->root);
atomic_dec(&set->nelems);
+ nft_set_gc_batch_add(gcb, rbe_prev);
}
rb_erase(&rbe->node, &priv->root);
@@ -320,7 +328,7 @@ static int __nft_rbtree_insert(const str
/* perform garbage collection to avoid bogus overlap reports. */
if (nft_set_elem_expired(&rbe->ext)) {
- err = nft_rbtree_gc_elem(set, priv, rbe);
+ err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
if (err < 0)
return err;
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 140/159] netfilter: nf_tables: dont skip expired elements during walk
[not found] <20231124171941.909624388@linuxfoundation.org>
` (5 preceding siblings ...)
2023-11-24 17:55 ` [PATCH 5.4 139/159] netfilter: nft_set_rbtree: fix overlap expiration walk Greg Kroah-Hartman
@ 2023-11-24 17:55 ` Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 141/159] netfilter: nf_tables: GC transaction API to avoid race with control plane Greg Kroah-Hartman
` (18 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:55 UTC (permalink / raw)
To: stable, netfilter-devel
Cc: Greg Kroah-Hartman, patches, Florian Westphal, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Florian Westphal <fw@strlen.de>
commit 24138933b97b055d486e8064b4a1721702442a9b upstream.
There is an asymmetry between commit/abort and preparation phase if the
following conditions are met:
1. set is a verdict map ("1.2.3.4 : jump foo")
2. timeouts are enabled
In this case, the following sequence is problematic:
1. element E in set S refers to chain C
2. userspace requests removal of set S
3. kernel does a set walk to decrement chain->use count for all elements
from preparation phase
4. kernel does another set walk to remove elements from the commit phase
(or another walk to do a chain->use increment for all elements from
abort phase)
If E has already expired in 1), it will be ignored during the list walk, so its
use count won't have been changed.
Then, when the set is culled, the ->destroy callback will zap the element via
nf_tables_set_elem_destroy(), but this function is only safe for
elements that have been deactivated earlier from the preparation phase:
the lack of an earlier deactivation removes the element but leaks the chain
use count, which results in a WARN splat when the chain gets removed later,
plus a leak of the nft_chain structure.
Update pipapo_get() not to skip expired elements, otherwise the flush
command reports bogus ENOENT errors.
Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support")
Fixes: 9d0982927e79 ("netfilter: nft_hash: add support for timeouts")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nf_tables_api.c | 4 ++++
net/netfilter/nft_set_hash.c | 2 --
net/netfilter/nft_set_rbtree.c | 2 --
3 files changed, 4 insertions(+), 4 deletions(-)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4258,8 +4258,12 @@ static int nf_tables_dump_setelem(const
const struct nft_set_iter *iter,
struct nft_set_elem *elem)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
struct nft_set_dump_args *args;
+ if (nft_set_elem_expired(ext))
+ return 0;
+
args = container_of(iter, struct nft_set_dump_args, iter);
return nf_tables_fill_setelem(args->skb, set, elem);
}
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -277,8 +277,6 @@ static void nft_rhash_walk(const struct
if (iter->count < iter->skip)
goto cont;
- if (nft_set_elem_expired(&he->ext))
- goto cont;
if (!nft_set_elem_active(&he->ext, iter->genmask))
goto cont;
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -553,8 +553,6 @@ static void nft_rbtree_walk(const struct
if (iter->count < iter->skip)
goto cont;
- if (nft_set_elem_expired(&rbe->ext))
- goto cont;
if (!nft_set_elem_active(&rbe->ext, iter->genmask))
goto cont;
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 141/159] netfilter: nf_tables: GC transaction API to avoid race with control plane
[not found] <20231124171941.909624388@linuxfoundation.org>
` (6 preceding siblings ...)
2023-11-24 17:55 ` [PATCH 5.4 140/159] netfilter: nf_tables: dont skip expired elements during walk Greg Kroah-Hartman
@ 2023-11-24 17:55 ` Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 142/159] netfilter: nf_tables: adapt set backend to use GC transaction API Greg Kroah-Hartman
` (17 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:55 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit 5f68718b34a531a556f2f50300ead2862278da26 upstream.
The set types rhashtable and rbtree use a GC worker to reclaim memory.
From the system work queue, in periodic intervals, a scan of the table is
done.
The major caveat here is that the nft transaction mutex is not held.
This causes a race between control plane and GC when they attempt to
delete the same element.
We cannot grab the netlink mutex from the work queue, because the
control plane has to wait for the GC work queue in case the set is to be
removed, so we get the following deadlock:
      cpu 1                               cpu 2
        GC work                           transaction comes in, lock nft mutex
         `acquire nft mutex // BLOCKS
                                          transaction asks to remove the set
                                          set destruction calls cancel_work_sync()
cancel_work_sync will now block forever, because it is waiting for the
mutex the caller already owns.
This patch adds a new API that deals with garbage collection in two
steps:
1) Lockless GC of expired elements sets on the NFT_SET_ELEM_DEAD_BIT
so they are not visible via lookup. Annotate current GC sequence in
the GC transaction. Enqueue GC transaction work as soon as it is
full. If ruleset is updated, then GC transaction is aborted and
retried later.
2) GC work grabs the mutex. If GC sequence has changed then this GC
transaction lost race with control plane, abort it as it contains
stale references to objects and let GC try again later. If the
ruleset is intact, then this GC transaction deactivates and removes
the elements and it uses call_rcu() to destroy elements.
Note that no elements are removed from the GC lockless path; the _DEAD bit
is set and pointers are collected. GC catchall does not remove the
elements anymore either. There is a new set->dead flag that is set to
abort the GC transaction, to deal with the set->ops->destroy() path which
removes the remaining elements in the set from commit_release, where no
mutex is held.
To deal with GC when the mutex is held, which allows safe deactivation and
removal, add a sync GC API which releases the set element object via
call_rcu(). This is used by the rbtree and pipapo backends, which also
perform garbage collection from the control plane path.
Since element removal from sets can happen from control plane and
element garbage collection/timeout, it is necessary to keep the set
structure alive until all elements have been deactivated and destroyed.
We cannot do a cancel_work_sync or flush_work in nft_set_destroy because
it's called with the transaction mutex held, but the aforementioned async
work queue might be blocked on the very mutex that the nft_set_destroy()
callchain is sitting on.
This gives us the choice of ABBA deadlock or UaF.
To avoid both, add a set->refs refcount_t member. The GC API can then
increment the set refcount and release it once the elements have been
freed.
Set backends are adapted to use the GC transaction API in a follow up
patch entitled:
("netfilter: nf_tables: use gc transaction API in set backends")
This is joint work with Florian Westphal.
Fixes: cfed7e1b1f8e ("netfilter: nf_tables: add set garbage collection helpers")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/net/netfilter/nf_tables.h | 61 ++++++++++
net/netfilter/nf_tables_api.c | 225 ++++++++++++++++++++++++++++++++++++--
2 files changed, 276 insertions(+), 10 deletions(-)
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -402,6 +402,7 @@ void nft_unregister_set(struct nft_set_t
*
* @list: table set list node
* @bindings: list of set bindings
+ * @refs: internal refcounting for async set destruction
* @table: table this set belongs to
* @net: netnamespace this set belongs to
* @name: name of the set
@@ -428,6 +429,7 @@ void nft_unregister_set(struct nft_set_t
struct nft_set {
struct list_head list;
struct list_head bindings;
+ refcount_t refs;
struct nft_table *table;
possible_net_t net;
char *name;
@@ -446,7 +448,8 @@ struct nft_set {
unsigned char *udata;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
- u16 flags:14,
+ u16 flags:13,
+ dead:1,
genmask:2;
u8 klen;
u8 dlen;
@@ -1386,6 +1389,32 @@ static inline void nft_set_elem_clear_bu
clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
}
+#define NFT_SET_ELEM_DEAD_MASK (1 << 3)
+
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+#define NFT_SET_ELEM_DEAD_BIT 3
+#elif defined(__BIG_ENDIAN_BITFIELD)
+#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 3)
+#else
+#error
+#endif
+
+static inline void nft_set_elem_dead(struct nft_set_ext *ext)
+{
+ unsigned long *word = (unsigned long *)ext;
+
+ BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
+ set_bit(NFT_SET_ELEM_DEAD_BIT, word);
+}
+
+static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
+{
+ unsigned long *word = (unsigned long *)ext;
+
+ BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
+ return test_bit(NFT_SET_ELEM_DEAD_BIT, word);
+}
+
/**
* struct nft_trans - nf_tables object update in transaction
*
@@ -1490,6 +1519,35 @@ struct nft_trans_flowtable {
#define nft_trans_flowtable(trans) \
(((struct nft_trans_flowtable *)trans->data)->flowtable)
+#define NFT_TRANS_GC_BATCHCOUNT 256
+
+struct nft_trans_gc {
+ struct list_head list;
+ struct net *net;
+ struct nft_set *set;
+ u32 seq;
+ u8 count;
+ void *priv[NFT_TRANS_GC_BATCHCOUNT];
+ struct rcu_head rcu;
+};
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+ unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_destroy(struct nft_trans_gc *trans);
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc);
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp);
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);
+
+void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
int __init nft_chain_filter_init(void);
void nft_chain_filter_fini(void);
@@ -1510,6 +1568,7 @@ struct nftables_pernet {
struct mutex commit_mutex;
unsigned int base_seq;
u8 validate_state;
+ unsigned int gc_seq;
};
#endif /* _NET_NF_TABLES_H */
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -26,12 +26,15 @@
#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
unsigned int nf_tables_net_id __read_mostly;
+EXPORT_SYMBOL_GPL(nf_tables_net_id);
static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static LIST_HEAD(nf_tables_destroy_list);
+static LIST_HEAD(nf_tables_gc_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
+static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
static u64 table_handle;
enum {
@@ -88,6 +91,9 @@ static void nft_validate_state_update(st
static void nf_tables_trans_destroy_work(struct work_struct *w);
static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
+static void nft_trans_gc_work(struct work_struct *work);
+static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
+
static void nft_ctx_init(struct nft_ctx *ctx,
struct net *net,
const struct sk_buff *skb,
@@ -403,10 +409,6 @@ static int nft_trans_set_add(const struc
return 0;
}
-static void nft_setelem_data_deactivate(const struct net *net,
- const struct nft_set *set,
- struct nft_set_elem *elem);
-
static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
@@ -3838,6 +3840,7 @@ static int nf_tables_newset(struct net *
}
INIT_LIST_HEAD(&set->bindings);
+ refcount_set(&set->refs, 1);
set->table = table;
write_pnet(&set->net, net);
set->ops = ops;
@@ -3880,6 +3883,14 @@ err1:
return err;
}
+static void nft_set_put(struct nft_set *set)
+{
+ if (refcount_dec_and_test(&set->refs)) {
+ kfree(set->name);
+ kvfree(set);
+ }
+}
+
static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
{
if (WARN_ON(set->use > 0))
@@ -3887,8 +3898,7 @@ static void nft_set_destroy(const struct
set->ops->destroy(ctx, set);
module_put(to_set_type(set->ops)->owner);
- kfree(set->name);
- kvfree(set);
+ nft_set_put(set);
}
static int nf_tables_delset(struct net *net, struct sock *nlsk,
@@ -5001,9 +5011,9 @@ static void nft_setelem_data_activate(co
nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
}
-static void nft_setelem_data_deactivate(const struct net *net,
- const struct nft_set *set,
- struct nft_set_elem *elem)
+void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
@@ -5012,6 +5022,7 @@ static void nft_setelem_data_deactivate(
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
nft_use_dec(&(*nft_set_ext_obj(ext))->use);
}
+EXPORT_SYMBOL_GPL(nft_setelem_data_deactivate);
static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
@@ -6964,6 +6975,186 @@ static void nft_chain_del(struct nft_cha
list_del_rcu(&chain->list);
}
+static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
+ struct nft_trans_gc *trans)
+{
+ void **priv = trans->priv;
+ unsigned int i;
+
+ for (i = 0; i < trans->count; i++) {
+ struct nft_set_elem elem = {
+ .priv = priv[i],
+ };
+
+ nft_setelem_data_deactivate(ctx->net, trans->set, &elem);
+ trans->set->ops->remove(trans->net, trans->set, &elem);
+ }
+}
+
+void nft_trans_gc_destroy(struct nft_trans_gc *trans)
+{
+ nft_set_put(trans->set);
+ put_net(trans->net);
+ kfree(trans);
+}
+EXPORT_SYMBOL_GPL(nft_trans_gc_destroy);
+
+static void nft_trans_gc_trans_free(struct rcu_head *rcu)
+{
+ struct nft_set_elem elem = {};
+ struct nft_trans_gc *trans;
+ struct nft_ctx ctx = {};
+ unsigned int i;
+
+ trans = container_of(rcu, struct nft_trans_gc, rcu);
+ ctx.net = read_pnet(&trans->set->net);
+
+ for (i = 0; i < trans->count; i++) {
+ elem.priv = trans->priv[i];
+ atomic_dec(&trans->set->nelems);
+
+ nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv);
+ }
+
+ nft_trans_gc_destroy(trans);
+}
+
+static bool nft_trans_gc_work_done(struct nft_trans_gc *trans)
+{
+ struct nftables_pernet *nft_net;
+ struct nft_ctx ctx = {};
+
+ nft_net = net_generic(trans->net, nf_tables_net_id);
+
+ mutex_lock(&nft_net->commit_mutex);
+
+ /* Check for race with transaction, otherwise this batch refers to
+ * stale objects that might not be there anymore. Skip transaction if
+ * set has been destroyed from control plane transaction in case gc
+ * worker loses race.
+ */
+ if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) {
+ mutex_unlock(&nft_net->commit_mutex);
+ return false;
+ }
+
+ ctx.net = trans->net;
+ ctx.table = trans->set->table;
+
+ nft_trans_gc_setelem_remove(&ctx, trans);
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return true;
+}
+
+static void nft_trans_gc_work(struct work_struct *work)
+{
+ struct nft_trans_gc *trans, *next;
+ LIST_HEAD(trans_gc_list);
+
+ spin_lock(&nf_tables_destroy_list_lock);
+ list_splice_init(&nf_tables_gc_list, &trans_gc_list);
+ spin_unlock(&nf_tables_destroy_list_lock);
+
+ list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
+ list_del(&trans->list);
+ if (!nft_trans_gc_work_done(trans)) {
+ nft_trans_gc_destroy(trans);
+ continue;
+ }
+ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+ }
+}
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+ unsigned int gc_seq, gfp_t gfp)
+{
+ struct net *net = read_pnet(&set->net);
+ struct nft_trans_gc *trans;
+
+ trans = kzalloc(sizeof(*trans), gfp);
+ if (!trans)
+ return NULL;
+
+ refcount_inc(&set->refs);
+ trans->set = set;
+ trans->net = get_net(net);
+ trans->seq = gc_seq;
+
+ return trans;
+}
+EXPORT_SYMBOL_GPL(nft_trans_gc_alloc);
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv)
+{
+ trans->priv[trans->count++] = priv;
+}
+EXPORT_SYMBOL_GPL(nft_trans_gc_elem_add);
+
+static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
+{
+ spin_lock(&nf_tables_gc_list_lock);
+ list_add_tail(&trans->list, &nf_tables_gc_list);
+ spin_unlock(&nf_tables_gc_list_lock);
+
+ schedule_work(&trans_gc_work);
+}
+
+static int nft_trans_gc_space(struct nft_trans_gc *trans)
+{
+ return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq, gfp_t gfp)
+{
+ if (nft_trans_gc_space(gc))
+ return gc;
+
+ nft_trans_gc_queue_work(gc);
+
+ return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
+}
+EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async);
+
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
+{
+ if (trans->count == 0) {
+ nft_trans_gc_destroy(trans);
+ return;
+ }
+
+ nft_trans_gc_queue_work(trans);
+}
+EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async_done);
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
+{
+ if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
+ return NULL;
+
+ if (nft_trans_gc_space(gc))
+ return gc;
+
+ call_rcu(&gc->rcu, nft_trans_gc_trans_free);
+
+ return nft_trans_gc_alloc(gc->set, 0, gfp);
+}
+EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync);
+
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
+{
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net));
+
+ if (trans->count == 0) {
+ nft_trans_gc_destroy(trans);
+ return;
+ }
+
+ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+}
+EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync_done);
+
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
@@ -7018,6 +7209,7 @@ static int nf_tables_commit(struct net *
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
+ unsigned int gc_seq;
int err;
if (list_empty(&nft_net->commit_list)) {
@@ -7074,6 +7266,10 @@ static int nf_tables_commit(struct net *
while (++nft_net->base_seq == 0)
;
+ /* Bump gc counter, it becomes odd, this is the busy mark. */
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+
/* step 3. Start new generation, rules_gen_X now in use. */
net->nft.gencursor = nft_gencursor_next(net);
@@ -7151,6 +7347,7 @@ static int nf_tables_commit(struct net *
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSET:
+ nft_trans_set(trans)->dead = 1;
list_del_rcu(&nft_trans_set(trans)->list);
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
NFT_MSG_DELSET, GFP_KERNEL);
@@ -7212,6 +7409,8 @@ static int nf_tables_commit(struct net *
}
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+
+ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
nf_tables_commit_release(net);
return 0;
@@ -8064,6 +8263,7 @@ static int __net_init nf_tables_init_net
mutex_init(&nft_net->commit_mutex);
nft_net->base_seq = 1;
nft_net->validate_state = NFT_VALIDATE_SKIP;
+ nft_net->gc_seq = 0;
return 0;
}
@@ -8090,10 +8290,16 @@ static void __net_exit nf_tables_exit_ne
WARN_ON_ONCE(!list_empty(&nft_net->module_list));
}
+static void nf_tables_exit_batch(struct list_head *net_exit_list)
+{
+ flush_work(&trans_gc_work);
+}
+
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
.pre_exit = nf_tables_pre_exit_net,
.exit = nf_tables_exit_net,
+ .exit_batch = nf_tables_exit_batch,
.id = &nf_tables_net_id,
.size = sizeof(struct nftables_pernet),
};
@@ -8158,6 +8364,7 @@ static void __exit nf_tables_module_exit
nft_chain_filter_fini();
nft_chain_route_fini();
unregister_pernet_subsys(&nf_tables_net_ops);
+ cancel_work_sync(&trans_gc_work);
cancel_work_sync(&trans_destroy_work);
rcu_barrier();
rhltable_destroy(&nft_objname_ht);
^ permalink raw reply [flat|nested] 26+ messages in thread
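To make the intended use of the new helpers concrete, an async set-backend GC
worker is expected to follow roughly the pattern below; this is a condensed
sketch based on the rhashtable conversion in the next patch of this series,
with the backend-specific element walk replaced by a placeholder.

	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
	struct nft_trans_gc *gc;
	u32 gc_seq;

	gc_seq = READ_ONCE(nft_net->gc_seq);
	gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
	if (!gc)
		goto done;

	for_each_backend_element(he) {		/* placeholder, backend specific */
		/* ruleset changed under us: abort, the worker retries later */
		if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
			nft_trans_gc_destroy(gc);
			gc = NULL;
			goto try_later;
		}

		if (!nft_set_elem_expired(&he->ext))
			continue;

		/* hide the element from the lookup path, do not remove it here */
		nft_set_elem_dead(&he->ext);

		/* queue a full batch and start a new one if needed */
		gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
		if (!gc)
			goto try_later;

		nft_trans_gc_elem_add(gc, he);
	}

try_later:
	if (gc)
		nft_trans_gc_queue_async_done(gc);
done:
	/* re-arm the delayed GC work as before */

Sync GC from the control plane path uses nft_trans_gc_queue_sync() and
nft_trans_gc_queue_sync_done() instead, and may deactivate and remove elements
directly since the commit mutex is held there.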
* [PATCH 5.4 142/159] netfilter: nf_tables: adapt set backend to use GC transaction API
[not found] <20231124171941.909624388@linuxfoundation.org>
` (7 preceding siblings ...)
2023-11-24 17:55 ` [PATCH 5.4 141/159] netfilter: nf_tables: GC transaction API to avoid race with control plane Greg Kroah-Hartman
@ 2023-11-24 17:55 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 143/159] netfilter: nft_set_hash: mark set element as dead when deleting from packet path Greg Kroah-Hartman
` (16 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:55 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit f6c383b8c31a93752a52697f8430a71dcbc46adf upstream.
Use the GC transaction API to replace the old and buggy gc API and the
busy mark approach.
No set elements are removed from async garbage collection anymore;
instead, the _DEAD bit is set so the set element is no longer visible from
the lookup path. Async GC enqueues transaction work that might be
aborted and retried later.
The rbtree and pipapo set backends do not set the _DEAD bit from the
sync GC path, since this runs in the control plane path where the mutex is
held. In this case, set elements are deactivated, removed and then released
via an RCU callback; sync GC never fails.
Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support")
Fixes: 9d0982927e79 ("netfilter: nft_hash: add support for timeouts")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nft_set_hash.c | 77 +++++++++++++++-------
net/netfilter/nft_set_rbtree.c | 142 +++++++++++++++++++++++++----------------
2 files changed, 142 insertions(+), 77 deletions(-)
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -17,6 +17,9 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
+#include <net/netns/generic.h>
+
+extern unsigned int nf_tables_net_id;
/* We target a hash table size of 4, element hint is 75% of final size */
#define NFT_RHASH_ELEMENT_HINT 3
@@ -59,6 +62,8 @@ static inline int nft_rhash_cmp(struct r
if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
return 1;
+ if (nft_set_elem_is_dead(&he->ext))
+ return 1;
if (nft_set_elem_expired(&he->ext))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
@@ -187,7 +192,6 @@ static void nft_rhash_activate(const str
struct nft_rhash_elem *he = elem->priv;
nft_set_elem_change_active(net, set, &he->ext);
- nft_set_elem_clear_busy(&he->ext);
}
static bool nft_rhash_flush(const struct net *net,
@@ -195,12 +199,9 @@ static bool nft_rhash_flush(const struct
{
struct nft_rhash_elem *he = priv;
- if (!nft_set_elem_mark_busy(&he->ext) ||
- !nft_is_active(net, &he->ext)) {
- nft_set_elem_change_active(net, set, &he->ext);
- return true;
- }
- return false;
+ nft_set_elem_change_active(net, set, &he->ext);
+
+ return true;
}
static void *nft_rhash_deactivate(const struct net *net,
@@ -217,9 +218,8 @@ static void *nft_rhash_deactivate(const
rcu_read_lock();
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
- if (he != NULL &&
- !nft_rhash_flush(net, set, he))
- he = NULL;
+ if (he)
+ nft_set_elem_change_active(net, set, &he->ext);
rcu_read_unlock();
@@ -295,49 +295,77 @@ cont:
static void nft_rhash_gc(struct work_struct *work)
{
+ struct nftables_pernet *nft_net;
struct nft_set *set;
struct nft_rhash_elem *he;
struct nft_rhash *priv;
- struct nft_set_gc_batch *gcb = NULL;
struct rhashtable_iter hti;
+ struct nft_trans_gc *gc;
+ struct net *net;
+ u32 gc_seq;
priv = container_of(work, struct nft_rhash, gc_work.work);
set = nft_set_container_of(priv);
+ net = read_pnet(&set->net);
+ nft_net = net_generic(net, nf_tables_net_id);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
- if (PTR_ERR(he) != -EAGAIN)
- break;
+ if (PTR_ERR(he) != -EAGAIN) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
continue;
}
+ /* Ruleset has been updated, try later. */
+ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
+ if (nft_set_elem_is_dead(&he->ext))
+ goto dead_elem;
+
if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPR)) {
struct nft_expr *expr = nft_set_ext_expr(&he->ext);
if (expr->ops->gc &&
expr->ops->gc(read_pnet(&set->net), expr))
- goto gc;
+ goto needs_gc_run;
}
if (!nft_set_elem_expired(&he->ext))
continue;
-gc:
- if (nft_set_elem_mark_busy(&he->ext))
- continue;
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb == NULL)
- break;
- rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, he);
+needs_gc_run:
+ nft_set_elem_dead(&he->ext);
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
+
+ nft_trans_gc_elem_add(gc, he);
}
+
+try_later:
+ /* catchall list iteration requires rcu read side lock. */
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
- nft_set_gc_batch_complete(gcb);
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+
+done:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
@@ -400,7 +428,6 @@ static void nft_rhash_destroy(const stru
};
cancel_delayed_work_sync(&priv->gc_work);
- rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
(void *)&rhash_ctx);
}
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -14,6 +14,9 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
+#include <net/netns/generic.h>
+
+extern unsigned int nf_tables_net_id;
struct nft_rbtree {
struct rb_root root;
@@ -46,6 +49,12 @@ static int nft_rbtree_cmp(const struct n
set->klen);
}
+static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
+{
+ return nft_set_elem_expired(&rbe->ext) ||
+ nft_set_elem_is_dead(&rbe->ext);
+}
+
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext,
unsigned int seq)
@@ -80,7 +89,7 @@ static bool __nft_rbtree_lookup(const st
continue;
}
- if (nft_set_elem_expired(&rbe->ext))
+ if (nft_rbtree_elem_expired(rbe))
return false;
if (nft_rbtree_interval_end(rbe)) {
@@ -98,7 +107,7 @@ static bool __nft_rbtree_lookup(const st
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
- !nft_set_elem_expired(&interval->ext) &&
+ !nft_rbtree_elem_expired(interval) &&
nft_rbtree_interval_start(interval)) {
*ext = &interval->ext;
return true;
@@ -214,6 +223,18 @@ static void *nft_rbtree_get(const struct
return rbe;
}
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ struct nft_set_elem elem = {
+ .priv = rbe,
+ };
+
+ nft_setelem_data_deactivate(net, set, &elem);
+ rb_erase(&rbe->node, &priv->root);
+}
+
static int nft_rbtree_gc_elem(const struct nft_set *__set,
struct nft_rbtree *priv,
struct nft_rbtree_elem *rbe,
@@ -221,11 +242,12 @@ static int nft_rbtree_gc_elem(const stru
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
+ struct net *net = read_pnet(&set->net);
struct nft_rbtree_elem *rbe_prev;
- struct nft_set_gc_batch *gcb;
+ struct nft_trans_gc *gc;
- gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
- if (!gcb)
+ gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
+ if (!gc)
return -ENOMEM;
/* search for end interval coming before this element.
@@ -243,17 +265,28 @@ static int nft_rbtree_gc_elem(const stru
if (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+ nft_rbtree_gc_remove(net, set, priv, rbe_prev);
- rb_erase(&rbe_prev->node, &priv->root);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, rbe_prev);
+ /* There is always room in this trans gc for this element,
+ * memory allocation never actually happens, hence, the warning
+ * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT,
+ * this is synchronous gc which never fails.
+ */
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (WARN_ON_ONCE(!gc))
+ return -ENOMEM;
+
+ nft_trans_gc_elem_add(gc, rbe_prev);
}
- rb_erase(&rbe->node, &priv->root);
- atomic_dec(&set->nelems);
+ nft_rbtree_gc_remove(net, set, priv, rbe);
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (WARN_ON_ONCE(!gc))
+ return -ENOMEM;
+
+ nft_trans_gc_elem_add(gc, rbe);
- nft_set_gc_batch_add(gcb, rbe);
- nft_set_gc_batch_complete(gcb);
+ nft_trans_gc_queue_sync_done(gc);
return 0;
}
@@ -481,7 +514,6 @@ static void nft_rbtree_activate(const st
struct nft_rbtree_elem *rbe = elem->priv;
nft_set_elem_change_active(net, set, &rbe->ext);
- nft_set_elem_clear_busy(&rbe->ext);
}
static bool nft_rbtree_flush(const struct net *net,
@@ -489,12 +521,9 @@ static bool nft_rbtree_flush(const struc
{
struct nft_rbtree_elem *rbe = priv;
- if (!nft_set_elem_mark_busy(&rbe->ext) ||
- !nft_is_active(net, &rbe->ext)) {
- nft_set_elem_change_active(net, set, &rbe->ext);
- return true;
- }
- return false;
+ nft_set_elem_change_active(net, set, &rbe->ext);
+
+ return true;
}
static void *nft_rbtree_deactivate(const struct net *net,
@@ -571,26 +600,40 @@ cont:
static void nft_rbtree_gc(struct work_struct *work)
{
- struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL;
- struct nft_set_gc_batch *gcb = NULL;
+ struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+ struct nftables_pernet *nft_net;
struct nft_rbtree *priv;
+ struct nft_trans_gc *gc;
struct rb_node *node;
struct nft_set *set;
+ unsigned int gc_seq;
struct net *net;
- u8 genmask;
priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
- genmask = nft_genmask_cur(net);
+ nft_net = net_generic(net, nf_tables_net_id);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+
+ /* Ruleset has been updated, try later. */
+ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- if (!nft_set_elem_active(&rbe->ext, genmask))
- continue;
+ if (nft_set_elem_is_dead(&rbe->ext))
+ goto dead_elem;
/* elements are reversed in the rbtree for historical reasons,
* from highest to lowest value, that is why end element is
@@ -600,43 +643,38 @@ static void nft_rbtree_gc(struct work_st
rbe_end = rbe;
continue;
}
+
if (!nft_set_elem_expired(&rbe->ext))
continue;
- if (nft_set_elem_mark_busy(&rbe->ext)) {
- rbe_end = NULL;
+ nft_set_elem_dead(&rbe->ext);
+
+ if (!rbe_end)
continue;
- }
- if (rbe_prev) {
- rb_erase(&rbe_prev->node, &priv->root);
- rbe_prev = NULL;
- }
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (!gcb)
- break;
+ nft_set_elem_dead(&rbe_end->ext);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, rbe);
- rbe_prev = rbe;
-
- if (rbe_end) {
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, rbe_end);
- rb_erase(&rbe_end->node, &priv->root);
- rbe_end = NULL;
- }
- node = rb_next(node);
- if (!node)
- break;
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
+
+ nft_trans_gc_elem_add(gc, rbe_end);
+ rbe_end = NULL;
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
+
+ nft_trans_gc_elem_add(gc, rbe);
}
- if (rbe_prev)
- rb_erase(&rbe_prev->node, &priv->root);
+
+try_later:
write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
- nft_set_gc_batch_complete(gcb);
-
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+done:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 143/159] netfilter: nft_set_hash: mark set element as dead when deleting from packet path
[not found] <20231124171941.909624388@linuxfoundation.org>
` (8 preceding siblings ...)
2023-11-24 17:55 ` [PATCH 5.4 142/159] netfilter: nf_tables: adapt set backend to use GC transaction API Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 144/159] netfilter: nf_tables: remove busy mark and gc batch API Greg Kroah-Hartman
` (15 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit c92db3030492b8ad1d0faace7a93bbcf53850d0c upstream.
Set the NFT_SET_ELEM_DEAD_BIT flag on this element, instead of
performing element removal, which might race with an ongoing transaction.
Enable GC when the dynamic flag is set, since dynset deletion requires
garbage collection after this patch.
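A minimal sketch of why dynamic sets now need the periodic GC worker
(illustration only; the flag values below are stand-ins, not the real
NFT_SET_TIMEOUT/NFT_SET_EVAL values): since a packet-path delete only
marks the element dead, nothing would ever free it without a GC pass.

#include <stdbool.h>
#include <stdio.h>

#define SET_TIMEOUT 0x1	/* stand-in for NFT_SET_TIMEOUT */
#define SET_EVAL    0x2	/* stand-in for NFT_SET_EVAL (dynamic updates) */

static bool needs_gc_worker(unsigned int flags)
{
	/* before this patch the condition was: flags & SET_TIMEOUT */
	return flags & (SET_TIMEOUT | SET_EVAL);
}

int main(void)
{
	printf("timeout set arms gc: %d\n", needs_gc_worker(SET_TIMEOUT));
	printf("dynamic set arms gc: %d\n", needs_gc_worker(SET_EVAL));
	return 0;
}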
Fixes: d0a8d877da97 ("netfilter: nft_dynset: support for element deletion")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nft_set_hash.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -251,7 +251,9 @@ static bool nft_rhash_delete(const struc
if (he == NULL)
return false;
- return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0;
+ nft_set_elem_dead(&he->ext);
+
+ return true;
}
static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
@@ -400,7 +402,7 @@ static int nft_rhash_init(const struct n
return err;
INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
- if (set->flags & NFT_SET_TIMEOUT)
+ if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL))
nft_rhash_gc_init(set);
return 0;
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 144/159] netfilter: nf_tables: remove busy mark and gc batch API
[not found] <20231124171941.909624388@linuxfoundation.org>
` (9 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 143/159] netfilter: nft_set_hash: mark set element as dead when deleting from packet path Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 145/159] netfilter: nf_tables: fix GC transaction races with netns and netlink event exit path Greg Kroah-Hartman
` (14 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit a2dd0233cbc4d8a0abb5f64487487ffc9265beb5 upstream.
Ditch it; it has been replaced by the GC transaction API and has no
clients anymore.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/net/netfilter/nf_tables.h | 97 +-------------------------------------
net/netfilter/nf_tables_api.c | 28 ----------
2 files changed, 5 insertions(+), 120 deletions(-)
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -672,62 +672,6 @@ void nft_set_elem_destroy(const struct n
void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
const struct nft_set *set, void *elem);
-/**
- * struct nft_set_gc_batch_head - nf_tables set garbage collection batch
- *
- * @rcu: rcu head
- * @set: set the elements belong to
- * @cnt: count of elements
- */
-struct nft_set_gc_batch_head {
- struct rcu_head rcu;
- const struct nft_set *set;
- unsigned int cnt;
-};
-
-#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \
- sizeof(struct nft_set_gc_batch_head)) / \
- sizeof(void *))
-
-/**
- * struct nft_set_gc_batch - nf_tables set garbage collection batch
- *
- * @head: GC batch head
- * @elems: garbage collection elements
- */
-struct nft_set_gc_batch {
- struct nft_set_gc_batch_head head;
- void *elems[NFT_SET_GC_BATCH_SIZE];
-};
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
- gfp_t gfp);
-void nft_set_gc_batch_release(struct rcu_head *rcu);
-
-static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
-{
- if (gcb != NULL)
- call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
-}
-
-static inline struct nft_set_gc_batch *
-nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
- gfp_t gfp)
-{
- if (gcb != NULL) {
- if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
- return gcb;
- nft_set_gc_batch_complete(gcb);
- }
- return nft_set_gc_batch_alloc(set, gfp);
-}
-
-static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
- void *elem)
-{
- gcb->elems[gcb->head.cnt++] = elem;
-}
-
struct nft_expr_ops;
/**
* struct nft_expr_type - nf_tables expression type
@@ -1354,47 +1298,12 @@ static inline void nft_set_elem_change_a
#endif /* IS_ENABLED(CONFIG_NF_TABLES) */
-/*
- * We use a free bit in the genmask field to indicate the element
- * is busy, meaning it is currently being processed either by
- * the netlink API or GC.
- *
- * Even though the genmask is only a single byte wide, this works
- * because the extension structure if fully constant once initialized,
- * so there are no non-atomic write accesses unless it is already
- * marked busy.
- */
-#define NFT_SET_ELEM_BUSY_MASK (1 << 2)
-
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT 2
-#elif defined(__BIG_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
-#else
-#error
-#endif
-
-static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
-{
- unsigned long *word = (unsigned long *)ext;
-
- BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
- return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
-}
-
-static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
-{
- unsigned long *word = (unsigned long *)ext;
-
- clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
-}
-
-#define NFT_SET_ELEM_DEAD_MASK (1 << 3)
+#define NFT_SET_ELEM_DEAD_MASK (1 << 2)
#if defined(__LITTLE_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_DEAD_BIT 3
+#define NFT_SET_ELEM_DEAD_BIT 2
#elif defined(__BIG_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 3)
+#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
#else
#error
#endif
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4879,7 +4879,8 @@ static int nft_add_set_elem(struct nft_c
if (trans == NULL)
goto err4;
- ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
+ ext->genmask = nft_genmask_cur(ctx->net);
+
err = set->ops->insert(ctx->net, set, &elem, &ext2);
if (err) {
if (err == -EEXIST) {
@@ -5172,31 +5173,6 @@ static int nf_tables_delsetelem(struct n
return err;
}
-void nft_set_gc_batch_release(struct rcu_head *rcu)
-{
- struct nft_set_gc_batch *gcb;
- unsigned int i;
-
- gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
- for (i = 0; i < gcb->head.cnt; i++)
- nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
- kfree(gcb);
-}
-EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
- gfp_t gfp)
-{
- struct nft_set_gc_batch *gcb;
-
- gcb = kzalloc(sizeof(*gcb), gfp);
- if (gcb == NULL)
- return gcb;
- gcb->head.set = set;
- return gcb;
-}
-EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc);
-
/*
* Stateful objects
*/
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 145/159] netfilter: nf_tables: fix GC transaction races with netns and netlink event exit path
[not found] <20231124171941.909624388@linuxfoundation.org>
` (10 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 144/159] netfilter: nf_tables: remove busy mark and gc batch API Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 146/159] netfilter: nf_tables: GC transaction race with netns dismantle Greg Kroah-Hartman
` (13 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel
Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso, Florian Westphal
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit 6a33d8b73dfac0a41f3877894b38082bd0c9a5bc upstream.
The netlink event path is missing a synchronization point with GC
transactions. Add a GC sequence number update to the netns release path
and the netlink event path; any GC transaction that loses the race will
be discarded.
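A small userspace model of the sequence-number handshake (illustration
only; the helpers mirror the nft_gc_seq_begin()/nft_gc_seq_end() pattern
added below, but this is not the kernel code): the updater bumps the
counter on entry and on exit, so a GC batch that snapshotted the old
value no longer matches and is discarded.

#include <stdio.h>

static unsigned int gc_seq;	/* models nft_net->gc_seq */

static void gc_seq_begin(void)
{
	++gc_seq;	/* counter becomes odd: an update is in progress */
}

static void gc_seq_end(void)
{
	++gc_seq;	/* counter becomes even again */
}

static int gc_batch_valid(unsigned int snapshot)
{
	return gc_seq == snapshot;	/* any intervening update invalidates it */
}

int main(void)
{
	unsigned int snap = gc_seq;	/* async GC snapshots the counter */

	gc_seq_begin();			/* commit, abort or netns exit begins */
	gc_seq_end();
	printf("batch still valid: %d\n", gc_batch_valid(snap));	/* prints 0 */
	return 0;
}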
Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nf_tables_api.c | 29 +++++++++++++++++++++++++----
1 file changed, 25 insertions(+), 4 deletions(-)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7178,6 +7178,22 @@ static void nf_tables_commit_release(str
mutex_unlock(&nft_net->commit_mutex);
}
+static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net)
+{
+ unsigned int gc_seq;
+
+ /* Bump gc counter, it becomes odd, this is the busy mark. */
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+
+ return gc_seq;
+}
+
+static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
+{
+ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+}
+
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
@@ -7242,9 +7258,7 @@ static int nf_tables_commit(struct net *
while (++nft_net->base_seq == 0)
;
- /* Bump gc counter, it becomes odd, this is the busy mark. */
- gc_seq = READ_ONCE(nft_net->gc_seq);
- WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+ gc_seq = nft_gc_seq_begin(nft_net);
/* step 3. Start new generation, rules_gen_X now in use. */
net->nft.gencursor = nft_gencursor_next(net);
@@ -7386,7 +7400,7 @@ static int nf_tables_commit(struct net *
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
- WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+ nft_gc_seq_end(nft_net, gc_seq);
nf_tables_commit_release(net);
return 0;
@@ -8256,11 +8270,18 @@ static void __net_exit nf_tables_pre_exi
static void __net_exit nf_tables_exit_net(struct net *net)
{
struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ unsigned int gc_seq;
mutex_lock(&nft_net->commit_mutex);
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+
if (!list_empty(&nft_net->commit_list))
__nf_tables_abort(net, NFNL_ABORT_NONE);
__nft_release_tables(net);
+
+ nft_gc_seq_end(nft_net, gc_seq);
+
mutex_unlock(&nft_net->commit_mutex);
WARN_ON_ONCE(!list_empty(&nft_net->tables));
WARN_ON_ONCE(!list_empty(&nft_net->module_list));
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 146/159] netfilter: nf_tables: GC transaction race with netns dismantle
[not found] <20231124171941.909624388@linuxfoundation.org>
` (11 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 145/159] netfilter: nf_tables: fix GC transaction races with netns and netlink event exit path Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 147/159] netfilter: nf_tables: GC transaction race with abort path Greg Kroah-Hartman
` (12 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel
Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso, Florian Westphal
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit 02c6c24402bf1c1e986899c14ba22a10b510916b upstream.
Use maybe_get_net() since the GC workqueue might race with the netns
exit path.
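A simplified model of the difference this makes (illustration only,
non-atomic userspace code; the kernel takes the reference atomically):
once the namespace refcount has dropped to zero, maybe_get_net() fails
instead of resurrecting a dying netns, and the GC batch is skipped.

#include <stdio.h>

struct net { int count; };	/* stand-in for the netns refcount */

static struct net *model_maybe_get_net(struct net *net)
{
	if (net->count == 0)	/* netns exit already dropped the last ref */
		return NULL;
	net->count++;
	return net;
}

int main(void)
{
	struct net alive = { .count = 1 };
	struct net dying = { .count = 0 };

	printf("alive netns: %s\n", model_maybe_get_net(&alive) ? "ref taken" : "skipped");
	printf("dying netns: %s\n", model_maybe_get_net(&dying) ? "ref taken" : "skipped");
	return 0;
}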
Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nf_tables_api.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7052,9 +7052,14 @@ struct nft_trans_gc *nft_trans_gc_alloc(
if (!trans)
return NULL;
+ trans->net = maybe_get_net(net);
+ if (!trans->net) {
+ kfree(trans);
+ return NULL;
+ }
+
refcount_inc(&set->refs);
trans->set = set;
- trans->net = get_net(net);
trans->seq = gc_seq;
return trans;
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 147/159] netfilter: nf_tables: GC transaction race with abort path
[not found] <20231124171941.909624388@linuxfoundation.org>
` (12 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 146/159] netfilter: nf_tables: GC transaction race with netns dismantle Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 148/159] netfilter: nf_tables: use correct lock to protect gc_list Greg Kroah-Hartman
` (11 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit 720344340fb9be2765bbaab7b292ece0a4570eae upstream.
The abort path is missing a synchronization point with GC transactions.
Add a GC sequence number update so that any GC transaction that loses
the race will be discarded.
Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nf_tables_api.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7597,7 +7597,12 @@ static int nf_tables_abort(struct net *n
enum nfnl_abort_action action)
{
struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
- int ret = __nf_tables_abort(net, action);
+ unsigned int gc_seq;
+ int ret;
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+ ret = __nf_tables_abort(net, action);
+ nft_gc_seq_end(nft_net, gc_seq);
mutex_unlock(&nft_net->commit_mutex);
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 148/159] netfilter: nf_tables: use correct lock to protect gc_list
[not found] <20231124171941.909624388@linuxfoundation.org>
` (13 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 147/159] netfilter: nf_tables: GC transaction race with abort path Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 149/159] netfilter: nf_tables: defer gc run if previous batch is still pending Greg Kroah-Hartman
` (10 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit 8357bc946a2abc2a10ca40e5a2105d2b4c57515e upstream.
Use nf_tables_gc_list_lock spinlock, not nf_tables_destroy_list_lock to
protect the gc_list.
Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nf_tables_api.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7028,9 +7028,9 @@ static void nft_trans_gc_work(struct wor
struct nft_trans_gc *trans, *next;
LIST_HEAD(trans_gc_list);
- spin_lock(&nf_tables_destroy_list_lock);
+ spin_lock(&nf_tables_gc_list_lock);
list_splice_init(&nf_tables_gc_list, &trans_gc_list);
- spin_unlock(&nf_tables_destroy_list_lock);
+ spin_unlock(&nf_tables_gc_list_lock);
list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
list_del(&trans->list);
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 149/159] netfilter: nf_tables: defer gc run if previous batch is still pending
[not found] <20231124171941.909624388@linuxfoundation.org>
` (14 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 148/159] netfilter: nf_tables: use correct lock to protect gc_list Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 150/159] netfilter: nft_set_rbtree: skip sync GC for new elements in this transaction Greg Kroah-Hartman
` (9 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Florian Westphal
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Florian Westphal <fw@strlen.de>
commit 8e51830e29e12670b4c10df070a4ea4c9593e961 upstream.
Don't queue more gc work, else we may queue the same elements multiple
times.
If an element is flagged as dead, this can mean that either the previous
gc request was invalidated/discarded by a transaction or that the previous
request is still pending in the system work queue.
The latter will happen if the gc interval is set to a very low value,
e.g. 1ms, and the system work queue is backlogged.
The set's refcount is 1 if no previous gc requests are queued, so add
a helper for this and skip the gc run if old requests are still pending.
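A small model of the invariant behind the new helper (illustration only;
the structure and names are stand-ins): every queued GC batch holds a
reference on the set, so a refcount of exactly 1 means only the set
itself holds a reference and no batch from a previous run is still in
flight.

#include <stdio.h>

struct set { int refs; };

static void queue_gc_batch(struct set *s)   { s->refs++; }	/* batch takes a ref */
static void release_gc_batch(struct set *s) { s->refs--; }

static int gc_is_pending(const struct set *s)	/* same idea as nft_set_gc_is_pending() */
{
	return s->refs != 1;
}

int main(void)
{
	struct set s = { .refs = 1 };	/* the set's own reference */

	printf("pending before queueing: %d\n", gc_is_pending(&s));
	queue_gc_batch(&s);
	printf("pending while batch in flight: %d\n", gc_is_pending(&s));
	release_gc_batch(&s);
	printf("pending after batch released: %d\n", gc_is_pending(&s));
	return 0;
}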
Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/net/netfilter/nf_tables.h | 5 +++++
net/netfilter/nft_set_hash.c | 3 +++
net/netfilter/nft_set_rbtree.c | 3 +++
3 files changed, 11 insertions(+)
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -467,6 +467,11 @@ static inline void *nft_set_priv(const s
return (void *)set->data;
}
+static inline bool nft_set_gc_is_pending(const struct nft_set *s)
+{
+ return refcount_read(&s->refs) != 1;
+}
+
static inline struct nft_set *nft_set_container_of(const void *priv)
{
return (void *)priv - offsetof(struct nft_set, data);
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -312,6 +312,9 @@ static void nft_rhash_gc(struct work_str
nft_net = net_generic(net, nf_tables_net_id);
gc_seq = READ_ONCE(nft_net->gc_seq);
+ if (nft_set_gc_is_pending(set))
+ goto done;
+
gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
if (!gc)
goto done;
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -615,6 +615,9 @@ static void nft_rbtree_gc(struct work_st
nft_net = net_generic(net, nf_tables_net_id);
gc_seq = READ_ONCE(nft_net->gc_seq);
+ if (nft_set_gc_is_pending(set))
+ goto done;
+
gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
if (!gc)
goto done;
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 150/159] netfilter: nft_set_rbtree: skip sync GC for new elements in this transaction
[not found] <20231124171941.909624388@linuxfoundation.org>
` (15 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 149/159] netfilter: nf_tables: defer gc run if previous batch is still pending Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 151/159] netfilter: nft_set_rbtree: use read spinlock to avoid datapath contention Greg Kroah-Hartman
` (8 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel
Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso, Florian Westphal
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit 2ee52ae94baabf7ee09cf2a8d854b990dac5d0e4 upstream.
New elements in this transaction might expire before the transaction
ends. Skip sync GC for such elements, otherwise the commit path might
walk over an already released object. Once the transaction is finished,
async GC will collect such expired elements.
Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nft_set_rbtree.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -314,6 +314,7 @@ static int __nft_rbtree_insert(const str
struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
struct rb_node *node, *next, *parent, **p, *first = NULL;
struct nft_rbtree *priv = nft_set_priv(set);
+ u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
int d, err;
@@ -359,8 +360,11 @@ static int __nft_rbtree_insert(const str
if (!nft_set_elem_active(&rbe->ext, genmask))
continue;
- /* perform garbage collection to avoid bogus overlap reports. */
- if (nft_set_elem_expired(&rbe->ext)) {
+ /* perform garbage collection to avoid bogus overlap reports
+ * but skip new elements in this transaction.
+ */
+ if (nft_set_elem_expired(&rbe->ext) &&
+ nft_set_elem_active(&rbe->ext, cur_genmask)) {
err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
if (err < 0)
return err;
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 151/159] netfilter: nft_set_rbtree: use read spinlock to avoid datapath contention
[not found] <20231124171941.909624388@linuxfoundation.org>
` (16 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 150/159] netfilter: nft_set_rbtree: skip sync GC for new elements in this transaction Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 152/159] netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration Greg Kroah-Hartman
` (7 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit 96b33300fba880ec0eafcf3d82486f3463b4b6da upstream.
The rbtree GC does not modify the data structure; instead it collects
expired elements and enqueues a GC transaction. Use a read spinlock to
avoid contention with the datapath while the GC worker is running.
Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nft_set_rbtree.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -626,8 +626,7 @@ static void nft_rbtree_gc(struct work_st
if (!gc)
goto done;
- write_lock_bh(&priv->lock);
- write_seqcount_begin(&priv->count);
+ read_lock_bh(&priv->lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
/* Ruleset has been updated, try later. */
@@ -676,8 +675,7 @@ dead_elem:
}
try_later:
- write_seqcount_end(&priv->count);
- write_unlock_bh(&priv->lock);
+ read_unlock_bh(&priv->lock);
if (gc)
nft_trans_gc_queue_async_done(gc);
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 152/159] netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration
[not found] <20231124171941.909624388@linuxfoundation.org>
` (17 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 151/159] netfilter: nft_set_rbtree: use read spinlock to avoid datapath contention Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 153/159] netfilter: nf_tables: fix memleak when more than 255 elements expired Greg Kroah-Hartman
` (6 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit b079155faae94e9b3ab9337e82100a914ebb4e8d upstream.
Skip the GC run if the iterator rewinds to the beginning with EAGAIN,
otherwise GC might collect the same element more than once.
Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nft_set_hash.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -324,12 +324,9 @@ static void nft_rhash_gc(struct work_str
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
- if (PTR_ERR(he) != -EAGAIN) {
- nft_trans_gc_destroy(gc);
- gc = NULL;
- goto try_later;
- }
- continue;
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
}
/* Ruleset has been updated, try later. */
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 153/159] netfilter: nf_tables: fix memleak when more than 255 elements expired
[not found] <20231124171941.909624388@linuxfoundation.org>
` (18 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 152/159] netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 154/159] netfilter: nf_tables: unregister flowtable hooks on netns exit Greg Kroah-Hartman
` (5 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel
Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso, Florian Westphal
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit cf5000a7787cbc10341091d37245a42c119d26c5 upstream.
When more than 255 elements have expired, we're supposed to switch to a
new gc container structure.
This never happens: the u8 type wraps before reaching the boundary
and nft_trans_gc_space() always returns true.
This means we recycle the initial gc container structure and
lose track of the elements that came before.
While at it, don't deref 'gc' after we've passed it to call_rcu.
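A standalone demonstration of the wrap-around (illustration only; the
batch size of 256 is the assumed value of NFT_TRANS_GC_BATCHCOUNT): with
an 8-bit counter the space check can never fail, so the batch silently
overflows instead of being handed over to a fresh container.

#include <stdio.h>
#include <stdint.h>

#define BATCHCOUNT 256	/* assumed NFT_TRANS_GC_BATCHCOUNT */

int main(void)
{
	uint8_t count = 0;	/* the old u8 counter */
	unsigned int i;

	for (i = 0; i < 300; i++) {
		if (!(count < BATCHCOUNT)) {	/* mimics nft_trans_gc_space() */
			printf("batch reported full after %u elements\n", i);
			return 0;
		}
		count++;	/* wraps from 255 back to 0 */
	}
	printf("batch never reported full; counter wrapped to %u\n",
	       (unsigned int)count);
	return 0;
}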
Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
Reported-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/net/netfilter/nf_tables.h | 2 +-
net/netfilter/nf_tables_api.c | 10 ++++++++--
2 files changed, 9 insertions(+), 3 deletions(-)
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1440,7 +1440,7 @@ struct nft_trans_gc {
struct net *net;
struct nft_set *set;
u32 seq;
- u8 count;
+ u16 count;
void *priv[NFT_TRANS_GC_BATCHCOUNT];
struct rcu_head rcu;
};
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7089,12 +7089,15 @@ static int nft_trans_gc_space(struct nft
struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
unsigned int gc_seq, gfp_t gfp)
{
+ struct nft_set *set;
+
if (nft_trans_gc_space(gc))
return gc;
+ set = gc->set;
nft_trans_gc_queue_work(gc);
- return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
+ return nft_trans_gc_alloc(set, gc_seq, gfp);
}
EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async);
@@ -7111,15 +7114,18 @@ EXPORT_SYMBOL_GPL(nft_trans_gc_queue_asy
struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
{
+ struct nft_set *set;
+
if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
return NULL;
if (nft_trans_gc_space(gc))
return gc;
+ set = gc->set;
call_rcu(&gc->rcu, nft_trans_gc_trans_free);
- return nft_trans_gc_alloc(gc->set, 0, gfp);
+ return nft_trans_gc_alloc(set, 0, gfp);
}
EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync);
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 154/159] netfilter: nf_tables: unregister flowtable hooks on netns exit
[not found] <20231124171941.909624388@linuxfoundation.org>
` (19 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 153/159] netfilter: nf_tables: fix memleak when more than 255 elements expired Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 155/159] netfilter: nf_tables: double hook unregistration in netns path Greg Kroah-Hartman
` (4 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel
Cc: Greg Kroah-Hartman, patches, syzbot+e918523f77e62790d6d9,
Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit 6069da443bf65f513bb507bb21e2f87cfb1ad0b6 upstream.
Unregister flowtable hooks before they are released via
nf_tables_flowtable_destroy(), otherwise the hook core reports a UAF.
BUG: KASAN: use-after-free in nf_hook_entries_grow+0x5a7/0x700 net/netfilter/core.c:142
Read of size 4 at addr ffff8880736f7438 by task syz-executor579/3666
CPU: 0 PID: 3666 Comm: syz-executor579 Not tainted 5.16.0-rc5-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0x1dc/0x2d8 lib/dump_stack.c:106
print_address_description+0x65/0x380 mm/kasan/report.c:247
__kasan_report mm/kasan/report.c:433 [inline]
kasan_report+0x19a/0x1f0 mm/kasan/report.c:450
nf_hook_entries_grow+0x5a7/0x700 net/netfilter/core.c:142
__nf_register_net_hook+0x27e/0x8d0 net/netfilter/core.c:429
nf_register_net_hook+0xaa/0x180 net/netfilter/core.c:571
nft_register_flowtable_net_hooks+0x3c5/0x730 net/netfilter/nf_tables_api.c:7232
nf_tables_newflowtable+0x2022/0x2cf0 net/netfilter/nf_tables_api.c:7430
nfnetlink_rcv_batch net/netfilter/nfnetlink.c:513 [inline]
nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline]
nfnetlink_rcv+0x10e6/0x2550 net/netfilter/nfnetlink.c:652
__nft_release_hook() calls nft_unregister_flowtable_net_hooks(), which
only unregisters the hooks. After the RCU grace period it is guaranteed
that no packets add new entries to the flowtable (no flow offload rules
or flowtable hooks are reachable from the packet path), so it is safe to
call nf_flow_table_free(), which cleans up the remaining entries from
the flowtable (both software and hardware) and unbinds the flow_block.
Fixes: ff4bf2f42a40 ("netfilter: nf_tables: add nft_unregister_flowtable_hook()")
Reported-by: syzbot+e918523f77e62790d6d9@syzkaller.appspotmail.com
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nf_tables_api.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8186,16 +8186,24 @@ int __nft_release_basechain(struct nft_c
}
EXPORT_SYMBOL_GPL(__nft_release_basechain);
+static void __nft_release_hook(struct net *net, struct nft_table *table)
+{
+ struct nft_flowtable *flowtable;
+ struct nft_chain *chain;
+
+ list_for_each_entry(chain, &table->chains, list)
+ nf_tables_unregister_hook(net, table, chain);
+ list_for_each_entry(flowtable, &table->flowtables, list)
+ nft_unregister_flowtable_net_hooks(net, flowtable);
+}
+
static void __nft_release_hooks(struct net *net)
{
struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
struct nft_table *table;
- struct nft_chain *chain;
- list_for_each_entry(table, &nft_net->tables, list) {
- list_for_each_entry(chain, &table->chains, list)
- nf_tables_unregister_hook(net, table, chain);
- }
+ list_for_each_entry(table, &nft_net->tables, list)
+ __nft_release_hook(net, table);
}
static void __nft_release_table(struct net *net, struct nft_table *table)
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 155/159] netfilter: nf_tables: double hook unregistration in netns path
[not found] <20231124171941.909624388@linuxfoundation.org>
` (20 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 154/159] netfilter: nf_tables: unregister flowtable hooks on netns exit Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 156/159] netfilter: nftables: update table flags from the commit phase Greg Kroah-Hartman
` (3 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit f9a43007d3f7ba76d5e7f9421094f00f2ef202f8 upstream.
__nft_release_hooks() is called from the netns pre_exit path, which
unregisters the hooks; then the NETDEV_UNREGISTER event is triggered,
which unregisters the hooks again.
[ 565.221461] WARNING: CPU: 18 PID: 193 at net/netfilter/core.c:495 __nf_unregister_net_hook+0x247/0x270
[...]
[ 565.246890] CPU: 18 PID: 193 Comm: kworker/u64:1 Tainted: G E 5.18.0-rc7+ #27
[ 565.253682] Workqueue: netns cleanup_net
[ 565.257059] RIP: 0010:__nf_unregister_net_hook+0x247/0x270
[...]
[ 565.297120] Call Trace:
[ 565.300900] <TASK>
[ 565.304683] nf_tables_flowtable_event+0x16a/0x220 [nf_tables]
[ 565.308518] raw_notifier_call_chain+0x63/0x80
[ 565.312386] unregister_netdevice_many+0x54f/0xb50
Unregister and destroy the netdev hook from netns pre_exit via kfree_rcu
so the NETDEV_UNREGISTER path sees unregistered hooks.
Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nf_tables_api.c | 34 +++++++++++++++++++++++++++-------
net/netfilter/nft_chain_filter.c | 3 +++
2 files changed, 30 insertions(+), 7 deletions(-)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -219,9 +219,10 @@ static int nf_tables_register_hook(struc
return nf_register_net_hook(net, ops);
}
-static void nf_tables_unregister_hook(struct net *net,
- const struct nft_table *table,
- struct nft_chain *chain)
+static void __nf_tables_unregister_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain,
+ bool release_netdev)
{
const struct nft_base_chain *basechain;
const struct nf_hook_ops *ops;
@@ -236,6 +237,16 @@ static void nf_tables_unregister_hook(st
return basechain->type->ops_unregister(net, ops);
nf_unregister_net_hook(net, ops);
+ if (release_netdev &&
+ table->family == NFPROTO_NETDEV)
+ nft_base_chain(chain)->ops.dev = NULL;
+}
+
+static void nf_tables_unregister_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain)
+{
+ __nf_tables_unregister_hook(net, table, chain, false);
}
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
@@ -5997,8 +6008,9 @@ nft_flowtable_type_get(struct net *net,
return ERR_PTR(-ENOENT);
}
-static void nft_unregister_flowtable_net_hooks(struct net *net,
- struct nft_flowtable *flowtable)
+static void __nft_unregister_flowtable_net_hooks(struct net *net,
+ struct nft_flowtable *flowtable,
+ bool release_netdev)
{
int i;
@@ -6007,9 +6019,17 @@ static void nft_unregister_flowtable_net
continue;
nf_unregister_net_hook(net, &flowtable->ops[i]);
+ if (release_netdev)
+ flowtable->ops[i].dev = NULL;
}
}
+static void nft_unregister_flowtable_net_hooks(struct net *net,
+ struct nft_flowtable *flowtable)
+{
+ __nft_unregister_flowtable_net_hooks(net, flowtable, false);
+}
+
static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
@@ -8192,9 +8212,9 @@ static void __nft_release_hook(struct ne
struct nft_chain *chain;
list_for_each_entry(chain, &table->chains, list)
- nf_tables_unregister_hook(net, table, chain);
+ __nf_tables_unregister_hook(net, table, chain, true);
list_for_each_entry(flowtable, &table->flowtables, list)
- nft_unregister_flowtable_net_hooks(net, flowtable);
+ __nft_unregister_flowtable_net_hooks(net, flowtable, true);
}
static void __nft_release_hooks(struct net *net)
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -296,6 +296,9 @@ static void nft_netdev_event(unsigned lo
if (strcmp(basechain->dev_name, dev->name) != 0)
return;
+ if (!basechain->ops.dev)
+ return;
+
/* UNREGISTER events are also happpening on netns exit.
*
* Altough nf_tables core releases all tables/chains, only
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 156/159] netfilter: nftables: update table flags from the commit phase
[not found] <20231124171941.909624388@linuxfoundation.org>
` (21 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 155/159] netfilter: nf_tables: double hook unregistration in netns path Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 157/159] netfilter: nf_tables: fix table flag updates Greg Kroah-Hartman
` (2 subsequent siblings)
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit 0ce7cf4127f14078ca598ba9700d813178a59409 upstream.
Do not update table flags from the preparation phase. Store the flags
update into the transaction, then update the flags from the commit
phase.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/net/netfilter/nf_tables.h | 9 ++++++---
net/netfilter/nf_tables_api.c | 31 ++++++++++++++++---------------
2 files changed, 22 insertions(+), 18 deletions(-)
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1392,13 +1392,16 @@ struct nft_trans_chain {
struct nft_trans_table {
bool update;
- bool enable;
+ u8 state;
+ u32 flags;
};
#define nft_trans_table_update(trans) \
(((struct nft_trans_table *)trans->data)->update)
-#define nft_trans_table_enable(trans) \
- (((struct nft_trans_table *)trans->data)->enable)
+#define nft_trans_table_state(trans) \
+ (((struct nft_trans_table *)trans->data)->state)
+#define nft_trans_table_flags(trans) \
+ (((struct nft_trans_table *)trans->data)->flags)
struct nft_trans_elem {
struct nft_set *set;
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -893,6 +893,12 @@ static void nf_tables_table_disable(stru
nft_table_disable(net, table, 0);
}
+enum {
+ NFT_TABLE_STATE_UNCHANGED = 0,
+ NFT_TABLE_STATE_DORMANT,
+ NFT_TABLE_STATE_WAKEUP
+};
+
static int nf_tables_updtable(struct nft_ctx *ctx)
{
struct nft_trans *trans;
@@ -916,19 +922,17 @@ static int nf_tables_updtable(struct nft
if ((flags & NFT_TABLE_F_DORMANT) &&
!(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
- nft_trans_table_enable(trans) = false;
+ nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT;
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
ctx->table->flags & NFT_TABLE_F_DORMANT) {
- ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
ret = nf_tables_table_enable(ctx->net, ctx->table);
if (ret >= 0)
- nft_trans_table_enable(trans) = true;
- else
- ctx->table->flags |= NFT_TABLE_F_DORMANT;
+ nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP;
}
if (ret < 0)
goto err;
+ nft_trans_table_flags(trans) = flags;
nft_trans_table_update(trans) = true;
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
@@ -7298,11 +7302,10 @@ static int nf_tables_commit(struct net *
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
- if (!nft_trans_table_enable(trans)) {
- nf_tables_table_disable(net,
- trans->ctx.table);
- trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
- }
+ if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT)
+ nf_tables_table_disable(net, trans->ctx.table);
+
+ trans->ctx.table->flags = nft_trans_table_flags(trans);
} else {
nft_clear(net, trans->ctx.table);
}
@@ -7497,11 +7500,9 @@ static int __nf_tables_abort(struct net
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
- if (nft_trans_table_enable(trans)) {
- nf_tables_table_disable(net,
- trans->ctx.table);
- trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
- }
+ if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP)
+ nf_tables_table_disable(net, trans->ctx.table);
+
nft_trans_destroy(trans);
} else {
list_del_rcu(&trans->ctx.table->list);
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 157/159] netfilter: nf_tables: fix table flag updates
[not found] <20231124171941.909624388@linuxfoundation.org>
` (22 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 156/159] netfilter: nftables: update table flags from the commit phase Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 158/159] netfilter: nf_tables: disable toggling dormant table state more than once Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 159/159] netfilter: nf_tables: bogus EBUSY when deleting flowtable after flush (for 5.4) Greg Kroah-Hartman
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel
Cc: Greg Kroah-Hartman, patches, syzbot+7ad5cd1615f2d89c6e7e,
Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit 179d9ba5559a756f4322583388b3213fe4e391b0 upstream.
The dormant flag needs to be updated from the preparation phase;
otherwise two consecutive requests to make a table dormant in the same
batch might try to remove the same hooks twice, resulting in the
following warning:
hook not found, pf 3 num 0
WARNING: CPU: 0 PID: 334 at net/netfilter/core.c:480 __nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480
Modules linked in:
CPU: 0 PID: 334 Comm: kworker/u4:5 Not tainted 5.12.0-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: netns cleanup_net
RIP: 0010:__nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480
This patch is a partial revert of 0ce7cf4127f1 ("netfilter: nftables:
update table flags from the commit phase") to restore the previous
behaviour.
However, there is still another problem: a batch containing a series of
dormant-wakeup-dormant table updates (and vice versa) also triggers the
warning above, since hook unregistration happens from the preparation
phase, while hook registration occurs from the commit phase.
To fix this problem, this patch adds two internal flags that annotate
the original dormant flag status, __NFT_TABLE_F_WAS_DORMANT and
__NFT_TABLE_F_WAS_AWAKEN, so that it can be restored from the abort
path.
The __NFT_TABLE_F_UPDATE bitmask allows the dormant flag update to be
handled with a single transaction.
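A compact userspace model of the flag bookkeeping (illustration only;
the flag values are stand-ins that follow the layout added below): the
first toggle in a batch records the original state in a WAS_* bit, so
the abort path can restore it no matter how many times the flag was
flipped afterwards.

#include <stdio.h>

#define F_DORMANT     0x1	/* stand-in for NFT_TABLE_F_DORMANT */
#define F_WAS_DORMANT 0x2	/* stand-in for __NFT_TABLE_F_WAS_DORMANT */
#define F_WAS_AWAKEN  0x4	/* stand-in for __NFT_TABLE_F_WAS_AWAKEN */
#define F_UPDATE      (F_WAS_DORMANT | F_WAS_AWAKEN)

static void request_dormant(unsigned int *flags, int dormant)
{
	if (dormant && !(*flags & F_DORMANT)) {
		*flags |= F_DORMANT;
		if (!(*flags & F_UPDATE))
			*flags |= F_WAS_AWAKEN;		/* remember: it was awake */
	} else if (!dormant && (*flags & F_DORMANT)) {
		*flags &= ~F_DORMANT;
		if (!(*flags & F_UPDATE))
			*flags |= F_WAS_DORMANT;	/* remember: it was dormant */
	}
}

static void abort_batch(unsigned int *flags)
{
	if (*flags & F_WAS_DORMANT)
		*flags |= F_DORMANT;
	else if (*flags & F_WAS_AWAKEN)
		*flags &= ~F_DORMANT;
	*flags &= ~F_UPDATE;
}

int main(void)
{
	unsigned int flags = 0;		/* the table starts awake */

	request_dormant(&flags, 1);	/* dormant on */
	request_dormant(&flags, 0);	/* dormant off */
	request_dormant(&flags, 1);	/* dormant on again, same batch */
	abort_batch(&flags);
	printf("dormant after abort: %d (expected 0)\n", !!(flags & F_DORMANT));
	return 0;
}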
Reported-by: syzbot+7ad5cd1615f2d89c6e7e@syzkaller.appspotmail.com
Fixes: 0ce7cf4127f1 ("netfilter: nftables: update table flags from the commit phase")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/net/netfilter/nf_tables.h | 6 ---
include/uapi/linux/netfilter/nf_tables.h | 1
net/netfilter/nf_tables_api.c | 59 +++++++++++++++++++++----------
3 files changed, 41 insertions(+), 25 deletions(-)
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1392,16 +1392,10 @@ struct nft_trans_chain {
struct nft_trans_table {
bool update;
- u8 state;
- u32 flags;
};
#define nft_trans_table_update(trans) \
(((struct nft_trans_table *)trans->data)->update)
-#define nft_trans_table_state(trans) \
- (((struct nft_trans_table *)trans->data)->state)
-#define nft_trans_table_flags(trans) \
- (((struct nft_trans_table *)trans->data)->flags)
struct nft_trans_elem {
struct nft_set *set;
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -162,6 +162,7 @@ enum nft_hook_attributes {
enum nft_table_flags {
NFT_TABLE_F_DORMANT = 0x1,
};
+#define NFT_TABLE_F_MASK (NFT_TABLE_F_DORMANT)
/**
* enum nft_table_attributes - nf_tables table netlink attributes
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -701,7 +701,8 @@ static int nf_tables_fill_table_info(str
goto nla_put_failure;
if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
- nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
+ nla_put_be32(skb, NFTA_TABLE_FLAGS,
+ htonl(table->flags & NFT_TABLE_F_MASK)) ||
nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
NFTA_TABLE_PAD))
@@ -890,20 +891,22 @@ err:
static void nf_tables_table_disable(struct net *net, struct nft_table *table)
{
+ table->flags &= ~NFT_TABLE_F_DORMANT;
nft_table_disable(net, table, 0);
+ table->flags |= NFT_TABLE_F_DORMANT;
}
-enum {
- NFT_TABLE_STATE_UNCHANGED = 0,
- NFT_TABLE_STATE_DORMANT,
- NFT_TABLE_STATE_WAKEUP
-};
+#define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1)
+#define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0)
+#define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1)
+#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \
+ __NFT_TABLE_F_WAS_AWAKEN)
static int nf_tables_updtable(struct nft_ctx *ctx)
{
struct nft_trans *trans;
u32 flags;
- int ret = 0;
+ int ret;
if (!ctx->nla[NFTA_TABLE_FLAGS])
return 0;
@@ -922,21 +925,27 @@ static int nf_tables_updtable(struct nft
if ((flags & NFT_TABLE_F_DORMANT) &&
!(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
- nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT;
+ ctx->table->flags |= NFT_TABLE_F_DORMANT;
+ if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE))
+ ctx->table->flags |= __NFT_TABLE_F_WAS_AWAKEN;
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
ctx->table->flags & NFT_TABLE_F_DORMANT) {
- ret = nf_tables_table_enable(ctx->net, ctx->table);
- if (ret >= 0)
- nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP;
+ ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+ if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) {
+ ret = nf_tables_table_enable(ctx->net, ctx->table);
+ if (ret < 0)
+ goto err_register_hooks;
+
+ ctx->table->flags |= __NFT_TABLE_F_WAS_DORMANT;
+ }
}
- if (ret < 0)
- goto err;
- nft_trans_table_flags(trans) = flags;
nft_trans_table_update(trans) = true;
nft_trans_commit_list_add_tail(ctx->net, trans);
+
return 0;
-err:
+
+err_register_hooks:
nft_trans_destroy(trans);
return ret;
}
@@ -7302,10 +7311,14 @@ static int nf_tables_commit(struct net *
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
- if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT)
+ if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
+ nft_trans_destroy(trans);
+ break;
+ }
+ if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT)
nf_tables_table_disable(net, trans->ctx.table);
- trans->ctx.table->flags = nft_trans_table_flags(trans);
+ trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
} else {
nft_clear(net, trans->ctx.table);
}
@@ -7500,9 +7513,17 @@ static int __nf_tables_abort(struct net
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
- if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP)
+ if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
+ nft_trans_destroy(trans);
+ break;
+ }
+ if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) {
nf_tables_table_disable(net, trans->ctx.table);
-
+ trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+ } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) {
+ trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT;
+ }
+ trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
nft_trans_destroy(trans);
} else {
list_del_rcu(&trans->ctx.table->list);
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 158/159] netfilter: nf_tables: disable toggling dormant table state more than once
[not found] <20231124171941.909624388@linuxfoundation.org>
` (23 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 157/159] netfilter: nf_tables: fix table flag updates Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 159/159] netfilter: nf_tables: bogus EBUSY when deleting flowtable after flush (for 5.4) Greg Kroah-Hartman
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel
Cc: Greg Kroah-Hartman, patches, Lee, Cherie-Anne,
Bing-Jhong Billy Jheng, info, Florian Westphal
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit c9bd26513b3a11b3adb3c2ed8a31a01a87173ff1 upstream.
nft -f -<<EOF
add table ip t
add table ip t { flags dormant; }
add chain ip t c { type filter hook input priority 0; }
add table ip t
EOF
This triggers a splat from the netfilter core on the next table delete,
because we lose track of the correct hook registration state:
WARNING: CPU: 2 PID: 1597 at net/netfilter/core.c:501 __nf_unregister_net_hook
RIP: 0010:__nf_unregister_net_hook+0x41b/0x570
nf_unregister_net_hook+0xb4/0xf0
__nf_tables_unregister_hook+0x160/0x1d0
[..]
The above should leave the table in the *active* state, but in fact no
hooks were registered.
Reject on/off/on games rather than attempting to fix this.
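As an illustration only (a simplified userspace C model, not the kernel
code), the effect of the check added below is that once a batch has
toggled the dormant flag of a table, any further toggle of that table in
the same batch is rejected with EINVAL:
/* Simplified model: a second dormant flag toggle on the same table
 * within one transaction batch is rejected with EINVAL.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#define NFT_TABLE_F_DORMANT		0x1
#define __NFT_TABLE_F_WAS_DORMANT	0x2
#define __NFT_TABLE_F_WAS_AWAKEN	0x4
#define __NFT_TABLE_F_UPDATE		(__NFT_TABLE_F_WAS_DORMANT | \
					 __NFT_TABLE_F_WAS_AWAKEN)
static int updtable(uint32_t *table_flags, uint32_t new_flags)
{
	if (new_flags == (*table_flags & NFT_TABLE_F_DORMANT))
		return 0;	/* no change requested */
	/* No dormant off/on/off/on games in a single transaction. */
	if (*table_flags & __NFT_TABLE_F_UPDATE)
		return -EINVAL;
	if (new_flags & NFT_TABLE_F_DORMANT)
		*table_flags |= NFT_TABLE_F_DORMANT | __NFT_TABLE_F_WAS_AWAKEN;
	else
		*table_flags = (*table_flags & ~NFT_TABLE_F_DORMANT) |
			       __NFT_TABLE_F_WAS_DORMANT;
	return 0;
}
int main(void)
{
	uint32_t flags = 0;	/* "add table ip t": table is awake */
	/* "add table ip t { flags dormant; }" */
	printf("first toggle:  %d\n", updtable(&flags, NFT_TABLE_F_DORMANT));
	/* second toggle of the same table in the same batch: rejected */
	printf("second toggle: %d\n", updtable(&flags, 0));
	return 0;
}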
Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates")
Reported-by: "Lee, Cherie-Anne" <cherie.lee@starlabs.sg>
Cc: Bing-Jhong Billy Jheng <billy@starlabs.sg>
Cc: info@starlabs.sg
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nf_tables_api.c | 4 ++++
1 file changed, 4 insertions(+)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -918,6 +918,10 @@ static int nf_tables_updtable(struct nft
if (flags == ctx->table->flags)
return 0;
+ /* No dormant off/on/off/on games in single transaction */
+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return -EINVAL;
+
trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
sizeof(struct nft_trans_table));
if (trans == NULL)
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 5.4 159/159] netfilter: nf_tables: bogus EBUSY when deleting flowtable after flush (for 5.4)
[not found] <20231124171941.909624388@linuxfoundation.org>
` (24 preceding siblings ...)
2023-11-24 17:56 ` [PATCH 5.4 158/159] netfilter: nf_tables: disable toggling dormant table state more than once Greg Kroah-Hartman
@ 2023-11-24 17:56 ` Greg Kroah-Hartman
25 siblings, 0 replies; 26+ messages in thread
From: Greg Kroah-Hartman @ 2023-11-24 17:56 UTC (permalink / raw)
To: stable, netfilter-devel; +Cc: Greg Kroah-Hartman, patches, Pablo Neira Ayuso
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
3f0465a9ef02 ("netfilter: nf_tables: dynamically allocate hooks per
net_device in flowtables") reworks flowtable support to allow for
dynamic allocation of hooks, which implicitly fixes the following
bogus EBUSY within a single transaction:
delete flowtable
add flowtable # same flowtable with same devices, it hits EBUSY
This patch does not exist in any tree; it fixes this issue only for the
5.4 -stable Linux kernel.
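As an illustration only (a simplified userspace C model, not the kernel
code), the device conflict scan should skip flowtables that are no longer
active in the next generation, i.e. those deleted earlier in the same
batch, which is what the added nft_is_active_next() check achieves:
/* Simplified model: when scanning existing flowtables for a device
 * conflict, ignore entries scheduled for deletion in the current
 * transaction, so delete + re-add of the same flowtable no longer
 * returns a bogus EBUSY.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
struct flowtable {
	const char *dev;
	bool active_next;	/* still present after this transaction? */
};
static int check_device_conflict(const struct flowtable *fts, int n,
				 const char *dev)
{
	for (int i = 0; i < n; i++) {
		if (!fts[i].active_next)
			continue;	/* deleted in this batch: skip */
		if (strcmp(fts[i].dev, dev) == 0)
			return -EBUSY;
	}
	return 0;
}
int main(void)
{
	/* "delete flowtable" marked the entry inactive for the next
	 * generation; "add flowtable" with the same device now succeeds.
	 */
	struct flowtable fts[] = { { "eth0", false } };
	printf("re-add on eth0: %d\n", check_device_conflict(fts, 1, "eth0"));
	return 0;
}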
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/netfilter/nf_tables_api.c | 3 +++
1 file changed, 3 insertions(+)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6132,6 +6132,9 @@ static int nf_tables_newflowtable(struct
continue;
list_for_each_entry(ft, &table->flowtables, list) {
+ if (!nft_is_active_next(net, ft))
+ continue;
+
for (k = 0; k < ft->ops_len; k++) {
if (!ft->ops[k].dev)
continue;
^ permalink raw reply [flat|nested] 26+ messages in thread
Thread overview: 26+ messages
[not found] <20231124171941.909624388@linuxfoundation.org>
2023-11-24 17:55 ` [PATCH 5.4 134/159] netfilter: nf_tables: pass context to nft_set_destroy() Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 135/159] netfilter: nftables: rename set element data activation/deactivation functions Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 136/159] netfilter: nf_tables: drop map element references from preparation phase Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 137/159] netfilter: nft_set_rbtree: Switch to node list walk for overlap detection Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 138/159] netfilter: nft_set_rbtree: fix null deref on element insertion Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 139/159] netfilter: nft_set_rbtree: fix overlap expiration walk Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 140/159] netfilter: nf_tables: dont skip expired elements during walk Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 141/159] netfilter: nf_tables: GC transaction API to avoid race with control plane Greg Kroah-Hartman
2023-11-24 17:55 ` [PATCH 5.4 142/159] netfilter: nf_tables: adapt set backend to use GC transaction API Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 143/159] netfilter: nft_set_hash: mark set element as dead when deleting from packet path Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 144/159] netfilter: nf_tables: remove busy mark and gc batch API Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 145/159] netfilter: nf_tables: fix GC transaction races with netns and netlink event exit path Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 146/159] netfilter: nf_tables: GC transaction race with netns dismantle Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 147/159] netfilter: nf_tables: GC transaction race with abort path Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 148/159] netfilter: nf_tables: use correct lock to protect gc_list Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 149/159] netfilter: nf_tables: defer gc run if previous batch is still pending Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 150/159] netfilter: nft_set_rbtree: skip sync GC for new elements in this transaction Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 151/159] netfilter: nft_set_rbtree: use read spinlock to avoid datapath contention Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 152/159] netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 153/159] netfilter: nf_tables: fix memleak when more than 255 elements expired Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 154/159] netfilter: nf_tables: unregister flowtable hooks on netns exit Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 155/159] netfilter: nf_tables: double hook unregistration in netns path Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 156/159] netfilter: nftables: update table flags from the commit phase Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 157/159] netfilter: nf_tables: fix table flag updates Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 158/159] netfilter: nf_tables: disable toggling dormant table state more than once Greg Kroah-Hartman
2023-11-24 17:56 ` [PATCH 5.4 159/159] netfilter: nf_tables: bogus EBUSY when deleting flowtable after flush (for 5.4) Greg Kroah-Hartman