* [PATCH -stable,6.1.x 2/3] netfilter: nf_tables: allow clone callbacks to sleep
2024-08-12 10:23 [PATCH -stable,6.1.x 0/3] Netfilter fixes for -stable Pablo Neira Ayuso
2024-08-12 10:23 ` [PATCH -stable,6.1.x 1/3] netfilter: nf_tables: bail out if stateful expression provides no .clone Pablo Neira Ayuso
@ 2024-08-12 10:23 ` Pablo Neira Ayuso
2024-08-12 10:23 ` [PATCH -stable,6.1.x 3/3] netfilter: nf_tables: prefer nft_chain_validate Pablo Neira Ayuso
2024-08-12 15:01 ` [PATCH -stable,6.1.x 0/3] Netfilter fixes for -stable Greg KH
3 siblings, 0 replies; 7+ messages in thread
From: Pablo Neira Ayuso @ 2024-08-12 10:23 UTC (permalink / raw)
To: netfilter-devel; +Cc: gregkh, sashal, stable
From: Florian Westphal <fw@strlen.de>
commit fa23e0d4b756d25829e124d6b670a4c6bbd4bf7e upstream.
Sven Auhagen reports transaction failures with following error:
./main.nft:13:1-26: Error: Could not process rule: Cannot allocate memory
percpu: allocation failed, size=16 align=8 atomic=1, atomic alloc failed, no space left
This points to failing pcpu allocation with GFP_ATOMIC flag.
However, transactions happen from user context and are allowed to sleep.
One case where we can call into percpu allocator with GFP_ATOMIC is
nft_counter expression.
Normally this happens from control plane, so this could use GFP_KERNEL
instead. But one use case, element insertion from packet path,
needs to use GFP_ATOMIC allocations (nft_dynset expression).
At this time, .clone callbacks always use GFP_ATOMIC for this reason.
Add gfp_t argument to the .clone function and pass GFP_KERNEL or
GFP_ATOMIC flag depending on context, this allows all clone memory
allocations to sleep for the normal (transaction) case.
Cc: Sven Auhagen <sven.auhagen@voleatech.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_tables.h | 4 ++--
net/netfilter/nf_tables_api.c | 8 ++++----
net/netfilter/nft_connlimit.c | 4 ++--
net/netfilter/nft_counter.c | 4 ++--
net/netfilter/nft_dynset.c | 2 +-
net/netfilter/nft_last.c | 4 ++--
net/netfilter/nft_limit.c | 14 ++++++++------
net/netfilter/nft_quota.c | 4 ++--
8 files changed, 23 insertions(+), 21 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 9a80d0251d8f..e365302fed95 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -387,7 +387,7 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
return (void *)expr->data;
}
-int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src);
+int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp);
void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
const struct nft_expr *expr);
@@ -889,7 +889,7 @@ struct nft_expr_ops {
struct nft_regs *regs,
const struct nft_pktinfo *pkt);
int (*clone)(struct nft_expr *dst,
- const struct nft_expr *src);
+ const struct nft_expr *src, gfp_t gfp);
unsigned int size;
int (*init)(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 85d7250a29a2..8f8470ba6c37 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3119,7 +3119,7 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
return ERR_PTR(err);
}
-int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
+int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp)
{
int err;
@@ -3127,7 +3127,7 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
return -EINVAL;
dst->ops = src->ops;
- err = src->ops->clone(dst, src);
+ err = src->ops->clone(dst, src, gfp);
if (err < 0)
return err;
@@ -6059,7 +6059,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
if (!expr)
goto err_expr;
- err = nft_expr_clone(expr, set->exprs[i]);
+ err = nft_expr_clone(expr, set->exprs[i], GFP_KERNEL_ACCOUNT);
if (err < 0) {
kfree(expr);
goto err_expr;
@@ -6098,7 +6098,7 @@ static int nft_set_elem_expr_setup(struct nft_ctx *ctx,
for (i = 0; i < num_exprs; i++) {
expr = nft_setelem_expr_at(elem_expr, elem_expr->size);
- err = nft_expr_clone(expr, expr_array[i]);
+ err = nft_expr_clone(expr, expr_array[i], GFP_KERNEL_ACCOUNT);
if (err < 0)
goto err_elem_expr_setup;
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index d657f999a11b..793994622b87 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -209,12 +209,12 @@ static void nft_connlimit_destroy(const struct nft_ctx *ctx,
nft_connlimit_do_destroy(ctx, priv);
}
-static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_connlimit *priv_dst = nft_expr_priv(dst);
struct nft_connlimit *priv_src = nft_expr_priv(src);
- priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
+ priv_dst->list = kmalloc(sizeof(*priv_dst->list), gfp);
if (!priv_dst->list)
return -ENOMEM;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index f4d3573e8782..b5fe7fe4b60d 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -225,7 +225,7 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
nft_counter_do_destroy(priv);
}
-static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
@@ -235,7 +235,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
nft_counter_fetch(priv, &total);
- cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
+ cpu_stats = alloc_percpu_gfp(struct nft_counter, gfp);
if (cpu_stats == NULL)
return -ENOMEM;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index a470e5f61284..953aba871f45 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -35,7 +35,7 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv,
for (i = 0; i < priv->num_exprs; i++) {
expr = nft_setelem_expr_at(elem_expr, elem_expr->size);
- if (nft_expr_clone(expr, priv->expr_array[i]) < 0)
+ if (nft_expr_clone(expr, priv->expr_array[i], GFP_ATOMIC) < 0)
return -1;
elem_expr->size += priv->expr_array[i]->ops->size;
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index eaa54964cf23..2ff387b649cf 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -101,12 +101,12 @@ static void nft_last_destroy(const struct nft_ctx *ctx,
kfree(priv->last);
}
-static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_last_priv *priv_dst = nft_expr_priv(dst);
struct nft_last_priv *priv_src = nft_expr_priv(src);
- priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
+ priv_dst->last = kzalloc(sizeof(*priv_dst->last), gfp);
if (!priv_dst->last)
return -ENOMEM;
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 36ded7d43262..a263596da99a 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -150,7 +150,7 @@ static void nft_limit_destroy(const struct nft_ctx *ctx,
}
static int nft_limit_clone(struct nft_limit_priv *priv_dst,
- const struct nft_limit_priv *priv_src)
+ const struct nft_limit_priv *priv_src, gfp_t gfp)
{
priv_dst->tokens_max = priv_src->tokens_max;
priv_dst->rate = priv_src->rate;
@@ -158,7 +158,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst,
priv_dst->burst = priv_src->burst;
priv_dst->invert = priv_src->invert;
- priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
+ priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), gfp);
if (!priv_dst->limit)
return -ENOMEM;
@@ -222,14 +222,15 @@ static void nft_limit_pkts_destroy(const struct nft_ctx *ctx,
nft_limit_destroy(ctx, &priv->limit);
}
-static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src,
+ gfp_t gfp)
{
struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst);
struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src);
priv_dst->cost = priv_src->cost;
- return nft_limit_clone(&priv_dst->limit, &priv_src->limit);
+ return nft_limit_clone(&priv_dst->limit, &priv_src->limit, gfp);
}
static struct nft_expr_type nft_limit_type;
@@ -280,12 +281,13 @@ static void nft_limit_bytes_destroy(const struct nft_ctx *ctx,
nft_limit_destroy(ctx, priv);
}
-static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src,
+ gfp_t gfp)
{
struct nft_limit_priv *priv_dst = nft_expr_priv(dst);
struct nft_limit_priv *priv_src = nft_expr_priv(src);
- return nft_limit_clone(priv_dst, priv_src);
+ return nft_limit_clone(priv_dst, priv_src, gfp);
}
static const struct nft_expr_ops nft_limit_bytes_ops = {
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 410a5fcf8830..ef8e7cdbd0e6 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -232,7 +232,7 @@ static void nft_quota_destroy(const struct nft_ctx *ctx,
return nft_quota_do_destroy(ctx, priv);
}
-static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_quota *priv_dst = nft_expr_priv(dst);
struct nft_quota *priv_src = nft_expr_priv(src);
@@ -240,7 +240,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
priv_dst->quota = priv_src->quota;
priv_dst->flags = priv_src->flags;
- priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
+ priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), gfp);
if (!priv_dst->consumed)
return -ENOMEM;
--
2.30.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH -stable,6.1.x 3/3] netfilter: nf_tables: prefer nft_chain_validate
2024-08-12 10:23 [PATCH -stable,6.1.x 0/3] Netfilter fixes for -stable Pablo Neira Ayuso
2024-08-12 10:23 ` [PATCH -stable,6.1.x 1/3] netfilter: nf_tables: bail out if stateful expression provides no .clone Pablo Neira Ayuso
2024-08-12 10:23 ` [PATCH -stable,6.1.x 2/3] netfilter: nf_tables: allow clone callbacks to sleep Pablo Neira Ayuso
@ 2024-08-12 10:23 ` Pablo Neira Ayuso
2024-08-12 15:01 ` [PATCH -stable,6.1.x 0/3] Netfilter fixes for -stable Greg KH
3 siblings, 0 replies; 7+ messages in thread
From: Pablo Neira Ayuso @ 2024-08-12 10:23 UTC (permalink / raw)
To: netfilter-devel; +Cc: gregkh, sashal, stable
From: Florian Westphal <fw@strlen.de>
commit cff3bd012a9512ac5ed858d38e6ed65f6391008c upstream.
nft_chain_validate already performs loop detection because a cycle will
result in a call stack overflow (ctx->level >= NFT_JUMP_STACK_SIZE).
It also follows maps via ->validate callback in nft_lookup, so there
appears no reason to iterate the maps again.
nf_tables_check_loops() and all its helper functions can be removed.
This improves ruleset load time significantly, from 23s down to 12s.
This also fixes a crash bug. Old loop detection code can result in
unbounded recursion:
BUG: TASK stack guard page was hit at ....
Oops: stack guard page: 0000 [#1] PREEMPT SMP KASAN
CPU: 4 PID: 1539 Comm: nft Not tainted 6.10.0-rc5+ #1
[..]
with a suitable ruleset during validation of register stores.
I can't see any actual reason to attempt to check for this from
nft_validate_register_store(), at this point the transaction is still in
progress, so we don't have a full picture of the rule graph.
For nf-next it might make sense to either remove it or make this depend
on table->validate_state in case we could catch an error earlier
(for improved error reporting to userspace).
Fixes: 20a69341f2d0 ("netfilter: nf_tables: add netlink set API")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_tables_api.c | 151 +++-------------------------------
1 file changed, 13 insertions(+), 138 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8f8470ba6c37..10180d280e79 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3515,6 +3515,15 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
nf_tables_rule_destroy(ctx, rule);
}
+/** nft_chain_validate - loop detection and hook validation
+ *
+ * @ctx: context containing call depth and base chain
+ * @chain: chain to validate
+ *
+ * Walk through the rules of the given chain and chase all jumps/gotos
+ * and set lookups until either the jump limit is hit or all reachable
+ * chains have been validated.
+ */
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
{
struct nft_expr *expr, *last;
@@ -3533,6 +3542,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (!expr->ops->validate)
continue;
+ /* This may call nft_chain_validate() recursively,
+ * callers that do so must increment ctx->level.
+ */
err = expr->ops->validate(ctx, expr, &data);
if (err < 0)
return err;
@@ -10190,143 +10202,6 @@ int nft_chain_validate_hooks(const struct nft_chain *chain,
}
EXPORT_SYMBOL_GPL(nft_chain_validate_hooks);
-/*
- * Loop detection - walk through the ruleset beginning at the destination chain
- * of a new jump until either the source chain is reached (loop) or all
- * reachable chains have been traversed.
- *
- * The loop check is performed whenever a new jump verdict is added to an
- * expression or verdict map or a verdict map is bound to a new chain.
- */
-
-static int nf_tables_check_loops(const struct nft_ctx *ctx,
- const struct nft_chain *chain);
-
-static int nft_check_loops(const struct nft_ctx *ctx,
- const struct nft_set_ext *ext)
-{
- const struct nft_data *data;
- int ret;
-
- data = nft_set_ext_data(ext);
- switch (data->verdict.code) {
- case NFT_JUMP:
- case NFT_GOTO:
- ret = nf_tables_check_loops(ctx, data->verdict.chain);
- break;
- default:
- ret = 0;
- break;
- }
-
- return ret;
-}
-
-static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
- struct nft_set *set,
- const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
-{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
-
- if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
- *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
- return 0;
-
- return nft_check_loops(ctx, ext);
-}
-
-static int nft_set_catchall_loops(const struct nft_ctx *ctx,
- struct nft_set *set)
-{
- u8 genmask = nft_genmask_next(ctx->net);
- struct nft_set_elem_catchall *catchall;
- struct nft_set_ext *ext;
- int ret = 0;
-
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
- ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_active(ext, genmask))
- continue;
-
- ret = nft_check_loops(ctx, ext);
- if (ret < 0)
- return ret;
- }
-
- return ret;
-}
-
-static int nf_tables_check_loops(const struct nft_ctx *ctx,
- const struct nft_chain *chain)
-{
- const struct nft_rule *rule;
- const struct nft_expr *expr, *last;
- struct nft_set *set;
- struct nft_set_binding *binding;
- struct nft_set_iter iter;
-
- if (ctx->chain == chain)
- return -ELOOP;
-
- list_for_each_entry(rule, &chain->rules, list) {
- nft_rule_for_each_expr(expr, last, rule) {
- struct nft_immediate_expr *priv;
- const struct nft_data *data;
- int err;
-
- if (strcmp(expr->ops->type->name, "immediate"))
- continue;
-
- priv = nft_expr_priv(expr);
- if (priv->dreg != NFT_REG_VERDICT)
- continue;
-
- data = &priv->data;
- switch (data->verdict.code) {
- case NFT_JUMP:
- case NFT_GOTO:
- err = nf_tables_check_loops(ctx,
- data->verdict.chain);
- if (err < 0)
- return err;
- break;
- default:
- break;
- }
- }
- }
-
- list_for_each_entry(set, &ctx->table->sets, list) {
- if (!nft_is_active_next(ctx->net, set))
- continue;
- if (!(set->flags & NFT_SET_MAP) ||
- set->dtype != NFT_DATA_VERDICT)
- continue;
-
- list_for_each_entry(binding, &set->bindings, list) {
- if (!(binding->flags & NFT_SET_MAP) ||
- binding->chain != chain)
- continue;
-
- iter.genmask = nft_genmask_next(ctx->net);
- iter.skip = 0;
- iter.count = 0;
- iter.err = 0;
- iter.fn = nf_tables_loop_check_setelem;
-
- set->ops->walk(ctx, set, &iter);
- if (!iter.err)
- iter.err = nft_set_catchall_loops(ctx, set);
-
- if (iter.err < 0)
- return iter.err;
- }
- }
-
- return 0;
-}
-
/**
* nft_parse_u32_check - fetch u32 attribute and check for maximum value
*
@@ -10439,7 +10314,7 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
if (data != NULL &&
(data->verdict.code == NFT_GOTO ||
data->verdict.code == NFT_JUMP)) {
- err = nf_tables_check_loops(ctx, data->verdict.chain);
+ err = nft_chain_validate(ctx, data->verdict.chain);
if (err < 0)
return err;
}
--
2.30.2
^ permalink raw reply related [flat|nested] 7+ messages in thread