From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
Subject: [PATCH -stable,4.19.x 03/40] netfilter: nf_tables: drop map element references from preparation phase
Date: Thu, 13 Jun 2024 03:01:32 +0200 [thread overview]
Message-ID: <20240613010209.104423-4-pablo@netfilter.org> (raw)
In-Reply-To: <20240613010209.104423-1-pablo@netfilter.org>
[ Upstream commit 628bd3e49cba1c066228e23d71a852c23e26da73 ]
set .destroy callback releases the references to other objects in maps.
This is very late and it results in spurious EBUSY errors. Drop refcount
from the preparation phase instead, update set backend not to drop
reference counter from set .destroy path.
Exceptions: NFT_TRANS_PREPARE_ERROR does not require to drop the
reference counter because the transaction abort path releases the map
references for each element since the set is unbound. The abort path
also deals with releasing reference counter for new elements added to
unbound sets.
Fixes: 591054469b3e ("netfilter: nf_tables: revisit chain/object refcounting from elements")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_tables.h | 5 +-
net/netfilter/nf_tables_api.c | 89 +++++++++++++++++++++++++++----
net/netfilter/nft_set_bitmap.c | 5 +-
net/netfilter/nft_set_hash.c | 23 ++++++--
net/netfilter/nft_set_rbtree.c | 5 +-
5 files changed, 108 insertions(+), 19 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index fd85286482c1..cff8c7141276 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -349,7 +349,8 @@ struct nft_set_ops {
int (*init)(const struct nft_set *set,
const struct nft_set_desc *desc,
const struct nlattr * const nla[]);
- void (*destroy)(const struct nft_set *set);
+ void (*destroy)(const struct nft_ctx *ctx,
+ const struct nft_set *set);
void (*gc_init)(const struct nft_set *set);
unsigned int elemsize;
@@ -645,6 +646,8 @@ void *nft_set_elem_init(const struct nft_set *set,
u64 timeout, gfp_t gfp);
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr);
+void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set, void *elem);
/**
* struct nft_set_gc_batch_head - nf_tables set garbage collection batch
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index acd2763bb784..a9dfe3183565 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -388,6 +388,31 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
return 0;
}
+static void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
+static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ nft_setelem_data_deactivate(ctx->net, set, elem);
+
+ return 0;
+}
+
+static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .fn = nft_mapelem_deactivate,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ WARN_ON_ONCE(iter.err);
+}
+
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
@@ -396,6 +421,9 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
if (err < 0)
return err;
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
nft_deactivate_next(ctx->net, set);
nft_use_dec(&ctx->table->use);
@@ -3741,7 +3769,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
return 0;
err4:
- ops->destroy(set);
+ ops->destroy(&ctx, set);
err3:
kfree(set->name);
err2:
@@ -3758,7 +3786,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
if (WARN_ON(set->use > 0))
return;
- set->ops->destroy(set);
+ set->ops->destroy(ctx, set);
module_put(to_set_type(set->ops)->owner);
kfree(set->name);
kvfree(set);
@@ -3883,10 +3911,39 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
+static void nft_setelem_data_activate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
+static int nft_mapelem_activate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ nft_setelem_data_activate(ctx->net, set, elem);
+
+ return 0;
+}
+
+static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .fn = nft_mapelem_activate,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ WARN_ON_ONCE(iter.err);
+}
+
void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
{
- if (nft_set_is_anonymous(set))
+ if (nft_set_is_anonymous(set)) {
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(ctx, set);
+
nft_clear(ctx->net, set);
+ }
nft_use_inc_restore(&set->use);
}
@@ -3907,13 +3964,20 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
nft_use_dec(&set->use);
break;
case NFT_TRANS_PREPARE:
- if (nft_set_is_anonymous(set))
- nft_deactivate_next(ctx->net, set);
+ if (nft_set_is_anonymous(set)) {
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+ nft_deactivate_next(ctx->net, set);
+ }
nft_use_dec(&set->use);
return;
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
+ if (nft_set_is_anonymous(set) &&
+ set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
nft_use_dec(&set->use);
/* fall through */
default:
@@ -4473,6 +4537,7 @@ void *nft_set_elem_init(const struct nft_set *set,
return elem;
}
+/* Drop references and destroy. Called from gc, dynset and abort path. */
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr)
{
@@ -4501,11 +4566,11 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
-/* Only called from commit path, nft_setelem_data_deactivate() already deals
- * with the refcounting from the preparation phase.
+/* Destroy element. References have been already dropped in the preparation
+ * path via nft_setelem_data_deactivate().
*/
-static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
- const struct nft_set *set, void *elem)
+void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set, void *elem)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
@@ -4513,6 +4578,7 @@ static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
nf_tables_expr_destroy(ctx, nft_set_ext_expr(ext));
kfree(elem);
}
+EXPORT_SYMBOL_GPL(nf_tables_set_elem_destroy);
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr, u32 nlmsg_flags)
@@ -6940,6 +7006,8 @@ static int __nf_tables_abort(struct net *net)
case NFT_MSG_DELSET:
nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_set(trans));
+ if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(&trans->ctx, nft_trans_set(trans));
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
@@ -7604,6 +7672,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
list_for_each_entry_safe(set, ns, &table->sets, list) {
list_del(&set->list);
nft_use_dec(&table->use);
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(&ctx, set);
+
nft_set_destroy(&ctx, set);
}
list_for_each_entry_safe(obj, ne, &table->objects, list) {
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index f866bd41e5d2..80dce1182215 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -273,13 +273,14 @@ static int nft_bitmap_init(const struct nft_set *set,
return 0;
}
-static void nft_bitmap_destroy(const struct nft_set *set)
+static void nft_bitmap_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be, *n;
list_for_each_entry_safe(be, n, &priv->list, head)
- nft_set_elem_destroy(set, be, true);
+ nf_tables_set_elem_destroy(ctx, set, be);
}
static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 0b8510a4185d..f2be05731740 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -379,19 +379,31 @@ static int nft_rhash_init(const struct nft_set *set,
return 0;
}
+struct nft_rhash_ctx {
+ const struct nft_ctx ctx;
+ const struct nft_set *set;
+};
+
static void nft_rhash_elem_destroy(void *ptr, void *arg)
{
- nft_set_elem_destroy(arg, ptr, true);
+ struct nft_rhash_ctx *rhash_ctx = arg;
+
+ nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr);
}
-static void nft_rhash_destroy(const struct nft_set *set)
+static void nft_rhash_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_ctx rhash_ctx = {
+ .ctx = *ctx,
+ .set = set,
+ };
cancel_delayed_work_sync(&priv->gc_work);
rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
- (void *)set);
+ (void *)&rhash_ctx);
}
/* Number of buckets is stored in u32, so cap our result to 1U<<31 */
@@ -629,7 +641,8 @@ static int nft_hash_init(const struct nft_set *set,
return 0;
}
-static void nft_hash_destroy(const struct nft_set *set)
+static void nft_hash_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he;
@@ -639,7 +652,7 @@ static void nft_hash_destroy(const struct nft_set *set)
for (i = 0; i < priv->buckets; i++) {
hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
hlist_del_rcu(&he->node);
- nft_set_elem_destroy(set, he, true);
+ nf_tables_set_elem_destroy(ctx, set, he);
}
}
}
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 9c7ec2ec1fcf..60ef5dea89fa 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -466,7 +466,8 @@ static int nft_rbtree_init(const struct nft_set *set,
return 0;
}
-static void nft_rbtree_destroy(const struct nft_set *set)
+static void nft_rbtree_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
@@ -477,7 +478,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_set_elem_destroy(set, rbe, true);
+ nf_tables_set_elem_destroy(ctx, set, rbe);
}
}
--
2.30.2
next prev parent reply other threads:[~2024-06-13 1:02 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-13 1:01 [PATCH -stable,4.19.x 00/40] Netfilter fixes for -stable Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 01/40] netfilter: nf_tables: pass context to nft_set_destroy() Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 02/40] netfilter: nftables: rename set element data activation/deactivation functions Pablo Neira Ayuso
2024-06-13 1:01 ` Pablo Neira Ayuso [this message]
2024-06-13 1:01 ` [PATCH -stable,4.19.x 04/40] netfilter: nft_set_rbtree: allow loose matching of closing element in interval Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 05/40] netfilter: nft_set_rbtree: Add missing expired checks Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 06/40] netfilter: nft_set_rbtree: Switch to node list walk for overlap detection Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 07/40] netfilter: nft_set_rbtree: fix null deref on element insertion Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 08/40] netfilter: nft_set_rbtree: fix overlap expiration walk Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 09/40] netfilter: nf_tables: don't skip expired elements during walk Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 10/40] netfilter: nf_tables: GC transaction API to avoid race with control plane Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 11/40] netfilter: nf_tables: adapt set backend to use GC transaction API Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 12/40] netfilter: nf_tables: remove busy mark and gc batch API Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 13/40] netfilter: nf_tables: fix GC transaction races with netns and netlink event exit path Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 14/40] netfilter: nf_tables: GC transaction race with netns dismantle Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 15/40] netfilter: nf_tables: GC transaction race with abort path Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 16/40] netfilter: nf_tables: defer gc run if previous batch is still pending Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 17/40] netfilter: nft_set_rbtree: skip sync GC for new elements in this transaction Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 18/40] netfilter: nft_set_rbtree: use read spinlock to avoid datapath contention Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 19/40] netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 20/40] netfilter: nf_tables: fix memleak when more than 255 elements expired Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 21/40] netfilter: nf_tables: unregister flowtable hooks on netns exit Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 22/40] netfilter: nf_tables: double hook unregistration in netns path Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 23/40] netfilter: nftables: update table flags from the commit phase Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 24/40] netfilter: nf_tables: fix table flag updates Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 25/40] netfilter: nf_tables: disable toggling dormant table state more than once Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 26/40] netfilter: nf_tables: bogus EBUSY when deleting flowtable after flush (for 4.19) Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 27/40] netfilter: nft_dynset: fix timeouts later than 23 days Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 28/40] netfilter: nftables: exthdr: fix 4-byte stack OOB write Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 29/40] netfilter: nft_dynset: report EOPNOTSUPP on missing set feature Pablo Neira Ayuso
2024-06-13 1:01 ` [PATCH -stable,4.19.x 30/40] netfilter: nft_dynset: relax superfluous check on set updates Pablo Neira Ayuso
2024-06-13 1:02 ` [PATCH -stable,4.19.x 31/40] netfilter: nf_tables: mark newset as dead on transaction abort Pablo Neira Ayuso
2024-06-13 1:02 ` [PATCH -stable,4.19.x 32/40] netfilter: nf_tables: skip dead set elements in netlink dump Pablo Neira Ayuso
2024-06-13 1:02 ` [PATCH -stable,4.19.x 33/40] netfilter: nf_tables: validate NFPROTO_* family Pablo Neira Ayuso
2024-06-13 1:02 ` [PATCH -stable,4.19.x 34/40] netfilter: nft_set_rbtree: skip end interval element from gc Pablo Neira Ayuso
2024-06-13 1:02 ` [PATCH -stable,4.19.x 35/40] netfilter: nf_tables: set dormant flag on hook register failure Pablo Neira Ayuso
2024-06-13 1:02 ` [PATCH -stable,4.19.x 36/40] netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate() Pablo Neira Ayuso
2024-06-13 1:02 ` [PATCH -stable,4.19.x 37/40] netfilter: nf_tables: do not compare internal table flags on updates Pablo Neira Ayuso
2024-06-13 1:02 ` [PATCH -stable,4.19.x 38/40] netfilter: nf_tables: mark set as dead when unbinding anonymous set with timeout Pablo Neira Ayuso
2024-06-13 1:02 ` [PATCH -stable,4.19.x 39/40] netfilter: nf_tables: reject new basechain after table flag update Pablo Neira Ayuso
2024-06-13 1:02 ` [PATCH -stable,4.19.x 40/40] netfilter: nf_tables: discard table flag update with pending basechain deletion Pablo Neira Ayuso
2024-06-13 6:43 ` [PATCH -stable,4.19.x 00/40] Netfilter fixes for -stable Greg KH
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240613010209.104423-4-pablo@netfilter.org \
--to=pablo@netfilter.org \
--cc=gregkh@linuxfoundation.org \
--cc=netfilter-devel@vger.kernel.org \
--cc=sashal@kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).