* [PATCH nf-next v2 1/5] netfilter: nf_tables: prefer nft_trans_elem_alloc helper
2024-10-11 0:32 [PATCH nf-next v2 0/5] netfilter: nf_tables: reduce set element transaction size Florian Westphal
@ 2024-10-11 0:32 ` Florian Westphal
2024-10-11 0:33 ` [PATCH nf-next v2 2/5] netfilter: nf_tables: add nft_trans_commit_list_add_elem helper Florian Westphal
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Florian Westphal @ 2024-10-11 0:32 UTC (permalink / raw)
To: netfilter-devel; +Cc: Florian Westphal
Reduce references to sizeof(struct nft_trans_elem).
Preparation patch to move this to a flexiable array to store
elem references.
Signed-off-by: Florian Westphal <fw@strlen.de>
---
net/netfilter/nf_tables_api.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a24fe62650a7..2f85f4a10c50 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6409,7 +6409,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
-static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
+static struct nft_trans *nft_trans_elem_alloc(const struct nft_ctx *ctx,
int msg_type,
struct nft_set *set)
{
@@ -7471,13 +7471,11 @@ static int __nft_set_catchall_flush(const struct nft_ctx *ctx,
{
struct nft_trans *trans;
- trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
- sizeof(struct nft_trans_elem), GFP_KERNEL);
+ trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
if (!trans)
return -ENOMEM;
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
- nft_trans_elem_set(trans) = set;
nft_trans_elem_priv(trans) = elem_priv;
nft_trans_commit_list_add_tail(ctx->net, trans);
--
2.45.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH nf-next v2 2/5] netfilter: nf_tables: add nft_trans_commit_list_add_elem helper
2024-10-11 0:32 [PATCH nf-next v2 0/5] netfilter: nf_tables: reduce set element transaction size Florian Westphal
2024-10-11 0:32 ` [PATCH nf-next v2 1/5] netfilter: nf_tables: prefer nft_trans_elem_alloc helper Florian Westphal
@ 2024-10-11 0:33 ` Florian Westphal
2024-10-11 0:33 ` [PATCH nf-next v2 3/5] netfilter: nf_tables: prepare for multiple elements in nft_trans_elem structure Florian Westphal
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Florian Westphal @ 2024-10-11 0:33 UTC (permalink / raw)
To: netfilter-devel; +Cc: Florian Westphal
Add and use a wrapper to append trans_elem structures to the
transaction log.
Unlike the existing helper, pass a gfp_t to indicate if sleeping
is allowed.
This will be used by a followup patch to realloc nft_trans_elem
structures after they gain a flexible array member to reduce
number of such container structures on the transaction list.
Signed-off-by: Florian Westphal <fw@strlen.de>
---
net/netfilter/nf_tables_api.c | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2f85f4a10c50..8afcd24f9901 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -421,6 +421,17 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
}
}
+static void nft_trans_commit_list_add_elem(struct net *net, struct nft_trans *trans,
+ gfp_t gfp)
+{
+ WARN_ON_ONCE(trans->msg_type != NFT_MSG_NEWSETELEM &&
+ trans->msg_type != NFT_MSG_DELSETELEM);
+
+ might_alloc(gfp);
+
+ nft_trans_commit_list_add_tail(net, trans);
+}
+
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
{
struct nft_trans *trans;
@@ -7183,7 +7194,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (update_flags) {
nft_trans_elem_priv(trans) = elem_priv;
nft_trans_elem_update_flags(trans) = update_flags;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
goto err_elem_free;
}
}
@@ -7207,7 +7218,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
nft_trans_elem_priv(trans) = elem.priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
err_set_full:
@@ -7424,7 +7435,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
nft_setelem_data_deactivate(ctx->net, set, elem.priv);
nft_trans_elem_priv(trans) = elem.priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
fail_ops:
@@ -7460,7 +7471,7 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
nft_trans_elem_set(trans) = set;
nft_trans_elem_priv(trans) = elem_priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_ATOMIC);
return 0;
}
@@ -7477,7 +7488,7 @@ static int __nft_set_catchall_flush(const struct nft_ctx *ctx,
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
nft_trans_elem_priv(trans) = elem_priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH nf-next v2 3/5] netfilter: nf_tables: prepare for multiple elements in nft_trans_elem structure
2024-10-11 0:32 [PATCH nf-next v2 0/5] netfilter: nf_tables: reduce set element transaction size Florian Westphal
2024-10-11 0:32 ` [PATCH nf-next v2 1/5] netfilter: nf_tables: prefer nft_trans_elem_alloc helper Florian Westphal
2024-10-11 0:33 ` [PATCH nf-next v2 2/5] netfilter: nf_tables: add nft_trans_commit_list_add_elem helper Florian Westphal
@ 2024-10-11 0:33 ` Florian Westphal
2024-10-11 0:33 ` [PATCH nf-next v2 4/5] netfilter: nf_tables: switch trans_elem to real flex array Florian Westphal
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Florian Westphal @ 2024-10-11 0:33 UTC (permalink / raw)
To: netfilter-devel; +Cc: Florian Westphal
Add helpers to release the individual elements contained in the
trans_elem container structure.
No functional change intended.
Followup patch will add 'nelems' member and will turn 'priv' into
a flexible array.
These helpers can then loop over all elements.
Care needs to be taken to handle a mix of new elements and existing
elements that are being updated (e.g. timeout refresh).
Before this patch, NEWSETELEM transaction with update is released
early so nft_trans_set_elem_destroy() won't get called, so we need
to skip elements marked as update.
Signed-off-by: Florian Westphal <fw@strlen.de>
---
v2: nft_trans_set_elem_destroy must skip entries that have update
flag set -- these elements were already in the table at start of
transaction and should not be removed.
include/net/netfilter/nf_tables.h | 21 ++-
net/netfilter/nf_tables_api.c | 221 +++++++++++++++++++++---------
2 files changed, 168 insertions(+), 74 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 91ae20cb7648..2a2631edab2b 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1754,28 +1754,25 @@ enum nft_trans_elem_flags {
NFT_TRANS_UPD_EXPIRATION = (1 << 1),
};
-struct nft_trans_elem {
- struct nft_trans nft_trans;
- struct nft_set *set;
- struct nft_elem_priv *elem_priv;
+struct nft_trans_one_elem {
+ struct nft_elem_priv *priv;
u64 timeout;
u64 expiration;
u8 update_flags;
+};
+
+struct nft_trans_elem {
+ struct nft_trans nft_trans;
+ struct nft_set *set;
bool bound;
+ unsigned int nelems;
+ struct nft_trans_one_elem elems[] __counted_by(nelems);
};
#define nft_trans_container_elem(t) \
container_of(t, struct nft_trans_elem, nft_trans)
#define nft_trans_elem_set(trans) \
nft_trans_container_elem(trans)->set
-#define nft_trans_elem_priv(trans) \
- nft_trans_container_elem(trans)->elem_priv
-#define nft_trans_elem_update_flags(trans) \
- nft_trans_container_elem(trans)->update_flags
-#define nft_trans_elem_timeout(trans) \
- nft_trans_container_elem(trans)->timeout
-#define nft_trans_elem_expiration(trans) \
- nft_trans_container_elem(trans)->expiration
#define nft_trans_elem_set_bound(trans) \
nft_trans_container_elem(trans)->bound
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8afcd24f9901..a6bb8eafdcd4 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6552,28 +6552,51 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
}
/* Drop references and destroy. Called from gc, dynset and abort path. */
-void nft_set_elem_destroy(const struct nft_set *set,
- const struct nft_elem_priv *elem_priv,
- bool destroy_expr)
+static void __nft_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv,
+ bool destroy_expr)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
- struct nft_ctx ctx = {
- .net = read_pnet(&set->net),
- .family = set->table->family,
- };
nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS))
- nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
+ nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext));
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
nft_use_dec(&(*nft_set_ext_obj(ext))->use);
kfree(elem_priv);
}
+
+/* Drop references and destroy. Called from gc and dynset. */
+void nft_set_elem_destroy(const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv,
+ bool destroy_expr)
+{
+ struct nft_ctx ctx = {
+ .net = read_pnet(&set->net),
+ .family = set->table->family,
+ };
+
+ __nft_set_elem_destroy(&ctx, set, elem_priv, destroy_expr);
+}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+/* Drop references and destroy. Called from abort path. */
+static void nft_trans_set_elem_destroy(const struct nft_ctx *ctx, struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ if (te->elems[i].update_flags)
+ continue;
+
+ __nft_set_elem_destroy(ctx, te->set, te->elems[i].priv, true);
+ }
+}
+
/* Destroy element. References have been already dropped in the preparation
* path via nft_setelem_data_deactivate().
*/
@@ -6589,6 +6612,15 @@ void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
kfree(elem_priv);
}
+static void nft_trans_elems_destroy(const struct nft_ctx *ctx,
+ const struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++)
+ nf_tables_set_elem_destroy(ctx, te->set, te->elems[i].priv);
+}
+
int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_expr *expr_array[])
{
@@ -6745,6 +6777,37 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
}
}
+static void nft_trans_elem_update(const struct nft_set *set,
+ const struct nft_trans_one_elem *elem)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+ if (elem->update_flags & NFT_TRANS_UPD_TIMEOUT)
+ WRITE_ONCE(nft_set_ext_timeout(ext)->timeout, elem->timeout);
+
+ if (elem->update_flags & NFT_TRANS_UPD_EXPIRATION)
+ WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, get_jiffies_64() + elem->expiration);
+}
+
+static void nft_trans_elems_activate(const struct nft_ctx *ctx,
+ const struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ const struct nft_trans_one_elem *elem = &te->elems[i];
+
+ if (elem->update_flags) {
+ nft_trans_elem_update(te->set, elem);
+ continue;
+ }
+
+ nft_setelem_activate(ctx->net, te->set, elem->priv);
+ nf_tables_setelem_notify(ctx, te->set, elem->priv,
+ NFT_MSG_NEWSETELEM);
+ }
+}
+
static int nft_setelem_catchall_deactivate(const struct net *net,
struct nft_set *set,
struct nft_set_elem *elem)
@@ -6827,6 +6890,24 @@ static void nft_setelem_remove(const struct net *net,
set->ops->remove(net, set, elem_priv);
}
+static void nft_trans_elems_remove(const struct nft_ctx *ctx,
+ const struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ nf_tables_setelem_notify(ctx, te->set,
+ te->elems[i].priv,
+ te->nft_trans.msg_type);
+
+ nft_setelem_remove(ctx->net, te->set, te->elems[i].priv);
+ if (!nft_setelem_is_catchall(te->set, te->elems[i].priv)) {
+ atomic_dec(&te->set->nelems);
+ te->set->ndeact--;
+ }
+ }
+}
+
static bool nft_setelem_valid_key_end(const struct nft_set *set,
struct nlattr **nla, u32 flags)
{
@@ -7178,22 +7259,26 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
else if (!(nlmsg_flags & NLM_F_EXCL)) {
err = 0;
if (nft_set_ext_exists(ext2, NFT_SET_EXT_TIMEOUT)) {
+ struct nft_trans_one_elem *update;
+
+ update = &nft_trans_container_elem(trans)->elems[0];
+
update_flags = 0;
if (timeout != nft_set_ext_timeout(ext2)->timeout) {
- nft_trans_elem_timeout(trans) = timeout;
+ update->timeout = timeout;
if (expiration == 0)
expiration = timeout;
update_flags |= NFT_TRANS_UPD_TIMEOUT;
}
if (expiration) {
- nft_trans_elem_expiration(trans) = expiration;
+ update->expiration = expiration;
update_flags |= NFT_TRANS_UPD_EXPIRATION;
}
if (update_flags) {
- nft_trans_elem_priv(trans) = elem_priv;
- nft_trans_elem_update_flags(trans) = update_flags;
+ update->priv = elem_priv;
+ update->update_flags = update_flags;
nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
goto err_elem_free;
}
@@ -7217,7 +7302,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
}
- nft_trans_elem_priv(trans) = elem.priv;
+ nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
@@ -7355,6 +7440,50 @@ void nft_setelem_data_deactivate(const struct net *net,
nft_use_dec(&(*nft_set_ext_obj(ext))->use);
}
+/* similar to nft_trans_elems_remove, but called from abort path to undo newsetelem.
+ * No notifications and no ndeact changes.
+ *
+ * Returns true if set had been added to (i.e., elements need to be removed again).
+ */
+static bool nft_trans_elems_new_abort(const struct nft_ctx *ctx,
+ const struct nft_trans_elem *te)
+{
+ bool removed = false;
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ if (te->elems[i].update_flags)
+ continue;
+
+ if (!te->set->ops->abort || nft_setelem_is_catchall(te->set, te->elems[i].priv))
+ nft_setelem_remove(ctx->net, te->set, te->elems[i].priv);
+
+ if (!nft_setelem_is_catchall(te->set, te->elems[i].priv))
+ atomic_dec(&te->set->nelems);
+
+ removed = true;
+ }
+
+ return removed;
+}
+
+/* Called from abort path to undo DELSETELEM/DESTROYSETELEM. */
+static void nft_trans_elems_destroy_abort(const struct nft_ctx *ctx,
+ const struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ if (!nft_setelem_active_next(ctx->net, te->set, te->elems[i].priv)) {
+ nft_setelem_data_activate(ctx->net, te->set, te->elems[i].priv);
+ nft_setelem_activate(ctx->net, te->set, te->elems[i].priv);
+ }
+
+ if (!nft_setelem_is_catchall(te->set, te->elems[i].priv))
+ te->set->ndeact--;
+ }
+}
+
static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
@@ -7434,7 +7563,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
nft_setelem_data_deactivate(ctx->net, set, elem.priv);
- nft_trans_elem_priv(trans) = elem.priv;
+ nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
@@ -7461,7 +7590,8 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
return 0;
trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
- sizeof(struct nft_trans_elem), GFP_ATOMIC);
+ struct_size_t(struct nft_trans_elem, elems, 1),
+ GFP_ATOMIC);
if (!trans)
return -ENOMEM;
@@ -7470,7 +7600,8 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
nft_trans_elem_set(trans) = set;
- nft_trans_elem_priv(trans) = elem_priv;
+ nft_trans_container_elem(trans)->nelems = 1;
+ nft_trans_container_elem(trans)->elems[0].priv = elem_priv;
nft_trans_commit_list_add_elem(ctx->net, trans, GFP_ATOMIC);
return 0;
@@ -7487,7 +7618,7 @@ static int __nft_set_catchall_flush(const struct nft_ctx *ctx,
return -ENOMEM;
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
- nft_trans_elem_priv(trans) = elem_priv;
+ nft_trans_container_elem(trans)->elems[0].priv = elem_priv;
nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
@@ -9666,9 +9797,7 @@ static void nft_commit_release(struct nft_trans *trans)
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
- nf_tables_set_elem_destroy(&ctx,
- nft_trans_elem_set(trans),
- nft_trans_elem_priv(trans));
+ nft_trans_elems_destroy(&ctx, nft_trans_container_elem(trans));
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
@@ -10521,25 +10650,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWSETELEM:
te = nft_trans_container_elem(trans);
- if (te->update_flags) {
- const struct nft_set_ext *ext =
- nft_set_elem_ext(te->set, te->elem_priv);
-
- if (te->update_flags & NFT_TRANS_UPD_TIMEOUT) {
- WRITE_ONCE(nft_set_ext_timeout(ext)->timeout,
- te->timeout);
- }
- if (te->update_flags & NFT_TRANS_UPD_EXPIRATION) {
- WRITE_ONCE(nft_set_ext_timeout(ext)->expiration,
- get_jiffies_64() + te->expiration);
- }
- } else {
- nft_setelem_activate(net, te->set, te->elem_priv);
- }
+ nft_trans_elems_activate(&ctx, te);
- nf_tables_setelem_notify(&ctx, te->set,
- te->elem_priv,
- NFT_MSG_NEWSETELEM);
if (te->set->ops->commit &&
list_empty(&te->set->pending_update)) {
list_add_tail(&te->set->pending_update,
@@ -10551,14 +10663,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DESTROYSETELEM:
te = nft_trans_container_elem(trans);
- nf_tables_setelem_notify(&ctx, te->set,
- te->elem_priv,
- trans->msg_type);
- nft_setelem_remove(net, te->set, te->elem_priv);
- if (!nft_setelem_is_catchall(te->set, te->elem_priv)) {
- atomic_dec(&te->set->nelems);
- te->set->ndeact--;
- }
+ nft_trans_elems_remove(&ctx, te);
+
if (te->set->ops->commit &&
list_empty(&te->set->pending_update)) {
list_add_tail(&te->set->pending_update,
@@ -10678,8 +10784,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
nft_set_destroy(&ctx, nft_trans_set(trans));
break;
case NFT_MSG_NEWSETELEM:
- nft_set_elem_destroy(nft_trans_elem_set(trans),
- nft_trans_elem_priv(trans), true);
+ nft_trans_set_elem_destroy(&ctx, nft_trans_container_elem(trans));
break;
case NFT_MSG_NEWOBJ:
nft_obj_destroy(&ctx, nft_trans_obj(trans));
@@ -10836,18 +10941,15 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
- if (nft_trans_elem_update_flags(trans) ||
- nft_trans_elem_set_bound(trans)) {
+ if (nft_trans_elem_set_bound(trans)) {
nft_trans_destroy(trans);
break;
}
te = nft_trans_container_elem(trans);
- if (!te->set->ops->abort ||
- nft_setelem_is_catchall(te->set, te->elem_priv))
- nft_setelem_remove(net, te->set, te->elem_priv);
-
- if (!nft_setelem_is_catchall(te->set, te->elem_priv))
- atomic_dec(&te->set->nelems);
+ if (!nft_trans_elems_new_abort(&ctx, te)) {
+ nft_trans_destroy(trans);
+ break;
+ }
if (te->set->ops->abort &&
list_empty(&te->set->pending_update)) {
@@ -10859,12 +10961,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
case NFT_MSG_DESTROYSETELEM:
te = nft_trans_container_elem(trans);
- if (!nft_setelem_active_next(net, te->set, te->elem_priv)) {
- nft_setelem_data_activate(net, te->set, te->elem_priv);
- nft_setelem_activate(net, te->set, te->elem_priv);
- }
- if (!nft_setelem_is_catchall(te->set, te->elem_priv))
- te->set->ndeact--;
+ nft_trans_elems_destroy_abort(&ctx, te);
if (te->set->ops->abort &&
list_empty(&te->set->pending_update)) {
--
2.45.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH nf-next v2 4/5] netfilter: nf_tables: switch trans_elem to real flex array
2024-10-11 0:32 [PATCH nf-next v2 0/5] netfilter: nf_tables: reduce set element transaction size Florian Westphal
` (2 preceding siblings ...)
2024-10-11 0:33 ` [PATCH nf-next v2 3/5] netfilter: nf_tables: prepare for multiple elements in nft_trans_elem structure Florian Westphal
@ 2024-10-11 0:33 ` Florian Westphal
2024-10-11 0:33 ` [PATCH nf-next v2 5/5] netfilter: nf_tables: allocate element update information dynamically Florian Westphal
2024-10-12 17:24 ` [PATCH nf-next v2 0/5] netfilter: nf_tables: reduce set element transaction size Pablo Neira Ayuso
5 siblings, 0 replies; 7+ messages in thread
From: Florian Westphal @ 2024-10-11 0:33 UTC (permalink / raw)
To: netfilter-devel; +Cc: Florian Westphal
When queueing a set element add or removal operation to the transaction
log, check if the previous operation already asks for a the identical
operation on the same set.
If so, store the element reference in the preeeding operation.
This significantlty reduces memory consumption when many set add/delete
operations appear in a single transaction.
Example: 10k elements require 937kb of memory (10k allocations from
kmalloc-96 slab).
Assuming we can compact 4 elements in the same set, 468 kbytes
are needed (64 bytes for base struct, nft_trans_elemn, 32 bytes
for nft_trans_one_elem structure, so 2500 allocations from kmalloc-192
slab).
For large batch updates we can compact up to 62 elements
into one single nft_trans_elem structure (~65% mem reduction):
(64 bytes for base struct, nft_trans_elem, 32 byte for nft_trans_one_elem
struct).
We can halve size of nft_trans_one_elem struct by moving
timeout/expire/update_flags into a dynamically allocated structure,
this allows to store 124 elements in a 2k slab nft_trans_elem struct.
This is done in a followup patch.
Signed-off-by: Florian Westphal <fw@strlen.de>
---
net/netfilter/nf_tables_api.c | 79 ++++++++++++++++++++++++++++++++++-
1 file changed, 77 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a6bb8eafdcd4..acd574986647 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -25,6 +25,7 @@
#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
#define NFT_SET_MAX_ANONLEN 16
+#define NFT_MAX_SET_NELEMS ((2048 - sizeof(struct nft_trans_elem)) / sizeof(struct nft_trans_one_elem))
unsigned int nf_tables_net_id __read_mostly;
@@ -391,6 +392,69 @@ static void nf_tables_unregister_hook(struct net *net,
return __nf_tables_unregister_hook(net, table, chain, false);
}
+static bool nft_trans_collapse_set_elem_allowed(const struct nft_trans_elem *a, const struct nft_trans_elem *b)
+{
+ return a->set == b->set && a->bound == b->bound && a->nelems < NFT_MAX_SET_NELEMS;
+}
+
+static bool nft_trans_collapse_set_elem(struct nftables_pernet *nft_net,
+ struct nft_trans_elem *tail,
+ struct nft_trans_elem *trans,
+ gfp_t gfp)
+{
+ unsigned int nelems, old_nelems = tail->nelems;
+ struct nft_trans_elem *new_trans;
+
+ if (!nft_trans_collapse_set_elem_allowed(tail, trans))
+ return false;
+
+ if (WARN_ON_ONCE(trans->nelems != 1))
+ return false;
+
+ if (check_add_overflow(old_nelems, trans->nelems, &nelems))
+ return false;
+
+ /* krealloc might free tail which invalidates list pointers */
+ list_del_init(&tail->nft_trans.list);
+
+ new_trans = krealloc(tail, struct_size(tail, elems, nelems), gfp);
+ if (!new_trans) {
+ list_add_tail(&tail->nft_trans.list, &nft_net->commit_list);
+ return false;
+ }
+
+ INIT_LIST_HEAD(&new_trans->nft_trans.list);
+ new_trans->nelems = nelems;
+ new_trans->elems[old_nelems] = trans->elems[0];
+ list_add_tail(&new_trans->nft_trans.list, &nft_net->commit_list);
+
+ return true;
+}
+
+static bool nft_trans_try_collapse(struct nftables_pernet *nft_net,
+ struct nft_trans *trans, gfp_t gfp)
+{
+ struct nft_trans *tail;
+
+ if (list_empty(&nft_net->commit_list))
+ return false;
+
+ tail = list_last_entry(&nft_net->commit_list, struct nft_trans, list);
+
+ if (tail->msg_type != trans->msg_type)
+ return false;
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSETELEM:
+ case NFT_MSG_DELSETELEM:
+ return nft_trans_collapse_set_elem(nft_net,
+ nft_trans_container_elem(tail),
+ nft_trans_container_elem(trans), gfp);
+ }
+
+ return false;
+}
+
static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -424,11 +488,18 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
static void nft_trans_commit_list_add_elem(struct net *net, struct nft_trans *trans,
gfp_t gfp)
{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
WARN_ON_ONCE(trans->msg_type != NFT_MSG_NEWSETELEM &&
trans->msg_type != NFT_MSG_DELSETELEM);
might_alloc(gfp);
+ if (nft_trans_try_collapse(nft_net, trans, gfp)) {
+ kfree(trans);
+ return;
+ }
+
nft_trans_commit_list_add_tail(net, trans);
}
@@ -6424,13 +6495,17 @@ static struct nft_trans *nft_trans_elem_alloc(const struct nft_ctx *ctx,
int msg_type,
struct nft_set *set)
{
+ struct nft_trans_elem *te;
struct nft_trans *trans;
- trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));
+ trans = nft_trans_alloc(ctx, msg_type, struct_size(te, elems, 1));
if (trans == NULL)
return NULL;
- nft_trans_elem_set(trans) = set;
+ te = nft_trans_container_elem(trans);
+ te->nelems = 1;
+ te->set = set;
+
return trans;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH nf-next v2 5/5] netfilter: nf_tables: allocate element update information dynamically
2024-10-11 0:32 [PATCH nf-next v2 0/5] netfilter: nf_tables: reduce set element transaction size Florian Westphal
` (3 preceding siblings ...)
2024-10-11 0:33 ` [PATCH nf-next v2 4/5] netfilter: nf_tables: switch trans_elem to real flex array Florian Westphal
@ 2024-10-11 0:33 ` Florian Westphal
2024-10-12 17:24 ` [PATCH nf-next v2 0/5] netfilter: nf_tables: reduce set element transaction size Pablo Neira Ayuso
5 siblings, 0 replies; 7+ messages in thread
From: Florian Westphal @ 2024-10-11 0:33 UTC (permalink / raw)
To: netfilter-devel; +Cc: Florian Westphal
Move the timeout/expire/flag members from nft_trans_one_elem struct into
a dybamically allocated structure, only needed when timeout update was
requested.
This halves size of nft_trans_one_elem struct and allows to compact up to
124 elements in one transaction container rather than 62.
This halves memory requirements for a large flush or insert transaction,
where ->update remains NULL.
Care has to be taken to release the extra data in all spots, including
abort path.
Signed-off-by: Florian Westphal <fw@strlen.de>
---
v2: nft_trans_set_elem_destroy must skip entries with NULL priv field,
those were update requests that have been handled already during
error unwinding.
include/net/netfilter/nf_tables.h | 10 +++--
net/netfilter/nf_tables_api.c | 61 ++++++++++++++++++++-----------
2 files changed, 46 insertions(+), 25 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2a2631edab2b..1caf142900fe 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1754,11 +1754,15 @@ enum nft_trans_elem_flags {
NFT_TRANS_UPD_EXPIRATION = (1 << 1),
};
-struct nft_trans_one_elem {
- struct nft_elem_priv *priv;
+struct nft_elem_update {
u64 timeout;
u64 expiration;
- u8 update_flags;
+ u8 flags;
+};
+
+struct nft_trans_one_elem {
+ struct nft_elem_priv *priv;
+ struct nft_elem_update *update;
};
struct nft_trans_elem {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index acd574986647..2c98005daf84 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6665,7 +6665,8 @@ static void nft_trans_set_elem_destroy(const struct nft_ctx *ctx, struct nft_tra
int i;
for (i = 0; i < te->nelems; i++) {
- if (te->elems[i].update_flags)
+ /* skip update request, see nft_trans_elems_new_abort() */
+ if (!te->elems[i].priv)
continue;
__nft_set_elem_destroy(ctx, te->set, te->elems[i].priv, true);
@@ -6856,24 +6857,28 @@ static void nft_trans_elem_update(const struct nft_set *set,
const struct nft_trans_one_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_elem_update *update = elem->update;
- if (elem->update_flags & NFT_TRANS_UPD_TIMEOUT)
- WRITE_ONCE(nft_set_ext_timeout(ext)->timeout, elem->timeout);
+ if (update->flags & NFT_TRANS_UPD_TIMEOUT)
+ WRITE_ONCE(nft_set_ext_timeout(ext)->timeout, update->timeout);
- if (elem->update_flags & NFT_TRANS_UPD_EXPIRATION)
- WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, get_jiffies_64() + elem->expiration);
+ if (update->flags & NFT_TRANS_UPD_EXPIRATION)
+ WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, get_jiffies_64() + update->expiration);
}
static void nft_trans_elems_activate(const struct nft_ctx *ctx,
- const struct nft_trans_elem *te)
+ struct nft_trans_elem *te)
{
int i;
for (i = 0; i < te->nelems; i++) {
- const struct nft_trans_one_elem *elem = &te->elems[i];
+ struct nft_trans_one_elem *elem = &te->elems[i];
- if (elem->update_flags) {
+ if (elem->update) {
nft_trans_elem_update(te->set, elem);
+ kfree(elem->update);
+ elem->update = NULL;
+ elem->priv = NULL;
continue;
}
@@ -6971,6 +6976,8 @@ static void nft_trans_elems_remove(const struct nft_ctx *ctx,
int i;
for (i = 0; i < te->nelems; i++) {
+ WARN_ON_ONCE(te->elems[i].update);
+
nf_tables_setelem_notify(ctx, te->set,
te->elems[i].priv,
te->nft_trans.msg_type);
@@ -7019,7 +7026,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data_desc desc;
enum nft_registers dreg;
struct nft_trans *trans;
- u8 update_flags;
u64 expiration;
u64 timeout;
int err, i;
@@ -7334,26 +7340,32 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
else if (!(nlmsg_flags & NLM_F_EXCL)) {
err = 0;
if (nft_set_ext_exists(ext2, NFT_SET_EXT_TIMEOUT)) {
- struct nft_trans_one_elem *update;
-
- update = &nft_trans_container_elem(trans)->elems[0];
+ struct nft_elem_update update = { };
- update_flags = 0;
if (timeout != nft_set_ext_timeout(ext2)->timeout) {
- update->timeout = timeout;
+ update.timeout = timeout;
if (expiration == 0)
expiration = timeout;
- update_flags |= NFT_TRANS_UPD_TIMEOUT;
+ update.flags |= NFT_TRANS_UPD_TIMEOUT;
}
if (expiration) {
- update->expiration = expiration;
- update_flags |= NFT_TRANS_UPD_EXPIRATION;
+ update.expiration = expiration;
+ update.flags |= NFT_TRANS_UPD_EXPIRATION;
}
- if (update_flags) {
- update->priv = elem_priv;
- update->update_flags = update_flags;
+ if (update.flags) {
+ struct nft_trans_one_elem *ue;
+
+ ue = &nft_trans_container_elem(trans)->elems[0];
+
+ ue->update = kmemdup(&update, sizeof(update), GFP_KERNEL);
+ if (!ue->update) {
+ err = -ENOMEM;
+ goto err_element_clash;
+ }
+
+ ue->priv = elem_priv;
nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
goto err_elem_free;
}
@@ -7521,14 +7533,19 @@ void nft_setelem_data_deactivate(const struct net *net,
* Returns true if set had been added to (i.e., elements need to be removed again).
*/
static bool nft_trans_elems_new_abort(const struct nft_ctx *ctx,
- const struct nft_trans_elem *te)
+ struct nft_trans_elem *te)
{
bool removed = false;
int i;
for (i = 0; i < te->nelems; i++) {
- if (te->elems[i].update_flags)
+ if (te->elems[i].update) {
+ kfree(te->elems[i].update);
+ te->elems[i].update = NULL;
+ /* Update request, so do not release this element */
+ te->elems[i].priv = NULL;
continue;
+ }
if (!te->set->ops->abort || nft_setelem_is_catchall(te->set, te->elems[i].priv))
nft_setelem_remove(ctx->net, te->set, te->elems[i].priv);
--
2.45.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* Re: [PATCH nf-next v2 0/5] netfilter: nf_tables: reduce set element transaction size
2024-10-11 0:32 [PATCH nf-next v2 0/5] netfilter: nf_tables: reduce set element transaction size Florian Westphal
` (4 preceding siblings ...)
2024-10-11 0:33 ` [PATCH nf-next v2 5/5] netfilter: nf_tables: allocate element update information dynamically Florian Westphal
@ 2024-10-12 17:24 ` Pablo Neira Ayuso
5 siblings, 0 replies; 7+ messages in thread
From: Pablo Neira Ayuso @ 2024-10-12 17:24 UTC (permalink / raw)
To: Florian Westphal; +Cc: netfilter-devel
On Fri, Oct 11, 2024 at 02:32:58AM +0200, Florian Westphal wrote:
> v2: only change is in patch 3, and by extension, the last one:
> During transaction abort, we need to handle an aggregate container to
> contain both new set elements and updates. The latter must be
> skipped, else we remove element that already existed at start of the
> transaction.
>
> original cover letter:
>
> When doing a flush on a set or mass adding/removing elements from a
> set, each element needs to allocate 96 bytes to hold the transactional
> state.
>
> In such cases, virtually all the information in struct nft_trans_elem
> is the same.
>
> Change nft_trans_elem to a flex-array, i.e. a single nft_trans_elem
> can hold multiple set element pointers.
>
> The number of elements that can be stored in one nft_trans_elem is limited
> by the slab allocator, this series limits the compaction to at most 62
> elements as it caps the reallocation to 2048 bytes of memory.
Applied to nf-next, thanks Florian
^ permalink raw reply [flat|nested] 7+ messages in thread