From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org,
pabeni@redhat.com, edumazet@google.com, fw@strlen.de
Subject: [PATCH net-next 02/19] netfilter: nft_set_rbtree: prefer sync gc to async worker
Date: Wed, 25 Oct 2023 23:25:38 +0200
Message-ID: <20231025212555.132775-3-pablo@netfilter.org>
In-Reply-To: <20231025212555.132775-1-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

There is no need for asynchronous garbage collection; rbtree inserts
can only happen from the netlink control plane.

We already perform on-demand gc on insertion, in the area of the
tree where the insertion takes place, but we don't do a full tree
walk there for performance reasons.

Do a full gc walk at the end of the transaction instead and
remove the async worker.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
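[ Not part of the patch: a condensed, illustrative sketch of the new
  synchronous gc flow, for reviewers. It omits the interval end-element
  handling and the erase locking shown in the diff below, and the NULL
  check after nft_trans_gc_catchall_sync() is defensive rather than
  copied from this patch. ]

/* Illustrative only: walk the whole tree from the commit path and
 * batch expired elements into a synchronous gc transaction.
 */
static void example_rbtree_gc(struct nft_set *set)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	struct net *net = read_pnet(&set->net);
	struct nft_rbtree_elem *rbe;
	struct rb_node *node, *next;
	struct nft_trans_gc *gc;

	/* seq 0: sync gc needs no gc sequence number */
	gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
	if (!gc)
		return;

	for (node = rb_first(&priv->root); node; node = next) {
		next = rb_next(node);
		rbe = rb_entry(node, struct nft_rbtree_elem, node);

		if (!nft_set_elem_expired(&rbe->ext))
			continue;

		/* may queue a full batch and allocate a new one */
		gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
		if (!gc)
			return;

		/* deactivate and unlink, then schedule the free */
		nft_rbtree_gc_remove(net, set, priv, rbe);
		nft_trans_gc_elem_add(gc, rbe);
	}

	gc = nft_trans_gc_catchall_sync(gc);
	if (gc)
		nft_trans_gc_queue_sync_done(gc);
}

Since rbtree inserts only come from the netlink control plane, this
runs in process context, which is what makes the GFP_KERNEL
allocations and the full tree walk affordable here, unlike in the
async worker this patch removes.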
net/netfilter/nft_set_rbtree.c | 124 +++++++++++++++++----------------
1 file changed, 65 insertions(+), 59 deletions(-)
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index d59be2bc6e6c..7d1004f9e7d2 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -19,7 +19,7 @@ struct nft_rbtree {
struct rb_root root;
rwlock_t lock;
seqcount_rwlock_t count;
- struct delayed_work gc_work;
+ unsigned long last_gc;
};
struct nft_rbtree_elem {
@@ -48,8 +48,7 @@ static int nft_rbtree_cmp(const struct nft_set *set,
static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
{
- return nft_set_elem_expired(&rbe->ext) ||
- nft_set_elem_is_dead(&rbe->ext);
+ return nft_set_elem_expired(&rbe->ext);
}
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
@@ -508,6 +507,15 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
return err;
}
+static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe)
+{
+ write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
+ rb_erase(&rbe->node, &priv->root);
+ write_seqcount_end(&priv->count);
+ write_unlock_bh(&priv->lock);
+}
+
static void nft_rbtree_remove(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
@@ -515,11 +523,7 @@ static void nft_rbtree_remove(const struct net *net,
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe = elem->priv;
- write_lock_bh(&priv->lock);
- write_seqcount_begin(&priv->count);
- rb_erase(&rbe->node, &priv->root);
- write_seqcount_end(&priv->count);
- write_unlock_bh(&priv->lock);
+ nft_rbtree_erase(priv, rbe);
}
static void nft_rbtree_activate(const struct net *net,
@@ -613,45 +617,40 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
read_unlock_bh(&priv->lock);
}
-static void nft_rbtree_gc(struct work_struct *work)
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
{
+ struct nft_set_elem elem = {
+ .priv = rbe,
+ };
+
+ nft_setelem_data_deactivate(net, set, &elem);
+ nft_rbtree_erase(priv, rbe);
+}
+
+static void nft_rbtree_gc(struct nft_set *set)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe, *rbe_end = NULL;
struct nftables_pernet *nft_net;
- struct nft_rbtree *priv;
+ struct rb_node *node, *next;
struct nft_trans_gc *gc;
- struct rb_node *node;
- struct nft_set *set;
- unsigned int gc_seq;
struct net *net;
- priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
nft_net = nft_pernet(net);
- gc_seq = READ_ONCE(nft_net->gc_seq);
- if (nft_set_gc_is_pending(set))
- goto done;
-
- gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
if (!gc)
- goto done;
-
- read_lock_bh(&priv->lock);
- for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+ return;
- /* Ruleset has been updated, try later. */
- if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
- nft_trans_gc_destroy(gc);
- gc = NULL;
- goto try_later;
- }
+ for (node = rb_first(&priv->root); node ; node = next) {
+ next = rb_next(node);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- if (nft_set_elem_is_dead(&rbe->ext))
- goto dead_elem;
-
/* elements are reversed in the rbtree for historical reasons,
* from highest to lowest value, that is why end element is
* always visited before the start element.
@@ -663,37 +662,34 @@ static void nft_rbtree_gc(struct work_struct *work)
if (!nft_set_elem_expired(&rbe->ext))
continue;
- nft_set_elem_dead(&rbe->ext);
-
- if (!rbe_end)
- continue;
-
- nft_set_elem_dead(&rbe_end->ext);
-
- gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
goto try_later;
- nft_trans_gc_elem_add(gc, rbe_end);
- rbe_end = NULL;
-dead_elem:
- gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ /* end element needs to be removed first, it has
+ * no timeout extension.
+ */
+ if (rbe_end) {
+ nft_rbtree_gc_remove(net, set, priv, rbe_end);
+ nft_trans_gc_elem_add(gc, rbe_end);
+ rbe_end = NULL;
+ }
+
+ gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
goto try_later;
+ nft_rbtree_gc_remove(net, set, priv, rbe);
nft_trans_gc_elem_add(gc, rbe);
}
- gc = nft_trans_gc_catchall_async(gc, gc_seq);
-
try_later:
- read_unlock_bh(&priv->lock);
- if (gc)
- nft_trans_gc_queue_async_done(gc);
-done:
- queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
- nft_set_gc_interval(set));
+ if (gc) {
+ gc = nft_trans_gc_catchall_sync(gc);
+ nft_trans_gc_queue_sync_done(gc);
+ priv->last_gc = jiffies;
+ }
}
static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
@@ -712,11 +708,6 @@ static int nft_rbtree_init(const struct nft_set *set,
seqcount_rwlock_init(&priv->count, &priv->lock);
priv->root = RB_ROOT;
- INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);
- if (set->flags & NFT_SET_TIMEOUT)
- queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
- nft_set_gc_interval(set));
-
return 0;
}
@@ -727,8 +718,6 @@ static void nft_rbtree_destroy(const struct nft_ctx *ctx,
struct nft_rbtree_elem *rbe;
struct rb_node *node;
- cancel_delayed_work_sync(&priv->gc_work);
- rcu_barrier();
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
@@ -754,6 +743,21 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
+static void nft_rbtree_commit(struct nft_set *set)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+
+ if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
+ nft_rbtree_gc(set);
+}
+
+static void nft_rbtree_gc_init(const struct nft_set *set)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+
+ priv->last_gc = jiffies;
+}
+
const struct nft_set_type nft_set_rbtree_type = {
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
.ops = {
@@ -767,6 +771,8 @@ const struct nft_set_type nft_set_rbtree_type = {
.deactivate = nft_rbtree_deactivate,
.flush = nft_rbtree_flush,
.activate = nft_rbtree_activate,
+ .commit = nft_rbtree_commit,
+ .gc_init = nft_rbtree_gc_init,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
.get = nft_rbtree_get,
--
2.30.2