From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Florian Westphal <fw@strlen.de>
Cc: netfilter-devel@vger.kernel.org, Sven Auhagen <Sven.Auhagen@belden.com>
Subject: Re: [PATCH nf-next v2 1/2] netfilter: nf_tables: allow iter callbacks to sleep
Date: Thu, 28 Aug 2025 16:30:09 +0200 [thread overview]
Message-ID: <aLBn8Q3hgcqCvk4D@calendula> (raw)
In-Reply-To: <20250822081542.27261-2-fw@strlen.de>
Hi Florian,
On Fri, Aug 22, 2025 at 10:15:37AM +0200, Florian Westphal wrote:
[...]
> diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
> index 266d0c637225..cc302543c2e4 100644
> --- a/net/netfilter/nft_set_hash.c
> +++ b/net/netfilter/nft_set_hash.c
> @@ -30,6 +30,7 @@ struct nft_rhash {
> struct nft_rhash_elem {
> struct nft_elem_priv priv;
> struct rhash_head node;
> + struct llist_node walk_node;
> u32 wq_gc_seq;
> struct nft_set_ext ext;
> };
> @@ -144,6 +145,7 @@ nft_rhash_update(struct nft_set *set, const u32 *key,
> goto err1;
>
> he = nft_elem_priv_cast(elem_priv);
> + init_llist_node(&he->walk_node);
> prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
> nft_rhash_params);
> if (IS_ERR(prev))
> @@ -180,6 +182,7 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
> };
> struct nft_rhash_elem *prev;
>
> + init_llist_node(&he->walk_node);
> prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
> nft_rhash_params);
> if (IS_ERR(prev))
> @@ -261,12 +264,12 @@ static bool nft_rhash_delete(const struct nft_set *set,
> return true;
> }
>
> -static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
> - struct nft_set_iter *iter)
> +static void nft_rhash_walk_ro(const struct nft_ctx *ctx, struct nft_set *set,
> + struct nft_set_iter *iter)
> {
> struct nft_rhash *priv = nft_set_priv(set);
> - struct nft_rhash_elem *he;
> struct rhashtable_iter hti;
> + struct nft_rhash_elem *he;
>
> rhashtable_walk_enter(&priv->ht, &hti);
> rhashtable_walk_start(&hti);
> @@ -295,6 +298,99 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
> rhashtable_walk_exit(&hti);
> }
>
> +static void nft_rhash_walk_update(const struct nft_ctx *ctx,
> + struct nft_set *set,
> + struct nft_set_iter *iter)
> +{
> + struct nft_rhash *priv = nft_set_priv(set);
> + struct nft_rhash_elem *he, *tmp;
> + struct llist_node *first_node;
> + struct rhashtable_iter hti;
> + LLIST_HEAD(walk_list);
> +
> + lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex);
> +
> + if (set->in_update_walk) {
> + /* This can happen with bogus rulesets during ruleset validation
> + * when a verdict map causes a jump back to the same map.
> + *
> + * Without this extra check the walk_next loop below will see
> + * elems on the callers walk_list and skip (not validate) them.
> + */
> + iter->err = -EMLINK;
> + return;
> + }
> +
> + /* walk happens under RCU.
> + *
> + * We create a snapshot list so ->iter callback can sleep.
> + * commit_mutex is held, elements can ...
> + * .. be added in parallel from dataplane (dynset)
> + * .. be marked as dead in parallel from dataplane (dynset).
> + * .. be queued for removal in parallel (gc timeout).
> + * .. not be freed: transaction mutex is held.
> + */
> + rhashtable_walk_enter(&priv->ht, &hti);
> + rhashtable_walk_start(&hti);
> +
> + while ((he = rhashtable_walk_next(&hti))) {
> + if (IS_ERR(he)) {
> + if (PTR_ERR(he) != -EAGAIN) {
> + iter->err = PTR_ERR(he);
> + break;
> + }
> +
> + continue;
> + }
> +
> + /* rhashtable resized during walk, skip */
> + if (llist_on_list(&he->walk_node))
> + continue;
> +
> + if (iter->count < iter->skip) {
> + iter->count++;
> + continue;
> + }
Not causing any harm, but is iter->count useful for this
NFT_ITER_UPDATE variant?
I think iter->count is only used for netlink dumps, to resume from the
last netlink message.
> + llist_add(&he->walk_node, &walk_list);
> + }
> + rhashtable_walk_stop(&hti);
> + rhashtable_walk_exit(&hti);
> +
> + first_node = __llist_del_all(&walk_list);
> + set->in_update_walk = true;
> + llist_for_each_entry_safe(he, tmp, first_node, walk_node) {
> + if (iter->err == 0) {
> + iter->err = iter->fn(ctx, set, iter, &he->priv);
> + if (iter->err == 0)
> + iter->count++;
> + }
> +
> + /* all entries must be cleared again, else next ->walk iteration
> + * will skip entries.
> + */
> + init_llist_node(&he->walk_node);
> + }
> + set->in_update_walk = false;
> +}
> +
> +static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
> + struct nft_set_iter *iter)
> +{
> + switch (iter->type) {
> + case NFT_ITER_UPDATE:
> + nft_rhash_walk_update(ctx, set, iter);
> + break;
> + case NFT_ITER_READ:
> + nft_rhash_walk_ro(ctx, set, iter);
> + break;
> + default:
> + iter->err = -EINVAL;
> + WARN_ON_ONCE(1);
> + break;
> + }
> +}
> +
> static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
> struct nft_set_ext *ext)
> {
next prev parent reply other threads:[~2025-08-28 14:30 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-22 8:15 [PATCH nf-next v2 0/2] netfilter: nf_tables: avoid atomic allocations for set flush Florian Westphal
2025-08-22 8:15 ` [PATCH nf-next v2 1/2] netfilter: nf_tables: allow iter callbacks to sleep Florian Westphal
2025-08-28 14:30 ` Pablo Neira Ayuso [this message]
2025-08-28 15:10 ` Florian Westphal
2025-08-28 23:46 ` Florian Westphal
2025-08-22 8:15 ` [PATCH nf-next v2 2/2] netfilter: nf_tables: all transaction allocations can now sleep Florian Westphal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aLBn8Q3hgcqCvk4D@calendula \
--to=pablo@netfilter.org \
--cc=Sven.Auhagen@belden.com \
--cc=fw@strlen.de \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.