From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Florian Westphal <fw@strlen.de>
Cc: netfilter-devel@vger.kernel.org, syzkaller-bugs@googlegroups.com,
syzbot+5d8c5789c8cb076b2c25@syzkaller.appspotmail.com
Subject: Re: [PATCH nf] netfilter: nf_tables: make destruction work queue pernet
Date: Wed, 5 Mar 2025 23:08:38 +0100 [thread overview]
Message-ID: <Z8jLZv6asBnqrniC@calendula> (raw)
In-Reply-To: <20250304115706.2566960-1-fw@strlen.de>
Hi Florian,
On Tue, Mar 04, 2025 at 12:55:53PM +0100, Florian Westphal wrote:
> The call to flush_work before tearing down a table from the netlink
> notifier was supposed to make sure that all earlier updates (e.g. rule
> add) that might reference that table have been processed.
>
> Unfortunately, flush_work() waits for the last queued instance.
> This could be an instance that is different from the one that we must
> wait for.
>
> This is because transactions are protected with a pernet mutex, but the
> work item is global, so holding the transaction mutex doesn't prevent
> another netns from queueing more work.
>
> Make the work item pernet so that flush_work() will wait for all
> transactions queued from this netns.
>
> A welcome side effect is that we no longer need to wait for transaction
> objects from other network namespaces.
>
> The gc work queue is still global. This seems to be ok because nft_set
> structures are reference counted and each container structure owns a
> reference on the net namespace.
>
> The destroy_list is still protected by a global spinlock rather than
> pernet one but the hold time is very short anyway.
A few questions below.
> Fixes: 9f6958ba2e90 ("netfilter: nf_tables: unconditionally flush pending work before notifier")
> Reported-by: syzbot+5d8c5789c8cb076b2c25@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=5d8c5789c8cb076b2c25
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
> include/net/netfilter/nf_tables.h | 4 +++-
> net/netfilter/nf_tables_api.c | 25 +++++++++++++++----------
> net/netfilter/nft_compat.c | 8 ++++----
> 3 files changed, 22 insertions(+), 15 deletions(-)
>
> diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
> index 60d5dcdb289c..803d5f1601f9 100644
> --- a/include/net/netfilter/nf_tables.h
> +++ b/include/net/netfilter/nf_tables.h
> @@ -1891,7 +1891,7 @@ void nft_chain_filter_fini(void);
> void __init nft_chain_route_init(void);
> void nft_chain_route_fini(void);
>
> -void nf_tables_trans_destroy_flush_work(void);
> +void nf_tables_trans_destroy_flush_work(struct net *net);
>
> int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result);
> __be64 nf_jiffies64_to_msecs(u64 input);
> @@ -1905,6 +1905,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re
> struct nftables_pernet {
> struct list_head tables;
> struct list_head commit_list;
> + struct list_head destroy_list;
> struct list_head commit_set_list;
> struct list_head binding_list;
> struct list_head module_list;
> @@ -1915,6 +1916,7 @@ struct nftables_pernet {
> unsigned int base_seq;
> unsigned int gc_seq;
> u8 validate_state;
> + struct work_struct destroy_work;
> };
>
> extern unsigned int nf_tables_net_id;
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> index a34de9c17cf1..adf8b2b37fc3 100644
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -34,7 +34,6 @@ unsigned int nf_tables_net_id __read_mostly;
> static LIST_HEAD(nf_tables_expressions);
> static LIST_HEAD(nf_tables_objects);
> static LIST_HEAD(nf_tables_flowtables);
> -static LIST_HEAD(nf_tables_destroy_list);
> static LIST_HEAD(nf_tables_gc_list);
> static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
> static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
> @@ -125,7 +124,6 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
> table->validate_state = new_validate_state;
> }
> static void nf_tables_trans_destroy_work(struct work_struct *w);
> -static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
>
> static void nft_trans_gc_work(struct work_struct *work);
> static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
> @@ -10006,11 +10004,12 @@ static void nft_commit_release(struct nft_trans *trans)
>
> static void nf_tables_trans_destroy_work(struct work_struct *w)
> {
> + struct nftables_pernet *nft_net = container_of(w, struct nftables_pernet, destroy_work);
> struct nft_trans *trans, *next;
> LIST_HEAD(head);
>
> spin_lock(&nf_tables_destroy_list_lock);
> - list_splice_init(&nf_tables_destroy_list, &head);
> + list_splice_init(&nft_net->destroy_list, &head);
> spin_unlock(&nf_tables_destroy_list_lock);
>
> if (list_empty(&head))
> @@ -10024,9 +10023,11 @@ static void nf_tables_trans_destroy_work(struct work_struct *w)
> }
> }
>
> -void nf_tables_trans_destroy_flush_work(void)
> +void nf_tables_trans_destroy_flush_work(struct net *net)
> {
> - flush_work(&trans_destroy_work);
> + struct nftables_pernet *nft_net = nft_pernet(net);
> +
> + flush_work(&nft_net->destroy_work);
> }
> EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
>
> @@ -10484,11 +10485,11 @@ static void nf_tables_commit_release(struct net *net)
>
> trans->put_net = true;
> spin_lock(&nf_tables_destroy_list_lock);
> - list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list);
> + list_splice_tail_init(&nft_net->commit_list, &nft_net->destroy_list);
> spin_unlock(&nf_tables_destroy_list_lock);
>
> nf_tables_module_autoload_cleanup(net);
> - schedule_work(&trans_destroy_work);
> + schedule_work(&nft_net->destroy_work);
>
> mutex_unlock(&nft_net->commit_mutex);
> }
> @@ -11853,7 +11854,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
>
> gc_seq = nft_gc_seq_begin(nft_net);
>
> - nf_tables_trans_destroy_flush_work();
> + nf_tables_trans_destroy_flush_work(net);
> again:
> list_for_each_entry(table, &nft_net->tables, list) {
> if (nft_table_has_owner(table) &&
> @@ -11895,6 +11896,7 @@ static int __net_init nf_tables_init_net(struct net *net)
>
> INIT_LIST_HEAD(&nft_net->tables);
> INIT_LIST_HEAD(&nft_net->commit_list);
> + INIT_LIST_HEAD(&nft_net->destroy_list);
> INIT_LIST_HEAD(&nft_net->commit_set_list);
> INIT_LIST_HEAD(&nft_net->binding_list);
> INIT_LIST_HEAD(&nft_net->module_list);
> @@ -11903,6 +11905,7 @@ static int __net_init nf_tables_init_net(struct net *net)
> nft_net->base_seq = 1;
> nft_net->gc_seq = 0;
> nft_net->validate_state = NFT_VALIDATE_SKIP;
> + INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work);
>
> return 0;
> }
> @@ -11936,9 +11939,13 @@ static void __net_exit nf_tables_exit_net(struct net *net)
> nft_gc_seq_end(nft_net, gc_seq);
>
> mutex_unlock(&nft_net->commit_mutex);
> +
> + cancel_work_sync(&nft_net->destroy_work);
__nft_release_tables() is called in this nf_tables_exit_net()
function, cancel_work_sync() needs to be called before it?
> +
> WARN_ON_ONCE(!list_empty(&nft_net->tables));
> WARN_ON_ONCE(!list_empty(&nft_net->module_list));
> WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
> + WARN_ON_ONCE(!list_empty(&nft_net->destroy_list));
> }
>
> static void nf_tables_exit_batch(struct list_head *net_exit_list)
> @@ -12029,10 +12036,8 @@ static void __exit nf_tables_module_exit(void)
> unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
> nft_chain_filter_fini();
> nft_chain_route_fini();
> - nf_tables_trans_destroy_flush_work();
My understanding is that this is not required anymore because of the
new cancel_work_sync() in the exit_net() path?
> unregister_pernet_subsys(&nf_tables_net_ops);
> cancel_work_sync(&trans_gc_work);
> - cancel_work_sync(&trans_destroy_work);
> rcu_barrier();
> rhltable_destroy(&nft_objname_ht);
> nf_tables_core_module_exit();
> diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
> index 7ca4f0d21fe2..72711d62fddf 100644
> --- a/net/netfilter/nft_compat.c
> +++ b/net/netfilter/nft_compat.c
> @@ -228,7 +228,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
> return 0;
> }
>
> -static void nft_compat_wait_for_destructors(void)
> +static void nft_compat_wait_for_destructors(struct net *net)
> {
> /* xtables matches or targets can have side effects, e.g.
> * creation/destruction of /proc files.
> @@ -236,7 +236,7 @@ static void nft_compat_wait_for_destructors(void)
> * work queue. If we have pending invocations we thus
> * need to wait for those to finish.
> */
> - nf_tables_trans_destroy_flush_work();
> + nf_tables_trans_destroy_flush_work(net);
> }
>
> static int
> @@ -262,7 +262,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
>
> nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
>
> - nft_compat_wait_for_destructors();
> + nft_compat_wait_for_destructors(ctx->net);
>
> ret = xt_check_target(&par, size, proto, inv);
> if (ret < 0) {
> @@ -515,7 +515,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
>
> nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
>
> - nft_compat_wait_for_destructors();
> + nft_compat_wait_for_destructors(ctx->net);
>
> return xt_check_match(&par, size, proto, inv);
> }
> --
> 2.48.1
>
>
next prev parent reply other threads:[~2025-03-05 22:08 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-04 11:55 [PATCH nf] netfilter: nf_tables: make destruction work queue pernet Florian Westphal
2025-03-05 22:08 ` Pablo Neira Ayuso [this message]
2025-03-06 2:50 ` Florian Westphal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Z8jLZv6asBnqrniC@calendula \
--to=pablo@netfilter.org \
--cc=fw@strlen.de \
--cc=netfilter-devel@vger.kernel.org \
--cc=syzbot+5d8c5789c8cb076b2c25@syzkaller.appspotmail.com \
--cc=syzkaller-bugs@googlegroups.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).