From: Florian Westphal <fw@strlen.de>
To: Brian Witte <brianwitte@mailfence.com>
Cc: netfilter-devel@vger.kernel.org, pablo@netfilter.org,
kadlec@netfilter.org,
syzbot+ff16b505ec9152e5f448@syzkaller.appspotmail.com
Subject: Re: [PATCH v4 nf-next 2/2] netfilter: nf_tables: serialize reset with spinlock and atomic
Date: Tue, 3 Feb 2026 13:51:46 +0100 [thread overview]
Message-ID: <aYHvYiDKHVdOoSeg@strlen.de> (raw)
In-Reply-To: <20260203050723.263515-3-brianwitte@mailfence.com>
Brian Witte <brianwitte@mailfence.com> wrote:
> Add a dedicated spinlock to serialize counter reset operations,
> preventing concurrent dump-and-reset from underrunning values.
>
> Store struct net in counter priv to access the per-net spinlock during
> reset. This avoids dereferencing skb->sk which is NULL in single-element
> GET paths such as nft_get_set_elem.
Ouch, sorry about making a wrong suggestion. I did not consider that
this reset infra also works via plain netlink requests rather than just
for netlink dumps.
> For quota, use atomic64_xchg() to atomically read and zero the consumed
> value, which is simpler and doesn't require spinlock protection.
I think you should split this, one patch for quota and one nft_counter.
> Suggested-by: Pablo Neira Ayuso <pablo@netfilter.org>
> Suggested-by: Florian Westphal <fw@strlen.de>
> Signed-off-by: Brian Witte <brianwitte@mailfence.com>
> ---
> include/net/netfilter/nf_tables.h | 1 +
> net/netfilter/nf_tables_api.c | 3 +--
> net/netfilter/nft_counter.c | 17 ++++++++++++-----
> net/netfilter/nft_quota.c | 12 +++++++-----
> 4 files changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
> index 426534a711b0..c4b6b8cadf09 100644
> --- a/include/net/netfilter/nf_tables.h
> +++ b/include/net/netfilter/nf_tables.h
> @@ -1935,6 +1935,7 @@ struct nftables_pernet {
> struct list_head module_list;
> struct list_head notify_list;
> struct mutex commit_mutex;
> + spinlock_t reset_lock;
> u64 table_handle;
> u64 tstamp;
> unsigned int gc_seq;
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> index 9969d8488de4..146f29be834a 100644
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -3986,7 +3986,6 @@ nf_tables_getrule_single(u32 portid, const struct nfnl_info *info,
> static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
> const struct nlattr * const nla[])
> {
> - struct nftables_pernet *nft_net = nft_pernet(info->net);
This is odd, shouldn't that be in patch 1?
I would expect compiler to warn here, when just compiling with
patch 1 applied.
> u32 portid = NETLINK_CB(skb).portid;
> struct net *net = info->net;
> struct sk_buff *skb2;
> @@ -8529,7 +8528,6 @@ nf_tables_getobj_single(u32 portid, const struct nfnl_info *info,
> static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
> const struct nlattr * const nla[])
> {
> - struct nftables_pernet *nft_net = nft_pernet(info->net);
Same.
> @@ -12050,6 +12048,7 @@ static int __net_init nf_tables_init_net(struct net *net)
> INIT_LIST_HEAD(&nft_net->module_list);
> INIT_LIST_HEAD(&nft_net->notify_list);
> mutex_init(&nft_net->commit_mutex);
> + spin_lock_init(&nft_net->reset_lock);
> net->nft.base_seq = 1;
> nft_net->gc_seq = 0;
> nft_net->validate_state = NFT_VALIDATE_SKIP;
> diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
> index cc7325329496..54bcbf33e2b9 100644
> --- a/net/netfilter/nft_counter.c
> +++ b/net/netfilter/nft_counter.c
> @@ -28,6 +28,7 @@ struct nft_counter_tot {
>
> struct nft_counter_percpu_priv {
> struct nft_counter __percpu *counter;
> + struct net *net;
> };
Hmm, I think thats too high a price.
> -static void nft_counter_reset(struct nft_counter_percpu_priv *priv,
> +static void nft_counter_reset(struct nftables_pernet *nft_net,
> + struct nft_counter_percpu_priv *priv,
> struct nft_counter_tot *total)
> {
> struct u64_stats_sync *nft_sync;
> struct nft_counter *this_cpu;
>
> local_bh_disable();
> + spin_lock(&nft_net->reset_lock);
This lock is too late, it has to be taken before fetching 'total',
else you get following in parallel reset case:
cpu0 cpu1
fetch total counter, get 1000
fetch total counter, get 1000
subtract 1000
subtract 1000 again
Maybe something like this? I think the single-spinlock is fine, I
don't expect frequent resets, much less from concurrent netns.
If we get a complaint, we can always take the code-churn route to pass
'struct net' down to the dumper callback, or resort to array-of-locks
or similar at a later time.
+/* control plane only: sync fetch+reset */
+static DEFINE_SPINLOCK(nft_counter_lock);
+
static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -147,6 +151,14 @@ static void nft_counter_fetch(struct nft_counter_percpu_priv *priv,
total->packets += packets;
}
}
+static void nft_counter_fetch_and_reset(struct nft_counter_percpu_priv *priv,
+ struct nft_counter_tot *total)
+{
+ spin_lock(&nft_lock);
+ nft_counter_fetch(priv, total);
+ nft_counter_reset(priv, total);
+ spin_unlock(&nft_lock);
+}
static int nft_counter_do_dump(struct sk_buff *skb,
struct nft_counter_percpu_priv *priv,
@@ -154,7 +166,10 @@ static int nft_counter_do_dump(struct sk_buff *skb,
{
struct nft_counter_tot total;
- nft_counter_fetch(priv, &total);
+ if (unlikely(reset))
+ nft_counter_fetch_and_reset(priv, &total);
+ else
+ nft_counter_fetch(priv, &total);
if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes),
NFTA_COUNTER_PAD) ||
@@ -162,9 +177,6 @@ static int nft_counter_do_dump(struct sk_buff *skb,
NFTA_COUNTER_PAD))
goto nla_put_failure;
- if (reset)
- nft_counter_reset(priv, &total);
-
return 0;
next prev parent reply other threads:[~2026-02-03 12:51 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-03 5:07 [PATCH v4 nf-next 0/2] netfilter: nf_tables: fix reset request deadlock Brian Witte
2026-02-03 5:07 ` [PATCH v4 nf-next 1/2] Revert nf_tables commit_mutex in reset path Brian Witte
2026-02-03 5:07 ` [PATCH v4 nf-next 2/2] netfilter: nf_tables: serialize reset with spinlock and atomic Brian Witte
2026-02-03 12:51 ` Florian Westphal [this message]
2026-02-03 22:19 ` Pablo Neira Ayuso
2026-02-04 17:58 ` Brian Witte
2026-02-04 18:08 ` Florian Westphal
2026-02-04 21:42 ` Pablo Neira Ayuso
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aYHvYiDKHVdOoSeg@strlen.de \
--to=fw@strlen.de \
--cc=brianwitte@mailfence.com \
--cc=kadlec@netfilter.org \
--cc=netfilter-devel@vger.kernel.org \
--cc=pablo@netfilter.org \
--cc=syzbot+ff16b505ec9152e5f448@syzkaller.appspotmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.