From: Florian Westphal <fw@strlen.de>
To: Jozsef Kadlecsik <kadlec@netfilter.org>
Cc: netfilter-devel@vger.kernel.org, Pablo Neira Ayuso <pablo@netfilter.org>
Subject: Re: [PATCH 2/2] netfilter: ipset: Fix data race between add and dump in all hash types
Date: Wed, 15 Apr 2026 12:22:05 +0200 [thread overview]
Message-ID: <ad9mzYNk5JpDfklg@strlen.de> (raw)
In-Reply-To: <20260415082039.4133308-3-kadlec@netfilter.org>
Jozsef Kadlecsik <kadlec@netfilter.org> wrote:
> When adding a new entry to the next position in the existing hash bucket,
> the position index was incremented too early and parallel dump could
> read it before the entry was populated with the value. Move the setting
> of the position index after populating the entry.
>
> Reported-by: syzbot+786c889f046e8b003ca6@syzkaller.appspotmail.com
> Reported-by: syzbot+1da17e4b41d795df059e@syzkaller.appspotmail.com
> Reported-by: syzbot+421c5f3ff8e9493084d9@syzkaller.appspotmail.com
> Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
> ---
> net/netfilter/ipset/ip_set_hash_gen.h | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
> index b79e5dd2af03..0da02a8dfbae 100644
> --- a/net/netfilter/ipset/ip_set_hash_gen.h
> +++ b/net/netfilter/ipset/ip_set_hash_gen.h
> @@ -844,7 +844,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
> const struct mtype_elem *d = value;
> struct mtype_elem *data;
> struct hbucket *n, *old = ERR_PTR(-ENOENT);
> - int i, j = -1, ret;
> + int i, j = -1, npos = 0, ret;
> bool flag_exist = flags & IPSET_FLAG_EXIST;
> bool deleted = false, forceadd = false, reuse = false;
> u32 r, key, multi = 0, elements, maxelem;
> @@ -889,6 +889,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
> ext_size(AHASH_INIT_SIZE, set->dsize);
> goto copy_elem;
> }
> + npos = n->pos;
> for (i = 0; i < n->pos; i++) {
> if (!test_bit(i, n->used)) {
> /* Reuse first deleted entry */
> @@ -962,7 +963,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
> }
>
> copy_elem:
> - j = n->pos++;
> + j = npos = n->pos + 1;
Hmm. Should that be:
+ j = npos;
+ npos = n->pos + 1;
As-is, j is advanced by 1 compared to the old code.
> data = ahash_data(n, j, set->dsize);
> copy_data:
> t->hregion[r].elements++;
> @@ -985,6 +986,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
> if (SET_WITH_TIMEOUT(set))
> ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
> smp_mb__before_atomic();
> + n->pos = npos;
> set_bit(j, n->used);
I think this needs a follow-up patch to switch this to smp_store_release
and readers to smp_load_acquire helpers.
Here is a diff for this (generated by an LLM and only compile-tested).
I think this can be a separate patch to not make this change too big and
to not mix different fixes.
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 0da02a8dfbae..2ca674e51699 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -682,10 +682,13 @@ mtype_resize(struct ip_set *set, bool retried)
rcu_read_lock_bh();
for (i = ahash_bucket_start(r, orig->htable_bits);
i < ahash_bucket_end(r, orig->htable_bits); i++) {
+ u8 pos;
+
n = __ipset_dereference(hbucket(orig, i));
if (!n)
continue;
- for (j = 0; j < n->pos; j++) {
+ pos = smp_load_acquire(&n->pos);
+ for (j = 0; j < pos; j++) {
if (!test_bit(j, n->used))
continue;
data = ahash_data(n, j, dsize);
@@ -817,10 +820,13 @@ mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size)
for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
for (i = ahash_bucket_start(r, t->htable_bits);
i < ahash_bucket_end(r, t->htable_bits); i++) {
+ u8 pos;
+
n = rcu_dereference_bh(hbucket(t, i));
if (!n)
continue;
- for (j = 0; j < n->pos; j++) {
+ pos = smp_load_acquire(&n->pos);
+ for (j = 0; j < pos; j++) {
if (!test_bit(j, n->used))
continue;
data = ahash_data(n, j, set->dsize);
@@ -963,7 +969,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
copy_elem:
- j = npos = n->pos + 1;
+ j = npos;
+ npos = n->pos + 1;
data = ahash_data(n, j, set->dsize);
copy_data:
t->hregion[r].elements++;
@@ -985,8 +992,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
/* Must come last for the case when timed out entry is reused */
if (SET_WITH_TIMEOUT(set))
ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
- smp_mb__before_atomic();
- n->pos = npos;
+ /* Ensure all data writes are visible before updating position */
+ smp_store_release(&n->pos, npos);
set_bit(j, n->used);
if (old != ERR_PTR(-ENOENT)) {
rcu_assign_pointer(hbucket(t, key), n);
@@ -1172,6 +1179,7 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
int ret, i, j = 0;
#endif
u32 key, multi = 0;
+ u8 pos;
pr_debug("test by nets\n");
for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) {
@@ -1189,7 +1197,8 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
n = rcu_dereference_bh(hbucket(t, key));
if (!n)
continue;
- for (i = 0; i < n->pos; i++) {
+ pos = smp_load_acquire(&n->pos);
+ for (i = 0; i < pos; i++) {
if (!test_bit(i, n->used))
continue;
data = ahash_data(n, i, set->dsize);
@@ -1223,6 +1232,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct mtype_elem *data;
int i, ret = 0;
u32 key, multi = 0;
+ u8 pos;
rcu_read_lock_bh();
t = rcu_dereference_bh(h->table);
@@ -1245,7 +1255,8 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ret = 0;
goto out;
}
- for (i = 0; i < n->pos; i++) {
+ pos = smp_load_acquire(&n->pos);
+ for (i = 0; i < pos; i++) {
if (!test_bit(i, n->used))
continue;
data = ahash_data(n, i, set->dsize);
@@ -1373,6 +1384,8 @@ mtype_list(const struct ip_set *set,
rcu_read_lock();
for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
cb->args[IPSET_CB_ARG0]++) {
+ u8 pos;
+
cond_resched_rcu();
incomplete = skb_tail_pointer(skb);
n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
@@ -1380,7 +1393,9 @@ mtype_list(const struct ip_set *set,
cb->args[IPSET_CB_ARG0], t, n);
if (!n)
continue;
- for (i = 0; i < n->pos; i++) {
+ /* Acquire ordering with smp_store_release in mtype_add */
+ pos = smp_load_acquire(&n->pos);
+ for (i = 0; i < pos; i++) {
if (!test_bit(i, n->used))
continue;
e = ahash_data(n, i, set->dsize);
next prev parent reply other threads:[~2026-04-15 10:22 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-15 8:20 [PATCH 0/2 v3] netfilter: ipset: concurrent add and dump fixes Jozsef Kadlecsik
2026-04-15 8:20 ` [PATCH 1/2] netfilter: ipset: Fix data race between add and list header in all hash types Jozsef Kadlecsik
2026-04-15 8:20 ` [PATCH 2/2] netfilter: ipset: Fix data race between add and dump " Jozsef Kadlecsik
2026-04-15 10:22 ` Florian Westphal [this message]
2026-04-15 10:33 ` Jozsef Kadlecsik
2026-04-20 13:11 ` Jozsef Kadlecsik
2026-04-20 14:03 ` Florian Westphal
-- strict thread matches above, loose matches on Subject: below --
2026-04-08 7:02 [PATCH 0/2] netfilter: ipset: concurrent add and dump fixes Jozsef Kadlecsik
2026-04-08 7:02 ` [PATCH 2/2] netfilter: ipset: Fix data race between add and dump in all hash types Jozsef Kadlecsik
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ad9mzYNk5JpDfklg@strlen.de \
--to=fw@strlen.de \
--cc=kadlec@netfilter.org \
--cc=netfilter-devel@vger.kernel.org \
--cc=pablo@netfilter.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.