From: Zhang Tengfei <zhtfdev@gmail.com>
To: Simon Horman <horms@verge.net.au>, Julian Anastasov <ja@ssi.bg>,
lvs-devel@vger.kernel.org, netfilter-devel@vger.kernel.org,
Pablo Neira Ayuso <pablo@netfilter.org>,
Jozsef Kadlecsik <kadlec@netfilter.org>,
Florian Westphal <fw@strlen.de>
Cc: "David S . Miller" <davem@davemloft.net>,
David Ahern <dsahern@kernel.org>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
coreteam@netfilter.org, Zhang Tengfei <zhtfdev@gmail.com>,
syzbot+1651b5234028c294c339@syzkaller.appspotmail.com
Subject: [PATCH] net/netfilter/ipvs: Fix data-race in ip_vs_add_service / ip_vs_out_hook
Date: Tue, 26 Aug 2025 21:31:04 +0800
Message-ID: <20250826133104.212975-1-zhtfdev@gmail.com>

A data-race was detected by KCSAN between ip_vs_add_service(), which
acts as the writer, and ip_vs_out_hook(), which acts as the reader.
This can lead to unpredictable behavior and crashes. One observed
symptom is a spurious "no destination available" error when processing
packets.

The race is on the `enable` flag in struct netns_ipvs. The flag was
written in the configuration path without any protection while being
read concurrently in the packet processing path. Without
synchronization, a reader on one CPU can observe the flag set before
the rest of the service state is visible, i.e. it can act on a
partially initialized service.

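For illustration, the problematic pattern reduces to the following
userspace analogue (a simplified sketch, not IPVS code: `enable` and
`dest_count` are hypothetical stand-ins for ipvs->enable and the
service state; compile with gcc -pthread):

  #include <pthread.h>
  #include <stdio.h>

  static int enable;      /* plain int, like the pre-patch field */
  static int dest_count;  /* stand-in for service state */

  static void *writer(void *arg)
  {
          dest_count = 1; /* initialize the "service" ...        */
          enable = 1;     /* ... then set the flag: plain stores */
                          /* that compiler/CPU may reorder       */
          return NULL;
  }

  static void *reader(void *arg)
  {
          /* Plain loads: may observe enable == 1 while          */
          /* dest_count is still 0, i.e. a partially initialized */
          /* service -- the "no destination available" symptom.  */
          if (enable && dest_count == 0)
                  fprintf(stderr, "enabled but no destination\n");
          return NULL;
  }

  int main(void)
  {
          pthread_t w, r;

          pthread_create(&w, NULL, writer, NULL);
          pthread_create(&r, NULL, reader, NULL);
          pthread_join(w, NULL);
          pthread_join(r, NULL);
          return 0;
  }
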
To fix this, convert the `enable` flag from a plain integer to an
atomic_t and access it via atomic_set()/atomic_read(). This makes every
read and write of the flag a single, untorn memory access and marks the
accesses as intentionally concurrent for KCSAN. Note that atomic_set()
and atomic_read() do not by themselves imply memory barriers; where
ordering against other fields is required (e.g. the netns cleanup
path), the existing smp_wmb()/smp_rmb() pairs still provide it.

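As a sketch of the intended publish/consume ordering, here is a
userspace C11 analogue (hypothetical names; in the kernel,
atomic_set()/atomic_read() are relaxed, so the release/acquire pairing
below stands in for combining them with explicit barriers such as
smp_wmb()/smp_rmb()):

  #include <stdatomic.h>

  static atomic_int enable;  /* analogue of atomic_t enable */
  static int dest_count;     /* stand-in for service state  */

  /* Writer: publish all service state first, then set the flag */
  /* with release semantics so the stores cannot sink below it. */
  static void add_service(void)
  {
          dest_count = 1;
          atomic_store_explicit(&enable, 1, memory_order_release);
  }

  /* Reader: an acquire load of the flag guarantees that once   */
  /* enable == 1 is observed, dest_count == 1 is visible too.   */
  static int packet_path(void)
  {
          if (!atomic_load_explicit(&enable, memory_order_acquire))
                  return 0;       /* analogue of NF_ACCEPT */
          return dest_count;      /* guaranteed to be 1 here */
  }
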
Reported-by: syzbot+1651b5234028c294c339@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=1651b5234028c294c339
Signed-off-by: Zhang Tengfei <zhtfdev@gmail.com>
---
include/net/ip_vs.h | 2 +-
net/netfilter/ipvs/ip_vs_conn.c | 4 ++--
net/netfilter/ipvs/ip_vs_core.c | 10 +++++-----
net/netfilter/ipvs/ip_vs_ctl.c | 6 +++---
net/netfilter/ipvs/ip_vs_est.c | 16 ++++++++--------
5 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 29a36709e7f3..58b2ad7906e8 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -895,7 +895,7 @@ struct ipvs_sync_daemon_cfg {
/* IPVS in network namespace */
struct netns_ipvs {
int gen; /* Generation */
- int enable; /* enable like nf_hooks do */
+ atomic_t enable; /* enable like nf_hooks do */
/* Hash table: for real service lookups */
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 965f3c8e5089..5c97f85929b4 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -885,7 +885,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
* conntrack cleanup for the net.
*/
smp_rmb();
- if (ipvs->enable)
+ if (atomic_read(&ipvs->enable))
ip_vs_conn_drop_conntrack(cp);
}
@@ -1439,7 +1439,7 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
cond_resched_rcu();
/* netns clean up started, abort delayed work */
- if (!ipvs->enable)
+ if (!atomic_read(&ipvs->enable))
break;
}
rcu_read_unlock();
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c7a8a08b7308..84eed2e45927 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1353,7 +1353,7 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
if (unlikely(!skb_dst(skb)))
return NF_ACCEPT;
- if (!ipvs->enable)
+ if (!atomic_read(&ipvs->enable))
return NF_ACCEPT;
ip_vs_fill_iph_skb(af, skb, false, &iph);
@@ -1940,7 +1940,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
return NF_ACCEPT;
}
/* ipvs enabled in this netns ? */
- if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
+ if (unlikely(sysctl_backup_only(ipvs) || !atomic_read(&ipvs->enable)))
return NF_ACCEPT;
ip_vs_fill_iph_skb(af, skb, false, &iph);
@@ -2108,7 +2108,7 @@ ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
int r;
/* ipvs enabled in this netns ? */
- if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
+ if (unlikely(sysctl_backup_only(ipvs) || !atomic_read(&ipvs->enable)))
return NF_ACCEPT;
if (state->pf == NFPROTO_IPV4) {
@@ -2295,7 +2295,7 @@ static int __net_init __ip_vs_init(struct net *net)
return -ENOMEM;
/* Hold the beast until a service is registered */
- ipvs->enable = 0;
+ atomic_set(&ipvs->enable, 0);
ipvs->net = net;
/* Counters used for creating unique names */
ipvs->gen = atomic_read(&ipvs_netns_cnt);
@@ -2367,7 +2367,7 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
ipvs = net_ipvs(net);
ip_vs_unregister_hooks(ipvs, AF_INET);
ip_vs_unregister_hooks(ipvs, AF_INET6);
- ipvs->enable = 0; /* Disable packet reception */
+ atomic_set(&ipvs->enable, 0); /* Disable packet reception */
smp_wmb();
ip_vs_sync_net_cleanup(ipvs);
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 6a6fc4478533..ad7e1c387c1f 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -256,7 +256,7 @@ static void est_reload_work_handler(struct work_struct *work)
struct ip_vs_est_kt_data *kd = ipvs->est_kt_arr[id];
/* netns clean up started, abort delayed work */
- if (!ipvs->enable)
+ if (!atomic_read(&ipvs->enable))
goto unlock;
if (!kd)
continue;
@@ -1483,9 +1483,9 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
*svc_p = svc;
- if (!ipvs->enable) {
+ if (!atomic_read(&ipvs->enable)) {
/* Now there is a service - full throttle */
- ipvs->enable = 1;
+ atomic_set(&ipvs->enable, 1);
/* Start estimation for first time */
ip_vs_est_reload_start(ipvs);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 15049b826732..c5aa2660de92 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -231,7 +231,7 @@ static int ip_vs_estimation_kthread(void *data)
void ip_vs_est_reload_start(struct netns_ipvs *ipvs)
{
/* Ignore reloads before first service is added */
- if (!ipvs->enable)
+ if (!atomic_read(&ipvs->enable))
return;
ip_vs_est_stopped_recalc(ipvs);
/* Bump the kthread configuration genid */
@@ -306,7 +306,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
int i;
if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
- ipvs->enable && ipvs->est_max_threads)
+ atomic_read(&ipvs->enable) && ipvs->est_max_threads)
return -EINVAL;
mutex_lock(&ipvs->est_mutex);
@@ -343,7 +343,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
}
/* Start kthread tasks only when services are present */
- if (ipvs->enable && !ip_vs_est_stopped(ipvs)) {
+ if (atomic_read(&ipvs->enable) && !ip_vs_est_stopped(ipvs)) {
ret = ip_vs_est_kthread_start(ipvs, kd);
if (ret < 0)
goto out;
@@ -486,7 +486,7 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
struct ip_vs_estimator *est = &stats->est;
int ret;
- if (!ipvs->est_max_threads && ipvs->enable)
+ if (!ipvs->est_max_threads && atomic_read(&ipvs->enable))
ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
est->ktid = -1;
@@ -663,7 +663,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
/* Wait for cpufreq frequency transition */
wait_event_idle_timeout(wq, kthread_should_stop(),
HZ / 50);
- if (!ipvs->enable || kthread_should_stop())
+ if (!atomic_read(&ipvs->enable) || kthread_should_stop())
goto stop;
}
@@ -681,7 +681,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
rcu_read_unlock();
local_bh_enable();
- if (!ipvs->enable || kthread_should_stop())
+ if (!atomic_read(&ipvs->enable) || kthread_should_stop())
goto stop;
cond_resched();
@@ -757,7 +757,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
mutex_lock(&ipvs->est_mutex);
for (id = 1; id < ipvs->est_kt_count; id++) {
/* netns clean up started, abort */
- if (!ipvs->enable)
+ if (!atomic_read(&ipvs->enable))
goto unlock2;
kd = ipvs->est_kt_arr[id];
if (!kd)
@@ -787,7 +787,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
id = ipvs->est_kt_count;
next_kt:
- if (!ipvs->enable || kthread_should_stop())
+ if (!atomic_read(&ipvs->enable) || kthread_should_stop())
goto unlock;
id--;
if (id < 0)
--
2.47.3
Thread overview: 12+ messages
2025-08-26 13:31 Zhang Tengfei
2025-08-26 14:18 ` [PATCH] net/netfilter/ipvs: Fix data-race in ip_vs_add_service / ip_vs_out_hook Florian Westphal
2025-08-26 14:40 ` Eric Dumazet
2025-08-26 15:56 ` Julian Anastasov
2025-08-27 6:48 ` Julian Anastasov
2025-08-27 14:43 ` Zhang Tengfei
2025-08-27 21:37 ` Pablo Neira Ayuso
2025-08-27 22:33 ` [PATCH v2] net/netfilter/ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable Zhang Tengfei
2025-08-27 22:51 ` Zhang Tengfei
2025-08-31 13:01 ` Julian Anastasov
2025-09-01 13:46 ` [PATCH v3 nf-next] ipvs: " Zhang Tengfei
2025-09-03 17:31 ` Julian Anastasov