* [PATCH] net/netfilter/ipvs: Fix data-race in ip_vs_add_service / ip_vs_out_hook
@ 2025-08-26 13:31 Zhang Tengfei
2025-08-26 14:18 ` Florian Westphal
` (2 more replies)
From: Zhang Tengfei @ 2025-08-26 13:31 UTC (permalink / raw)
To: Simon Horman, Julian Anastasov, lvs-devel, netfilter-devel,
Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal
Cc: David S . Miller, David Ahern, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, coreteam, Zhang Tengfei, syzbot+1651b5234028c294c339
A data-race was detected by KCSAN between ip_vs_add_service() which
acts as a writer, and ip_vs_out_hook() which acts as a reader. This
can lead to unpredictable behavior and crashes. One observed symptom
is the "no destination available" error when processing packets.
The race occurs on the `enable` flag within the `netns_ipvs`
struct. This flag was being written in the configuration path without
any protection, while concurrently being read in the packet processing
path. This lack of synchronization means a reader on one CPU could see a
partially initialized service, leading to incorrect behavior.
To fix this, convert the `enable` flag from a plain integer to an
atomic_t. This ensures that all reads and writes to the flag are atomic.
More importantly, using atomic_set() and atomic_read() provides the
necessary memory barriers to guarantee that changes to other fields of
the service are visible to the reader CPU before the service is marked
as enabled.
Reported-by: syzbot+1651b5234028c294c339@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=1651b5234028c294c339
Signed-off-by: Zhang Tengfei <zhtfdev@gmail.com>
---
include/net/ip_vs.h | 2 +-
net/netfilter/ipvs/ip_vs_conn.c | 4 ++--
net/netfilter/ipvs/ip_vs_core.c | 10 +++++-----
net/netfilter/ipvs/ip_vs_ctl.c | 6 +++---
net/netfilter/ipvs/ip_vs_est.c | 16 ++++++++--------
5 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 29a36709e7f3..58b2ad7906e8 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -895,7 +895,7 @@ struct ipvs_sync_daemon_cfg {
/* IPVS in network namespace */
struct netns_ipvs {
int gen; /* Generation */
- int enable; /* enable like nf_hooks do */
+ atomic_t enable; /* enable like nf_hooks do */
/* Hash table: for real service lookups */
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 965f3c8e5089..5c97f85929b4 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -885,7 +885,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
* conntrack cleanup for the net.
*/
smp_rmb();
- if (ipvs->enable)
+ if (atomic_read(&ipvs->enable))
ip_vs_conn_drop_conntrack(cp);
}
@@ -1439,7 +1439,7 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
cond_resched_rcu();
/* netns clean up started, abort delayed work */
- if (!ipvs->enable)
+ if (!atomic_read(&ipvs->enable))
break;
}
rcu_read_unlock();
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c7a8a08b7308..84eed2e45927 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1353,7 +1353,7 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
if (unlikely(!skb_dst(skb)))
return NF_ACCEPT;
- if (!ipvs->enable)
+ if (!atomic_read(&ipvs->enable))
return NF_ACCEPT;
ip_vs_fill_iph_skb(af, skb, false, &iph);
@@ -1940,7 +1940,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
return NF_ACCEPT;
}
/* ipvs enabled in this netns ? */
- if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
+ if (unlikely(sysctl_backup_only(ipvs) || !atomic_read(&ipvs->enable)))
return NF_ACCEPT;
ip_vs_fill_iph_skb(af, skb, false, &iph);
@@ -2108,7 +2108,7 @@ ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
int r;
/* ipvs enabled in this netns ? */
- if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
+ if (unlikely(sysctl_backup_only(ipvs) || !atomic_read(&ipvs->enable)))
return NF_ACCEPT;
if (state->pf == NFPROTO_IPV4) {
@@ -2295,7 +2295,7 @@ static int __net_init __ip_vs_init(struct net *net)
return -ENOMEM;
/* Hold the beast until a service is registered */
- ipvs->enable = 0;
+ atomic_set(&ipvs->enable, 0);
ipvs->net = net;
/* Counters used for creating unique names */
ipvs->gen = atomic_read(&ipvs_netns_cnt);
@@ -2367,7 +2367,7 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
ipvs = net_ipvs(net);
ip_vs_unregister_hooks(ipvs, AF_INET);
ip_vs_unregister_hooks(ipvs, AF_INET6);
- ipvs->enable = 0; /* Disable packet reception */
+ atomic_set(&ipvs->enable, 0); /* Disable packet reception */
smp_wmb();
ip_vs_sync_net_cleanup(ipvs);
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 6a6fc4478533..ad7e1c387c1f 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -256,7 +256,7 @@ static void est_reload_work_handler(struct work_struct *work)
struct ip_vs_est_kt_data *kd = ipvs->est_kt_arr[id];
/* netns clean up started, abort delayed work */
- if (!ipvs->enable)
+ if (!atomic_read(&ipvs->enable))
goto unlock;
if (!kd)
continue;
@@ -1483,9 +1483,9 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
*svc_p = svc;
- if (!ipvs->enable) {
+ if (!atomic_read(&ipvs->enable)) {
/* Now there is a service - full throttle */
- ipvs->enable = 1;
+ atomic_set(&ipvs->enable, 1);
/* Start estimation for first time */
ip_vs_est_reload_start(ipvs);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 15049b826732..c5aa2660de92 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -231,7 +231,7 @@ static int ip_vs_estimation_kthread(void *data)
void ip_vs_est_reload_start(struct netns_ipvs *ipvs)
{
/* Ignore reloads before first service is added */
- if (!ipvs->enable)
+ if (!atomic_read(&ipvs->enable))
return;
ip_vs_est_stopped_recalc(ipvs);
/* Bump the kthread configuration genid */
@@ -306,7 +306,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
int i;
if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
- ipvs->enable && ipvs->est_max_threads)
+ atomic_read(&ipvs->enable) && ipvs->est_max_threads)
return -EINVAL;
mutex_lock(&ipvs->est_mutex);
@@ -343,7 +343,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
}
/* Start kthread tasks only when services are present */
- if (ipvs->enable && !ip_vs_est_stopped(ipvs)) {
+ if (atomic_read(&ipvs->enable) && !ip_vs_est_stopped(ipvs)) {
ret = ip_vs_est_kthread_start(ipvs, kd);
if (ret < 0)
goto out;
@@ -486,7 +486,7 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
struct ip_vs_estimator *est = &stats->est;
int ret;
- if (!ipvs->est_max_threads && ipvs->enable)
+ if (!ipvs->est_max_threads && atomic_read(&ipvs->enable))
ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
est->ktid = -1;
@@ -663,7 +663,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
/* Wait for cpufreq frequency transition */
wait_event_idle_timeout(wq, kthread_should_stop(),
HZ / 50);
- if (!ipvs->enable || kthread_should_stop())
+ if (!atomic_read(&ipvs->enable) || kthread_should_stop())
goto stop;
}
@@ -681,7 +681,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
rcu_read_unlock();
local_bh_enable();
- if (!ipvs->enable || kthread_should_stop())
+ if (!atomic_read(&ipvs->enable) || kthread_should_stop())
goto stop;
cond_resched();
@@ -757,7 +757,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
mutex_lock(&ipvs->est_mutex);
for (id = 1; id < ipvs->est_kt_count; id++) {
/* netns clean up started, abort */
- if (!ipvs->enable)
+ if (!atomic_read(&ipvs->enable))
goto unlock2;
kd = ipvs->est_kt_arr[id];
if (!kd)
@@ -787,7 +787,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
id = ipvs->est_kt_count;
next_kt:
- if (!ipvs->enable || kthread_should_stop())
+ if (!atomic_read(&ipvs->enable) || kthread_should_stop())
goto unlock;
id--;
if (id < 0)
--
2.47.3
* Re: [PATCH] net/netfilter/ipvs: Fix data-race in ip_vs_add_service / ip_vs_out_hook
2025-08-26 13:31 [PATCH] net/netfilter/ipvs: Fix data-race in ip_vs_add_service / ip_vs_out_hook Zhang Tengfei
@ 2025-08-26 14:18 ` Florian Westphal
2025-08-26 14:40 ` Eric Dumazet
2025-08-26 15:56 ` Julian Anastasov
2025-08-27 6:48 ` Julian Anastasov
From: Florian Westphal @ 2025-08-26 14:18 UTC (permalink / raw)
To: Zhang Tengfei
Cc: Simon Horman, Julian Anastasov, lvs-devel, netfilter-devel,
Pablo Neira Ayuso, Jozsef Kadlecsik, David S . Miller,
David Ahern, Eric Dumazet, Jakub Kicinski, Paolo Abeni, coreteam,
syzbot+1651b5234028c294c339
Zhang Tengfei <zhtfdev@gmail.com> wrote:
> A data-race was detected by KCSAN between ip_vs_add_service() which
> acts as a writer, and ip_vs_out_hook() which acts as a reader. This
> can lead to unpredictable behavior and crashes.
Really? How can this cause a crash?
> The race occurs on the `enable` flag within the `netns_ipvs`
> struct. This flag was being written in the configuration path without
> any protection, while concurrently being read in the packet processing
> path. This lack of synchronization means a reader on one CPU could see a
> partially initialized service, leading to incorrect behavior.
>
> To fix this, convert the `enable` flag from a plain integer to an
> atomic_t. This ensures that all reads and writes to the flag are atomic.
> More importantly, using atomic_set() and atomic_read() provides the
> necessary memory barriers to guarantee that changes to other fields of
> the service are visible to the reader CPU before the service is marked
> as enabled.
> - int enable; /* enable like nf_hooks do */
> + atomic_t enable; /* enable like nf_hooks do */
Julian, Simon, I will defer to your judgment but I dislike this,
because I see no reason for atomic_t. To me is seems better to use
READ/WRITE_ONCE as ->enable is only ever set but not modified
(increment for instance).
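A minimal sketch of what I mean -- the field stays a plain int and
only the accesses get annotated (my sketch, not tested code):

        /* control path, e.g. ip_vs_add_service() under __ip_vs_mutex */
        WRITE_ONCE(ipvs->enable, 1);

        /* any concurrent reader, lockless */
        if (!READ_ONCE(ipvs->enable))
                return NF_ACCEPT;

Since nothing ever does a read-modify-write on ->enable, the stronger
atomic_t API buys nothing here.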
* Re: [PATCH] net/netfilter/ipvs: Fix data-race in ip_vs_add_service / ip_vs_out_hook
2025-08-26 14:18 ` Florian Westphal
@ 2025-08-26 14:40 ` Eric Dumazet
From: Eric Dumazet @ 2025-08-26 14:40 UTC (permalink / raw)
To: Florian Westphal
Cc: Zhang Tengfei, Simon Horman, Julian Anastasov, lvs-devel,
netfilter-devel, Pablo Neira Ayuso, Jozsef Kadlecsik,
David S . Miller, David Ahern, Jakub Kicinski, Paolo Abeni,
coreteam, syzbot+1651b5234028c294c339
On Tue, Aug 26, 2025 at 7:18 AM Florian Westphal <fw@strlen.de> wrote:
>
> Zhang Tengfei <zhtfdev@gmail.com> wrote:
> > A data-race was detected by KCSAN between ip_vs_add_service() which
> > acts as a writer, and ip_vs_out_hook() which acts as a reader. This
> > can lead to unpredictable behavior and crashes.
>
> Really? How can this cause a crash?
KCSAN + panic_on_warn=1: only in a debug environment
>
> > The race occurs on the `enable` flag within the `netns_ipvs`
> > struct. This flag was being written in the configuration path without
> > any protection, while concurrently being read in the packet processing
> > path. This lack of synchronization means a reader on one CPU could see a
> > partially initialized service, leading to incorrect behavior.
> >
> > To fix this, convert the `enable` flag from a plain integer to an
> > atomic_t. This ensures that all reads and writes to the flag are atomic.
> > More importantly, using atomic_set() and atomic_read() provides the
> > necessary memory barriers to guarantee that changes to other fields of
> > the service are visible to the reader CPU before the service is marked
> > as enabled.
>
> > - int enable; /* enable like nf_hooks do */
> > + atomic_t enable; /* enable like nf_hooks do */
>
> Julian, Simon, I will defer to your judgment but I dislike this,
> because I see no reason for atomic_t. To me it seems better to use
> READ/WRITE_ONCE as ->enable is only ever set but not modified
> (increment for instance).
+2
* Re: [PATCH] net/netfilter/ipvs: Fix data-race in ip_vs_add_service / ip_vs_out_hook
2025-08-26 13:31 [PATCH] net/netfilter/ipvs: Fix data-race in ip_vs_add_service / ip_vs_out_hook Zhang Tengfei
2025-08-26 14:18 ` Florian Westphal
@ 2025-08-26 15:56 ` Julian Anastasov
2025-08-27 6:48 ` Julian Anastasov
From: Julian Anastasov @ 2025-08-26 15:56 UTC (permalink / raw)
To: Zhang Tengfei
Cc: Simon Horman, lvs-devel, netfilter-devel, Pablo Neira Ayuso,
Jozsef Kadlecsik, Florian Westphal, David S . Miller, David Ahern,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, coreteam,
syzbot+1651b5234028c294c339
Hello,
On Tue, 26 Aug 2025, Zhang Tengfei wrote:
> A data-race was detected by KCSAN between ip_vs_add_service() which
> acts as a writer, and ip_vs_out_hook() which acts as a reader. This
> can lead to unpredictable behavior and crashes. One observed symptom
> is the "no destination available" error when processing packets.
You cannot solve the "no destination available" race
in IPVS itself. When a service is added in ip_vs_add_service(),
things happen in this order:
- hooks are registered (if first service)
- service is registered
- enable flag is set (if first service)
All this is part of single IP_VS_SO_SET_ADD call.
You can reorder the above actions as you wish but without any
gain because the dests (real servers) are added with a
following IP_VS_SO_SET_ADDDEST call. There will always be a
gap between the two calls where packets can hit the service
without any dests, with enable=0 or 1.
One can stop the traffic with firewall rules until
all IPVS rules are added. It is decided by user space tools
when to route the traffic via IPVS (after or during
configuration).
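Roughly, in code (a condensed sketch; locking, error handling and
the exact call sites are left out, and first_service stands for the
"first service in this netns" condition):

        /* single IP_VS_SO_SET_ADD call, condensed */
        if (first_service)
                ip_vs_register_hooks(ipvs, af);  /* 1. hooks registered   */
        ip_vs_svc_hash(svc);                     /* 2. service registered */
        if (first_service)
                ipvs->enable = 1;                /* 3. enable flag set    */

        /* dests (real servers) arrive only with a later
         * IP_VS_SO_SET_ADDDEST call, so the gap is unavoidable
         */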
>
> The race occurs on the `enable` flag within the `netns_ipvs`
> struct. This flag was being written in the configuration path without
> any protection, while concurrently being read in the packet processing
> path. This lack of synchronization means a reader on one CPU could see a
> partially initialized service, leading to incorrect behavior.
No, the service is added with hlist_add_head_rcu() in
ip_vs_svc_hash(), which commits all writes before adding the svc
to the list. This also answers the concerns in your
paragraph below.
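Sketch of that publish/read pairing (svc_head stands for the hash
bucket, and the field writes are illustrative only):

        /* writer: fill in svc completely, then publish */
        svc->af   = u->af;
        svc->port = u->port;                        /* ... all fields first */
        hlist_add_head_rcu(&svc->s_list, svc_head); /* release barrier */

        /* reader: under rcu_read_lock() */
        hlist_for_each_entry_rcu(svc, svc_head, s_list) {
                /* every write made before the publish is visible here */
        }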
If "partially initialized" refers to the missing
dests, then see above, they are added later with
IP_VS_SO_SET_ADDDEST.
> To fix this, convert the `enable` flag from a plain integer to an
> atomic_t. This ensures that all reads and writes to the flag are atomic.
> More importantly, using atomic_set() and atomic_read() provides the
> necessary memory barriers to guarantee that changes to other fields of
> the service are visible to the reader CPU before the service is marked
> as enabled.
We use RCU for proper svc registration.
> Reported-by: syzbot+1651b5234028c294c339@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=1651b5234028c294c339
> Signed-off-by: Zhang Tengfei <zhtfdev@gmail.com>
> ---
> include/net/ip_vs.h | 2 +-
> net/netfilter/ipvs/ip_vs_conn.c | 4 ++--
> net/netfilter/ipvs/ip_vs_core.c | 10 +++++-----
> net/netfilter/ipvs/ip_vs_ctl.c | 6 +++---
> net/netfilter/ipvs/ip_vs_est.c | 16 ++++++++--------
> 5 files changed, 19 insertions(+), 19 deletions(-)
>
> diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
> index 29a36709e7f3..58b2ad7906e8 100644
> --- a/include/net/ip_vs.h
> +++ b/include/net/ip_vs.h
> @@ -895,7 +895,7 @@ struct ipvs_sync_daemon_cfg {
> /* IPVS in network namespace */
> struct netns_ipvs {
> int gen; /* Generation */
> - int enable; /* enable like nf_hooks do */
> + atomic_t enable; /* enable like nf_hooks do */
> /* Hash table: for real service lookups */
> #define IP_VS_RTAB_BITS 4
> #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
> diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> index 965f3c8e5089..5c97f85929b4 100644
> --- a/net/netfilter/ipvs/ip_vs_conn.c
> +++ b/net/netfilter/ipvs/ip_vs_conn.c
> @@ -885,7 +885,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
> * conntrack cleanup for the net.
> */
> smp_rmb();
> - if (ipvs->enable)
> + if (atomic_read(&ipvs->enable))
This place is a valid user of this flag.
This is one of the reasons we should keep such flag.
> ip_vs_conn_drop_conntrack(cp);
> }
>
> @@ -1439,7 +1439,7 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
> cond_resched_rcu();
>
> /* netns clean up started, abort delayed work */
> - if (!ipvs->enable)
> + if (!atomic_read(&ipvs->enable))
> break;
> }
> rcu_read_unlock();
> diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
> index c7a8a08b7308..84eed2e45927 100644
> --- a/net/netfilter/ipvs/ip_vs_core.c
> +++ b/net/netfilter/ipvs/ip_vs_core.c
> @@ -1353,7 +1353,7 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
> if (unlikely(!skb_dst(skb)))
> return NF_ACCEPT;
>
> - if (!ipvs->enable)
The checks in the hooks are not needed anymore after
commit 857ca89711de ("ipvs: register hooks only with services")
because the hooks are registered when the first service is
added.
So, you cannot see enable=0 in the hooks anymore, which
was possible before the first service was added. Or it is
possible for a small window between adding the hooks and setting
the flag, but it is irrelevant because there are no dests yet.
> + if (!atomic_read(&ipvs->enable))
> return NF_ACCEPT;
>
> ip_vs_fill_iph_skb(af, skb, false, &iph);
> @@ -1940,7 +1940,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
> return NF_ACCEPT;
> }
> /* ipvs enabled in this netns ? */
> - if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
> + if (unlikely(sysctl_backup_only(ipvs) || !atomic_read(&ipvs->enable)))
> return NF_ACCEPT;
>
> ip_vs_fill_iph_skb(af, skb, false, &iph);
> @@ -2108,7 +2108,7 @@ ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
> int r;
>
> /* ipvs enabled in this netns ? */
> - if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
> + if (unlikely(sysctl_backup_only(ipvs) || !atomic_read(&ipvs->enable)))
> return NF_ACCEPT;
>
> if (state->pf == NFPROTO_IPV4) {
> @@ -2295,7 +2295,7 @@ static int __net_init __ip_vs_init(struct net *net)
> return -ENOMEM;
>
> /* Hold the beast until a service is registered */
> - ipvs->enable = 0;
> + atomic_set(&ipvs->enable, 0);
> ipvs->net = net;
> /* Counters used for creating unique names */
> ipvs->gen = atomic_read(&ipvs_netns_cnt);
> @@ -2367,7 +2367,7 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
> ipvs = net_ipvs(net);
> ip_vs_unregister_hooks(ipvs, AF_INET);
> ip_vs_unregister_hooks(ipvs, AF_INET6);
No new/in-flight packets after this point...
> - ipvs->enable = 0; /* Disable packet reception */
It was an old method to disable reception, but now
the flag serves other purposes.
> + atomic_set(&ipvs->enable, 0); /* Disable packet reception */
> smp_wmb();
> ip_vs_sync_net_cleanup(ipvs);
> }
> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index 6a6fc4478533..ad7e1c387c1f 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> @@ -256,7 +256,7 @@ static void est_reload_work_handler(struct work_struct *work)
> struct ip_vs_est_kt_data *kd = ipvs->est_kt_arr[id];
>
> /* netns clean up started, abort delayed work */
> - if (!ipvs->enable)
Such checks in the slow path just speed up the cleanup.
> + if (!atomic_read(&ipvs->enable))
> goto unlock;
> if (!kd)
> continue;
> @@ -1483,9 +1483,9 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
>
> *svc_p = svc;
>
> - if (!ipvs->enable) {
> + if (!atomic_read(&ipvs->enable)) {
> /* Now there is a service - full throttle */
> - ipvs->enable = 1;
> + atomic_set(&ipvs->enable, 1);
>
> /* Start estimation for first time */
> ip_vs_est_reload_start(ipvs);
> diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
> index 15049b826732..c5aa2660de92 100644
> --- a/net/netfilter/ipvs/ip_vs_est.c
> +++ b/net/netfilter/ipvs/ip_vs_est.c
> @@ -231,7 +231,7 @@ static int ip_vs_estimation_kthread(void *data)
> void ip_vs_est_reload_start(struct netns_ipvs *ipvs)
> {
> /* Ignore reloads before first service is added */
> - if (!ipvs->enable)
> + if (!atomic_read(&ipvs->enable))
> return;
> ip_vs_est_stopped_recalc(ipvs);
> /* Bump the kthread configuration genid */
> @@ -306,7 +306,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
> int i;
>
> if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
> - ipvs->enable && ipvs->est_max_threads)
> + atomic_read(&ipvs->enable) && ipvs->est_max_threads)
> return -EINVAL;
>
> mutex_lock(&ipvs->est_mutex);
> @@ -343,7 +343,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
> }
>
> /* Start kthread tasks only when services are present */
> - if (ipvs->enable && !ip_vs_est_stopped(ipvs)) {
> + if (atomic_read(&ipvs->enable) && !ip_vs_est_stopped(ipvs)) {
> ret = ip_vs_est_kthread_start(ipvs, kd);
> if (ret < 0)
> goto out;
> @@ -486,7 +486,7 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
> struct ip_vs_estimator *est = &stats->est;
> int ret;
>
> - if (!ipvs->est_max_threads && ipvs->enable)
> + if (!ipvs->est_max_threads && atomic_read(&ipvs->enable))
> ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
>
> est->ktid = -1;
> @@ -663,7 +663,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
> /* Wait for cpufreq frequency transition */
> wait_event_idle_timeout(wq, kthread_should_stop(),
> HZ / 50);
> - if (!ipvs->enable || kthread_should_stop())
> + if (!atomic_read(&ipvs->enable) || kthread_should_stop())
> goto stop;
> }
>
> @@ -681,7 +681,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
> rcu_read_unlock();
> local_bh_enable();
>
> - if (!ipvs->enable || kthread_should_stop())
> + if (!atomic_read(&ipvs->enable) || kthread_should_stop())
> goto stop;
> cond_resched();
>
> @@ -757,7 +757,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
> mutex_lock(&ipvs->est_mutex);
> for (id = 1; id < ipvs->est_kt_count; id++) {
> /* netns clean up started, abort */
> - if (!ipvs->enable)
> + if (!atomic_read(&ipvs->enable))
> goto unlock2;
> kd = ipvs->est_kt_arr[id];
> if (!kd)
> @@ -787,7 +787,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
> id = ipvs->est_kt_count;
>
> next_kt:
> - if (!ipvs->enable || kthread_should_stop())
> + if (!atomic_read(&ipvs->enable) || kthread_should_stop())
> goto unlock;
> id--;
> if (id < 0)
> --
In summary, the checks in the fast path (in the hooks) are
useless/obsolete, while the other checks in the slow path do not need
atomic operations. Such races are normal because a
service and its dests are not added in the same atomic transaction.
Regards
--
Julian Anastasov <ja@ssi.bg>
* Re: [PATCH] net/netfilter/ipvs: Fix data-race in ip_vs_add_service / ip_vs_out_hook
2025-08-26 13:31 [PATCH] net/netfilter/ipvs: Fix data-race in ip_vs_add_service / ip_vs_out_hook Zhang Tengfei
2025-08-26 14:18 ` Florian Westphal
2025-08-26 15:56 ` Julian Anastasov
@ 2025-08-27 6:48 ` Julian Anastasov
2025-08-27 14:43 ` Zhang Tengfei
2025-08-27 22:33 ` [PATCH v2] net/netfilter/ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable Zhang Tengfei
From: Julian Anastasov @ 2025-08-27 6:48 UTC (permalink / raw)
To: Zhang Tengfei
Cc: Simon Horman, lvs-devel, netfilter-devel, Pablo Neira Ayuso,
Jozsef Kadlecsik, Florian Westphal, David S . Miller, David Ahern,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, coreteam,
syzbot+1651b5234028c294c339
Hello,
On Tue, 26 Aug 2025, Zhang Tengfei wrote:
> A data-race was detected by KCSAN between ip_vs_add_service() which
> acts as a writer, and ip_vs_out_hook() which acts as a reader. This
> can lead to unpredictable behavior and crashes. One observed symptom
> is the "no destination available" error when processing packets.
>
> The race occurs on the `enable` flag within the `netns_ipvs`
> struct. This flag was being written in the configuration path without
> any protection, while concurrently being read in the packet processing
> path. This lack of synchronization means a reader on one CPU could see a
> partially initialized service, leading to incorrect behavior.
>
> To fix this, convert the `enable` flag from a plain integer to an
> atomic_t. This ensures that all reads and writes to the flag are atomic.
> More importantly, using atomic_set() and atomic_read() provides the
> necessary memory barriers to guarantee that changes to other fields of
> the service are visible to the reader CPU before the service is marked
> as enabled.
>
> Reported-by: syzbot+1651b5234028c294c339@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=1651b5234028c294c339
> Signed-off-by: Zhang Tengfei <zhtfdev@gmail.com>
> ---
> include/net/ip_vs.h | 2 +-
> net/netfilter/ipvs/ip_vs_conn.c | 4 ++--
> net/netfilter/ipvs/ip_vs_core.c | 10 +++++-----
> net/netfilter/ipvs/ip_vs_ctl.c | 6 +++---
> net/netfilter/ipvs/ip_vs_est.c | 16 ++++++++--------
> 5 files changed, 19 insertions(+), 19 deletions(-)
>
> diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
> index 15049b826732..c5aa2660de92 100644
> --- a/net/netfilter/ipvs/ip_vs_est.c
> +++ b/net/netfilter/ipvs/ip_vs_est.c
...
> @@ -757,7 +757,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
> mutex_lock(&ipvs->est_mutex);
> for (id = 1; id < ipvs->est_kt_count; id++) {
> /* netns clean up started, abort */
> - if (!ipvs->enable)
> + if (!atomic_read(&ipvs->enable))
> goto unlock2;
It is a simple flag, but as it is checked in loops
in a few places in ip_vs_est.c, let's use READ_ONCE/WRITE_ONCE as
suggested by Florian and Eric. The 3 checks in the hooks in ip_vs_core.c
can simply be removed: in ip_vs_out_hook, ip_vs_in_hook and
ip_vs_forward_icmp. We can see enable=0 in rare cases, which is
not fatal. It is an in-flight packet in two possible cases:
1. after hooks are registered but before the flag is set
2. after the hooks are unregistered on cleanup_net
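To illustrate the loop concern (the first form shows a hypothetical
compiler transformation, not actual code):

        /* without READ_ONCE() the compiler may load the flag once: */
        int enabled = ipvs->enable;
        for (id = 1; id < ipvs->est_kt_count; id++) {
                if (!enabled)           /* never observes cleanup_net */
                        goto unlock2;
                /* ... per-kthread work ... */
        }

        /* READ_ONCE() forces a fresh load on every iteration: */
        for (id = 1; id < ipvs->est_kt_count; id++) {
                if (!READ_ONCE(ipvs->enable))
                        goto unlock2;
                /* ... per-kthread work ... */
        }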
Regards
--
Julian Anastasov <ja@ssi.bg>
* (no subject)
2025-08-27 6:48 ` Julian Anastasov
@ 2025-08-27 14:43 ` Zhang Tengfei
2025-08-27 21:37 ` Pablo Neira Ayuso
2025-08-27 22:33 ` [PATCH v2] net/netfilter/ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable Zhang Tengfei
From: Zhang Tengfei @ 2025-08-27 14:43 UTC (permalink / raw)
To: ja
Cc: coreteam, davem, dsahern, edumazet, fw, horms, kadlec, kuba,
lvs-devel, netfilter-devel, pabeni, pablo,
syzbot+1651b5234028c294c339, zhtfdev
Hi everyone,
Here is the v2 patch that incorporates the feedback.
Many thanks to Julian for his thorough review and for providing
the detailed plan for this new version, and thanks to Florian
and Eric for suggestions.
Subject: [PATCH v2] net/netfilter/ipvs: Use READ_ONCE/WRITE_ONCE for
ipvs->enable
KCSAN reported a data-race on the `ipvs->enable` flag, which is
written in the control path and read concurrently from many other
contexts.
Following a suggestion by Julian, this patch fixes the race by
converting all accesses to use `WRITE_ONCE()/READ_ONCE()`.
This lightweight approach ensures atomic access and acts as a
compiler barrier, preventing unsafe optimizations where the flag
is checked in loops (e.g., in ip_vs_est.c).
Additionally, the now-obsolete `enable` checks in the fast path
hooks (`ip_vs_in_hook`, `ip_vs_out_hook`, `ip_vs_forward_icmp`)
are removed. These are unnecessary since commit 857ca89711de
("ipvs: register hooks only with services").
Reported-by: syzbot+1651b5234028c294c339@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=1651b5234028c294c339
Suggested-by: Julian Anastasov <ja@ssi.bg>
Link: https://lore.kernel.org/lvs-devel/2189fc62-e51e-78c9-d1de-d35b8e3657e3@ssi.bg/
Signed-off-by: Zhang Tengfei <zhtfdev@gmail.com>
---
v2:
- Switched from atomic_t to the suggested READ_ONCE()/WRITE_ONCE().
- Removed obsolete checks from the packet processing hooks.
- Polished commit message based on feedback.
---
net/netfilter/ipvs/ip_vs_conn.c | 4 ++--
net/netfilter/ipvs/ip_vs_core.c | 11 ++++-------
net/netfilter/ipvs/ip_vs_ctl.c | 6 +++---
net/netfilter/ipvs/ip_vs_est.c | 16 ++++++++--------
4 files changed, 17 insertions(+), 20 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 965f3c8e5..37ebb0cb6 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -885,7 +885,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
* conntrack cleanup for the net.
*/
smp_rmb();
- if (ipvs->enable)
+ if (READ_ONCE(ipvs->enable))
ip_vs_conn_drop_conntrack(cp);
}
@@ -1439,7 +1439,7 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
cond_resched_rcu();
/* netns clean up started, abort delayed work */
- if (!ipvs->enable)
+ if (!READ_ONCE(ipvs->enable))
break;
}
rcu_read_unlock();
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c7a8a08b7..5ea7ab8bf 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1353,9 +1353,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
if (unlikely(!skb_dst(skb)))
return NF_ACCEPT;
- if (!ipvs->enable)
- return NF_ACCEPT;
-
ip_vs_fill_iph_skb(af, skb, false, &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
@@ -1940,7 +1937,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
return NF_ACCEPT;
}
/* ipvs enabled in this netns ? */
- if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
+ if (unlikely(sysctl_backup_only(ipvs)))
return NF_ACCEPT;
ip_vs_fill_iph_skb(af, skb, false, &iph);
@@ -2108,7 +2105,7 @@ ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
int r;
/* ipvs enabled in this netns ? */
- if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
+ if (unlikely(sysctl_backup_only(ipvs)))
return NF_ACCEPT;
if (state->pf == NFPROTO_IPV4) {
@@ -2295,7 +2292,7 @@ static int __net_init __ip_vs_init(struct net *net)
return -ENOMEM;
/* Hold the beast until a service is registered */
- ipvs->enable = 0;
+ WRITE_ONCE(ipvs->enable, 0);
ipvs->net = net;
/* Counters used for creating unique names */
ipvs->gen = atomic_read(&ipvs_netns_cnt);
@@ -2367,7 +2364,7 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
ipvs = net_ipvs(net);
ip_vs_unregister_hooks(ipvs, AF_INET);
ip_vs_unregister_hooks(ipvs, AF_INET6);
- ipvs->enable = 0; /* Disable packet reception */
+ WRITE_ONCE(ipvs->enable, 0); /* Disable packet reception */
smp_wmb();
ip_vs_sync_net_cleanup(ipvs);
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 6a6fc4478..4c8fa22be 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -256,7 +256,7 @@ static void est_reload_work_handler(struct work_struct *work)
struct ip_vs_est_kt_data *kd = ipvs->est_kt_arr[id];
/* netns clean up started, abort delayed work */
- if (!ipvs->enable)
+ if (!READ_ONCE(ipvs->enable))
goto unlock;
if (!kd)
continue;
@@ -1483,9 +1483,9 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
*svc_p = svc;
- if (!ipvs->enable) {
+ if (!READ_ONCE(ipvs->enable)) {
/* Now there is a service - full throttle */
- ipvs->enable = 1;
+ WRITE_ONCE(ipvs->enable, 1);
/* Start estimation for first time */
ip_vs_est_reload_start(ipvs);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 15049b826..93a925f1e 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -231,7 +231,7 @@ static int ip_vs_estimation_kthread(void *data)
void ip_vs_est_reload_start(struct netns_ipvs *ipvs)
{
/* Ignore reloads before first service is added */
- if (!ipvs->enable)
+ if (!READ_ONCE(ipvs->enable))
return;
ip_vs_est_stopped_recalc(ipvs);
/* Bump the kthread configuration genid */
@@ -306,7 +306,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
int i;
if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
- ipvs->enable && ipvs->est_max_threads)
+ READ_ONCE(ipvs->enable) && ipvs->est_max_threads)
return -EINVAL;
mutex_lock(&ipvs->est_mutex);
@@ -343,7 +343,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
}
/* Start kthread tasks only when services are present */
- if (ipvs->enable && !ip_vs_est_stopped(ipvs)) {
+ if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) {
ret = ip_vs_est_kthread_start(ipvs, kd);
if (ret < 0)
goto out;
@@ -486,7 +486,7 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
struct ip_vs_estimator *est = &stats->est;
int ret;
- if (!ipvs->est_max_threads && ipvs->enable)
+ if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable))
ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
est->ktid = -1;
@@ -663,7 +663,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
/* Wait for cpufreq frequency transition */
wait_event_idle_timeout(wq, kthread_should_stop(),
HZ / 50);
- if (!ipvs->enable || kthread_should_stop())
+ if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
goto stop;
}
@@ -681,7 +681,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
rcu_read_unlock();
local_bh_enable();
- if (!ipvs->enable || kthread_should_stop())
+ if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
goto stop;
cond_resched();
@@ -757,7 +757,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
mutex_lock(&ipvs->est_mutex);
for (id = 1; id < ipvs->est_kt_count; id++) {
/* netns clean up started, abort */
- if (!ipvs->enable)
+ if (!READ_ONCE(ipvs->enable))
goto unlock2;
kd = ipvs->est_kt_arr[id];
if (!kd)
@@ -787,7 +787,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
id = ipvs->est_kt_count;
next_kt:
- if (!ipvs->enable || kthread_should_stop())
+ if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
goto unlock;
id--;
if (id < 0)
--
2.34.1
* Re:
2025-08-27 14:43 ` Zhang Tengfei
@ 2025-08-27 21:37 ` Pablo Neira Ayuso
From: Pablo Neira Ayuso @ 2025-08-27 21:37 UTC (permalink / raw)
To: Zhang Tengfei
Cc: ja, coreteam, davem, dsahern, edumazet, fw, horms, kadlec, kuba,
lvs-devel, netfilter-devel, pabeni, syzbot+1651b5234028c294c339
On Wed, Aug 27, 2025 at 10:43:42PM +0800, Zhang Tengfei wrote:
> Hi everyone,
>
> Here is the v2 patch that incorporates the feedback.
A patch without a subject will not fly too far; I'm afraid you will have
to resubmit. One more comment below.
> Many thanks to Julian for his thorough review and for providing
> the detailed plan for this new version, and thanks to Florian
> and Eric for suggestions.
>
> Subject: [PATCH v2] net/netfilter/ipvs: Use READ_ONCE/WRITE_ONCE for
> ipvs->enable
>
> KCSAN reported a data-race on the `ipvs->enable` flag, which is
> written in the control path and read concurrently from many other
> contexts.
>
> Following a suggestion by Julian, this patch fixes the race by
> converting all accesses to use `WRITE_ONCE()/READ_ONCE()`.
> This lightweight approach ensures atomic access and acts as a
> compiler barrier, preventing unsafe optimizations where the flag
> is checked in loops (e.g., in ip_vs_est.c).
>
> Additionally, the now-obsolete `enable` checks in the fast path
> hooks (`ip_vs_in_hook`, `ip_vs_out_hook`, `ip_vs_forward_icmp`)
> are removed. These are unnecessary since commit 857ca89711de
> ("ipvs: register hooks only with services").
>
> Reported-by: syzbot+1651b5234028c294c339@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=1651b5234028c294c339
> Suggested-by: Julian Anastasov <ja@ssi.bg>
> Link: https://lore.kernel.org/lvs-devel/2189fc62-e51e-78c9-d1de-d35b8e3657e3@ssi.bg/
> Signed-off-by: Zhang Tengfei <zhtfdev@gmail.com>
>
> ---
> v2:
> - Switched from atomic_t to the suggested READ_ONCE()/WRITE_ONCE().
> - Removed obsolete checks from the packet processing hooks.
> - Polished commit message based on feedback.
> ---
> net/netfilter/ipvs/ip_vs_conn.c | 4 ++--
> net/netfilter/ipvs/ip_vs_core.c | 11 ++++-------
> net/netfilter/ipvs/ip_vs_ctl.c | 6 +++---
> net/netfilter/ipvs/ip_vs_est.c | 16 ++++++++--------
> 4 files changed, 17 insertions(+), 20 deletions(-)
[...]
> diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
> index c7a8a08b7..5ea7ab8bf 100644
> --- a/net/netfilter/ipvs/ip_vs_core.c
> +++ b/net/netfilter/ipvs/ip_vs_core.c
> @@ -1353,9 +1353,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
> if (unlikely(!skb_dst(skb)))
> return NF_ACCEPT;
>
> - if (!ipvs->enable)
> - return NF_ACCEPT;
The patch does not say why this is going away. If you think this check is not
necessary, then make it a separate patch and explain why that is needed.
Thanks
> ip_vs_fill_iph_skb(af, skb, false, &iph);
> #ifdef CONFIG_IP_VS_IPV6
> if (af == AF_INET6) {
* [PATCH v2] net/netfilter/ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable
2025-08-27 6:48 ` Julian Anastasov
2025-08-27 14:43 ` Zhang Tengfei
@ 2025-08-27 22:33 ` Zhang Tengfei
2025-08-27 22:51 ` Zhang Tengfei
2025-08-31 13:01 ` Julian Anastasov
From: Zhang Tengfei @ 2025-08-27 22:33 UTC (permalink / raw)
To: ja
Cc: coreteam, davem, dsahern, edumazet, fw, horms, kadlec, kuba,
lvs-devel, netfilter-devel, pabeni, pablo,
syzbot+1651b5234028c294c339, zhtfdev
KCSAN reported a data-race on the `ipvs->enable` flag, which is
written in the control path and read concurrently from many other
contexts.
Following a suggestion by Julian, this patch fixes the race by
converting all accesses to use `WRITE_ONCE()/READ_ONCE()`.
This lightweight approach ensures atomic access and acts as a
compiler barrier, preventing unsafe optimizations where the flag
is checked in loops (e.g., in ip_vs_est.c).
Additionally, the `enable` checks in the fast-path hooks
(`ip_vs_in_hook`, `ip_vs_out_hook`, `ip_vs_forward_icmp`) are
removed. They are considered unnecessary because the `enable=0`
condition they check for can only occur in two rare and non-fatal
scenarios: 1) after hooks are registered but before the flag is set,
and 2) after hooks are unregistered on cleanup_net. In the worst
case, a single packet might be mishandled (e.g., dropped), which
does not lead to a system crash or data corruption. Adding a check
in the performance-critical fast-path to handle this harmless
condition is not a worthwhile trade-off.
Reported-by: syzbot+1651b5234028c294c339@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=1651b5234028c294c339
Suggested-by: Julian Anastasov <ja@ssi.bg>
Link: https://lore.kernel.org/lvs-devel/2189fc62-e51e-78c9-d1de-d35b8e3657e3@ssi.bg/
Signed-off-by: Zhang Tengfei <zhtfdev@gmail.com>
---
v2:
- Switched from atomic_t to the suggested READ_ONCE()/WRITE_ONCE().
- Removed obsolete checks from the packet processing hooks.
- Polished commit message based on feedback from maintainers.
---
net/netfilter/ipvs/ip_vs_conn.c | 4 ++--
net/netfilter/ipvs/ip_vs_core.c | 11 ++++-------
net/netfilter/ipvs/ip_vs_ctl.c | 6 +++---
net/netfilter/ipvs/ip_vs_est.c | 16 ++++++++--------
4 files changed, 17 insertions(+), 20 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 965f3c8e5..37ebb0cb6 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -885,7 +885,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
* conntrack cleanup for the net.
*/
smp_rmb();
- if (ipvs->enable)
+ if (READ_ONCE(ipvs->enable))
ip_vs_conn_drop_conntrack(cp);
}
@@ -1439,7 +1439,7 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
cond_resched_rcu();
/* netns clean up started, abort delayed work */
- if (!ipvs->enable)
+ if (!READ_ONCE(ipvs->enable))
break;
}
rcu_read_unlock();
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c7a8a08b7..5ea7ab8bf 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1353,9 +1353,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
if (unlikely(!skb_dst(skb)))
return NF_ACCEPT;
- if (!ipvs->enable)
- return NF_ACCEPT;
-
ip_vs_fill_iph_skb(af, skb, false, &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
@@ -1940,7 +1937,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
return NF_ACCEPT;
}
/* ipvs enabled in this netns ? */
- if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
+ if (unlikely(sysctl_backup_only(ipvs)))
return NF_ACCEPT;
ip_vs_fill_iph_skb(af, skb, false, &iph);
@@ -2108,7 +2105,7 @@ ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
int r;
/* ipvs enabled in this netns ? */
- if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
+ if (unlikely(sysctl_backup_only(ipvs)))
return NF_ACCEPT;
if (state->pf == NFPROTO_IPV4) {
@@ -2295,7 +2292,7 @@ static int __net_init __ip_vs_init(struct net *net)
return -ENOMEM;
/* Hold the beast until a service is registered */
- ipvs->enable = 0;
+ WRITE_ONCE(ipvs->enable, 0);
ipvs->net = net;
/* Counters used for creating unique names */
ipvs->gen = atomic_read(&ipvs_netns_cnt);
@@ -2367,7 +2364,7 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
ipvs = net_ipvs(net);
ip_vs_unregister_hooks(ipvs, AF_INET);
ip_vs_unregister_hooks(ipvs, AF_INET6);
- ipvs->enable = 0; /* Disable packet reception */
+ WRITE_ONCE(ipvs->enable, 0); /* Disable packet reception */
smp_wmb();
ip_vs_sync_net_cleanup(ipvs);
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 6a6fc4478..4c8fa22be 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -256,7 +256,7 @@ static void est_reload_work_handler(struct work_struct *work)
struct ip_vs_est_kt_data *kd = ipvs->est_kt_arr[id];
/* netns clean up started, abort delayed work */
- if (!ipvs->enable)
+ if (!READ_ONCE(ipvs->enable))
goto unlock;
if (!kd)
continue;
@@ -1483,9 +1483,9 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
*svc_p = svc;
- if (!ipvs->enable) {
+ if (!READ_ONCE(ipvs->enable)) {
/* Now there is a service - full throttle */
- ipvs->enable = 1;
+ WRITE_ONCE(ipvs->enable, 1);
/* Start estimation for first time */
ip_vs_est_reload_start(ipvs);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 15049b826..93a925f1e 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -231,7 +231,7 @@ static int ip_vs_estimation_kthread(void *data)
void ip_vs_est_reload_start(struct netns_ipvs *ipvs)
{
/* Ignore reloads before first service is added */
- if (!ipvs->enable)
+ if (!READ_ONCE(ipvs->enable))
return;
ip_vs_est_stopped_recalc(ipvs);
/* Bump the kthread configuration genid */
@@ -306,7 +306,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
int i;
if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
- ipvs->enable && ipvs->est_max_threads)
+ READ_ONCE(ipvs->enable) && ipvs->est_max_threads)
return -EINVAL;
mutex_lock(&ipvs->est_mutex);
@@ -343,7 +343,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
}
/* Start kthread tasks only when services are present */
- if (ipvs->enable && !ip_vs_est_stopped(ipvs)) {
+ if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) {
ret = ip_vs_est_kthread_start(ipvs, kd);
if (ret < 0)
goto out;
@@ -486,7 +486,7 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
struct ip_vs_estimator *est = &stats->est;
int ret;
- if (!ipvs->est_max_threads && ipvs->enable)
+ if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable))
ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
est->ktid = -1;
@@ -663,7 +663,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
/* Wait for cpufreq frequency transition */
wait_event_idle_timeout(wq, kthread_should_stop(),
HZ / 50);
- if (!ipvs->enable || kthread_should_stop())
+ if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
goto stop;
}
@@ -681,7 +681,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
rcu_read_unlock();
local_bh_enable();
- if (!ipvs->enable || kthread_should_stop())
+ if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
goto stop;
cond_resched();
@@ -757,7 +757,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
mutex_lock(&ipvs->est_mutex);
for (id = 1; id < ipvs->est_kt_count; id++) {
/* netns clean up started, abort */
- if (!ipvs->enable)
+ if (!READ_ONCE(ipvs->enable))
goto unlock2;
kd = ipvs->est_kt_arr[id];
if (!kd)
@@ -787,7 +787,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
id = ipvs->est_kt_count;
next_kt:
- if (!ipvs->enable || kthread_should_stop())
+ if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
goto unlock;
id--;
if (id < 0)
--
2.34.1
* Re: [PATCH v2] net/netfilter/ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable
2025-08-27 22:33 ` [PATCH v2] net/netfilter/ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable Zhang Tengfei
@ 2025-08-27 22:51 ` Zhang Tengfei
2025-08-31 13:01 ` Julian Anastasov
From: Zhang Tengfei @ 2025-08-27 22:51 UTC (permalink / raw)
To: zhtfdev
Cc: coreteam, davem, dsahern, edumazet, fw, horms, ja, kadlec, kuba,
lvs-devel, netfilter-devel, pabeni, pablo,
syzbot+1651b5234028c294c339
Hi everyone,
My apologies for the noise and confusion with the multiple v2 submissions.
I sent a malformed reply earlier with no subject.
This patch in *this* thread is the correct and final version for review.
Please disregard the previous malformed email.
Many thanks to Pablo Neira Ayuso for pointing out the formatting error.
* Re: [PATCH v2] net/netfilter/ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable
2025-08-27 22:33 ` [PATCH v2] net/netfilter/ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable Zhang Tengfei
2025-08-27 22:51 ` Zhang Tengfei
@ 2025-08-31 13:01 ` Julian Anastasov
2025-09-01 13:46 ` [PATCH v3 nf-next] ipvs: " Zhang Tengfei
From: Julian Anastasov @ 2025-08-31 13:01 UTC (permalink / raw)
To: Zhang Tengfei
Cc: coreteam, davem, dsahern, edumazet, fw, horms, kadlec, kuba,
lvs-devel, netfilter-devel, pabeni, pablo,
syzbot+1651b5234028c294c339
Hello,
On Thu, 28 Aug 2025, Zhang Tengfei wrote:
> KCSAN reported a data-race on the `ipvs->enable` flag, which is
> written in the control path and read concurrently from many other
> contexts.
>
> Following a suggestion by Julian, this patch fixes the race by
> converting all accesses to use `WRITE_ONCE()/READ_ONCE()`.
> This lightweight approach ensures atomic access and acts as a
> compiler barrier, preventing unsafe optimizations where the flag
> is checked in loops (e.g., in ip_vs_est.c).
>
> Additionally, the `enable` checks in the fast-path hooks
> (`ip_vs_in_hook`, `ip_vs_out_hook`, `ip_vs_forward_icmp`) are
> removed. They are considered unnecessary because the `enable=0`
It was a good idea to mention the 857ca89711de
commit here, as in the previous v2 version. You can even add
it as a Fixes tag, as suggested by:
scripts/checkpatch.pl --strict /tmp/file.patch
As for the Subject line, you can probably use
[PATCH v3 nf-next] ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable
to specify the desired target tree ('nf-next', or 'nf' if such a
data-race needs it).
As for the patch code, it looks ok.
> condition they check for can only occur in two rare and non-fatal
> scenarios: 1) after hooks are registered but before the flag is set,
> and 2) after hooks are unregistered on cleanup_net. In the worst
> case, a single packet might be mishandled (e.g., dropped), which
> does not lead to a system crash or data corruption. Adding a check
> in the performance-critical fast-path to handle this harmless
> condition is not a worthwhile trade-off.
>
> Reported-by: syzbot+1651b5234028c294c339@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=1651b5234028c294c339
> Suggested-by: Julian Anastasov <ja@ssi.bg>
> Link: https://lore.kernel.org/lvs-devel/2189fc62-e51e-78c9-d1de-d35b8e3657e3@ssi.bg/
> Signed-off-by: Zhang Tengfei <zhtfdev@gmail.com>
>
> ---
> v2:
> - Switched from atomic_t to the suggested READ_ONCE()/WRITE_ONCE().
> - Removed obsolete checks from the packet processing hooks.
> - Polished commit message based on feedback from maintainers.
...
Regards
--
Julian Anastasov <ja@ssi.bg>
* [PATCH v3 nf-next] ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable
2025-08-31 13:01 ` Julian Anastasov
@ 2025-09-01 13:46 ` Zhang Tengfei
2025-09-03 17:31 ` Julian Anastasov
From: Zhang Tengfei @ 2025-09-01 13:46 UTC (permalink / raw)
To: ja
Cc: coreteam, davem, dsahern, edumazet, fw, horms, kadlec, kuba,
lvs-devel, netfilter-devel, pabeni, pablo,
syzbot+1651b5234028c294c339, zhtfdev
KCSAN reported a data-race on the `ipvs->enable` flag, which is
written in the control path and read concurrently from many other
contexts.
Following a suggestion by Julian, this patch fixes the race by
converting all accesses to use `WRITE_ONCE()/READ_ONCE()`.
This lightweight approach ensures atomic access and acts as a
compiler barrier, preventing unsafe optimizations where the flag
is checked in loops (e.g., in ip_vs_est.c).
Additionally, the `enable` checks in the fast-path hooks
(`ip_vs_in_hook`, `ip_vs_out_hook`, `ip_vs_forward_icmp`) are
removed. These are unnecessary since commit 857ca89711de
("ipvs: register hooks only with services"). The `enable=0`
condition they check for can only occur in two rare and non-fatal
scenarios: 1) after hooks are registered but before the flag is set,
and 2) after hooks are unregistered on cleanup_net. In the worst
case, a single packet might be mishandled (e.g., dropped), which
does not lead to a system crash or data corruption. Adding a check
in the performance-critical fast-path to handle this harmless
condition is not a worthwhile trade-off.
Fixes: 857ca89711de ("ipvs: register hooks only with services")
Reported-by: syzbot+1651b5234028c294c339@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=1651b5234028c294c339
Suggested-by: Julian Anastasov <ja@ssi.bg>
Link: https://lore.kernel.org/lvs-devel/2189fc62-e51e-78c9-d1de-d35b8e3657e3@ssi.bg/
Signed-off-by: Zhang Tengfei <zhtfdev@gmail.com>
---
v3:
- Restore reference to commit 857ca89711de in commit message.
- Add corresponding Fixes tag.
v2:
- Switched from atomic_t to the suggested READ_ONCE()/WRITE_ONCE().
- Removed obsolete checks from the packet processing hooks.
- Polished commit message based on feedback from maintainers.
---
net/netfilter/ipvs/ip_vs_conn.c | 4 ++--
net/netfilter/ipvs/ip_vs_core.c | 11 ++++-------
net/netfilter/ipvs/ip_vs_ctl.c | 6 +++---
net/netfilter/ipvs/ip_vs_est.c | 16 ++++++++--------
4 files changed, 17 insertions(+), 20 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 965f3c8e508..37ebb0cb62b 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -885,7 +885,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
* conntrack cleanup for the net.
*/
smp_rmb();
- if (ipvs->enable)
+ if (READ_ONCE(ipvs->enable))
ip_vs_conn_drop_conntrack(cp);
}
@@ -1439,7 +1439,7 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
cond_resched_rcu();
/* netns clean up started, abort delayed work */
- if (!ipvs->enable)
+ if (!READ_ONCE(ipvs->enable))
break;
}
rcu_read_unlock();
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c7a8a08b730..5ea7ab8bf4d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1353,9 +1353,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
if (unlikely(!skb_dst(skb)))
return NF_ACCEPT;
- if (!ipvs->enable)
- return NF_ACCEPT;
-
ip_vs_fill_iph_skb(af, skb, false, &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
@@ -1940,7 +1937,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
return NF_ACCEPT;
}
/* ipvs enabled in this netns ? */
- if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
+ if (unlikely(sysctl_backup_only(ipvs)))
return NF_ACCEPT;
ip_vs_fill_iph_skb(af, skb, false, &iph);
@@ -2108,7 +2105,7 @@ ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
int r;
/* ipvs enabled in this netns ? */
- if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
+ if (unlikely(sysctl_backup_only(ipvs)))
return NF_ACCEPT;
if (state->pf == NFPROTO_IPV4) {
@@ -2295,7 +2292,7 @@ static int __net_init __ip_vs_init(struct net *net)
return -ENOMEM;
/* Hold the beast until a service is registered */
- ipvs->enable = 0;
+ WRITE_ONCE(ipvs->enable, 0);
ipvs->net = net;
/* Counters used for creating unique names */
ipvs->gen = atomic_read(&ipvs_netns_cnt);
@@ -2367,7 +2364,7 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
ipvs = net_ipvs(net);
ip_vs_unregister_hooks(ipvs, AF_INET);
ip_vs_unregister_hooks(ipvs, AF_INET6);
- ipvs->enable = 0; /* Disable packet reception */
+ WRITE_ONCE(ipvs->enable, 0); /* Disable packet reception */
smp_wmb();
ip_vs_sync_net_cleanup(ipvs);
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 6a6fc447853..4c8fa22be88 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -256,7 +256,7 @@ static void est_reload_work_handler(struct work_struct *work)
struct ip_vs_est_kt_data *kd = ipvs->est_kt_arr[id];
/* netns clean up started, abort delayed work */
- if (!ipvs->enable)
+ if (!READ_ONCE(ipvs->enable))
goto unlock;
if (!kd)
continue;
@@ -1483,9 +1483,9 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
*svc_p = svc;
- if (!ipvs->enable) {
+ if (!READ_ONCE(ipvs->enable)) {
/* Now there is a service - full throttle */
- ipvs->enable = 1;
+ WRITE_ONCE(ipvs->enable, 1);
/* Start estimation for first time */
ip_vs_est_reload_start(ipvs);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 15049b82673..93a925f1ed9 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -231,7 +231,7 @@ static int ip_vs_estimation_kthread(void *data)
void ip_vs_est_reload_start(struct netns_ipvs *ipvs)
{
/* Ignore reloads before first service is added */
- if (!ipvs->enable)
+ if (!READ_ONCE(ipvs->enable))
return;
ip_vs_est_stopped_recalc(ipvs);
/* Bump the kthread configuration genid */
@@ -306,7 +306,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
int i;
if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
- ipvs->enable && ipvs->est_max_threads)
+ READ_ONCE(ipvs->enable) && ipvs->est_max_threads)
return -EINVAL;
mutex_lock(&ipvs->est_mutex);
@@ -343,7 +343,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
}
/* Start kthread tasks only when services are present */
- if (ipvs->enable && !ip_vs_est_stopped(ipvs)) {
+ if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) {
ret = ip_vs_est_kthread_start(ipvs, kd);
if (ret < 0)
goto out;
@@ -486,7 +486,7 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
struct ip_vs_estimator *est = &stats->est;
int ret;
- if (!ipvs->est_max_threads && ipvs->enable)
+ if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable))
ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
est->ktid = -1;
@@ -663,7 +663,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
/* Wait for cpufreq frequency transition */
wait_event_idle_timeout(wq, kthread_should_stop(),
HZ / 50);
- if (!ipvs->enable || kthread_should_stop())
+ if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
goto stop;
}
@@ -681,7 +681,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
rcu_read_unlock();
local_bh_enable();
- if (!ipvs->enable || kthread_should_stop())
+ if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
goto stop;
cond_resched();
@@ -757,7 +757,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
mutex_lock(&ipvs->est_mutex);
for (id = 1; id < ipvs->est_kt_count; id++) {
/* netns clean up started, abort */
- if (!ipvs->enable)
+ if (!atomic_read(&ipvs->enable))
goto unlock2;
kd = ipvs->est_kt_arr[id];
if (!kd)
@@ -787,7 +787,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
id = ipvs->est_kt_count;
next_kt:
- if (!ipvs->enable || kthread_should_stop())
+ if (!atomic_read(&ipvs->enable) || kthread_should_stop())
goto unlock;
id--;
if (id < 0)
--
2.34.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
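For readers skimming the thread, a minimal sketch of the flag-publication
pattern under discussion may help. It is illustrative only, not code from any
patch here: cfg_ready, writer() and reader() are hypothetical names, and the
barrier pairing mirrors the smp_wmb()/smp_rmb() already present in the IPVS
cleanup and connection-expire paths. Whether the flag is an atomic_t (v1) or
a plain int accessed with the ONCE macros (v3), the cross-field ordering
comes from the explicit barriers, not from the accessor itself.

	#include <linux/compiler.h>	/* READ_ONCE()/WRITE_ONCE() */
	#include <asm/barrier.h>	/* smp_wmb()/smp_rmb() */

	static int cfg_ready;		/* state published under the flag */
	static int enable;		/* the flag itself */

	static void writer(void)
	{
		cfg_ready = 1;		/* prepare the state first */
		smp_wmb();		/* order the store above ... */
		WRITE_ONCE(enable, 1);	/* ... before setting the flag */
	}

	static int reader(void)
	{
		if (!READ_ONCE(enable))	/* tear-free, non-hoistable load */
			return 0;
		smp_rmb();		/* pairs with the writer's smp_wmb() */
		return cfg_ready;	/* guaranteed to be seen as 1 here */
	}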
* Re: [PATCH v3 nf-next] ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable
2025-09-01 13:46 ` [PATCH v3 nf-next] ipvs: " Zhang Tengfei
@ 2025-09-03 17:31 ` Julian Anastasov
0 siblings, 0 replies; 12+ messages in thread
From: Julian Anastasov @ 2025-09-03 17:31 UTC (permalink / raw)
To: Zhang Tengfei
Cc: coreteam, davem, dsahern, edumazet, fw, horms, kadlec, kuba,
lvs-devel, netfilter-devel, pabeni, pablo,
syzbot+1651b5234028c294c339
Hello,
On Mon, 1 Sep 2025, Zhang Tengfei wrote:
> KCSAN reported a data-race on the `ipvs->enable` flag, which is
> written in the control path and read concurrently from many other
> contexts.
>
> Following a suggestion by Julian, this patch fixes the race by
> converting all accesses to use `WRITE_ONCE()/READ_ONCE()`.
> This lightweight approach ensures atomic access and acts as a
> compiler barrier, preventing unsafe optimizations where the flag
> is checked in loops (e.g., in ip_vs_est.c).
>
> Additionally, the `enable` checks in the fast-path hooks
> (`ip_vs_in_hook`, `ip_vs_out_hook`, `ip_vs_forward_icmp`) are
> removed. These are unnecessary since commit 857ca89711de
> ("ipvs: register hooks only with services"). The `enable=0`
> condition they check for can only occur in two rare and non-fatal
> scenarios: 1) after hooks are registered but before the flag is set,
> and 2) after hooks are unregistered on cleanup_net. In the worst
> case, a single packet might be mishandled (e.g., dropped), which
> does not lead to a system crash or data corruption. Adding a check
> in the performance-critical fast path to handle this harmless
> condition is not a worthwhile trade-off.
>
> Fixes: 857ca89711de ("ipvs: register hooks only with services")
> Reported-by: syzbot+1651b5234028c294c339@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=1651b5234028c294c339
> Suggested-by: Julian Anastasov <ja@ssi.bg>
> Link: https://lore.kernel.org/lvs-devel/2189fc62-e51e-78c9-d1de-d35b8e3657e3@ssi.bg/
> Signed-off-by: Zhang Tengfei <zhtfdev@gmail.com>
Looks good to me, thanks!
Acked-by: Julian Anastasov <ja@ssi.bg>
> ---
> v3:
> - Restore reference to commit 857ca89711de in commit message.
> - Add corresponding Fixes tag.
> v2:
> - Switched from atomic_t to the suggested READ_ONCE()/WRITE_ONCE().
> - Removed obsolete checks from the packet processing hooks.
> - Polished commit message based on feedback from maintainers.
> ---
> net/netfilter/ipvs/ip_vs_conn.c | 4 ++--
> net/netfilter/ipvs/ip_vs_core.c | 11 ++++-------
> net/netfilter/ipvs/ip_vs_ctl.c | 6 +++---
> net/netfilter/ipvs/ip_vs_est.c | 16 ++++++++--------
> 4 files changed, 17 insertions(+), 20 deletions(-)
>
> diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> index 965f3c8e508..37ebb0cb62b 100644
> --- a/net/netfilter/ipvs/ip_vs_conn.c
> +++ b/net/netfilter/ipvs/ip_vs_conn.c
> @@ -885,7 +885,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
> * conntrack cleanup for the net.
> */
> smp_rmb();
> - if (ipvs->enable)
> + if (READ_ONCE(ipvs->enable))
> ip_vs_conn_drop_conntrack(cp);
> }
>
> @@ -1439,7 +1439,7 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
> cond_resched_rcu();
>
> /* netns clean up started, abort delayed work */
> - if (!ipvs->enable)
> + if (!READ_ONCE(ipvs->enable))
> break;
> }
> rcu_read_unlock();
> diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
> index c7a8a08b730..5ea7ab8bf4d 100644
> --- a/net/netfilter/ipvs/ip_vs_core.c
> +++ b/net/netfilter/ipvs/ip_vs_core.c
> @@ -1353,9 +1353,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
> if (unlikely(!skb_dst(skb)))
> return NF_ACCEPT;
>
> - if (!ipvs->enable)
> - return NF_ACCEPT;
> -
> ip_vs_fill_iph_skb(af, skb, false, &iph);
> #ifdef CONFIG_IP_VS_IPV6
> if (af == AF_INET6) {
> @@ -1940,7 +1937,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
> return NF_ACCEPT;
> }
> /* ipvs enabled in this netns ? */
> - if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
> + if (unlikely(sysctl_backup_only(ipvs)))
> return NF_ACCEPT;
>
> ip_vs_fill_iph_skb(af, skb, false, &iph);
> @@ -2108,7 +2105,7 @@ ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
> int r;
>
> /* ipvs enabled in this netns ? */
> - if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
> + if (unlikely(sysctl_backup_only(ipvs)))
> return NF_ACCEPT;
>
> if (state->pf == NFPROTO_IPV4) {
> @@ -2295,7 +2292,7 @@ static int __net_init __ip_vs_init(struct net *net)
> return -ENOMEM;
>
> /* Hold the beast until a service is registered */
> - ipvs->enable = 0;
> + WRITE_ONCE(ipvs->enable, 0);
> ipvs->net = net;
> /* Counters used for creating unique names */
> ipvs->gen = atomic_read(&ipvs_netns_cnt);
> @@ -2367,7 +2364,7 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
> ipvs = net_ipvs(net);
> ip_vs_unregister_hooks(ipvs, AF_INET);
> ip_vs_unregister_hooks(ipvs, AF_INET6);
> - ipvs->enable = 0; /* Disable packet reception */
> + WRITE_ONCE(ipvs->enable, 0); /* Disable packet reception */
> smp_wmb();
> ip_vs_sync_net_cleanup(ipvs);
> }
> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index 6a6fc447853..4c8fa22be88 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> @@ -256,7 +256,7 @@ static void est_reload_work_handler(struct work_struct *work)
> struct ip_vs_est_kt_data *kd = ipvs->est_kt_arr[id];
>
> /* netns clean up started, abort delayed work */
> - if (!ipvs->enable)
> + if (!READ_ONCE(ipvs->enable))
> goto unlock;
> if (!kd)
> continue;
> @@ -1483,9 +1483,9 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
>
> *svc_p = svc;
>
> - if (!ipvs->enable) {
> + if (!READ_ONCE(ipvs->enable)) {
> /* Now there is a service - full throttle */
> - ipvs->enable = 1;
> + WRITE_ONCE(ipvs->enable, 1);
>
> /* Start estimation for first time */
> ip_vs_est_reload_start(ipvs);
> diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
> index 15049b82673..93a925f1ed9 100644
> --- a/net/netfilter/ipvs/ip_vs_est.c
> +++ b/net/netfilter/ipvs/ip_vs_est.c
> @@ -231,7 +231,7 @@ static int ip_vs_estimation_kthread(void *data)
> void ip_vs_est_reload_start(struct netns_ipvs *ipvs)
> {
> /* Ignore reloads before first service is added */
> - if (!ipvs->enable)
> + if (!READ_ONCE(ipvs->enable))
> return;
> ip_vs_est_stopped_recalc(ipvs);
> /* Bump the kthread configuration genid */
> @@ -306,7 +306,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
> int i;
>
> if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
> - ipvs->enable && ipvs->est_max_threads)
> + READ_ONCE(ipvs->enable) && ipvs->est_max_threads)
> return -EINVAL;
>
> mutex_lock(&ipvs->est_mutex);
> @@ -343,7 +343,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
> }
>
> /* Start kthread tasks only when services are present */
> - if (ipvs->enable && !ip_vs_est_stopped(ipvs)) {
> + if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) {
> ret = ip_vs_est_kthread_start(ipvs, kd);
> if (ret < 0)
> goto out;
> @@ -486,7 +486,7 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
> struct ip_vs_estimator *est = &stats->est;
> int ret;
>
> - if (!ipvs->est_max_threads && ipvs->enable)
> + if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable))
> ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
>
> est->ktid = -1;
> @@ -663,7 +663,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
> /* Wait for cpufreq frequency transition */
> wait_event_idle_timeout(wq, kthread_should_stop(),
> HZ / 50);
> - if (!ipvs->enable || kthread_should_stop())
> + if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
> goto stop;
> }
>
> @@ -681,7 +681,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
> rcu_read_unlock();
> local_bh_enable();
>
> - if (!ipvs->enable || kthread_should_stop())
> + if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
> goto stop;
> cond_resched();
>
> @@ -757,7 +757,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
> mutex_lock(&ipvs->est_mutex);
> for (id = 1; id < ipvs->est_kt_count; id++) {
> /* netns clean up started, abort */
> - if (!ipvs->enable)
> + if (!READ_ONCE(ipvs->enable))
> goto unlock2;
> kd = ipvs->est_kt_arr[id];
> if (!kd)
> @@ -787,7 +787,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
> id = ipvs->est_kt_count;
>
> next_kt:
> - if (!ipvs->enable || kthread_should_stop())
> + if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
> goto unlock;
> id--;
> if (id < 0)
> --
> 2.34.1
Regards
--
Julian Anastasov <ja@ssi.bg>
^ permalink raw reply [flat|nested] 12+ messages in thread
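As a footnote to the loop-hoisting point in the commit message above, here is
a minimal sketch, again illustrative rather than taken from any patch: with a
plain int read the compiler may hoist the load out of the loop and spin on a
stale value, while READ_ONCE() acts as a compiler barrier for that access and
forces a fresh load on each iteration. This is why the ip_vs_est.c loops
re-check the flag through READ_ONCE() between iterations.

	#include <linux/compiler.h>	/* READ_ONCE() */
	#include <asm/processor.h>	/* cpu_relax() */

	static int enable;

	/* May legally compile to: if (!enable) for (;;) cpu_relax(); */
	static void wait_plain(void)
	{
		while (!enable)
			cpu_relax();
	}

	/* The load of enable is reloaded on every pass */
	static void wait_once(void)
	{
		while (!READ_ONCE(enable))
			cpu_relax();
	}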
end of thread, other threads: [~2025-09-03 17:31 UTC | newest]

Thread overview: 12+ messages (links below jump to the message on this page):
2025-08-26 13:31 [PATCH] net/netfilter/ipvs: Fix data-race in ip_vs_add_service / ip_vs_out_hook Zhang Tengfei
2025-08-26 14:18 ` Florian Westphal
2025-08-26 14:40 ` Eric Dumazet
2025-08-26 15:56 ` Julian Anastasov
2025-08-27 6:48 ` Julian Anastasov
2025-08-27 14:43 ` Zhang Tengfei
2025-08-27 21:37 ` Pablo Neira Ayuso
2025-08-27 22:33 ` [PATCH v2] net/netfilter/ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable Zhang Tengfei
2025-08-27 22:51 ` Zhang Tengfei
2025-08-31 13:01 ` Julian Anastasov
2025-09-01 13:46 ` [PATCH v3 nf-next] ipvs: " Zhang Tengfei
2025-09-03 17:31 ` Julian Anastasov