public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] bonding: annotate data-races around slave->last_rx
@ 2026-01-20 15:28 Eric Dumazet
  2026-01-22  2:22 ` [net] " Jakub Kicinski
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2026-01-20 15:28 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jay Vosburgh, Andy Gospodarek, netdev, eric.dumazet, Eric Dumazet,
	syzbot

slave->last_rx and slave->target_last_arp_rx[...] can be read and written
locklessly. Add READ_ONCE() and WRITE_ONCE() annotations.

syzbot reported:

BUG: KCSAN: data-race in bond_rcv_validate / bond_rcv_validate

write to 0xffff888149f0d428 of 8 bytes by interrupt on cpu 1:
  bond_rcv_validate+0x202/0x7a0 drivers/net/bonding/bond_main.c:3335
  bond_handle_frame+0xde/0x5e0 drivers/net/bonding/bond_main.c:1533
  __netif_receive_skb_core+0x5b1/0x1950 net/core/dev.c:6039
  __netif_receive_skb_one_core net/core/dev.c:6150 [inline]
  __netif_receive_skb+0x59/0x270 net/core/dev.c:6265
  netif_receive_skb_internal net/core/dev.c:6351 [inline]
  netif_receive_skb+0x4b/0x2d0 net/core/dev.c:6410
...

write to 0xffff888149f0d428 of 8 bytes by interrupt on cpu 0:
  bond_rcv_validate+0x202/0x7a0 drivers/net/bonding/bond_main.c:3335
  bond_handle_frame+0xde/0x5e0 drivers/net/bonding/bond_main.c:1533
  __netif_receive_skb_core+0x5b1/0x1950 net/core/dev.c:6039
  __netif_receive_skb_one_core net/core/dev.c:6150 [inline]
  __netif_receive_skb+0x59/0x270 net/core/dev.c:6265
  netif_receive_skb_internal net/core/dev.c:6351 [inline]
  netif_receive_skb+0x4b/0x2d0 net/core/dev.c:6410
  br_netif_receive_skb net/bridge/br_input.c:30 [inline]
  NF_HOOK include/linux/netfilter.h:318 [inline]
...

value changed: 0x0000000100005365 -> 0x0000000100005366

Fixes: f5b2b966f032 ("[PATCH] bonding: Validate probe replies in ARP monitor")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
---
 drivers/net/bonding/bond_main.c    | 18 ++++++++++--------
 drivers/net/bonding/bond_options.c |  2 +-
 include/net/bonding.h              | 13 +++++++------
 3 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 0aca6c937297def91d5740dfd456800432b5e343..0ab400b870b4247cc64a698c2ad7961406cec82e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3047,8 +3047,8 @@ static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32
 			   __func__, &sip);
 		return;
 	}
-	slave->last_rx = jiffies;
-	slave->target_last_arp_rx[i] = jiffies;
+	WRITE_ONCE(slave->last_rx, jiffies);
+	WRITE_ONCE(slave->target_last_arp_rx[i], jiffies);
 }
 
 static int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
@@ -3267,8 +3267,8 @@ static void bond_validate_na(struct bonding *bond, struct slave *slave,
 			  __func__, saddr);
 		return;
 	}
-	slave->last_rx = jiffies;
-	slave->target_last_arp_rx[i] = jiffies;
+	WRITE_ONCE(slave->last_rx, jiffies);
+	WRITE_ONCE(slave->target_last_arp_rx[i], jiffies);
 }
 
 static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond,
@@ -3338,7 +3338,7 @@ int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond,
 		    (slave_do_arp_validate_only(bond) && is_ipv6) ||
 #endif
 		    !slave_do_arp_validate_only(bond))
-			slave->last_rx = jiffies;
+			WRITE_ONCE(slave->last_rx, jiffies);
 		return RX_HANDLER_ANOTHER;
 	} else if (is_arp) {
 		return bond_arp_rcv(skb, bond, slave);
@@ -3406,7 +3406,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
 
 		if (slave->link != BOND_LINK_UP) {
 			if (bond_time_in_interval(bond, last_tx, 1) &&
-			    bond_time_in_interval(bond, slave->last_rx, 1)) {
+			    bond_time_in_interval(bond, READ_ONCE(slave->last_rx), 1)) {
 
 				bond_propose_link_state(slave, BOND_LINK_UP);
 				slave_state_changed = 1;
@@ -3430,8 +3430,10 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
 			 * when the source ip is 0, so don't take the link down
 			 * if we don't know our ip yet
 			 */
-			if (!bond_time_in_interval(bond, last_tx, bond->params.missed_max) ||
-			    !bond_time_in_interval(bond, slave->last_rx, bond->params.missed_max)) {
+			if (!bond_time_in_interval(bond, last_tx,
+						   bond->params.missed_max) ||
+			    !bond_time_in_interval(bond, READ_ONCE(slave->last_rx),
+						   bond->params.missed_max)) {
 
 				bond_propose_link_state(slave, BOND_LINK_DOWN);
 				slave_state_changed = 1;
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 384499c869b8da9f036c43eb081095f1bf2141af..74708bd2570ef1d09e8faa596c6064ad11ffb3a8 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1152,7 +1152,7 @@ static void _bond_options_arp_ip_target_set(struct bonding *bond, int slot,
 
 	if (slot >= 0 && slot < BOND_MAX_ARP_TARGETS) {
 		bond_for_each_slave(bond, slave, iter)
-			slave->target_last_arp_rx[slot] = last_rx;
+			WRITE_ONCE(slave->target_last_arp_rx[slot], last_rx);
 		targets[slot] = target;
 	}
 }
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 49edc7da05867f355f880329817bdc2a371f226b..46207840355709ee28d771c803b8ee6ef8920538 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -521,13 +521,14 @@ static inline int bond_is_ip6_target_ok(struct in6_addr *addr)
 static inline unsigned long slave_oldest_target_arp_rx(struct bonding *bond,
 						       struct slave *slave)
 {
+	unsigned long tmp, ret = READ_ONCE(slave->target_last_arp_rx[0]);
 	int i = 1;
-	unsigned long ret = slave->target_last_arp_rx[0];
-
-	for (; (i < BOND_MAX_ARP_TARGETS) && bond->params.arp_targets[i]; i++)
-		if (time_before(slave->target_last_arp_rx[i], ret))
-			ret = slave->target_last_arp_rx[i];
 
+	for (; (i < BOND_MAX_ARP_TARGETS) && bond->params.arp_targets[i]; i++) {
+		tmp = READ_ONCE(slave->target_last_arp_rx[i]);
+		if (time_before(tmp, ret))
+			ret = tmp;
+	}
 	return ret;
 }
 
@@ -537,7 +538,7 @@ static inline unsigned long slave_last_rx(struct bonding *bond,
 	if (bond->params.arp_all_targets == BOND_ARP_TARGETS_ALL)
 		return slave_oldest_target_arp_rx(bond, slave);
 
-	return slave->last_rx;
+	return READ_ONCE(slave->last_rx);
 }
 
 static inline void slave_update_last_tx(struct slave *slave)
-- 
2.52.0.457.g6b5491de43-goog


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [net] bonding: annotate data-races around slave->last_rx
  2026-01-20 15:28 [PATCH net] bonding: annotate data-races around slave->last_rx Eric Dumazet
@ 2026-01-22  2:22 ` Jakub Kicinski
  2026-01-22  4:42   ` Eric Dumazet
  0 siblings, 1 reply; 9+ messages in thread
From: Jakub Kicinski @ 2026-01-22  2:22 UTC (permalink / raw)
  To: edumazet
  Cc: Jakub Kicinski, netdev, syzkaller, andy, eric.dumazet, pabeni,
	j.vosburgh, davem

This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.

For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
---
bonding: annotate data-races around slave->last_rx

This patch adds READ_ONCE()/WRITE_ONCE() annotations to slave->last_rx
and slave->target_last_arp_rx[] to fix KCSAN-reported data races in the
bonding driver's ARP monitoring code.

> diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
> index 384499c869b8..74708bd2570e 100644
> --- a/drivers/net/bonding/bond_options.c
> +++ b/drivers/net/bonding/bond_options.c
> @@ -1152,7 +1152,7 @@ static void _bond_options_arp_ip_target_set(struct bonding *bond, int slot,
>
>  	if (slot >= 0 && slot < BOND_MAX_ARP_TARGETS) {
>  		bond_for_each_slave(bond, slave, iter)
> -			slave->target_last_arp_rx[slot] = last_rx;
> +			WRITE_ONCE(slave->target_last_arp_rx[slot], last_rx);
>  		targets[slot] = target;
>  	}
>  }

Should the IPv6 NS version of this function also get WRITE_ONCE?
_bond_options_ns_ip6_target_set() writes to the same target_last_arp_rx[]
array:

    static void _bond_options_ns_ip6_target_set(...)
    {
        ...
        if (slot >= 0 && slot < BOND_MAX_NS_TARGETS) {
            bond_for_each_slave(bond, slave, iter) {
                slave->target_last_arp_rx[slot] = last_rx;  <-- no WRITE_ONCE
                ...
            }
        }
    }

This seems like the same concurrent access pattern that the ARP IPv4 path
needed fixing for.

[ ... ]
-- 
pw-bot: cr

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [net] bonding: annotate data-races around slave->last_rx
  2026-01-22  2:22 ` [net] " Jakub Kicinski
@ 2026-01-22  4:42   ` Eric Dumazet
  2026-01-22 15:28     ` Jakub Kicinski
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2026-01-22  4:42 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: netdev, syzkaller, andy, eric.dumazet, pabeni, j.vosburgh, davem

On Thu, Jan 22, 2026 at 3:22 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> This is an AI-generated review of your patch. The human sending this
> email has considered the AI review valid, or at least plausible.
>
> For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
> ---
> bonding: annotate data-races around slave->last_rx
>
> This patch adds READ_ONCE()/WRITE_ONCE() annotations to slave->last_rx
> and slave->target_last_arp_rx[] to fix KCSAN-reported data races in the
> bonding driver's ARP monitoring code.
>
> > diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
> > index 384499c869b8..74708bd2570e 100644
> > --- a/drivers/net/bonding/bond_options.c
> > +++ b/drivers/net/bonding/bond_options.c
> > @@ -1152,7 +1152,7 @@ static void _bond_options_arp_ip_target_set(struct bonding *bond, int slot,
> >
> >       if (slot >= 0 && slot < BOND_MAX_ARP_TARGETS) {
> >               bond_for_each_slave(bond, slave, iter)
> > -                     slave->target_last_arp_rx[slot] = last_rx;
> > +                     WRITE_ONCE(slave->target_last_arp_rx[slot], last_rx);
> >               targets[slot] = target;
> >       }
> >  }
>
> Should the IPv6 NS version of this function also get WRITE_ONCE?
> _bond_options_ns_ip6_target_set() writes to the same target_last_arp_rx[]
> array:
>
>     static void _bond_options_ns_ip6_target_set(...)
>     {
>         ...
>         if (slot >= 0 && slot < BOND_MAX_NS_TARGETS) {
>             bond_for_each_slave(bond, slave, iter) {
>                 slave->target_last_arp_rx[slot] = last_rx;  <-- no WRITE_ONCE
>                 ...
>             }
>         }
>     }
>
> This seems like the same concurrent access pattern that the ARP IPv4 path
> needed fixing for.

I saw this spot, but determined this was used at setup time, not in
packet processing.

I was maybe wrong.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [net] bonding: annotate data-races around slave->last_rx
  2026-01-22  4:42   ` Eric Dumazet
@ 2026-01-22 15:28     ` Jakub Kicinski
  2026-01-22 15:50       ` Eric Dumazet
  0 siblings, 1 reply; 9+ messages in thread
From: Jakub Kicinski @ 2026-01-22 15:28 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, syzkaller, andy, eric.dumazet, pabeni, j.vosburgh, davem

On Thu, 22 Jan 2026 05:42:55 +0100 Eric Dumazet wrote:
> > This seems like the same concurrent access pattern that the ARP IPv4 path
> > needed fixing for.  
> 
> I saw this spot, but determined this was used at setup time, not in
> packet processing.
> 
> I was maybe wrong.

I guess if nothing else we should do it for consistency?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [net] bonding: annotate data-races around slave->last_rx
  2026-01-22 15:28     ` Jakub Kicinski
@ 2026-01-22 15:50       ` Eric Dumazet
  2026-01-22 15:56         ` Eric Dumazet
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2026-01-22 15:50 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: netdev, syzkaller, andy, eric.dumazet, pabeni, j.vosburgh, davem

On Thu, Jan 22, 2026 at 4:29 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Thu, 22 Jan 2026 05:42:55 +0100 Eric Dumazet wrote:
> > > This seems like the same concurrent access pattern that the ARP IPv4 path
> > > needed fixing for.
> >
> > I saw this spot, but determined this was used at setup time, not in
> > packet processing.
> >
> > I was maybe wrong.
>
> I guess if nothing else we should do it for consistency?

To be clear, I will cook a V2 with this part.

However, I will not add WRITE_ONCE() in bond_enslave() :

    new_slave->last_rx = jiffies -

(msecs_to_jiffies(bond->params.arp_interval) + 1);
     for (i = 0; i < BOND_MAX_ARP_TARGETS; i++)
            new_slave->target_last_arp_rx[i] = new_slave->last_rx;

      new_slave->last_tx = new_slave->last_rx;

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [net] bonding: annotate data-races around slave->last_rx
  2026-01-22 15:50       ` Eric Dumazet
@ 2026-01-22 15:56         ` Eric Dumazet
  2026-01-22 15:57           ` Eric Dumazet
  2026-01-22 16:58           ` Jakub Kicinski
  0 siblings, 2 replies; 9+ messages in thread
From: Eric Dumazet @ 2026-01-22 15:56 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: netdev, syzkaller, andy, eric.dumazet, pabeni, j.vosburgh, davem

On Thu, Jan 22, 2026 at 4:50 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Thu, Jan 22, 2026 at 4:29 PM Jakub Kicinski <kuba@kernel.org> wrote:
> >
> > On Thu, 22 Jan 2026 05:42:55 +0100 Eric Dumazet wrote:
> > > > This seems like the same concurrent access pattern that the ARP IPv4 path
> > > > needed fixing for.
> > >
> > > I saw this spot, but determined this was used at setup time, not in
> > > packet processing.
> > >
> > > I was maybe wrong.
> >
> > I guess if nothing else we should do it for consistency?
>
> To be clear, I will cook a V2 with this part.
>

_bond_options_ns_ip6_target_set() probably needs a fix as well.

Is AI review stopping at the first error ?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [net] bonding: annotate data-races around slave->last_rx
  2026-01-22 15:56         ` Eric Dumazet
@ 2026-01-22 15:57           ` Eric Dumazet
  2026-01-22 16:58           ` Jakub Kicinski
  1 sibling, 0 replies; 9+ messages in thread
From: Eric Dumazet @ 2026-01-22 15:57 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: netdev, syzkaller, andy, eric.dumazet, pabeni, j.vosburgh, davem

On Thu, Jan 22, 2026 at 4:56 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Thu, Jan 22, 2026 at 4:50 PM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Thu, Jan 22, 2026 at 4:29 PM Jakub Kicinski <kuba@kernel.org> wrote:
> > >
> > > On Thu, 22 Jan 2026 05:42:55 +0100 Eric Dumazet wrote:
> > > > > This seems like the same concurrent access pattern that the ARP IPv4 path
> > > > > needed fixing for.
> > > >
> > > > I saw this spot, but determined this was used at setup time, not in
> > > > packet processing.
> > > >
> > > > I was maybe wrong.
> > >
> > > I guess if nothing else we should do it for consistency?
> >
> > To be clear, I will cook a V2 with this part.
> >
>
> _bond_options_ns_ip6_target_set() probably needs a fix as well.

Sorry, copy paste error : I meant : bond_option_arp_ip_target_rem()

>
> Is AI review stopping at the first error ?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [net] bonding: annotate data-races around slave->last_rx
  2026-01-22 15:56         ` Eric Dumazet
  2026-01-22 15:57           ` Eric Dumazet
@ 2026-01-22 16:58           ` Jakub Kicinski
  2026-01-22 17:42             ` Chris Mason
  1 sibling, 1 reply; 9+ messages in thread
From: Jakub Kicinski @ 2026-01-22 16:58 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, syzkaller, andy, eric.dumazet, pabeni, j.vosburgh, davem,
	clm

On Thu, 22 Jan 2026 16:56:33 +0100 Eric Dumazet wrote:
> On Thu, Jan 22, 2026 at 4:50 PM Eric Dumazet <edumazet@google.com> wrote:
> > On Thu, Jan 22, 2026 at 4:29 PM Jakub Kicinski <kuba@kernel.org> wrote:  
> > > I guess if nothing else we should do it for consistency?  
> >
> > To be clear, I will cook a V2 with this part.
> >  
> 
> _bond_options_ns_ip6_target_set() probably needs a fix as well.
> 
> Is AI review stopping at the first error ?

I don't think it explicitly stops after the first occurrence but it
seems to have limited ability to dig around. Simplifying, it only digs
around for 5-10min, and it spends a lot of time proving that the
issue is real. So once it's spend time proving one issue it's often out
of time, and may not search for another one :(

Ccing Chris, hopefully he'll chime in later if I'm wrong.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [net] bonding: annotate data-races around slave->last_rx
  2026-01-22 16:58           ` Jakub Kicinski
@ 2026-01-22 17:42             ` Chris Mason
  0 siblings, 0 replies; 9+ messages in thread
From: Chris Mason @ 2026-01-22 17:42 UTC (permalink / raw)
  To: Jakub Kicinski, Eric Dumazet
  Cc: netdev, syzkaller, andy, eric.dumazet, pabeni, j.vosburgh, davem



On 1/22/26 11:58 AM, Jakub Kicinski wrote:
> On Thu, 22 Jan 2026 16:56:33 +0100 Eric Dumazet wrote:
>> On Thu, Jan 22, 2026 at 4:50 PM Eric Dumazet <edumazet@google.com> wrote:
>>> On Thu, Jan 22, 2026 at 4:29 PM Jakub Kicinski <kuba@kernel.org> wrote:  
>>>> I guess if nothing else we should do it for consistency?  
>>>
>>> To be clear, I will cook a V2 with this part.
>>>  
>>
>> _bond_options_ns_ip6_target_set() probably needs a fix as well.
>>
>> Is AI review stopping at the first error ?
> 
> I don't think it explicitly stops after the first occurrence but it
> seems to have limited ability to dig around. Simplifying, it only digs
> around for 5-10min, and it spends a lot of time proving that the
> issue is real. So once it's spend time proving one issue it's often out
> of time, and may not search for another one :(
> 
> Ccing Chris, hopefully he'll chime in later if I'm wrong.

It's supposed to keep hunting for bugs until it has reviewed the entire
thing, and then run all the bugs it found through the false positive pass.

But, sometimes it gets really excited and jumps out early, or as Jakub
mentions sometimes the context window fills and it starts skipping
steps.  I've tried different ways to force things, but unfortunately
none of them have worked 100% of the time.

-chris

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2026-01-22 17:42 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-01-20 15:28 [PATCH net] bonding: annotate data-races around slave->last_rx Eric Dumazet
2026-01-22  2:22 ` [net] " Jakub Kicinski
2026-01-22  4:42   ` Eric Dumazet
2026-01-22 15:28     ` Jakub Kicinski
2026-01-22 15:50       ` Eric Dumazet
2026-01-22 15:56         ` Eric Dumazet
2026-01-22 15:57           ` Eric Dumazet
2026-01-22 16:58           ` Jakub Kicinski
2026-01-22 17:42             ` Chris Mason

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox