netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net] team: replace team lock with rtnl lock
@ 2025-05-21 13:38 Tetsuo Handa
  2025-05-21 18:00 ` Jakub Kicinski
  2025-05-22 10:04 ` Jiri Pirko
  0 siblings, 2 replies; 9+ messages in thread
From: Tetsuo Handa @ 2025-05-21 13:38 UTC (permalink / raw)
  To: Network Development, Jiri Pirko, Jakub Kicinski
  Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Paolo Abeni,
	Hillf Danton, Stanislav Fomichev, Willem de Bruijn

syzbot is reporting locking order problem between wiphy and team.
As per Jiri Pirko's comment, let's check whether all callers are
already holding rtnl lock. This patch will help simplifying locking
dependency if all callers are already holding rtnl lock.

Reported-by: syzbot+705c61d60b091ef42c04@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=705c61d60b091ef42c04
Suggested-by: Jiri Pirko <jiri@resnulli.us>
Link: https://lkml.kernel.org/r/ZoZ2RH9BcahEB9Sb@nanopsycho.orion
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
 drivers/net/team/team_core.c              | 55 +++++++----------------
 drivers/net/team/team_mode_activebackup.c |  2 +-
 drivers/net/team/team_mode_loadbalance.c  | 11 ++---
 include/linux/if_team.h                   |  3 --
 4 files changed, 23 insertions(+), 48 deletions(-)

diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index b75ceb90359f..e4e49f8e566f 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -933,7 +933,7 @@ static bool team_port_find(const struct team *team,
  * Enable/disable port by adding to enabled port hashlist and setting
  * port->index (Might be racy so reader could see incorrect ifindex when
  * processing a flying packet, but that is not a problem). Write guarded
- * by team->lock.
+ * by RTNL.
  */
 static void team_port_enable(struct team *team,
 			     struct team_port *port)
@@ -1660,8 +1660,6 @@ static int team_init(struct net_device *dev)
 		goto err_options_register;
 	netif_carrier_off(dev);
 
-	lockdep_register_key(&team->team_lock_key);
-	__mutex_init(&team->lock, "team->team_lock_key", &team->team_lock_key);
 	netdev_lockdep_set_classes(dev);
 
 	return 0;
@@ -1682,7 +1680,7 @@ static void team_uninit(struct net_device *dev)
 	struct team_port *port;
 	struct team_port *tmp;
 
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
 	list_for_each_entry_safe(port, tmp, &team->port_list, list)
 		team_port_del(team, port->dev);
 
@@ -1691,9 +1689,7 @@ static void team_uninit(struct net_device *dev)
 	team_mcast_rejoin_fini(team);
 	team_notify_peers_fini(team);
 	team_queue_override_fini(team);
-	mutex_unlock(&team->lock);
 	netdev_change_features(dev);
-	lockdep_unregister_key(&team->team_lock_key);
 }
 
 static void team_destructor(struct net_device *dev)
@@ -1778,7 +1774,7 @@ static void team_change_rx_flags(struct net_device *dev, int change)
 	struct team_port *port;
 	int inc;
 
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
 	list_for_each_entry(port, &team->port_list, list) {
 		if (change & IFF_PROMISC) {
 			inc = dev->flags & IFF_PROMISC ? 1 : -1;
@@ -1789,7 +1785,6 @@ static void team_change_rx_flags(struct net_device *dev, int change)
 			dev_set_allmulti(port->dev, inc);
 		}
 	}
-	mutex_unlock(&team->lock);
 }
 
 static void team_set_rx_mode(struct net_device *dev)
@@ -1811,14 +1806,13 @@ static int team_set_mac_address(struct net_device *dev, void *p)
 	struct team *team = netdev_priv(dev);
 	struct team_port *port;
 
+	ASSERT_RTNL();
 	if (dev->type == ARPHRD_ETHER && !is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 	dev_addr_set(dev, addr->sa_data);
-	mutex_lock(&team->lock);
 	list_for_each_entry(port, &team->port_list, list)
 		if (team->ops.port_change_dev_addr)
 			team->ops.port_change_dev_addr(team, port);
-	mutex_unlock(&team->lock);
 	return 0;
 }
 
@@ -1829,10 +1823,10 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 	int err;
 
 	/*
-	 * Alhough this is reader, it's guarded by team lock. It's not possible
+	 * Alhough this is reader, it's guarded by RTNL. It's not possible
 	 * to traverse list in reverse under rcu_read_lock
 	 */
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
 	team->port_mtu_change_allowed = true;
 	list_for_each_entry(port, &team->port_list, list) {
 		err = dev_set_mtu(port->dev, new_mtu);
@@ -1843,7 +1837,6 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 		}
 	}
 	team->port_mtu_change_allowed = false;
-	mutex_unlock(&team->lock);
 
 	WRITE_ONCE(dev->mtu, new_mtu);
 
@@ -1853,7 +1846,6 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 	list_for_each_entry_continue_reverse(port, &team->port_list, list)
 		dev_set_mtu(port->dev, dev->mtu);
 	team->port_mtu_change_allowed = false;
-	mutex_unlock(&team->lock);
 
 	return err;
 }
@@ -1904,23 +1896,21 @@ static int team_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 	int err;
 
 	/*
-	 * Alhough this is reader, it's guarded by team lock. It's not possible
+	 * Alhough this is reader, it's guarded by RTNL. It's not possible
 	 * to traverse list in reverse under rcu_read_lock
 	 */
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
 	list_for_each_entry(port, &team->port_list, list) {
 		err = vlan_vid_add(port->dev, proto, vid);
 		if (err)
 			goto unwind;
 	}
-	mutex_unlock(&team->lock);
 
 	return 0;
 
 unwind:
 	list_for_each_entry_continue_reverse(port, &team->port_list, list)
 		vlan_vid_del(port->dev, proto, vid);
-	mutex_unlock(&team->lock);
 
 	return err;
 }
@@ -1930,10 +1920,9 @@ static int team_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 	struct team *team = netdev_priv(dev);
 	struct team_port *port;
 
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
 	list_for_each_entry(port, &team->port_list, list)
 		vlan_vid_del(port->dev, proto, vid);
-	mutex_unlock(&team->lock);
 
 	return 0;
 }
@@ -1955,9 +1944,8 @@ static void team_netpoll_cleanup(struct net_device *dev)
 {
 	struct team *team = netdev_priv(dev);
 
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
 	__team_netpoll_cleanup(team);
-	mutex_unlock(&team->lock);
 }
 
 static int team_netpoll_setup(struct net_device *dev)
@@ -1966,7 +1954,7 @@ static int team_netpoll_setup(struct net_device *dev)
 	struct team_port *port;
 	int err = 0;
 
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
 	list_for_each_entry(port, &team->port_list, list) {
 		err = __team_port_enable_netpoll(port);
 		if (err) {
@@ -1974,7 +1962,6 @@ static int team_netpoll_setup(struct net_device *dev)
 			break;
 		}
 	}
-	mutex_unlock(&team->lock);
 	return err;
 }
 #endif
@@ -1985,9 +1972,8 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev,
 	struct team *team = netdev_priv(dev);
 	int err;
 
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
 	err = team_port_add(team, port_dev, extack);
-	mutex_unlock(&team->lock);
 
 	if (!err)
 		netdev_change_features(dev);
@@ -2000,18 +1986,12 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev)
 	struct team *team = netdev_priv(dev);
 	int err;
 
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
 	err = team_port_del(team, port_dev);
-	mutex_unlock(&team->lock);
 
 	if (err)
 		return err;
 
-	if (netif_is_team_master(port_dev)) {
-		lockdep_unregister_key(&team->team_lock_key);
-		lockdep_register_key(&team->team_lock_key);
-		lockdep_set_class(&team->lock, &team->team_lock_key);
-	}
 	netdev_change_features(dev);
 
 	return err;
@@ -2308,6 +2288,7 @@ static struct team *team_nl_team_get(struct genl_info *info)
 	struct net_device *dev;
 	struct team *team;
 
+	ASSERT_RTNL();
 	if (!info->attrs[TEAM_ATTR_TEAM_IFINDEX])
 		return NULL;
 
@@ -2319,13 +2300,12 @@ static struct team *team_nl_team_get(struct genl_info *info)
 	}
 
 	team = netdev_priv(dev);
-	mutex_lock(&team->lock);
 	return team;
 }
 
 static void team_nl_team_put(struct team *team)
 {
-	mutex_unlock(&team->lock);
+	ASSERT_RTNL();
 	dev_put(team->dev);
 }
 
@@ -2961,11 +2941,8 @@ static void __team_port_change_port_removed(struct team_port *port)
 
 static void team_port_change_check(struct team_port *port, bool linkup)
 {
-	struct team *team = port->team;
-
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
 	__team_port_change_check(port, linkup);
-	mutex_unlock(&team->lock);
 }
 
 
diff --git a/drivers/net/team/team_mode_activebackup.c b/drivers/net/team/team_mode_activebackup.c
index e0f599e2a51d..4e133451f4d6 100644
--- a/drivers/net/team/team_mode_activebackup.c
+++ b/drivers/net/team/team_mode_activebackup.c
@@ -68,7 +68,7 @@ static void ab_active_port_get(struct team *team, struct team_gsetter_ctx *ctx)
 	struct team_port *active_port;
 
 	active_port = rcu_dereference_protected(ab_priv(team)->active_port,
-						lockdep_is_held(&team->lock));
+						rtnl_is_locked());
 	if (active_port)
 		ctx->data.u32_val = active_port->dev->ifindex;
 	else
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index 00f8989c29c0..6f9944108f5a 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -302,7 +302,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx)
 		/* Clear old filter data */
 		__fprog_destroy(lb_priv->ex->orig_fprog);
 		orig_fp = rcu_dereference_protected(lb_priv->fp,
-						lockdep_is_held(&team->lock));
+						    rtnl_is_locked());
 	}
 
 	rcu_assign_pointer(lb_priv->fp, fp);
@@ -325,7 +325,7 @@ static void lb_bpf_func_free(struct team *team)
 
 	__fprog_destroy(lb_priv->ex->orig_fprog);
 	fp = rcu_dereference_protected(lb_priv->fp,
-				       lockdep_is_held(&team->lock));
+				       rtnl_is_locked());
 	bpf_prog_destroy(fp);
 }
 
@@ -336,7 +336,7 @@ static void lb_tx_method_get(struct team *team, struct team_gsetter_ctx *ctx)
 	char *name;
 
 	func = rcu_dereference_protected(lb_priv->select_tx_port_func,
-					 lockdep_is_held(&team->lock));
+					 rtnl_is_locked());
 	name = lb_select_tx_port_get_name(func);
 	BUG_ON(!name);
 	ctx->data.str_val = name;
@@ -478,7 +478,8 @@ static void lb_stats_refresh(struct work_struct *work)
 	team = lb_priv_ex->team;
 	lb_priv = get_lb_priv(team);
 
-	if (!mutex_trylock(&team->lock)) {
+	/* This rtnl_trylock() might be easy to compate... */
+	if (!rtnl_trylock()) {
 		schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 0);
 		return;
 	}
@@ -515,7 +516,7 @@ static void lb_stats_refresh(struct work_struct *work)
 	schedule_delayed_work(&lb_priv_ex->stats.refresh_dw,
 			      (lb_priv_ex->stats.refresh_interval * HZ) / 10);
 
-	mutex_unlock(&team->lock);
+	rtnl_unlock();
 }
 
 static void lb_stats_refresh_interval_get(struct team *team,
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index cdc684e04a2f..ce97d891cf72 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -191,8 +191,6 @@ struct team {
 
 	const struct header_ops *header_ops_cache;
 
-	struct mutex lock; /* used for overall locking, e.g. port lists write */
-
 	/*
 	 * List of enabled ports and their count
 	 */
@@ -223,7 +221,6 @@ struct team {
 		atomic_t count_pending;
 		struct delayed_work dw;
 	} mcast_rejoin;
-	struct lock_class_key team_lock_key;
 	long mode_priv[TEAM_MODE_PRIV_LONGS];
 };
 
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net] team: replace team lock with rtnl lock
  2025-05-21 13:38 [PATCH net] team: replace team lock with rtnl lock Tetsuo Handa
@ 2025-05-21 18:00 ` Jakub Kicinski
  2025-05-22  1:00   ` Tetsuo Handa
  2025-05-22 10:04 ` Jiri Pirko
  1 sibling, 1 reply; 9+ messages in thread
From: Jakub Kicinski @ 2025-05-21 18:00 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Network Development, Jiri Pirko, Andrew Lunn, David S. Miller,
	Eric Dumazet, Paolo Abeni, Hillf Danton, Stanislav Fomichev,
	Willem de Bruijn

On Wed, 21 May 2025 22:38:55 +0900 Tetsuo Handa wrote:
> syzbot is reporting locking order problem between wiphy and team.
> As per Jiri Pirko's comment, let's check whether all callers are
> already holding rtnl lock. This patch will help simplifying locking
> dependency if all callers are already holding rtnl lock.
> 
> Reported-by: syzbot+705c61d60b091ef42c04@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=705c61d60b091ef42c04
> Suggested-by: Jiri Pirko <jiri@resnulli.us>

I don't think Jiri suggested it, he provided a review and asked
questions. Suggest means he is the proponent of the patch.
And as he pointed out this patch promptly generates all sort 
of locking warnings, please test this properly.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net] team: replace team lock with rtnl lock
  2025-05-21 18:00 ` Jakub Kicinski
@ 2025-05-22  1:00   ` Tetsuo Handa
  2025-05-22 13:47     ` Tetsuo Handa
  0 siblings, 1 reply; 9+ messages in thread
From: Tetsuo Handa @ 2025-05-22  1:00 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Network Development, Jiri Pirko, Andrew Lunn, David S. Miller,
	Eric Dumazet, Paolo Abeni, Hillf Danton, Stanislav Fomichev,
	Willem de Bruijn

On 2025/05/22 3:00, Jakub Kicinski wrote:
> On Wed, 21 May 2025 22:38:55 +0900 Tetsuo Handa wrote:
>> syzbot is reporting locking order problem between wiphy and team.
>> As per Jiri Pirko's comment, let's check whether all callers are
>> already holding rtnl lock. This patch will help simplifying locking
>> dependency if all callers are already holding rtnl lock.
>>
>> Reported-by: syzbot+705c61d60b091ef42c04@syzkaller.appspotmail.com
>> Closes: https://syzkaller.appspot.com/bug?extid=705c61d60b091ef42c04
>> Suggested-by: Jiri Pirko <jiri@resnulli.us>
> 
> I don't think Jiri suggested it, he provided a review and asked
> questions. Suggest means he is the proponent of the patch.

I think Jiri Pirko knows better than I, for Jiri is the maintainer of
TEAM DRIVER. I just tried what Jiri commented:

  I wonder, since we already rely on rtnl in lots of team code, perhaps we
  can remove team->lock completely and convert the rest of the code to be
  protected by rtnl lock as well

> And as he pointed out this patch promptly generates all sort 
> of locking warnings, please test this properly.

I didn't get any compile-time warnings, and
https://lkml.kernel.org/r/682e6b1f.a00a0220.2a3337.0007.GAE@google.com didn't
get any run-time locking warnings.

What locking warnings did you get? Is there an automated testing environment
(like https://lkml.kernel.org/r/66a4b1a7.050a0220.12c792.8f9e@mx.google.com )
which I can use for testing this patch?


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net] team: replace team lock with rtnl lock
  2025-05-21 13:38 [PATCH net] team: replace team lock with rtnl lock Tetsuo Handa
  2025-05-21 18:00 ` Jakub Kicinski
@ 2025-05-22 10:04 ` Jiri Pirko
  1 sibling, 0 replies; 9+ messages in thread
From: Jiri Pirko @ 2025-05-22 10:04 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Network Development, Jakub Kicinski, Andrew Lunn, David S. Miller,
	Eric Dumazet, Paolo Abeni, Hillf Danton, Stanislav Fomichev,
	Willem de Bruijn

Wed, May 21, 2025 at 03:38:55PM +0200, penguin-kernel@I-love.SAKURA.ne.jp wrote:

[...]

>@@ -2319,13 +2300,12 @@ static struct team *team_nl_team_get(struct genl_info *info)
> 	}
> 
> 	team = netdev_priv(dev);
>-	mutex_lock(&team->lock);
> 	return team;
> }
> 
> static void team_nl_team_put(struct team *team)
> {
>-	mutex_unlock(&team->lock);
>+	ASSERT_RTNL();
> 	dev_put(team->dev);
> }
> 

I don't understand. Why do you ignored my comments to these 2 hunks?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net] team: replace team lock with rtnl lock
  2025-05-22  1:00   ` Tetsuo Handa
@ 2025-05-22 13:47     ` Tetsuo Handa
  2025-06-20 23:20       ` Stanislav Fomichev
  0 siblings, 1 reply; 9+ messages in thread
From: Tetsuo Handa @ 2025-05-22 13:47 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Network Development, Jiri Pirko, Andrew Lunn, David S. Miller,
	Eric Dumazet, Paolo Abeni, Hillf Danton, Stanislav Fomichev,
	Willem de Bruijn

On 2025/05/22 10:00, Tetsuo Handa wrote:
> On 2025/05/22 3:00, Jakub Kicinski wrote:
>> And as he pointed out this patch promptly generates all sort 
>> of locking warnings, please test this properly.
> 
> I didn't get any compile-time warnings, and
> https://lkml.kernel.org/r/682e6b1f.a00a0220.2a3337.0007.GAE@google.com didn't
> get any run-time locking warnings.
> 
> What locking warnings did you get? Is there an automated testing environment
> (like https://lkml.kernel.org/r/66a4b1a7.050a0220.12c792.8f9e@mx.google.com )
> which I can use for testing this patch?
> 

Ah, I got which posts you are referring to. I was failing to receive Jiri's mails
because my spam filter setting was sending mails from .us domain to trash.
Now I removed the .us entry.



Jiri Pirko wrote:
> Sat, May 17, 2025 at 09:32:20AM +0200, penguin-kernel@I-love.SAKURA.ne.jp wrote:
> 
> [..]
> 
> >@@ -2319,13 +2301,12 @@ static struct team *team_nl_team_get(struct genl_info *info)
> > 	}
> > 
> > 	team = netdev_priv(dev);
> >-	mutex_lock(&team->lock);
> > 	return team;
> > }
> 
> 
> Why do you think this is safe?
> 
> Rtnl is held only for set doit.

I assumed that the caller already held rtnl lock.

> 
> 
> > 
> > static void team_nl_team_put(struct team *team)
> > {
> >-	mutex_unlock(&team->lock);
> >+	ASSERT_RTNL();
> 
> Did you test this? How? Howcome you didn't hit this assertion?

Tests using syzbot's reproducer did not hit this assertion.

> 
> 
> > 	dev_put(team->dev);
> > }
> > 

Anyway, we can't remove team lock. Too bad.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net] team: replace team lock with rtnl lock
  2025-05-22 13:47     ` Tetsuo Handa
@ 2025-06-20 23:20       ` Stanislav Fomichev
  2025-06-21 16:27         ` Tetsuo Handa
  0 siblings, 1 reply; 9+ messages in thread
From: Stanislav Fomichev @ 2025-06-20 23:20 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Jakub Kicinski, Network Development, Jiri Pirko, Andrew Lunn,
	David S. Miller, Eric Dumazet, Paolo Abeni, Hillf Danton,
	Willem de Bruijn

On 05/22, Tetsuo Handa wrote:
> On 2025/05/22 10:00, Tetsuo Handa wrote:
> > On 2025/05/22 3:00, Jakub Kicinski wrote:
> >> And as he pointed out this patch promptly generates all sort 
> >> of locking warnings, please test this properly.
> > 
> > I didn't get any compile-time warnings, and
> > https://lkml.kernel.org/r/682e6b1f.a00a0220.2a3337.0007.GAE@google.com didn't
> > get any run-time locking warnings.
> > 
> > What locking warnings did you get? Is there an automated testing environment
> > (like https://lkml.kernel.org/r/66a4b1a7.050a0220.12c792.8f9e@mx.google.com )
> > which I can use for testing this patch?
> > 
> 
> Ah, I got which posts you are referring to. I was failing to receive Jiri's mails
> because my spam filter setting was sending mails from .us domain to trash.
> Now I removed the .us entry.
> 
> 
> 
> Jiri Pirko wrote:
> > Sat, May 17, 2025 at 09:32:20AM +0200, penguin-kernel@I-love.SAKURA.ne.jp wrote:
> > 
> > [..]
> > 
> > >@@ -2319,13 +2301,12 @@ static struct team *team_nl_team_get(struct genl_info *info)
> > > 	}
> > > 
> > > 	team = netdev_priv(dev);
> > >-	mutex_lock(&team->lock);
> > > 	return team;
> > > }
> > 
> > 
> > Why do you think this is safe?
> > 
> > Rtnl is held only for set doit.
> 
> I assumed that the caller already held rtnl lock.
> 
> > 
> > 
> > > 
> > > static void team_nl_team_put(struct team *team)
> > > {
> > >-	mutex_unlock(&team->lock);
> > >+	ASSERT_RTNL();
> > 
> > Did you test this? How? Howcome you didn't hit this assertion?
> 
> Tests using syzbot's reproducer did not hit this assertion.
> 
> > 
> > 
> > > 	dev_put(team->dev);
> > > }
> > > 

[..]

> Anyway, we can't remove team lock. Too bad.

I was hoping to see another revision, but just noticed this part. Can
you share more on why we can't remove the team lock? I can try to
give it a stab if you're not planning to send a follow up...

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net] team: replace team lock with rtnl lock
  2025-06-20 23:20       ` Stanislav Fomichev
@ 2025-06-21 16:27         ` Tetsuo Handa
  0 siblings, 0 replies; 9+ messages in thread
From: Tetsuo Handa @ 2025-06-21 16:27 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Jakub Kicinski, Network Development, Jiri Pirko, Andrew Lunn,
	David S. Miller, Eric Dumazet, Paolo Abeni, Hillf Danton,
	Willem de Bruijn

On 2025/06/21 8:20, Stanislav Fomichev wrote:
>> Anyway, we can't remove team lock. Too bad.
> 
> I was hoping to see another revision, but just noticed this part. Can
> you share more on why we can't remove the team lock? I can try to
> give it a stab if you're not planning to send a follow up...

I think we can't remove the team lock unless we convert to always use
the rtnl lock. I don't have plan to propose such change. Please send
your approach.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH net] team: replace team lock with rtnl lock
@ 2025-06-23 15:31 Stanislav Fomichev
  2025-06-25 22:50 ` patchwork-bot+netdevbpf
  0 siblings, 1 reply; 9+ messages in thread
From: Stanislav Fomichev @ 2025-06-23 15:31 UTC (permalink / raw)
  To: netdev
  Cc: davem, edumazet, kuba, pabeni, jiri, andrew+netdev, sdf,
	linux-kernel, Tetsuo Handa, syzbot+705c61d60b091ef42c04,
	syzbot+71fd22ae4b81631e22fd

syszbot reports various ordering issues for lower instance locks and
team lock. Switch to using rtnl lock for protecting team device,
similar to bonding. Based on the patch by Tetsuo Handa.

Cc: Jiri Pirko <jiri@resnulli.us>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot+705c61d60b091ef42c04@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=705c61d60b091ef42c04
Reported-by: syzbot+71fd22ae4b81631e22fd@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=71fd22ae4b81631e22fd
Fixes: 6b1d3c5f675c ("team: grab team lock during team_change_rx_flags")
Link: https://lkml.kernel.org/r/ZoZ2RH9BcahEB9Sb@nanopsycho.orion
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
---
 drivers/net/team/team_core.c              | 96 +++++++++++------------
 drivers/net/team/team_mode_activebackup.c |  3 +-
 drivers/net/team/team_mode_loadbalance.c  | 13 ++-
 include/linux/if_team.h                   |  3 -
 4 files changed, 50 insertions(+), 65 deletions(-)

diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index 8bc56186b2a3..17f07eb0ee52 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -933,7 +933,7 @@ static bool team_port_find(const struct team *team,
  * Enable/disable port by adding to enabled port hashlist and setting
  * port->index (Might be racy so reader could see incorrect ifindex when
  * processing a flying packet, but that is not a problem). Write guarded
- * by team->lock.
+ * by RTNL.
  */
 static void team_port_enable(struct team *team,
 			     struct team_port *port)
@@ -1660,8 +1660,6 @@ static int team_init(struct net_device *dev)
 		goto err_options_register;
 	netif_carrier_off(dev);
 
-	lockdep_register_key(&team->team_lock_key);
-	__mutex_init(&team->lock, "team->team_lock_key", &team->team_lock_key);
 	netdev_lockdep_set_classes(dev);
 
 	return 0;
@@ -1682,7 +1680,8 @@ static void team_uninit(struct net_device *dev)
 	struct team_port *port;
 	struct team_port *tmp;
 
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
+
 	list_for_each_entry_safe(port, tmp, &team->port_list, list)
 		team_port_del(team, port->dev);
 
@@ -1691,9 +1690,7 @@ static void team_uninit(struct net_device *dev)
 	team_mcast_rejoin_fini(team);
 	team_notify_peers_fini(team);
 	team_queue_override_fini(team);
-	mutex_unlock(&team->lock);
 	netdev_change_features(dev);
-	lockdep_unregister_key(&team->team_lock_key);
 }
 
 static void team_destructor(struct net_device *dev)
@@ -1778,7 +1775,8 @@ static void team_change_rx_flags(struct net_device *dev, int change)
 	struct team_port *port;
 	int inc;
 
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
+
 	list_for_each_entry(port, &team->port_list, list) {
 		if (change & IFF_PROMISC) {
 			inc = dev->flags & IFF_PROMISC ? 1 : -1;
@@ -1789,7 +1787,6 @@ static void team_change_rx_flags(struct net_device *dev, int change)
 			dev_set_allmulti(port->dev, inc);
 		}
 	}
-	mutex_unlock(&team->lock);
 }
 
 static void team_set_rx_mode(struct net_device *dev)
@@ -1811,14 +1808,14 @@ static int team_set_mac_address(struct net_device *dev, void *p)
 	struct team *team = netdev_priv(dev);
 	struct team_port *port;
 
+	ASSERT_RTNL();
+
 	if (dev->type == ARPHRD_ETHER && !is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 	dev_addr_set(dev, addr->sa_data);
-	mutex_lock(&team->lock);
 	list_for_each_entry(port, &team->port_list, list)
 		if (team->ops.port_change_dev_addr)
 			team->ops.port_change_dev_addr(team, port);
-	mutex_unlock(&team->lock);
 	return 0;
 }
 
@@ -1828,11 +1825,8 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 	struct team_port *port;
 	int err;
 
-	/*
-	 * Alhough this is reader, it's guarded by team lock. It's not possible
-	 * to traverse list in reverse under rcu_read_lock
-	 */
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
+
 	team->port_mtu_change_allowed = true;
 	list_for_each_entry(port, &team->port_list, list) {
 		err = dev_set_mtu(port->dev, new_mtu);
@@ -1843,7 +1837,6 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 		}
 	}
 	team->port_mtu_change_allowed = false;
-	mutex_unlock(&team->lock);
 
 	WRITE_ONCE(dev->mtu, new_mtu);
 
@@ -1853,7 +1846,6 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 	list_for_each_entry_continue_reverse(port, &team->port_list, list)
 		dev_set_mtu(port->dev, dev->mtu);
 	team->port_mtu_change_allowed = false;
-	mutex_unlock(&team->lock);
 
 	return err;
 }
@@ -1903,24 +1895,19 @@ static int team_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 	struct team_port *port;
 	int err;
 
-	/*
-	 * Alhough this is reader, it's guarded by team lock. It's not possible
-	 * to traverse list in reverse under rcu_read_lock
-	 */
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
+
 	list_for_each_entry(port, &team->port_list, list) {
 		err = vlan_vid_add(port->dev, proto, vid);
 		if (err)
 			goto unwind;
 	}
-	mutex_unlock(&team->lock);
 
 	return 0;
 
 unwind:
 	list_for_each_entry_continue_reverse(port, &team->port_list, list)
 		vlan_vid_del(port->dev, proto, vid);
-	mutex_unlock(&team->lock);
 
 	return err;
 }
@@ -1930,10 +1917,10 @@ static int team_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 	struct team *team = netdev_priv(dev);
 	struct team_port *port;
 
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
+
 	list_for_each_entry(port, &team->port_list, list)
 		vlan_vid_del(port->dev, proto, vid);
-	mutex_unlock(&team->lock);
 
 	return 0;
 }
@@ -1955,9 +1942,9 @@ static void team_netpoll_cleanup(struct net_device *dev)
 {
 	struct team *team = netdev_priv(dev);
 
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
+
 	__team_netpoll_cleanup(team);
-	mutex_unlock(&team->lock);
 }
 
 static int team_netpoll_setup(struct net_device *dev)
@@ -1966,7 +1953,8 @@ static int team_netpoll_setup(struct net_device *dev)
 	struct team_port *port;
 	int err = 0;
 
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
+
 	list_for_each_entry(port, &team->port_list, list) {
 		err = __team_port_enable_netpoll(port);
 		if (err) {
@@ -1974,7 +1962,6 @@ static int team_netpoll_setup(struct net_device *dev)
 			break;
 		}
 	}
-	mutex_unlock(&team->lock);
 	return err;
 }
 #endif
@@ -1985,9 +1972,9 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev,
 	struct team *team = netdev_priv(dev);
 	int err;
 
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
+
 	err = team_port_add(team, port_dev, extack);
-	mutex_unlock(&team->lock);
 
 	if (!err)
 		netdev_change_features(dev);
@@ -2000,18 +1987,13 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev)
 	struct team *team = netdev_priv(dev);
 	int err;
 
-	mutex_lock(&team->lock);
+	ASSERT_RTNL();
+
 	err = team_port_del(team, port_dev);
-	mutex_unlock(&team->lock);
 
 	if (err)
 		return err;
 
-	if (netif_is_team_master(port_dev)) {
-		lockdep_unregister_key(&team->team_lock_key);
-		lockdep_register_key(&team->team_lock_key);
-		lockdep_set_class(&team->lock, &team->team_lock_key);
-	}
 	netdev_change_features(dev);
 
 	return err;
@@ -2304,9 +2286,10 @@ int team_nl_noop_doit(struct sk_buff *skb, struct genl_info *info)
 static struct team *team_nl_team_get(struct genl_info *info)
 {
 	struct net *net = genl_info_net(info);
-	int ifindex;
 	struct net_device *dev;
-	struct team *team;
+	int ifindex;
+
+	ASSERT_RTNL();
 
 	if (!info->attrs[TEAM_ATTR_TEAM_IFINDEX])
 		return NULL;
@@ -2318,14 +2301,11 @@ static struct team *team_nl_team_get(struct genl_info *info)
 		return NULL;
 	}
 
-	team = netdev_priv(dev);
-	mutex_lock(&team->lock);
-	return team;
+	return netdev_priv(dev);
 }
 
 static void team_nl_team_put(struct team *team)
 {
-	mutex_unlock(&team->lock);
 	dev_put(team->dev);
 }
 
@@ -2515,9 +2495,13 @@ int team_nl_options_get_doit(struct sk_buff *skb, struct genl_info *info)
 	int err;
 	LIST_HEAD(sel_opt_inst_list);
 
+	rtnl_lock();
+
 	team = team_nl_team_get(info);
-	if (!team)
-		return -EINVAL;
+	if (!team) {
+		err = -EINVAL;
+		goto rtnl_unlock;
+	}
 
 	list_for_each_entry(opt_inst, &team->option_inst_list, list)
 		list_add_tail(&opt_inst->tmp_list, &sel_opt_inst_list);
@@ -2527,6 +2511,9 @@ int team_nl_options_get_doit(struct sk_buff *skb, struct genl_info *info)
 
 	team_nl_team_put(team);
 
+rtnl_unlock:
+	rtnl_unlock();
+
 	return err;
 }
 
@@ -2805,15 +2792,22 @@ int team_nl_port_list_get_doit(struct sk_buff *skb,
 	struct team *team;
 	int err;
 
+	rtnl_lock();
+
 	team = team_nl_team_get(info);
-	if (!team)
-		return -EINVAL;
+	if (!team) {
+		err = -EINVAL;
+		goto rtnl_unlock;
+	}
 
 	err = team_nl_send_port_list_get(team, info->snd_portid, info->snd_seq,
 					 NLM_F_ACK, team_nl_send_unicast, NULL);
 
 	team_nl_team_put(team);
 
+rtnl_unlock:
+	rtnl_unlock();
+
 	return err;
 }
 
@@ -2961,11 +2955,9 @@ static void __team_port_change_port_removed(struct team_port *port)
 
 static void team_port_change_check(struct team_port *port, bool linkup)
 {
-	struct team *team = port->team;
+	ASSERT_RTNL();
 
-	mutex_lock(&team->lock);
 	__team_port_change_check(port, linkup);
-	mutex_unlock(&team->lock);
 }
 
 
diff --git a/drivers/net/team/team_mode_activebackup.c b/drivers/net/team/team_mode_activebackup.c
index e0f599e2a51d..1c3336c7a1b2 100644
--- a/drivers/net/team/team_mode_activebackup.c
+++ b/drivers/net/team/team_mode_activebackup.c
@@ -67,8 +67,7 @@ static void ab_active_port_get(struct team *team, struct team_gsetter_ctx *ctx)
 {
 	struct team_port *active_port;
 
-	active_port = rcu_dereference_protected(ab_priv(team)->active_port,
-						lockdep_is_held(&team->lock));
+	active_port = rtnl_dereference(ab_priv(team)->active_port);
 	if (active_port)
 		ctx->data.u32_val = active_port->dev->ifindex;
 	else
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index 00f8989c29c0..b14538bde2f8 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -301,8 +301,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx)
 	if (lb_priv->ex->orig_fprog) {
 		/* Clear old filter data */
 		__fprog_destroy(lb_priv->ex->orig_fprog);
-		orig_fp = rcu_dereference_protected(lb_priv->fp,
-						lockdep_is_held(&team->lock));
+		orig_fp = rtnl_dereference(lb_priv->fp);
 	}
 
 	rcu_assign_pointer(lb_priv->fp, fp);
@@ -324,8 +323,7 @@ static void lb_bpf_func_free(struct team *team)
 		return;
 
 	__fprog_destroy(lb_priv->ex->orig_fprog);
-	fp = rcu_dereference_protected(lb_priv->fp,
-				       lockdep_is_held(&team->lock));
+	fp = rtnl_dereference(lb_priv->fp);
 	bpf_prog_destroy(fp);
 }
 
@@ -335,8 +333,7 @@ static void lb_tx_method_get(struct team *team, struct team_gsetter_ctx *ctx)
 	lb_select_tx_port_func_t *func;
 	char *name;
 
-	func = rcu_dereference_protected(lb_priv->select_tx_port_func,
-					 lockdep_is_held(&team->lock));
+	func = rtnl_dereference(lb_priv->select_tx_port_func);
 	name = lb_select_tx_port_get_name(func);
 	BUG_ON(!name);
 	ctx->data.str_val = name;
@@ -478,7 +475,7 @@ static void lb_stats_refresh(struct work_struct *work)
 	team = lb_priv_ex->team;
 	lb_priv = get_lb_priv(team);
 
-	if (!mutex_trylock(&team->lock)) {
+	if (!rtnl_trylock()) {
 		schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 0);
 		return;
 	}
@@ -515,7 +512,7 @@ static void lb_stats_refresh(struct work_struct *work)
 	schedule_delayed_work(&lb_priv_ex->stats.refresh_dw,
 			      (lb_priv_ex->stats.refresh_interval * HZ) / 10);
 
-	mutex_unlock(&team->lock);
+	rtnl_unlock();
 }
 
 static void lb_stats_refresh_interval_get(struct team *team,
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index cdc684e04a2f..ce97d891cf72 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -191,8 +191,6 @@ struct team {
 
 	const struct header_ops *header_ops_cache;
 
-	struct mutex lock; /* used for overall locking, e.g. port lists write */
-
 	/*
 	 * List of enabled ports and their count
 	 */
@@ -223,7 +221,6 @@ struct team {
 		atomic_t count_pending;
 		struct delayed_work dw;
 	} mcast_rejoin;
-	struct lock_class_key team_lock_key;
 	long mode_priv[TEAM_MODE_PRIV_LONGS];
 };
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net] team: replace team lock with rtnl lock
  2025-06-23 15:31 Stanislav Fomichev
@ 2025-06-25 22:50 ` patchwork-bot+netdevbpf
  0 siblings, 0 replies; 9+ messages in thread
From: patchwork-bot+netdevbpf @ 2025-06-25 22:50 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: netdev, davem, edumazet, kuba, pabeni, jiri, andrew+netdev,
	linux-kernel, penguin-kernel, syzbot+705c61d60b091ef42c04,
	syzbot+71fd22ae4b81631e22fd

Hello:

This patch was applied to netdev/net-next.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Mon, 23 Jun 2025 08:31:47 -0700 you wrote:
> syszbot reports various ordering issues for lower instance locks and
> team lock. Switch to using rtnl lock for protecting team device,
> similar to bonding. Based on the patch by Tetsuo Handa.
> 
> Cc: Jiri Pirko <jiri@resnulli.us>
> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Reported-by: syzbot+705c61d60b091ef42c04@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=705c61d60b091ef42c04
> Reported-by: syzbot+71fd22ae4b81631e22fd@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=71fd22ae4b81631e22fd
> Fixes: 6b1d3c5f675c ("team: grab team lock during team_change_rx_flags")
> Link: https://lkml.kernel.org/r/ZoZ2RH9BcahEB9Sb@nanopsycho.orion
> Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
> 
> [...]

Here is the summary with links:
  - [net] team: replace team lock with rtnl lock
    https://git.kernel.org/netdev/net-next/c/bfb4fb77f9a8

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2025-06-25 22:50 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-21 13:38 [PATCH net] team: replace team lock with rtnl lock Tetsuo Handa
2025-05-21 18:00 ` Jakub Kicinski
2025-05-22  1:00   ` Tetsuo Handa
2025-05-22 13:47     ` Tetsuo Handa
2025-06-20 23:20       ` Stanislav Fomichev
2025-06-21 16:27         ` Tetsuo Handa
2025-05-22 10:04 ` Jiri Pirko
  -- strict thread matches above, loose matches on Subject: below --
2025-06-23 15:31 Stanislav Fomichev
2025-06-25 22:50 ` patchwork-bot+netdevbpf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).