netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next] net: bonding: use workqueue to make sure peer notify updated
@ 2025-10-21  5:22 Tonghao Zhang
  2025-10-21 11:21 ` Simon Horman
  2025-10-22  2:02 ` Jay Vosburgh
  0 siblings, 2 replies; 4+ messages in thread
From: Tonghao Zhang @ 2025-10-21  5:22 UTC (permalink / raw)
  To: netdev
  Cc: Tonghao Zhang, Jay Vosburgh, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Simon Horman, Jonathan Corbet,
	Andrew Lunn, Nikolay Aleksandrov, Hangbin Liu

If RTNL is already held elsewhere, the rtnl_trylock() in
ad_cond_set_peer_notif() fails and the update of send_peer_notif is
silently skipped. This patch addresses the issue by deferring the update
to a workqueue item that re-queues itself until it can take RTNL. Since
updating send_peer_notif does not require high real-time performance,
such delayed updates are entirely acceptable.

Cc: Jay Vosburgh <jv@jvosburgh.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Andrew Lunn <andrew+netdev@lunn.ch>
Cc: Nikolay Aleksandrov <razor@blackwall.org>
Cc: Hangbin Liu <liuhangbin@gmail.com>
Suggested-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
---
 drivers/net/bonding/bond_3ad.c  |  7 ++-----
 drivers/net/bonding/bond_main.c | 27 +++++++++++++++++++++++++++
 include/net/bonding.h           |  2 ++
 3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 2fca8e84ab10..1db2e34a351f 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -986,11 +986,8 @@ static void ad_cond_set_peer_notif(struct port *port)
 {
 	struct bonding *bond = port->slave->bond;
 
-	if (bond->params.broadcast_neighbor && rtnl_trylock()) {
-		bond->send_peer_notif = bond->params.num_peer_notif *
-			max(1, bond->params.peer_notif_delay);
-		rtnl_unlock();
-	}
+	if (bond->params.broadcast_neighbor)
+		bond_peer_notify_work_rearm(bond, 0);
 }
 
 /**
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2d6883296e32..5791c3e39baa 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3990,6 +3990,31 @@ static void bond_arp_monitor(struct work_struct *work)
 		bond_loadbalance_arp_mon(bond);
 }
 
+/* Use this to update send_peer_notif when RTNL may be held in other places. */
+void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay)
+{
+	queue_delayed_work(bond->wq, &bond->peer_notify_work, delay);
+}
+
+/* Peer notify update handler. Holds only RTNL */
+static void bond_peer_notify_handler(struct work_struct *work)
+{
+	struct bonding *bond = container_of(work, struct bonding,
+					    peer_notify_work.work);
+
+	if (!rtnl_trylock())
+		goto rearm;
+
+	bond->send_peer_notif = bond->params.num_peer_notif *
+		max(1, bond->params.peer_notif_delay);
+
+	rtnl_unlock();
+	return;
+
+rearm:
+	bond_peer_notify_work_rearm(bond, 1);
+}
+
 /*-------------------------- netdev event handling --------------------------*/
 
 /* Change device name */
@@ -4412,6 +4437,7 @@ void bond_work_init_all(struct bonding *bond)
 	INIT_DELAYED_WORK(&bond->arp_work, bond_arp_monitor);
 	INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
 	INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
+	INIT_DELAYED_WORK(&bond->peer_notify_work, bond_peer_notify_handler);
 }
 
 static void bond_work_cancel_all(struct bonding *bond)
@@ -4422,6 +4448,7 @@ static void bond_work_cancel_all(struct bonding *bond)
 	cancel_delayed_work_sync(&bond->ad_work);
 	cancel_delayed_work_sync(&bond->mcast_work);
 	cancel_delayed_work_sync(&bond->slave_arr_work);
+	cancel_delayed_work_sync(&bond->peer_notify_work);
 }
 
 static int bond_open(struct net_device *bond_dev)
diff --git a/include/net/bonding.h b/include/net/bonding.h
index e06f0d63b2c1..4ce530371416 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -255,6 +255,7 @@ struct bonding {
 	struct   delayed_work ad_work;
 	struct   delayed_work mcast_work;
 	struct   delayed_work slave_arr_work;
+	struct   delayed_work peer_notify_work;
 #ifdef CONFIG_DEBUG_FS
 	/* debugging support via debugfs */
 	struct	 dentry *debug_dir;
@@ -710,6 +711,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
 					      int level);
 int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
 void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
+void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay);
 void bond_work_init_all(struct bonding *bond);
 
 #ifdef CONFIG_PROC_FS
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next] net: bonding: use workqueue to make sure peer notify updated
  2025-10-21  5:22 [PATCH net-next] net: bonding: use workqueue to make sure peer notify updated Tonghao Zhang
@ 2025-10-21 11:21 ` Simon Horman
  2025-10-22  2:02 ` Jay Vosburgh
  1 sibling, 0 replies; 4+ messages in thread
From: Simon Horman @ 2025-10-21 11:21 UTC (permalink / raw)
  To: Tonghao Zhang
  Cc: netdev, Jay Vosburgh, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Jonathan Corbet, Andrew Lunn,
	Nikolay Aleksandrov, Hangbin Liu

On Tue, Oct 21, 2025 at 01:22:49PM +0800, Tonghao Zhang wrote:
> The RTNL might be locked, preventing ad_cond_set_peer_notif from acquiring
> the lock and updating send_peer_notif. This patch addresses the issue by
> using a workqueue. Since updating send_peer_notif does not require high
> real-time performance, such delayed updates are entirely acceptable.
> 
> Cc: Jay Vosburgh <jv@jvosburgh.net>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: Jakub Kicinski <kuba@kernel.org>
> Cc: Paolo Abeni <pabeni@redhat.com>
> Cc: Simon Horman <horms@kernel.org>
> Cc: Jonathan Corbet <corbet@lwn.net>
> Cc: Andrew Lunn <andrew+netdev@lunn.ch>
> Cc: Nikolay Aleksandrov <razor@blackwall.org>
> Cc: Hangbin Liu <liuhangbin@gmail.com>
> Suggested-by: Hangbin Liu <liuhangbin@gmail.com>
> Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
> ---
>  drivers/net/bonding/bond_3ad.c  |  7 ++-----
>  drivers/net/bonding/bond_main.c | 27 +++++++++++++++++++++++++++
>  include/net/bonding.h           |  2 ++
>  3 files changed, 31 insertions(+), 5 deletions(-)

This is not a proper review. So please wait, say a day, for one.

But this patch does not apply cleanly to net-next,
and thus will need to be rebased and reposted.

-- 
pw-bot: cr

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next] net: bonding: use workqueue to make sure peer notify updated
  2025-10-21  5:22 [PATCH net-next] net: bonding: use workqueue to make sure peer notify updated Tonghao Zhang
  2025-10-21 11:21 ` Simon Horman
@ 2025-10-22  2:02 ` Jay Vosburgh
  2025-10-26  9:59   ` Tonghao Zhang
  1 sibling, 1 reply; 4+ messages in thread
From: Jay Vosburgh @ 2025-10-22  2:02 UTC (permalink / raw)
  To: Tonghao Zhang
  Cc: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, Jonathan Corbet, Andrew Lunn,
	Nikolay Aleksandrov, Hangbin Liu

Tonghao Zhang <tonghao@bamaicloud.com> wrote:

>The RTNL might be locked, preventing ad_cond_set_peer_notif from acquiring
>the lock and updating send_peer_notif. This patch addresses the issue by
>using a workqueue. Since updating send_peer_notif does not require high
>real-time performance, such delayed updates are entirely acceptable.

	Would it be less complicated overall to convert send_peer_notif
to an atomic_t, and handle updates via atomic_inc/dec/etc instead of
messing with workqueues and RTNL just to change one variable?

	As you say, it's not performance critical, and, despite your
previous patch that moves some send_peer_notif code inside an RTNL
block, if using atomic makes the code less complicated that may be
better in the long run.

	-J

>Cc: Jay Vosburgh <jv@jvosburgh.net>
>Cc: "David S. Miller" <davem@davemloft.net>
>Cc: Eric Dumazet <edumazet@google.com>
>Cc: Jakub Kicinski <kuba@kernel.org>
>Cc: Paolo Abeni <pabeni@redhat.com>
>Cc: Simon Horman <horms@kernel.org>
>Cc: Jonathan Corbet <corbet@lwn.net>
>Cc: Andrew Lunn <andrew+netdev@lunn.ch>
>Cc: Nikolay Aleksandrov <razor@blackwall.org>
>Cc: Hangbin Liu <liuhangbin@gmail.com>
>Suggested-by: Hangbin Liu <liuhangbin@gmail.com>
>Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
>---
> drivers/net/bonding/bond_3ad.c  |  7 ++-----
> drivers/net/bonding/bond_main.c | 27 +++++++++++++++++++++++++++
> include/net/bonding.h           |  2 ++
> 3 files changed, 31 insertions(+), 5 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>index 2fca8e84ab10..1db2e34a351f 100644
>--- a/drivers/net/bonding/bond_3ad.c
>+++ b/drivers/net/bonding/bond_3ad.c
>@@ -986,11 +986,8 @@ static void ad_cond_set_peer_notif(struct port *port)
> {
> 	struct bonding *bond = port->slave->bond;
> 
>-	if (bond->params.broadcast_neighbor && rtnl_trylock()) {
>-		bond->send_peer_notif = bond->params.num_peer_notif *
>-			max(1, bond->params.peer_notif_delay);
>-		rtnl_unlock();
>-	}
>+	if (bond->params.broadcast_neighbor)
>+		bond_peer_notify_work_rearm(bond, 0);
> }
> 
> /**
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 2d6883296e32..5791c3e39baa 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -3990,6 +3990,31 @@ static void bond_arp_monitor(struct work_struct *work)
> 		bond_loadbalance_arp_mon(bond);
> }
> 
>+/* Use this to update send_peer_notif when RTNL may be held in other places. */
>+void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay)
>+{
>+	queue_delayed_work(bond->wq, &bond->peer_notify_work, delay);
>+}
>+
>+/* Peer notify update handler. Holds only RTNL */
>+static void bond_peer_notify_handler(struct work_struct *work)
>+{
>+	struct bonding *bond = container_of(work, struct bonding,
>+					    peer_notify_work.work);
>+
>+	if (!rtnl_trylock())
>+		goto rearm;
>+
>+	bond->send_peer_notif = bond->params.num_peer_notif *
>+		max(1, bond->params.peer_notif_delay);
>+
>+	rtnl_unlock();
>+	return;
>+
>+rearm:
>+	bond_peer_notify_work_rearm(bond, 1);
>+}
>+
> /*-------------------------- netdev event handling --------------------------*/
> 
> /* Change device name */
>@@ -4412,6 +4437,7 @@ void bond_work_init_all(struct bonding *bond)
> 	INIT_DELAYED_WORK(&bond->arp_work, bond_arp_monitor);
> 	INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
> 	INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
>+	INIT_DELAYED_WORK(&bond->peer_notify_work, bond_peer_notify_handler);
> }
> 
> static void bond_work_cancel_all(struct bonding *bond)
>@@ -4422,6 +4448,7 @@ static void bond_work_cancel_all(struct bonding *bond)
> 	cancel_delayed_work_sync(&bond->ad_work);
> 	cancel_delayed_work_sync(&bond->mcast_work);
> 	cancel_delayed_work_sync(&bond->slave_arr_work);
>+	cancel_delayed_work_sync(&bond->peer_notify_work);
> }
> 
> static int bond_open(struct net_device *bond_dev)
>diff --git a/include/net/bonding.h b/include/net/bonding.h
>index e06f0d63b2c1..4ce530371416 100644
>--- a/include/net/bonding.h
>+++ b/include/net/bonding.h
>@@ -255,6 +255,7 @@ struct bonding {
> 	struct   delayed_work ad_work;
> 	struct   delayed_work mcast_work;
> 	struct   delayed_work slave_arr_work;
>+	struct   delayed_work peer_notify_work;
> #ifdef CONFIG_DEBUG_FS
> 	/* debugging support via debugfs */
> 	struct	 dentry *debug_dir;
>@@ -710,6 +711,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
> 					      int level);
> int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
> void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
>+void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay);
> void bond_work_init_all(struct bonding *bond);
> 
> #ifdef CONFIG_PROC_FS
>-- 
>2.34.1
>
>

---
	-Jay Vosburgh, jv@jvosburgh.net

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next] net: bonding: use workqueue to make sure peer notify updated
  2025-10-22  2:02 ` Jay Vosburgh
@ 2025-10-26  9:59   ` Tonghao Zhang
  0 siblings, 0 replies; 4+ messages in thread
From: Tonghao Zhang @ 2025-10-26  9:59 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, Jonathan Corbet, Andrew Lunn,
	Nikolay Aleksandrov, Hangbin Liu



> On Oct 22, 2025, at 10:02, Jay Vosburgh <jv@jvosburgh.net> wrote:
> 
> Tonghao Zhang <tonghao@bamaicloud.com> wrote:
> 
>> The RTNL might be locked, preventing ad_cond_set_peer_notif from acquiring
>> the lock and updating send_peer_notif. This patch addresses the issue by
>> using a workqueue. Since updating send_peer_notif does not require high
>> real-time performance, such delayed updates are entirely acceptable.
> 
> Would it be less complicated overall to convert send_peer_notif
> to an atomic_t, and handle updates via atomic_inc/dec/etc instead of
> messing with workqueues and RTNL just to change one variable?
> 
> As you say, it's not performance critical, and, despite your
> previous patch that moves some send_peer_notif code inside an RTNL
> block, if using atomic makes the code less complicated that may be
> better in the long run.
Your suggestion is helpful; I will post a new patch that uses an atomic instead of the workqueue.
> 
> -J
> 
>> Cc: Jay Vosburgh <jv@jvosburgh.net>
>> Cc: "David S. Miller" <davem@davemloft.net>
>> Cc: Eric Dumazet <edumazet@google.com>
>> Cc: Jakub Kicinski <kuba@kernel.org>
>> Cc: Paolo Abeni <pabeni@redhat.com>
>> Cc: Simon Horman <horms@kernel.org>
>> Cc: Jonathan Corbet <corbet@lwn.net>
>> Cc: Andrew Lunn <andrew+netdev@lunn.ch>
>> Cc: Nikolay Aleksandrov <razor@blackwall.org>
>> Cc: Hangbin Liu <liuhangbin@gmail.com>
>> Suggested-by: Hangbin Liu <liuhangbin@gmail.com>
>> Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
>> ---
>> drivers/net/bonding/bond_3ad.c  |  7 ++-----
>> drivers/net/bonding/bond_main.c | 27 +++++++++++++++++++++++++++
>> include/net/bonding.h           |  2 ++
>> 3 files changed, 31 insertions(+), 5 deletions(-)
>> 
>> diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>> index 2fca8e84ab10..1db2e34a351f 100644
>> --- a/drivers/net/bonding/bond_3ad.c
>> +++ b/drivers/net/bonding/bond_3ad.c
>> @@ -986,11 +986,8 @@ static void ad_cond_set_peer_notif(struct port *port)
>> {
>> struct bonding *bond = port->slave->bond;
>> 
>> - if (bond->params.broadcast_neighbor && rtnl_trylock()) {
>> - bond->send_peer_notif = bond->params.num_peer_notif *
>> - max(1, bond->params.peer_notif_delay);
>> - rtnl_unlock();
>> - }
>> + if (bond->params.broadcast_neighbor)
>> + bond_peer_notify_work_rearm(bond, 0);
>> }
>> 
>> /**
>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>> index 2d6883296e32..5791c3e39baa 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -3990,6 +3990,31 @@ static void bond_arp_monitor(struct work_struct *work)
>> bond_loadbalance_arp_mon(bond);
>> }
>> 
>> +/* Use this to update send_peer_notif when RTNL may be held in other places. */
>> +void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay)
>> +{
>> + queue_delayed_work(bond->wq, &bond->peer_notify_work, delay);
>> +}
>> +
>> +/* Peer notify update handler. Holds only RTNL */
>> +static void bond_peer_notify_handler(struct work_struct *work)
>> +{
>> + struct bonding *bond = container_of(work, struct bonding,
>> +     peer_notify_work.work);
>> +
>> + if (!rtnl_trylock())
>> + goto rearm;
>> +
>> + bond->send_peer_notif = bond->params.num_peer_notif *
>> + max(1, bond->params.peer_notif_delay);
>> +
>> + rtnl_unlock();
>> + return;
>> +
>> +rearm:
>> + bond_peer_notify_work_rearm(bond, 1);
>> +}
>> +
>> /*-------------------------- netdev event handling --------------------------*/
>> 
>> /* Change device name */
>> @@ -4412,6 +4437,7 @@ void bond_work_init_all(struct bonding *bond)
>> INIT_DELAYED_WORK(&bond->arp_work, bond_arp_monitor);
>> INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
>> INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
>> + INIT_DELAYED_WORK(&bond->peer_notify_work, bond_peer_notify_handler);
>> }
>> 
>> static void bond_work_cancel_all(struct bonding *bond)
>> @@ -4422,6 +4448,7 @@ static void bond_work_cancel_all(struct bonding *bond)
>> cancel_delayed_work_sync(&bond->ad_work);
>> cancel_delayed_work_sync(&bond->mcast_work);
>> cancel_delayed_work_sync(&bond->slave_arr_work);
>> + cancel_delayed_work_sync(&bond->peer_notify_work);
>> }
>> 
>> static int bond_open(struct net_device *bond_dev)
>> diff --git a/include/net/bonding.h b/include/net/bonding.h
>> index e06f0d63b2c1..4ce530371416 100644
>> --- a/include/net/bonding.h
>> +++ b/include/net/bonding.h
>> @@ -255,6 +255,7 @@ struct bonding {
>> struct   delayed_work ad_work;
>> struct   delayed_work mcast_work;
>> struct   delayed_work slave_arr_work;
>> + struct   delayed_work peer_notify_work;
>> #ifdef CONFIG_DEBUG_FS
>> /* debugging support via debugfs */
>> struct  dentry *debug_dir;
>> @@ -710,6 +711,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
>>       int level);
>> int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
>> void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
>> +void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay);
>> void bond_work_init_all(struct bonding *bond);
>> 
>> #ifdef CONFIG_PROC_FS
>> -- 
>> 2.34.1
>> 
>> 
> 
> ---
> -Jay Vosburgh, jv@jvosburgh.net



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-10-26 10:00 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-10-21  5:22 [PATCH net-next] net: bonding: use workqueue to make sure peer notify updated Tonghao Zhang
2025-10-21 11:21 ` Simon Horman
2025-10-22  2:02 ` Jay Vosburgh
2025-10-26  9:59   ` Tonghao Zhang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).