* [PATCH net-next] net: bonding: use workqueue to make sure peer notify updated
@ 2025-10-21 5:22 Tonghao Zhang
2025-10-21 11:21 ` Simon Horman
2025-10-22 2:02 ` Jay Vosburgh
0 siblings, 2 replies; 4+ messages in thread
From: Tonghao Zhang @ 2025-10-21 5:22 UTC (permalink / raw)
To: netdev
Cc: Tonghao Zhang, Jay Vosburgh, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Simon Horman, Jonathan Corbet,
Andrew Lunn, Nikolay Aleksandrov, Hangbin Liu
The RTNL might be locked, preventing ad_cond_set_peer_notif from acquiring
the lock and updating send_peer_notif. This patch addresses the issue by
using a workqueue. Since updating send_peer_notif does not require high
real-time performance, such delayed updates are entirely acceptable.
Cc: Jay Vosburgh <jv@jvosburgh.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Andrew Lunn <andrew+netdev@lunn.ch>
Cc: Nikolay Aleksandrov <razor@blackwall.org>
Cc: Hangbin Liu <liuhangbin@gmail.com>
Suggested-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
---
drivers/net/bonding/bond_3ad.c | 7 ++-----
drivers/net/bonding/bond_main.c | 27 +++++++++++++++++++++++++++
include/net/bonding.h | 2 ++
3 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 2fca8e84ab10..1db2e34a351f 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -986,11 +986,8 @@ static void ad_cond_set_peer_notif(struct port *port)
{
struct bonding *bond = port->slave->bond;
- if (bond->params.broadcast_neighbor && rtnl_trylock()) {
- bond->send_peer_notif = bond->params.num_peer_notif *
- max(1, bond->params.peer_notif_delay);
- rtnl_unlock();
- }
+ if (bond->params.broadcast_neighbor)
+ bond_peer_notify_work_rearm(bond, 0);
}
/**
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2d6883296e32..5791c3e39baa 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3990,6 +3990,31 @@ static void bond_arp_monitor(struct work_struct *work)
bond_loadbalance_arp_mon(bond);
}
+/* Use this to update send_peer_notif when RTNL may be held in other places. */
+void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay)
+{
+ queue_delayed_work(bond->wq, &bond->peer_notify_work, delay);
+}
+
+/* Peer notify update handler. Holds only RTNL */
+static void bond_peer_notify_handler(struct work_struct *work)
+{
+ struct bonding *bond = container_of(work, struct bonding,
+ peer_notify_work.work);
+
+ if (!rtnl_trylock())
+ goto rearm;
+
+ bond->send_peer_notif = bond->params.num_peer_notif *
+ max(1, bond->params.peer_notif_delay);
+
+ rtnl_unlock();
+ return;
+
+rearm:
+ bond_peer_notify_work_rearm(bond, 1);
+}
+
/*-------------------------- netdev event handling --------------------------*/
/* Change device name */
@@ -4412,6 +4437,7 @@ void bond_work_init_all(struct bonding *bond)
INIT_DELAYED_WORK(&bond->arp_work, bond_arp_monitor);
INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
+ INIT_DELAYED_WORK(&bond->peer_notify_work, bond_peer_notify_handler);
}
static void bond_work_cancel_all(struct bonding *bond)
@@ -4422,6 +4448,7 @@ static void bond_work_cancel_all(struct bonding *bond)
cancel_delayed_work_sync(&bond->ad_work);
cancel_delayed_work_sync(&bond->mcast_work);
cancel_delayed_work_sync(&bond->slave_arr_work);
+ cancel_delayed_work_sync(&bond->peer_notify_work);
}
static int bond_open(struct net_device *bond_dev)
diff --git a/include/net/bonding.h b/include/net/bonding.h
index e06f0d63b2c1..4ce530371416 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -255,6 +255,7 @@ struct bonding {
struct delayed_work ad_work;
struct delayed_work mcast_work;
struct delayed_work slave_arr_work;
+ struct delayed_work peer_notify_work;
#ifdef CONFIG_DEBUG_FS
/* debugging support via debugfs */
struct dentry *debug_dir;
@@ -710,6 +711,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
int level);
int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
+void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay);
void bond_work_init_all(struct bonding *bond);
#ifdef CONFIG_PROC_FS
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH net-next] net: bonding: use workqueue to make sure peer notify updated
2025-10-21 5:22 [PATCH net-next] net: bonding: use workqueue to make sure peer notify updated Tonghao Zhang
@ 2025-10-21 11:21 ` Simon Horman
2025-10-22 2:02 ` Jay Vosburgh
1 sibling, 0 replies; 4+ messages in thread
From: Simon Horman @ 2025-10-21 11:21 UTC (permalink / raw)
To: Tonghao Zhang
Cc: netdev, Jay Vosburgh, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Jonathan Corbet, Andrew Lunn,
Nikolay Aleksandrov, Hangbin Liu
On Tue, Oct 21, 2025 at 01:22:49PM +0800, Tonghao Zhang wrote:
> The RTNL might be locked, preventing ad_cond_set_peer_notif from acquiring
> the lock and updating send_peer_notif. This patch addresses the issue by
> using a workqueue. Since updating send_peer_notif does not require high
> real-time performance, such delayed updates are entirely acceptable.
>
> Cc: Jay Vosburgh <jv@jvosburgh.net>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: Jakub Kicinski <kuba@kernel.org>
> Cc: Paolo Abeni <pabeni@redhat.com>
> Cc: Simon Horman <horms@kernel.org>
> Cc: Jonathan Corbet <corbet@lwn.net>
> Cc: Andrew Lunn <andrew+netdev@lunn.ch>
> Cc: Nikolay Aleksandrov <razor@blackwall.org>
> Cc: Hangbin Liu <liuhangbin@gmail.com>
> Suggested-by: Hangbin Liu <liuhangbin@gmail.com>
> Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
> ---
> drivers/net/bonding/bond_3ad.c | 7 ++-----
> drivers/net/bonding/bond_main.c | 27 +++++++++++++++++++++++++++
> include/net/bonding.h | 2 ++
> 3 files changed, 31 insertions(+), 5 deletions(-)
This is not a proper review. So please wait, say a day, for one.
But this patch does not apply cleanly to net-next,
and thus will need to be rebased and reposted.
--
pw-bot: cr
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH net-next] net: bonding: use workqueue to make sure peer notify updated
2025-10-21 5:22 [PATCH net-next] net: bonding: use workqueue to make sure peer notify updated Tonghao Zhang
2025-10-21 11:21 ` Simon Horman
@ 2025-10-22 2:02 ` Jay Vosburgh
2025-10-26 9:59 ` Tonghao Zhang
1 sibling, 1 reply; 4+ messages in thread
From: Jay Vosburgh @ 2025-10-22 2:02 UTC (permalink / raw)
To: Tonghao Zhang
Cc: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman, Jonathan Corbet, Andrew Lunn,
Nikolay Aleksandrov, Hangbin Liu
Tonghao Zhang <tonghao@bamaicloud.com> wrote:
>The RTNL might be locked, preventing ad_cond_set_peer_notif from acquiring
>the lock and updating send_peer_notif. This patch addresses the issue by
>using a workqueue. Since updating send_peer_notif does not require high
>real-time performance, such delayed updates are entirely acceptable.
Would it be less complicated overall to convert send_peer_notif
to an atomic_t, and handle updates via atomic_inc/dec/etc instead of
messing with workqueues and RTNL just to change one variable?
As you say, it's not performance critical, and, despite your
previous patch that moves some send_peer_notif code inside an RTNL
block, if using atomic makes the code less complicated that may be
better in the long run.
-J
>Cc: Jay Vosburgh <jv@jvosburgh.net>
>Cc: "David S. Miller" <davem@davemloft.net>
>Cc: Eric Dumazet <edumazet@google.com>
>Cc: Jakub Kicinski <kuba@kernel.org>
>Cc: Paolo Abeni <pabeni@redhat.com>
>Cc: Simon Horman <horms@kernel.org>
>Cc: Jonathan Corbet <corbet@lwn.net>
>Cc: Andrew Lunn <andrew+netdev@lunn.ch>
>Cc: Nikolay Aleksandrov <razor@blackwall.org>
>Cc: Hangbin Liu <liuhangbin@gmail.com>
>Suggested-by: Hangbin Liu <liuhangbin@gmail.com>
>Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
>---
> drivers/net/bonding/bond_3ad.c | 7 ++-----
> drivers/net/bonding/bond_main.c | 27 +++++++++++++++++++++++++++
> include/net/bonding.h | 2 ++
> 3 files changed, 31 insertions(+), 5 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>index 2fca8e84ab10..1db2e34a351f 100644
>--- a/drivers/net/bonding/bond_3ad.c
>+++ b/drivers/net/bonding/bond_3ad.c
>@@ -986,11 +986,8 @@ static void ad_cond_set_peer_notif(struct port *port)
> {
> struct bonding *bond = port->slave->bond;
>
>- if (bond->params.broadcast_neighbor && rtnl_trylock()) {
>- bond->send_peer_notif = bond->params.num_peer_notif *
>- max(1, bond->params.peer_notif_delay);
>- rtnl_unlock();
>- }
>+ if (bond->params.broadcast_neighbor)
>+ bond_peer_notify_work_rearm(bond, 0);
> }
>
> /**
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 2d6883296e32..5791c3e39baa 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -3990,6 +3990,31 @@ static void bond_arp_monitor(struct work_struct *work)
> bond_loadbalance_arp_mon(bond);
> }
>
>+/* Use this to update send_peer_notif when RTNL may be held in other places. */
>+void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay)
>+{
>+ queue_delayed_work(bond->wq, &bond->peer_notify_work, delay);
>+}
>+
>+/* Peer notify update handler. Holds only RTNL */
>+static void bond_peer_notify_handler(struct work_struct *work)
>+{
>+ struct bonding *bond = container_of(work, struct bonding,
>+ peer_notify_work.work);
>+
>+ if (!rtnl_trylock())
>+ goto rearm;
>+
>+ bond->send_peer_notif = bond->params.num_peer_notif *
>+ max(1, bond->params.peer_notif_delay);
>+
>+ rtnl_unlock();
>+ return;
>+
>+rearm:
>+ bond_peer_notify_work_rearm(bond, 1);
>+}
>+
> /*-------------------------- netdev event handling --------------------------*/
>
> /* Change device name */
>@@ -4412,6 +4437,7 @@ void bond_work_init_all(struct bonding *bond)
> INIT_DELAYED_WORK(&bond->arp_work, bond_arp_monitor);
> INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
> INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
>+ INIT_DELAYED_WORK(&bond->peer_notify_work, bond_peer_notify_handler);
> }
>
> static void bond_work_cancel_all(struct bonding *bond)
>@@ -4422,6 +4448,7 @@ static void bond_work_cancel_all(struct bonding *bond)
> cancel_delayed_work_sync(&bond->ad_work);
> cancel_delayed_work_sync(&bond->mcast_work);
> cancel_delayed_work_sync(&bond->slave_arr_work);
>+ cancel_delayed_work_sync(&bond->peer_notify_work);
> }
>
> static int bond_open(struct net_device *bond_dev)
>diff --git a/include/net/bonding.h b/include/net/bonding.h
>index e06f0d63b2c1..4ce530371416 100644
>--- a/include/net/bonding.h
>+++ b/include/net/bonding.h
>@@ -255,6 +255,7 @@ struct bonding {
> struct delayed_work ad_work;
> struct delayed_work mcast_work;
> struct delayed_work slave_arr_work;
>+ struct delayed_work peer_notify_work;
> #ifdef CONFIG_DEBUG_FS
> /* debugging support via debugfs */
> struct dentry *debug_dir;
>@@ -710,6 +711,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
> int level);
> int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
> void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
>+void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay);
> void bond_work_init_all(struct bonding *bond);
>
> #ifdef CONFIG_PROC_FS
>--
>2.34.1
>
>
---
-Jay Vosburgh, jv@jvosburgh.net
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH net-next] net: bonding: use workqueue to make sure peer notify updated
2025-10-22 2:02 ` Jay Vosburgh
@ 2025-10-26 9:59 ` Tonghao Zhang
0 siblings, 0 replies; 4+ messages in thread
From: Tonghao Zhang @ 2025-10-26 9:59 UTC (permalink / raw)
To: Jay Vosburgh
Cc: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman, Jonathan Corbet, Andrew Lunn,
Nikolay Aleksandrov, Hangbin Liu
> On Oct 22, 2025, at 10:02, Jay Vosburgh <jv@jvosburgh.net> wrote:
>
> Tonghao Zhang <tonghao@bamaicloud.com> wrote:
>
>> The RTNL might be locked, preventing ad_cond_set_peer_notif from acquiring
>> the lock and updating send_peer_notif. This patch addresses the issue by
>> using a workqueue. Since updating send_peer_notif does not require high
>> real-time performance, such delayed updates are entirely acceptable.
>
> Would it be less complicated overall to convert send_peer_notif
> to an atomic_t, and handle updates via atomic_inc/dec/etc instead of
> messing with workqueues and RTNL just to change one variable?
>
> As you say, it's not performance critical, and, despite your
> previous patch that moves some send_peer_notif code inside an RTNL
> block, if using atomic makes the code less complicated that may be
> better in the long run.
Your suggestion is helpful. I will post a new patch that uses an atomic instead of the workqueue.
>
> -J
>
>> Cc: Jay Vosburgh <jv@jvosburgh.net>
>> Cc: "David S. Miller" <davem@davemloft.net>
>> Cc: Eric Dumazet <edumazet@google.com>
>> Cc: Jakub Kicinski <kuba@kernel.org>
>> Cc: Paolo Abeni <pabeni@redhat.com>
>> Cc: Simon Horman <horms@kernel.org>
>> Cc: Jonathan Corbet <corbet@lwn.net>
>> Cc: Andrew Lunn <andrew+netdev@lunn.ch>
>> Cc: Nikolay Aleksandrov <razor@blackwall.org>
>> Cc: Hangbin Liu <liuhangbin@gmail.com>
>> Suggested-by: Hangbin Liu <liuhangbin@gmail.com>
>> Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
>> ---
>> drivers/net/bonding/bond_3ad.c | 7 ++-----
>> drivers/net/bonding/bond_main.c | 27 +++++++++++++++++++++++++++
>> include/net/bonding.h | 2 ++
>> 3 files changed, 31 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>> index 2fca8e84ab10..1db2e34a351f 100644
>> --- a/drivers/net/bonding/bond_3ad.c
>> +++ b/drivers/net/bonding/bond_3ad.c
>> @@ -986,11 +986,8 @@ static void ad_cond_set_peer_notif(struct port *port)
>> {
>> struct bonding *bond = port->slave->bond;
>>
>> - if (bond->params.broadcast_neighbor && rtnl_trylock()) {
>> - bond->send_peer_notif = bond->params.num_peer_notif *
>> - max(1, bond->params.peer_notif_delay);
>> - rtnl_unlock();
>> - }
>> + if (bond->params.broadcast_neighbor)
>> + bond_peer_notify_work_rearm(bond, 0);
>> }
>>
>> /**
>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>> index 2d6883296e32..5791c3e39baa 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -3990,6 +3990,31 @@ static void bond_arp_monitor(struct work_struct *work)
>> bond_loadbalance_arp_mon(bond);
>> }
>>
>> +/* Use this to update send_peer_notif when RTNL may be held in other places. */
>> +void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay)
>> +{
>> + queue_delayed_work(bond->wq, &bond->peer_notify_work, delay);
>> +}
>> +
>> +/* Peer notify update handler. Holds only RTNL */
>> +static void bond_peer_notify_handler(struct work_struct *work)
>> +{
>> + struct bonding *bond = container_of(work, struct bonding,
>> + peer_notify_work.work);
>> +
>> + if (!rtnl_trylock())
>> + goto rearm;
>> +
>> + bond->send_peer_notif = bond->params.num_peer_notif *
>> + max(1, bond->params.peer_notif_delay);
>> +
>> + rtnl_unlock();
>> + return;
>> +
>> +rearm:
>> + bond_peer_notify_work_rearm(bond, 1);
>> +}
>> +
>> /*-------------------------- netdev event handling --------------------------*/
>>
>> /* Change device name */
>> @@ -4412,6 +4437,7 @@ void bond_work_init_all(struct bonding *bond)
>> INIT_DELAYED_WORK(&bond->arp_work, bond_arp_monitor);
>> INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
>> INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
>> + INIT_DELAYED_WORK(&bond->peer_notify_work, bond_peer_notify_handler);
>> }
>>
>> static void bond_work_cancel_all(struct bonding *bond)
>> @@ -4422,6 +4448,7 @@ static void bond_work_cancel_all(struct bonding *bond)
>> cancel_delayed_work_sync(&bond->ad_work);
>> cancel_delayed_work_sync(&bond->mcast_work);
>> cancel_delayed_work_sync(&bond->slave_arr_work);
>> + cancel_delayed_work_sync(&bond->peer_notify_work);
>> }
>>
>> static int bond_open(struct net_device *bond_dev)
>> diff --git a/include/net/bonding.h b/include/net/bonding.h
>> index e06f0d63b2c1..4ce530371416 100644
>> --- a/include/net/bonding.h
>> +++ b/include/net/bonding.h
>> @@ -255,6 +255,7 @@ struct bonding {
>> struct delayed_work ad_work;
>> struct delayed_work mcast_work;
>> struct delayed_work slave_arr_work;
>> + struct delayed_work peer_notify_work;
>> #ifdef CONFIG_DEBUG_FS
>> /* debugging support via debugfs */
>> struct dentry *debug_dir;
>> @@ -710,6 +711,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
>> int level);
>> int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
>> void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
>> +void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay);
>> void bond_work_init_all(struct bonding *bond);
>>
>> #ifdef CONFIG_PROC_FS
>> --
>> 2.34.1
>>
>>
>
> ---
> -Jay Vosburgh, jv@jvosburgh.net
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-10-26 10:00 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-10-21 5:22 [PATCH net-next] net: bonding: use workqueue to make sure peer notify updated Tonghao Zhang
2025-10-21 11:21 ` Simon Horman
2025-10-22 2:02 ` Jay Vosburgh
2025-10-26 9:59 ` Tonghao Zhang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).