All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jay Vosburgh <jv@jvosburgh.net>
To: Tonghao Zhang <tonghao@bamaicloud.com>
Cc: netdev@vger.kernel.org, "David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Simon Horman <horms@kernel.org>, Jonathan Corbet <corbet@lwn.net>,
	Andrew Lunn <andrew+netdev@lunn.ch>,
	Nikolay Aleksandrov <razor@blackwall.org>,
	Hangbin Liu <liuhangbin@gmail.com>
Subject: Re: [PATCH v2] net: bonding: use atomic instead of rtnl_mutex, to make sure peer notify updated
Date: Mon, 03 Nov 2025 22:48:26 +0100	[thread overview]
Message-ID: <253222.1762206506@vermin> (raw)
In-Reply-To: <20251028034547.78830-1-tonghao@bamaicloud.com>

Tonghao Zhang <tonghao@bamaicloud.com> wrote:

>Using atomic to protect the send_peer_notif instead of rtnl_mutex.
>This approach allows safe updates in both interrupt and process
>contexts, while avoiding code complexity.
>
>In lacp mode, the rtnl might be locked, preventing ad_cond_set_peer_notif()
>from acquiring the lock and updating send_peer_notif. This patch addresses
>the issue by using a atomic. Since updating send_peer_notif does not
>require high real-time performance, such atomic updates are acceptable.
>
>After coverting the rtnl lock for send_peer_notif to atomic, in bond_mii_monitor(),
>we should check the should_notify_peers (rtnllock required) instead of
>send_peer_notif. By the way, to avoid peer notify event loss, we check
>again whether to send peer notify, such as active-backup mode failover.
>
>Cc: Jay Vosburgh <jv@jvosburgh.net>
>Cc: "David S. Miller" <davem@davemloft.net>
>Cc: Eric Dumazet <edumazet@google.com>
>Cc: Jakub Kicinski <kuba@kernel.org>
>Cc: Paolo Abeni <pabeni@redhat.com>
>Cc: Simon Horman <horms@kernel.org>
>Cc: Jonathan Corbet <corbet@lwn.net>
>Cc: Andrew Lunn <andrew+netdev@lunn.ch>
>Cc: Nikolay Aleksandrov <razor@blackwall.org>
>Cc: Hangbin Liu <liuhangbin@gmail.com>
>Suggested-by: Jay Vosburgh <jv@jvosburgh.net>
>Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
>---
>v2:
>- refine the codes
>- check bond_should_notify_peers again in bond_mii_monitor(), to avoid
>  event loss. 
>- v1 https://patchwork.kernel.org/project/netdevbpf/patch/20251026095614.48833-1-tonghao@bamaicloud.com/
>---
> drivers/net/bonding/bond_3ad.c  |  7 ++---
> drivers/net/bonding/bond_main.c | 46 ++++++++++++++++-----------------
> include/net/bonding.h           |  9 ++++++-
> 3 files changed, 32 insertions(+), 30 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>index 49717b7b82a2..05c573e45450 100644
>--- a/drivers/net/bonding/bond_3ad.c
>+++ b/drivers/net/bonding/bond_3ad.c
>@@ -999,11 +999,8 @@ static void ad_cond_set_peer_notif(struct port *port)
> {
> 	struct bonding *bond = port->slave->bond;
> 
>-	if (bond->params.broadcast_neighbor && rtnl_trylock()) {
>-		bond->send_peer_notif = bond->params.num_peer_notif *
>-			max(1, bond->params.peer_notif_delay);
>-		rtnl_unlock();
>-	}
>+	if (bond->params.broadcast_neighbor)
>+		bond_peer_notify_reset(bond);
> }
> 
> /**
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 8e592f37c28b..ae90221838d4 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -1167,10 +1167,11 @@ static bool bond_should_notify_peers(struct bonding *bond)
> {
> 	struct bond_up_slave *usable;
> 	struct slave *slave = NULL;
>+	int send_peer_notif;
> 
>-	if (!bond->send_peer_notif ||
>-	    bond->send_peer_notif %
>-	    max(1, bond->params.peer_notif_delay) != 0 ||
>+	send_peer_notif = atomic_read(&bond->send_peer_notif);
>+	if (!send_peer_notif ||
>+	    send_peer_notif % max(1, bond->params.peer_notif_delay) != 0 ||
> 	    !netif_carrier_ok(bond->dev))
> 		return false;
> 
>@@ -1270,8 +1271,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
> 						      BOND_SLAVE_NOTIFY_NOW);
> 
> 		if (new_active) {
>-			bool should_notify_peers = false;
>-
> 			bond_set_slave_active_flags(new_active,
> 						    BOND_SLAVE_NOTIFY_NOW);
> 
>@@ -1280,19 +1279,17 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
> 						      old_active);
> 
> 			if (netif_running(bond->dev)) {
>-				bond->send_peer_notif =
>-					bond->params.num_peer_notif *
>-					max(1, bond->params.peer_notif_delay);
>-				should_notify_peers =
>-					bond_should_notify_peers(bond);
>+				bond_peer_notify_reset(bond);
>+
>+				if (bond_should_notify_peers(bond)) {
>+					atomic_dec(&bond->send_peer_notif);
>+					call_netdevice_notifiers(
>+							NETDEV_NOTIFY_PEERS,
>+							bond->dev);
>+				}
> 			}
> 
> 			call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
>-			if (should_notify_peers) {
>-				bond->send_peer_notif--;
>-				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
>-							 bond->dev);
>-			}
> 		}
> 	}
> 
>@@ -2801,7 +2798,7 @@ static void bond_mii_monitor(struct work_struct *work)
> 
> 	rcu_read_unlock();
> 
>-	if (commit || bond->send_peer_notif) {
>+	if (commit || should_notify_peers) {
> 		/* Race avoidance with bond_close cancel of workqueue */
> 		if (!rtnl_trylock()) {
> 			delay = 1;
>@@ -2816,16 +2813,15 @@ static void bond_mii_monitor(struct work_struct *work)
> 			bond_miimon_commit(bond);
> 		}
> 
>-		if (bond->send_peer_notif) {
>-			bond->send_peer_notif--;
>-			if (should_notify_peers)
>-				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
>-							 bond->dev);
>-		}
>+		/* check again to avoid send_peer_notif has been changed. */
>+		if (bond_should_notify_peers(bond))
>+			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);

	Is the risk here that user space may have set send_peer_notify
to zero?

> 
> 		rtnl_unlock();	/* might sleep, hold no other locks */
> 	}
> 
>+	atomic_dec_if_positive(&bond->send_peer_notif);
>+

	Also, it's a bit subtle, but I think this has to be outside of
the if block, or peer_notif_delay may be unreliable.  I'm not sure it
needs a comment, but could you confirm that's why this line is where it
is?

	-J

> re_arm:
> 	if (bond->params.miimon)
> 		queue_delayed_work(bond->wq, &bond->mii_work, delay);
>@@ -3773,7 +3769,7 @@ static void bond_activebackup_arp_mon(struct bonding *bond)
> 			return;
> 
> 		if (should_notify_peers) {
>-			bond->send_peer_notif--;
>+			atomic_dec(&bond->send_peer_notif);
> 			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
> 						 bond->dev);
> 		}
>@@ -4267,6 +4263,8 @@ static int bond_open(struct net_device *bond_dev)
> 			queue_delayed_work(bond->wq, &bond->alb_work, 0);
> 	}
> 
>+	atomic_set(&bond->send_peer_notif, 0);
>+
> 	if (bond->params.miimon)  /* link check interval, in milliseconds. */
> 		queue_delayed_work(bond->wq, &bond->mii_work, 0);
> 
>@@ -4300,7 +4298,7 @@ static int bond_close(struct net_device *bond_dev)
> 	struct slave *slave;
> 
> 	bond_work_cancel_all(bond);
>-	bond->send_peer_notif = 0;
>+	atomic_set(&bond->send_peer_notif, 0);
> 	if (bond_is_lb(bond))
> 		bond_alb_deinitialize(bond);
> 	bond->recv_probe = NULL;
>diff --git a/include/net/bonding.h b/include/net/bonding.h
>index 49edc7da0586..afdfcb5bfaf0 100644
>--- a/include/net/bonding.h
>+++ b/include/net/bonding.h
>@@ -236,7 +236,7 @@ struct bonding {
> 	 */
> 	spinlock_t mode_lock;
> 	spinlock_t stats_lock;
>-	u32	 send_peer_notif;
>+	atomic_t send_peer_notif;
> 	u8       igmp_retrans;
> #ifdef CONFIG_PROC_FS
> 	struct   proc_dir_entry *proc_entry;
>@@ -814,4 +814,11 @@ static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *s
> 	return NET_XMIT_DROP;
> }
> 
>+static inline void bond_peer_notify_reset(struct bonding *bond)
>+{
>+	atomic_set(&bond->send_peer_notif,
>+		bond->params.num_peer_notif *
>+		max(1, bond->params.peer_notif_delay));
>+}
>+
> #endif /* _NET_BONDING_H */
>-- 
>2.34.1
>

---
	-Jay Vosburgh, jv@jvosburgh.net

  parent reply	other threads:[~2025-11-03 21:48 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-28  3:45 [PATCH v2] net: bonding: use atomic instead of rtnl_mutex, to make sure peer notify updated Tonghao Zhang
2025-11-03 11:02 ` Tonghao Zhang
2025-11-03 21:48 ` Jay Vosburgh [this message]
2025-11-04 14:48   ` Tonghao Zhang
2025-11-10  8:51     ` Tonghao Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=253222.1762206506@vermin \
    --to=jv@jvosburgh.net \
    --cc=andrew+netdev@lunn.ch \
    --cc=corbet@lwn.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=kuba@kernel.org \
    --cc=liuhangbin@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=razor@blackwall.org \
    --cc=tonghao@bamaicloud.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.