netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net 1/2] bonding: set random address only when slaves already exist
@ 2025-08-20  9:10 Hangbin Liu
  2025-08-20  9:10 ` [PATCH net 2/2] selftests: bonding: add fail_over_mac testing Hangbin Liu
  2025-08-23  0:21 ` [PATCH net 1/2] bonding: set random address only when slaves already exist Jay Vosburgh
  0 siblings, 2 replies; 4+ messages in thread
From: Hangbin Liu @ 2025-08-20  9:10 UTC (permalink / raw)
  To: netdev
  Cc: Jay Vosburgh, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Nikolay Aleksandrov, Simon Horman,
	Shuah Khan, linux-kselftest, Hangbin Liu, Qiuling Ren

Commit 5c3bf6cba791 ("bonding: assign random address if device address is
same as bond") fixed an issue where, after releasing the first slave and
re-adding it to the bond with fail_over_mac=follow, both the active and
backup slaves could end up with duplicate MAC addresses. To avoid this,
the new slave was assigned a random address.

However, if this happens when adding the very first slave, the bond’s
hardware address is set to match the slave’s. Later, during the
fail_over_mac=follow check, the slave’s MAC is randomized because it
naturally matches the bond, which is incorrect.

The issue is normally hidden since the first slave usually becomes the
active one, which restores the bond's MAC address. However, if another
slave is selected as the initial active interface, the issue becomes visible.

Fix this by assigning a random address only when slaves already exist in
the bond.

Fixes: 5c3bf6cba791 ("bonding: assign random address if device address is same as bond")
Reported-by: Qiuling Ren <qren@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 drivers/net/bonding/bond_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 257333c88710..8832bc9f107b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2132,6 +2132,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 		memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len);
 	} else if (bond->params.fail_over_mac == BOND_FOM_FOLLOW &&
 		   BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
+		   bond_has_slaves(bond) &&
 		   memcmp(slave_dev->dev_addr, bond_dev->dev_addr, bond_dev->addr_len) == 0) {
 		/* Set slave to random address to avoid duplicate mac
 		 * address in later fail over.
-- 
2.50.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH net 2/2] selftests: bonding: add fail_over_mac testing
  2025-08-20  9:10 [PATCH net 1/2] bonding: set random address only when slaves already exist Hangbin Liu
@ 2025-08-20  9:10 ` Hangbin Liu
  2025-08-23  0:21 ` [PATCH net 1/2] bonding: set random address only when slaves already exist Jay Vosburgh
  1 sibling, 0 replies; 4+ messages in thread
From: Hangbin Liu @ 2025-08-20  9:10 UTC (permalink / raw)
  To: netdev
  Cc: Jay Vosburgh, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Nikolay Aleksandrov, Simon Horman,
	Shuah Khan, linux-kselftest, Hangbin Liu

Add a test to check each value of bond fail_over_mac option.

Also fix a minor garp_test print issue.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 .../drivers/net/bonding/bond_options.sh       | 139 +++++++++++++++++-
 .../drivers/net/bonding/bond_topo_2d1c.sh     |   3 +
 .../drivers/net/bonding/bond_topo_3d1c.sh     |   2 +
 3 files changed, 142 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index 7bc148889ca7..e3f3cc803b56 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -7,6 +7,7 @@ ALL_TESTS="
 	prio
 	arp_validate
 	num_grat_arp
+	fail_over_mac
 "
 
 lib_dir=$(dirname "$0")
@@ -352,8 +353,8 @@ garp_test()
 
 	exp_num=$(echo "${param}" | cut -f6 -d ' ')
 	active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
-	slowwait_for_counter $((exp_num + 5)) $exp_num \
-		tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}"
+	slowwait_for_counter $((exp_num + 5)) $exp_num tc_rule_handle_stats_get \
+		"dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}" &> /dev/null
 
 	# check result
 	real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}")
@@ -376,6 +377,140 @@ num_grat_arp()
 	done
 }
 
+check_all_mac_same()
+{
+	RET=0
+	# all slaves should have same mac address (with the first port's mac)
+	local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+	local eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+	local eth1_mac=$(ip -n "$s_ns" -j link show eth1 | jq -r '.[]["address"]')
+	local eth2_mac=$(ip -n "$s_ns" -j link show eth2 | jq -r '.[]["address"]')
+	if [ "$bond_mac" != "${mac[0]}" ] || [ "$eth0_mac" != "$bond_mac" ] || \
+		[ "$eth1_mac" != "$bond_mac" ] || [ "$eth2_mac" != "$bond_mac" ]; then
+		RET=1
+	fi
+}
+
+check_bond_mac_same_with_first()
+{
+	RET=0
+	# bond mac address should be same with the first added slave
+	local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+	if [ "$bond_mac" != "${mac[0]}" ]; then
+		RET=1
+	fi
+}
+
+check_bond_mac_same_with_active()
+{
+	RET=0
+	# bond mac address should be same with active slave
+	local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+	local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+	local active_slave_mac=$(ip -n "$s_ns" -j link show "$active_slave" | jq -r '.[]["address"]')
+	if [ "$bond_mac" != "$active_slave_mac" ]; then
+		RET=1
+	fi
+}
+
+check_backup_slave_mac_not_change()
+{
+	RET=0
+	# backup slave's mac address is not changed
+	if ip -n "$s_ns" -d -j link show type bond_slave | jq -e '.[]
+		| select(.linkinfo.info_slave_data.state=="BACKUP")
+		| select(.address != .linkinfo.info_slave_data.perm_hwaddr)' &> /dev/null; then
+		RET=1
+	fi
+}
+
+check_backup_slave_mac_inherit()
+{
+	local backup_mac
+	RET=0
+
+	# backup slaves should use mac[1] or mac[2]
+	local backup_macs=$(ip -n "$s_ns" -d -j link show type bond_slave | \
+		jq -r '.[] | select(.linkinfo.info_slave_data.state=="BACKUP") | .address')
+	for backup_mac in $backup_macs; do
+		if [ "$backup_mac" != "${mac[1]}" ] && [ "$backup_mac" != "${mac[2]}" ]; then
+			RET=1
+		fi
+	done
+}
+
+check_first_slave_random_mac()
+{
+	RET=0
+	# remove the first added slave and added it back
+	ip -n "$s_ns" link set eth0 nomaster
+	ip -n "$s_ns" link set eth0 master bond0
+
+	# the first slave should use random mac address
+	eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+	[ "$eth0_mac" = "${mac[0]}" ] && RET=1
+	log_test "bond fail_over_mac follow" "random first slave mac"
+
+	# remove the first slave, the permanent MAC address should be restored back
+	ip -n "$s_ns" link set eth0 nomaster
+	eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+	[ "$eth0_mac" != "${mac[0]}" ] && RET=1
+}
+
+do_active_backup_failover()
+{
+	local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+	ip -n ${s_ns} link set ${active_slave} down
+	slowwait 2 active_slave_changed $active_slave
+	ip -n ${s_ns} link set ${active_slave} up
+}
+
+fail_over_mac()
+{
+	# Bring down the first interface on the switch to force the bond to
+	# select another active interface instead of the first one that joined.
+	ip -n "$g_ns" link set s0 down
+
+	# fail_over_mac none
+	bond_reset "mode active-backup miimon 100 fail_over_mac 0"
+	check_all_mac_same
+	log_test "fail_over_mac 0" "all slaves have same mac"
+	do_active_backup_failover
+	check_all_mac_same
+	log_test "fail_over_mac 0" "failover: all slaves have same mac"
+
+	# fail_over_mac active
+	bond_reset "mode active-backup miimon 100 fail_over_mac 1"
+	check_bond_mac_same_with_active
+	log_test "fail_over_mac 1" "bond mac is same with active slave mac"
+	check_backup_slave_mac_not_change
+	log_test "fail_over_mac 1" "backup slave mac is not changed"
+	do_active_backup_failover
+	check_bond_mac_same_with_active
+	log_test "fail_over_mac 1" "failover: bond mac is same with active slave mac"
+	check_backup_slave_mac_not_change
+	log_test "fail_over_mac 1" "failover: backup slave mac is not changed"
+
+	# fail_over_mac follow
+	bond_reset "mode active-backup miimon 100 fail_over_mac 2"
+	check_bond_mac_same_with_first
+	log_test "fail_over_mac 2" "bond mac is same with first slave mac"
+	check_bond_mac_same_with_active
+	log_test "fail_over_mac 2" "bond mac is same with active slave mac"
+	check_backup_slave_mac_inherit
+	log_test "fail_over_mac 2" "backup slave mac inherit"
+	do_active_backup_failover
+	check_bond_mac_same_with_first
+	log_test "fail_over_mac 2" "failover: bond mac is same with first slave mac"
+	check_bond_mac_same_with_active
+	log_test "fail_over_mac 2" "failover: bond mac is same with active slave mac"
+	check_backup_slave_mac_inherit
+	log_test "fail_over_mac 2" "failover: backup slave mac inherit"
+	check_first_slave_random_mac
+	log_test "fail_over_mac 2" "first slave mac random"
+
+}
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
index 195ef83cfbf1..167aa4a4a12a 100644
--- a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
@@ -39,6 +39,8 @@ g_ip4="192.0.2.254"
 s_ip6="2001:db8::1"
 c_ip6="2001:db8::10"
 g_ip6="2001:db8::254"
+mac[0]="00:0a:0b:0c:0d:01"
+mac[1]="00:0a:0b:0c:0d:02"
 
 gateway_create()
 {
@@ -62,6 +64,7 @@ server_create()
 
 	for i in $(seq 0 1); do
 		ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+		ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}"
 
 		ip -n ${g_ns} link set s${i} up
 		ip -n ${g_ns} link set s${i} master br0
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
index 3a1333d9a85b..23a2932301cc 100644
--- a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
@@ -26,6 +26,7 @@
 #  +-------------------------------------+
 
 source bond_topo_2d1c.sh
+mac[2]="00:0a:0b:0c:0d:03"
 
 setup_prepare()
 {
@@ -36,6 +37,7 @@ setup_prepare()
 	# Add the extra device as we use 3 down links for bond0
 	local i=2
 	ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+	ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}"
 	ip -n ${g_ns} link set s${i} up
 	ip -n ${g_ns} link set s${i} master br0
 	ip -n ${s_ns} link set eth${i} master bond0
-- 
2.50.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net 1/2] bonding: set random address only when slaves already exist
  2025-08-20  9:10 [PATCH net 1/2] bonding: set random address only when slaves already exist Hangbin Liu
  2025-08-20  9:10 ` [PATCH net 2/2] selftests: bonding: add fail_over_mac testing Hangbin Liu
@ 2025-08-23  0:21 ` Jay Vosburgh
  2025-08-25  4:07   ` Hangbin Liu
  1 sibling, 1 reply; 4+ messages in thread
From: Jay Vosburgh @ 2025-08-23  0:21 UTC (permalink / raw)
  To: Hangbin Liu
  Cc: netdev, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Nikolay Aleksandrov, Simon Horman,
	Shuah Khan, linux-kselftest, Qiuling Ren

Hangbin Liu <liuhangbin@gmail.com> wrote:

>Commit 5c3bf6cba791 ("bonding: assign random address if device address is
>same as bond") fixed an issue where, after releasing the first slave and
>re-adding it to the bond with fail_over_mac=follow, both the active and
>backup slaves could end up with duplicate MAC addresses. To avoid this,
>the new slave was assigned a random address.
>
>However, if this happens when adding the very first slave, the bond’s
>hardware address is set to match the slave’s. Later, during the
>fail_over_mac=follow check, the slave’s MAC is randomized because it
>naturally matches the bond, which is incorrect.

	The description here seems confusing to me; what does "this"
refer to?  I don't understand the sequence of events that lead to the
issue being fixed here.

	I wonder if there's another bug somewhere, since nominally when
releasing the last interface in the bond, __bond_release_one() should
randomize the bond's MAC address, so it shouldn't match when adding (or
re-adding ?) the first interface to the bond.

	-J

>The issue is normally hidden since the first slave usually becomes the
>active one, which restores the bond's MAC address. However, if another
>slave is selected as the initial active interface, the issue becomes visible.
>
>Fix this by assigning a random address only when slaves already exist in
>the bond.
>
>Fixes: 5c3bf6cba791 ("bonding: assign random address if device address is same as bond")
>Reported-by: Qiuling Ren <qren@redhat.com>
>Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
>---
> drivers/net/bonding/bond_main.c | 1 +
> 1 file changed, 1 insertion(+)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 257333c88710..8832bc9f107b 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -2132,6 +2132,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
> 		memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len);
> 	} else if (bond->params.fail_over_mac == BOND_FOM_FOLLOW &&
> 		   BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
>+		   bond_has_slaves(bond) &&
> 		   memcmp(slave_dev->dev_addr, bond_dev->dev_addr, bond_dev->addr_len) == 0) {
> 		/* Set slave to random address to avoid duplicate mac
> 		 * address in later fail over.
>-- 
>2.50.1
>

---
	-Jay Vosburgh, jv@jvosburgh.net


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net 1/2] bonding: set random address only when slaves already exist
  2025-08-23  0:21 ` [PATCH net 1/2] bonding: set random address only when slaves already exist Jay Vosburgh
@ 2025-08-25  4:07   ` Hangbin Liu
  0 siblings, 0 replies; 4+ messages in thread
From: Hangbin Liu @ 2025-08-25  4:07 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: netdev, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Nikolay Aleksandrov, Simon Horman,
	Shuah Khan, linux-kselftest, Qiuling Ren

On Fri, Aug 22, 2025 at 05:21:30PM -0700, Jay Vosburgh wrote:
> Hangbin Liu <liuhangbin@gmail.com> wrote:
> 
> >Commit 5c3bf6cba791 ("bonding: assign random address if device address is
> >same as bond") fixed an issue where, after releasing the first slave and
> >re-adding it to the bond with fail_over_mac=follow, both the active and
> >backup slaves could end up with duplicate MAC addresses. To avoid this,
> >the new slave was assigned a random address.
> >
> >However, if this happens when adding the very first slave, the bond’s
> >hardware address is set to match the slave’s. Later, during the
> >fail_over_mac=follow check, the slave’s MAC is randomized because it
> >naturally matches the bond, which is incorrect.
> 
> 	The description here seems confusing to me; what does "this"
> refer to?  I don't understand the sequence of events that lead to the
> issue being fixed here.
> 
> 	I wonder if there's another bug somewhere, since nominally when
> releasing the last interface in the bond, __bond_release_one() should
> randomize the bond's MAC address, so it shouldn't match when adding (or
> re-adding ?) the first interface to the bond.
> 

Sorry I didn't make it clear. A easy reproducer would describe the issue. e.g.
(omit the lo interface)

[root@virtme-ng net]# ip link add type veth
[root@virtme-ng net]# ip link add bond0 type bond mode 1 miimon 100 fail_over_mac 2
[root@virtme-ng net]# ip link show
3: veth0@veth1: <BROADCAST,MULTICAST,M-DOWN> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 02:0a:04:c2:d6:21 brd ff:ff:ff:ff:ff:ff
4: veth1@veth0: <BROADCAST,MULTICAST,M-DOWN> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 82:a8:52:f4:81:4e brd ff:ff:ff:ff:ff:ff
5: bond0: <BROADCAST,MULTICAST,MASTER> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 92:5d:9c:47:e7:53 brd ff:ff:ff:ff:ff:ff
[root@virtme-ng net]# ip link set veth0 master bond0
[root@virtme-ng net]# ip link show
3: veth0@veth1: <NO-CARRIER,BROADCAST,MULTICAST,SLAVE,UP,M-DOWN> mtu 1500 qdisc noqueue master bond0 state LOWERLAYERDOWN mode DEFAULT group default qlen 1000
    link/ether 4e:b5:4a:b4:03:18 brd ff:ff:ff:ff:ff:ff
4: veth1@veth0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 82:a8:52:f4:81:4e brd ff:ff:ff:ff:ff:ff
5: bond0: <BROADCAST,MULTICAST,MASTER> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 02:0a:04:c2:d6:21 brd ff:ff:ff:ff:ff:ff

Here we can see the veth0's mac address is randomized. The reason is in
function bond_enslave(), we set the bond mac address to the same as slave's
if it's the first one.

        /* If this is the first slave, then we need to set the master's hardware
         * address to be the same as the slave's.
         */
        if (!bond_has_slaves(bond) &&
            bond->dev->addr_assign_type == NET_ADDR_RANDOM) {
                res = bond_set_dev_addr(bond->dev, slave_dev);
                if (res)
                        goto err_undo_flags;
        }

And later

       } else if (bond->params.fail_over_mac == BOND_FOM_FOLLOW &&
                   BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
                   memcmp(slave_dev->dev_addr, bond_dev->dev_addr, bond_dev->addr_len) == 0) {
                /* Set slave to random address to avoid duplicate mac
                 * address in later fail over.
                 */
                eth_random_addr(ss.__data);
        } else {

Here we check the bond and slave's mac address, which would be the same
definitely, which cause the first slave's mac got changed.

Thanks
Hangbin

> 
> >The issue is normally hidden since the first slave usually becomes the
> >active one, which restores the bond's MAC address. However, if another
> >slave is selected as the initial active interface, the issue becomes visible.
> >
> >Fix this by assigning a random address only when slaves already exist in
> >the bond.
> >
> >Fixes: 5c3bf6cba791 ("bonding: assign random address if device address is same as bond")
> >Reported-by: Qiuling Ren <qren@redhat.com>
> >Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> >---
> > drivers/net/bonding/bond_main.c | 1 +
> > 1 file changed, 1 insertion(+)
> >
> >diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> >index 257333c88710..8832bc9f107b 100644
> >--- a/drivers/net/bonding/bond_main.c
> >+++ b/drivers/net/bonding/bond_main.c
> >@@ -2132,6 +2132,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
> > 		memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len);
> > 	} else if (bond->params.fail_over_mac == BOND_FOM_FOLLOW &&
> > 		   BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
> >+		   bond_has_slaves(bond) &&
> > 		   memcmp(slave_dev->dev_addr, bond_dev->dev_addr, bond_dev->addr_len) == 0) {
> > 		/* Set slave to random address to avoid duplicate mac
> > 		 * address in later fail over.
> >-- 
> >2.50.1
> >
> 
> ---
> 	-Jay Vosburgh, jv@jvosburgh.net
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-08-25  4:07 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-08-20  9:10 [PATCH net 1/2] bonding: set random address only when slaves already exist Hangbin Liu
2025-08-20  9:10 ` [PATCH net 2/2] selftests: bonding: add fail_over_mac testing Hangbin Liu
2025-08-23  0:21 ` [PATCH net 1/2] bonding: set random address only when slaves already exist Jay Vosburgh
2025-08-25  4:07   ` Hangbin Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).