* [PATCH net] hsr: add rcu lock for all hsr_for_each_port caller
@ 2025-08-26 4:11 Hangbin Liu
2025-08-26 5:01 ` Kuniyuki Iwashima
2025-08-26 8:51 ` [syzbot ci] " syzbot ci
0 siblings, 2 replies; 4+ messages in thread
From: Hangbin Liu @ 2025-08-26 4:11 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, MD Danish Anwar, Alexander Lobakin,
Jaakko Karrenpalo, Fernando Fernandez Mancera, Murali Karicheri,
WingMan Kwok, Stanislav Fomichev, Xiao Liang, Kuniyuki Iwashima,
Johannes Berg, Yu Liao, Arvid Brodin, Hangbin Liu
hsr_for_each_port is called in many places without holding the RCU read
lock, this may trigger warnings on debug kernels like:
[ 40.457015] [ T201] WARNING: suspicious RCU usage
[ 40.457020] [ T201] 6.17.0-rc2-virtme #1 Not tainted
[ 40.457025] [ T201] -----------------------------
[ 40.457029] [ T201] net/hsr/hsr_main.c:137 RCU-list traversed in non-reader section!!
[ 40.457036] [ T201]
other info that might help us debug this:
[ 40.457040] [ T201]
rcu_scheduler_active = 2, debug_locks = 1
[ 40.457045] [ T201] 2 locks held by ip/201:
[ 40.457050] [ T201] #0: ffffffff93040a40 (&ops->srcu){.+.+}-{0:0}, at: rtnl_link_ops_get+0xf2/0x280
[ 40.457080] [ T201] #1: ffffffff92e7f968 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x5e1/0xb20
[ 40.457102] [ T201]
stack backtrace:
[ 40.457108] [ T201] CPU: 2 UID: 0 PID: 201 Comm: ip Not tainted 6.17.0-rc2-virtme #1 PREEMPT(full)
[ 40.457114] [ T201] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 40.457117] [ T201] Call Trace:
[ 40.457120] [ T201] <TASK>
[ 40.457126] [ T201] dump_stack_lvl+0x6f/0xb0
[ 40.457136] [ T201] lockdep_rcu_suspicious.cold+0x4f/0xb1
[ 40.457148] [ T201] hsr_port_get_hsr+0xfe/0x140
[ 40.457158] [ T201] hsr_add_port+0x192/0x940
[ 40.457167] [ T201] ? __pfx_hsr_add_port+0x10/0x10
[ 40.457176] [ T201] ? lockdep_init_map_type+0x5c/0x270
[ 40.457189] [ T201] hsr_dev_finalize+0x4bc/0xbf0
[ 40.457204] [ T201] hsr_newlink+0x3c3/0x8f0
[ 40.457212] [ T201] ? __pfx_hsr_newlink+0x10/0x10
[ 40.457222] [ T201] ? rtnl_create_link+0x173/0xe40
[ 40.457233] [ T201] rtnl_newlink_create+0x2cf/0x750
[ 40.457243] [ T201] ? __pfx_rtnl_newlink_create+0x10/0x10
[ 40.457247] [ T201] ? __dev_get_by_name+0x12/0x50
[ 40.457252] [ T201] ? rtnl_dev_get+0xac/0x140
[ 40.457259] [ T201] ? __pfx_rtnl_dev_get+0x10/0x10
[ 40.457285] [ T201] __rtnl_newlink+0x22c/0xa50
[ 40.457305] [ T201] rtnl_newlink+0x637/0xb20
Fix it by wrapping the call with rcu_read_lock()/rcu_read_unlock().
Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
net/hsr/hsr_device.c | 37 ++++++++++++++++++++++++++++++++-----
net/hsr/hsr_main.c | 12 ++++++++++--
net/hsr/hsr_netlink.c | 4 ----
3 files changed, 42 insertions(+), 11 deletions(-)
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 88657255fec1..67955b21b4a4 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -49,12 +49,15 @@ static bool hsr_check_carrier(struct hsr_port *master)
ASSERT_RTNL();
+ rcu_read_lock();
hsr_for_each_port(master->hsr, port) {
if (port->type != HSR_PT_MASTER && is_slave_up(port->dev)) {
+ rcu_read_unlock();
netif_carrier_on(master->dev);
return true;
}
}
+ rcu_read_unlock();
netif_carrier_off(master->dev);
@@ -105,9 +108,12 @@ int hsr_get_max_mtu(struct hsr_priv *hsr)
struct hsr_port *port;
mtu_max = ETH_DATA_LEN;
+
+ rcu_read_lock();
hsr_for_each_port(hsr, port)
if (port->type != HSR_PT_MASTER)
mtu_max = min(port->dev->mtu, mtu_max);
+ rcu_read_unlock();
if (mtu_max < HSR_HLEN)
return 0;
@@ -139,6 +145,7 @@ static int hsr_dev_open(struct net_device *dev)
hsr = netdev_priv(dev);
+ rcu_read_lock();
hsr_for_each_port(hsr, port) {
if (port->type == HSR_PT_MASTER)
continue;
@@ -159,6 +166,7 @@ static int hsr_dev_open(struct net_device *dev)
netdev_warn(dev, "%s (%s) is not up; please bring it up to get a fully working HSR network\n",
designation, port->dev->name);
}
+ rcu_read_unlock();
if (!designation)
netdev_warn(dev, "No slave devices configured\n");
@@ -172,6 +180,8 @@ static int hsr_dev_close(struct net_device *dev)
struct hsr_priv *hsr;
hsr = netdev_priv(dev);
+
+ rcu_read_lock();
hsr_for_each_port(hsr, port) {
if (port->type == HSR_PT_MASTER)
continue;
@@ -185,6 +195,7 @@ static int hsr_dev_close(struct net_device *dev)
break;
}
}
+ rcu_read_unlock();
return 0;
}
@@ -205,10 +216,13 @@ static netdev_features_t hsr_features_recompute(struct hsr_priv *hsr,
* may become enabled.
*/
features &= ~NETIF_F_ONE_FOR_ALL;
+
+ rcu_read_lock();
hsr_for_each_port(hsr, port)
features = netdev_increment_features(features,
port->dev->features,
mask);
+ rcu_read_unlock();
return features;
}
@@ -410,14 +424,11 @@ static void hsr_announce(struct timer_list *t)
hsr = timer_container_of(hsr, t, announce_timer);
- rcu_read_lock();
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
hsr->proto_ops->send_sv_frame(master, &interval, master->dev->dev_addr);
if (is_admin_up(master->dev))
mod_timer(&hsr->announce_timer, jiffies + interval);
-
- rcu_read_unlock();
}
/* Announce (supervision frame) timer function for RedBox
@@ -430,7 +441,6 @@ static void hsr_proxy_announce(struct timer_list *t)
unsigned long interval = 0;
struct hsr_node *node;
- rcu_read_lock();
/* RedBOX sends supervisory frames to HSR network with MAC addresses
* of SAN nodes stored in ProxyNodeTable.
*/
@@ -438,6 +448,7 @@ static void hsr_proxy_announce(struct timer_list *t)
if (!interlink)
goto done;
+ rcu_read_lock();
list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) {
if (hsr_addr_is_redbox(hsr, node->macaddress_A))
continue;
@@ -484,6 +495,7 @@ static void hsr_set_rx_mode(struct net_device *dev)
hsr = netdev_priv(dev);
+ rcu_read_lock();
hsr_for_each_port(hsr, port) {
if (port->type == HSR_PT_MASTER)
continue;
@@ -497,6 +509,7 @@ static void hsr_set_rx_mode(struct net_device *dev)
break;
}
}
+ rcu_read_unlock();
}
static void hsr_change_rx_flags(struct net_device *dev, int change)
@@ -506,6 +519,7 @@ static void hsr_change_rx_flags(struct net_device *dev, int change)
hsr = netdev_priv(dev);
+ rcu_read_lock();
hsr_for_each_port(hsr, port) {
if (port->type == HSR_PT_MASTER)
continue;
@@ -521,6 +535,7 @@ static void hsr_change_rx_flags(struct net_device *dev, int change)
break;
}
}
+ rcu_read_unlock();
}
static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
@@ -534,6 +549,7 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
hsr = netdev_priv(dev);
+ rcu_read_lock();
hsr_for_each_port(hsr, port) {
if (port->type == HSR_PT_MASTER ||
port->type == HSR_PT_INTERLINK)
@@ -547,6 +563,8 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
netdev_err(dev, "add vid failed for Slave-A\n");
if (is_slave_b_added)
vlan_vid_del(port->dev, proto, vid);
+
+ rcu_read_unlock();
return ret;
}
@@ -559,6 +577,8 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
netdev_err(dev, "add vid failed for Slave-B\n");
if (is_slave_a_added)
vlan_vid_del(port->dev, proto, vid);
+
+ rcu_read_unlock();
return ret;
}
@@ -568,6 +588,7 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
break;
}
}
+ rcu_read_unlock();
return 0;
}
@@ -580,6 +601,7 @@ static int hsr_ndo_vlan_rx_kill_vid(struct net_device *dev,
hsr = netdev_priv(dev);
+ rcu_read_lock();
hsr_for_each_port(hsr, port) {
switch (port->type) {
case HSR_PT_SLAVE_A:
@@ -590,6 +612,7 @@ static int hsr_ndo_vlan_rx_kill_vid(struct net_device *dev,
break;
}
}
+ rcu_read_unlock();
return 0;
}
@@ -672,9 +695,13 @@ struct net_device *hsr_get_port_ndev(struct net_device *ndev,
struct hsr_priv *hsr = netdev_priv(ndev);
struct hsr_port *port;
+ rcu_read_lock();
hsr_for_each_port(hsr, port)
- if (port->type == pt)
+ if (port->type == pt) {
+ rcu_read_unlock();
return port->dev;
+ }
+ rcu_read_unlock();
return NULL;
}
EXPORT_SYMBOL(hsr_get_port_ndev);
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index 192893c3f2ec..eec6e20a8494 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -22,9 +22,13 @@ static bool hsr_slave_empty(struct hsr_priv *hsr)
{
struct hsr_port *port;
+ rcu_read_lock();
hsr_for_each_port(hsr, port)
- if (port->type != HSR_PT_MASTER)
+ if (port->type != HSR_PT_MASTER) {
+ rcu_read_unlock();
return false;
+ }
+ rcu_read_unlock();
return true;
}
@@ -134,9 +138,13 @@ struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt)
{
struct hsr_port *port;
+ rcu_read_lock();
hsr_for_each_port(hsr, port)
- if (port->type == pt)
+ if (port->type == pt) {
+ rcu_read_unlock();
return port;
+ }
+ rcu_read_unlock();
return NULL;
}
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index b120470246cc..f57c289e2322 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -241,10 +241,8 @@ void hsr_nl_ringerror(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN],
kfree_skb(skb);
fail:
- rcu_read_lock();
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
netdev_warn(master->dev, "Could not send HSR ring error message\n");
- rcu_read_unlock();
}
/* This is called when we haven't heard from the node with MAC address addr for
@@ -278,10 +276,8 @@ void hsr_nl_nodedown(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN])
kfree_skb(skb);
fail:
- rcu_read_lock();
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
netdev_warn(master->dev, "Could not send HSR node down\n");
- rcu_read_unlock();
}
/* HSR_C_GET_NODE_STATUS lets userspace query the internal HSR node table
--
2.50.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH net] hsr: add rcu lock for all hsr_for_each_port caller
2025-08-26 4:11 [PATCH net] hsr: add rcu lock for all hsr_for_each_port caller Hangbin Liu
@ 2025-08-26 5:01 ` Kuniyuki Iwashima
2025-08-26 6:56 ` Hangbin Liu
2025-08-26 8:51 ` [syzbot ci] " syzbot ci
1 sibling, 1 reply; 4+ messages in thread
From: Kuniyuki Iwashima @ 2025-08-26 5:01 UTC (permalink / raw)
To: Hangbin Liu
Cc: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman, MD Danish Anwar, Alexander Lobakin,
Jaakko Karrenpalo, Fernando Fernandez Mancera, Murali Karicheri,
WingMan Kwok, Stanislav Fomichev, Xiao Liang, Johannes Berg,
Yu Liao, Arvid Brodin
On Mon, Aug 25, 2025 at 9:12 PM Hangbin Liu <liuhangbin@gmail.com> wrote:
>
> hsr_for_each_port is called in many places without holding the RCU read
> lock, this may trigger warnings on debug kernels like:
>
> [ 40.457015] [ T201] WARNING: suspicious RCU usage
> [ 40.457020] [ T201] 6.17.0-rc2-virtme #1 Not tainted
> [ 40.457025] [ T201] -----------------------------
> [ 40.457029] [ T201] net/hsr/hsr_main.c:137 RCU-list traversed in non-reader section!!
> [ 40.457036] [ T201]
> other info that might help us debug this:
>
> [ 40.457040] [ T201]
> rcu_scheduler_active = 2, debug_locks = 1
> [ 40.457045] [ T201] 2 locks held by ip/201:
> [ 40.457050] [ T201] #0: ffffffff93040a40 (&ops->srcu){.+.+}-{0:0}, at: rtnl_link_ops_get+0xf2/0x280
> [ 40.457080] [ T201] #1: ffffffff92e7f968 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x5e1/0xb20
> [ 40.457102] [ T201]
> stack backtrace:
> [ 40.457108] [ T201] CPU: 2 UID: 0 PID: 201 Comm: ip Not tainted 6.17.0-rc2-virtme #1 PREEMPT(full)
> [ 40.457114] [ T201] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
> [ 40.457117] [ T201] Call Trace:
> [ 40.457120] [ T201] <TASK>
> [ 40.457126] [ T201] dump_stack_lvl+0x6f/0xb0
> [ 40.457136] [ T201] lockdep_rcu_suspicious.cold+0x4f/0xb1
> [ 40.457148] [ T201] hsr_port_get_hsr+0xfe/0x140
> [ 40.457158] [ T201] hsr_add_port+0x192/0x940
> [ 40.457167] [ T201] ? __pfx_hsr_add_port+0x10/0x10
> [ 40.457176] [ T201] ? lockdep_init_map_type+0x5c/0x270
> [ 40.457189] [ T201] hsr_dev_finalize+0x4bc/0xbf0
> [ 40.457204] [ T201] hsr_newlink+0x3c3/0x8f0
> [ 40.457212] [ T201] ? __pfx_hsr_newlink+0x10/0x10
> [ 40.457222] [ T201] ? rtnl_create_link+0x173/0xe40
> [ 40.457233] [ T201] rtnl_newlink_create+0x2cf/0x750
> [ 40.457243] [ T201] ? __pfx_rtnl_newlink_create+0x10/0x10
> [ 40.457247] [ T201] ? __dev_get_by_name+0x12/0x50
> [ 40.457252] [ T201] ? rtnl_dev_get+0xac/0x140
> [ 40.457259] [ T201] ? __pfx_rtnl_dev_get+0x10/0x10
> [ 40.457285] [ T201] __rtnl_newlink+0x22c/0xa50
> [ 40.457305] [ T201] rtnl_newlink+0x637/0xb20
>
> Fix it by wrapping the call with rcu_read_lock()/rcu_read_unlock().
>
> Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.")
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> ---
> net/hsr/hsr_device.c | 37 ++++++++++++++++++++++++++++++++-----
> net/hsr/hsr_main.c | 12 ++++++++++--
> net/hsr/hsr_netlink.c | 4 ----
> 3 files changed, 42 insertions(+), 11 deletions(-)
>
> diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
> index 88657255fec1..67955b21b4a4 100644
> --- a/net/hsr/hsr_device.c
> +++ b/net/hsr/hsr_device.c
> @@ -49,12 +49,15 @@ static bool hsr_check_carrier(struct hsr_port *master)
>
> ASSERT_RTNL();
>
> + rcu_read_lock();
> hsr_for_each_port(master->hsr, port) {
Why not use the 4th arg of list_for_each_entry_rcu() ?
Adding random rcu_read_lock() looks confusing.
> if (port->type != HSR_PT_MASTER && is_slave_up(port->dev)) {
> + rcu_read_unlock();
> netif_carrier_on(master->dev);
> return true;
> }
> }
> + rcu_read_unlock();
>
> netif_carrier_off(master->dev);
>
> @@ -105,9 +108,12 @@ int hsr_get_max_mtu(struct hsr_priv *hsr)
> struct hsr_port *port;
>
> mtu_max = ETH_DATA_LEN;
> +
> + rcu_read_lock();
> hsr_for_each_port(hsr, port)
> if (port->type != HSR_PT_MASTER)
> mtu_max = min(port->dev->mtu, mtu_max);
> + rcu_read_unlock();
>
> if (mtu_max < HSR_HLEN)
> return 0;
> @@ -139,6 +145,7 @@ static int hsr_dev_open(struct net_device *dev)
>
> hsr = netdev_priv(dev);
>
> + rcu_read_lock();
> hsr_for_each_port(hsr, port) {
> if (port->type == HSR_PT_MASTER)
> continue;
> @@ -159,6 +166,7 @@ static int hsr_dev_open(struct net_device *dev)
> netdev_warn(dev, "%s (%s) is not up; please bring it up to get a fully working HSR network\n",
> designation, port->dev->name);
> }
> + rcu_read_unlock();
>
> if (!designation)
> netdev_warn(dev, "No slave devices configured\n");
> @@ -172,6 +180,8 @@ static int hsr_dev_close(struct net_device *dev)
> struct hsr_priv *hsr;
>
> hsr = netdev_priv(dev);
> +
> + rcu_read_lock();
> hsr_for_each_port(hsr, port) {
> if (port->type == HSR_PT_MASTER)
> continue;
> @@ -185,6 +195,7 @@ static int hsr_dev_close(struct net_device *dev)
> break;
> }
> }
> + rcu_read_unlock();
>
> return 0;
> }
> @@ -205,10 +216,13 @@ static netdev_features_t hsr_features_recompute(struct hsr_priv *hsr,
> * may become enabled.
> */
> features &= ~NETIF_F_ONE_FOR_ALL;
> +
> + rcu_read_lock();
> hsr_for_each_port(hsr, port)
> features = netdev_increment_features(features,
> port->dev->features,
> mask);
> + rcu_read_unlock();
>
> return features;
> }
> @@ -410,14 +424,11 @@ static void hsr_announce(struct timer_list *t)
>
> hsr = timer_container_of(hsr, t, announce_timer);
>
> - rcu_read_lock();
> master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
> hsr->proto_ops->send_sv_frame(master, &interval, master->dev->dev_addr);
hsr_announce() is a timer func, and what protects master after
rcu_read_unlock() in hsr_port_get_hsr() ?
>
> if (is_admin_up(master->dev))
> mod_timer(&hsr->announce_timer, jiffies + interval);
> -
> - rcu_read_unlock();
> }
>
> /* Announce (supervision frame) timer function for RedBox
> @@ -430,7 +441,6 @@ static void hsr_proxy_announce(struct timer_list *t)
> unsigned long interval = 0;
> struct hsr_node *node;
>
> - rcu_read_lock();
> /* RedBOX sends supervisory frames to HSR network with MAC addresses
> * of SAN nodes stored in ProxyNodeTable.
> */
> @@ -438,6 +448,7 @@ static void hsr_proxy_announce(struct timer_list *t)
> if (!interlink)
> goto done;
>
> + rcu_read_lock();
> list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) {
> if (hsr_addr_is_redbox(hsr, node->macaddress_A))
> continue;
> @@ -484,6 +495,7 @@ static void hsr_set_rx_mode(struct net_device *dev)
>
> hsr = netdev_priv(dev);
>
> + rcu_read_lock();
> hsr_for_each_port(hsr, port) {
> if (port->type == HSR_PT_MASTER)
> continue;
> @@ -497,6 +509,7 @@ static void hsr_set_rx_mode(struct net_device *dev)
> break;
> }
> }
> + rcu_read_unlock();
> }
>
> static void hsr_change_rx_flags(struct net_device *dev, int change)
> @@ -506,6 +519,7 @@ static void hsr_change_rx_flags(struct net_device *dev, int change)
>
> hsr = netdev_priv(dev);
>
> + rcu_read_lock();
> hsr_for_each_port(hsr, port) {
> if (port->type == HSR_PT_MASTER)
> continue;
> @@ -521,6 +535,7 @@ static void hsr_change_rx_flags(struct net_device *dev, int change)
> break;
> }
> }
> + rcu_read_unlock();
> }
>
> static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
> @@ -534,6 +549,7 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
>
> hsr = netdev_priv(dev);
>
> + rcu_read_lock();
> hsr_for_each_port(hsr, port) {
> if (port->type == HSR_PT_MASTER ||
> port->type == HSR_PT_INTERLINK)
> @@ -547,6 +563,8 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
> netdev_err(dev, "add vid failed for Slave-A\n");
> if (is_slave_b_added)
> vlan_vid_del(port->dev, proto, vid);
> +
> + rcu_read_unlock();
> return ret;
> }
>
> @@ -559,6 +577,8 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
> netdev_err(dev, "add vid failed for Slave-B\n");
> if (is_slave_a_added)
> vlan_vid_del(port->dev, proto, vid);
> +
> + rcu_read_unlock();
> return ret;
> }
>
> @@ -568,6 +588,7 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
> break;
> }
> }
> + rcu_read_unlock();
>
> return 0;
> }
> @@ -580,6 +601,7 @@ static int hsr_ndo_vlan_rx_kill_vid(struct net_device *dev,
>
> hsr = netdev_priv(dev);
>
> + rcu_read_lock();
> hsr_for_each_port(hsr, port) {
> switch (port->type) {
> case HSR_PT_SLAVE_A:
> @@ -590,6 +612,7 @@ static int hsr_ndo_vlan_rx_kill_vid(struct net_device *dev,
> break;
> }
> }
> + rcu_read_unlock();
>
> return 0;
> }
> @@ -672,9 +695,13 @@ struct net_device *hsr_get_port_ndev(struct net_device *ndev,
> struct hsr_priv *hsr = netdev_priv(ndev);
> struct hsr_port *port;
>
> + rcu_read_lock();
> hsr_for_each_port(hsr, port)
> - if (port->type == pt)
> + if (port->type == pt) {
> + rcu_read_unlock();
> return port->dev;
> + }
> + rcu_read_unlock();
> return NULL;
> }
> EXPORT_SYMBOL(hsr_get_port_ndev);
> diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
> index 192893c3f2ec..eec6e20a8494 100644
> --- a/net/hsr/hsr_main.c
> +++ b/net/hsr/hsr_main.c
> @@ -22,9 +22,13 @@ static bool hsr_slave_empty(struct hsr_priv *hsr)
> {
> struct hsr_port *port;
>
> + rcu_read_lock();
> hsr_for_each_port(hsr, port)
> - if (port->type != HSR_PT_MASTER)
> + if (port->type != HSR_PT_MASTER) {
> + rcu_read_unlock();
> return false;
> + }
> + rcu_read_unlock();
> return true;
> }
>
> @@ -134,9 +138,13 @@ struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt)
> {
> struct hsr_port *port;
>
> + rcu_read_lock();
> hsr_for_each_port(hsr, port)
> - if (port->type == pt)
> + if (port->type == pt) {
> + rcu_read_unlock();
> return port;
> + }
> + rcu_read_unlock();
> return NULL;
> }
>
> diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
> index b120470246cc..f57c289e2322 100644
> --- a/net/hsr/hsr_netlink.c
> +++ b/net/hsr/hsr_netlink.c
> @@ -241,10 +241,8 @@ void hsr_nl_ringerror(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN],
> kfree_skb(skb);
>
> fail:
> - rcu_read_lock();
> master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
> netdev_warn(master->dev, "Could not send HSR ring error message\n");
> - rcu_read_unlock();
> }
>
> /* This is called when we haven't heard from the node with MAC address addr for
> @@ -278,10 +276,8 @@ void hsr_nl_nodedown(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN])
> kfree_skb(skb);
>
> fail:
> - rcu_read_lock();
> master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
> netdev_warn(master->dev, "Could not send HSR node down\n");
> - rcu_read_unlock();
> }
>
> /* HSR_C_GET_NODE_STATUS lets userspace query the internal HSR node table
> --
> 2.50.1
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH net] hsr: add rcu lock for all hsr_for_each_port caller
2025-08-26 5:01 ` Kuniyuki Iwashima
@ 2025-08-26 6:56 ` Hangbin Liu
0 siblings, 0 replies; 4+ messages in thread
From: Hangbin Liu @ 2025-08-26 6:56 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman, MD Danish Anwar, Alexander Lobakin,
Jaakko Karrenpalo, Fernando Fernandez Mancera, Murali Karicheri,
WingMan Kwok, Stanislav Fomichev, Xiao Liang, Johannes Berg,
Yu Liao, Arvid Brodin
On Mon, Aug 25, 2025 at 10:01:05PM -0700, Kuniyuki Iwashima wrote:
> On Mon, Aug 25, 2025 at 9:12 PM Hangbin Liu <liuhangbin@gmail.com> wrote:
> >
> > hsr_for_each_port is called in many places without holding the RCU read
> > lock, this may trigger warnings on debug kernels like:
> >
> > [ 40.457015] [ T201] WARNING: suspicious RCU usage
> > [ 40.457020] [ T201] 6.17.0-rc2-virtme #1 Not tainted
> > [ 40.457025] [ T201] -----------------------------
> > [ 40.457029] [ T201] net/hsr/hsr_main.c:137 RCU-list traversed in non-reader section!!
> > [ 40.457036] [ T201]
> > other info that might help us debug this:
> >
> > [ 40.457040] [ T201]
> > rcu_scheduler_active = 2, debug_locks = 1
> > [ 40.457045] [ T201] 2 locks held by ip/201:
> > [ 40.457050] [ T201] #0: ffffffff93040a40 (&ops->srcu){.+.+}-{0:0}, at: rtnl_link_ops_get+0xf2/0x280
> > [ 40.457080] [ T201] #1: ffffffff92e7f968 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x5e1/0xb20
> > [ 40.457102] [ T201]
> > stack backtrace:
> > [ 40.457108] [ T201] CPU: 2 UID: 0 PID: 201 Comm: ip Not tainted 6.17.0-rc2-virtme #1 PREEMPT(full)
> > [ 40.457114] [ T201] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
> > [ 40.457117] [ T201] Call Trace:
> > [ 40.457120] [ T201] <TASK>
> > [ 40.457126] [ T201] dump_stack_lvl+0x6f/0xb0
> > [ 40.457136] [ T201] lockdep_rcu_suspicious.cold+0x4f/0xb1
> > [ 40.457148] [ T201] hsr_port_get_hsr+0xfe/0x140
> > [ 40.457158] [ T201] hsr_add_port+0x192/0x940
> > [ 40.457167] [ T201] ? __pfx_hsr_add_port+0x10/0x10
> > [ 40.457176] [ T201] ? lockdep_init_map_type+0x5c/0x270
> > [ 40.457189] [ T201] hsr_dev_finalize+0x4bc/0xbf0
> > [ 40.457204] [ T201] hsr_newlink+0x3c3/0x8f0
> > [ 40.457212] [ T201] ? __pfx_hsr_newlink+0x10/0x10
> > [ 40.457222] [ T201] ? rtnl_create_link+0x173/0xe40
> > [ 40.457233] [ T201] rtnl_newlink_create+0x2cf/0x750
> > [ 40.457243] [ T201] ? __pfx_rtnl_newlink_create+0x10/0x10
> > [ 40.457247] [ T201] ? __dev_get_by_name+0x12/0x50
> > [ 40.457252] [ T201] ? rtnl_dev_get+0xac/0x140
> > [ 40.457259] [ T201] ? __pfx_rtnl_dev_get+0x10/0x10
> > [ 40.457285] [ T201] __rtnl_newlink+0x22c/0xa50
> > [ 40.457305] [ T201] rtnl_newlink+0x637/0xb20
> >
> > Fix it by wrapping the call with rcu_read_lock()/rcu_read_unlock().
> >
> > Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.")
> > Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> > ---
> > net/hsr/hsr_device.c | 37 ++++++++++++++++++++++++++++++++-----
> > net/hsr/hsr_main.c | 12 ++++++++++--
> > net/hsr/hsr_netlink.c | 4 ----
> > 3 files changed, 42 insertions(+), 11 deletions(-)
> >
> > diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
> > index 88657255fec1..67955b21b4a4 100644
> > --- a/net/hsr/hsr_device.c
> > +++ b/net/hsr/hsr_device.c
> > @@ -49,12 +49,15 @@ static bool hsr_check_carrier(struct hsr_port *master)
> >
> > ASSERT_RTNL();
> >
> > + rcu_read_lock();
> > hsr_for_each_port(master->hsr, port) {
>
> Why not use the 4th arg of list_for_each_entry_rcu() ?
>
> Adding random rcu_read_lock() looks confusing.
Yes. Thanks for this notify. I didn't notice the 4th arg of
list_for_each_entry_rcu(). Do you have any suggestion which lock we should
check? rtnl_is_locked() seems can't cover all cases.
Or maybe add a hsr_for_each_port_rntl() for some of net_device_ops?
And others still using rcu read lock? I'm not very sure. Do you have any
suggestions?
...
> > @@ -205,10 +216,13 @@ static netdev_features_t hsr_features_recompute(struct hsr_priv *hsr,
> > * may become enabled.
> > */
> > features &= ~NETIF_F_ONE_FOR_ALL;
> > +
> > + rcu_read_lock();
> > hsr_for_each_port(hsr, port)
> > features = netdev_increment_features(features,
> > port->dev->features,
> > mask);
> > + rcu_read_unlock();
> >
> > return features;
> > }
> > @@ -410,14 +424,11 @@ static void hsr_announce(struct timer_list *t)
> >
> > hsr = timer_container_of(hsr, t, announce_timer);
> >
> > - rcu_read_lock();
> > master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
> > hsr->proto_ops->send_sv_frame(master, &interval, master->dev->dev_addr);
>
> hsr_announce() is a timer func, and what protects master after
> rcu_read_unlock() in hsr_port_get_hsr() ?
hsr_port_get_hsr() is called in more places thank hsr_for_each_port().
That's why I set the rcu read lock in side of of hsr_port_get_hsr().
How about using dev_hold() to protect master device?
Thanks
Hangbin
^ permalink raw reply [flat|nested] 4+ messages in thread
* [syzbot ci] Re: hsr: add rcu lock for all hsr_for_each_port caller
2025-08-26 4:11 [PATCH net] hsr: add rcu lock for all hsr_for_each_port caller Hangbin Liu
2025-08-26 5:01 ` Kuniyuki Iwashima
@ 2025-08-26 8:51 ` syzbot ci
1 sibling, 0 replies; 4+ messages in thread
From: syzbot ci @ 2025-08-26 8:51 UTC (permalink / raw)
To: aleksander.lobakin, arvid.brodin, danishanwar, davem, edumazet,
ffmancera, horms, jkarrenpalo, johannes.berg, kuba, kuniyu,
liaoyu15, liuhangbin, m-karicheri2, netdev, pabeni, sdf,
shaw.leon, w-kwok2
Cc: syzbot, syzkaller-bugs
syzbot ci has tested the following series
[v1] hsr: add rcu lock for all hsr_for_each_port caller
https://lore.kernel.org/all/20250826041148.426598-1-liuhangbin@gmail.com
* [PATCH net] hsr: add rcu lock for all hsr_for_each_port caller
and found the following issue:
BUG: sleeping function called from invalid context in dev_set_allmulti
Full report is available here:
https://ci.syzbot.org/series/3992f7f8-7052-4440-bc88-86be6f350cec
***
BUG: sleeping function called from invalid context in dev_set_allmulti
tree: net
URL: https://kernel.googlesource.com/pub/scm/linux/kernel/git/netdev/net.git
base: 51f27beeb79f9f92682158999bab489ff4fa16f6
arch: amd64
compiler: Debian clang version 20.1.7 (++20250616065708+6146a88f6049-1~exp1~20250616065826.132), Debian LLD 20.1.7
config: https://ci.syzbot.org/builds/1a20cfb3-c3a6-4ba9-9bda-2f49b971b39c/config
C repro: https://ci.syzbot.org/findings/9de559bc-f498-4b86-ab2e-34f1510e4fe4/c_repro
syz repro: https://ci.syzbot.org/findings/9de559bc-f498-4b86-ab2e-34f1510e4fe4/syz_repro
hsr1: entered promiscuous mode
hsr1: entered allmulticast mode
bond0: entered allmulticast mode
bond_slave_0: entered allmulticast mode
bond_slave_1: entered allmulticast mode
BUG: sleeping function called from invalid context at kernel/locking/mutex.c:575
in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 5996, name: syz.0.17
preempt_count: 0, expected: 0
RCU nest depth: 1, expected: 0
3 locks held by syz.0.17/5996:
#0: ffffffff8fa59670 (&ops->srcu#2){.+.+}-{0:0}, at: rcu_lock_acquire include/linux/rcupdate.h:331 [inline]
#0: ffffffff8fa59670 (&ops->srcu#2){.+.+}-{0:0}, at: rcu_read_lock include/linux/rcupdate.h:841 [inline]
#0: ffffffff8fa59670 (&ops->srcu#2){.+.+}-{0:0}, at: rtnl_link_ops_get+0x23/0x250 net/core/rtnetlink.c:570
#1: ffffffff8f537c08 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_lock net/core/rtnetlink.c:80 [inline]
#1: ffffffff8f537c08 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_nets_lock net/core/rtnetlink.c:341 [inline]
#1: ffffffff8f537c08 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x8db/0x1c70 net/core/rtnetlink.c:4056
#2: ffffffff8e139ea0 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire include/linux/rcupdate.h:331 [inline]
#2: ffffffff8e139ea0 (rcu_read_lock){....}-{1:3}, at: rcu_read_lock include/linux/rcupdate.h:841 [inline]
#2: ffffffff8e139ea0 (rcu_read_lock){....}-{1:3}, at: hsr_change_rx_flags+0x28/0x2d0 net/hsr/hsr_device.c:522
CPU: 0 UID: 0 PID: 5996 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120
__might_resched+0x495/0x610 kernel/sched/core.c:8957
__mutex_lock_common kernel/locking/mutex.c:575 [inline]
__mutex_lock+0x109/0x1360 kernel/locking/mutex.c:760
netdev_lock include/linux/netdevice.h:2761 [inline]
netdev_lock_ops include/net/netdev_lock.h:42 [inline]
dev_set_allmulti+0x10e/0x260 net/core/dev_api.c:312
hsr_change_rx_flags+0x1b2/0x2d0 net/hsr/hsr_device.c:530
dev_change_rx_flags net/core/dev.c:9332 [inline]
netif_set_allmulti+0x212/0x380 net/core/dev.c:9430
__dev_change_flags+0x52e/0x6d0 net/core/dev.c:9571
rtnl_configure_link net/core/rtnetlink.c:3579 [inline]
rtnl_newlink_create+0x555/0xb00 net/core/rtnetlink.c:3835
__rtnl_newlink net/core/rtnetlink.c:3942 [inline]
rtnl_newlink+0x16d6/0x1c70 net/core/rtnetlink.c:4057
rtnetlink_rcv_msg+0x7cf/0xb70 net/core/rtnetlink.c:6946
netlink_rcv_skb+0x208/0x470 net/netlink/af_netlink.c:2552
netlink_unicast_kernel net/netlink/af_netlink.c:1320 [inline]
netlink_unicast+0x82f/0x9e0 net/netlink/af_netlink.c:1346
netlink_sendmsg+0x805/0xb30 net/netlink/af_netlink.c:1896
sock_sendmsg_nosec net/socket.c:714 [inline]
__sock_sendmsg+0x21c/0x270 net/socket.c:729
____sys_sendmsg+0x505/0x830 net/socket.c:2614
___sys_sendmsg+0x21f/0x2a0 net/socket.c:2668
__sys_sendmsg net/socket.c:2700 [inline]
__do_sys_sendmsg net/socket.c:2705 [inline]
__se_sys_sendmsg net/socket.c:2703 [inline]
__x64_sys_sendmsg+0x19b/0x260 net/socket.c:2703
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7ff4b418ebe9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffef8386cc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007ff4b43b5fa0 RCX: 00007ff4b418ebe9
RDX: 00000000000080c0 RSI: 00002000000002c0 RDI: 0000000000000003
RBP: 00007ff4b4211e19 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007ff4b43b5fa0 R14: 00007ff4b43b5fa0 R15: 0000000000000003
</TASK>
=============================
[ BUG: Invalid wait context ]
syzkaller #0 Tainted: G W
-----------------------------
syz.0.17/5996 is trying to lock:
ffff888110848d30 (&dev_instance_lock_key#3){+.+.}-{4:4}, at: netdev_lock include/linux/netdevice.h:2761 [inline]
ffff888110848d30 (&dev_instance_lock_key#3){+.+.}-{4:4}, at: netdev_lock_ops include/net/netdev_lock.h:42 [inline]
ffff888110848d30 (&dev_instance_lock_key#3){+.+.}-{4:4}, at: dev_set_allmulti+0x10e/0x260 net/core/dev_api.c:312
other info that might help us debug this:
context-{5:5}
3 locks held by syz.0.17/5996:
#0: ffffffff8fa59670 (&ops->srcu#2){.+.+}-{0:0}, at: rcu_lock_acquire include/linux/rcupdate.h:331 [inline]
#0: ffffffff8fa59670 (&ops->srcu#2){.+.+}-{0:0}, at: rcu_read_lock include/linux/rcupdate.h:841 [inline]
#0: ffffffff8fa59670 (&ops->srcu#2){.+.+}-{0:0}, at: rtnl_link_ops_get+0x23/0x250 net/core/rtnetlink.c:570
#1: ffffffff8f537c08 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_lock net/core/rtnetlink.c:80 [inline]
#1: ffffffff8f537c08 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_nets_lock net/core/rtnetlink.c:341 [inline]
#1: ffffffff8f537c08 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x8db/0x1c70 net/core/rtnetlink.c:4056
#2: ffffffff8e139ea0 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire include/linux/rcupdate.h:331 [inline]
#2: ffffffff8e139ea0 (rcu_read_lock){....}-{1:3}, at: rcu_read_lock include/linux/rcupdate.h:841 [inline]
#2: ffffffff8e139ea0 (rcu_read_lock){....}-{1:3}, at: hsr_change_rx_flags+0x28/0x2d0 net/hsr/hsr_device.c:522
stack backtrace:
CPU: 0 UID: 0 PID: 5996 Comm: syz.0.17 Tainted: G W syzkaller #0 PREEMPT(full)
Tainted: [W]=WARN
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120
print_lock_invalid_wait_context kernel/locking/lockdep.c:4830 [inline]
check_wait_context kernel/locking/lockdep.c:4902 [inline]
__lock_acquire+0xbcb/0xd20 kernel/locking/lockdep.c:5187
lock_acquire+0x120/0x360 kernel/locking/lockdep.c:5868
__mutex_lock_common kernel/locking/mutex.c:598 [inline]
__mutex_lock+0x187/0x1360 kernel/locking/mutex.c:760
netdev_lock include/linux/netdevice.h:2761 [inline]
netdev_lock_ops include/net/netdev_lock.h:42 [inline]
dev_set_allmulti+0x10e/0x260 net/core/dev_api.c:312
hsr_change_rx_flags+0x1b2/0x2d0 net/hsr/hsr_device.c:530
dev_change_rx_flags net/core/dev.c:9332 [inline]
netif_set_allmulti+0x212/0x380 net/core/dev.c:9430
__dev_change_flags+0x52e/0x6d0 net/core/dev.c:9571
rtnl_configure_link net/core/rtnetlink.c:3579 [inline]
rtnl_newlink_create+0x555/0xb00 net/core/rtnetlink.c:3835
__rtnl_newlink net/core/rtnetlink.c:3942 [inline]
rtnl_newlink+0x16d6/0x1c70 net/core/rtnetlink.c:4057
rtnetlink_rcv_msg+0x7cf/0xb70 net/core/rtnetlink.c:6946
netlink_rcv_skb+0x208/0x470 net/netlink/af_netlink.c:2552
netlink_unicast_kernel net/netlink/af_netlink.c:1320 [inline]
netlink_unicast+0x82f/0x9e0 net/netlink/af_netlink.c:1346
netlink_sendmsg+0x805/0xb30 net/netlink/af_netlink.c:1896
sock_sendmsg_nosec net/socket.c:714 [inline]
__sock_sendmsg+0x21c/0x270 net/socket.c:729
____sys_sendmsg+0x505/0x830 net/socket.c:2614
___sys_sendmsg+0x21f/0x2a0 net/socket.c:2668
__sys_sendmsg net/socket.c:2700 [inline]
__do_sys_sendmsg net/socket.c:2705 [inline]
__se_sys_sendmsg net/socket.c:2703 [inline]
__x64_sys_sendmsg+0x19b/0x260 net/socket.c:2703
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7ff4b418ebe9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffef8386cc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007ff4b43b5fa0 RCX: 00007ff4b418ebe9
RDX: 00000000000080c0 RSI: 00002000000002c0 RDI: 0000000000000003
RBP: 00007ff4b4211e19 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007ff4b43b5fa0 R14: 00007ff4b43b5fa0 R15: 0000000000000003
</TASK>
dummy0: entered allmulticast mode
***
If these findings have caused you to resend the series or submit a
separate fix, please add the following tag to your commit message:
Tested-by: syzbot@syzkaller.appspotmail.com
---
This report is generated by a bot. It may contain errors.
syzbot ci engineers can be reached at syzkaller@googlegroups.com.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-08-26 8:51 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-08-26 4:11 [PATCH net] hsr: add rcu lock for all hsr_for_each_port caller Hangbin Liu
2025-08-26 5:01 ` Kuniyuki Iwashima
2025-08-26 6:56 ` Hangbin Liu
2025-08-26 8:51 ` [syzbot ci] " syzbot ci
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).