* [PATCH net-next v2 1/4] net: hsr: serialize seq_blocks merge across nodes
2026-03-26 15:47 [PATCH net-next v2 0/4] net: hsr: address functional and concurrency bugs luka.gejak
@ 2026-03-26 15:47 ` luka.gejak
2026-03-26 15:47 ` [PATCH net-next v2 2/4] net: hsr: fix VLAN add unwind on slave errors luka.gejak
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: luka.gejak @ 2026-03-26 15:47 UTC (permalink / raw)
To: davem, edumazet, kuba, pabeni, netdev
Cc: horms, fmaurer, liuhangbin, linux-kernel, luka.gejak
From: Luka Gejak <luka.gejak@linux.dev>
During node merging, hsr_handle_sup_frame() walks node_curr->seq_blocks
to update node_real without holding node_curr->seq_out_lock. This
allows concurrent mutations from duplicate registration paths, risking
inconsistent state or XArray/bitmap corruption.
Fix this by locking both nodes' seq_out_lock during the merge.
To prevent ABBA deadlocks, locks are acquired in order of memory
address.
Reviewed-by: Felix Maurer <fmaurer@redhat.com>
Signed-off-by: Luka Gejak <luka.gejak@linux.dev>
---
net/hsr/hsr_framereg.c | 38 ++++++++++++++++++++++++++++++++++++--
1 file changed, 36 insertions(+), 2 deletions(-)
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 577fb588bc2f..d09875b33588 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -123,6 +123,40 @@ static void hsr_free_node_rcu(struct rcu_head *rn)
hsr_free_node(node);
}
+static void hsr_lock_seq_out_pair(struct hsr_node *node_a,
+ struct hsr_node *node_b)
+{
+ if (node_a == node_b) {
+ spin_lock_bh(&node_a->seq_out_lock);
+ return;
+ }
+
+ if (node_a < node_b) {
+ spin_lock_bh(&node_a->seq_out_lock);
+ spin_lock_nested(&node_b->seq_out_lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock_bh(&node_b->seq_out_lock);
+ spin_lock_nested(&node_a->seq_out_lock, SINGLE_DEPTH_NESTING);
+ }
+}
+
+static void hsr_unlock_seq_out_pair(struct hsr_node *node_a,
+ struct hsr_node *node_b)
+{
+ if (node_a == node_b) {
+ spin_unlock_bh(&node_a->seq_out_lock);
+ return;
+ }
+
+ if (node_a < node_b) {
+ spin_unlock(&node_b->seq_out_lock);
+ spin_unlock_bh(&node_a->seq_out_lock);
+ } else {
+ spin_unlock(&node_a->seq_out_lock);
+ spin_unlock_bh(&node_b->seq_out_lock);
+ }
+}
+
void hsr_del_nodes(struct list_head *node_db)
{
struct hsr_node *node;
@@ -432,7 +466,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
}
ether_addr_copy(node_real->macaddress_B, ethhdr->h_source);
- spin_lock_bh(&node_real->seq_out_lock);
+ hsr_lock_seq_out_pair(node_real, node_curr);
for (i = 0; i < HSR_PT_PORTS; i++) {
if (!node_curr->time_in_stale[i] &&
time_after(node_curr->time_in[i], node_real->time_in[i])) {
@@ -455,7 +489,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
src_blk->seq_nrs[i], HSR_SEQ_BLOCK_SIZE);
}
}
- spin_unlock_bh(&node_real->seq_out_lock);
+ hsr_unlock_seq_out_pair(node_real, node_curr);
node_real->addr_B_port = port_rcv->type;
spin_lock_bh(&hsr->list_lock);
--
2.53.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH net-next v2 2/4] net: hsr: fix VLAN add unwind on slave errors
2026-03-26 15:47 [PATCH net-next v2 0/4] net: hsr: address functional and concurrency bugs luka.gejak
2026-03-26 15:47 ` [PATCH net-next v2 1/4] net: hsr: serialize seq_blocks merge across nodes luka.gejak
@ 2026-03-26 15:47 ` luka.gejak
2026-03-26 15:47 ` [PATCH net-next v2 3/4] net: hsr: require valid EOT supervision TLV luka.gejak
2026-03-26 15:47 ` [PATCH net-next v2 4/4] net: hsr: reject unresolved interlink ifindex luka.gejak
3 siblings, 0 replies; 5+ messages in thread
From: luka.gejak @ 2026-03-26 15:47 UTC (permalink / raw)
To: davem, edumazet, kuba, pabeni, netdev
Cc: horms, fmaurer, liuhangbin, linux-kernel, luka.gejak
From: Luka Gejak <luka.gejak@linux.dev>
When vlan_vid_add() fails for a secondary slave, the error path calls
vlan_vid_del() on the failing port instead of the peer slave that had
already succeeded. This results in asymmetric VLAN state across the HSR
pair.
Fix this by switching to a centralized unwind path that removes the VID
from any slave device that was already programmed.
Signed-off-by: Luka Gejak <luka.gejak@linux.dev>
---
net/hsr/hsr_device.c | 46 ++++++++++++++++++++++++++------------------
1 file changed, 27 insertions(+), 19 deletions(-)
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 5c3eca2235ce..75c491279df8 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -532,8 +532,8 @@ static void hsr_change_rx_flags(struct net_device *dev, int change)
static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
__be16 proto, u16 vid)
{
- bool is_slave_a_added = false;
- bool is_slave_b_added = false;
+ struct net_device *slave_a_dev = NULL;
+ struct net_device *slave_b_dev = NULL;
struct hsr_port *port;
struct hsr_priv *hsr;
int ret = 0;
@@ -546,29 +546,28 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
continue;
ret = vlan_vid_add(port->dev, proto, vid);
- switch (port->type) {
- case HSR_PT_SLAVE_A:
- if (ret) {
- /* clean up Slave-B */
+ if (ret) {
+ switch (port->type) {
+ case HSR_PT_SLAVE_A:
netdev_err(dev, "add vid failed for Slave-A\n");
- if (is_slave_b_added)
- vlan_vid_del(port->dev, proto, vid);
- return ret;
+ break;
+ case HSR_PT_SLAVE_B:
+ netdev_err(dev, "add vid failed for Slave-B\n");
+ break;
+ default:
+ break;
}
- is_slave_a_added = true;
+ goto unwind;
+ }
+
+ switch (port->type) {
+ case HSR_PT_SLAVE_A:
+ slave_a_dev = port->dev;
break;
case HSR_PT_SLAVE_B:
- if (ret) {
- /* clean up Slave-A */
- netdev_err(dev, "add vid failed for Slave-B\n");
- if (is_slave_a_added)
- vlan_vid_del(port->dev, proto, vid);
- return ret;
- }
-
- is_slave_b_added = true;
+ slave_b_dev = port->dev;
break;
default:
break;
@@ -576,6 +575,15 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
}
return 0;
+
+unwind:
+ if (slave_a_dev)
+ vlan_vid_del(slave_a_dev, proto, vid);
+
+ if (slave_b_dev)
+ vlan_vid_del(slave_b_dev, proto, vid);
+
+ return ret;
}
static int hsr_ndo_vlan_rx_kill_vid(struct net_device *dev,
--
2.53.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH net-next v2 3/4] net: hsr: require valid EOT supervision TLV
2026-03-26 15:47 [PATCH net-next v2 0/4] net: hsr: address functional and concurrency bugs luka.gejak
2026-03-26 15:47 ` [PATCH net-next v2 1/4] net: hsr: serialize seq_blocks merge across nodes luka.gejak
2026-03-26 15:47 ` [PATCH net-next v2 2/4] net: hsr: fix VLAN add unwind on slave errors luka.gejak
@ 2026-03-26 15:47 ` luka.gejak
2026-03-26 15:47 ` [PATCH net-next v2 4/4] net: hsr: reject unresolved interlink ifindex luka.gejak
3 siblings, 0 replies; 5+ messages in thread
From: luka.gejak @ 2026-03-26 15:47 UTC (permalink / raw)
To: davem, edumazet, kuba, pabeni, netdev
Cc: horms, fmaurer, liuhangbin, linux-kernel, luka.gejak
From: Luka Gejak <luka.gejak@linux.dev>
Supervision frames are only valid if terminated with a zero-length EOT
TLV. The current check fails to reject non-EOT entries as the terminal
TLV, potentially allowing malformed supervision traffic.
Fix this by strictly requiring the terminal TLV to be HSR_TLV_EOT
with a length of zero.
Reviewed-by: Felix Maurer <fmaurer@redhat.com>
Signed-off-by: Luka Gejak <luka.gejak@linux.dev>
---
net/hsr/hsr_forward.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index aefc9b6936ba..d26c7d0e8109 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -110,7 +110,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
}
/* end of tlvs must follow at the end */
- if (hsr_sup_tlv->HSR_TLV_type == HSR_TLV_EOT &&
+ if (hsr_sup_tlv->HSR_TLV_type != HSR_TLV_EOT ||
hsr_sup_tlv->HSR_TLV_length != 0)
return false;
--
2.53.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH net-next v2 4/4] net: hsr: reject unresolved interlink ifindex
2026-03-26 15:47 [PATCH net-next v2 0/4] net: hsr: address functional and concurrency bugs luka.gejak
` (2 preceding siblings ...)
2026-03-26 15:47 ` [PATCH net-next v2 3/4] net: hsr: require valid EOT supervision TLV luka.gejak
@ 2026-03-26 15:47 ` luka.gejak
3 siblings, 0 replies; 5+ messages in thread
From: luka.gejak @ 2026-03-26 15:47 UTC (permalink / raw)
To: davem, edumazet, kuba, pabeni, netdev
Cc: horms, fmaurer, liuhangbin, linux-kernel, luka.gejak
From: Luka Gejak <luka.gejak@linux.dev>
In hsr_newlink(), a provided but invalid IFLA_HSR_INTERLINK attribute
was silently ignored if __dev_get_by_index() returned NULL. This leads
to incorrect RedBox topology creation without notifying the user.
Fix this by returning -EINVAL and an extack message when the
interlink attribute is present but cannot be resolved.
Reviewed-by: Felix Maurer <fmaurer@redhat.com>
Signed-off-by: Luka Gejak <luka.gejak@linux.dev>
---
net/hsr/hsr_netlink.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index db0b0af7a692..f0ca23da3ab9 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -76,9 +76,14 @@ static int hsr_newlink(struct net_device *dev,
return -EINVAL;
}
- if (data[IFLA_HSR_INTERLINK])
+ if (data[IFLA_HSR_INTERLINK]) {
interlink = __dev_get_by_index(link_net,
nla_get_u32(data[IFLA_HSR_INTERLINK]));
+ if (!interlink) {
+ NL_SET_ERR_MSG_MOD(extack, "Interlink does not exist");
+ return -EINVAL;
+ }
+ }
if (interlink && interlink == link[0]) {
NL_SET_ERR_MSG_MOD(extack, "Interlink and Slave1 are the same");
--
2.53.0
^ permalink raw reply related [flat|nested] 5+ messages in thread