* [PATCH net] net: add RCU protection to (struct packet_type)->dev
@ 2026-01-31 21:29 Eric Dumazet
2026-02-02 3:16 ` YinFengwei
0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2026-01-31 21:29 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Simon Horman, Willem de Bruijn, netdev, eric.dumazet,
Eric Dumazet, Yin Fengwei
Yin Fengwei reported an RCU stall in ptype_seq_show() and provided a patch.
Real issue is that (struct packet_type)->dev needs RCU protection:
ptype_seq_show() runs under rcu_read_lock(), and reads pt->dev
to get device name without any barrier.
At the same time, concurrent writer can remove a packet_type structure
(which is correctly freed after an RCU grace period) _and_ clear pt->dev
without an RCU grace period.
Fix this issue by using proper RCU on pt->dev pointer.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Yin Fengwei <fengwei_yin@linux.alibaba.com>
Closes: https://lore.kernel.org/netdev/CANn89iKRRKPnWjJmb-_3a=sq+9h6DvTQM4DBZHT5ZRGPMzQaiA@mail.gmail.com/T/#m7b80b9fc9b9267f90e0b7aad557595f686f9c50d
---
drivers/net/ethernet/amd/xgbe/xgbe-selftest.c | 2 +-
.../ethernet/mellanox/mlx5/core/en_selftest.c | 2 +-
.../stmicro/stmmac/stmmac_selftests.c | 12 ++++----
drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 4 +--
drivers/scsi/fcoe/fcoe.c | 6 ++--
include/linux/netdevice.h | 2 +-
net/batman-adv/hard-interface.c | 2 +-
net/core/dev.c | 30 +++++++++++--------
net/core/net-procfs.c | 18 ++++++-----
net/core/selftests.c | 2 +-
net/ncsi/ncsi-manage.c | 2 +-
net/packet/af_packet.c | 24 ++++++++-------
net/tipc/bearer.c | 6 ++--
13 files changed, 61 insertions(+), 51 deletions(-)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c b/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c
index 55e5e467facd7f546ba208361ec9fdcfd7a627d9..006d80a387431cb7d4acdd35f4f1990c8c1f3366 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c
@@ -121,7 +121,7 @@ static int __xgbe_test_loopback(struct xgbe_prv_data *pdata,
tdata->pt.type = htons(ETH_P_IP);
tdata->pt.func = xgbe_test_loopback_validate;
- tdata->pt.dev = pdata->netdev;
+ RCU_INIT_POINTER(tdata->pt.dev, pdata->netdev);
tdata->pt.af_packet_priv = tdata;
tdata->packet = attr;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index fcad464bc4d58af1a7f76cee4cf2088b8889dd0b..d5be21a4c5a3a2635ef69ec60defcb2f665fe205 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -223,7 +223,7 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
lbtp->pt.type = htons(ETH_P_IP);
lbtp->pt.func = mlx5e_test_loopback_validate;
- lbtp->pt.dev = priv->netdev;
+ RCU_INIT_POINTER(lbtp->pt.dev, priv->netdev);
lbtp->pt.af_packet_priv = lbtp;
dev_add_pack(&lbtp->pt);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
index e90a2c469b9a6f576c1b6f99954af08bae69007c..218ff198625e44063e85b717b75b15b1b565ca7b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -333,7 +333,7 @@ static int __stmmac_test_loopback(struct stmmac_priv *priv,
tpriv->pt.type = htons(ETH_P_IP);
tpriv->pt.func = stmmac_test_loopback_validate;
- tpriv->pt.dev = priv->dev;
+ RCU_INIT_POINTER(tpriv->pt.dev, priv->dev);
tpriv->pt.af_packet_priv = tpriv;
tpriv->packet = attr;
@@ -752,7 +752,7 @@ static int stmmac_test_flowctrl(struct stmmac_priv *priv)
init_completion(&tpriv->comp);
tpriv->pt.type = htons(ETH_P_PAUSE);
tpriv->pt.func = stmmac_test_flowctrl_validate;
- tpriv->pt.dev = priv->dev;
+ RCU_INIT_POINTER(tpriv->pt.dev, priv->dev);
tpriv->pt.af_packet_priv = tpriv;
dev_add_pack(&tpriv->pt);
@@ -907,7 +907,7 @@ static int __stmmac_test_vlanfilt(struct stmmac_priv *priv)
tpriv->pt.type = htons(ETH_P_IP);
tpriv->pt.func = stmmac_test_vlan_validate;
- tpriv->pt.dev = priv->dev;
+ RCU_INIT_POINTER(tpriv->pt.dev, priv->dev);
tpriv->pt.af_packet_priv = tpriv;
tpriv->packet = &attr;
@@ -1001,7 +1001,7 @@ static int __stmmac_test_dvlanfilt(struct stmmac_priv *priv)
tpriv->pt.type = htons(ETH_P_8021Q);
tpriv->pt.func = stmmac_test_vlan_validate;
- tpriv->pt.dev = priv->dev;
+ RCU_INIT_POINTER(tpriv->pt.dev, priv->dev);
tpriv->pt.af_packet_priv = tpriv;
tpriv->packet = &attr;
@@ -1278,7 +1278,7 @@ static int stmmac_test_vlanoff_common(struct stmmac_priv *priv, bool svlan)
tpriv->pt.type = svlan ? htons(ETH_P_8021Q) : htons(ETH_P_IP);
tpriv->pt.func = stmmac_test_vlan_validate;
- tpriv->pt.dev = priv->dev;
+ RCU_INIT_POINTER(tpriv->pt.dev, priv->dev);
tpriv->pt.af_packet_priv = tpriv;
tpriv->packet = &attr;
tpriv->vlan_id = 0x123;
@@ -1637,7 +1637,7 @@ static int stmmac_test_arpoffload(struct stmmac_priv *priv)
tpriv->pt.type = htons(ETH_P_ARP);
tpriv->pt.func = stmmac_test_arp_validate;
- tpriv->pt.dev = priv->dev;
+ RCU_INIT_POINTER(tpriv->pt.dev, priv->dev);
tpriv->pt.af_packet_priv = tpriv;
tpriv->packet = &attr;
dev_add_pack(&tpriv->pt);
diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 0f68739d380a0ae67f18aadb1f0b3c6c5f3ee6e5..22ba17b624626edf1e1631d6f1e2a3ef9898e539 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -1257,12 +1257,12 @@ static int bnx2fc_interface_setup(struct bnx2fc_interface *interface)
interface->fip_packet_type.func = bnx2fc_fip_recv;
interface->fip_packet_type.type = htons(ETH_P_FIP);
- interface->fip_packet_type.dev = netdev;
+ RCU_INIT_POINTER(interface->fip_packet_type.dev, netdev);
dev_add_pack(&interface->fip_packet_type);
interface->fcoe_packet_type.func = bnx2fc_rcv;
interface->fcoe_packet_type.type = __constant_htons(ETH_P_FCOE);
- interface->fcoe_packet_type.dev = netdev;
+ RCU_INIT_POINTER(interface->fcoe_packet_type.dev, netdev);
dev_add_pack(&interface->fcoe_packet_type);
return 0;
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index c8c5dfb3ba9a124439f83afabb8d10e1abe4cf58..ea6617b378a5a051a492d5810ee0abc157261cc5 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -352,18 +352,18 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe,
*/
fcoe->fcoe_packet_type.func = fcoe_rcv;
fcoe->fcoe_packet_type.type = htons(ETH_P_FCOE);
- fcoe->fcoe_packet_type.dev = netdev;
+ RCU_INIT_POINTER(fcoe->fcoe_packet_type.dev, netdev);
dev_add_pack(&fcoe->fcoe_packet_type);
fcoe->fip_packet_type.func = fcoe_fip_recv;
fcoe->fip_packet_type.type = htons(ETH_P_FIP);
- fcoe->fip_packet_type.dev = netdev;
+ RCU_INIT_POINTER(fcoe->fip_packet_type.dev, netdev);
dev_add_pack(&fcoe->fip_packet_type);
if (netdev != real_dev) {
fcoe->fip_vlan_packet_type.func = fcoe_fip_vlan_recv;
fcoe->fip_vlan_packet_type.type = htons(ETH_P_FIP);
- fcoe->fip_vlan_packet_type.dev = real_dev;
+ RCU_INIT_POINTER(fcoe->fip_vlan_packet_type.dev, real_dev);
dev_add_pack(&fcoe->fip_vlan_packet_type);
}
return 0;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d99b0fbc1942ad1dbbd372cfb9e809e413251f15..c92889d7c0d51bc218c622f4f3b7019534a38dd6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2931,7 +2931,7 @@ void netif_set_affinity_auto(struct net_device *dev);
struct packet_type {
__be16 type; /* This is really htons(ether_type). */
bool ignore_outgoing;
- struct net_device *dev; /* NULL is wildcarded here */
+ struct net_device __rcu *dev; /* NULL is wildcarded here */
netdevice_tracker dev_tracker;
int (*func) (struct sk_buff *,
struct net_device *,
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 5113f879736b54f0231d0a030dd4bef5a320e9ae..36ce70463ba5ef5dc3549ce9f2a8814b865fc678 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -740,7 +740,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
kref_get(&hard_iface->refcount);
hard_iface->batman_adv_ptype.type = ethertype;
hard_iface->batman_adv_ptype.func = batadv_batman_skb_recv;
- hard_iface->batman_adv_ptype.dev = hard_iface->net_dev;
+ RCU_INIT_POINTER(hard_iface->batman_adv_ptype.dev, hard_iface->net_dev);
dev_add_pack(&hard_iface->batman_adv_ptype);
batadv_info(hard_iface->mesh_iface, "Adding interface: %s\n",
diff --git a/net/core/dev.c b/net/core/dev.c
index ccef685023c299dbd9fc1ccb7a914a282219a327..11d0c598f7d28e824bbd23a670ba75f4561fe810 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -587,16 +587,19 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
+ struct net_device *dev;
+
+ dev = rcu_dereference_protected(pt->dev, lockdep_is_held(&ptype_lock));
+
if (pt->type == htons(ETH_P_ALL)) {
- if (!pt->af_packet_net && !pt->dev)
+ if (!pt->af_packet_net && !dev)
return NULL;
- return pt->dev ? &pt->dev->ptype_all :
- &pt->af_packet_net->ptype_all;
+ return dev ? &dev->ptype_all : &pt->af_packet_net->ptype_all;
}
- if (pt->dev)
- return &pt->dev->ptype_specific;
+ if (dev)
+ return &dev->ptype_specific;
return pt->af_packet_net ? &pt->af_packet_net->ptype_specific :
&ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
@@ -617,13 +620,12 @@ static inline struct list_head *ptype_head(const struct packet_type *pt)
void dev_add_pack(struct packet_type *pt)
{
- struct list_head *head = ptype_head(pt);
-
- if (WARN_ON_ONCE(!head))
- return;
+ struct list_head *head;
spin_lock(&ptype_lock);
- list_add_rcu(&pt->list, head);
+ head = ptype_head(pt);
+ if (!WARN_ON_ONCE(!head))
+ list_add_rcu(&pt->list, head);
spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);
@@ -643,13 +645,15 @@ EXPORT_SYMBOL(dev_add_pack);
*/
void __dev_remove_pack(struct packet_type *pt)
{
- struct list_head *head = ptype_head(pt);
struct packet_type *pt1;
+ struct list_head *head;
+ spin_lock(&ptype_lock);
+
+ head = ptype_head(pt);
if (!head)
- return;
+ goto out;
- spin_lock(&ptype_lock);
list_for_each_entry(pt1, head, list) {
if (pt == pt1) {
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 70e0e9a3b650c0753f0b865642aa372a956a4bf5..160dd729178fd37a6340148d9e35f95bd92aecdb 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -230,11 +230,11 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
pt = v;
nxt = pt->list.next;
- if (pt->dev) {
- if (nxt != &pt->dev->ptype_all)
+ dev = rcu_dereference(pt->dev);
+ if (dev) {
+ if (nxt != &dev->ptype_all)
goto found;
- dev = pt->dev;
for_each_netdev_continue_rcu(seq_file_net(seq), dev) {
if (!list_empty(&dev->ptype_all)) {
nxt = dev->ptype_all.next;
@@ -280,18 +280,22 @@ static void ptype_seq_stop(struct seq_file *seq, void *v)
static int ptype_seq_show(struct seq_file *seq, void *v)
{
struct packet_type *pt = v;
+ struct net_device *dev;
- if (v == SEQ_START_TOKEN)
+ if (v == SEQ_START_TOKEN) {
seq_puts(seq, "Type Device Function\n");
- else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
- (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) {
+ return 0;
+ }
+ dev = rcu_dereference(pt->dev);
+ if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
+ (!dev || net_eq(dev_net(dev), seq_file_net(seq)))) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
seq_printf(seq, "%04x", ntohs(pt->type));
seq_printf(seq, " %-8s %ps\n",
- pt->dev ? pt->dev->name : "", pt->func);
+ dev ? dev->name : "", pt->func);
}
return 0;
diff --git a/net/core/selftests.c b/net/core/selftests.c
index 8b81feb82c4ae719b770a5b5480dd07aaae5a54b..e536d998023bb3fb7dc3a8107bc0777fd5ef4eef 100644
--- a/net/core/selftests.c
+++ b/net/core/selftests.c
@@ -246,7 +246,7 @@ static int __net_test_loopback(struct net_device *ndev,
tpriv->pt.type = htons(ETH_P_IP);
tpriv->pt.func = net_test_loopback_validate;
- tpriv->pt.dev = ndev;
+ rcu_assign_pointer(tpriv->pt.dev, ndev);
tpriv->pt.af_packet_priv = tpriv;
tpriv->packet = attr;
dev_add_pack(&tpriv->pt);
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 446e4e3b9553a0aea936801f545ebc8ca9cdb736..bf1272f33dc18f3731127e7de727001d587ffc7a 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -1799,7 +1799,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
/* Register NCSI packet Rx handler */
ndp->ptype.type = cpu_to_be16(ETH_P_NCSI);
ndp->ptype.func = ncsi_rcv_rsp;
- ndp->ptype.dev = dev;
+ RCU_INIT_POINTER(ndp->ptype.dev, dev);
dev_add_pack(&ndp->ptype);
pdev = to_platform_device(dev->dev.parent);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 494d628d10a5105a6a32788b4673993f218ec881..a3130c790d9cf898fe4070fd9bfcd4fe07817b76 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3118,6 +3118,7 @@ static int packet_release(struct socket *sock)
struct sock *sk = sock->sk;
struct packet_sock *po;
struct packet_fanout *f;
+ struct net_device *dev;
struct net *net;
union tpacket_req_u req_u;
@@ -3137,9 +3138,10 @@ static int packet_release(struct socket *sock)
unregister_prot_hook(sk, false);
packet_cached_dev_reset(po);
- if (po->prot_hook.dev) {
- netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker);
- po->prot_hook.dev = NULL;
+ dev = rcu_dereference_protected(po->prot_hook.dev, 1);
+ if (dev) {
+ netdev_put(dev, &po->prot_hook.dev_tracker);
+ rcu_assign_pointer(po->prot_hook.dev, NULL);
}
spin_unlock(&po->bind_lock);
@@ -3188,8 +3190,8 @@ static int packet_release(struct socket *sock)
static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
__be16 proto)
{
+ struct net_device *odev, *dev = NULL;
struct packet_sock *po = pkt_sk(sk);
- struct net_device *dev = NULL;
bool unlisted = false;
bool need_rehook;
int ret = 0;
@@ -3220,7 +3222,8 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
}
}
- need_rehook = po->prot_hook.type != proto || po->prot_hook.dev != dev;
+ odev = rcu_dereference_protected(po->prot_hook.dev, 1);
+ need_rehook = po->prot_hook.type != proto || odev != dev;
if (need_rehook) {
dev_hold(dev);
@@ -3241,16 +3244,16 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
WRITE_ONCE(po->num, proto);
po->prot_hook.type = proto;
- netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker);
+ netdev_put(odev, &po->prot_hook.dev_tracker);
if (unlikely(unlisted)) {
- po->prot_hook.dev = NULL;
+ RCU_INIT_POINTER(po->prot_hook.dev, NULL);
WRITE_ONCE(po->ifindex, -1);
packet_cached_dev_reset(po);
} else {
netdev_hold(dev, &po->prot_hook.dev_tracker,
GFP_ATOMIC);
- po->prot_hook.dev = dev;
+ rcu_assign_pointer(po->prot_hook.dev, dev);
WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
packet_cached_dev_assign(po, dev);
}
@@ -4209,9 +4212,8 @@ static int packet_notifier(struct notifier_block *this,
if (msg == NETDEV_UNREGISTER) {
packet_cached_dev_reset(po);
WRITE_ONCE(po->ifindex, -1);
- netdev_put(po->prot_hook.dev,
- &po->prot_hook.dev_tracker);
- po->prot_hook.dev = NULL;
+ netdev_put(dev, &po->prot_hook.dev_tracker);
+ rcu_assign_pointer(po->prot_hook.dev, NULL);
}
spin_unlock(&po->bind_lock);
}
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index ae1ddbf71853924cb01c56bf75e40190f48dec45..c8a7ab9ee437f3361f60557e0c7da0639d5beb0f 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -456,7 +456,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
/* Associate TIPC bearer with L2 bearer */
rcu_assign_pointer(b->media_ptr, dev);
- b->pt.dev = dev;
+ RCU_INIT_POINTER(b->pt.dev, dev);
b->pt.type = htons(ETH_P_TIPC);
b->pt.func = tipc_l2_rcv_msg;
dev_add_pack(&b->pt);
@@ -665,7 +665,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
(skb->pkt_type <= PACKET_MULTICAST))) {
skb_mark_not_on_list(skb);
TIPC_SKB_CB(skb)->flags = 0;
- tipc_rcv(dev_net(b->pt.dev), skb, b);
+ tipc_rcv(dev_net(rcu_dereference(b->pt.dev)), skb, b);
rcu_read_unlock();
return NET_RX_SUCCESS;
}
@@ -804,7 +804,7 @@ int tipc_attach_loopback(struct net *net)
return -ENODEV;
netdev_hold(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL);
- tn->loopback_pt.dev = dev;
+ RCU_INIT_POINTER(tn->loopback_pt.dev, dev);
tn->loopback_pt.type = htons(ETH_P_TIPC);
tn->loopback_pt.func = tipc_loopback_rcv_pkt;
dev_add_pack(&tn->loopback_pt);
--
2.53.0.rc1.225.gd81095ad13-goog
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH net] net: add RCU protection to (struct packet_type)->dev
2026-01-31 21:29 [PATCH net] net: add RCU protection to (struct packet_type)->dev Eric Dumazet
@ 2026-02-02 3:16 ` YinFengwei
2026-02-02 4:19 ` Eric Dumazet
0 siblings, 1 reply; 10+ messages in thread
From: YinFengwei @ 2026-02-02 3:16 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Willem de Bruijn, netdev, eric.dumazet, dongchenchen2
+ Chenchen as he hit the exact same issue.
Hi Eric,
> Yin Fengwei reported an RCU stall in ptype_seq_show() and provided a patch.
>
> Real issue is that (struct packet_type)->dev needs RCU protection:
>
> ptype_seq_show() runs under rcu_read_lock(), and reads pt->dev
> to get device name without any barrier.
>
> At the same time, concurrent writer can remove a packet_type structure
> (which is correctly freed after an RCU grace period) _and_ clear pt->dev
> without an RCU grace period.
>
> Fix this issue by using proper RCU on pt->dev pointer.
I can still hit this issue, with the same backtrace, even with this fix applied.
Looking at __dev_remove_pack(): pt->list is protected by ptype_lock, while
prot_hook.dev is protected by bind_lock. Is there anything that guarantees the
procfs interface either sees the list element with a non-NULL prot_hook.dev,
or does not see the list element at all once prot_hook.dev is NULL?
Regards
Yin, Fengwei
>
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Yin Fengwei <fengwei_yin@linux.alibaba.com>
> Closes: https://lore.kernel.org/netdev/CANn89iKRRKPnWjJmb-_3a=sq+9h6DvTQM4DBZHT5ZRGPMzQaiA@mail.gmail.com/T/#m7b80b9fc9b9267f90e0b7aad557595f686f9c50d
> ---
> drivers/net/ethernet/amd/xgbe/xgbe-selftest.c | 2 +-
> .../ethernet/mellanox/mlx5/core/en_selftest.c | 2 +-
> .../stmicro/stmmac/stmmac_selftests.c | 12 ++++----
> drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 4 +--
> drivers/scsi/fcoe/fcoe.c | 6 ++--
> include/linux/netdevice.h | 2 +-
> net/batman-adv/hard-interface.c | 2 +-
> net/core/dev.c | 30 +++++++++++--------
> net/core/net-procfs.c | 18 ++++++-----
> net/core/selftests.c | 2 +-
> net/ncsi/ncsi-manage.c | 2 +-
> net/packet/af_packet.c | 24 ++++++++-------
> net/tipc/bearer.c | 6 ++--
> 13 files changed, 61 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c b/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c
> index 55e5e467facd7f546ba208361ec9fdcfd7a627d9..006d80a387431cb7d4acdd35f4f1990c8c1f3366 100644
> --- a/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c
> +++ b/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c
> @@ -121,7 +121,7 @@ static int __xgbe_test_loopback(struct xgbe_prv_data *pdata,
>
> tdata->pt.type = htons(ETH_P_IP);
> tdata->pt.func = xgbe_test_loopback_validate;
> - tdata->pt.dev = pdata->netdev;
> + RCU_INIT_POINTER(tdata->pt.dev, pdata->netdev);
> tdata->pt.af_packet_priv = tdata;
> tdata->packet = attr;
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net] net: add RCU protection to (struct packet_type)->dev
2026-02-02 3:16 ` YinFengwei
@ 2026-02-02 4:19 ` Eric Dumazet
2026-02-02 7:06 ` YinFengwei
2026-02-02 8:21 ` dongchenchen (A)
0 siblings, 2 replies; 10+ messages in thread
From: Eric Dumazet @ 2026-02-02 4:19 UTC (permalink / raw)
To: YinFengwei
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Willem de Bruijn, netdev, eric.dumazet, dongchenchen2
On Mon, Feb 2, 2026 at 4:16 AM YinFengwei <fengwei_yin@linux.alibaba.com> wrote:
>
> + Chenchen as he hit the exact same issue.
>
> Hi Eric,
>
> > Yin Fengwei reported an RCU stall in ptype_seq_show() and provided a patch.
> >
> > Real issue is that (struct packet_type)->dev needs RCU protection:
> >
> > ptype_seq_show() runs under rcu_read_lock(), and reads pt->dev
> > to get device name without any barrier.
> >
> > At the same time, concurrent writer can remove a packet_type structure
> > (which is correctly freed after an RCU grace period) _and_ clear pt->dev
> > without an RCU grace period.
> >
> > Fix this issue by using proper RCU on pt->dev pointer.
> Still can hit this issue with same backtrace even with this fixing patch.
>
> Look at the __dev_remove_pack(), the pt->list is protected by ptype_lock
> while prot_hook.dev is protected by bind_lock. Could it make sure the
> procfs interface see either the list element with prot_hook.dev not NULL
> or can't see the list element with NULL prot_hook.dev?
>
> Regards
> Yin, Fengwei
Please share the new stack trace (with the symbols), or the repro, thanks !
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net] net: add RCU protection to (struct packet_type)->dev
2026-02-02 4:19 ` Eric Dumazet
@ 2026-02-02 7:06 ` YinFengwei
2026-02-02 8:21 ` dongchenchen (A)
1 sibling, 0 replies; 10+ messages in thread
From: YinFengwei @ 2026-02-02 7:06 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Willem de Bruijn, netdev, eric.dumazet, dongchenchen2
Hi Eric,
> On Mon, Feb 2, 2026 at 4:16 AM YinFengwei <fengwei_yin@linux.alibaba.com> wrote:
> >
> > + Chenchen as he hit the exact same issue.
> >
> > Hi Eric,
> >
> > > Yin Fengwei reported an RCU stall in ptype_seq_show() and provided a patch.
> > >
> > > Real issue is that (struct packet_type)->dev needs RCU protection:
> > >
> > > ptype_seq_show() runs under rcu_read_lock(), and reads pt->dev
> > > to get device name without any barrier.
> > >
> > > At the same time, concurrent writer can remove a packet_type structure
> > > (which is correctly freed after an RCU grace period) _and_ clear pt->dev
> > > without an RCU grace period.
> > >
> > > Fix this issue by using proper RCU on pt->dev pointer.
> > Still can hit this issue with same backtrace even with this fixing patch.
> >
> > Look at the __dev_remove_pack(), the pt->list is protected by ptype_lock
> > while prot_hook.dev is protected by bind_lock. Could it make sure the
> > procfs interface see either the list element with prot_hook.dev not NULL
> > or can't see the list element with NULL prot_hook.dev?
> >
> > Regards
> > Yin, Fengwei
>
> Please share the new stack trace (with the symbols), or the repro, thanks !
About the reproducing environment:
1. We can reproduce it on a bare-metal arm64 server and in a KVM guest on an arm64 server.
So I use virtualization env to reproduce it now.
2. With debian cloud disk image, it took very long to hit it (like days).
3. With a buildroot based disk image from our QA team, it took less than 1 hour
to hit it. Unfortunately, I can't share the buildroot based disk image.
4. I didn't reproduce it on x86 env.
The base commit I applied your fix on top of (so that your copy of the code
matches the decoded stack trace output):
commit 18f7fcd5e69a04df57b563360b88be72471d6b62 (HEAD -> master, tag:
v6.19-rc8, origin/master)
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun Feb 1 14:01:13 2026 -0800
Linux 6.19-rc8
Let me know if you need further information. Thanks.
Regards
Yin, Fengwei
---------------------------------------------------------------------------------
The stack trace is like:
[ 1248.009152] watchdog: BUG: soft lockup - CPU#0 stuck for 149s!
[b2ac940_rcu_sta:12914]
[ 1248.009167] Modules linked in:
[ 1248.009171] CPU: 0 UID: 0 PID: 12914 Comm: b2ac940_rcu_sta Tainted: G L 6.19.0-rc8-00001-gbcedc33b3f13 #8 VOLUNTARY
[ 1248.009241] Tainted: [L]=SOFTLOCKUP
[ 1248.009248] Hardware name: linux,dummy-virt (DT)
[ 1248.009250] pstate: 83400005 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
[ 1248.009251] pc : ptype_seq_next+0x1c/0x180
[ 1248.009257] lr : traverse.part.0+0x88/0x198
[ 1248.009260] sp : ffff800087a13b20
[ 1248.009260] x29: ffff800087a13b20 x28: ffff000004d35900 x27:0000000000000000
[ 1248.009262] x26: 0000000000000000 x25: ffff0000040ddad0 x24:ffff0000040ddac0
[ 1248.009264] x23: 00000000000080db x22: 0000000000000033 x21:0000000000000000
[ 1248.009266] x20: ffff000003bbc158 x19: ffff0000040dda98 x18:0000000000000000
[ 1248.009268] x17: 0000000000000000 x16: 0000000000000000 x15:0000000000000000
[ 1248.009270] x14: 0000000000000000 x13: 0a6e6f6974636e75 x12:4620202020202065
[ 1248.009272] x11: 0000000000000000 x10: 0000000000000001 x9 :ffff8000804d5ac8
[ 1248.009274] x8 : 000000000000000a x7 : ffff8000818cb9e2 x6 :000000000000000a
[ 1248.009289] x5 : 0000000000000000 x4 : ffff8000827d2e40 x3 :0000000772a30439
[ 1248.009290] x2 : ffff0000040ddac0 x1 : ffff000003bbc158 x0 :ffff0000040dda98
[ 1248.009292] Call trace:
[ 1248.009293] ptype_seq_next+0x1c/0x180 (P)
[ 1248.009296] seq_read_iter+0x300/0x500
[ 1248.009297] seq_read+0xe8/0x128
[ 1248.009298] proc_reg_read+0xb8/0x108
[ 1248.009300] do_loop_readv_writev.part.0+0xc0/0x128
[ 1248.009302] vfs_readv+0x178/0x1e0
[ 1248.009303] do_preadv+0x98/0x100
[ 1248.009305] __arm64_sys_preadv+0x28/0x40
[ 1248.009306] invoke_syscall+0x50/0x120
[ 1248.009307] el0_svc_common.constprop.0+0x48/0xf0
[ 1248.009309] do_el0_svc+0x24/0x38
[ 1248.009310] el0_svc+0x38/0x168
[ 1248.009312] el0t_64_sync_handler+0xa0/0xe8
[ 1248.009314] el0t_64_sync+0x1ac/0x1b0
stack trace decode:
[ 1248.009152] watchdog: BUG: soft lockup - CPU#0 stuck for 149s! [b2ac940_rcu_sta:12914]
[ 1248.009167] Modules linked in:
[ 1248.009241] Tainted: [L]=SOFTLOCKUP
[ 1248.009248] Hardware name: linux,dummy-virt (DT)
[ 1248.009250] pstate: 83400005 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
[ 1248.009251] pc : ptype_seq_next (/mnt/ssd/fyin/linux/net/core/net-procfs.c:228)
[ 1248.009257] lr : traverse.part.0 (/mnt/ssd/fyin/linux/fs/seq_file.c:120)
[ 1248.009260] sp : ffff800087a13b20
[ 1248.009260] x29: ffff800087a13b20 x28: ffff000004d35900 x27:0000000000000000
[ 1248.009262] x26: 0000000000000000 x25: ffff0000040ddad0 x24:ffff0000040ddac0
[ 1248.009264] x23: 00000000000080db x22: 0000000000000033 x21:0000000000000000
[ 1248.009266] x20: ffff000003bbc158 x19: ffff0000040dda98 x18:0000000000000000
[ 1248.009268] x17: 0000000000000000 x16: 0000000000000000 x15:0000000000000000
[ 1248.009270] x14: 0000000000000000 x13: 0a6e6f6974636e75 x12:4620202020202065
[ 1248.009272] x11: 0000000000000000 x10: 0000000000000001 x9 :ffff8000804d5ac8
[ 1248.009274] x8 : 000000000000000a x7 : ffff8000818cb9e2 x6 :000000000000000a
[ 1248.009289] x5 : 0000000000000000 x4 : ffff8000827d2e40 x3 :0000000772a30439
[ 1248.009290] x2 : ffff0000040ddac0 x1 : ffff000003bbc158 x0 :ffff0000040dda98
[ 1248.009292] Call trace:
[ 1248.009293] ptype_seq_next (/mnt/ssd/fyin/linux/net/core/net-procfs.c:228) (P)
[ 1248.009296] seq_read_iter (/mnt/ssd/fyin/linux/fs/seq_file.c:101 /mnt/ssd/fyin/linux/fs/seq_file.c:195)
[ 1248.009297] seq_read (/mnt/ssd/fyin/linux/fs/seq_file.c:163)
[ 1248.009298] proc_reg_read (/mnt/ssd/fyin/linux/fs/proc/inode.c:308 /mnt/ssd/fyin/linux/fs/proc/inode.c:320)
[ 1248.009300] do_loop_readv_writev.part.0 (/mnt/ssd/fyin/linux/fs/read_write.c:850)
[ 1248.009302] vfs_readv (/mnt/ssd/fyin/linux/fs/read_write.c:840 /mnt/ssd/fyin/linux/fs/read_write.c:1020)
[ 1248.009303] do_preadv (/mnt/ssd/fyin/linux/fs/read_write.c:1132)
[ 1248.009305] __arm64_sys_preadv (/mnt/ssd/fyin/linux/fs/read_write.c:1174)
[ 1248.009306] invoke_syscall (/mnt/ssd/fyin/linux/./arch/arm64/include/asm/current.h:19 /mnt/ssd/fyin/linux/arch/arm64/kernel/syscall.c:54)
[ 1248.009307] el0_svc_common.constprop.0 (/mnt/ssd/fyin/linux/./include/linux/thread_info.h:140 /mnt/ssd/fyin/linux/arch/arm64/kernel/syscall.c:140)
[ 1248.009309] do_el0_svc (/mnt/ssd/fyin/linux/arch/arm64/kernel/syscall.c:152)
[ 1248.009310] el0_svc (/mnt/ssd/fyin/linux/./arch/arm64/include/asm/alternative-macros.h:254 /mnt/ssd/fyin/linux/./arch/arm64/include/asm/cpufeature.h:809 /mnt/ssd/fyin/linux/./arch/arm64/include/asm/irqflags.h:73 /mnt/ssd/fyin/linux/arch/arm64/kernel/entry-common.c:80 /mnt/ssd/fyin/linux/arch/arm64/kernel/entry-common.c:725)
[ 1248.009312] el0t_64_sync_handler (/mnt/ssd/fyin/linux/arch/arm64/kernel/entry-common.c:744)
[ 1248.009314] el0t_64_sync (/mnt/ssd/fyin/linux/arch/arm64/kernel/entry.S:596)
The C reproducer:
// autogenerated by syzkaller (https://github.com/google/syzkaller)
#define _GNU_SOURCE
#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include <linux/futex.h>
#ifndef __NR_bind
#define __NR_bind 200
#endif
#ifndef __NR_close
#define __NR_close 57
#endif
#ifndef __NR_ioctl
#define __NR_ioctl 29
#endif
#ifndef __NR_mmap
#define __NR_mmap 222
#endif
#ifndef __NR_openat
#define __NR_openat 56
#endif
#ifndef __NR_preadv
#define __NR_preadv 69
#endif
#ifndef __NR_socket
#define __NR_socket 198
#endif
/* Suspend the calling thread for about `ms` milliseconds (via usleep()). */
static void sleep_ms(uint64_t ms)
{
	const uint64_t usec = ms * 1000;

	usleep(usec);
}
/*
 * Return the CLOCK_MONOTONIC time in whole milliseconds.
 * The process exits with status 1 if the clock cannot be read.
 */
static uint64_t current_time_ms(void)
{
	struct timespec now;
	uint64_t ms;

	if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
		exit(1);

	ms = (uint64_t)now.tv_sec * 1000;
	ms += (uint64_t)now.tv_nsec / 1000000;
	return ms;
}
/*
 * Start a new thread running fn(arg) on a 128 KiB stack.
 *
 * Creation is attempted up to 100 times; an EAGAIN failure is retried after
 * a 50 microsecond pause, any other failure stops the retries. If no thread
 * could be created the process exits with status 1. The thread handle is
 * not returned to the caller.
 */
static void thread_start(void* (*fn)(void*), void* arg)
{
	pthread_t th;
	pthread_attr_t attr;

	pthread_attr_init(&attr);
	pthread_attr_setstacksize(&attr, 128 << 10);
	for (int i = 0; i < 100; i++) {
		/*
		 * pthread_create() reports failure through its return value;
		 * it is not specified to set errno, so test the returned code.
		 */
		int err = pthread_create(&th, &attr, fn, arg);

		if (err == 0) {
			pthread_attr_destroy(&attr);
			return;
		}
		if (err != EAGAIN)
			break;
		usleep(50);
	}
	exit(1);
}
/*
 * Minimal one-shot event used to hand work between threads.
 * `state` is 0 while the event is unset and 1 once event_set() has run;
 * the same word is passed to the futex syscall for waiting and waking.
 */
typedef struct {
int state;
} event_t;
/* Put a fresh event into the unset state (state == 0). */
static void event_init(event_t* ev)
{
	*ev = (event_t){.state = 0};
}
/* Clear an event back to the unset state with a plain store. */
static void event_reset(event_t* ev)
{
	int* word = &ev->state;

	*word = 0;
}
/*
 * Mark the event as set and wake threads sleeping on it.
 * Setting an event that is already set terminates the process with status 1.
 */
static void event_set(event_t* ev)
{
	int* word = &ev->state;

	if (*word != 0)
		exit(1);

	__atomic_store_n(word, 1, __ATOMIC_RELEASE);
	/* Wake up to 1000000 waiters blocked on the state word. */
	syscall(SYS_futex, word, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000);
}
/*
 * Block until the event is set. Each time the state word is still 0 the
 * thread sleeps on it with FUTEX_WAIT (no timeout) and re-checks on wakeup.
 */
static void event_wait(event_t* ev)
{
	for (;;) {
		if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
			return;
		syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
	}
}
/* Return the current state word (non-zero once the event has been set). */
static int event_isset(event_t* ev)
{
	int signalled = __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);

	return signalled;
}
/*
 * Wait for the event to be set, for at most `timeout` milliseconds.
 *
 * Returns 1 if the event was observed set after a wait, 0 once more than
 * `timeout` milliseconds have elapsed without that happening.
 */
static int event_timedwait(event_t* ev, uint64_t timeout)
{
	const uint64_t begin = current_time_ms();
	uint64_t elapsed = 0;

	do {
		/* Sleep only for whatever part of the budget is still left. */
		uint64_t left = timeout - elapsed;
		struct timespec ts;

		ts.tv_sec = left / 1000;
		ts.tv_nsec = (left % 1000) * 1000 * 1000;
		syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
		if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
			return 1;
		elapsed = current_time_ms() - begin;
	} while (elapsed <= timeout);

	return 0;
}
/*
 * Format a printf-style message (truncated to a 1024-byte buffer) and write
 * it to an existing file opened write-only.
 *
 * Returns true only when the file could be opened and the whole message was
 * written in a single write() call. When the write falls short, errno as it
 * was right after write() is restored after the descriptor is closed.
 */
static bool write_file(const char* file, const char* what, ...)
{
	char text[1024];
	va_list ap;

	va_start(ap, what);
	vsnprintf(text, sizeof(text), what, ap);
	va_end(ap);
	text[sizeof(text) - 1] = 0;

	int nbytes = strlen(text);
	int fd = open(file, O_WRONLY | O_CLOEXEC);
	if (fd == -1)
		return false;

	bool complete = write(fd, text, nbytes) == nbytes;
	int saved_errno = errno;

	close(fd);
	if (!complete)
		errno = saved_errno;
	return complete;
}
/*
 * Open a procfs entry whose name is the C string pointed to by a1.
 *
 * a0 selects the directory: 0 -> /proc/self, -1 -> /proc/thread-self,
 * anything else -> /proc/self/task/<a0>. The file is opened read-write if
 * possible, otherwise read-only. Returns the descriptor, or -1 on failure.
 */
static long syz_open_procfs(volatile long a0, volatile long a1)
{
	char path[128];
	const char* entry = (char*)a1;
	long selector = a0;
	int fd;

	memset(path, 0, sizeof(path));
	switch (selector) {
	case 0:
		snprintf(path, sizeof(path), "/proc/self/%s", entry);
		break;
	case -1:
		snprintf(path, sizeof(path), "/proc/thread-self/%s", entry);
		break;
	default:
		snprintf(path, sizeof(path), "/proc/self/task/%d/%s", (int)selector, entry);
		break;
	}

	fd = open(path, O_RDWR);
	if (fd == -1)
		fd = open(path, O_RDONLY);
	return fd;
}
/*
 * SIGKILL the process group -pid and the process pid, then reap pid.
 *
 * The child is polled with a non-blocking waitpid() up to 100 times, 1 ms
 * apart. If it still has not been reaped, one byte is written to every
 * /sys/fs/fuse/connections/<name>/abort file that can be opened (presumably
 * to release processes stuck on FUSE requests -- not established by this
 * file), and the function then blocks in waitpid() until pid is reaped.
 * The wait status is stored through `status`.
 */
static void kill_and_wait(int pid, int* status)
{
	int attempts = 100;

	kill(-pid, SIGKILL);
	kill(pid, SIGKILL);
	while (attempts-- > 0) {
		if (waitpid(-1, status, WNOHANG | __WALL) == pid)
			return;
		usleep(1000);
	}

	DIR* conns = opendir("/sys/fs/fuse/connections");
	if (conns != NULL) {
		struct dirent* ent;

		while ((ent = readdir(conns)) != NULL) {
			if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
				continue;

			char abort_path[300];
			snprintf(abort_path, sizeof(abort_path), "/sys/fs/fuse/connections/%s/abort",
				 ent->d_name);

			int fd = open(abort_path, O_WRONLY);
			if (fd == -1)
				continue;
			/* Best effort: the result of the write is deliberately ignored. */
			if (write(fd, abort_path, 1) < 0) {
			}
			close(fd);
		}
		closedir(conns);
	}

	while (waitpid(-1, status, __WALL) != pid)
		;
}
// Per-iteration setup executed in the freshly forked test process.
static void setup_test()
{
	const char* oom_adj_file = "/proc/self/oom_score_adj";

	// Ask the kernel to SIGKILL this process when its parent dies.
	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
	// Own process group, so the parent can kill the whole group at once.
	setpgrp();
	// Raise the OOM score adjustment so this process is a preferred victim.
	write_file(oom_adj_file, "1000");
}
// State of one worker thread used to run reproducer calls.
struct thread_t {
	// Non-zero once the worker thread has been started.
	int created;
	// Index of the call the worker has to execute next.
	int call;
	// Set by the dispatcher when `call` is ready to be executed.
	event_t ready;
	// Set by the worker when it has finished executing `call`.
	event_t done;
};

// Fixed pool of workers; slots are initialised lazily on first use.
static struct thread_t threads[16];

static void execute_call(int call);

// Number of calls handed to workers that have not completed yet.
static int running;
// Worker thread body: forever waits for a call to be posted on its slot,
// executes it, then reports completion.
static void* thr(void* arg)
{
	struct thread_t* self = (struct thread_t*)arg;

	while (1) {
		event_wait(&self->ready);
		// Re-arm `ready` before running so the next call can be posted.
		event_reset(&self->ready);
		execute_call(self->call);
		__atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
		event_set(&self->done);
	}
	return 0;
}
static void execute_one(void)
{
if (write(1, "executing program\n", sizeof("executing program\n") - 1)) {
}
int i, call, thread;
for (call = 0; call < 9; call++) {
for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
thread++) {
struct thread_t* th = &threads[thread];
if (!th->created) {
th->created = 1;
event_init(&th->ready);
event_init(&th->done);
event_set(&th->done);
thread_start(thr, th);
}
if (!event_isset(&th->done))
continue;
event_reset(&th->done);
th->call = call;
__atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
event_set(&th->ready);
if (call == 6)
break;
event_timedwait(&th->done, 50);
break;
}
}
for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
sleep_ms(1);
}
static void execute_one(void);
#define WAIT_FLAGS __WALL
// Endless driver loop: each iteration forks a child that runs the program
// once, then supervises it. The child is polled every 10ms; if it has not
// been reaped within 5 seconds it is killed.
static void loop(void)
{
	int iter = 0;

	while (1) {
		int pid = fork();
		if (pid < 0)
			exit(1);
		if (pid == 0) {
			setup_test();
			execute_one();
			exit(0);
		}

		int status = 0;
		const uint64_t started = current_time_ms();
		int finished = 0;
		while (!finished) {
			sleep_ms(10);
			if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid) {
				finished = 1;
			} else if (current_time_ms() - started >= 5000) {
				kill_and_wait(pid, &status);
				finished = 1;
			}
		}
		iter++;
	}
}
// Results shared between calls of the program:
//   r[0] - packet socket fd (call 1)
//   r[1] - netlink socket fd (call 2)
//   r[2] - interface index read back after the SIOCGIFINDEX ioctl (call 3)
//   r[3] - fd returned by syz_open_procfs for "net/ptype" (call 5)
uint64_t r[4] = {0xffffffffffffffff, 0xffffffffffffffff, 0x0,
0xffffffffffffffff};
// Executes step `call` (0..8) of the generated program. Argument structures
// are built at fixed addresses in the 0x20000000 region; any other value of
// `call` does nothing.
void execute_call(int call)
{
intptr_t res = 0;
switch (call) {
case 0:
// close arguments: [
// fd: fd (resource)
// ]
syscall(__NR_close, /*fd=*/3);
break;
case 1:
// socket$packet arguments: [
// domain: const = 0x11 (8 bytes)
// type: packet_socket_type = 0x3 (8 bytes)
// proto: const = 0x300 (4 bytes)
// ]
// returns sock_packet
res = syscall(__NR_socket, /*domain=*/0x11ul, /*type=SOCK_RAW*/ 3ul,
/*proto=*/0x300);
if (res != -1)
r[0] = res;
break;
case 2:
// socket$nl_audit arguments: [
// domain: const = 0x10 (8 bytes)
// type: const = 0x3 (8 bytes)
// proto: const = 0x9 (4 bytes)
// ]
// returns sock_nl_audit
res = syscall(__NR_socket, /*domain=*/0x10ul, /*type=*/3ul, /*proto=*/9);
if (res != -1)
r[1] = res;
break;
case 3:
// ioctl$sock_SIOCGIFINDEX arguments: [
// fd: sock (resource)
// cmd: const = 0x8933 (4 bytes)
// arg: ptr[out, ifreq_dev_t[devnames, ifindex]] {
// ifreq_dev_t[devnames, ifindex] {
// ifr_ifrn: buffer: {6c 6f 00 00 00 00 00 00 00 00 00 00 00 00 00
// 00} (length 0x10) elem: ifindex (resource) pad = 0x0 (20 bytes)
// }
// }
// ]
// The 16-byte name field is filled with "lo"; on success the index is
// read from the 4 bytes that follow the name.
memcpy((void*)0x20000100,
"lo\000\000\000\000\000\000\000\000\000\000\000\000\000\000", 16);
res =
syscall(__NR_ioctl, /*fd=*/r[1], /*cmd=*/0x8933, /*arg=*/0x20000100ul);
if (res != -1)
r[2] = *(uint32_t*)0x20000110;
break;
case 4:
// bind$packet arguments: [
// fd: sock_packet (resource)
// addr: ptr[in, sockaddr_ll] {
// sockaddr_ll {
// sll_family: const = 0x11 (2 bytes)
// sll_protocol: packet_protocols = 0x3 (2 bytes)
// sll_ifindex: ifindex (resource)
// sll_hatype: const = 0x1 (2 bytes)
// sll_pkttype: int8 = 0xc1 (1 bytes)
// sll_halen: const = 0x6 (1 bytes)
// sll_addr: union mac_addr {
// local: mac_addr_t[const[0xaa, int8]] {
// a0: buffer: {aa aa aa aa aa} (length 0x5)
// a1: const = 0xaa (1 bytes)
// }
// }
// pad: buffer: {00 00} (length 0x2)
// }
// }
// addrlen: len = 0x14 (8 bytes)
// ]
// Binds the packet socket from call 1 to the interface index from call 3.
*(uint16_t*)0x20001340 = 0x11;
*(uint16_t*)0x20001342 = htobe16(3);
*(uint32_t*)0x20001344 = r[2];
*(uint16_t*)0x20001348 = 1;
*(uint8_t*)0x2000134a = 0xc1;
*(uint8_t*)0x2000134b = 6;
memset((void*)0x2000134c, 170, 5);
*(uint8_t*)0x20001351 = 0xaa;
memset((void*)0x20001352, 0, 2);
syscall(__NR_bind, /*fd=*/r[0], /*addr=*/0x20001340ul, /*addrlen=*/0x14ul);
break;
case 5:
// syz_open_procfs arguments: [
// pid: pid (resource)
// file: ptr[in, buffer] {
// buffer: {6e 65 74 2f 70 74 79 70 65 00} (length 0xa)
// }
// ]
// returns fd
// With pid 0 this opens /proc/self/net/ptype.
memcpy((void*)0x20000180, "net/ptype\000", 10);
res = -1;
res = syz_open_procfs(/*pid=*/0, /*file=*/0x20000180);
if (res != -1)
r[3] = res;
break;
case 6:
// preadv arguments: [
// fd: fd (resource)
// vec: ptr[in, array[iovec[out, array[int8]]]] {
// array[iovec[out, array[int8]]] {
// iovec[out, array[int8]] {
// addr: ptr[out, buffer] {
// buffer: (DirOut)
// }
// len: len = 0xe8 (8 bytes)
// }
// }
// }
// vlen: len = 0x1 (8 bytes)
// off_low: int32 = 0x80db (4 bytes)
// off_high: int32 = 0x2 (4 bytes)
// ]
// Reads from the fd opened in call 5 into a single 0xe8-byte iovec.
*(uint64_t*)0x20001840 = 0x20000280;
*(uint64_t*)0x20001848 = 0xe8;
syscall(__NR_preadv, /*fd=*/r[3], /*vec=*/0x20001840ul, /*vlen=*/1ul,
/*off_low=*/0x80db, /*off_high=*/2);
break;
case 7:
// close arguments: [
// fd: fd (resource)
// ]
syscall(__NR_close, /*fd=*/3);
break;
case 8:
// openat arguments: [
// fd: fd_dir (resource)
// file: nil
// flags: open_flags = 0x42 (4 bytes)
// mode: open_mode = 0x17f (2 bytes)
// ]
// returns fd
syscall(
__NR_openat, /*fd=*/0xffffff9c, /*file=*/0ul,
/*flags=O_CREAT|O_RDWR*/ 0x42,
/*mode=S_IXOTH|S_IWOTH|S_IROTH|S_IXGRP|S_IWGRP|S_IRGRP|S_IXUSR|0x100*/
0x17f);
break;
}
}
// Entry point of the reproducer: maps the fixed-address data region used by
// the program's calls, surrounded by two inaccessible pages, then runs the
// test loop.
int main(void)
{
	// MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE
	const unsigned long map_flags = 0x32ul;
	const unsigned long guard_len = 0x1000ul;

	// Inaccessible page just below the data region.
	syscall(__NR_mmap, /*addr=*/0x1ffff000ul, /*len=*/guard_len, /*prot=*/0ul,
		/*flags=*/map_flags, /*fd=*/(intptr_t)-1, /*offset=*/0ul);
	// Data region at 0x20000000: PROT_WRITE|PROT_READ|PROT_EXEC.
	syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0x1000000ul,
		/*prot=*/7ul, /*flags=*/map_flags, /*fd=*/(intptr_t)-1,
		/*offset=*/0ul);
	// Inaccessible page just above the data region.
	syscall(__NR_mmap, /*addr=*/0x21000000ul, /*len=*/guard_len, /*prot=*/0ul,
		/*flags=*/map_flags, /*fd=*/(intptr_t)-1, /*offset=*/0ul);

	const char* reason;
	(void)reason;
	loop();
	return 0;
}
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net] net: add RCU protection to (struct packet_type)->dev
2026-02-02 4:19 ` Eric Dumazet
2026-02-02 7:06 ` YinFengwei
@ 2026-02-02 8:21 ` dongchenchen (A)
2026-02-02 8:22 ` Eric Dumazet
2026-02-02 8:47 ` Eric Dumazet
1 sibling, 2 replies; 10+ messages in thread
From: dongchenchen (A) @ 2026-02-02 8:21 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Willem de Bruijn, netdev, eric.dumazet, fengwei_yin,
zhangchangzhong
> On Mon, Feb 2, 2026 at 4:16 AM YinFengwei <fengwei_yin@linux.alibaba.com> wrote:
>> + Chenchen as he hit the exact same issue.
>>
>> Hi Eric,
>>
>>> Yin Fengwei reported an RCU stall in ptype_seq_show() and provided a patch.
>>>
>>> Real issue is that (struct packet_type)->dev needs RCU protection:
>>>
>>> ptype_seq_show() runs under rcu_read_lock(), and reads pt->dev
>>> to get device name without any barrier.
>>>
>>> At the same time, concurrent writer can remove a packet_type structure
>>> (which is correctly freed after an RCU grace period) _and_ clear pt->dev
>>> without an RCU grace period.
>>>
>>> Fix this issue by using proper RCU on pt->dev pointer.
>> Still can hit this issue with same backtrace even with this fixing patch.
>>
>> Look at the __dev_remove_pack(), the pt->list is protected by ptype_lock
>> while prot_hook.dev is protected by bind_lock. Could it make sure the
>> procfs interface see either the list element with prot_hook.dev not NULL
>> or can't see the list element with NULL prot_hook.dev?
>>
>> Regards
>> Yin, Fengwei
> Please share the new stack trace (with the symbols), or the repro, thanks !
Hi, Eric.
I encountered a similar issue.
https://lore.kernel.org/all/20260128112348.3950437-1-dongchenchen2@huawei.com/
We can reproduce it using the following method [1][2][3].
I think the reason why using only RCU to protect dev cannot fix this problem
is that pt->list.next is not protected by RCU.
list_del_rcu
__list_del
next->prev = prev;
WRITE_ONCE(prev->next, next);
While traversing the ptype sequence, once the nxt pointer is obtained,
pt can be concurrently deleted or modified. When pt->dev does not correspond
to the list that pt is currently in, the list head detection will no longer
work correctly.
CPU1 CPU2
ptype_seq_next
nxt = pt->list.next;
//nxt = ptype_head(pt) = dev->ptype_all
packet_release/packet_notifier
unregister_prot_hook(sk, false);
//no sync wait, pt->list.next not change
rcu_assign_pointer(po->prot_hook.dev, NULL);
dev = rcu_dereference(pt->dev);
if (pt->dev) //check fail
if (nxt != &pt->dev->ptype_all)
goto found;
if (nxt != &ptype_all) //check success
goto found;
found:
return list_entry(nxt, struct packet_type, list);
//return list head to seq traversal
-----
Best Regards,
Dong Chenchen
[1] gcc packet_type.c -o packet_type_test
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <netinet/in.h>
#include <linux/if_packet.h>
#include <linux/sockios.h>
#include <linux/if_ether.h>
#include <linux/if_arp.h>
#include <linux/if_tun.h>
#include <fcntl.h>
#include <sys/uio.h>
#include <sched.h>
#define TUN_NAME "tun0"
/*
 * Open /dev/net/tun and attach the descriptor to a TUN/TAP interface.
 *
 * dev_name: requested interface name, or NULL to leave the name empty.
 *           Names longer than IFNAMSIZ - 1 bytes are truncated.
 * flags:    value stored in ifr_flags for TUNSETIFF (e.g. IFF_TUN | IFF_NO_PI).
 *
 * Returns the open descriptor on success (the caller closes it), or a
 * negative value after printing a diagnostic with perror().
 */
int tun_create(char *dev_name, int flags) {
	struct ifreq ifr;
	int fd, err;

	if ((fd = open("/dev/net/tun", O_RDWR)) < 0) {
		perror("open /dev/net/tun");
		return -1;
	}

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = flags;
	if (dev_name) {
		/*
		 * Copy at most IFNAMSIZ - 1 bytes: the memset() above then
		 * guarantees ifr_name stays NUL-terminated even for long names.
		 */
		strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
	}

	if ((err = ioctl(fd, TUNSETIFF, (void *)&ifr)) < 0) {
		perror("ioctl TUNSETIFF");
		close(fd);
		return err;
	}
	return fd;
}
/*
 * OR `flag` into the interface flags of `dev_name` (e.g. IFF_UP).
 *
 * Reads the current flags with SIOCGIFFLAGS and writes them back with
 * SIOCSIFFLAGS through a temporary AF_INET datagram socket. Failures are
 * reported with perror() and otherwise ignored; nothing is returned.
 */
void tun_set(const char *dev_name, short flag) {
	struct ifreq ifr;
	int sockfd;

	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
	if (sockfd < 0) {
		perror("socket");
		return;
	}

	memset(&ifr, 0, sizeof(ifr));
	/* IFNAMSIZ - 1 keeps the zeroed last byte as the NUL terminator. */
	strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);

	if (ioctl(sockfd, SIOCGIFFLAGS, &ifr) < 0) {
		perror("ioctl SIOCGIFFLAGS");
		goto out;
	}

	ifr.ifr_flags |= flag;
	if (ioctl(sockfd, SIOCSIFFLAGS, &ifr) < 0)
		perror("ioctl SIOCSIFFLAGS");

out:
	/* Single exit path: the helper socket is always closed. */
	close(sockfd);
}
/*
 * Reproducer: in a new network namespace, create and bring up a TUN device,
 * bind an AF_PACKET/ETH_P_ALL socket to it, keep it bound for one second
 * (the window in which /proc/<pid>/net/ptype is meant to be read by the
 * accompanying script), then close the socket and the TUN descriptor.
 *
 * Exits with status 1 on any setup failure, 0 otherwise.
 */
int main() {
	int tunfd, sockfd;
	struct ifreq ifr;
	struct sockaddr_ll sll;

	/* Without a private netns the rest would act on the caller's netns. */
	if (unshare(CLONE_NEWNET) < 0) {
		perror("unshare");
		exit(1);
	}

	tunfd = tun_create(TUN_NAME, IFF_TUN | IFF_NO_PI);
	if (tunfd < 0) {
		/* tun_create() has already printed the reason. */
		exit(1);
	}
	tun_set(TUN_NAME, IFF_UP);

	sockfd = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL));
	if (sockfd < 0) {
		/* Report before close(): close() may overwrite errno. */
		perror("socket");
		close(tunfd);
		exit(1);
	}

	memset(&ifr, 0, sizeof(ifr));
	/* IFNAMSIZ - 1 keeps the zeroed last byte as the NUL terminator. */
	strncpy(ifr.ifr_name, TUN_NAME, IFNAMSIZ - 1);
	if (ioctl(sockfd, SIOCGIFINDEX, &ifr) < 0) {
		perror("ioctl SIOCGIFINDEX");
		close(sockfd);
		close(tunfd);
		exit(1);
	}

	memset(&sll, 0, sizeof(sll));
	sll.sll_family = AF_PACKET;
	sll.sll_protocol = htons(ETH_P_ALL);
	sll.sll_ifindex = ifr.ifr_ifindex;
	sll.sll_pkttype = PACKET_HOST;
	sll.sll_hatype = ARPHRD_ETHER;
	if (bind(sockfd, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
		perror("bind");
		close(sockfd);
		close(tunfd);
		exit(1);
	}

	printf("begin sleep\n");
	sleep(1);

	close(sockfd);
	close(tunfd);
	return 0;
}
[2] add delay to ptype_seq_next
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 160dd729178f..73f5a20ef57c 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -230,11 +230,14 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
pt = v;
nxt = pt->list.next;
+ if (pt->dev)
+ mdelay(5000);
dev = rcu_dereference(pt->dev);
[3] run_test.sh
./packet_type_test &
cat /proc/$(pgrep -x "packet_type_test")/net/ptype &
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH net] net: add RCU protection to (struct packet_type)->dev
2026-02-02 8:21 ` dongchenchen (A)
@ 2026-02-02 8:22 ` Eric Dumazet
2026-02-02 8:47 ` Eric Dumazet
1 sibling, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2026-02-02 8:22 UTC (permalink / raw)
To: dongchenchen (A)
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Willem de Bruijn, netdev, eric.dumazet, fengwei_yin,
zhangchangzhong
On Mon, Feb 2, 2026 at 9:21 AM dongchenchen (A)
<dongchenchen2@huawei.com> wrote:
>
>
> > On Mon, Feb 2, 2026 at 4:16 AM YinFengwei <fengwei_yin@linux.alibaba.com> wrote:
> >> + Chenchen as he hit the exact same issue.
> >>
> >> Hi Eric,
> >>
> >>> Yin Fengwei reported an RCU stall in ptype_seq_show() and provided a patch.
> >>>
> >>> Real issue is that (struct packet_type)->dev needs RCU protection:
> >>>
> >>> ptype_seq_show() runs under rcu_read_lock(), and reads pt->dev
> >>> to get device name without any barrier.
> >>>
> >>> At the same time, concurrent writer can remove a packet_type structure
> >>> (which is correctly freed after an RCU grace period) _and_ clear pt->dev
> >>> without an RCU grace period.
> >>>
> >>> Fix this issue by using proper RCU on pt->dev pointer.
> >> Still can hit this issue with same backtrace even with this fixing patch.
> >>
> >> Look at the __dev_remove_pack(), the pt->list is protected by ptype_lock
> >> while prot_hook.dev is protected by bind_lock. Could it make sure the
> >> procfs interface see either the list element with prot_hook.dev not NULL
> >> or can't see the list element with NULL prot_hook.dev?
> >>
> >> Regards
> >> Yin, Fengwei
> > Please share the new stack trace (with the symbols), or the repro, thanks !
>
> Hi, Eric.
> I encountered a similar issue.
> https://lore.kernel.org/all/20260128112348.3950437-1-dongchenchen2@huawei.com/
> we can reproduce it using the following method[1][2][3].
>
> I think the reason why using only rcu to protect dev cannot fix this problem
> is that pt->head.nxt is not protected by rcu.
>
> list_del_rcu
> __list_del
> next->prev = prev;
> WRITE_ONCE(prev->next, next);
>
> While traversing the ptype sequence, once the nxt pointer is obtained,
> pt can be concurrently deleted or modified. When pt->dev does not correspond
> to the list that pt is currently in, the list head detection will no longer
> work correctly.
>
> CPU1 CPU2
> ptype_seq_next
> nxt = pt->list.next;
> //nxt = ptype_head(pt) = dev->ptype_all
> packet_release/packet_notifier
> unregister_prot_hook(sk, false);
> //no sync wait, pt->list.next not change
> rcu_assign_pointer(po->prot_hook.dev, NULL);
> dev = rcu_dereference(pt->dev);
> if (pt->dev) //check fail
> if (nxt != &pt->dev->ptype_all)
> goto found;
> if (nxt != &ptype_all) //check success
> goto found;
> found:
> return list_entry(nxt, struct packet_type, list);
> //return list head to seq traversal
>
> -----
> Best Regards,
> Dong Chenchen
Thanks a lot, I will take a look.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net] net: add RCU protection to (struct packet_type)->dev
2026-02-02 8:21 ` dongchenchen (A)
2026-02-02 8:22 ` Eric Dumazet
@ 2026-02-02 8:47 ` Eric Dumazet
2026-02-02 9:14 ` Eric Dumazet
1 sibling, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2026-02-02 8:47 UTC (permalink / raw)
To: dongchenchen (A)
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Willem de Bruijn, netdev, eric.dumazet, fengwei_yin,
zhangchangzhong
On Mon, Feb 2, 2026 at 9:21 AM dongchenchen (A)
<dongchenchen2@huawei.com> wrote:
>
>
> }
>
> [2] add delay to pype_seq_next
> diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
> index 160dd729178f..73f5a20ef57c 100644
> --- a/net/core/net-procfs.c
> +++ b/net/core/net-procfs.c
> @@ -230,11 +230,14 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
>
> pt = v;
> nxt = pt->list.next;
> + if (pt->dev)
> + mdelay(5000);
Waiting 5 seconds while holding RCU is going to fire the soft lockup
detection after ~two calls to ptype_seq_next()
What is the actual trace you got, and what happens if you reduce to
mdelay(1000) ?
> dev = rcu_dereference(pt->dev);
>
> [3] run_test.sh
> ./packet_type_test &
> cat /proc/$(pgrep -x "packet_type_test")/net/ptype &
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net] net: add RCU protection to (struct packet_type)->dev
2026-02-02 8:47 ` Eric Dumazet
@ 2026-02-02 9:14 ` Eric Dumazet
2026-02-02 10:10 ` Eric Dumazet
0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2026-02-02 9:14 UTC (permalink / raw)
To: dongchenchen (A)
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Willem de Bruijn, netdev, eric.dumazet, fengwei_yin,
zhangchangzhong
On Mon, Feb 2, 2026 at 9:47 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Mon, Feb 2, 2026 at 9:21 AM dongchenchen (A)
> <dongchenchen2@huawei.com> wrote:
> >
> >
> > }
> >
> > [2] add delay to pype_seq_next
> > diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
> > index 160dd729178f..73f5a20ef57c 100644
> > --- a/net/core/net-procfs.c
> > +++ b/net/core/net-procfs.c
> > @@ -230,11 +230,14 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
> >
> > pt = v;
> > nxt = pt->list.next;
> > + if (pt->dev)
> > + mdelay(5000);
>
> Waiting 5 seconds while holding RCU is going to fire the soft lockup
> detection after ~two calls to ptype_seq_next()
>
> What is the actual trace you got, and what happens if you reduce to
> mdelay(1000) ?
OK, I took a look, and ptype_get_idx() needs to return information
about what list the returned pt is in.
I will send a V2, thanks.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net] net: add RCU protection to (struct packet_type)->dev
2026-02-02 9:14 ` Eric Dumazet
@ 2026-02-02 10:10 ` Eric Dumazet
2026-02-02 11:27 ` dongchenchen (A)
0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2026-02-02 10:10 UTC (permalink / raw)
To: dongchenchen (A)
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Willem de Bruijn, netdev, eric.dumazet, fengwei_yin,
zhangchangzhong
On Mon, Feb 2, 2026 at 10:14 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Mon, Feb 2, 2026 at 9:47 AM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Mon, Feb 2, 2026 at 9:21 AM dongchenchen (A)
> > <dongchenchen2@huawei.com> wrote:
> > >
> > >
> > > }
> > >
> > > [2] add delay to pype_seq_next
> > > diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
> > > index 160dd729178f..73f5a20ef57c 100644
> > > --- a/net/core/net-procfs.c
> > > +++ b/net/core/net-procfs.c
> > > @@ -230,11 +230,14 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
> > >
> > > pt = v;
> > > nxt = pt->list.next;
> > > + if (pt->dev)
> > > + mdelay(5000);
> >
> > Waiting 5 seconds while holding RCU is going to fire the soft lockup
> > detection after ~two calls to ptype_seq_next()
> >
> > What is the actual trace you got, and what happens if you reduce to
> > mdelay(1000) ?
>
> OK, I took a look, and ptype_get_idx() needs to return information
> about what list the returned pt is in.
>
> I will send a V2 ,thanks.
Here is what I will squash, the diff against V1 is :
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 160dd729178fd37a6340148d9e35f95bd92aecdb..ad63556c9e0abd15cbfac7777c31894d2eef037b
100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -170,8 +170,15 @@ static const struct seq_operations softnet_seq_ops = {
.show = softnet_seq_show,
};
+
+struct ptype_iter_state {
+ struct seq_net_private p;
+ struct net_device *dev;
+};
+
static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
{
+ struct ptype_iter_state *iter = seq->private;
struct list_head *ptype_list = NULL;
struct packet_type *pt = NULL;
struct net_device *dev;
@@ -181,12 +188,16 @@ static void *ptype_get_idx(struct seq_file *seq,
loff_t pos)
for_each_netdev_rcu(seq_file_net(seq), dev) {
ptype_list = &dev->ptype_all;
list_for_each_entry_rcu(pt, ptype_list, list) {
- if (i == pos)
+ if (i == pos) {
+ iter->dev = dev;
return pt;
+ }
++i;
}
}
+ iter->dev = NULL;
+
list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_all, list) {
if (i == pos)
return pt;
@@ -218,6 +229,7 @@ static void *ptype_seq_start(struct seq_file *seq,
loff_t *pos)
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct ptype_iter_state *iter = seq->private;
struct net *net = seq_file_net(seq);
struct net_device *dev;
struct packet_type *pt;
@@ -229,19 +241,21 @@ static void *ptype_seq_next(struct seq_file
*seq, void *v, loff_t *pos)
return ptype_get_idx(seq, 0);
pt = v;
- nxt = pt->list.next;
- dev = rcu_dereference(pt->dev);
+ nxt = READ_ONCE(pt->list.next);
+ dev = iter->dev;
if (dev) {
if (nxt != &dev->ptype_all)
goto found;
for_each_netdev_continue_rcu(seq_file_net(seq), dev) {
- if (!list_empty(&dev->ptype_all)) {
- nxt = dev->ptype_all.next;
+ nxt = READ_ONCE(dev->ptype_all.next);
+ if (nxt != &dev->ptype_all) {
+ iter->dev = NULL;
goto found;
}
}
- nxt = net->ptype_all.next;
+ iter->dev = NULL;
+ nxt = READ_ONCE(net->ptype_all.next);
goto net_ptype_all;
}
@@ -252,20 +266,20 @@ static void *ptype_seq_next(struct seq_file
*seq, void *v, loff_t *pos)
if (nxt == &net->ptype_all) {
/* continue with ->ptype_specific if it's not empty */
- nxt = net->ptype_specific.next;
+ nxt = READ_ONCE(net->ptype_specific.next);
if (nxt != &net->ptype_specific)
goto found;
}
hash = 0;
- nxt = ptype_base[0].next;
+ nxt = READ_ONCE(ptype_base[0].next);
} else
hash = ntohs(pt->type) & PTYPE_HASH_MASK;
while (nxt == &ptype_base[hash]) {
if (++hash >= PTYPE_HASH_SIZE)
return NULL;
- nxt = ptype_base[hash].next;
+ nxt = READ_ONCE(ptype_base[hash].next);
}
found:
return list_entry(nxt, struct packet_type, list);
@@ -279,6 +293,7 @@ static void ptype_seq_stop(struct seq_file *seq, void *v)
static int ptype_seq_show(struct seq_file *seq, void *v)
{
+ struct ptype_iter_state *iter = seq->private;
struct packet_type *pt = v;
struct net_device *dev;
@@ -286,7 +301,7 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "Type Device Function\n");
return 0;
}
- dev = rcu_dereference(pt->dev);
+ dev = iter->dev;
if ((!pt->af_packet_net || net_eq(pt->af_packet_net,
seq_file_net(seq))) &&
(!dev || net_eq(dev_net(dev), seq_file_net(seq)))) {
if (pt->type == htons(ETH_P_ALL))
@@ -319,7 +334,7 @@ static int __net_init dev_proc_net_init(struct net *net)
&softnet_seq_ops))
goto out_dev;
if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops,
- sizeof(struct seq_net_private)))
+ sizeof(struct ptype_iter_state)))
goto out_softnet;
if (wext_proc_init(net))
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net] net: add RCU protection to (struct packet_type)->dev
2026-02-02 10:10 ` Eric Dumazet
@ 2026-02-02 11:27 ` dongchenchen (A)
0 siblings, 0 replies; 10+ messages in thread
From: dongchenchen (A) @ 2026-02-02 11:27 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Willem de Bruijn, netdev, eric.dumazet, fengwei_yin,
zhangchangzhong
> On Mon, Feb 2, 2026 at 10:14 AM Eric Dumazet <edumazet@google.com> wrote:
>> On Mon, Feb 2, 2026 at 9:47 AM Eric Dumazet <edumazet@google.com> wrote:
>>> On Mon, Feb 2, 2026 at 9:21 AM dongchenchen (A)
>>> <dongchenchen2@huawei.com> wrote:
>>>>
>>>> }
>>>>
>>>> [2] add delay to pype_seq_next
>>>> diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
>>>> index 160dd729178f..73f5a20ef57c 100644
>>>> --- a/net/core/net-procfs.c
>>>> +++ b/net/core/net-procfs.c
>>>> @@ -230,11 +230,14 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
>>>>
>>>> pt = v;
>>>> nxt = pt->list.next;
>>>> + if (pt->dev)
>>>> + mdelay(5000);
>>> Waiting 5 seconds while holding RCU is going to fire the soft lockup
>>> detection after ~two calls to ptype_seq_next()
>>>
>>> What is the actual trace you got, and what happens if you reduce to
>>> mdelay(1000) ?
>> OK, I took a look, and ptype_get_idx() needs to return information
>> about what list the returned pt is in.
>>
>> I will send a V2 ,thanks.
> Here is what I will squash, the diff against V1 is :
>
> diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
> index 160dd729178fd37a6340148d9e35f95bd92aecdb..ad63556c9e0abd15cbfac7777c31894d2eef037b
> 100644
> --- a/net/core/net-procfs.c
> +++ b/net/core/net-procfs.c
> @@ -170,8 +170,15 @@ static const struct seq_operations softnet_seq_ops = {
> .show = softnet_seq_show,
> };
>
> +
> +struct ptype_iter_state {
> + struct seq_net_private p;
> + struct net_device *dev;
> +};
> +
> static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
> {
> + struct ptype_iter_state *iter = seq->private;
> struct list_head *ptype_list = NULL;
> struct packet_type *pt = NULL;
> struct net_device *dev;
> @@ -181,12 +188,16 @@ static void *ptype_get_idx(struct seq_file *seq,
> loff_t pos)
> for_each_netdev_rcu(seq_file_net(seq), dev) {
> ptype_list = &dev->ptype_all;
> list_for_each_entry_rcu(pt, ptype_list, list) {
> - if (i == pos)
> + if (i == pos) {
> + iter->dev = dev;
> return pt;
> + }
> ++i;
> }
> }
>
> + iter->dev = NULL;
> +
> list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_all, list) {
> if (i == pos)
> return pt;
> @@ -218,6 +229,7 @@ static void *ptype_seq_start(struct seq_file *seq,
> loff_t *pos)
>
> static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
> {
> + struct ptype_iter_state *iter = seq->private;
> struct net *net = seq_file_net(seq);
> struct net_device *dev;
> struct packet_type *pt;
> @@ -229,19 +241,21 @@ static void *ptype_seq_next(struct seq_file
> *seq, void *v, loff_t *pos)
> return ptype_get_idx(seq, 0);
>
> pt = v;
> - nxt = pt->list.next;
> - dev = rcu_dereference(pt->dev);
> + nxt = READ_ONCE(pt->list.next);
> + dev = iter->dev;
> if (dev) {
> if (nxt != &dev->ptype_all)
> goto found;
>
> for_each_netdev_continue_rcu(seq_file_net(seq), dev) {
> - if (!list_empty(&dev->ptype_all)) {
> - nxt = dev->ptype_all.next;
> + nxt = READ_ONCE(dev->ptype_all.next);
> + if (nxt != &dev->ptype_all) {
> + iter->dev = NULL;
> goto found;
> }
> }
> - nxt = net->ptype_all.next;
> + iter->dev = NULL;
> + nxt = READ_ONCE(net->ptype_all.next);
> goto net_ptype_all;
> }
>
> @@ -252,20 +266,20 @@ static void *ptype_seq_next(struct seq_file
> *seq, void *v, loff_t *pos)
>
> if (nxt == &net->ptype_all) {
> /* continue with ->ptype_specific if it's not empty */
> - nxt = net->ptype_specific.next;
> + nxt = READ_ONCE(net->ptype_specific.next);
> if (nxt != &net->ptype_specific)
> goto found;
> }
>
> hash = 0;
> - nxt = ptype_base[0].next;
> + nxt = READ_ONCE(ptype_base[0].next);
> } else
> hash = ntohs(pt->type) & PTYPE_HASH_MASK;
>
> while (nxt == &ptype_base[hash]) {
> if (++hash >= PTYPE_HASH_SIZE)
> return NULL;
> - nxt = ptype_base[hash].next;
> + nxt = READ_ONCE(ptype_base[hash].next);
> }
> found:
> return list_entry(nxt, struct packet_type, list);
> @@ -279,6 +293,7 @@ static void ptype_seq_stop(struct seq_file *seq, void *v)
>
> static int ptype_seq_show(struct seq_file *seq, void *v)
> {
> + struct ptype_iter_state *iter = seq->private;
> struct packet_type *pt = v;
> struct net_device *dev;
>
> @@ -286,7 +301,7 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
> seq_puts(seq, "Type Device Function\n");
> return 0;
> }
> - dev = rcu_dereference(pt->dev);
> + dev = iter->dev;
> if ((!pt->af_packet_net || net_eq(pt->af_packet_net,
> seq_file_net(seq))) &&
> (!dev || net_eq(dev_net(dev), seq_file_net(seq)))) {
> if (pt->type == htons(ETH_P_ALL))
> @@ -319,7 +334,7 @@ static int __net_init dev_proc_net_init(struct net *net)
> &softnet_seq_ops))
> goto out_dev;
> if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops,
> - sizeof(struct seq_net_private)))
> + sizeof(struct ptype_iter_state)))
> goto out_softnet;
>
> if (wext_proc_init(net))
I verified that this issue can be fixed with this patch.
Thanks a lot!
-----
Best Regards,
Dong Chenchen
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2026-02-02 11:27 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-01-31 21:29 [PATCH net] net: add RCU protection to (struct packet_type)->dev Eric Dumazet
2026-02-02 3:16 ` YinFengwei
2026-02-02 4:19 ` Eric Dumazet
2026-02-02 7:06 ` YinFengwei
2026-02-02 8:21 ` dongchenchen (A)
2026-02-02 8:22 ` Eric Dumazet
2026-02-02 8:47 ` Eric Dumazet
2026-02-02 9:14 ` Eric Dumazet
2026-02-02 10:10 ` Eric Dumazet
2026-02-02 11:27 ` dongchenchen (A)
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox