From: "dongchenchen (A)" <dongchenchen2@huawei.com>
To: Eric Dumazet <edumazet@google.com>,
"David S . Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>,
Willem de Bruijn <willemb@google.com>, <netdev@vger.kernel.org>,
<eric.dumazet@gmail.com>,
Yin Fengwei <fengwei_yin@linux.alibaba.com>
Subject: Re: [PATCH v2 net] net: add RCU protection to (struct packet_type)->dev
Date: Mon, 2 Feb 2026 20:53:51 +0800 [thread overview]
Message-ID: <7befb796-be66-4518-96a5-6df607edd41d@huawei.com> (raw)
In-Reply-To: <20260202102658.858769-1-edumazet@google.com>
在 2026/2/2 18:26, Eric Dumazet 写道:
> Yin Fengwei reported an RCU stall in ptype_seq_show() and provided a patch.
>
> Real issue is that (struct packet_type)->dev needs RCU protection:
>
> ptype_seq_show() runs under rcu_read_lock(), and reads pt->dev
> to get device name without any barrier.
>
> At the same time, concurrent writer can remove a packet_type structure
> (which is correctly freed after an RCU grace period) _and_ clear pt->dev
> without an RCU grace period.
>
> Fix this issue by using proper RCU on pt->dev pointer.
>
> Then, we need to record this dev pointer in ptype_iter_state
> so that ptype_seq_next() can properly detect the end of dev->ptype_all.
>
> We also need to add full RCU protection in ptype_seq_next().
> (Missing READ_ONCE() when reading .next values)
>
> Many thanks to Dong Chenchen for providing a repro.
>
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Fixes: 1d10f8a1f40b ("net-procfs: show net devices bound packet types")
> Fixes: c353e8983e0d ("net: introduce per netns packet chains")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Yin Fengwei <fengwei_yin@linux.alibaba.com>
> Reported-by: Dong Chenchen <dongchenchen2@huawei.com>
> Closes: https://lore.kernel.org/netdev/CANn89iKRRKPnWjJmb-_3a=sq+9h6DvTQM4DBZHT5ZRGPMzQaiA@mail.gmail.com/T/#m7b80b9fc9b9267f90e0b7aad557595f686f9c50d
> ---
> drivers/net/ethernet/amd/xgbe/xgbe-selftest.c | 2 +-
> .../ethernet/mellanox/mlx5/core/en_selftest.c | 2 +-
> .../stmicro/stmmac/stmmac_selftests.c | 12 ++---
> drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 4 +-
> drivers/scsi/fcoe/fcoe.c | 6 +--
> include/linux/netdevice.h | 2 +-
> net/batman-adv/hard-interface.c | 2 +-
> net/core/dev.c | 30 ++++++-----
> net/core/net-procfs.c | 51 +++++++++++++------
> net/core/selftests.c | 2 +-
> net/ncsi/ncsi-manage.c | 2 +-
> net/packet/af_packet.c | 24 +++++----
> net/tipc/bearer.c | 6 +--
> 13 files changed, 85 insertions(+), 60 deletions(-)
Hi, Eric! Obtaining dev from ptype_iter_stateis sufficient to solve this issue, so it seems we do not need to add RCU
protection for dev in ptype procfs. Perhaps the changes related to RCU
could be moved to another patch? -----
Best Regards,
Dong Chenchen
> diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
> index 70e0e9a3b650c0753f0b865642aa372a956a4bf5..ad63556c9e0abd15cbfac7777c31894d2eef037b 100644
> --- a/net/core/net-procfs.c
> +++ b/net/core/net-procfs.c
> @@ -170,8 +170,15 @@ static const struct seq_operations softnet_seq_ops = {
> .show = softnet_seq_show,
> };
>
> +
> +struct ptype_iter_state {
> + struct seq_net_private p;
> + struct net_device *dev;
> +};
> +
> static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
> {
> + struct ptype_iter_state *iter = seq->private;
> struct list_head *ptype_list = NULL;
> struct packet_type *pt = NULL;
> struct net_device *dev;
> @@ -181,12 +188,16 @@ static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
> for_each_netdev_rcu(seq_file_net(seq), dev) {
> ptype_list = &dev->ptype_all;
> list_for_each_entry_rcu(pt, ptype_list, list) {
> - if (i == pos)
> + if (i == pos) {
> + iter->dev = dev;
> return pt;
> + }
> ++i;
> }
> }
>
> + iter->dev = NULL;
> +
> list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_all, list) {
> if (i == pos)
> return pt;
> @@ -218,6 +229,7 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
>
> static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
> {
> + struct ptype_iter_state *iter = seq->private;
> struct net *net = seq_file_net(seq);
> struct net_device *dev;
> struct packet_type *pt;
> @@ -229,19 +241,21 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
> return ptype_get_idx(seq, 0);
>
> pt = v;
> - nxt = pt->list.next;
> - if (pt->dev) {
> - if (nxt != &pt->dev->ptype_all)
> + nxt = READ_ONCE(pt->list.next);
> + dev = iter->dev;
> + if (dev) {
> + if (nxt != &dev->ptype_all)
> goto found;
>
> - dev = pt->dev;
> for_each_netdev_continue_rcu(seq_file_net(seq), dev) {
> - if (!list_empty(&dev->ptype_all)) {
> - nxt = dev->ptype_all.next;
> + nxt = READ_ONCE(dev->ptype_all.next);
> + if (nxt != &dev->ptype_all) {
> + iter->dev = NULL;
> goto found;
> }
> }
> - nxt = net->ptype_all.next;
> + iter->dev = NULL;
> + nxt = READ_ONCE(net->ptype_all.next);
> goto net_ptype_all;
> }
>
> @@ -252,20 +266,20 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
>
> if (nxt == &net->ptype_all) {
> /* continue with ->ptype_specific if it's not empty */
> - nxt = net->ptype_specific.next;
> + nxt = READ_ONCE(net->ptype_specific.next);
> if (nxt != &net->ptype_specific)
> goto found;
> }
>
> hash = 0;
> - nxt = ptype_base[0].next;
> + nxt = READ_ONCE(ptype_base[0].next);
> } else
> hash = ntohs(pt->type) & PTYPE_HASH_MASK;
>
> while (nxt == &ptype_base[hash]) {
> if (++hash >= PTYPE_HASH_SIZE)
> return NULL;
> - nxt = ptype_base[hash].next;
> + nxt = READ_ONCE(ptype_base[hash].next);
> }
> found:
> return list_entry(nxt, struct packet_type, list);
> @@ -279,19 +293,24 @@ static void ptype_seq_stop(struct seq_file *seq, void *v)
>
> static int ptype_seq_show(struct seq_file *seq, void *v)
> {
> + struct ptype_iter_state *iter = seq->private;
> struct packet_type *pt = v;
> + struct net_device *dev;
>
> - if (v == SEQ_START_TOKEN)
> + if (v == SEQ_START_TOKEN) {
> seq_puts(seq, "Type Device Function\n");
> - else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
> - (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) {
> + return 0;
> + }
> + dev = iter->dev;
> + if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
> + (!dev || net_eq(dev_net(dev), seq_file_net(seq)))) {
> if (pt->type == htons(ETH_P_ALL))
> seq_puts(seq, "ALL ");
> else
> seq_printf(seq, "%04x", ntohs(pt->type));
>
> seq_printf(seq, " %-8s %ps\n",
> - pt->dev ? pt->dev->name : "", pt->func);
> + dev ? dev->name : "", pt->func);
> }
>
> return 0;
> @@ -315,7 +334,7 @@ static int __net_init dev_proc_net_init(struct net *net)
> &softnet_seq_ops))
> goto out_dev;
> if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops,
> - sizeof(struct seq_net_private)))
> + sizeof(struct ptype_iter_state)))
> goto out_softnet;
>
> if (wext_proc_init(net))
>
next prev parent reply other threads:[~2026-02-02 12:54 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-02 10:26 [PATCH v2 net] net: add RCU protection to (struct packet_type)->dev Eric Dumazet
2026-02-02 12:53 ` dongchenchen (A) [this message]
2026-02-02 13:06 ` Eric Dumazet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7befb796-be66-4518-96a5-6df607edd41d@huawei.com \
--to=dongchenchen2@huawei.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=eric.dumazet@gmail.com \
--cc=fengwei_yin@linux.alibaba.com \
--cc=horms@kernel.org \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=willemb@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox