* [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2
@ 2025-11-20 10:11 Tetsuo Handa
2025-11-21 9:06 ` Oleksij Rempel
0 siblings, 1 reply; 8+ messages in thread
From: Tetsuo Handa @ 2025-11-20 10:11 UTC (permalink / raw)
To: linux-can, Network Development, Marc Kleine-Budde, Oleksij Rempel
Hello.
I am using a debug printk() patch for j1939_priv which records/counts where
refcount for j1939_priv has changed, and syzbot succeeded to record/count a
j1939_priv leak in next-20251119
( https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84 ).
The output from the debug printk() patch is shown below. I think that
understanding what actions have been taken on this j1939_priv object will
help you finding the cause of j1939_priv leak bug.
Regards.
unregister_netdevice: waiting for vcan0 to become free. Usage count = 2
Call trace for vcan0@ffff888031c9c000 +1 at
j1939_priv_create net/can/j1939/main.c:150 [inline]
j1939_netdev_start+0x1de/0xa30 net/can/j1939/main.c:282
j1939_sk_bind+0x926/0xca0 net/can/j1939/socket.c:498
__sys_bind_socket net/socket.c:1878 [inline]
__sys_bind+0x2c6/0x3e0 net/socket.c:1909
Call trace for vcan0@ffff888031c9c000 +1 at
j1939_priv_get net/can/j1939/main.c:191 [inline]
j1939_can_rx_register net/can/j1939/main.c:199 [inline]
j1939_netdev_start+0x615/0xa30 net/can/j1939/main.c:305
j1939_sk_bind+0x926/0xca0 net/can/j1939/socket.c:498
__sys_bind_socket net/socket.c:1878 [inline]
__sys_bind+0x2c6/0x3e0 net/socket.c:1909
Call trace for vcan0@ffff888031c9c000 +1 at
j1939_sk_bind+0xa02/0xca0 net/can/j1939/socket.c:510
__sys_bind_socket net/socket.c:1878 [inline]
__sys_bind+0x2c6/0x3e0 net/socket.c:1909
Call trace for vcan0@ffff888031c9c000 +1 at
j1939_jsk_add net/can/j1939/socket.c:81 [inline]
j1939_sk_bind+0x769/0xca0 net/can/j1939/socket.c:530
__sys_bind_socket net/socket.c:1878 [inline]
__sys_bind+0x2c6/0x3e0 net/socket.c:1909
Call trace for vcan0@ffff888031c9c000 +2 at
j1939_session_new+0x127/0x450 net/can/j1939/transport.c:1503
j1939_tp_send+0x338/0x8c0 net/can/j1939/transport.c:2018
j1939_sk_send_loop net/can/j1939/socket.c:1159 [inline]
j1939_sk_sendmsg+0xaf8/0x1350 net/can/j1939/socket.c:1282
sock_sendmsg_nosec net/socket.c:727 [inline]
__sock_sendmsg+0x21c/0x270 net/socket.c:746
Call trace for vcan0@ffff888031c9c000 +2 at
j1939_priv_get net/can/j1939/main.c:191 [inline]
j1939_can_recv+0x17f/0xa30 net/can/j1939/main.c:54
deliver net/can/af_can.c:575 [inline]
can_rcv_filter+0x357/0x7d0 net/can/af_can.c:609
can_receive+0x312/0x450 net/can/af_can.c:666
can_rcv+0x145/0x270 net/can/af_can.c:690
__netif_receive_skb_one_core net/core/dev.c:6130 [inline]
__netif_receive_skb+0x164/0x380 net/core/dev.c:6243
process_backlog+0x622/0x1530 net/core/dev.c:6595
__napi_poll+0xae/0x320 net/core/dev.c:7659
napi_poll net/core/dev.c:7722 [inline]
net_rx_action+0x672/0xe50 net/core/dev.c:7874
handle_softirqs+0x27d/0x880 kernel/softirq.c:626
Call trace for vcan0@ffff888031c9c000 +1 at
j1939_session_new+0x127/0x450 net/can/j1939/transport.c:1503
j1939_session_fresh_new net/can/j1939/transport.c:1543 [inline]
j1939_xtp_rx_rts_session_new net/can/j1939/transport.c:1628 [inline]
j1939_xtp_rx_rts+0xd16/0x18b0 net/can/j1939/transport.c:1749
j1939_tp_cmd_recv net/can/j1939/transport.c:2071 [inline]
j1939_tp_recv+0xb24/0x1040 net/can/j1939/transport.c:2158
j1939_can_recv+0x6a0/0xa30 net/can/j1939/main.c:108
deliver net/can/af_can.c:575 [inline]
can_rcv_filter+0x357/0x7d0 net/can/af_can.c:609
can_receive+0x312/0x450 net/can/af_can.c:666
can_rcv+0x145/0x270 net/can/af_can.c:690
__netif_receive_skb_one_core net/core/dev.c:6130 [inline]
__netif_receive_skb+0x164/0x380 net/core/dev.c:6243
process_backlog+0x622/0x1530 net/core/dev.c:6595
__napi_poll+0xae/0x320 net/core/dev.c:7659
napi_poll net/core/dev.c:7722 [inline]
net_rx_action+0x672/0xe50 net/core/dev.c:7874
handle_softirqs+0x27d/0x880 kernel/softirq.c:626
Call trace for vcan0@ffff888031c9c000 -2 at
j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184
j1939_can_recv+0x6e0/0xa30 net/can/j1939/main.c:115
deliver net/can/af_can.c:575 [inline]
can_rcv_filter+0x357/0x7d0 net/can/af_can.c:609
can_receive+0x312/0x450 net/can/af_can.c:666
can_rcv+0x145/0x270 net/can/af_can.c:690
__netif_receive_skb_one_core net/core/dev.c:6130 [inline]
__netif_receive_skb+0x164/0x380 net/core/dev.c:6243
process_backlog+0x622/0x1530 net/core/dev.c:6595
__napi_poll+0xae/0x320 net/core/dev.c:7659
napi_poll net/core/dev.c:7722 [inline]
net_rx_action+0x672/0xe50 net/core/dev.c:7874
handle_softirqs+0x27d/0x880 kernel/softirq.c:626
Call trace for vcan0@ffff888031c9c000 -2 at
j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184
j1939_session_destroy net/can/j1939/transport.c:285 [inline]
__j1939_session_release net/can/j1939/transport.c:294 [inline]
kref_put include/linux/kref.h:65 [inline]
j1939_session_put+0x2f0/0x460 net/can/j1939/transport.c:299
j1939_tp_rxtimer+0x177/0x3d0 net/can/j1939/transport.c:1266
__run_hrtimer kernel/time/hrtimer.c:1777 [inline]
__hrtimer_run_queues+0x51c/0xc70 kernel/time/hrtimer.c:1841
hrtimer_run_softirq+0x187/0x2b0 kernel/time/hrtimer.c:1858
handle_softirqs+0x27d/0x880 kernel/softirq.c:626
Call trace for vcan0@ffff888031c9c000 -1 at
j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184
j1939_jsk_del net/can/j1939/socket.c:94 [inline]
j1939_sk_release+0x408/0x7c0 net/can/j1939/socket.c:649
__sock_release net/socket.c:662 [inline]
sock_close+0xc3/0x240 net/socket.c:1459
__fput+0x44c/0xa70 fs/file_table.c:468
task_work_run+0x1d4/0x260 kernel/task_work.c:233
resume_user_mode_work include/linux/resume_user_mode.h:50 [inline]
__exit_to_user_mode_loop kernel/entry/common.c:44 [inline]
exit_to_user_mode_loop+0xff/0x4f0 kernel/entry/common.c:75
__exit_to_user_mode_prepare include/linux/irq-entry-common.h:226 [inline]
syscall_exit_to_user_mode_prepare include/linux/irq-entry-common.h:256 [inline]
syscall_exit_to_user_mode_work include/linux/entry-common.h:159 [inline]
syscall_exit_to_user_mode include/linux/entry-common.h:194 [inline]
do_syscall_64+0x2e9/0xfa0 arch/x86/entry/syscall_64.c:100
entry_SYSCALL_64_after_hwframe+0x77/0x7f
Call trace for vcan0@ffff888031c9c000 -1 at
j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184
j1939_can_rx_unregister net/can/j1939/main.c:221 [inline]
__j1939_rx_release net/can/j1939/main.c:230 [inline]
kref_put_mutex include/linux/kref.h:86 [inline]
j1939_netdev_stop+0xa6/0x190 net/can/j1939/main.c:325
j1939_sk_release+0x471/0x7c0 net/can/j1939/socket.c:654
__sock_release net/socket.c:662 [inline]
sock_close+0xc3/0x240 net/socket.c:1459
__fput+0x44c/0xa70 fs/file_table.c:468
task_work_run+0x1d4/0x260 kernel/task_work.c:233
resume_user_mode_work include/linux/resume_user_mode.h:50 [inline]
__exit_to_user_mode_loop kernel/entry/common.c:44 [inline]
exit_to_user_mode_loop+0xff/0x4f0 kernel/entry/common.c:75
__exit_to_user_mode_prepare include/linux/irq-entry-common.h:226 [inline]
syscall_exit_to_user_mode_prepare include/linux/irq-entry-common.h:256 [inline]
syscall_exit_to_user_mode_work include/linux/entry-common.h:159 [inline]
syscall_exit_to_user_mode include/linux/entry-common.h:194 [inline]
do_syscall_64+0x2e9/0xfa0 arch/x86/entry/syscall_64.c:100
entry_SYSCALL_64_after_hwframe+0x77/0x7f
Call trace for vcan0@ffff888031c9c000 -1 at
j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184
j1939_sk_release+0x471/0x7c0 net/can/j1939/socket.c:654
__sock_release net/socket.c:662 [inline]
sock_close+0xc3/0x240 net/socket.c:1459
__fput+0x44c/0xa70 fs/file_table.c:468
task_work_run+0x1d4/0x260 kernel/task_work.c:233
resume_user_mode_work include/linux/resume_user_mode.h:50 [inline]
__exit_to_user_mode_loop kernel/entry/common.c:44 [inline]
exit_to_user_mode_loop+0xff/0x4f0 kernel/entry/common.c:75
__exit_to_user_mode_prepare include/linux/irq-entry-common.h:226 [inline]
syscall_exit_to_user_mode_prepare include/linux/irq-entry-common.h:256 [inline]
syscall_exit_to_user_mode_work include/linux/entry-common.h:159 [inline]
syscall_exit_to_user_mode include/linux/entry-common.h:194 [inline]
do_syscall_64+0x2e9/0xfa0 arch/x86/entry/syscall_64.c:100
entry_SYSCALL_64_after_hwframe+0x77/0x7f
Call trace for vcan0@ffff888031c9c000 -1 at
j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184
j1939_sk_sock_destruct+0x52/0x90 net/can/j1939/socket.c:386
__sk_destruct+0x85/0x880 net/core/sock.c:2350
rcu_do_batch kernel/rcu/tree.c:2605 [inline]
rcu_core+0xcab/0x1770 kernel/rcu/tree.c:2861
handle_softirqs+0x27d/0x880 kernel/softirq.c:626
balance for vcan0@j1939_priv is 1
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2
2025-11-20 10:11 [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2 Tetsuo Handa
@ 2025-11-21 9:06 ` Oleksij Rempel
2025-11-21 9:33 ` Tetsuo Handa
0 siblings, 1 reply; 8+ messages in thread
From: Oleksij Rempel @ 2025-11-21 9:06 UTC (permalink / raw)
To: Tetsuo Handa; +Cc: linux-can, Network Development, Marc Kleine-Budde
Hello Tetsuo,
On Thu, Nov 20, 2025 at 07:11:22PM +0900, Tetsuo Handa wrote:
> Hello.
>
> I am using a debug printk() patch for j1939_priv which records/counts where
> refcount for j1939_priv has changed, and syzbot succeeded to record/count a
> j1939_priv leak in next-20251119
> ( https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84 ).
>
> The output from the debug printk() patch is shown below. I think that
> understanding what actions have been taken on this j1939_priv object will
> help you finding the cause of j1939_priv leak bug.
Hm, looks like we have a race where new session is created in
j1939_xtp_rx_rts(), just at the moment where we call
j1939_can_rx_unregister().
Haw about following change:
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -214,6 +214,7 @@ static void __j1939_rx_release(struct kref *kref)
rx_kref);
j1939_can_rx_unregister(priv);
+ j1939_cancel_active_session(priv, NULL);
j1939_ecu_unmap_all(priv);
j1939_priv_set(priv->ndev, NULL);
mutex_unlock(&j1939_netdev_lock);
Best Regards,
Oleksij
--
Pengutronix e.K. | |
Steuerwalder Str. 21 | http://www.pengutronix.de/ |
31137 Hildesheim, Germany | Phone: +49-5121-206917-0 |
Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2
2025-11-21 9:06 ` Oleksij Rempel
@ 2025-11-21 9:33 ` Tetsuo Handa
2025-11-21 10:00 ` Oleksij Rempel
0 siblings, 1 reply; 8+ messages in thread
From: Tetsuo Handa @ 2025-11-21 9:33 UTC (permalink / raw)
To: Oleksij Rempel; +Cc: linux-can, Network Development, Marc Kleine-Budde
On 2025/11/21 18:06, Oleksij Rempel wrote:
> Hm, looks like we have a race where new session is created in
> j1939_xtp_rx_rts(), just at the moment where we call
> j1939_can_rx_unregister().
>
> Haw about following change:
>
> --- a/net/can/j1939/main.c
> +++ b/net/can/j1939/main.c
> @@ -214,6 +214,7 @@ static void __j1939_rx_release(struct kref *kref)
> rx_kref);
>
> j1939_can_rx_unregister(priv);
> + j1939_cancel_active_session(priv, NULL);
> j1939_ecu_unmap_all(priv);
> j1939_priv_set(priv->ndev, NULL);
> mutex_unlock(&j1939_netdev_lock);
>
Well, j1939_cancel_active_session(priv, NULL) is already called from
j1939_netdev_notify(NETDEV_UNREGISTER). Unless a session is recreated
after NETDEV_UNREGISTER event was handled, I can't imagine such race.
We can see that there are three j1939_session_new() calls but only
two j1939_session_destroy() calls. There might be a refcount leak on
j1939_session which prevents j1939_priv from dropping final refcount.
Call trace for vcan0@ffff888031c9c000 +2 at
j1939_session_new+0x127/0x450 net/can/j1939/transport.c:1503
j1939_tp_send+0x338/0x8c0 net/can/j1939/transport.c:2018
Call trace for vcan0@ffff888031c9c000 +1 at
j1939_session_new+0x127/0x450 net/can/j1939/transport.c:1503
j1939_session_fresh_new net/can/j1939/transport.c:1543 [inline]
j1939_xtp_rx_rts_session_new net/can/j1939/transport.c:1628 [inline]
j1939_xtp_rx_rts+0xd16/0x18b0 net/can/j1939/transport.c:1749
Call trace for vcan0@ffff888031c9c000 -2 at
j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184
j1939_session_destroy net/can/j1939/transport.c:285 [inline]
__j1939_session_release net/can/j1939/transport.c:294 [inline]
kref_put include/linux/kref.h:65 [inline]
Do we want to update
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/net/can/j1939?id=5ac798f79b48065b0284216c7a0057271185a882
in order to also try tracing refcount for j1939_session ?
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2
2025-11-21 9:33 ` Tetsuo Handa
@ 2025-11-21 10:00 ` Oleksij Rempel
2025-11-21 10:19 ` Tetsuo Handa
0 siblings, 1 reply; 8+ messages in thread
From: Oleksij Rempel @ 2025-11-21 10:00 UTC (permalink / raw)
To: Tetsuo Handa; +Cc: linux-can, Network Development, Marc Kleine-Budde
On Fri, Nov 21, 2025 at 06:33:02PM +0900, Tetsuo Handa wrote:
> On 2025/11/21 18:06, Oleksij Rempel wrote:
> > Hm, looks like we have a race where new session is created in
> > j1939_xtp_rx_rts(), just at the moment where we call
> > j1939_can_rx_unregister().
> >
> > Haw about following change:
> >
> > --- a/net/can/j1939/main.c
> > +++ b/net/can/j1939/main.c
> > @@ -214,6 +214,7 @@ static void __j1939_rx_release(struct kref *kref)
> > rx_kref);
> >
> > j1939_can_rx_unregister(priv);
> > + j1939_cancel_active_session(priv, NULL);
> > j1939_ecu_unmap_all(priv);
> > j1939_priv_set(priv->ndev, NULL);
> > mutex_unlock(&j1939_netdev_lock);
> >
>
> Well, j1939_cancel_active_session(priv, NULL) is already called from
> j1939_netdev_notify(NETDEV_UNREGISTER). Unless a session is recreated
> after NETDEV_UNREGISTER event was handled, I can't imagine such race.
>
> We can see that there are three j1939_session_new() calls but only
> two j1939_session_destroy() calls. There might be a refcount leak on
> j1939_session which prevents j1939_priv from dropping final refcount.
>
> Call trace for vcan0@ffff888031c9c000 +2 at
> j1939_session_new+0x127/0x450 net/can/j1939/transport.c:1503
> j1939_tp_send+0x338/0x8c0 net/can/j1939/transport.c:2018
>
> Call trace for vcan0@ffff888031c9c000 +1 at
> j1939_session_new+0x127/0x450 net/can/j1939/transport.c:1503
> j1939_session_fresh_new net/can/j1939/transport.c:1543 [inline]
> j1939_xtp_rx_rts_session_new net/can/j1939/transport.c:1628 [inline]
> j1939_xtp_rx_rts+0xd16/0x18b0 net/can/j1939/transport.c:1749
>
> Call trace for vcan0@ffff888031c9c000 -2 at
> j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184
> j1939_session_destroy net/can/j1939/transport.c:285 [inline]
> __j1939_session_release net/can/j1939/transport.c:294 [inline]
> kref_put include/linux/kref.h:65 [inline]
>
> Do we want to update
> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/net/can/j1939?id=5ac798f79b48065b0284216c7a0057271185a882
> in order to also try tracing refcount for j1939_session ?
Ack.
--
Pengutronix e.K. | |
Steuerwalder Str. 21 | http://www.pengutronix.de/ |
31137 Hildesheim, Germany | Phone: +49-5121-206917-0 |
Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2
2025-11-21 10:00 ` Oleksij Rempel
@ 2025-11-21 10:19 ` Tetsuo Handa
2025-11-21 10:31 ` Oleksij Rempel
0 siblings, 1 reply; 8+ messages in thread
From: Tetsuo Handa @ 2025-11-21 10:19 UTC (permalink / raw)
To: Oleksij Rempel; +Cc: linux-can, Network Development, Marc Kleine-Budde
On 2025/11/21 19:00, Oleksij Rempel wrote:
>> Do we want to update
>> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/net/can/j1939?id=5ac798f79b48065b0284216c7a0057271185a882
>> in order to also try tracing refcount for j1939_session ?
>
> Ack.
>
I see.
By the way, I am thinking
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 88e7160d4248..f12679446990 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -477,16 +477,22 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
struct net_device *ndev;
ndev = dev_get_by_index(net, addr->can_ifindex);
if (!ndev) {
ret = -ENODEV;
goto out_release_sock;
}
+ if (ndev->reg_state != NETREG_REGISTERED) {
+ dev_put(ndev);
+ ret = -ENODEV;
+ goto out_release_sock;
+ }
+
can_ml = can_get_ml_priv(ndev);
if (!can_ml) {
dev_put(ndev);
ret = -ENODEV;
goto out_release_sock;
}
if (!(ndev->flags & IFF_UP)) {
as an alternative approach for
https://lkml.kernel.org/r/9a3f9a95-1f58-4d67-9ab4-1ca360f86f79@I-love.SAKURA.ne.jp
because I consider that getting a new refcount on net_device should be avoided
when NETDEV_UNREGISTER event has already started.
Maybe we can do similar thing for j1939_session in order to avoid getting a new
refcount on j1939_priv when NETDEV_UNREGISTER event has already started.
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index fbf5c8001c9d..b22568fecba5 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1492,6 +1492,8 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv,
struct j1939_session *session;
struct j1939_sk_buff_cb *skcb;
+ if (priv->ndev->reg_state != NETREG_REGISTERED)
+ return NULL;
session = kzalloc(sizeof(*session), gfp_any());
if (!session)
return NULL;
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2
2025-11-21 10:19 ` Tetsuo Handa
@ 2025-11-21 10:31 ` Oleksij Rempel
2025-11-22 7:00 ` Tetsuo Handa
0 siblings, 1 reply; 8+ messages in thread
From: Oleksij Rempel @ 2025-11-21 10:31 UTC (permalink / raw)
To: Tetsuo Handa; +Cc: linux-can, Network Development, Marc Kleine-Budde
On Fri, Nov 21, 2025 at 07:19:24PM +0900, Tetsuo Handa wrote:
> On 2025/11/21 19:00, Oleksij Rempel wrote:
> >> Do we want to update
> >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/net/can/j1939?id=5ac798f79b48065b0284216c7a0057271185a882
> >> in order to also try tracing refcount for j1939_session ?
> >
> > Ack.
> >
>
> I see.
>
> By the way, I am thinking
>
> diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
> index 88e7160d4248..f12679446990 100644
> --- a/net/can/j1939/socket.c
> +++ b/net/can/j1939/socket.c
> @@ -477,16 +477,22 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
> struct net_device *ndev;
>
> ndev = dev_get_by_index(net, addr->can_ifindex);
> if (!ndev) {
> ret = -ENODEV;
> goto out_release_sock;
> }
>
> + if (ndev->reg_state != NETREG_REGISTERED) {
> + dev_put(ndev);
> + ret = -ENODEV;
> + goto out_release_sock;
> + }
> +
> can_ml = can_get_ml_priv(ndev);
> if (!can_ml) {
> dev_put(ndev);
> ret = -ENODEV;
> goto out_release_sock;
> }
>
> if (!(ndev->flags & IFF_UP)) {
>
> as an alternative approach for
> https://lkml.kernel.org/r/9a3f9a95-1f58-4d67-9ab4-1ca360f86f79@I-love.SAKURA.ne.jp
> because I consider that getting a new refcount on net_device should be avoided
> when NETDEV_UNREGISTER event has already started.
>
> Maybe we can do similar thing for j1939_session in order to avoid getting a new
> refcount on j1939_priv when NETDEV_UNREGISTER event has already started.
>
> diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
> index fbf5c8001c9d..b22568fecba5 100644
> --- a/net/can/j1939/transport.c
> +++ b/net/can/j1939/transport.c
> @@ -1492,6 +1492,8 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv,
> struct j1939_session *session;
> struct j1939_sk_buff_cb *skcb;
>
> + if (priv->ndev->reg_state != NETREG_REGISTERED)
> + return NULL;
> session = kzalloc(sizeof(*session), gfp_any());
> if (!session)
> return NULL;
>
Yes, it make sense to proactively prevent the session. Good idea.
--
Pengutronix e.K. | |
Steuerwalder Str. 21 | http://www.pengutronix.de/ |
31137 Hildesheim, Germany | Phone: +49-5121-206917-0 |
Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2
2025-11-21 10:31 ` Oleksij Rempel
@ 2025-11-22 7:00 ` Tetsuo Handa
2025-11-22 13:03 ` Tetsuo Handa
0 siblings, 1 reply; 8+ messages in thread
From: Tetsuo Handa @ 2025-11-22 7:00 UTC (permalink / raw)
To: Oleksij Rempel; +Cc: linux-can, Network Development, Marc Kleine-Budde
On 2025/11/21 19:31, Oleksij Rempel wrote:
>> diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
>> index fbf5c8001c9d..b22568fecba5 100644
>> --- a/net/can/j1939/transport.c
>> +++ b/net/can/j1939/transport.c
>> @@ -1492,6 +1492,8 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv,
>> struct j1939_session *session;
>> struct j1939_sk_buff_cb *skcb;
>>
>> + if (priv->ndev->reg_state != NETREG_REGISTERED)
>> + return NULL;
>> session = kzalloc(sizeof(*session), gfp_any());
>> if (!session)
>> return NULL;
>>
>
> Yes, it make sense to proactively prevent the session. Good idea.
>
Done updating my patch. But it turned out that your guess was correct before
syzbot tries my patch. A sample j1939 program (generated by Google AI search)
succeeded to trigger this race if below delay injection patch is applied.
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1492,6 +1494,9 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv,
struct j1939_session *session;
struct j1939_sk_buff_cb *skcb;
+ printk("j1939_session_new() delay start\n");
+ mdelay(5000); // <= Run "ip link del dev vcan0 type vcan" here.
+ printk("j1939_session_new() delay end\n");
session = kzalloc(sizeof(*session), gfp_any());
if (!session)
return NULL;
So, not only we need to make sure that all existing j1939_session are destroyed
but also we need to make sure that no new j1939_session is created if underlying
net_device is no longer in NETREG_REGISTERED state.
F.Y.I. Below change did not help. Also, although j1939_netdev_notify(NETDEV_UNREGISTER)
is called periodically, j1939_cancel_active_session(priv, NULL) is called only once
because j1939_sk_netdev_event_unregister(priv) calls j1939_priv_set(priv->ndev, NULL)
from __j1939_rx_release(). There is currently no mechanism to retry when we hit
the race above.
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -214,6 +214,7 @@ static void __j1939_rx_release(struct kref *kref)
rx_kref);
j1939_can_rx_unregister(priv);
+ j1939_cancel_active_session(priv, NULL);
j1939_ecu_unmap_all(priv);
j1939_priv_set(priv->ndev, NULL);
mutex_unlock(&j1939_netdev_lock);
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2
2025-11-22 7:00 ` Tetsuo Handa
@ 2025-11-22 13:03 ` Tetsuo Handa
0 siblings, 0 replies; 8+ messages in thread
From: Tetsuo Handa @ 2025-11-22 13:03 UTC (permalink / raw)
To: Oleksij Rempel; +Cc: linux-can, Network Development, Marc Kleine-Budde
On 2025/11/22 16:00, Tetsuo Handa wrote:
> So, not only we need to make sure that all existing j1939_session are destroyed
> but also we need to make sure that no new j1939_session is created if underlying
> net_device is no longer in NETREG_REGISTERED state.
For your testing, here is a delay injection patch and a complete reproducer.
---------- delay injection patch start ----------
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index fbf5c8001c9d..601a32397f72 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1492,6 +1492,9 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv,
struct j1939_session *session;
struct j1939_sk_buff_cb *skcb;
+ pr_info("%s() delay start\n", __func__);
+ mdelay(5000);
+ pr_info("%s() delay end\n", __func__);
session = kzalloc(sizeof(*session), gfp_any());
if (!session)
return NULL;
---------- delay injection patch end ----------
---------- j1939_example.c start ----------
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sched.h>
#include <linux/can.h>
#include <linux/can/j1939.h>
#include <net/if.h>
#include <errno.h>
#define IF_NAME "vcan0"
#define SRC_ADDR 0x20 // SA
#define DST_ADDR 0x30 // DA
#define PGN_TX 0x12300 // Sender PGN
#define PGN_RX 0x12300 // Receiver PGN
static void sender_task(int sock_s);
static void receiver_task(int sock_r);
int main(int argc, char *argv[])
{
int sock_s, sock_r;
struct sockaddr_can addr_s, addr_r;
struct ifreq ifr;
// Create a new namespace.
if (unshare(CLONE_NEWNET)) {
perror("unshare failed");
return 1;
}
// Create vcan0 in that namespace.
system("/usr/sbin/ip link add dev vcan0 type vcan");
system("/usr/sbin/ip link set up vcan0");
sock_s = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);
sock_r = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);
if (sock_s < 0 || sock_r < 0) {
perror("socket creation failed");
return 1;
}
strcpy(ifr.ifr_name, IF_NAME);
if (ioctl(sock_s, SIOCGIFINDEX, &ifr) < 0) {
perror("ioctl SIOCGIFINDEX failed");
return 1;
}
addr_s.can_family = AF_CAN;
addr_s.can_ifindex = ifr.ifr_ifindex;
addr_s.can_addr.j1939.name = J1939_NO_NAME;
addr_s.can_addr.j1939.addr = SRC_ADDR;
addr_s.can_addr.j1939.pgn = J1939_NO_PGN;
// Delete vcan0 in that namespace while bind() on vcan0 is in progress.
if (fork() == 0) {
sleep(1);
system("/usr/sbin/ip link del dev vcan0 type vcan");
_exit(0);
}
// Delay is injected by the kernel side.
if (bind(sock_s, (struct sockaddr *)&addr_s, sizeof(addr_s)) < 0) {
perror("sender bind failed");
return 1;
}
addr_r.can_family = AF_CAN;
addr_r.can_ifindex = ifr.ifr_ifindex;
addr_r.can_addr.j1939.name = J1939_NO_NAME;
addr_r.can_addr.j1939.addr = DST_ADDR;
addr_r.can_addr.j1939.pgn = PGN_RX;
if (bind(sock_r, (struct sockaddr *)&addr_r, sizeof(addr_r)) < 0) {
perror("receiver bind failed");
return 1;
}
printf("J1939 sockets set up on %s\n", IF_NAME);
printf("Sender (SA 0x%02X) and Receiver (PGN 0x%05X) ready.\n", SRC_ADDR, PGN_RX);
sender_task(sock_s);
receiver_task(sock_r);
return 0;
}
static void sender_task(int sock_s) {
struct sockaddr_can addr_dest;
socklen_t len_dest = sizeof(addr_dest);
char data[] = "Hello J1939 Localhost!";
addr_dest.can_family = AF_CAN;
addr_dest.can_ifindex = 0;
addr_dest.can_addr.j1939.name = J1939_NO_NAME;
addr_dest.can_addr.j1939.addr = DST_ADDR;
addr_dest.can_addr.j1939.pgn = PGN_TX;
printf("Sending message: \"%s\" (Length: %lu)\n", data, (unsigned long)strlen(data) + 1);
if (sendto(sock_s, data, strlen(data) + 1, 0, (struct sockaddr *)&addr_dest, len_dest) < 0) {
perror("sendto failed");
} else {
printf("Message sent successfully.\n");
}
}
static void receiver_task(int sock_r) {
struct sockaddr_can addr_src;
socklen_t len_src = sizeof(addr_src);
char buffer[256];
ssize_t bytes_received;
printf("Waiting for messages...\n");
bytes_received = recvfrom(sock_r, buffer, sizeof(buffer) - 1, 0, (struct sockaddr *)&addr_src, &len_src);
if (bytes_received < 0) {
perror("recvfrom failed");
} else {
buffer[bytes_received] = '\0';
printf("Received %zd bytes from SA 0x%02X, PGN 0x%05X: \"%s\"\n",
bytes_received, addr_src.can_addr.j1939.addr, addr_src.can_addr.j1939.pgn, buffer);
}
}
---------- j1939_example.c end ----------
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2025-11-22 13:03 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-11-20 10:11 [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2 Tetsuo Handa
2025-11-21 9:06 ` Oleksij Rempel
2025-11-21 9:33 ` Tetsuo Handa
2025-11-21 10:00 ` Oleksij Rempel
2025-11-21 10:19 ` Tetsuo Handa
2025-11-21 10:31 ` Oleksij Rempel
2025-11-22 7:00 ` Tetsuo Handa
2025-11-22 13:03 ` Tetsuo Handa
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).