* [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2
@ 2025-11-20 10:11 Tetsuo Handa
2025-11-21 9:06 ` Oleksij Rempel
0 siblings, 1 reply; 8+ messages in thread
From: Tetsuo Handa @ 2025-11-20 10:11 UTC (permalink / raw)
To: linux-can, Network Development, Marc Kleine-Budde, Oleksij Rempel
Hello.
I am using a debug printk() patch for j1939_priv which records/counts where
refcount for j1939_priv has changed, and syzbot succeeded to record/count a
j1939_priv leak in next-20251119
( https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84 ).
The output from the debug printk() patch is shown below. I think that
understanding what actions have been taken on this j1939_priv object will
help you finding the cause of j1939_priv leak bug.
Regards.
unregister_netdevice: waiting for vcan0 to become free. Usage count = 2
Call trace for vcan0@ffff888031c9c000 +1 at
j1939_priv_create net/can/j1939/main.c:150 [inline]
j1939_netdev_start+0x1de/0xa30 net/can/j1939/main.c:282
j1939_sk_bind+0x926/0xca0 net/can/j1939/socket.c:498
__sys_bind_socket net/socket.c:1878 [inline]
__sys_bind+0x2c6/0x3e0 net/socket.c:1909
Call trace for vcan0@ffff888031c9c000 +1 at
j1939_priv_get net/can/j1939/main.c:191 [inline]
j1939_can_rx_register net/can/j1939/main.c:199 [inline]
j1939_netdev_start+0x615/0xa30 net/can/j1939/main.c:305
j1939_sk_bind+0x926/0xca0 net/can/j1939/socket.c:498
__sys_bind_socket net/socket.c:1878 [inline]
__sys_bind+0x2c6/0x3e0 net/socket.c:1909
Call trace for vcan0@ffff888031c9c000 +1 at
j1939_sk_bind+0xa02/0xca0 net/can/j1939/socket.c:510
__sys_bind_socket net/socket.c:1878 [inline]
__sys_bind+0x2c6/0x3e0 net/socket.c:1909
Call trace for vcan0@ffff888031c9c000 +1 at
j1939_jsk_add net/can/j1939/socket.c:81 [inline]
j1939_sk_bind+0x769/0xca0 net/can/j1939/socket.c:530
__sys_bind_socket net/socket.c:1878 [inline]
__sys_bind+0x2c6/0x3e0 net/socket.c:1909
Call trace for vcan0@ffff888031c9c000 +2 at
j1939_session_new+0x127/0x450 net/can/j1939/transport.c:1503
j1939_tp_send+0x338/0x8c0 net/can/j1939/transport.c:2018
j1939_sk_send_loop net/can/j1939/socket.c:1159 [inline]
j1939_sk_sendmsg+0xaf8/0x1350 net/can/j1939/socket.c:1282
sock_sendmsg_nosec net/socket.c:727 [inline]
__sock_sendmsg+0x21c/0x270 net/socket.c:746
Call trace for vcan0@ffff888031c9c000 +2 at
j1939_priv_get net/can/j1939/main.c:191 [inline]
j1939_can_recv+0x17f/0xa30 net/can/j1939/main.c:54
deliver net/can/af_can.c:575 [inline]
can_rcv_filter+0x357/0x7d0 net/can/af_can.c:609
can_receive+0x312/0x450 net/can/af_can.c:666
can_rcv+0x145/0x270 net/can/af_can.c:690
__netif_receive_skb_one_core net/core/dev.c:6130 [inline]
__netif_receive_skb+0x164/0x380 net/core/dev.c:6243
process_backlog+0x622/0x1530 net/core/dev.c:6595
__napi_poll+0xae/0x320 net/core/dev.c:7659
napi_poll net/core/dev.c:7722 [inline]
net_rx_action+0x672/0xe50 net/core/dev.c:7874
handle_softirqs+0x27d/0x880 kernel/softirq.c:626
Call trace for vcan0@ffff888031c9c000 +1 at
j1939_session_new+0x127/0x450 net/can/j1939/transport.c:1503
j1939_session_fresh_new net/can/j1939/transport.c:1543 [inline]
j1939_xtp_rx_rts_session_new net/can/j1939/transport.c:1628 [inline]
j1939_xtp_rx_rts+0xd16/0x18b0 net/can/j1939/transport.c:1749
j1939_tp_cmd_recv net/can/j1939/transport.c:2071 [inline]
j1939_tp_recv+0xb24/0x1040 net/can/j1939/transport.c:2158
j1939_can_recv+0x6a0/0xa30 net/can/j1939/main.c:108
deliver net/can/af_can.c:575 [inline]
can_rcv_filter+0x357/0x7d0 net/can/af_can.c:609
can_receive+0x312/0x450 net/can/af_can.c:666
can_rcv+0x145/0x270 net/can/af_can.c:690
__netif_receive_skb_one_core net/core/dev.c:6130 [inline]
__netif_receive_skb+0x164/0x380 net/core/dev.c:6243
process_backlog+0x622/0x1530 net/core/dev.c:6595
__napi_poll+0xae/0x320 net/core/dev.c:7659
napi_poll net/core/dev.c:7722 [inline]
net_rx_action+0x672/0xe50 net/core/dev.c:7874
handle_softirqs+0x27d/0x880 kernel/softirq.c:626
Call trace for vcan0@ffff888031c9c000 -2 at
j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184
j1939_can_recv+0x6e0/0xa30 net/can/j1939/main.c:115
deliver net/can/af_can.c:575 [inline]
can_rcv_filter+0x357/0x7d0 net/can/af_can.c:609
can_receive+0x312/0x450 net/can/af_can.c:666
can_rcv+0x145/0x270 net/can/af_can.c:690
__netif_receive_skb_one_core net/core/dev.c:6130 [inline]
__netif_receive_skb+0x164/0x380 net/core/dev.c:6243
process_backlog+0x622/0x1530 net/core/dev.c:6595
__napi_poll+0xae/0x320 net/core/dev.c:7659
napi_poll net/core/dev.c:7722 [inline]
net_rx_action+0x672/0xe50 net/core/dev.c:7874
handle_softirqs+0x27d/0x880 kernel/softirq.c:626
Call trace for vcan0@ffff888031c9c000 -2 at
j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184
j1939_session_destroy net/can/j1939/transport.c:285 [inline]
__j1939_session_release net/can/j1939/transport.c:294 [inline]
kref_put include/linux/kref.h:65 [inline]
j1939_session_put+0x2f0/0x460 net/can/j1939/transport.c:299
j1939_tp_rxtimer+0x177/0x3d0 net/can/j1939/transport.c:1266
__run_hrtimer kernel/time/hrtimer.c:1777 [inline]
__hrtimer_run_queues+0x51c/0xc70 kernel/time/hrtimer.c:1841
hrtimer_run_softirq+0x187/0x2b0 kernel/time/hrtimer.c:1858
handle_softirqs+0x27d/0x880 kernel/softirq.c:626
Call trace for vcan0@ffff888031c9c000 -1 at
j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184
j1939_jsk_del net/can/j1939/socket.c:94 [inline]
j1939_sk_release+0x408/0x7c0 net/can/j1939/socket.c:649
__sock_release net/socket.c:662 [inline]
sock_close+0xc3/0x240 net/socket.c:1459
__fput+0x44c/0xa70 fs/file_table.c:468
task_work_run+0x1d4/0x260 kernel/task_work.c:233
resume_user_mode_work include/linux/resume_user_mode.h:50 [inline]
__exit_to_user_mode_loop kernel/entry/common.c:44 [inline]
exit_to_user_mode_loop+0xff/0x4f0 kernel/entry/common.c:75
__exit_to_user_mode_prepare include/linux/irq-entry-common.h:226 [inline]
syscall_exit_to_user_mode_prepare include/linux/irq-entry-common.h:256 [inline]
syscall_exit_to_user_mode_work include/linux/entry-common.h:159 [inline]
syscall_exit_to_user_mode include/linux/entry-common.h:194 [inline]
do_syscall_64+0x2e9/0xfa0 arch/x86/entry/syscall_64.c:100
entry_SYSCALL_64_after_hwframe+0x77/0x7f
Call trace for vcan0@ffff888031c9c000 -1 at
j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184
j1939_can_rx_unregister net/can/j1939/main.c:221 [inline]
__j1939_rx_release net/can/j1939/main.c:230 [inline]
kref_put_mutex include/linux/kref.h:86 [inline]
j1939_netdev_stop+0xa6/0x190 net/can/j1939/main.c:325
j1939_sk_release+0x471/0x7c0 net/can/j1939/socket.c:654
__sock_release net/socket.c:662 [inline]
sock_close+0xc3/0x240 net/socket.c:1459
__fput+0x44c/0xa70 fs/file_table.c:468
task_work_run+0x1d4/0x260 kernel/task_work.c:233
resume_user_mode_work include/linux/resume_user_mode.h:50 [inline]
__exit_to_user_mode_loop kernel/entry/common.c:44 [inline]
exit_to_user_mode_loop+0xff/0x4f0 kernel/entry/common.c:75
__exit_to_user_mode_prepare include/linux/irq-entry-common.h:226 [inline]
syscall_exit_to_user_mode_prepare include/linux/irq-entry-common.h:256 [inline]
syscall_exit_to_user_mode_work include/linux/entry-common.h:159 [inline]
syscall_exit_to_user_mode include/linux/entry-common.h:194 [inline]
do_syscall_64+0x2e9/0xfa0 arch/x86/entry/syscall_64.c:100
entry_SYSCALL_64_after_hwframe+0x77/0x7f
Call trace for vcan0@ffff888031c9c000 -1 at
j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184
j1939_sk_release+0x471/0x7c0 net/can/j1939/socket.c:654
__sock_release net/socket.c:662 [inline]
sock_close+0xc3/0x240 net/socket.c:1459
__fput+0x44c/0xa70 fs/file_table.c:468
task_work_run+0x1d4/0x260 kernel/task_work.c:233
resume_user_mode_work include/linux/resume_user_mode.h:50 [inline]
__exit_to_user_mode_loop kernel/entry/common.c:44 [inline]
exit_to_user_mode_loop+0xff/0x4f0 kernel/entry/common.c:75
__exit_to_user_mode_prepare include/linux/irq-entry-common.h:226 [inline]
syscall_exit_to_user_mode_prepare include/linux/irq-entry-common.h:256 [inline]
syscall_exit_to_user_mode_work include/linux/entry-common.h:159 [inline]
syscall_exit_to_user_mode include/linux/entry-common.h:194 [inline]
do_syscall_64+0x2e9/0xfa0 arch/x86/entry/syscall_64.c:100
entry_SYSCALL_64_after_hwframe+0x77/0x7f
Call trace for vcan0@ffff888031c9c000 -1 at
j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184
j1939_sk_sock_destruct+0x52/0x90 net/can/j1939/socket.c:386
__sk_destruct+0x85/0x880 net/core/sock.c:2350
rcu_do_batch kernel/rcu/tree.c:2605 [inline]
rcu_core+0xcab/0x1770 kernel/rcu/tree.c:2861
handle_softirqs+0x27d/0x880 kernel/softirq.c:626
balance for vcan0@j1939_priv is 1
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2 2025-11-20 10:11 [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2 Tetsuo Handa @ 2025-11-21 9:06 ` Oleksij Rempel 2025-11-21 9:33 ` Tetsuo Handa 0 siblings, 1 reply; 8+ messages in thread From: Oleksij Rempel @ 2025-11-21 9:06 UTC (permalink / raw) To: Tetsuo Handa; +Cc: linux-can, Network Development, Marc Kleine-Budde Hello Tetsuo, On Thu, Nov 20, 2025 at 07:11:22PM +0900, Tetsuo Handa wrote: > Hello. > > I am using a debug printk() patch for j1939_priv which records/counts where > refcount for j1939_priv has changed, and syzbot succeeded to record/count a > j1939_priv leak in next-20251119 > ( https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84 ). > > The output from the debug printk() patch is shown below. I think that > understanding what actions have been taken on this j1939_priv object will > help you finding the cause of j1939_priv leak bug. Hm, looks like we have a race where new session is created in j1939_xtp_rx_rts(), just at the moment where we call j1939_can_rx_unregister(). Haw about following change: --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -214,6 +214,7 @@ static void __j1939_rx_release(struct kref *kref) rx_kref); j1939_can_rx_unregister(priv); + j1939_cancel_active_session(priv, NULL); j1939_ecu_unmap_all(priv); j1939_priv_set(priv->ndev, NULL); mutex_unlock(&j1939_netdev_lock); Best Regards, Oleksij -- Pengutronix e.K. | | Steuerwalder Str. 21 | http://www.pengutronix.de/ | 31137 Hildesheim, Germany | Phone: +49-5121-206917-0 | Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 | ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2 2025-11-21 9:06 ` Oleksij Rempel @ 2025-11-21 9:33 ` Tetsuo Handa 2025-11-21 10:00 ` Oleksij Rempel 0 siblings, 1 reply; 8+ messages in thread From: Tetsuo Handa @ 2025-11-21 9:33 UTC (permalink / raw) To: Oleksij Rempel; +Cc: linux-can, Network Development, Marc Kleine-Budde On 2025/11/21 18:06, Oleksij Rempel wrote: > Hm, looks like we have a race where new session is created in > j1939_xtp_rx_rts(), just at the moment where we call > j1939_can_rx_unregister(). > > Haw about following change: > > --- a/net/can/j1939/main.c > +++ b/net/can/j1939/main.c > @@ -214,6 +214,7 @@ static void __j1939_rx_release(struct kref *kref) > rx_kref); > > j1939_can_rx_unregister(priv); > + j1939_cancel_active_session(priv, NULL); > j1939_ecu_unmap_all(priv); > j1939_priv_set(priv->ndev, NULL); > mutex_unlock(&j1939_netdev_lock); > Well, j1939_cancel_active_session(priv, NULL) is already called from j1939_netdev_notify(NETDEV_UNREGISTER). Unless a session is recreated after NETDEV_UNREGISTER event was handled, I can't imagine such race. We can see that there are three j1939_session_new() calls but only two j1939_session_destroy() calls. There might be a refcount leak on j1939_session which prevents j1939_priv from dropping final refcount. Call trace for vcan0@ffff888031c9c000 +2 at j1939_session_new+0x127/0x450 net/can/j1939/transport.c:1503 j1939_tp_send+0x338/0x8c0 net/can/j1939/transport.c:2018 Call trace for vcan0@ffff888031c9c000 +1 at j1939_session_new+0x127/0x450 net/can/j1939/transport.c:1503 j1939_session_fresh_new net/can/j1939/transport.c:1543 [inline] j1939_xtp_rx_rts_session_new net/can/j1939/transport.c:1628 [inline] j1939_xtp_rx_rts+0xd16/0x18b0 net/can/j1939/transport.c:1749 Call trace for vcan0@ffff888031c9c000 -2 at j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184 j1939_session_destroy net/can/j1939/transport.c:285 [inline] __j1939_session_release net/can/j1939/transport.c:294 [inline] kref_put include/linux/kref.h:65 [inline] Do we want to update https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/net/can/j1939?id=5ac798f79b48065b0284216c7a0057271185a882 in order to also try tracing refcount for j1939_session ? ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2 2025-11-21 9:33 ` Tetsuo Handa @ 2025-11-21 10:00 ` Oleksij Rempel 2025-11-21 10:19 ` Tetsuo Handa 0 siblings, 1 reply; 8+ messages in thread From: Oleksij Rempel @ 2025-11-21 10:00 UTC (permalink / raw) To: Tetsuo Handa; +Cc: linux-can, Network Development, Marc Kleine-Budde On Fri, Nov 21, 2025 at 06:33:02PM +0900, Tetsuo Handa wrote: > On 2025/11/21 18:06, Oleksij Rempel wrote: > > Hm, looks like we have a race where new session is created in > > j1939_xtp_rx_rts(), just at the moment where we call > > j1939_can_rx_unregister(). > > > > Haw about following change: > > > > --- a/net/can/j1939/main.c > > +++ b/net/can/j1939/main.c > > @@ -214,6 +214,7 @@ static void __j1939_rx_release(struct kref *kref) > > rx_kref); > > > > j1939_can_rx_unregister(priv); > > + j1939_cancel_active_session(priv, NULL); > > j1939_ecu_unmap_all(priv); > > j1939_priv_set(priv->ndev, NULL); > > mutex_unlock(&j1939_netdev_lock); > > > > Well, j1939_cancel_active_session(priv, NULL) is already called from > j1939_netdev_notify(NETDEV_UNREGISTER). Unless a session is recreated > after NETDEV_UNREGISTER event was handled, I can't imagine such race. > > We can see that there are three j1939_session_new() calls but only > two j1939_session_destroy() calls. There might be a refcount leak on > j1939_session which prevents j1939_priv from dropping final refcount. > > Call trace for vcan0@ffff888031c9c000 +2 at > j1939_session_new+0x127/0x450 net/can/j1939/transport.c:1503 > j1939_tp_send+0x338/0x8c0 net/can/j1939/transport.c:2018 > > Call trace for vcan0@ffff888031c9c000 +1 at > j1939_session_new+0x127/0x450 net/can/j1939/transport.c:1503 > j1939_session_fresh_new net/can/j1939/transport.c:1543 [inline] > j1939_xtp_rx_rts_session_new net/can/j1939/transport.c:1628 [inline] > j1939_xtp_rx_rts+0xd16/0x18b0 net/can/j1939/transport.c:1749 > > Call trace for vcan0@ffff888031c9c000 -2 at > j1939_priv_put+0x23/0x370 net/can/j1939/main.c:184 > j1939_session_destroy net/can/j1939/transport.c:285 [inline] > __j1939_session_release net/can/j1939/transport.c:294 [inline] > kref_put include/linux/kref.h:65 [inline] > > Do we want to update > https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/net/can/j1939?id=5ac798f79b48065b0284216c7a0057271185a882 > in order to also try tracing refcount for j1939_session ? Ack. -- Pengutronix e.K. | | Steuerwalder Str. 21 | http://www.pengutronix.de/ | 31137 Hildesheim, Germany | Phone: +49-5121-206917-0 | Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 | ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2 2025-11-21 10:00 ` Oleksij Rempel @ 2025-11-21 10:19 ` Tetsuo Handa 2025-11-21 10:31 ` Oleksij Rempel 0 siblings, 1 reply; 8+ messages in thread From: Tetsuo Handa @ 2025-11-21 10:19 UTC (permalink / raw) To: Oleksij Rempel; +Cc: linux-can, Network Development, Marc Kleine-Budde On 2025/11/21 19:00, Oleksij Rempel wrote: >> Do we want to update >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/net/can/j1939?id=5ac798f79b48065b0284216c7a0057271185a882 >> in order to also try tracing refcount for j1939_session ? > > Ack. > I see. By the way, I am thinking diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 88e7160d4248..f12679446990 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -477,16 +477,22 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len) struct net_device *ndev; ndev = dev_get_by_index(net, addr->can_ifindex); if (!ndev) { ret = -ENODEV; goto out_release_sock; } + if (ndev->reg_state != NETREG_REGISTERED) { + dev_put(ndev); + ret = -ENODEV; + goto out_release_sock; + } + can_ml = can_get_ml_priv(ndev); if (!can_ml) { dev_put(ndev); ret = -ENODEV; goto out_release_sock; } if (!(ndev->flags & IFF_UP)) { as an alternative approach for https://lkml.kernel.org/r/9a3f9a95-1f58-4d67-9ab4-1ca360f86f79@I-love.SAKURA.ne.jp because I consider that getting a new refcount on net_device should be avoided when NETDEV_UNREGISTER event has already started. Maybe we can do similar thing for j1939_session in order to avoid getting a new refcount on j1939_priv when NETDEV_UNREGISTER event has already started. diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index fbf5c8001c9d..b22568fecba5 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1492,6 +1492,8 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv, struct j1939_session *session; struct j1939_sk_buff_cb *skcb; + if (priv->ndev->reg_state != NETREG_REGISTERED) + return NULL; session = kzalloc(sizeof(*session), gfp_any()); if (!session) return NULL; ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2 2025-11-21 10:19 ` Tetsuo Handa @ 2025-11-21 10:31 ` Oleksij Rempel 2025-11-22 7:00 ` Tetsuo Handa 0 siblings, 1 reply; 8+ messages in thread From: Oleksij Rempel @ 2025-11-21 10:31 UTC (permalink / raw) To: Tetsuo Handa; +Cc: linux-can, Network Development, Marc Kleine-Budde On Fri, Nov 21, 2025 at 07:19:24PM +0900, Tetsuo Handa wrote: > On 2025/11/21 19:00, Oleksij Rempel wrote: > >> Do we want to update > >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/net/can/j1939?id=5ac798f79b48065b0284216c7a0057271185a882 > >> in order to also try tracing refcount for j1939_session ? > > > > Ack. > > > > I see. > > By the way, I am thinking > > diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c > index 88e7160d4248..f12679446990 100644 > --- a/net/can/j1939/socket.c > +++ b/net/can/j1939/socket.c > @@ -477,16 +477,22 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len) > struct net_device *ndev; > > ndev = dev_get_by_index(net, addr->can_ifindex); > if (!ndev) { > ret = -ENODEV; > goto out_release_sock; > } > > + if (ndev->reg_state != NETREG_REGISTERED) { > + dev_put(ndev); > + ret = -ENODEV; > + goto out_release_sock; > + } > + > can_ml = can_get_ml_priv(ndev); > if (!can_ml) { > dev_put(ndev); > ret = -ENODEV; > goto out_release_sock; > } > > if (!(ndev->flags & IFF_UP)) { > > as an alternative approach for > https://lkml.kernel.org/r/9a3f9a95-1f58-4d67-9ab4-1ca360f86f79@I-love.SAKURA.ne.jp > because I consider that getting a new refcount on net_device should be avoided > when NETDEV_UNREGISTER event has already started. > > Maybe we can do similar thing for j1939_session in order to avoid getting a new > refcount on j1939_priv when NETDEV_UNREGISTER event has already started. > > diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c > index fbf5c8001c9d..b22568fecba5 100644 > --- a/net/can/j1939/transport.c > +++ b/net/can/j1939/transport.c > @@ -1492,6 +1492,8 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv, > struct j1939_session *session; > struct j1939_sk_buff_cb *skcb; > > + if (priv->ndev->reg_state != NETREG_REGISTERED) > + return NULL; > session = kzalloc(sizeof(*session), gfp_any()); > if (!session) > return NULL; > Yes, it make sense to proactively prevent the session. Good idea. -- Pengutronix e.K. | | Steuerwalder Str. 21 | http://www.pengutronix.de/ | 31137 Hildesheim, Germany | Phone: +49-5121-206917-0 | Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 | ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2 2025-11-21 10:31 ` Oleksij Rempel @ 2025-11-22 7:00 ` Tetsuo Handa 2025-11-22 13:03 ` Tetsuo Handa 0 siblings, 1 reply; 8+ messages in thread From: Tetsuo Handa @ 2025-11-22 7:00 UTC (permalink / raw) To: Oleksij Rempel; +Cc: linux-can, Network Development, Marc Kleine-Budde On 2025/11/21 19:31, Oleksij Rempel wrote: >> diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c >> index fbf5c8001c9d..b22568fecba5 100644 >> --- a/net/can/j1939/transport.c >> +++ b/net/can/j1939/transport.c >> @@ -1492,6 +1492,8 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv, >> struct j1939_session *session; >> struct j1939_sk_buff_cb *skcb; >> >> + if (priv->ndev->reg_state != NETREG_REGISTERED) >> + return NULL; >> session = kzalloc(sizeof(*session), gfp_any()); >> if (!session) >> return NULL; >> > > Yes, it make sense to proactively prevent the session. Good idea. > Done updating my patch. But it turned out that your guess was correct before syzbot tries my patch. A sample j1939 program (generated by Google AI search) succeeded to trigger this race if below delay injection patch is applied. --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1492,6 +1494,9 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv, struct j1939_session *session; struct j1939_sk_buff_cb *skcb; + printk("j1939_session_new() delay start\n"); + mdelay(5000); // <= Run "ip link del dev vcan0 type vcan" here. + printk("j1939_session_new() delay end\n"); session = kzalloc(sizeof(*session), gfp_any()); if (!session) return NULL; So, not only we need to make sure that all existing j1939_session are destroyed but also we need to make sure that no new j1939_session is created if underlying net_device is no longer in NETREG_REGISTERED state. F.Y.I. Below change did not help. Also, although j1939_netdev_notify(NETDEV_UNREGISTER) is called periodically, j1939_cancel_active_session(priv, NULL) is called only once because j1939_sk_netdev_event_unregister(priv) calls j1939_priv_set(priv->ndev, NULL) from __j1939_rx_release(). There is currently no mechanism to retry when we hit the race above. --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -214,6 +214,7 @@ static void __j1939_rx_release(struct kref *kref) rx_kref); j1939_can_rx_unregister(priv); + j1939_cancel_active_session(priv, NULL); j1939_ecu_unmap_all(priv); j1939_priv_set(priv->ndev, NULL); mutex_unlock(&j1939_netdev_lock); ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2 2025-11-22 7:00 ` Tetsuo Handa @ 2025-11-22 13:03 ` Tetsuo Handa 0 siblings, 0 replies; 8+ messages in thread From: Tetsuo Handa @ 2025-11-22 13:03 UTC (permalink / raw) To: Oleksij Rempel; +Cc: linux-can, Network Development, Marc Kleine-Budde On 2025/11/22 16:00, Tetsuo Handa wrote: > So, not only we need to make sure that all existing j1939_session are destroyed > but also we need to make sure that no new j1939_session is created if underlying > net_device is no longer in NETREG_REGISTERED state. For your testing, here is a delay injection patch and a complete reproducer. ---------- delay injection patch start ---------- diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index fbf5c8001c9d..601a32397f72 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1492,6 +1492,9 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv, struct j1939_session *session; struct j1939_sk_buff_cb *skcb; + pr_info("%s() delay start\n", __func__); + mdelay(5000); + pr_info("%s() delay end\n", __func__); session = kzalloc(sizeof(*session), gfp_any()); if (!session) return NULL; ---------- delay injection patch end ---------- ---------- j1939_example.c start ---------- #define _GNU_SOURCE #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <sys/socket.h> #include <sys/ioctl.h> #include <sched.h> #include <linux/can.h> #include <linux/can/j1939.h> #include <net/if.h> #include <errno.h> #define IF_NAME "vcan0" #define SRC_ADDR 0x20 // SA #define DST_ADDR 0x30 // DA #define PGN_TX 0x12300 // Sender PGN #define PGN_RX 0x12300 // Receiver PGN static void sender_task(int sock_s); static void receiver_task(int sock_r); int main(int argc, char *argv[]) { int sock_s, sock_r; struct sockaddr_can addr_s, addr_r; struct ifreq ifr; // Create a new namespace. if (unshare(CLONE_NEWNET)) { perror("unshare failed"); return 1; } // Create vcan0 in that namespace. system("/usr/sbin/ip link add dev vcan0 type vcan"); system("/usr/sbin/ip link set up vcan0"); sock_s = socket(PF_CAN, SOCK_DGRAM, CAN_J1939); sock_r = socket(PF_CAN, SOCK_DGRAM, CAN_J1939); if (sock_s < 0 || sock_r < 0) { perror("socket creation failed"); return 1; } strcpy(ifr.ifr_name, IF_NAME); if (ioctl(sock_s, SIOCGIFINDEX, &ifr) < 0) { perror("ioctl SIOCGIFINDEX failed"); return 1; } addr_s.can_family = AF_CAN; addr_s.can_ifindex = ifr.ifr_ifindex; addr_s.can_addr.j1939.name = J1939_NO_NAME; addr_s.can_addr.j1939.addr = SRC_ADDR; addr_s.can_addr.j1939.pgn = J1939_NO_PGN; // Delete vcan0 in that namespace while bind() on vcan0 is in progress. if (fork() == 0) { sleep(1); system("/usr/sbin/ip link del dev vcan0 type vcan"); _exit(0); } // Delay is injected by the kernel side. if (bind(sock_s, (struct sockaddr *)&addr_s, sizeof(addr_s)) < 0) { perror("sender bind failed"); return 1; } addr_r.can_family = AF_CAN; addr_r.can_ifindex = ifr.ifr_ifindex; addr_r.can_addr.j1939.name = J1939_NO_NAME; addr_r.can_addr.j1939.addr = DST_ADDR; addr_r.can_addr.j1939.pgn = PGN_RX; if (bind(sock_r, (struct sockaddr *)&addr_r, sizeof(addr_r)) < 0) { perror("receiver bind failed"); return 1; } printf("J1939 sockets set up on %s\n", IF_NAME); printf("Sender (SA 0x%02X) and Receiver (PGN 0x%05X) ready.\n", SRC_ADDR, PGN_RX); sender_task(sock_s); receiver_task(sock_r); return 0; } static void sender_task(int sock_s) { struct sockaddr_can addr_dest; socklen_t len_dest = sizeof(addr_dest); char data[] = "Hello J1939 Localhost!"; addr_dest.can_family = AF_CAN; addr_dest.can_ifindex = 0; addr_dest.can_addr.j1939.name = J1939_NO_NAME; addr_dest.can_addr.j1939.addr = DST_ADDR; addr_dest.can_addr.j1939.pgn = PGN_TX; printf("Sending message: \"%s\" (Length: %lu)\n", data, (unsigned long)strlen(data) + 1); if (sendto(sock_s, data, strlen(data) + 1, 0, (struct sockaddr *)&addr_dest, len_dest) < 0) { perror("sendto failed"); } else { printf("Message sent successfully.\n"); } } static void receiver_task(int sock_r) { struct sockaddr_can addr_src; socklen_t len_src = sizeof(addr_src); char buffer[256]; ssize_t bytes_received; printf("Waiting for messages...\n"); bytes_received = recvfrom(sock_r, buffer, sizeof(buffer) - 1, 0, (struct sockaddr *)&addr_src, &len_src); if (bytes_received < 0) { perror("recvfrom failed"); } else { buffer[bytes_received] = '\0'; printf("Received %zd bytes from SA 0x%02X, PGN 0x%05X: \"%s\"\n", bytes_received, addr_src.can_addr.j1939.addr, addr_src.can_addr.j1939.pgn, buffer); } } ---------- j1939_example.c end ---------- ^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2025-11-22 13:03 UTC | newest] Thread overview: 8+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2025-11-20 10:11 [can/j1939] unregister_netdevice: waiting for vcan0 to become free. Usage count = 2 Tetsuo Handa 2025-11-21 9:06 ` Oleksij Rempel 2025-11-21 9:33 ` Tetsuo Handa 2025-11-21 10:00 ` Oleksij Rempel 2025-11-21 10:19 ` Tetsuo Handa 2025-11-21 10:31 ` Oleksij Rempel 2025-11-22 7:00 ` Tetsuo Handa 2025-11-22 13:03 ` Tetsuo Handa
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).