Linux CAN drivers development
 help / color / mirror / Atom feed
* [PATCH v2 (repost)] can: j1939: use netdevice_tracker for j1939_{priv,session,ecu} tracking
@ 2026-05-29  1:31 Tetsuo Handa
  2026-06-09 18:49 ` sashiko-bot
  0 siblings, 1 reply; 2+ messages in thread
From: Tetsuo Handa @ 2026-05-29  1:31 UTC (permalink / raw)
  To: linux-can, Robin van der Gracht, Oleksij Rempel, kernel,
	Oliver Hartkopp, Marc Kleine-Budde

syzbot is still reporting

  unregister_netdevice: waiting for vcan0 to become free. Usage count = 2

problem. A debug printk() patch in linux-next-20260508 identified that
there is dev_hold()/dev_put() imbalance in j1939_priv management.

  Call trace for vcan0[26] +4 at
     __dev_hold include/linux/netdevice.h:4470 [inline]
     netdev_hold include/linux/netdevice.h:4513 [inline]
     dev_hold include/linux/netdevice.h:4536 [inline]
     j1939_priv_create net/can/j1939/main.c:140 [inline]
     j1939_netdev_start+0x36b/0xc10 net/can/j1939/main.c:268
     j1939_sk_bind+0x853/0xb30 net/can/j1939/socket.c:506
     __sys_bind_socket net/socket.c:1948 [inline]
     __sys_bind+0x2e9/0x410 net/socket.c:1979

  Call trace for vcan0[28] -3 at
     __dev_put include/linux/netdevice.h:4456 [inline]
     netdev_put include/linux/netdevice.h:4523 [inline]
     dev_put include/linux/netdevice.h:4548 [inline]
     __j1939_priv_release net/can/j1939/main.c:166 [inline]
     kref_put include/linux/kref.h:65 [inline]
     j1939_priv_put+0x128/0x270 net/can/j1939/main.c:172
     j1939_sk_sock_destruct+0x52/0x90 net/can/j1939/socket.c:388
     __sk_destruct+0x8d/0x9d0 net/core/sock.c:2352
     rcu_do_batch kernel/rcu/tree.c:2617 [inline]
     rcu_core kernel/rcu/tree.c:2869 [inline]
     rcu_cpu_kthread+0x99e/0x1470 kernel/rcu/tree.c:2957
     smpboot_thread_fn+0x541/0xa50 kernel/smpboot.c:160
     kthread+0x388/0x470 kernel/kthread.c:436
     ret_from_fork+0x514/0xb70 arch/x86/kernel/process.c:158
     ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245

This refcount leak in j1939_priv might be caused by a refcount leak in
j1939_{session,ecu} because j1939_{session,ecu} holds a ref on j1939_priv.
For further investigation using upstream kernels, enable netdevice_tracker
in j1939_{priv,session,ecu} management.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
I'm waiting for your response. If you don't like this approach, please let me know.

 net/can/j1939/bus.c        | 2 ++
 net/can/j1939/j1939-priv.h | 3 +++
 net/can/j1939/main.c       | 8 ++++----
 net/can/j1939/transport.c  | 2 ++
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/net/can/j1939/bus.c b/net/can/j1939/bus.c
index dc374286eeb6..cdc3c0a71937 100644
--- a/net/can/j1939/bus.c
+++ b/net/can/j1939/bus.c
@@ -20,6 +20,7 @@ static void __j1939_ecu_release(struct kref *kref)
 	struct j1939_priv *priv = ecu->priv;
 
 	list_del(&ecu->list);
+	netdev_put(priv->ndev, &ecu->priv_dev_tracker);
 	kfree(ecu);
 	j1939_priv_put(priv);
 }
@@ -155,6 +156,7 @@ struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name)
 	if (!ecu)
 		return ERR_PTR(-ENOMEM);
 	kref_init(&ecu->kref);
+	netdev_hold(priv->ndev, &ecu->priv_dev_tracker, gfp_any());
 	ecu->addr = J1939_IDLE_ADDR;
 	ecu->name = name;
 
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
index 81f58924b4ac..cf26352d1d8c 100644
--- a/net/can/j1939/j1939-priv.h
+++ b/net/can/j1939/j1939-priv.h
@@ -38,6 +38,7 @@ struct j1939_ecu {
 	struct hrtimer ac_timer;
 	struct kref kref;
 	struct j1939_priv *priv;
+	netdevice_tracker priv_dev_tracker;
 
 	/* count users, to help transport protocol decide for interaction */
 	int nusers;
@@ -60,6 +61,7 @@ struct j1939_priv {
 	rwlock_t lock;
 
 	struct net_device *ndev;
+	netdevice_tracker dev_tracker;
 
 	/* list of 256 ecu ptrs, that cache the claimed addresses.
 	 * also protected by the above lock
@@ -230,6 +232,7 @@ enum j1939_session_state {
 
 struct j1939_session {
 	struct j1939_priv *priv;
+	netdevice_tracker priv_dev_tracker;
 	struct list_head active_session_list_entry;
 	struct list_head sk_session_queue_entry;
 	struct kref kref;
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index 9937c04241bc..5e5e6c228f22 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -137,7 +137,7 @@ static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
 	priv->ndev = ndev;
 	kref_init(&priv->kref);
 	kref_init(&priv->rx_kref);
-	dev_hold(ndev);
+	netdev_hold(ndev, &priv->dev_tracker, GFP_KERNEL);
 
 	netdev_dbg(priv->ndev, "%s : 0x%p\n", __func__, priv);
 
@@ -163,7 +163,7 @@ static void __j1939_priv_release(struct kref *kref)
 	WARN_ON_ONCE(!list_empty(&priv->ecus));
 	WARN_ON_ONCE(!list_empty(&priv->j1939_socks));
 
-	dev_put(ndev);
+	netdev_put(ndev, &priv->dev_tracker);
 	kfree(priv);
 }
 
@@ -281,7 +281,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
 		 */
 		kref_get(&priv_new->rx_kref);
 		mutex_unlock(&j1939_netdev_lock);
-		dev_put(ndev);
+		netdev_put(ndev, &priv->dev_tracker);
 		kfree(priv);
 		return priv_new;
 	}
@@ -298,7 +298,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
 	j1939_priv_set(ndev, NULL);
 	mutex_unlock(&j1939_netdev_lock);
 
-	dev_put(ndev);
+	netdev_put(ndev, &priv->dev_tracker);
 	kfree(priv);
 
 	return ERR_PTR(ret);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index df93d57907da..158433511552 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -283,6 +283,7 @@ static void j1939_session_destroy(struct j1939_session *session)
 		kfree_skb(skb);
 	}
 	__j1939_session_drop(session);
+	netdev_put(session->priv->ndev, &session->priv_dev_tracker);
 	j1939_priv_put(session->priv);
 	kfree(session);
 }
@@ -1515,6 +1516,7 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv,
 	INIT_LIST_HEAD(&session->active_session_list_entry);
 	INIT_LIST_HEAD(&session->sk_session_queue_entry);
 	kref_init(&session->kref);
+	netdev_hold(priv->ndev, &session->priv_dev_tracker, gfp_any());
 
 	j1939_priv_get(priv);
 	session->priv = priv;
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v2 (repost)] can: j1939: use netdevice_tracker for j1939_{priv,session,ecu} tracking
  2026-05-29  1:31 [PATCH v2 (repost)] can: j1939: use netdevice_tracker for j1939_{priv,session,ecu} tracking Tetsuo Handa
@ 2026-06-09 18:49 ` sashiko-bot
  0 siblings, 0 replies; 2+ messages in thread
From: sashiko-bot @ 2026-06-09 18:49 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: linux-can, Marc Kleine-Budde, Oleksij Rempel, Oliver Hartkopp,
	Vincent Mailhol

Thank you for your contribution! Sashiko AI review found 5 potential issue(s) to consider:

Pre-existing issues:
- [High] In `j1939_netdev_start`, if `j1939_can_rx_register` fails, the error path forcefully calls `kfree(priv)` instead of properly dropping the reference via `j1939_priv_put(priv)`. This causes a use-after-free if a concurrent CAN RX interrupt accessed `priv` and created a session or ECU.
- [High] The destructors for `j1939_session` and `j1939_ecu` do not explicitly cancel their embedded hrtimers (`txtimer`, `rxtimer`, `ac_timer`) before freeing the memory.
- [High] Using `gfp_any()` under a locked context (`priv->lock`) may resolve to `GFP_KERNEL` and cause the thread to sleep while atomic.
- [High] Double dropping of SKB references in `j1939_session_destroy` causes premature SKB freeing, refcount underflow, and potential Use-After-Free.
- [High] Unprotected list deletion in `__j1939_ecu_release` risks list corruption if `j1939_ecu_put` is called without holding the global write lock.
--

commit 7da6da8c70916fd416a2173c592294f932e47769
Author: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Subject: can: j1939: use netdevice_tracker for j1939_{priv,session,ecu} tracking

This patch converts j1939_{priv,session,ecu} tracking to use the
netdevice_tracker API. It replaces dev_hold() and dev_put() with
netdev_hold() and netdev_put() to help debug a reference count leak
reported by syzbot.

> diff --git a/net/can/j1939/bus.c b/net/can/j1939/bus.c
> index dc374286eeb6e..cdc3c0a719379 100644
> --- a/net/can/j1939/bus.c
> +++ b/net/can/j1939/bus.c
> @@ -20,6 +20,7 @@ static void __j1939_ecu_release(struct kref *kref)
>  	struct j1939_priv *priv = ecu->priv;
>  
>  	list_del(&ecu->list);

[Severity: High]
This isn't a bug introduced by this patch, but does this unprotected list
deletion risk list corruption if __j1939_ecu_release() is called without
holding the global write lock?

If the lock is not held, it races with other readers/writers of priv->ecus.
Conversely, if the lock is required to be held, calling j1939_priv_put(priv)
later in this function could free priv while the lock is still held, leading
to a use-after-free on the lock itself during unlock.

> +	netdev_put(priv->ndev, &ecu->priv_dev_tracker);
>  	kfree(ecu);

[Severity: High]
This is a pre-existing issue, but does kfree(ecu) risk a use-after-free if
called while ecu->ac_timer is pending?

If this memory is freed without explicitly canceling the hrtimer, an
asynchronous timer callback might subsequently execute on the freed memory.

>  	j1939_priv_put(priv);
>  }
> @@ -155,6 +156,7 @@ struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name)
>  	if (!ecu)
>  		return ERR_PTR(-ENOMEM);
>  	kref_init(&ecu->kref);
> +	netdev_hold(priv->ndev, &ecu->priv_dev_tracker, gfp_any());

[Severity: High]
This is a pre-existing issue, but will passing gfp_any() while holding
priv->lock cause the thread to sleep while atomic?

The gfp_any() helper checks in_softirq(). If priv->lock (an rwlock) was
acquired via write_lock_irqsave or write_lock in process context, in_softirq()
will evaluate to false, causing gfp_any() to return GFP_KERNEL. Allocating
memory with GFP_KERNEL while holding an rwlock can cause the thread to sleep
while atomic.

>  	ecu->addr = J1939_IDLE_ADDR;
>  	ecu->name = name;
>  

[ ... ]

> diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
> index 9937c04241bc3..5e5e6c228f225 100644
> --- a/net/can/j1939/main.c
> +++ b/net/can/j1939/main.c

[ ... ]

> @@ -298,7 +298,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
>  	j1939_priv_set(ndev, NULL);
>  	mutex_unlock(&j1939_netdev_lock);
>  
> -	dev_put(ndev);
> +	netdev_put(ndev, &priv->dev_tracker);
>  	kfree(priv);

[Severity: High]
This is a pre-existing issue, but does this kfree(priv) risk a use-after-free?

In j1939_netdev_start(), priv is created with a kref of 1 and exposed via
j1939_priv_set(ndev, priv). The call to j1939_can_rx_register(priv) then makes
priv accessible to incoming packets.

If a packet arrives concurrently, the rx handler could take a reference to
priv and create sessions or ecus. If j1939_can_rx_register() subsequently
fails, this error path forcefully calls kfree(priv) unconditionally, bypassing
the reference count. Any concurrently created sessions would then access the
freed priv memory. Should this use j1939_priv_put(priv) instead?

>  
>  	return ERR_PTR(ret);

[ ... ]

> diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
> index df93d57907da7..158433511552e 100644
> --- a/net/can/j1939/transport.c
> +++ b/net/can/j1939/transport.c
> @@ -283,6 +283,7 @@ static void j1939_session_destroy(struct j1939_session *session)

[Severity: High]
This isn't a bug introduced by this patch, but does the loop in
j1939_session_destroy() underflow the skb reference count?

Looking at the loop just above this context:

	while ((skb = skb_dequeue(&session->skb_queue)) != NULL) {
		/* drop ref taken in j1939_session_skb_queue() */
		skb_unref(skb);
		kfree_skb(skb);
	}

j1939_session_new() only took one extra reference via skb_get(skb). Calling
both skb_unref() and kfree_skb() drops the reference count twice for each
dequeued skb, which can underflow the refcount and cause a use-after-free
if the original owner still holds a valid pointer.

>  		kfree_skb(skb);
>  	}
>  	__j1939_session_drop(session);
> +	netdev_put(session->priv->ndev, &session->priv_dev_tracker);
>  	j1939_priv_put(session->priv);
>  	kfree(session);

[Severity: High]
This is a pre-existing issue, but does freeing the session here without
stopping its timers risk a use-after-free?

If the session is destroyed while session->txtimer or session->rxtimer is
pending, the asynchronous timer callbacks could execute after kfree(session).
Should the hrtimers be explicitly canceled before the memory is freed?

>  }

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/2bc332e0-b250-4679-a075-1c413ad843ce@I-love.SAKURA.ne.jp?part=1

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-06-09 18:49 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-29  1:31 [PATCH v2 (repost)] can: j1939: use netdevice_tracker for j1939_{priv,session,ecu} tracking Tetsuo Handa
2026-06-09 18:49 ` sashiko-bot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox