* Re: [PATCH 1/5] can: bcm: defer rx_op deallocation to workqueue to fix thrtimer UAF
From: Oliver Hartkopp @ 2026-06-14 7:49 UTC (permalink / raw)
To: linux-can; +Cc: Marc Kleine-Budde, Lee Jones, Ji'an
In-Reply-To: <20260612-bcm_fixes-v1-1-ca2fa07ee70f@hartkopp.net>
Document successful testing for the patch.
On 12.06.26 13:05, Oliver Hartkopp wrote:
> From: Lee Jones <lee@kernel.org>
>
> Commit f1b4e32aca08 ("can: bcm: use call_rcu() instead of costly
> synchronize_rcu()") replaced synchronize_rcu() in bcm_delete_rx_op()
> with call_rcu() and introduced the RX_NO_AUTOTIMER flag.
>
> However, this flag check was omitted for thrtimer in the packet rx
> fast-path. During BCM RX operation teardown, a concurrent RCU reader
> (bcm_rx_handler) can race and re-arm thrtimer via
> bcm_rx_update_and_send() after call_rcu() has been scheduled. Once
> the RCU grace period elapses, bcm_op is freed. The subsequently
> firing thrtimer then dereferences the deallocated op, causing a UAF.
>
> Adding flag checks to the rx fast-path (bcm_rx_update_and_send) does not
> fully close the TOCTOU race and introduces latency for every CAN frame.
> Conversely, calling hrtimer_cancel() directly inside the RCU callback
> (softirq context) is fatal as hrtimer_cancel() can sleep, triggering
> a "scheduling while atomic" panic.
>
> Resolve this by deferring the timer cancellation and memory free to a
> dedicated unbound workqueue (bcm_wq). The RCU callback now queues a
> work item to bcm_wq, which safely cancels both timers and deallocates
> memory in sleepable process context. A dedicated workqueue is used to
> prevent system-wide WQ saturation and is cleanly flushed/destroyed
> on module unload to avoid rmmod page faults.
>
> Fixes: f1b4e32aca08 ("can: bcm: use call_rcu() instead of costly synchronize_rcu()")
> Signed-off-by: Lee Jones <lee@kernel.org>
Tested-by: Oliver Hartkopp <socketcan@hartkopp.net>
Tested-by: Ji'an <eilaimemedsnaimel@gmail.com>
> ---
> net/can/bcm.c | 29 ++++++++++++++++++++++++++---
> 1 file changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/net/can/bcm.c b/net/can/bcm.c
> index a4bef2c48a55..c49b09f3229f 100644
> --- a/net/can/bcm.c
> +++ b/net/can/bcm.c
> @@ -56,10 +56,11 @@
> #include <linux/can.h>
> #include <linux/can/core.h>
> #include <linux/can/skb.h>
> #include <linux/can/bcm.h>
> #include <linux/slab.h>
> +#include <linux/workqueue.h>
> #include <linux/spinlock.h>
> #include <net/can.h>
> #include <net/sock.h>
> #include <net/net_namespace.h>
>
> @@ -90,10 +91,12 @@ MODULE_LICENSE("Dual BSD/GPL");
> MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>");
> MODULE_ALIAS("can-proto-2");
>
> #define BCM_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_ifindex)
>
> +static struct workqueue_struct *bcm_wq;
> +
> /*
> * easy access to the first 64 bit of can(fd)_frame payload. cp->data is
> * 64 bit aligned so the offset has to be multiples of 8 which is ensured
> * by the only callers in bcm_rx_cmp_to_index() bcm_rx_handler().
> */
> @@ -103,10 +106,11 @@ static inline u64 get_u64(const struct canfd_frame *cp, int offset)
> }
>
> struct bcm_op {
> struct list_head list;
> struct rcu_head rcu;
> + struct work_struct work;
> int ifindex;
> canid_t can_id;
> u32 flags;
> unsigned long frames_abs, frames_filtered;
> struct bcm_timeval ival1, ival2;
> @@ -791,23 +795,34 @@ static struct bcm_op *bcm_find_op(struct list_head *ops,
> }
>
> return NULL;
> }
>
> -static void bcm_free_op_rcu(struct rcu_head *rcu_head)
> +static void bcm_free_op_work(struct work_struct *work)
> {
> - struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
> + struct bcm_op *op = container_of(work, struct bcm_op, work);
> +
> + hrtimer_cancel(&op->timer);
> + hrtimer_cancel(&op->thrtimer);
>
> if ((op->frames) && (op->frames != &op->sframe))
> kfree(op->frames);
>
> if ((op->last_frames) && (op->last_frames != &op->last_sframe))
> kfree(op->last_frames);
>
> kfree(op);
> }
>
> +static void bcm_free_op_rcu(struct rcu_head *rcu_head)
> +{
> + struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
> +
> + INIT_WORK(&op->work, bcm_free_op_work);
> + queue_work(bcm_wq, &op->work);
> +}
> +
> static void bcm_remove_op(struct bcm_op *op)
> {
> hrtimer_cancel(&op->timer);
> hrtimer_cancel(&op->thrtimer);
>
> @@ -1837,15 +1852,19 @@ static struct notifier_block canbcm_notifier = {
>
> static int __init bcm_module_init(void)
> {
> int err;
>
> + bcm_wq = alloc_workqueue("can-bcm-wq", WQ_UNBOUND, 0);
> + if (!bcm_wq)
> + return -ENOMEM;
> +
> pr_info("can: broadcast manager protocol\n");
>
> err = register_pernet_subsys(&canbcm_pernet_ops);
> if (err)
> - return err;
> + goto register_pernet_failed;
>
> err = register_netdevice_notifier(&canbcm_notifier);
> if (err)
> goto register_notifier_failed;
>
> @@ -1859,17 +1878,21 @@ static int __init bcm_module_init(void)
>
> register_proto_failed:
> unregister_netdevice_notifier(&canbcm_notifier);
> register_notifier_failed:
> unregister_pernet_subsys(&canbcm_pernet_ops);
> +register_pernet_failed:
> + destroy_workqueue(bcm_wq);
> return err;
> }
>
> static void __exit bcm_module_exit(void)
> {
> can_proto_unregister(&bcm_can_proto);
> unregister_netdevice_notifier(&canbcm_notifier);
> unregister_pernet_subsys(&canbcm_pernet_ops);
> + rcu_barrier();
> + destroy_workqueue(bcm_wq);
> }
>
> module_init(bcm_module_init);
> module_exit(bcm_module_exit);
>
^ permalink raw reply
* Re: [PATCHv2 0/4] m68k: coldfire: fix non-standard readX()/writeX() functions
From: Paolo Abeni @ 2026-06-13 9:22 UTC (permalink / raw)
To: Greg Ungerer, linux-m68k
Cc: linux-kernel, arnd, wei.fang, frank.li, shenwei.wang, imx, netdev,
nico, adureghello, ulfh, linux-mmc, linux-can, linux-spi, olteanv
In-Reply-To: <20260609142139.1563360-1-gerg@linux-m68k.org>
On 6/9/26 4:12 PM, Greg Ungerer wrote:
> This odd collection of patches is aimed at fixing the non-standard ColdFire
> set of readX()/writeX() IO access functions. Instead switching to using the
> asm-generic definitions in include/asm-generic/io.h. The difficulty comes
> in trying not to break any drivers with this change.
>
> The implementation of the readX()/writeX() family of IO access functions
> is non-standard on ColdFire platforms. They either return big-endian (that
> is native endian) data, or on platforms with PCI bus support check the
> supplied address and return either big or little endian data based on that
> check. This is non-standard, they are expected to always return
> little-endian byte ordered data. Unfortunately this behavior also means
> that ioreadX()/iowroteX() and their big-endian counter parts
> ioreadXbe()/iowriteXbe() are currently broken because they are implemented
> using the readX()/writeX() functions.
>
> Patches 1, 2 and 3 in this series are specific driver changes that can be
> made independently of the final ColdFire readX()/writeX() change.
>
> Patch 4 is the actual switch to ColdFire building using asm-generic
> readX()/writeX(), but also contains three driver fixes that are not easily
> handled independently.
>
> Note that I don't have access to all supported hardware needed to fully
> test all these changes. I have tested what I have, a bunch of the standard
> Freescale ColdFire eval boards, and inspected generated code for differences.
>
> Note also that patch 3 relies on changes that are currently only in
> linux-next, and are scheduled to hit mainline during the next v7.2
> merge window. Those changes are also available in an immutable git tree
> at git://git.kernel.org/pub/scm/linux/kernel/git/gerg/m68knommu.git
> cf-internal-io branch.
I understand that with this series you are targeting the m68K tree, am I
correct?
A possibly better option would be, after that the pre-req patches land
into Linus's tree, to share an immutable branch for this series, so that
both m68k and net-next could pull it.
/P
^ permalink raw reply
* Re: [PATCH 5/5] can: bcm: use list_add_rcu() in bcm_[rx|tx]_setup()
From: sashiko-bot @ 2026-06-12 11:51 UTC (permalink / raw)
To: Oliver Hartkopp
Cc: Marc Kleine-Budde, linux-can, Oleksij Rempel, Vincent Mailhol
In-Reply-To: <20260612-bcm_fixes-v1-5-ca2fa07ee70f@hartkopp.net>
Thank you for your contribution! Sashiko AI review found 4 potential issue(s) to consider:
Pre-existing issues:
- [Critical] Lockless update of `op->frames` in `bcm_tx_setup()` creates a TOCTOU race allowing transmission of unvalidated CAN frames, leading to Out-of-Bounds reads in CAN drivers.
- [Critical] Complete lack of frame length validation in `bcm_rx_setup()` allows Out-of-Bounds reads when responding to RTR frames.
- [High] Lockless updates to 64-bit ktime_t timer variables in `bcm_tx_setup()` lead to torn reads and soft lockups on 32-bit platforms.
- [High] Objects are passed to call_rcu() while still linked in the RCU-protected tx_ops and rx_ops lists during socket release.
--
commit 0171fb1b05b0628e97f895bb66387fd0bd7335bb
Author: Oliver Hartkopp <socketcan@hartkopp.net>
can: bcm: use list_add_rcu() in bcm_[rx|tx]_setup()
sashiko-bot remarked the missing use of list_add_rcu() in
bcm_[rx|tx]_setup() to have a proper initialized bcm_op structure
when bcm_proc_show() traverses the bcm_op's under rcu_read_lock().
Link: https://lore.kernel.org/linux-can/20260610094654.A1FFE1F00893@smtp.kernel.org/
> diff --git a/net/can/bcm.c b/net/can/bcm.c
> index f1f1f11d1f92a..6a1d52189c0ab 100644
> --- a/net/can/bcm.c
> +++ b/net/can/bcm.c
[ ... ]
> @@ -1111,8 +1112,7 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
> hrtimer_setup(&op->thrtimer, hrtimer_dummy_timeout, CLOCK_MONOTONIC,
> HRTIMER_MODE_REL_SOFT);
>
> - /* add this bcm_op to the list of the tx_ops */
> - list_add(&op->list, &bo->tx_ops);
> + add_op_to_list = true;
>
> } /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */
[Severity: Critical]
This is a pre-existing issue, but does this lockless update of op->frames
create a race condition?
In bcm_tx_setup(), user data is copied directly into the active array:
err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz);
The validation (cf->len > 64 or > 8) happens after this copy without
holding op->bcm_tx_lock or disabling the active timer.
Could bcm_tx_timeout_handler() fire during this window, causing
bcm_can_tx() to read and transmit a partially updated or unvalidated
frame? If an attacker supplies an iovec that faults immediately after
the length byte is copied, leaving a malicious length (like 255), does
this cause an out-of-bounds read in the CAN driver when the timer
transmits the frame?
[Severity: High]
This is a pre-existing issue, but do the lockless updates to 64-bit timer
variables lead to torn reads on 32-bit platforms?
In bcm_tx_setup(), op->kt_ival1 and op->kt_ival2 are updated based on
user input without holding op->bcm_tx_lock or disabling softirqs.
Since bcm_tx_timeout_handler() reads these variables in softirq context:
if (op->kt_ival1 && (op->count > 0)) {
...
} else if (op->kt_ival2) {
Could a softirq interrupting the update read a torn ktime_t value (a mix
of old and new 32-bit halves) on 32-bit systems? Would passing this
garbage value to hrtimer_set_expires() result in the timer firing
continuously or hanging indefinitely?
> @@ -1134,6 +1134,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
> op->flags |= TX_ANNOUNCE;
> }
>
> + /* add this bcm_op to the list of the tx_ops? */
> + if (add_op_to_list)
> + list_add_rcu(&op->list, &bo->tx_ops);
> +
[Severity: High]
This is a pre-existing issue, but does the teardown loop in bcm_release()
pass objects to call_rcu() while they are still linked in the RCU-protected
lists?
While this patch correctly updates list_add_rcu() for bo->tx_ops and
bo->rx_ops, bcm_release() iterates over these lists with
list_for_each_entry_safe() and calls bcm_remove_op().
Since bcm_remove_op() immediately calls call_rcu(&op->rcu, bcm_free_op_rcu),
the elements are never unlinked using list_del_rcu() before the call_rcu()
invocation.
Does leaving these objects physically linked in the RCU-protected list
violate the remove-before-reclaim ordering rule, potentially allowing
new readers to find the object after the grace period begins?
[ ... ]
> @@ -1376,10 +1377,12 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
> bcm_rx_handler, op, "bcm", sk);
> if (err) {
> /* this bcm rx op is broken -> remove it */
> - list_del_rcu(&op->list);
> bcm_remove_op(op);
> return err;
> }
> +
> + /* add this bcm_op to the list of the rx_ops */
> + list_add_rcu(&op->list, &bo->rx_ops);
> }
[Severity: Critical]
This is a pre-existing issue, but does the lack of frame length validation
in bcm_rx_setup() allow out-of-bounds reads during RTR replies?
When CAN frames are copied from user-space into op->frames via
memcpy_from_msg() in bcm_rx_setup(), the length field is never validated.
If the user configures the RX operation with the RX_RTR_FRAME flag,
bcm_rx_handler() automatically replies to incoming RTR packets:
if (op->flags & RX_RTR_FRAME) {
if (rx_ext->skb->sk != op->sk)
bcm_can_tx(op);
return;
}
Could an attacker set an arbitrary length (e.g., 255) for
op->frames[0].len, causing bcm_can_tx() to send an unvalidated frame
and trigger an out-of-bounds heap read in the driver?
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260612-bcm_fixes-v1-0-ca2fa07ee70f@hartkopp.net?part=5
^ permalink raw reply
* Re: [PATCH 3/5] can: bcm: add locking when updating filter and timer values
From: sashiko-bot @ 2026-06-12 11:40 UTC (permalink / raw)
To: Oliver Hartkopp
Cc: Vincent Mailhol, linux-can, Marc Kleine-Budde, Oleksij Rempel
In-Reply-To: <20260612-bcm_fixes-v1-3-ca2fa07ee70f@hartkopp.net>
Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider:
Pre-existing issues:
- [High] The patch addresses race conditions in the RX path but misses identical missing lock vulnerabilities and partial state corruption bugs in the TX path (`bcm_tx_setup` and `bcm_can_tx`).
- [High] Data race in `bcm_rx_setup` when clearing `CAN_RTR_FLAG` for `RX_RTR_FRAME` operations, which can lead to torn reads and CAN packet storms.
- [High] `bcm_rx_handler` reads `op->flags` without lock protection, triggering a data race with `bcm_rx_setup` and potential data corruption.
--
commit 7bf24ff9623b82ba96d63789d48e3d741157ffeb
Author: Oliver Hartkopp <socketcan@hartkopp.net>
can: bcm: add locking when updating filter and timer values
This commit fixes a KCSAN-detected race condition where timer values
could be overwritten simultaneously in bcm_rx_setup(). It protects the
timer and filter data updates with a new spin_lock_bh().
Link: https://lore.kernel.org/linux-can/6975d5cf.a00a0220.33ccc7.0022.GAE@google.com/
> diff --git a/net/can/bcm.c b/net/can/bcm.c
> index 9475758a6749f..fc5158132b554 100644
> --- a/net/can/bcm.c
> +++ b/net/can/bcm.c
> @@ -129,6 +129,7 @@ struct bcm_op {
> struct sock *sk;
> struct net_device *rx_reg_dev;
> spinlock_t bcm_tx_lock; /* protect currframe/count in runtime updates */
> + spinlock_t bcm_rx_update_lock; /* protect filter/timer data updates */
> };
[Severity: High]
This is a pre-existing issue, but while we are adding bcm_rx_update_lock
to protect RX filter and timer updates, does the TX path need similar
protection?
In bcm_tx_setup(), when updating an existing operation, memcpy_from_msg()
copies user data directly into op->frames without locking, and 64-bit timer
values like op->kt_ival1 are updated locklessly.
Concurrently, if bcm_tx_timeout_handler() fires, it can read these torn
timer values. Also, bcm_can_tx() reads the frame payload from cf locklessly
via skb_put_data(), which races with memcpy_from_msg().
Furthermore, if bcm_tx_setup() encounters a validation error
(e.g., cf->len > 64) after memcpy_from_msg(), it returns -EINVAL without
rolling back. The active timer will continue firing, reading the malicious
length and allocating a normal-sized skb, potentially leading to an
out-of-bounds read in the CAN driver.
[ ... ]
> @@ -743,6 +755,8 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
> traffic_flags |= RX_OWN;
> }
>
> + spin_lock_bh(&op->bcm_rx_update_lock);
> +
> if (op->flags & RX_FILTER_ID) {
[Severity: High]
This is a pre-existing issue, but does bcm_rx_handler() read op->flags
without lock protection just before this new lock is acquired?
If a user updates an existing operation to toggle RX_RTR_FRAME, bcm_rx_setup()
updates op->flags and op->frames under the bcm_rx_update_lock.
However, earlier in bcm_rx_handler(), it checks op->flags locklessly:
if (op->flags & RX_RTR_FRAME) {
/* send reply for RTR-request (placed in op->frames[0]) */
bcm_can_tx(op);
return;
}
Could bcm_rx_handler() observe the newly written op->flags and immediately
call bcm_can_tx(), which will then read a stale or partially written
op->frames[0] to transmit? This seems to defeat the atomicity provided by
the newly introduced lock.
[ ... ]
> @@ -1277,10 +1315,13 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
> if (op->flags & SETTIMER) {
>
> /* set timer value */
> + spin_lock_bh(&op->bcm_rx_update_lock);
[Severity: High]
This is a pre-existing issue, but is there a data race when clearing
CAN_RTR_FLAG for RX_RTR_FRAME operations?
In bcm_rx_setup(), just above this block, it handles RX_RTR_FRAME:
if (op->flags & RX_RTR_FRAME) {
struct canfd_frame *frame0 = op->frames;
...
if ((op->flags & TX_CP_CAN_ID) ||
(frame0->can_id == op->can_id))
frame0->can_id = op->can_id & ~CAN_RTR_FLAG;
} else {
if (op->flags & SETTIMER) {
The frame0->can_id modification happens after bcm_rx_update_lock is already
unlocked.
Concurrently, bcm_rx_handler() checks op->flags & RX_RTR_FRAME and calls
bcm_can_tx(), which reads op->frames to send a reply. Could bcm_can_tx()
transmit the frame with CAN_RTR_FLAG still set before it is cleared here,
potentially triggering an infinite loopback storm?
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260612-bcm_fixes-v1-0-ca2fa07ee70f@hartkopp.net?part=3
^ permalink raw reply
* Re: [PATCH 2/5] can: bcm: mark intentional lockless read of bo->bound for KCSAN
From: sashiko-bot @ 2026-06-12 11:29 UTC (permalink / raw)
To: Oliver Hartkopp
Cc: Marc Kleine-Budde, Oleksij Rempel, linux-can, Vincent Mailhol
In-Reply-To: <20260612-bcm_fixes-v1-2-ca2fa07ee70f@hartkopp.net>
Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
Pre-existing issues:
- [High] Incomplete fix for KCSAN data race causing TOCTOU logic bug in CAN BCM socket binding. The patch annotates `bo->bound` with READ_ONCE()/WRITE_ONCE() but leaves an adjacent lockless read of `bo->ifindex` unprotected in `bcm_sendmsg()`. Because `bo->ifindex` is cached into a local variable before checking `bo->bound` and before acquiring `lock_sock()`, a concurrent `bcm_notify()` on device unregistration can clear `bo->bound` and `bo->ifindex` while `bcm_sendmsg()` retains and proceeds with the stale `ifindex`.
--
commit 1e0341f7b374835149bc461a869bd9dbd61ed130
Author: Oliver Hartkopp <socketcan@hartkopp.net>
can: bcm: mark intentional lockless read of bo->bound for KCSAN
This commit annotates an intentional lockless read of bo->bound with
READ_ONCE() and WRITE_ONCE() to resolve a KCSAN data race warning. The data
race occurs between bcm_sendmsg() and bcm_notify().
Link: https://lore.kernel.org/linux-can/CAGp+u1aBK8QVjsvAxM2Ldzep4rEbsP9x_pV3At4g=h1kVEtyhA@mail.gmail.com/
> Once the lock is acquired, any subsequent attempts to configure BCM
> jobs (like RX_SETUP) will safely fail because bcm_notify() has
> already cleared bo->ifindex to 0.
[Severity: High]
This isn't a bug introduced by this patch, but does this actually happen? Since
bcm_sendmsg() caches bo->ifindex into a local variable before checking
bo->bound or acquiring lock_sock(), won't it use the stale ifindex instead
of 0?
net/can/bcm.c:bcm_sendmsg() {
struct bcm_sock *bo = bcm_sk(sk);
int ifindex = bo->ifindex; /* default ifindex for this bcm_op */
...
/* Lockless fast-path check for bound socket */
if (!READ_ONCE(bo->bound))
return -ENOTCONN;
...
lock_sock(sk);
...
case RX_SETUP:
ret = bcm_rx_setup(&msg_head, msg, ifindex, sk);
...
}
If bcm_notify() unregisters the device concurrently, dev_get_by_index()
inside bcm_rx_setup() might fail using the stale ifindex, which would
bypass the filter registration entirely without returning an error.
> diff --git a/net/can/bcm.c b/net/can/bcm.c
> index c49b09f3229fa..9475758a6749f 100644
> --- a/net/can/bcm.c
> +++ b/net/can/bcm.c
[ ... ]
> @@ -1527,7 +1528,7 @@ static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
> bo->bcm_proc_read = NULL;
> }
> #endif
> - bo->bound = 0;
> + WRITE_ONCE(bo->bound, 0);
> bo->ifindex = 0;
> notify_enodev = 1;
> }
[Severity: High]
This is also a pre-existing issue, but could the lack of memory barriers cause
the CPU or compiler to reorder the lockless accesses in bcm_sendmsg(),
observing bo->ifindex == 0 while bo->bound == 1?
If so, bcm_sendmsg() would execute with ifindex = 0 and call
can_rx_register(..., NULL, ...), inadvertently establishing a global wildcard
filter that receives frames from all CAN interfaces in the namespace.
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260612-bcm_fixes-v1-0-ca2fa07ee70f@hartkopp.net?part=2
^ permalink raw reply
* Re: [PATCH 1/5] can: bcm: defer rx_op deallocation to workqueue to fix thrtimer UAF
From: sashiko-bot @ 2026-06-12 11:26 UTC (permalink / raw)
To: Oliver Hartkopp
Cc: Vincent Mailhol, Oleksij Rempel, Marc Kleine-Budde, linux-can
In-Reply-To: <20260612-bcm_fixes-v1-1-ca2fa07ee70f@hartkopp.net>
Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
- [High] Deferring timer cancellation to a workqueue fixes a UAF on `bcm_op`, but exposes a Use-After-Free (UAF) on `op->sk` because `bcm_release` does not wait for the workqueue to flush before freeing the socket.
--
commit 9591bb7d40d0a4fbd45c8700cf885c74ad1dcfec
Author: Lee Jones <lee@kernel.org>
can: bcm: defer rx_op deallocation to workqueue to fix thrtimer UAF
This commit defers the cancellation of timers and deallocation of bcm_op to
a dedicated workqueue. This prevents a use-after-free when a concurrent
RCU reader re-arms the timer after the RCU callback is scheduled.
> diff --git a/net/can/bcm.c b/net/can/bcm.c
> index a4bef2c48a559..c49b09f3229fa 100644
> --- a/net/can/bcm.c
> +++ b/net/can/bcm.c
[ ... ]
> @@ -806,6 +813,14 @@ static void bcm_free_op_rcu(struct rcu_head *rcu_head)
> kfree(op);
> }
>
> +static void bcm_free_op_rcu(struct rcu_head *rcu_head)
> +{
> + struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
> +
> + INIT_WORK(&op->work, bcm_free_op_work);
> + queue_work(bcm_wq, &op->work);
> +}
[Severity: High]
Could this deferral to a workqueue expose a use-after-free on op->sk?
If a socket is closed immediately after an operation is deleted:
1. bcm_delete_rx_op() removes op from the socket's rx_ops list and schedules
call_rcu().
2. A concurrent bcm_rx_handler() running under RCU re-arms op->thrtimer.
3. bcm_release() closes the socket. Since op was removed from rx_ops, it is
skipped, and bcm_release() drops the final reference via sock_put(sk),
freeing the socket.
Because the workqueue execution is asynchronous, bcm_release() does not wait
for it. If the re-armed thrtimer fires before bcm_free_op_work() can cancel
it, would this trigger a use-after-free?
bcm_rx_thr_handler()
bcm_send_to_user()
reason = sock_queue_rcv_skb_reason(op->sk, skb);
Since op->sk is assigned in bcm_rx_setup() without a corresponding
sock_hold(), does the deferred cleanup need to hold a socket reference to
ensure the socket outlives this work item and any pending timers?
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260612-bcm_fixes-v1-0-ca2fa07ee70f@hartkopp.net?part=1
^ permalink raw reply
* [PATCH 3/5] can: bcm: add locking when updating filter and timer values
From: Oliver Hartkopp @ 2026-06-12 11:05 UTC (permalink / raw)
To: linux-can; +Cc: Oliver Hartkopp, Marc Kleine-Budde, syzbot+75e5e4ae00c3b4bb544e
In-Reply-To: <20260612-bcm_fixes-v1-0-ca2fa07ee70f@hartkopp.net>
KCSAN detected a simultaneous access to timer values that can be
overwritten in bcm_rx_setup when updating timer and filter content.
Fix the RX_SETUP update case by protecting the timer and filter updates
with a spin_lock_bh().
Fixes: ffd980f976e7 ("[CAN]: Add broadcast manager (bcm) protocol")
Reported-by: syzbot+75e5e4ae00c3b4bb544e@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-can/6975d5cf.a00a0220.33ccc7.0022.GAE@google.com/
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
net/can/bcm.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 52 insertions(+), 9 deletions(-)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 9475758a6749..fc5158132b55 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -127,10 +127,11 @@ struct bcm_op {
struct canfd_frame sframe;
struct canfd_frame last_sframe;
struct sock *sk;
struct net_device *rx_reg_dev;
spinlock_t bcm_tx_lock; /* protect currframe/count in runtime updates */
+ spinlock_t bcm_rx_update_lock; /* protect filter/timer data updates */
};
struct bcm_sock {
struct sock sk;
int bound;
@@ -620,10 +621,12 @@ static void bcm_rx_starttimer(struct bcm_op *op)
static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
{
struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
struct bcm_msg_head msg_head;
+ spin_lock_bh(&op->bcm_rx_update_lock);
+
/* if user wants to be informed, when cyclic CAN-Messages come back */
if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) {
/* clear received CAN frames to indicate 'nothing received' */
memset(op->last_frames, 0, op->nframes * op->cfsiz);
}
@@ -636,10 +639,12 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
msg_head.ival1 = op->ival1;
msg_head.ival2 = op->ival2;
msg_head.can_id = op->can_id;
msg_head.nframes = 0;
+ spin_unlock_bh(&op->bcm_rx_update_lock);
+
bcm_send_to_user(op, &msg_head, NULL, 0);
return HRTIMER_NORESTART;
}
@@ -684,19 +689,26 @@ static int bcm_rx_thr_flush(struct bcm_op *op)
* Check for throttled data and send it to the userspace
*/
static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer)
{
struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer);
+ enum hrtimer_restart ret;
+
+ spin_lock_bh(&op->bcm_rx_update_lock);
if (bcm_rx_thr_flush(op)) {
hrtimer_forward_now(hrtimer, op->kt_ival2);
- return HRTIMER_RESTART;
+ ret = HRTIMER_RESTART;
} else {
/* rearm throttle handling */
op->kt_lastmsg = 0;
- return HRTIMER_NORESTART;
+ ret = HRTIMER_NORESTART;
}
+
+ spin_unlock_bh(&op->bcm_rx_update_lock);
+
+ return ret;
}
/*
* bcm_rx_handler - handle a CAN frame reception
*/
@@ -741,10 +753,12 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
traffic_flags |= RX_LOCAL;
if (skb->sk == op->sk)
traffic_flags |= RX_OWN;
}
+ spin_lock_bh(&op->bcm_rx_update_lock);
+
if (op->flags & RX_FILTER_ID) {
/* the easiest case */
bcm_rx_update_and_send(op, op->last_frames, rxframe,
traffic_flags);
goto rx_starttimer;
@@ -776,10 +790,12 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
}
}
rx_starttimer:
bcm_rx_starttimer(op);
+
+ spin_unlock_bh(&op->bcm_rx_update_lock);
}
/*
* helpers for bcm_op handling: find & delete bcm [rx|tx] op elements
*/
@@ -1155,10 +1171,12 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
return -EINVAL;
/* check the given can_id */
op = bcm_find_op(&bo->rx_ops, msg_head, ifindex);
if (op) {
+ void *new_frames = NULL;
+
/* update existing BCM operation */
/*
* Do we need more space for the CAN frames than currently
* allocated? -> This is a _really_ unusual use-case and
@@ -1166,33 +1184,53 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
*/
if (msg_head->nframes > op->nframes)
return -E2BIG;
if (msg_head->nframes) {
- /* update CAN frames content */
- err = memcpy_from_msg(op->frames, msg,
+ /* get new CAN frames content before locking */
+ new_frames = kmalloc(msg_head->nframes * op->cfsiz,
+ GFP_KERNEL);
+ if (!new_frames)
+ return -ENOMEM;
+
+ err = memcpy_from_msg(new_frames, msg,
msg_head->nframes * op->cfsiz);
- if (err < 0)
+ if (err < 0) {
+ kfree(new_frames);
return err;
-
- /* clear last_frames to indicate 'nothing received' */
- memset(op->last_frames, 0, msg_head->nframes * op->cfsiz);
+ }
}
+ spin_lock_bh(&op->bcm_rx_update_lock);
op->nframes = msg_head->nframes;
op->flags = msg_head->flags;
+ if (msg_head->nframes) {
+ /* update CAN frames content */
+ memcpy(op->frames, new_frames,
+ msg_head->nframes * op->cfsiz);
+
+ /* clear last_frames to indicate 'nothing received' */
+ memset(op->last_frames, 0,
+ msg_head->nframes * op->cfsiz);
+ }
+ spin_unlock_bh(&op->bcm_rx_update_lock);
+
+ /* free temporary frames / kfree(NULL) is safe */
+ kfree(new_frames);
+
/* Only an update -> do not call can_rx_register() */
do_rx_register = 0;
} else {
/* insert new BCM operation for the given can_id */
op = kzalloc(OPSIZ, GFP_KERNEL);
if (!op)
return -ENOMEM;
spin_lock_init(&op->bcm_tx_lock);
+ spin_lock_init(&op->bcm_rx_update_lock);
op->can_id = msg_head->can_id;
op->nframes = msg_head->nframes;
op->cfsiz = CFSIZ(msg_head->flags);
op->flags = msg_head->flags;
@@ -1275,26 +1313,31 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
} else {
if (op->flags & SETTIMER) {
/* set timer value */
+ spin_lock_bh(&op->bcm_rx_update_lock);
op->ival1 = msg_head->ival1;
op->ival2 = msg_head->ival2;
op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1);
op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2);
+ op->kt_lastmsg = 0;
+ spin_unlock_bh(&op->bcm_rx_update_lock);
/* disable an active timer due to zero value? */
if (!op->kt_ival1)
hrtimer_cancel(&op->timer);
/*
* In any case cancel the throttle timer, flush
* potentially blocked msgs and reset throttle handling
*/
- op->kt_lastmsg = 0;
hrtimer_cancel(&op->thrtimer);
+
+ spin_lock_bh(&op->bcm_rx_update_lock);
bcm_rx_thr_flush(op);
+ spin_unlock_bh(&op->bcm_rx_update_lock);
}
if ((op->flags & STARTTIMER) && op->kt_ival1)
hrtimer_start(&op->timer, op->kt_ival1,
HRTIMER_MODE_REL_SOFT);
--
2.53.0
^ permalink raw reply related
* [PATCH 4/5] can: bcm: fix CAN frame rx/tx statistics
From: Oliver Hartkopp @ 2026-06-12 11:05 UTC (permalink / raw)
To: linux-can; +Cc: Oliver Hartkopp, Marc Kleine-Budde
In-Reply-To: <20260612-bcm_fixes-v1-0-ca2fa07ee70f@hartkopp.net>
KCSAN detected a data race within the bcm_rx_handler() when two CAN frames
have been simultaneously received and processed in a single rx op by two
different CPUs.
Use atomic operations with (signed) long data types to access the
statistics in the hot path to fix the KCSAN complaint.
Additionally simplify the update and check of statistics overflow by
using the atomic operations in a separate bcm_update_stats() function.
Fixes: ffd980f976e7 ("[CAN]: Add broadcast manager (bcm) protocol")
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
net/can/bcm.c | 38 ++++++++++++++++++++++++--------------
1 file changed, 24 insertions(+), 14 deletions(-)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index fc5158132b55..f1f1f11d1f92 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -110,11 +110,11 @@ struct bcm_op {
struct rcu_head rcu;
struct work_struct work;
int ifindex;
canid_t can_id;
u32 flags;
- unsigned long frames_abs, frames_filtered;
+ atomic_long_t frames_abs, frames_filtered;
struct bcm_timeval ival1, ival2;
struct hrtimer timer, thrtimer;
ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg;
int rx_ifindex;
int cfsiz;
@@ -227,14 +227,17 @@ static int bcm_proc_show(struct seq_file *m, void *v)
rcu_read_lock();
list_for_each_entry_rcu(op, &bo->rx_ops, list) {
- unsigned long reduction;
+ long reduction, frames_filtered, frames_abs;
+
+ frames_filtered = atomic_long_read(&op->frames_filtered);
+ frames_abs = atomic_long_read(&op->frames_abs);
/* print only active entries & prevent division by zero */
- if (!op->frames_abs)
+ if (!frames_abs)
continue;
seq_printf(m, "rx_op: %03X %-5s ", op->can_id,
bcm_proc_getifname(net, ifname, op->ifindex));
@@ -252,13 +255,13 @@ static int bcm_proc_show(struct seq_file *m, void *v)
if (op->kt_ival2)
seq_printf(m, "thr=%lld ",
(long long)ktime_to_us(op->kt_ival2));
seq_printf(m, "# recv %ld (%ld) => reduction: ",
- op->frames_filtered, op->frames_abs);
+ frames_filtered, frames_abs);
- reduction = 100 - (op->frames_filtered * 100) / op->frames_abs;
+ reduction = 100 - (frames_filtered * 100) / frames_abs;
seq_printf(m, "%s%ld%%\n",
(reduction == 100) ? "near " : "", reduction);
}
@@ -278,20 +281,30 @@ static int bcm_proc_show(struct seq_file *m, void *v)
if (op->kt_ival2)
seq_printf(m, "t2=%lld ",
(long long)ktime_to_us(op->kt_ival2));
- seq_printf(m, "# sent %ld\n", op->frames_abs);
+ seq_printf(m, "# sent %ld\n",
+ atomic_long_read(&op->frames_abs));
}
seq_putc(m, '\n');
rcu_read_unlock();
return 0;
}
#endif /* CONFIG_PROC_FS */
+static void bcm_update_stats(struct bcm_op *op)
+{
+ /* prevent statistics overflow */
+ if (atomic_long_inc_return(&op->frames_abs) > LONG_MAX / 100) {
+ atomic_long_set(&op->frames_filtered, 0);
+ atomic_long_set(&op->frames_abs, 0);
+ }
+}
+
/*
* bcm_can_tx - send the (next) CAN frame to the appropriate CAN interface
* of the given bcm tx op
*/
static void bcm_can_tx(struct bcm_op *op)
@@ -338,11 +351,11 @@ static void bcm_can_tx(struct bcm_op *op)
/* update currframe and count under lock protection */
spin_lock_bh(&op->bcm_tx_lock);
if (!err)
- op->frames_abs++;
+ bcm_update_stats(op);
op->currframe++;
/* reached last frame? */
if (op->currframe >= op->nframes)
@@ -491,16 +504,13 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
*/
static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data)
{
struct bcm_msg_head head;
- /* update statistics */
- op->frames_filtered++;
-
- /* prevent statistics overflow */
- if (op->frames_filtered > ULONG_MAX/100)
- op->frames_filtered = op->frames_abs = 0;
+ /* update statistics (frames_filtered <= frames_abs) */
+ if (atomic_long_read(&op->frames_abs))
+ atomic_long_inc(&op->frames_filtered);
/* this element is not throttled anymore */
data->flags &= ~RX_THR;
memset(&head, 0, sizeof(head));
@@ -737,11 +747,11 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
/* save rx timestamp */
op->rx_stamp = skb->tstamp;
/* save originator for recvfrom() */
op->rx_ifindex = skb->dev->ifindex;
/* update statistics */
- op->frames_abs++;
+ bcm_update_stats(op);
if (op->flags & RX_RTR_FRAME) {
/* send reply for RTR-request (placed in op->frames[0]) */
bcm_can_tx(op);
return;
--
2.53.0
^ permalink raw reply related
* [PATCH 2/5] can: bcm: mark intentional lockless read of bo->bound for KCSAN
From: Oliver Hartkopp @ 2026-06-12 11:05 UTC (permalink / raw)
To: linux-can; +Cc: Oliver Hartkopp, Marc Kleine-Budde, Ginger
In-Reply-To: <20260612-bcm_fixes-v1-0-ca2fa07ee70f@hartkopp.net>
A static analyzer and KCSAN can detect a potential data race in
net/can/bcm.c between bcm_sendmsg() and bcm_notify().
bcm_sendmsg() reads bo->bound without holding the socket lock to
perform a fast-path check for unbound sockets before checking the
message length:
/* Lockless fast-path check for bound socket */
if (!bo->bound)
return -ENOTCONN;
Concurrently, bcm_notify() can clear bo->bound under lock_sock(sk)
protection during a netdevice notification event:
lock_sock(sk);
...
bo->bound = 0;
bo->ifindex = 0;
notify_enodev = 1;
This lockless read is intentional and functionally safe. If
bcm_sendmsg() passes the check while bcm_notify() is running, the
socket will block on the subsequent lock_sock(sk) call.
Once the lock is acquired, any subsequent attempts to configure BCM
jobs (like RX_SETUP) will safely fail because bcm_notify() has
already cleared bo->ifindex to 0. While a user-defined ifindex of 0
is normally a valid feature to listen on "all" CAN interfaces,
bcm_notify() only triggers for specific real CAN devices. In this
unregister context, the combination of bound=0 and ifindex=0 effectively
invalidates the socket binding. This prevents the registration of
stale CAN filters and drops downstream operations safely without any
memory corruption or stale state exploitation.
However, to comply with the Linux kernel memory model and to eliminate
false-positive warnings from concurrency sanitizers (KCSAN), annotate
this intentional data race using READ_ONCE() and WRITE_ONCE().
Reported-by: Ginger <ginger.jzllee@gmail.com>
Closes: https://lore.kernel.org/linux-can/CAGp+u1aBK8QVjsvAxM2Ldzep4rEbsP9x_pV3At4g=h1kVEtyhA@mail.gmail.com/
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
net/can/bcm.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index c49b09f3229f..9475758a6749 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1391,11 +1391,12 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
int ifindex = bo->ifindex; /* default ifindex for this bcm_op */
struct bcm_msg_head msg_head;
int cfsiz;
int ret; /* read bytes or error codes as return value */
- if (!bo->bound)
+ /* Lockless fast-path check for bound socket */
+ if (!READ_ONCE(bo->bound))
return -ENOTCONN;
/* check for valid message length from userspace */
if (size < MHSIZ)
return -EINVAL;
@@ -1525,11 +1526,11 @@ static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
if (sock_net(sk)->can.bcmproc_dir && bo->bcm_proc_read) {
remove_proc_entry(bo->procname, sock_net(sk)->can.bcmproc_dir);
bo->bcm_proc_read = NULL;
}
#endif
- bo->bound = 0;
+ WRITE_ONCE(bo->bound, 0);
bo->ifindex = 0;
notify_enodev = 1;
}
release_sock(sk);
--
2.53.0
^ permalink raw reply related
* [PATCH 0/5] can: bcm: collected fixes
From: Oliver Hartkopp @ 2026-06-12 11:05 UTC (permalink / raw)
To: linux-can
Cc: Oliver Hartkopp, Marc Kleine-Budde, Lee Jones, Ginger,
syzbot+75e5e4ae00c3b4bb544e, sashiko-reviews
Hi Marc,
as there were different patches flying arround to fix CAN_BCM issues and
AI assisted stuff pop's up again and again, I've created this collection
to be applied.
I'm not sure whether sashiko-bot takes the sequence of this series into
account when checking the last patches. So let's see.
Best regards,
Oliver
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
Lee Jones (1):
can: bcm: defer rx_op deallocation to workqueue to fix thrtimer UAF
Oliver Hartkopp (4):
can: bcm: mark intentional lockless read of bo->bound for KCSAN
can: bcm: add locking when updating filter and timer values
can: bcm: fix CAN frame rx/tx statistics
can: bcm: use list_add_rcu() in bcm_[rx|tx]_setup()
net/can/bcm.c | 150 ++++++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 115 insertions(+), 35 deletions(-)
---
base-commit: 2b414a95b8f7307d42173ba9e580d6d3e2bcbfce
change-id: 20260612-bcm_fixes-84fb4aa93ac2
Best regards,
--
Oliver Hartkopp <socketcan@hartkopp.net>
^ permalink raw reply
* [PATCH 5/5] can: bcm: use list_add_rcu() in bcm_[rx|tx]_setup()
From: Oliver Hartkopp @ 2026-06-12 11:05 UTC (permalink / raw)
To: linux-can; +Cc: Oliver Hartkopp, Marc Kleine-Budde, sashiko-reviews
In-Reply-To: <20260612-bcm_fixes-v1-0-ca2fa07ee70f@hartkopp.net>
sashiko-bot remarked the missing use of list_add_rcu() in
bcm_[rx|tx]_setup() to have a proper initialized bcm_op structure
when bcm_proc_show() traverses the bcm_op's under rcu_read_lock().
To cover all initial settings of the bcm_op's the list_add_rcu() calls
are moved to the end of the setup code.
Reported-by: sashiko-reviews@lists.linux.dev
Closes: https://lore.kernel.org/linux-can/20260610094654.A1FFE1F00893@smtp.kernel.org/
Fixes: dac5e6249159 ("can: bcm: add missing rcu read protection for procfs content")
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
net/can/bcm.c | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index f1f1f11d1f92..6a1d52189c0a 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -263,11 +263,11 @@ static int bcm_proc_show(struct seq_file *m, void *v)
seq_printf(m, "%s%ld%%\n",
(reduction == 100) ? "near " : "", reduction);
}
- list_for_each_entry(op, &bo->tx_ops, list) {
+ list_for_each_entry_rcu(op, &bo->tx_ops, list) {
seq_printf(m, "tx_op: %03X %s ", op->can_id,
bcm_proc_getifname(net, ifname, op->ifindex));
if (op->flags & CAN_FD_FRAME)
@@ -969,10 +969,11 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
int ifindex, struct sock *sk)
{
struct bcm_sock *bo = bcm_sk(sk);
struct bcm_op *op;
struct canfd_frame *cf;
+ bool add_op_to_list = false;
unsigned int i;
int err;
/* we need a real device to send frames */
if (!ifindex)
@@ -1109,12 +1110,11 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
/* currently unused in tx_ops */
hrtimer_setup(&op->thrtimer, hrtimer_dummy_timeout, CLOCK_MONOTONIC,
HRTIMER_MODE_REL_SOFT);
- /* add this bcm_op to the list of the tx_ops */
- list_add(&op->list, &bo->tx_ops);
+ add_op_to_list = true;
} /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */
if (op->flags & SETTIMER) {
/* set timer values */
@@ -1132,10 +1132,14 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
hrtimer_cancel(&op->timer);
/* spec: send CAN frame when starting timer */
op->flags |= TX_ANNOUNCE;
}
+ /* add this bcm_op to the list of the tx_ops? */
+ if (add_op_to_list)
+ list_add_rcu(&op->list, &bo->tx_ops);
+
if (op->flags & TX_ANNOUNCE)
bcm_can_tx(op);
if (op->flags & STARTTIMER)
bcm_tx_start_timer(op);
@@ -1293,13 +1297,10 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
hrtimer_setup(&op->timer, bcm_rx_timeout_handler, CLOCK_MONOTONIC,
HRTIMER_MODE_REL_SOFT);
hrtimer_setup(&op->thrtimer, bcm_rx_thr_handler, CLOCK_MONOTONIC,
HRTIMER_MODE_REL_SOFT);
- /* add this bcm_op to the list of the rx_ops */
- list_add(&op->list, &bo->rx_ops);
-
/* call can_rx_register() */
do_rx_register = 1;
} /* if ((op = bcm_find_op(&bo->rx_ops, msg_head->can_id, ifindex))) */
@@ -1374,14 +1375,16 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
err = can_rx_register(sock_net(sk), NULL, op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op, "bcm", sk);
if (err) {
/* this bcm rx op is broken -> remove it */
- list_del_rcu(&op->list);
bcm_remove_op(op);
return err;
}
+
+ /* add this bcm_op to the list of the rx_ops */
+ list_add_rcu(&op->list, &bo->rx_ops);
}
return msg_head->nframes * op->cfsiz + MHSIZ;
}
--
2.53.0
^ permalink raw reply related
* [PATCH 1/5] can: bcm: defer rx_op deallocation to workqueue to fix thrtimer UAF
From: Oliver Hartkopp @ 2026-06-12 11:05 UTC (permalink / raw)
To: linux-can; +Cc: Oliver Hartkopp, Marc Kleine-Budde, Lee Jones
In-Reply-To: <20260612-bcm_fixes-v1-0-ca2fa07ee70f@hartkopp.net>
From: Lee Jones <lee@kernel.org>
Commit f1b4e32aca08 ("can: bcm: use call_rcu() instead of costly
synchronize_rcu()") replaced synchronize_rcu() in bcm_delete_rx_op()
with call_rcu() and introduced the RX_NO_AUTOTIMER flag.
However, this flag check was omitted for thrtimer in the packet rx
fast-path. During BCM RX operation teardown, a concurrent RCU reader
(bcm_rx_handler) can race and re-arm thrtimer via
bcm_rx_update_and_send() after call_rcu() has been scheduled. Once
the RCU grace period elapses, bcm_op is freed. The subsequently
firing thrtimer then dereferences the deallocated op, causing a UAF.
Adding flag checks to the rx fast-path (bcm_rx_update_and_send) does not
fully close the TOCTOU race and introduces latency for every CAN frame.
Conversely, calling hrtimer_cancel() directly inside the RCU callback
(softirq context) is fatal as hrtimer_cancel() can sleep, triggering
a "scheduling while atomic" panic.
Resolve this by deferring the timer cancellation and memory free to a
dedicated unbound workqueue (bcm_wq). The RCU callback now queues a
work item to bcm_wq, which safely cancels both timers and deallocates
memory in sleepable process context. A dedicated workqueue is used to
prevent system-wide WQ saturation and is cleanly flushed/destroyed
on module unload to avoid rmmod page faults.
Fixes: f1b4e32aca08 ("can: bcm: use call_rcu() instead of costly synchronize_rcu()")
Signed-off-by: Lee Jones <lee@kernel.org>
---
net/can/bcm.c | 29 ++++++++++++++++++++++++++---
1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index a4bef2c48a55..c49b09f3229f 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -56,10 +56,11 @@
#include <linux/can.h>
#include <linux/can/core.h>
#include <linux/can/skb.h>
#include <linux/can/bcm.h>
#include <linux/slab.h>
+#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <net/can.h>
#include <net/sock.h>
#include <net/net_namespace.h>
@@ -90,10 +91,12 @@ MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>");
MODULE_ALIAS("can-proto-2");
#define BCM_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_ifindex)
+static struct workqueue_struct *bcm_wq;
+
/*
* easy access to the first 64 bit of can(fd)_frame payload. cp->data is
* 64 bit aligned so the offset has to be multiples of 8 which is ensured
* by the only callers in bcm_rx_cmp_to_index() bcm_rx_handler().
*/
@@ -103,10 +106,11 @@ static inline u64 get_u64(const struct canfd_frame *cp, int offset)
}
struct bcm_op {
struct list_head list;
struct rcu_head rcu;
+ struct work_struct work;
int ifindex;
canid_t can_id;
u32 flags;
unsigned long frames_abs, frames_filtered;
struct bcm_timeval ival1, ival2;
@@ -791,23 +795,34 @@ static struct bcm_op *bcm_find_op(struct list_head *ops,
}
return NULL;
}
-static void bcm_free_op_rcu(struct rcu_head *rcu_head)
+static void bcm_free_op_work(struct work_struct *work)
{
- struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
+ struct bcm_op *op = container_of(work, struct bcm_op, work);
+
+ hrtimer_cancel(&op->timer);
+ hrtimer_cancel(&op->thrtimer);
if ((op->frames) && (op->frames != &op->sframe))
kfree(op->frames);
if ((op->last_frames) && (op->last_frames != &op->last_sframe))
kfree(op->last_frames);
kfree(op);
}
+static void bcm_free_op_rcu(struct rcu_head *rcu_head)
+{
+ struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
+
+ INIT_WORK(&op->work, bcm_free_op_work);
+ queue_work(bcm_wq, &op->work);
+}
+
static void bcm_remove_op(struct bcm_op *op)
{
hrtimer_cancel(&op->timer);
hrtimer_cancel(&op->thrtimer);
@@ -1837,15 +1852,19 @@ static struct notifier_block canbcm_notifier = {
static int __init bcm_module_init(void)
{
int err;
+ bcm_wq = alloc_workqueue("can-bcm-wq", WQ_UNBOUND, 0);
+ if (!bcm_wq)
+ return -ENOMEM;
+
pr_info("can: broadcast manager protocol\n");
err = register_pernet_subsys(&canbcm_pernet_ops);
if (err)
- return err;
+ goto register_pernet_failed;
err = register_netdevice_notifier(&canbcm_notifier);
if (err)
goto register_notifier_failed;
@@ -1859,17 +1878,21 @@ static int __init bcm_module_init(void)
register_proto_failed:
unregister_netdevice_notifier(&canbcm_notifier);
register_notifier_failed:
unregister_pernet_subsys(&canbcm_pernet_ops);
+register_pernet_failed:
+ destroy_workqueue(bcm_wq);
return err;
}
static void __exit bcm_module_exit(void)
{
can_proto_unregister(&bcm_can_proto);
unregister_netdevice_notifier(&canbcm_notifier);
unregister_pernet_subsys(&canbcm_pernet_ops);
+ rcu_barrier();
+ destroy_workqueue(bcm_wq);
}
module_init(bcm_module_init);
module_exit(bcm_module_exit);
--
2.53.0
^ permalink raw reply related
* Re: [PATCHv2 4/4] m68k: coldfire: fix non-standard readX()/writeX() functions
From: Greg Ungerer @ 2026-06-11 13:35 UTC (permalink / raw)
To: sashiko-reviews
Cc: Oliver Hartkopp, Vincent Mailhol, linux-can, Frank.Li,
Oleksij Rempel, Marc Kleine-Budde, imx, Angelo Dureghello
In-Reply-To: <20260610142725.031C31F0089A@smtp.kernel.org>
Hi Sashiko-bot,
On 11/6/26 00:27, sashiko-bot@kernel.org wrote:
> Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider:
>
> New issues:
> - [High] The transition to asm-generic readX()/writeX() changes the endianness of ColdFire internal IO accesses, breaking unpatched ColdFire drivers.
>
> Pre-existing issues:
> - [High] Undefined behavior when shifting 1 by 32 or more using `BIT(ch)` in `mcf_edma_tx_handler`.
> - [High] The error interrupt handler incorrectly skips handling high channels and returns `IRQ_NONE` after servicing low channels.
> --
>
> commit 25193db3eba89f780b6dc534f2f72e16f3429571
> Author: Greg Ungerer <gerg@linux-m68k.org>
>
> m68k: coldfire: fix non-standard readX()/writeX() functions
>
> Remove the local ColdFire definitions of readb()/readw()/readl() and
> writeb()/writew()/writel() and use the asm-generic versions of them.
>
>> diff --git a/arch/m68k/include/asm/io_no.h b/arch/m68k/include/asm/io_no.h
>> index 4f0f34b06e37c..2f12f4ed0da54 100644
>> --- a/arch/m68k/include/asm/io_no.h
>> +++ b/arch/m68k/include/asm/io_no.h
>
> [ ... ]
>
>> -#define writel writel
>> -static inline void writel(u32 value, volatile void __iomem *addr)
>> -{
>> - if (cf_internalio(addr))
>> - __raw_writel(value, addr);
>> - else
>> - __raw_writel(swab32(value), addr);
>> -}
>
> [Severity: High]
> Does replacing the local readX()/writeX() with the asm-generic versions
> change the endianness of unpatched ColdFire drivers?
>
> The old ColdFire implementation detected internal IO memory using
> cf_internalio() and bypassed byte-swapping to provide native big-endian
> access. The asm-generic accessors always assume little-endian devices and
> perform byte-swapping.
>
> Will unpatched drivers relying on readX()/writeX() for internal IO
> (such as spi-coldfire-qspi.c, sdhci-esdhc-mcf.c, or fec_main.c) now read
> and write byte-swapped values and malfunction?
This is of course a real problem, and much effort has been expended to
find and fix affected drivers. Specifically here:
patch 1 in this series fixes fec_main.c
patch 3 in this series fixes sdhci-esdhc-mcf.c
patch 4 (this patch) fixes spi-coldfire-qspi.c with the endian change to spi-fsl-dspi.c
Not mentioned here, but patch 2 in this series fixes the smc91x driver.
It is not believed at this time that any other drivers are affected.
> [ ... ]
>
>> diff --git a/drivers/dma/mcf-edma-main.c b/drivers/dma/mcf-edma-main.c
>> index 9e1c6400c77be..4ed0ce644e37e 100644
>> --- a/drivers/dma/mcf-edma-main.c
>> +++ b/drivers/dma/mcf-edma-main.c
>> @@ -21,9 +21,9 @@ static irqreturn_t mcf_edma_tx_handler(int irq, void *dev_id)
>> unsigned int ch;
>> u64 intmap;
>>
>> - intmap = ioread32(regs->inth);
>> + intmap = ioread32be(regs->inth);
>> intmap <<= 32;
>> - intmap |= ioread32(regs->intl);
>> + intmap |= ioread32be(regs->intl);
>> if (!intmap)
>> return IRQ_NONE;
>
> [Severity: High]
> This is a pre-existing issue, but does this shift invoke undefined behavior
> for higher channels?
>
> Looking at the loop immediately following this in mcf_edma_tx_handler():
>
> for (ch = 0; ch < mcf_edma->n_chans; ch++) {
> if (intmap & BIT(ch)) {
> ...
>
> On a 32-bit architecture like m68k, BIT(ch) expands to a 32-bit unsigned
> long. Since ch can be up to 64, shifting by 32 or more would be undefined,
> potentially causing channels 32-63 to be improperly processed or missing
> their interrupts entirely.
>
> Should this use BIT_ULL(ch) when testing against the 64-bit intmap?
>
> [ ... ]
>
>> @@ -43,7 +43,7 @@ static irqreturn_t mcf_edma_err_handler(int irq, void *dev_id)
>> struct edma_regs *regs = &mcf_edma->regs;
>> unsigned int err, ch;
>>
>> - err = ioread32(regs->errl);
>> + err = ioread32be(regs->errl);
>> if (!err)
>> return IRQ_NONE;
>
> [Severity: High]
> This is a pre-existing issue, but does this early return incorrectly skip
> handling high channels?
>
> If regs->errl is zero, it returns IRQ_NONE immediately, ignoring any errors
> in regs->errh. This could leave errors in higher channels uncleared.
>
> [ ... ]
>
>> @@ -55,7 +55,7 @@ static irqreturn_t mcf_edma_err_handler(int irq, void *dev_id)
>> }
>> }
>>
>> - err = ioread32(regs->errh);
>> + err = ioread32be(regs->errh);
>> if (!err)
>> return IRQ_NONE;
>
> [Severity: High]
> This is a pre-existing issue, but does this return the wrong IRQ status if
> low channel errors were already handled?
>
> If errl was non-zero (so some errors were handled) but errh is zero,
> returning IRQ_NONE here instead of IRQ_HANDLED might incorrectly signal
> to the IRQ subsystem that the interrupt wasn't handled at all.
At casual inspection these may well be real problems. I will have to defer
to the driver author and current maintainers for them to fix. The changes
to fix ColdFire readl()/writel() have no effect on these, and are really
outside of the scope of this series.
Thanks for the review!
Regards
Greg
^ permalink raw reply
* Re: [PATCH v5 4/8] dt-bindings: can: fsl,flexcan: add NXP S32N79 SoC support
From: Ciprian Marian Costea @ 2026-06-11 11:57 UTC (permalink / raw)
To: Krzysztof Kozlowski
Cc: Marc Kleine-Budde, Vincent Mailhol, Rob Herring,
Krzysztof Kozlowski, Conor Dooley, Frank Li, Sascha Hauer,
Fabio Estevam, Pengutronix Kernel Team, linux-can, devicetree,
linux-kernel, imx, linux-arm-kernel, NXP S32 Linux Team,
Christophe Lizzi, Alberto Ruiz, Enric Balletbo, Eric Chanudet,
Andra-Teodora Ilie, Larisa Grigore, Conor Dooley, Haibo Chen
In-Reply-To: <20260610-crouching-wild-mushroom-c8bf6a@quoll>
On 6/10/2026 9:37 AM, Krzysztof Kozlowski wrote:
> On Tue, Jun 09, 2026 at 04:29:50PM +0200, Ciprian Costea wrote:
>> From: Ciprian Marian Costea <ciprianmarian.costea@oss.nxp.com>
>>
>> Add NXP S32N79 SoC compatible string and interrupt properties.
>>
>> On S32N79, FlexCAN IP is integrated with two interrupt lines:
>> one for the mailbox interrupts (0-127) and one for signaling
>> bus errors and device state changes.
>>
>> Co-developed-by: Andra-Teodora Ilie <andra.ilie@nxp.com>
>> Signed-off-by: Andra-Teodora Ilie <andra.ilie@nxp.com>
>> Co-developed-by: Larisa Grigore <larisa.grigore@nxp.com>
>> Signed-off-by: Larisa Grigore <larisa.grigore@nxp.com>
>> Signed-off-by: Ciprian Marian Costea <ciprianmarian.costea@oss.nxp.com>
>> Acked-by: Conor Dooley <conor.dooley@microchip.com>
>> Reviewed-and-tested-by: Haibo Chen <haibo.chen@nxp.com>
>
> You cannot test a binding (in a meaning what "testing" means). Building
> code is not testing.
>
>> Tested-by: Enric Balletbo i Serra <eballetb@redhat.com>
>
> Not possible.
>
> Best regards,
> Krzysztof
>
Hello Krzysztof,
Yes, my bad. It makes total sense.
I presume I shouldn't send a new version just for this 'Tested-by'
removal - right ?
Best Regards,
Ciprian
^ permalink raw reply
* Re: [PATCH can-next 3/3] can: add can diag interface
From: sashiko-bot @ 2026-06-10 17:38 UTC (permalink / raw)
To: Filippo Storniolo
Cc: linux-can, Vincent Mailhol, Marc Kleine-Budde, Oliver Hartkopp,
Oleksij Rempel
In-Reply-To: <20260610-feat-can-diag-v1-3-021e3f1631a0@redhat.com>
Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider:
- [High] The `can_diag` module fails to enforce a dependency on the core `can` module, leading to a crash when locking an uninitialized mutex.
- [Medium] Binary structures are embedded directly inside Netlink attributes, violating Netlink design rules.
--
commit 058ea09f4025994f935c1bd793eb19a873974e20
Author: Filippo Storniolo <fstornio@redhat.com>
can: add can diag interface
Add the can_diag interface for querying sockets from userspace.
ss(8) tool can use this interface to list open sockets. The userspace ABI
is defined in <linux/can_diag.h> and includes netlink request and response
structs.
> diff --git a/include/uapi/linux/can/diag.h b/include/uapi/linux/can/diag.h
> new file mode 100644
> index 0000000000000..7f99f85c42425
> --- /dev/null
> +++ b/include/uapi/linux/can/diag.h
[ ... ]
> +struct can_diag_j1939_addr {
> + __u64 name;
> + __u32 pgn;
> + __u8 addr;
> +};
[Severity: Medium]
Does this violate the Netlink subsystem design guidelines?
The Netlink documentation states to avoid binary structures inside attributes
and suggests breaking each member into its own attribute.
Also, does this structure have implicit trailing padding? Without explicit
padding after the 1-byte field, we might run into architecture-dependent size
differences, breaking 32-bit vs 64-bit compatibility.
[ ... ]
> diff --git a/net/can/diag.c b/net/can/diag.c
> new file mode 100644
> index 0000000000000..eb623ed118af5
> --- /dev/null
> +++ b/net/can/diag.c
[ ... ]
> + case CAN_J1939:
> + {
> + struct can_diag_j1939_addr j1939_addr;
> +
> + memset(&j1939_addr, 0, sizeof(j1939_addr));
> +
> + j1939_addr.name = can_addr.can_addr.j1939.name;
> + j1939_addr.pgn = can_addr.can_addr.j1939.pgn;
> + j1939_addr.addr = can_addr.can_addr.j1939.addr;
> +
> + err = nla_put(skb, CAN_DIAG_J1939_ADDR, sizeof(struct can_diag_j1939_addr),
> + &j1939_addr);
[Severity: Medium]
Does this directly write the binary struct into the Netlink message attribute?
The Netlink guidelines discourage putting binary structs into attributes for
new families.
[ ... ]
> +static int can_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
> +{
> + int num = 0, s_num = cb->args[0];
> + struct can_diag_req *req;
> + struct net *net;
> + struct sock *sk;
> +
> + net = sock_net(skb->sk);
> + req = nlmsg_data(cb->nlh);
> +
> + mutex_lock(&net->can.sklist_lock);
[Severity: High]
Can this attempt to lock an uninitialized mutex?
Since can_diag.ko does not call any exported symbols from can.ko, the kernel
module loader will not enforce a load dependency between them. If a userspace
tool queries CAN sockets, the kernel might autoload can_diag.ko without loading
can.ko.
Because can.ko is responsible for registering the pernet operations that
initialize net->can.sklist_lock, can_diag_dump() could end up calling
mutex_lock() on a zeroed, uninitialized mutex, leading to lockdep warnings
or crashes.
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260610-feat-can-diag-v1-0-021e3f1631a0@redhat.com?part=3
^ permalink raw reply
* Re: [PATCH v1] can: flexcan: Drop unused platform driver data
From: Uwe Kleine-König (The Capable Hub) @ 2026-06-10 17:27 UTC (permalink / raw)
To: Marc Kleine-Budde
Cc: Vincent Mailhol, linux-can, linux-kernel, Rob Herring,
Geert Uytterhoeven, linux-m68k
In-Reply-To: <ahhAhZ76nLc-eZg8@monoceros>
[-- Attachment #1: Type: text/plain, Size: 1003 bytes --]
Hey Marc,
On Thu, May 28, 2026 at 03:26:29PM +0200, Uwe Kleine-König (The Capable Hub) wrote:
> On Thu, May 28, 2026 at 01:00:15PM +0200, Marc Kleine-Budde wrote:
> > On 28.05.2026 11:45:39, Uwe Kleine-König (The Capable Hub) wrote:
> > > No function in the flexcan driver ever accesses the .driver_data member
> > > of its .id_table array as device_get_match_data() returns NULL for
> > > non-of platform devices. So drop that unused assignment.
> > >
> > > While touching that array, drop the trailing comma after the terminator
> > > entry.
> >
> > Does this mean that commit 5e6c3454b405 ("net: can: Use
> > device_get_match_data()") introduced a regression?
>
> Yes, I'd claim that this commit introduces a null pointer exception when
> mcf_flexcan0 (defined in arch/m68k/coldfire/device.c) probes.
I wonder what your plan is here. Should I send a v2 that also drops the
unused variable that I missed in my v1, or do you want this NPE
addressed first?
Best regards
Uwe
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply
* [PATCH can-next 3/3] can: add can diag interface
From: Filippo Storniolo @ 2026-06-10 17:24 UTC (permalink / raw)
To: Oliver Hartkopp, Marc Kleine-Budde, Robin van der Gracht,
Oleksij Rempel, kernel, Urs Thuermann, Paolo Abeni, Simon Horman
Cc: linux-can, Filippo Storniolo, Davide Caratti
In-Reply-To: <20260610-feat-can-diag-v1-0-021e3f1631a0@redhat.com>
Add the can_diag interface for querying sockets from userspace.
ss(8) tool can use this interface to list open sockets.
The userspace ABI is defined in <linux/can_diag.h> and includes
netlink request and response structs. The request queries open
can sockets and the response contains socket information fields
including the interface index for bound sockets, inode number,
transport protocol etc.
Support can be added later by extending can_diag_dump().
Suggested-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Filippo Storniolo <fstornio@redhat.com>
---
MAINTAINERS | 1 +
include/uapi/linux/can/diag.h | 55 +++++++++++++
net/can/Kconfig | 10 +++
net/can/Makefile | 3 +
net/can/diag.c | 184 ++++++++++++++++++++++++++++++++++++++++++
5 files changed, 253 insertions(+)
diff --git a/MAINTAINERS b/MAINTAINERS
index ca6c7425b45f74830612896d01eb397e2529f43b..6c1342cee5067717518add8a0d85e82b8e2b781e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5736,6 +5736,7 @@ F: include/net/can.h
F: include/net/netns/can.h
F: include/uapi/linux/can.h
F: include/uapi/linux/can/bcm.h
+F: include/uapi/linux/can/diag.h
F: include/uapi/linux/can/gw.h
F: include/uapi/linux/can/isotp.h
F: include/uapi/linux/can/raw.h
diff --git a/include/uapi/linux/can/diag.h b/include/uapi/linux/can/diag.h
new file mode 100644
index 0000000000000000000000000000000000000000..7f99f85c42425062e69f961693da17965a49e7ab
--- /dev/null
+++ b/include/uapi/linux/can/diag.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_CAN_DIAG_H_
+#define _UAPI_CAN_DIAG_H_
+
+#include <linux/types.h>
+#include <linux/can.h>
+
+/* Request */
+struct can_diag_req {
+ __u8 sdiag_family; /* must be AF_CAN */
+ __u8 sdiag_protocol; /* for future filtering of transport protocols */
+ __u16 pad;
+ __u32 cdiag_states;
+ __u32 cdiag_ino;
+ __u32 cdiag_show;
+ __u32 cdiag_cookie[2];
+};
+
+enum {
+ CAN_DIAG_UNSPEC,
+ CAN_DIAG_UID,
+ CAN_DIAG_ISOTP_ADDR,
+ CAN_DIAG_J1939_ADDR,
+
+ __CAN_DIAG_MAX,
+};
+
+#define CAN_DIAG_MAX (__CAN_DIAG_MAX - 1)
+
+/* Response */
+struct can_diag_msg {
+ __u8 cdiag_family; /* AF_CAN */
+ __u8 cdiag_state;
+ __u16 cdiag_protocol;
+ __u16 cdiag_type;
+ __u16 pad16;
+ __u32 cdiag_cookie[2];
+ __s32 cdiag_ifindex;
+ __u32 pad32;
+ __u64 cdiag_ino;
+};
+
+struct can_diag_isotp_addr {
+ canid_t tx_id;
+ canid_t rx_id;
+};
+
+struct can_diag_j1939_addr {
+ __u64 name;
+ __u32 pgn;
+ __u8 addr;
+};
+
+#endif /* _UAPI_CAN_DIAG_H_ */
diff --git a/net/can/Kconfig b/net/can/Kconfig
index abbb4be7ad2152c66abd2e28dbbd5282ec45d065..ed210d62da7c28798d2f6f1967739e1621093cbf 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -70,4 +70,14 @@ config CAN_ISOTP
as needed e.g. for vehicle diagnosis (UDS, ISO 14229) or IP-over-CAN
traffic.
+config CAN_DIAG
+ tristate "CAN socket monitoring interface"
+ depends on CAN
+ default m
+ help
+ Support for CAN socket monitoring interface used by the ss tool.
+ If unsure, say Y.
+
+ Enable this module so userspace applications can query open sockets.
+
endif
diff --git a/net/can/Makefile b/net/can/Makefile
index 58f2c31c1ef377ef8c5385211e7182d42ac2225a..c0ddeb9a012c8200468be01015da71ec25e96b66 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -20,3 +20,6 @@ obj-$(CONFIG_CAN_J1939) += j1939/
obj-$(CONFIG_CAN_ISOTP) += can-isotp.o
can-isotp-y := isotp.o
+
+obj-$(CONFIG_CAN_DIAG) += can-diag.o
+can-diag-y := diag.o
diff --git a/net/can/diag.c b/net/can/diag.c
new file mode 100644
index 0000000000000000000000000000000000000000..eb623ed118af57d260a86a18106bb838503ec76c
--- /dev/null
+++ b/net/can/diag.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright (C) 2026 Red Hat
+ * Author: Filippo Storniolo <fstornio@redhat.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <net/netlink.h>
+#include <linux/sock_diag.h>
+#include <linux/can.h>
+#include <linux/can/diag.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <linux/netdevice.h>
+#include <linux/user_namespace.h>
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+ struct can_diag_req *req, /* will be used for filtering */
+ struct user_namespace *user_ns,
+ u32 portid, u32 seq, u32 flags, u64 sk_ino)
+{
+ struct sockaddr_can can_addr;
+ struct can_diag_msg *rep;
+ struct nlmsghdr *nlh;
+ uid_t uid;
+ int err;
+
+ nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ rep = nlmsg_data(nlh);
+ memset(rep, 0, sizeof(struct can_diag_msg));
+
+ rep->cdiag_family = AF_CAN;
+ rep->cdiag_type = sk->sk_type;
+ rep->cdiag_ino = sk_ino;
+ rep->cdiag_protocol = sk->sk_protocol;
+ rep->cdiag_state = READ_ONCE(sk->sk_state);
+ sock_diag_save_cookie(sk, rep->cdiag_cookie);
+
+ uid = from_kuid_munged(user_ns, sk_uid(sk));
+ err = nla_put(skb, CAN_DIAG_UID, sizeof(uid_t), &uid);
+ if (err < 0)
+ goto cancel_nlmsg_err;
+
+ memset(&can_addr, 0, sizeof(can_addr));
+
+ err = kernel_getsockname(sk->sk_socket, (struct sockaddr *)&can_addr);
+ if (err < 0) {
+ /* Some protocols (e.g. CAN_BCM) do not implement kernel_getsockname().
+ * No error returned because the netlink message is still valid.
+ */
+ if (err == -EOPNOTSUPP)
+ goto exit_no_err;
+
+ goto cancel_nlmsg_err;
+ }
+
+ rep->cdiag_ifindex = can_addr.can_ifindex;
+
+ switch (sk->sk_protocol) {
+ case CAN_ISOTP:
+ {
+ struct can_diag_isotp_addr isotp_addr;
+
+ memset(&isotp_addr, 0, sizeof(isotp_addr));
+
+ isotp_addr.tx_id = can_addr.can_addr.tp.tx_id;
+ isotp_addr.rx_id = can_addr.can_addr.tp.rx_id;
+
+ err = nla_put(skb, CAN_DIAG_ISOTP_ADDR, sizeof(struct can_diag_isotp_addr),
+ &isotp_addr);
+ }
+ break;
+ case CAN_J1939:
+ {
+ struct can_diag_j1939_addr j1939_addr;
+
+ memset(&j1939_addr, 0, sizeof(j1939_addr));
+
+ j1939_addr.name = can_addr.can_addr.j1939.name;
+ j1939_addr.pgn = can_addr.can_addr.j1939.pgn;
+ j1939_addr.addr = can_addr.can_addr.j1939.addr;
+
+ err = nla_put(skb, CAN_DIAG_J1939_ADDR, sizeof(struct can_diag_j1939_addr),
+ &j1939_addr);
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (err < 0)
+ goto cancel_nlmsg_err;
+
+exit_no_err:
+ nlmsg_end(skb, nlh);
+ return 0;
+
+cancel_nlmsg_err:
+ nlmsg_cancel(skb, nlh);
+ return err;
+}
+
+static int can_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int num = 0, s_num = cb->args[0];
+ struct can_diag_req *req;
+ struct net *net;
+ struct sock *sk;
+
+ net = sock_net(skb->sk);
+ req = nlmsg_data(cb->nlh);
+
+ mutex_lock(&net->can.sklist_lock);
+ sk_for_each(sk, &net->can.sklist) {
+ if (num < s_num)
+ goto next;
+
+ if (sk_diag_fill(sk, skb, req,
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ sock_i_ino(sk)) < 0)
+ goto done;
+next:
+ num++;
+ }
+done:
+ mutex_unlock(&net->can.sklist_lock);
+ cb->args[0] = num;
+
+ return skb->len;
+}
+
+static int can_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+ int hdrlen = sizeof(struct can_diag_req);
+ struct net *net = sock_net(skb->sk);
+ struct can_diag_req *req;
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ req = nlmsg_data(h);
+ if (req->sdiag_protocol)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = can_diag_dump
+ };
+ return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler can_diag_handler = {
+ .owner = THIS_MODULE,
+ .family = AF_CAN,
+ .dump = can_diag_handler_dump,
+};
+
+static int __init can_diag_init(void)
+{
+ pr_info("can: diagnostic module\n");
+ return sock_diag_register(&can_diag_handler);
+}
+
+static void __exit can_diag_exit(void)
+{
+ sock_diag_unregister(&can_diag_handler);
+}
+
+module_init(can_diag_init);
+module_exit(can_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CAN socket monitoring via SOCK_DIAG");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_CAN);
--
2.53.0
^ permalink raw reply related
* [PATCH can-next 0/3] Introduce diag support for CAN
From: Filippo Storniolo @ 2026-06-10 17:24 UTC (permalink / raw)
To: Oliver Hartkopp, Marc Kleine-Budde, Robin van der Gracht,
Oleksij Rempel, kernel, Urs Thuermann, Paolo Abeni, Simon Horman
Cc: linux-can, Filippo Storniolo, Davide Caratti
In the current state of the art, it is not possible for userspace to use
tool like ss(8) to query open AF_CAN sockets.
This patch series adds the netlink can_diag interface for AF_CAN. Userspace
is now able to send a netlink request to request information about open
AF_CAN sockets that are retrieved by the can-diag.ko module.
Patch 1 is a fix that is mandatory to the correct behaviour of the diag
module: the field sk_protocol is not assigned during `can_create()` instead
of being assigned only in case the CAN socket belong to the j1939 transport
protocol. This is needed to the correct filtering of the can diag module.
Patch 2 introduces a list per network namespace containing open CAN sockets.
The list is protected by a mutex.
CAN sockets are added in `can_create()` to the list and removed from it
in the `release()` calls defined for each transport protocol.
Patch 3 is the core patch that introduces the userspace structures to query
and receive netlink messages for CAN sockets and the actual CAN diag module.
The latter will use the userspace structures defined in <linux/can/diag.h>
to filter and retrieve open CAN sockets.
In the current implementation, every CAN sockets in the list is
returned to the userspace, however a filtering could be implemented:
Userspace could possibly request only bound CAN sockets or CAN sockets
belonging to a specific transport protocol (raw, iso-tp, j1939, bcm)
and so on.
---
Change w.r.t. RFC:
- header relocation, from linux/can_diag.h to linux/can/diag.h
(Oliver Hartkopp)
- file renaming, from net/can/can-diag.c to net/can/diag.c
(Oliver Hartkopp)
- guard renaming, from _UAPI__CAN_DIAG_H__ to _UAPI_CAN_DIAG_H_
(Oliver Hartkopp)
- handled J1939 addressing in sockaddr_can
(Oliver Hartkopp)
Signed-off-by: Filippo Storniolo <fstornio@redhat.com>
---
Davide Caratti (1):
af_can: ensure sk_protocol is always set on socket creation
Filippo Storniolo (2):
af_can: store socket pointers in struct netns_can
can: add can diag interface
MAINTAINERS | 1 +
include/linux/can/core.h | 9 +++
include/net/netns/can.h | 6 ++
include/uapi/linux/can/diag.h | 55 +++++++++++++
net/can/Kconfig | 10 +++
net/can/Makefile | 3 +
net/can/af_can.c | 38 +++++++++
net/can/bcm.c | 2 +
net/can/diag.c | 184 ++++++++++++++++++++++++++++++++++++++++++
net/can/isotp.c | 2 +
net/can/j1939/socket.c | 3 +-
net/can/raw.c | 2 +
12 files changed, 314 insertions(+), 1 deletion(-)
---
base-commit: 0cf905cb9a12dbfb5d14896729b74508f83f73df
change-id: 20260529-feat-can-diag-d7acbf5e22c2
Best regards,
--
Filippo Storniolo <fstornio@redhat.com>
^ permalink raw reply
* [PATCH can-next 2/3] af_can: store socket pointers in struct netns_can
From: Filippo Storniolo @ 2026-06-10 17:24 UTC (permalink / raw)
To: Oliver Hartkopp, Marc Kleine-Budde, Robin van der Gracht,
Oleksij Rempel, kernel, Urs Thuermann, Paolo Abeni, Simon Horman
Cc: linux-can, Filippo Storniolo
In-Reply-To: <20260610-feat-can-diag-v1-0-021e3f1631a0@redhat.com>
AF_CAN sockets need to be stored in the netns_can structure
in order to be retrieved by the CAN diagnostic module when
a netlink request message is issued by the userspace.
On socket creation (`can_create()`), add the pointer to the
new socket to `netns_can::sk_list`. During socket release
(`isotp_release()`, `raw_release()`, `j1939_release()`,
`bcm_release()`), remove the corresponding pointer from
this list.
Since this is a prerequisite of the CAN diagnostic module,
deletes and insert operations are conditioned by
IS_ENABLED(CONFIG_CAN_DIAG).
Signed-off-by: Filippo Storniolo <fstornio@redhat.com>
---
include/linux/can/core.h | 9 +++++++++
include/net/netns/can.h | 6 ++++++
net/can/af_can.c | 37 +++++++++++++++++++++++++++++++++++++
net/can/bcm.c | 2 ++
net/can/isotp.c | 2 ++
net/can/j1939/socket.c | 2 ++
net/can/raw.c | 2 ++
7 files changed, 60 insertions(+)
diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index 3287232e3cad92b8514bdad996fbaa02c8fe53fe..0c658ee4aab286e48eb5815640fedbd98a90b92a 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -61,4 +61,13 @@ extern int can_send(struct sk_buff *skb, int loop);
void can_set_skb_uid(struct sk_buff *skb);
void can_sock_destruct(struct sock *sk);
+/* function prototypes for the CAN diag module */
+#if IS_ENABLED(CONFIG_CAN_DIAG)
+void can_add_sock_sklist(struct sock *sk);
+void can_remove_sock_sklist(struct sock *sk);
+#else
+#define can_add_sock_sklist(sk)
+#define can_remove_sock_sklist(sk)
+#endif
+
#endif /* !_CAN_CORE_H */
diff --git a/include/net/netns/can.h b/include/net/netns/can.h
index 48b79f7e6236d9b4cf4bd57655f89e161faff05d..bcafff5e66697faf065b4a6b4ab009ce0c62dee7 100644
--- a/include/net/netns/can.h
+++ b/include/net/netns/can.h
@@ -36,6 +36,12 @@ struct netns_can {
/* CAN GW per-net gateway jobs */
struct hlist_head cgw_list;
+
+#if IS_ENABLED(CONFIG_CAN_DIAG)
+ /* CAN diag support */
+ struct mutex sklist_lock;
+ struct hlist_head sklist;
+#endif
};
#endif /* __NETNS_CAN_H__ */
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 65af259469854fc19cda0d7dd01c7b4da2ec0f4c..8e1be301fa0ca9dba4cbf1bbd27bfc62ddb19f18 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -111,6 +111,32 @@ static inline void can_put_proto(const struct can_proto *cp)
module_put(cp->prot->owner);
}
+#if IS_ENABLED(CONFIG_CAN_DIAG)
+void can_add_sock_sklist(struct sock *sk)
+{
+ struct net *net;
+
+ net = sock_net(sk);
+
+ mutex_lock(&net->can.sklist_lock);
+ sk_add_node(sk, &net->can.sklist);
+ mutex_unlock(&net->can.sklist_lock);
+}
+EXPORT_SYMBOL(can_add_sock_sklist);
+
+void can_remove_sock_sklist(struct sock *sk)
+{
+ struct net *net;
+
+ net = sock_net(sk);
+
+ mutex_lock(&net->can.sklist_lock);
+ sk_del_node_init(sk);
+ mutex_unlock(&net->can.sklist_lock);
+}
+EXPORT_SYMBOL(can_remove_sock_sklist);
+#endif
+
static int can_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
@@ -174,6 +200,8 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
sock_put(sk);
sock->sk = NULL;
} else {
+ can_add_sock_sklist(sk);
+
sock_prot_inuse_add(net, sk->sk_prot, 1);
}
@@ -821,6 +849,11 @@ static int can_pernet_init(struct net *net)
can_init_proc(net);
}
+#if IS_ENABLED(CONFIG_CAN_DIAG)
+ mutex_init(&net->can.sklist_lock);
+ INIT_HLIST_HEAD(&net->can.sklist);
+#endif
+
return 0;
out_free_pkg_stats:
@@ -842,6 +875,10 @@ static void can_pernet_exit(struct net *net)
kfree(net->can.rx_alldev_list);
kfree(net->can.pkg_stats);
kfree(net->can.rcv_lists_stats);
+
+#if IS_ENABLED(CONFIG_CAN_DIAG)
+ WARN_ON_ONCE(!hlist_empty(&net->can.sklist));
+#endif
}
/* af_can module init/exit functions */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index a4bef2c48a559cf7b0fa1d00d1fb1187e31359d8..63515306f499a602a58f7d1f103ff3a95f239e0a 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1594,6 +1594,8 @@ static int bcm_release(struct socket *sock)
if (!sk)
return 0;
+ can_remove_sock_sklist(sk);
+
net = sock_net(sk);
bo = bcm_sk(sk);
diff --git a/net/can/isotp.c b/net/can/isotp.c
index c48b4a818297e2a1348a2b64016d0f4ff613e683..91736b28e7bfbed3e915666a2ae9d60ac0cec193 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1201,6 +1201,8 @@ static int isotp_release(struct socket *sock)
so = isotp_sk(sk);
net = sock_net(sk);
+ can_remove_sock_sklist(sk);
+
/* wait for complete transmission of current pdu */
while (wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE) == 0 &&
cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SHUTDOWN) != ISOTP_IDLE)
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index ccd43ff5519c72f209b53bf74b833acd6ffe9728..cbfd0b8887680b48f4b941b851080f69b465433e 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -641,6 +641,8 @@ static int j1939_sk_release(struct socket *sock)
if (!sk)
return 0;
+ can_remove_sock_sklist(sk);
+
lock_sock(sk);
jsk = j1939_sk(sk);
diff --git a/net/can/raw.c b/net/can/raw.c
index a26942e78e688721c9306f4e3fcbf25cd395a127..6cf47c53eb5336d15b4303612a960a833c62eade 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -418,6 +418,8 @@ static int raw_release(struct socket *sock)
ro = raw_sk(sk);
net = sock_net(sk);
+ can_remove_sock_sklist(sk);
+
spin_lock(&raw_notifier_lock);
while (raw_busy_notifier == ro) {
spin_unlock(&raw_notifier_lock);
--
2.53.0
^ permalink raw reply related
* [PATCH can-next 1/3] af_can: ensure sk_protocol is always set on socket creation
From: Filippo Storniolo @ 2026-06-10 17:24 UTC (permalink / raw)
To: Oliver Hartkopp, Marc Kleine-Budde, Robin van der Gracht,
Oleksij Rempel, kernel, Urs Thuermann, Paolo Abeni, Simon Horman
Cc: linux-can, Filippo Storniolo, Davide Caratti
In-Reply-To: <20260610-feat-can-diag-v1-0-021e3f1631a0@redhat.com>
From: Davide Caratti <dcaratti@redhat.com>
Currently, only j1939 assigns a value to 'sk_protocol'. As a prerequisite
for the implementation of CAN sockets diagnostics, AF_CAN sockets need an
easy way to determine which protocol is on top of an existing socket.
POC test using can-tests:
| # perf probe -m can_j1939 --add "j1939_sk_recvmsg sock->sk->sk_protocol"
| # perf record -e probe:j1939_sk_recvmsg -aR -- ./j1939/tst-j1939-ac
| [...]
| # perf script
| tst-j1939-ac 5807 [002] 322767.312599: probe:j1939_sk_recvmsg: (ffffffffc0b29a14) sk_protocol=0x7
| ^^^ 0x7, that's CAN_J1939
| # perf probe -m can_raw --add "raw_recvmsg sock->sk->sk_protocol"
| # perf record -e probe:raw_recvmsg -aR ./netlayer/tst-rcv-own-msgs vcan0
| # perf script
| [...]
| tst-rcv-own-msg 5816 [001] 323173.651122: probe:raw_recvmsg: (ffffffffc0b20154) sk_protocol=0x1
| ^^^ 0x1, that's CAN_RAW. It was 0x0 on unpatched kernel
Storing the "protocol" value in can_create() fixes the problem. This also
fixes AF_CAN support for the following system call:
| getsockopt(..., SOL_SOCKET, SO_PROTOCOL, &proto, sizeof(proto))
that was assigning 0 to 'proto' since the earliest kernel versions.
Fixes: 0d66548a10cb ("[CAN]: Add PF_CAN core module")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
---
net/can/af_can.c | 1 +
net/can/j1939/socket.c | 1 -
2 files changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 7bc86b176b4de32dddeab5a42a420d62434fcd7f..65af259469854fc19cda0d7dd01c7b4da2ec0f4c 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -162,6 +162,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
}
sock_init_data(sock, sk);
+ sk->sk_protocol = protocol;
sk->sk_destruct = can_sock_destruct;
if (sk->sk_prot->init)
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 50a598ef5fd4a5f5e007816a341e04ddbcc724e6..ccd43ff5519c72f209b53bf74b833acd6ffe9728 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -420,7 +420,6 @@ static int j1939_sk_init(struct sock *sk)
/* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */
sock_set_flag(sk, SOCK_RCU_FREE);
sk->sk_destruct = j1939_sk_sock_destruct;
- sk->sk_protocol = CAN_J1939;
return 0;
}
--
2.53.0
^ permalink raw reply related
* Re: [PATCH v2] can: bcm: fix CAN frame rx/tx statistics
From: sashiko-bot @ 2026-06-10 16:53 UTC (permalink / raw)
To: Oliver Hartkopp
Cc: Vincent Mailhol, Oleksij Rempel, linux-can, Marc Kleine-Budde
In-Reply-To: <20260610163744.51980-1-socketcan@hartkopp.net>
Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider:
New issues:
- [Medium] Lockless sequential resetting of related statistics creates a race condition leading to invalid procfs output.
Pre-existing issues:
- [High] Missing synchronize_rcu() in bcm_delete_rx_op() causes Use-After-Free of bcm_op due to concurrent timer re-arming.
- [High] Incomplete fix for concurrent rx data races leaves CAN frame payload and timestamps vulnerable to corruption.
--
commit da9dc73034abe67929230960f0d96f1b0cc6b4c0
Author: Oliver Hartkopp <socketcan@hartkopp.net>
can: bcm: fix CAN frame rx/tx statistics
KCSAN detected a data race within the bcm_rx_handler() when two CAN frames
have been simultaneously received and processed in a single rx op by two
different CPUs. This patch uses atomic operations with (signed) long data
types to access the statistics in the hot path.
> diff --git a/net/can/bcm.c b/net/can/bcm.c
> index a4bef2c48a559..e7b1eb61918f5 100644
> --- a/net/can/bcm.c
> +++ b/net/can/bcm.c
[ ... ]
> @@ -249,9 +252,9 @@ static int bcm_proc_show(struct seq_file *m, void *v)
> (long long)ktime_to_us(op->kt_ival2));
>
> seq_printf(m, "# recv %ld (%ld) => reduction: ",
> - op->frames_filtered, op->frames_abs);
> + frames_filtered, frames_abs);
>
> - reduction = 100 - (op->frames_filtered * 100) / op->frames_abs;
> + reduction = 100 - (frames_filtered * 100) / frames_abs;
>
> seq_printf(m, "%s%ld%%\n",
> (reduction == 100) ? "near " : "", reduction);
[Severity: Medium]
Could this reduction calculation produce a wildly incorrect negative
percentage if the counters become desynchronized?
[ ... ]
> @@ -285,6 +289,15 @@ static int bcm_proc_show(struct seq_file *m, void *v)
> }
> #endif /* CONFIG_PROC_FS */
>
> +static void bcm_update_stats(struct bcm_op *op)
> +{
> + /* prevent statistics overflow */
> + if (atomic_long_inc_return(&op->frames_abs) > LONG_MAX / 100) {
> + atomic_long_set(&op->frames_filtered, 0);
> + atomic_long_set(&op->frames_abs, 0);
> + }
> +}
[Severity: Medium]
Does sequentially resetting these atomic counters introduce a race window?
If a thread is preempted between resetting frames_filtered and frames_abs,
could another thread process numerous CAN frames and increment both counters,
leaving frames_filtered at a large value when the delayed thread resumes and
resets only frames_abs?
[ ... ]
> @@ -723,7 +733,7 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
> /* save originator for recvfrom() */
> op->rx_ifindex = skb->dev->ifindex;
> /* update statistics */
> - op->frames_abs++;
> + bcm_update_stats(op);
[Severity: High]
This isn't a bug introduced by this patch, but since bcm_rx_handler() can
execute concurrently on multiple CPUs, does this leave the rest of the
shared bcm_op state vulnerable?
Concurrent lockless writes to op->rx_stamp and op->rx_ifindex could cause
torn reads.
Additionally, bcm_rx_update_and_send() performs a non-atomic memcpy()
to shared memory:
net/can/bcm.c:bcm_rx_update_and_send() {
...
memcpy(lastdata, rxdata, op->cfsiz);
lastdata->flags |= (RX_RECV|RX_THR);
lastdata->flags |= traffic_flags;
...
}
Can this corrupt the CAN frame payload delivered to userspace?
[Severity: High]
This is a pre-existing issue, but is there a use-after-free risk during
RX operation removal?
When bcm_delete_rx_op() removes an RX operation, it defers subscriber
removal to an RCU callback via can_rx_unregister(). It then calls
bcm_remove_op() which cancels timers and frees the bcm_op via call_rcu()
without waiting for existing readers:
net/can/bcm.c:bcm_delete_rx_op() {
...
can_rx_unregister(sock_net(op->sk), NULL, op->can_id,
REGMASK(op->can_id), bcm_rx_handler, op);
list_del_rcu(&op->list);
bcm_remove_op(op);
...
}
Could a concurrently executing bcm_rx_handler() subsequently call
bcm_rx_update_and_send(), which unconditionally executes
hrtimer_start(&op->thrtimer, ...)?
Does this re-arm the timer after it was cancelled, causing a use-after-free
when the timer fires after the bcm_op memory is freed at the end of the RCU
grace period?
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260610163744.51980-1-socketcan@hartkopp.net?part=1
^ permalink raw reply
* Re: [PATCH] can: bcm: use list_add_rcu() in bcm_[rx|tx]_setup()
From: sashiko-bot @ 2026-06-10 16:51 UTC (permalink / raw)
To: Oliver Hartkopp
Cc: Oleksij Rempel, linux-can, Vincent Mailhol, Marc Kleine-Budde
In-Reply-To: <20260610163744.51980-2-socketcan@hartkopp.net>
Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider:
Pre-existing issues:
- [High] Data race allowing transmission of unvalidated CAN frames with out-of-bounds lengths.
- [Medium] Missing RCU list traversal primitive for tx_ops in bcm_proc_show().
- [Medium] Unrecoverable state corruption upon validation failure during BCM operation update.
--
commit 6728a4ac4d52665b9a664d80e0c10b8ebce20e70
Author: Oliver Hartkopp <socketcan@hartkopp.net>
can: bcm: use list_add_rcu() in bcm_[rx|tx]_setup()
This patch moves the list_add_rcu() calls in bcm_tx_setup() and
bcm_rx_setup() to the end of the initialization code. This ensures the
bcm_op structure is fully initialized before it becomes visible to RCU
readers like bcm_proc_show().
Link: https://lore.kernel.org/linux-can/20260610094654.A1FFE1F00893@smtp.kernel.org/
> diff --git a/net/can/bcm.c b/net/can/bcm.c
> index a4bef2c48a559..7215fa32832c8 100644
> --- a/net/can/bcm.c
> +++ b/net/can/bcm.c
> @@ -930,6 +930,7 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
> struct bcm_sock *bo = bcm_sk(sk);
> struct bcm_op *op;
> struct canfd_frame *cf;
> + bool add_op_to_list = false;
> unsigned int i;
> int err;
[Severity: High]
This is a pre-existing issue, but is it possible for an unvalidated
CAN frame to be transmitted here?
When updating an existing operation in bcm_tx_setup(), the user data is
copied directly into the active transmission buffer before validation:
err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz);
if (op->flags & CAN_FD_FRAME) {
if (cf->len > 64)
err = -EINVAL;
}
Concurrently, the softirq timer bcm_tx_timeout_handler() could fire,
calling bcm_can_tx(), which fetches this frame and sends it without locks:
skb_put_data(skb, cf, op->cfsiz);
err = can_send(skb, 1);
Could a malicious user craft a frame with an out-of-bounds length
(e.g., cf->len = 255) that gets fetched and transmitted before
bcm_tx_setup() validates it and aborts? This could potentially cause
crashes or out-of-bounds memory reads in downstream CAN drivers.
[Severity: Medium]
This is a pre-existing issue, but does a failed validation leave the socket
in an unrecoverable corrupted state?
In both bcm_tx_setup() and bcm_rx_setup(), if an existing operation is being
updated and the validation fails after the memcpy_from_msg() block:
err = memcpy_from_msg(op->frames, msg, msg_head->nframes * op->cfsiz);
if (err < 0)
return err;
The function returns an error without reverting the changes to op->frames.
Because the timers might still be running for the active bcm_op, the socket
seems permanently stuck transmitting or filtering based on this partially
overwritten data.
[ ... ]
> @@ -1093,6 +1093,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
> op->flags |= TX_ANNOUNCE;
> }
>
> + /* add this bcm_op to the list of the tx_ops? */
> + if (add_op_to_list)
> + list_add_rcu(&op->list, &bo->tx_ops);
> +
[Severity: Medium]
This is a pre-existing issue, but since we are now using list_add_rcu()
here, should the iteration in bcm_proc_show() also use the RCU primitive?
bcm_proc_show() traverses the bo->tx_ops list under rcu_read_lock(), but
currently uses the standard list macro:
list_for_each_entry(op, &bo->tx_ops, list) {
Without list_for_each_entry_rcu(), it omits the necessary rcu_dereference()
calls and memory barriers. On weakly ordered architectures, couldn't this
lead to reading torn pointers or uninitialized memory of a newly inserted
bcm_op, potentially causing a crash?
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260610163744.51980-2-socketcan@hartkopp.net?part=1
^ permalink raw reply
* [PATCH v2] can: bcm: fix CAN frame rx/tx statistics
From: Oliver Hartkopp @ 2026-06-10 16:37 UTC (permalink / raw)
To: linux-can; +Cc: Oliver Hartkopp
KCSAN detected a data race within the bcm_rx_handler() when two CAN frames
have been simultaneously received and processed in a single rx op by two
different CPUs.
Use atomic operations with (signed) long data types to access the
statistics in the hot path to fix the KCSAN complaint.
Additionally simplify the update and check of statistics overflow by
using the atomic operations in a separate bcm_update_stats() function.
Fixes: ffd980f976e7 ("[CAN]: Add broadcast manager (bcm) protocol")
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
net/can/bcm.c | 38 ++++++++++++++++++++++++--------------
1 file changed, 24 insertions(+), 14 deletions(-)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 658c4a7a55df..e5db4dca8d5a 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -106,11 +106,11 @@ struct bcm_op {
struct list_head list;
struct rcu_head rcu;
int ifindex;
canid_t can_id;
u32 flags;
- unsigned long frames_abs, frames_filtered;
+ atomic_long_t frames_abs, frames_filtered;
struct bcm_timeval ival1, ival2;
struct hrtimer timer, thrtimer;
ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg;
int rx_ifindex;
int cfsiz;
@@ -223,14 +223,17 @@ static int bcm_proc_show(struct seq_file *m, void *v)
rcu_read_lock();
list_for_each_entry_rcu(op, &bo->rx_ops, list) {
- unsigned long reduction;
+ long reduction, frames_filtered, frames_abs;
+
+ frames_filtered = atomic_long_read(&op->frames_filtered);
+ frames_abs = atomic_long_read(&op->frames_abs);
/* print only active entries & prevent division by zero */
- if (!op->frames_abs)
+ if (!frames_abs)
continue;
seq_printf(m, "rx_op: %03X %-5s ", op->can_id,
bcm_proc_getifname(net, ifname, op->ifindex));
@@ -248,13 +251,13 @@ static int bcm_proc_show(struct seq_file *m, void *v)
if (op->kt_ival2)
seq_printf(m, "thr=%lld ",
(long long)ktime_to_us(op->kt_ival2));
seq_printf(m, "# recv %ld (%ld) => reduction: ",
- op->frames_filtered, op->frames_abs);
+ frames_filtered, frames_abs);
- reduction = 100 - (op->frames_filtered * 100) / op->frames_abs;
+ reduction = 100 - (frames_filtered * 100) / frames_abs;
seq_printf(m, "%s%ld%%\n",
(reduction == 100) ? "near " : "", reduction);
}
@@ -274,20 +277,30 @@ static int bcm_proc_show(struct seq_file *m, void *v)
if (op->kt_ival2)
seq_printf(m, "t2=%lld ",
(long long)ktime_to_us(op->kt_ival2));
- seq_printf(m, "# sent %ld\n", op->frames_abs);
+ seq_printf(m, "# sent %ld\n",
+ atomic_long_read(&op->frames_abs));
}
seq_putc(m, '\n');
rcu_read_unlock();
return 0;
}
#endif /* CONFIG_PROC_FS */
+static void bcm_update_stats(struct bcm_op *op)
+{
+ /* prevent statistics overflow */
+ if (atomic_long_inc_return(&op->frames_abs) > LONG_MAX / 100) {
+ atomic_long_set(&op->frames_filtered, 0);
+ atomic_long_set(&op->frames_abs, 0);
+ }
+}
+
/*
* bcm_can_tx - send the (next) CAN frame to the appropriate CAN interface
* of the given bcm tx op
*/
static void bcm_can_tx(struct bcm_op *op)
@@ -334,11 +347,11 @@ static void bcm_can_tx(struct bcm_op *op)
/* update currframe and count under lock protection */
spin_lock_bh(&op->bcm_tx_lock);
if (!err)
- op->frames_abs++;
+ bcm_update_stats(op);
op->currframe++;
/* reached last frame? */
if (op->currframe >= op->nframes)
@@ -487,16 +500,13 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
*/
static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data)
{
struct bcm_msg_head head;
- /* update statistics */
- op->frames_filtered++;
-
- /* prevent statistics overflow */
- if (op->frames_filtered > ULONG_MAX/100)
- op->frames_filtered = op->frames_abs = 0;
+ /* update statistics (frames_filtered <= frames_abs) */
+ if (atomic_long_read(&op->frames_abs))
+ atomic_long_inc(&op->frames_filtered);
/* this element is not throttled anymore */
data->flags &= ~RX_THR;
memset(&head, 0, sizeof(head));
@@ -733,11 +743,11 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
/* save rx timestamp */
op->rx_stamp = skb->tstamp;
/* save originator for recvfrom() */
op->rx_ifindex = skb->dev->ifindex;
/* update statistics */
- op->frames_abs++;
+ bcm_update_stats(op);
if (op->flags & RX_RTR_FRAME) {
/* send reply for RTR-request (placed in op->frames[0]) */
bcm_can_tx(op);
return;
--
2.53.0
^ permalink raw reply related
* [PATCH] can: bcm: use list_add_rcu() in bcm_[rx|tx]_setup()
From: Oliver Hartkopp @ 2026-06-10 16:37 UTC (permalink / raw)
To: linux-can; +Cc: Oliver Hartkopp, sashiko-reviews
In-Reply-To: <20260610163744.51980-1-socketcan@hartkopp.net>
sashiko-bot remarked the missing use of list_add_rcu() in
bcm_[rx|tx]_setup() to have a proper initialized bcm_op structure
when bcm_proc_show() traverses the bcm_op's under rcu_read_lock().
To cover all initial settings of the bcm_op's the list_add_rcu() calls
are moved to the end of the setup code.
Reported-by: sashiko-reviews@lists.linux.dev
Closes: https://lore.kernel.org/linux-can/20260610094654.A1FFE1F00893@smtp.kernel.org/
Fixes: dac5e6249159 ("can: bcm: add missing rcu read protection for procfs content")
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
net/can/bcm.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index e5db4dca8d5a..76d1e067e156 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -954,10 +954,11 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
int ifindex, struct sock *sk)
{
struct bcm_sock *bo = bcm_sk(sk);
struct bcm_op *op;
struct canfd_frame *cf;
+ bool add_op_to_list = false;
unsigned int i;
int err;
/* we need a real device to send frames */
if (!ifindex)
@@ -1094,12 +1095,11 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
/* currently unused in tx_ops */
hrtimer_setup(&op->thrtimer, hrtimer_dummy_timeout, CLOCK_MONOTONIC,
HRTIMER_MODE_REL_SOFT);
- /* add this bcm_op to the list of the tx_ops */
- list_add(&op->list, &bo->tx_ops);
+ add_op_to_list = true;
} /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */
if (op->flags & SETTIMER) {
/* set timer values */
@@ -1117,10 +1117,14 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
hrtimer_cancel(&op->timer);
/* spec: send CAN frame when starting timer */
op->flags |= TX_ANNOUNCE;
}
+ /* add this bcm_op to the list of the tx_ops? */
+ if (add_op_to_list)
+ list_add_rcu(&op->list, &bo->tx_ops);
+
if (op->flags & TX_ANNOUNCE)
bcm_can_tx(op);
if (op->flags & STARTTIMER)
bcm_tx_start_timer(op);
@@ -1278,13 +1282,10 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
hrtimer_setup(&op->timer, bcm_rx_timeout_handler, CLOCK_MONOTONIC,
HRTIMER_MODE_REL_SOFT);
hrtimer_setup(&op->thrtimer, bcm_rx_thr_handler, CLOCK_MONOTONIC,
HRTIMER_MODE_REL_SOFT);
- /* add this bcm_op to the list of the rx_ops */
- list_add(&op->list, &bo->rx_ops);
-
/* call can_rx_register() */
do_rx_register = 1;
} /* if ((op = bcm_find_op(&bo->rx_ops, msg_head->can_id, ifindex))) */
@@ -1359,14 +1360,16 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
err = can_rx_register(sock_net(sk), NULL, op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op, "bcm", sk);
if (err) {
/* this bcm rx op is broken -> remove it */
- list_del_rcu(&op->list);
bcm_remove_op(op);
return err;
}
+
+ /* add this bcm_op to the list of the rx_ops */
+ list_add_rcu(&op->list, &bo->rx_ops);
}
return msg_head->nframes * op->cfsiz + MHSIZ;
}
--
2.53.0
^ permalink raw reply related
* Re: [PATCHv2 4/4] m68k: coldfire: fix non-standard readX()/writeX() functions
From: sashiko-bot @ 2026-06-10 14:27 UTC (permalink / raw)
To: Greg Ungerer
Cc: Oliver Hartkopp, Vincent Mailhol, linux-can, Frank.Li,
Oleksij Rempel, Marc Kleine-Budde, imx
In-Reply-To: <20260609142139.1563360-6-gerg@linux-m68k.org>
Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider:
New issues:
- [High] The transition to asm-generic readX()/writeX() changes the endianness of ColdFire internal IO accesses, breaking unpatched ColdFire drivers.
Pre-existing issues:
- [High] Undefined behavior when shifting 1 by 32 or more using `BIT(ch)` in `mcf_edma_tx_handler`.
- [High] The error interrupt handler incorrectly skips handling high channels and returns `IRQ_NONE` after servicing low channels.
--
commit 25193db3eba89f780b6dc534f2f72e16f3429571
Author: Greg Ungerer <gerg@linux-m68k.org>
m68k: coldfire: fix non-standard readX()/writeX() functions
Remove the local ColdFire definitions of readb()/readw()/readl() and
writeb()/writew()/writel() and use the asm-generic versions of them.
> diff --git a/arch/m68k/include/asm/io_no.h b/arch/m68k/include/asm/io_no.h
> index 4f0f34b06e37c..2f12f4ed0da54 100644
> --- a/arch/m68k/include/asm/io_no.h
> +++ b/arch/m68k/include/asm/io_no.h
[ ... ]
> -#define writel writel
> -static inline void writel(u32 value, volatile void __iomem *addr)
> -{
> - if (cf_internalio(addr))
> - __raw_writel(value, addr);
> - else
> - __raw_writel(swab32(value), addr);
> -}
[Severity: High]
Does replacing the local readX()/writeX() with the asm-generic versions
change the endianness of unpatched ColdFire drivers?
The old ColdFire implementation detected internal IO memory using
cf_internalio() and bypassed byte-swapping to provide native big-endian
access. The asm-generic accessors always assume little-endian devices and
perform byte-swapping.
Will unpatched drivers relying on readX()/writeX() for internal IO
(such as spi-coldfire-qspi.c, sdhci-esdhc-mcf.c, or fec_main.c) now read
and write byte-swapped values and malfunction?
[ ... ]
> diff --git a/drivers/dma/mcf-edma-main.c b/drivers/dma/mcf-edma-main.c
> index 9e1c6400c77be..4ed0ce644e37e 100644
> --- a/drivers/dma/mcf-edma-main.c
> +++ b/drivers/dma/mcf-edma-main.c
> @@ -21,9 +21,9 @@ static irqreturn_t mcf_edma_tx_handler(int irq, void *dev_id)
> unsigned int ch;
> u64 intmap;
>
> - intmap = ioread32(regs->inth);
> + intmap = ioread32be(regs->inth);
> intmap <<= 32;
> - intmap |= ioread32(regs->intl);
> + intmap |= ioread32be(regs->intl);
> if (!intmap)
> return IRQ_NONE;
[Severity: High]
This is a pre-existing issue, but does this shift invoke undefined behavior
for higher channels?
Looking at the loop immediately following this in mcf_edma_tx_handler():
for (ch = 0; ch < mcf_edma->n_chans; ch++) {
if (intmap & BIT(ch)) {
...
On a 32-bit architecture like m68k, BIT(ch) expands to a 32-bit unsigned
long. Since ch can be up to 64, shifting by 32 or more would be undefined,
potentially causing channels 32-63 to be improperly processed or missing
their interrupts entirely.
Should this use BIT_ULL(ch) when testing against the 64-bit intmap?
[ ... ]
> @@ -43,7 +43,7 @@ static irqreturn_t mcf_edma_err_handler(int irq, void *dev_id)
> struct edma_regs *regs = &mcf_edma->regs;
> unsigned int err, ch;
>
> - err = ioread32(regs->errl);
> + err = ioread32be(regs->errl);
> if (!err)
> return IRQ_NONE;
[Severity: High]
This is a pre-existing issue, but does this early return incorrectly skip
handling high channels?
If regs->errl is zero, it returns IRQ_NONE immediately, ignoring any errors
in regs->errh. This could leave errors in higher channels uncleared.
[ ... ]
> @@ -55,7 +55,7 @@ static irqreturn_t mcf_edma_err_handler(int irq, void *dev_id)
> }
> }
>
> - err = ioread32(regs->errh);
> + err = ioread32be(regs->errh);
> if (!err)
> return IRQ_NONE;
[Severity: High]
This is a pre-existing issue, but does this return the wrong IRQ status if
low channel errors were already handled?
If errl was non-zero (so some errors were handled) but errh is zero,
returning IRQ_NONE here instead of IRQ_HANDLED might incorrectly signal
to the IRQ subsystem that the interrupt wasn't handled at all.
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260609142139.1563360-6-gerg@linux-m68k.org?part=1
^ permalink raw reply
page: next (older)
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox