All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/1] can: bcm: defer rx_op deallocation to workqueue to fix thrtimer UAF
@ 2026-05-26 16:52 Lee Jones
  2026-06-03 12:33 ` Oliver Hartkopp
  0 siblings, 1 reply; 2+ messages in thread
From: Lee Jones @ 2026-05-26 16:52 UTC (permalink / raw)
  To: lee, Oliver Hartkopp, Marc Kleine-Budde, linux-can, linux-kernel

Commit f1b4e32aca08 ("can: bcm: use call_rcu() instead of costly
synchronize_rcu()") replaced synchronize_rcu() in bcm_delete_rx_op()
with call_rcu() and introduced the RX_NO_AUTOTIMER flag.

However, this flag check was omitted for thrtimer in the packet rx
fast-path. During BCM RX operation teardown, a concurrent RCU reader
(bcm_rx_handler) can race and re-arm thrtimer via
bcm_rx_update_and_send() after call_rcu() has been scheduled.  Once
the RCU grace period elapses, bcm_op is freed.  The subsequently
firing thrtimer then dereferences the deallocated op, causing a UAF.

Adding flag checks to the rx fast-path (bcm_rx_update_and_send) does not
fully close the TOCTOU race and introduces latency for every CAN frame.
Conversely, calling hrtimer_cancel() directly inside the RCU callback
(softirq context) is fatal as hrtimer_cancel() can sleep, triggering
a "scheduling while atomic" panic.

Resolve this by deferring the timer cancellation and memory free to a
dedicated unbound workqueue (bcm_wq).  The RCU callback now queues a
work item to bcm_wq, which safely cancels both timers and deallocates
memory in sleepable process context.  A dedicated workqueue is used to
prevent system-wide WQ saturation and is cleanly flushed/destroyed
on module unload to avoid rmmod page faults.

Fixes: f1b4e32aca08 ("can: bcm: use call_rcu() instead of costly synchronize_rcu()")
Signed-off-by: Lee Jones <lee@kernel.org>
---
v1 => v2: New patch using workqueues
v2 => v3: Add memory barrier

 net/can/bcm.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/net/can/bcm.c b/net/can/bcm.c
index a4bef2c48a55..c49b09f3229f 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -58,6 +58,7 @@
 #include <linux/can/skb.h>
 #include <linux/can/bcm.h>
 #include <linux/slab.h>
+#include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #include <net/can.h>
 #include <net/sock.h>
@@ -92,6 +93,8 @@ MODULE_ALIAS("can-proto-2");
 
 #define BCM_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_ifindex)
 
+static struct workqueue_struct *bcm_wq;
+
 /*
  * easy access to the first 64 bit of can(fd)_frame payload. cp->data is
  * 64 bit aligned so the offset has to be multiples of 8 which is ensured
@@ -105,6 +108,7 @@ static inline u64 get_u64(const struct canfd_frame *cp, int offset)
 struct bcm_op {
 	struct list_head list;
 	struct rcu_head rcu;
+	struct work_struct work;
 	int ifindex;
 	canid_t can_id;
 	u32 flags;
@@ -793,9 +797,12 @@ static struct bcm_op *bcm_find_op(struct list_head *ops,
 	return NULL;
 }
 
-static void bcm_free_op_rcu(struct rcu_head *rcu_head)
+static void bcm_free_op_work(struct work_struct *work)
 {
-	struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
+	struct bcm_op *op = container_of(work, struct bcm_op, work);
+
+	hrtimer_cancel(&op->timer);
+	hrtimer_cancel(&op->thrtimer);
 
 	if ((op->frames) && (op->frames != &op->sframe))
 		kfree(op->frames);
@@ -806,6 +813,14 @@ static void bcm_free_op_rcu(struct rcu_head *rcu_head)
 	kfree(op);
 }
 
+static void bcm_free_op_rcu(struct rcu_head *rcu_head)
+{
+	struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
+
+	INIT_WORK(&op->work, bcm_free_op_work);
+	queue_work(bcm_wq, &op->work);
+}
+
 static void bcm_remove_op(struct bcm_op *op)
 {
 	hrtimer_cancel(&op->timer);
@@ -1839,11 +1854,15 @@ static int __init bcm_module_init(void)
 {
 	int err;
 
+	bcm_wq = alloc_workqueue("can-bcm-wq", WQ_UNBOUND, 0);
+	if (!bcm_wq)
+		return -ENOMEM;
+
 	pr_info("can: broadcast manager protocol\n");
 
 	err = register_pernet_subsys(&canbcm_pernet_ops);
 	if (err)
-		return err;
+		goto register_pernet_failed;
 
 	err = register_netdevice_notifier(&canbcm_notifier);
 	if (err)
@@ -1861,6 +1880,8 @@ static int __init bcm_module_init(void)
 	unregister_netdevice_notifier(&canbcm_notifier);
 register_notifier_failed:
 	unregister_pernet_subsys(&canbcm_pernet_ops);
+register_pernet_failed:
+	destroy_workqueue(bcm_wq);
 	return err;
 }
 
@@ -1869,6 +1890,8 @@ static void __exit bcm_module_exit(void)
 	can_proto_unregister(&bcm_can_proto);
 	unregister_netdevice_notifier(&canbcm_notifier);
 	unregister_pernet_subsys(&canbcm_pernet_ops);
+	rcu_barrier();
+	destroy_workqueue(bcm_wq);
 }
 
 module_init(bcm_module_init);
-- 
2.54.0.746.g67dd491aae-goog


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v3 1/1] can: bcm: defer rx_op deallocation to workqueue to fix thrtimer UAF
  2026-05-26 16:52 [PATCH v3 1/1] can: bcm: defer rx_op deallocation to workqueue to fix thrtimer UAF Lee Jones
@ 2026-06-03 12:33 ` Oliver Hartkopp
  0 siblings, 0 replies; 2+ messages in thread
From: Oliver Hartkopp @ 2026-06-03 12:33 UTC (permalink / raw)
  To: Lee Jones, Marc Kleine-Budde, linux-can, linux-kernel



On 26.05.26 18:52, Lee Jones wrote:
> Commit f1b4e32aca08 ("can: bcm: use call_rcu() instead of costly
> synchronize_rcu()") replaced synchronize_rcu() in bcm_delete_rx_op()
> with call_rcu() and introduced the RX_NO_AUTOTIMER flag.
> 
> However, this flag check was omitted for thrtimer in the packet rx
> fast-path. During BCM RX operation teardown, a concurrent RCU reader
> (bcm_rx_handler) can race and re-arm thrtimer via
> bcm_rx_update_and_send() after call_rcu() has been scheduled.  Once
> the RCU grace period elapses, bcm_op is freed.  The subsequently
> firing thrtimer then dereferences the deallocated op, causing a UAF.
> 
> Adding flag checks to the rx fast-path (bcm_rx_update_and_send) does not
> fully close the TOCTOU race and introduces latency for every CAN frame.
> Conversely, calling hrtimer_cancel() directly inside the RCU callback
> (softirq context) is fatal as hrtimer_cancel() can sleep, triggering
> a "scheduling while atomic" panic.
> 
> Resolve this by deferring the timer cancellation and memory free to a
> dedicated unbound workqueue (bcm_wq).  The RCU callback now queues a
> work item to bcm_wq, which safely cancels both timers and deallocates
> memory in sleepable process context.  A dedicated workqueue is used to
> prevent system-wide WQ saturation and is cleanly flushed/destroyed
> on module unload to avoid rmmod page faults.
> 
> Fixes: f1b4e32aca08 ("can: bcm: use call_rcu() instead of costly synchronize_rcu()")
> Signed-off-by: Lee Jones <lee@kernel.org>

Tested-by: Oliver Hartkopp <socketcan@hartkopp.net>

I was not able to trigger the problem with an unpatched kernel for two 
days now - and this patch does not seem to make it worse.

Btw. @Marc: Do you plan to send some updates for the net tree for rc7?

If so, the two KCSAN fixes

https://lore.kernel.org/linux-can/20260522180758.51128-1-socketcan@hartkopp.net/T/#t

should be considered too.

Best regards,
Oliver


> ---
> v1 => v2: New patch using workqueues
> v2 => v3: Add memory barrier
> 
>   net/can/bcm.c | 29 ++++++++++++++++++++++++++---
>   1 file changed, 26 insertions(+), 3 deletions(-)
> 
> diff --git a/net/can/bcm.c b/net/can/bcm.c
> index a4bef2c48a55..c49b09f3229f 100644
> --- a/net/can/bcm.c
> +++ b/net/can/bcm.c
> @@ -58,6 +58,7 @@
>   #include <linux/can/skb.h>
>   #include <linux/can/bcm.h>
>   #include <linux/slab.h>
> +#include <linux/workqueue.h>
>   #include <linux/spinlock.h>
>   #include <net/can.h>
>   #include <net/sock.h>
> @@ -92,6 +93,8 @@ MODULE_ALIAS("can-proto-2");
>   
>   #define BCM_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_ifindex)
>   
> +static struct workqueue_struct *bcm_wq;
> +
>   /*
>    * easy access to the first 64 bit of can(fd)_frame payload. cp->data is
>    * 64 bit aligned so the offset has to be multiples of 8 which is ensured
> @@ -105,6 +108,7 @@ static inline u64 get_u64(const struct canfd_frame *cp, int offset)
>   struct bcm_op {
>   	struct list_head list;
>   	struct rcu_head rcu;
> +	struct work_struct work;
>   	int ifindex;
>   	canid_t can_id;
>   	u32 flags;
> @@ -793,9 +797,12 @@ static struct bcm_op *bcm_find_op(struct list_head *ops,
>   	return NULL;
>   }
>   
> -static void bcm_free_op_rcu(struct rcu_head *rcu_head)
> +static void bcm_free_op_work(struct work_struct *work)
>   {
> -	struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
> +	struct bcm_op *op = container_of(work, struct bcm_op, work);
> +
> +	hrtimer_cancel(&op->timer);
> +	hrtimer_cancel(&op->thrtimer);
>   
>   	if ((op->frames) && (op->frames != &op->sframe))
>   		kfree(op->frames);
> @@ -806,6 +813,14 @@ static void bcm_free_op_rcu(struct rcu_head *rcu_head)
>   	kfree(op);
>   }
>   
> +static void bcm_free_op_rcu(struct rcu_head *rcu_head)
> +{
> +	struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
> +
> +	INIT_WORK(&op->work, bcm_free_op_work);
> +	queue_work(bcm_wq, &op->work);
> +}
> +
>   static void bcm_remove_op(struct bcm_op *op)
>   {
>   	hrtimer_cancel(&op->timer);
> @@ -1839,11 +1854,15 @@ static int __init bcm_module_init(void)
>   {
>   	int err;
>   
> +	bcm_wq = alloc_workqueue("can-bcm-wq", WQ_UNBOUND, 0);
> +	if (!bcm_wq)
> +		return -ENOMEM;
> +
>   	pr_info("can: broadcast manager protocol\n");
>   
>   	err = register_pernet_subsys(&canbcm_pernet_ops);
>   	if (err)
> -		return err;
> +		goto register_pernet_failed;
>   
>   	err = register_netdevice_notifier(&canbcm_notifier);
>   	if (err)
> @@ -1861,6 +1880,8 @@ static int __init bcm_module_init(void)
>   	unregister_netdevice_notifier(&canbcm_notifier);
>   register_notifier_failed:
>   	unregister_pernet_subsys(&canbcm_pernet_ops);
> +register_pernet_failed:
> +	destroy_workqueue(bcm_wq);
>   	return err;
>   }
>   
> @@ -1869,6 +1890,8 @@ static void __exit bcm_module_exit(void)
>   	can_proto_unregister(&bcm_can_proto);
>   	unregister_netdevice_notifier(&canbcm_notifier);
>   	unregister_pernet_subsys(&canbcm_pernet_ops);
> +	rcu_barrier();
> +	destroy_workqueue(bcm_wq);
>   }
>   
>   module_init(bcm_module_init);


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-06-03 12:36 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-26 16:52 [PATCH v3 1/1] can: bcm: defer rx_op deallocation to workqueue to fix thrtimer UAF Lee Jones
2026-06-03 12:33 ` Oliver Hartkopp

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.