* [PATCH 1/2] timer: add add_timer_active_cpu()
2026-04-23 9:19 [PATCH 0/2] timers/workqueue: Add support for active CPU Partha Satapathy
@ 2026-04-23 9:19 ` Partha Satapathy
2026-04-23 9:19 ` [PATCH 2/2] workqueue: add queue_delayed_work_active_cpu() Partha Satapathy
2026-04-23 12:05 ` [PATCH 0/2] timers/workqueue: Add support for active CPU Frederic Weisbecker
2 siblings, 0 replies; 4+ messages in thread
From: Partha Satapathy @ 2026-04-23 9:19 UTC (permalink / raw)
To: partha.satapathy, anna-maria, frederic, tglx, linux-kernel, tj,
jiangshanlai
Cc: notify
From: Partha Sarathi Satapathy <partha.satapathy@oracle.com>
add_timer_on() can queue a timer on a CPU that goes offline before the
timer is enqueued. When that happens, the timer remains unserviced until
the CPU comes back online.
Callers can try to avoid that by checking CPU state themselves, but that
does not close the race with CPU hotplug. Taking the hotplug lock around
every enqueue is also too expensive for users that only want CPU-local
placement as a performance hint.
Add add_timer_active_cpu() for callers that want to queue a timer on a
specific CPU when that CPU is active, but otherwise fall back to an
active CPU. Implement this by teaching the enqueue path to verify that
the target timer base is active and, if not, requeue the timer on the
current CPU.
Leave add_timer_on() semantics unchanged for callers that require strict
CPU placement.
Signed-off-by: Partha Sarathi Satapathy <partha.satapathy@oracle.com>
---
include/linux/timer.h | 1 +
kernel/time/timer.c | 45 ++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 45 insertions(+), 1 deletion(-)
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 62e1cea71125..8c771b367662 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -148,6 +148,7 @@ static inline int timer_pending(const struct timer_list * timer)
}
extern void add_timer_on(struct timer_list *timer, int cpu);
+extern void add_timer_active_cpu(struct timer_list *timer, int cpu);
extern int mod_timer(struct timer_list *timer, unsigned long expires);
extern int mod_timer_pending(struct timer_list *timer, unsigned long expires);
extern int timer_reduce(struct timer_list *timer, unsigned long expires);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 1f2364126894..c73a28701a31 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -260,6 +260,7 @@ struct timer_base {
bool next_expiry_recalc;
bool is_idle;
bool timers_pending;
+ bool is_active;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
struct hlist_head vectors[WHEEL_SIZE];
} ____cacheline_aligned;
@@ -1296,7 +1297,7 @@ EXPORT_SYMBOL(add_timer_global);
*
* See add_timer() for further details.
*/
-void add_timer_on(struct timer_list *timer, int cpu)
+static void __add_timer_on(struct timer_list *timer, int cpu, bool need_ol_cpu)
{
struct timer_base *new_base, *base;
unsigned long flags;
@@ -1333,6 +1334,18 @@ void add_timer_on(struct timer_list *timer, int cpu)
WRITE_ONCE(timer->flags,
(timer->flags & ~TIMER_BASEMASK) | cpu);
}
+#ifdef CONFIG_HOTPLUG_CPU
+ if (need_ol_cpu) {
+ if (!base->is_active) {
+ raw_spin_unlock(&base->lock);
+ base = this_cpu_ptr(&timer_bases[BASE_LOCAL]);
+ raw_spin_lock(&base->lock);
+ cpu = smp_processor_id();
+ WRITE_ONCE(timer->flags,
+ (timer->flags & ~TIMER_BASEMASK) | cpu);
+ }
+ }
+#endif /* CONFIG_HOTPLUG_CPU */
forward_timer_base(base);
debug_timer_activate(timer);
@@ -1340,8 +1353,31 @@ void add_timer_on(struct timer_list *timer, int cpu)
out_unlock:
raw_spin_unlock_irqrestore(&base->lock, flags);
}
+
+void add_timer_on(struct timer_list *timer, int cpu)
+{
+ bool need_ol_cpu = false;
+
+ __add_timer_on(timer, cpu, need_ol_cpu);
+}
EXPORT_SYMBOL_GPL(add_timer_on);
+/**
+ * add_timer_active_cpu - Start a timer on a particular CPU if it is active
+ * @timer: The timer to be started
+ * @cpu: The CPU to start it on
+ *
+ * This is like add_timer_on(), except that the timer is queued on the
+ * given CPU only if that CPU is active; otherwise it is queued on the current CPU.
+ */
+void add_timer_active_cpu(struct timer_list *timer, int cpu)
+{
+ bool need_ol_cpu = true;
+
+ __add_timer_on(timer, cpu, need_ol_cpu);
+}
+EXPORT_SYMBOL_GPL(add_timer_active_cpu);
+
/**
* __timer_delete - Internal function: Deactivate a timer
* @timer: The timer to be deactivated
@@ -2507,6 +2543,7 @@ int timers_prepare_cpu(unsigned int cpu)
base->next_expiry_recalc = false;
base->timers_pending = false;
base->is_idle = false;
+ base->is_active = true;
}
return 0;
}
@@ -2535,6 +2572,11 @@ int timers_dead_cpu(unsigned int cpu)
WARN_ON_ONCE(old_base->running_timer);
old_base->running_timer = NULL;
+ /*
+ * Mark the dead CPU base inactive so that add_timer_active_cpu()
+ * no longer selects it as a target.
+ */
+ old_base->is_active = false;
for (i = 0; i < WHEEL_SIZE; i++)
migrate_timer_list(new_base, old_base->vectors + i);
@@ -2559,6 +2601,7 @@ static void __init init_timer_cpu(int cpu)
raw_spin_lock_init(&base->lock);
base->clk = jiffies;
base->next_expiry = base->clk + TIMER_NEXT_MAX_DELTA;
+ base->is_active = true;
timer_base_init_expiry_lock(base);
}
}
--
2.43.7
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH 2/2] workqueue: add queue_delayed_work_active_cpu()
2026-04-23 9:19 [PATCH 0/2] timers/workqueue: Add support for active CPU Partha Satapathy
2026-04-23 9:19 ` [PATCH 1/2] timer: add add_timer_active_cpu() Partha Satapathy
@ 2026-04-23 9:19 ` Partha Satapathy
2026-04-23 12:05 ` [PATCH 0/2] timers/workqueue: Add support for active CPU Frederic Weisbecker
2 siblings, 0 replies; 4+ messages in thread
From: Partha Satapathy @ 2026-04-23 9:19 UTC (permalink / raw)
To: partha.satapathy, anna-maria, frederic, tglx, linux-kernel, tj,
jiangshanlai
Cc: notify
From: Partha Sarathi Satapathy <partha.satapathy@oracle.com>
Delayed work queued with queue_delayed_work_on() inherits the same CPU
hotplug race as add_timer_on(): the backing timer can be enqueued on a
CPU that goes offline before the queueing completes, leaving the timer
unserviced until that CPU comes back online. As a result, the delayed
work item is never queued for execution.
Add queue_delayed_work_active_cpu() for callers that want delayed work
to target a specific CPU when that CPU is active, but fall back to an
active CPU otherwise.
For the delayed timer path, use add_timer_active_cpu() instead of
add_timer_on(). After enqueueing the timer, update dwork->cpu to match
the CPU recorded in the timer so that the work item is queued on the CPU
actually selected for the timer.
Leave queue_delayed_work_on() unchanged for callers that require the
existing strict behavior.
Signed-off-by: Partha Sarathi Satapathy <partha.satapathy@oracle.com>
---
include/linux/workqueue.h | 3 ++
kernel/workqueue.c | 96 +++++++++++++++++++++++++++++++++------
2 files changed, 86 insertions(+), 13 deletions(-)
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index dabc351cc127..9799e44bca2b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -596,6 +596,9 @@ extern bool queue_work_node(int node, struct workqueue_struct *wq,
struct work_struct *work);
extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct delayed_work *work, unsigned long delay);
+extern bool queue_delayed_work_active_cpu(int cpu, struct workqueue_struct *wq,
+ struct delayed_work *work,
+ unsigned long delay);
extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct delayed_work *dwork, unsigned long delay);
extern bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 253311af47c6..80f5d162624d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2483,7 +2483,8 @@ void delayed_work_timer_fn(struct timer_list *t)
EXPORT_SYMBOL(delayed_work_timer_fn);
static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
- struct delayed_work *dwork, unsigned long delay)
+ struct delayed_work *dwork, unsigned long delay,
+ bool dwork_active_cpu)
{
struct timer_list *timer = &dwork->timer;
struct work_struct *work = &dwork->work;
@@ -2504,7 +2505,8 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
return;
}
- WARN_ON_ONCE(cpu != WORK_CPU_UNBOUND && !cpu_online(cpu));
+ WARN_ON_ONCE(!dwork_active_cpu && cpu != WORK_CPU_UNBOUND &&
+ !cpu_online(cpu));
dwork->wq = wq;
dwork->cpu = cpu;
timer->expires = jiffies + delay;
@@ -2516,10 +2518,17 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
cpu = housekeeping_any_cpu(HK_TYPE_TIMER);
add_timer_on(timer, cpu);
} else {
- if (likely(cpu == WORK_CPU_UNBOUND))
+ if (likely(cpu == WORK_CPU_UNBOUND)) {
add_timer_global(timer);
- else
+ } else if (dwork_active_cpu) {
+ add_timer_active_cpu(timer, cpu);
+ /* add_timer_active_cpu() may have moved the timer off an offline */
+ /* CPU; re-read the target so the work runs where the timer fires. */
+ dwork->cpu = READ_ONCE(timer->flags) &
+ TIMER_CPUMASK;
+ } else {
add_timer_on(timer, cpu);
+ }
}
}
@@ -2530,18 +2539,23 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
* @dwork: work to queue
* @delay: number of jiffies to wait before queueing
*
- * We queue the delayed_work to a specific CPU, for non-zero delays the
- * caller must ensure it is online and can't go away. Callers that fail
- * to ensure this, may get @dwork->timer queued to an offlined CPU and
- * this will prevent queueing of @dwork->work unless the offlined CPU
- * becomes online again.
+ * We queue the delayed_work to a specific CPU. For non-zero delays, when
+ * @dwork_active_cpu is not set the caller must ensure the CPU is online
+ * and can't go away. Callers that fail to ensure this may get
+ * @dwork->timer queued to an offlined CPU, and this will prevent
+ * queueing of @dwork->work unless the offlined CPU becomes online again.
+ *
+ * If @dwork_active_cpu is set, a timer targeted at an offline CPU is
+ * queued on the current CPU instead.
*
* Return: %false if @work was already on a queue, %true otherwise. If
* @delay is zero and @dwork is idle, it will be scheduled for immediate
* execution.
*/
-bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
- struct delayed_work *dwork, unsigned long delay)
+static inline bool
+__queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
+ struct delayed_work *dwork, unsigned long delay,
+ bool dwork_active_cpu)
{
struct work_struct *work = &dwork->work;
bool ret = false;
@@ -2552,15 +2566,70 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) &&
!clear_pending_if_disabled(work)) {
- __queue_delayed_work(cpu, wq, dwork, delay);
+ __queue_delayed_work(cpu, wq, dwork, delay, dwork_active_cpu);
ret = true;
}
local_irq_restore(irq_flags);
return ret;
}
+
+/**
+ * queue_delayed_work_on - queue work on specific CPU after delay
+ * @cpu: CPU number to execute work on
+ * @wq: workqueue to use
+ * @dwork: work to queue
+ * @delay: number of jiffies to wait before queueing
+ *
+ * We queue the delayed_work to a specific CPU, for non-zero delays the
+ * caller must ensure it is online and can't go away. Callers that fail
+ * to ensure this, may get @dwork->timer queued to an offlined CPU and
+ * this will prevent queueing of @dwork->work unless the offlined CPU
+ * becomes online again.
+ *
+ * Return: %false if @work was already on a queue, %true otherwise. If
+ * @delay is zero and @dwork is idle, it will be scheduled for immediate
+ * execution.
+ */
+bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
+ struct delayed_work *dwork, unsigned long delay)
+{
+ bool ret = false;
+ bool dwork_active_cpu = false;
+
+ ret = __queue_delayed_work_on(cpu, wq, dwork, delay, dwork_active_cpu);
+ return ret;
+}
EXPORT_SYMBOL(queue_delayed_work_on);
+/**
+ * queue_delayed_work_active_cpu - queue delayed work for an active CPU
+ * @cpu: CPU number to execute work on
+ * @wq: workqueue to use
+ * @dwork: work to queue
+ * @delay: number of jiffies to wait before queueing
+ *
+ * This is like queue_delayed_work_on(), except that for delayed work
+ * the timer is queued on @cpu only if that CPU is online or is the
+ * current CPU.
+ *
+ * Return: %false if @work was already on a queue, %true otherwise. If
+ * @delay is zero and @dwork is idle, it will be scheduled for immediate
+ * execution.
+ *
+ */
+bool queue_delayed_work_active_cpu(int cpu, struct workqueue_struct *wq,
+ struct delayed_work *dwork,
+ unsigned long delay)
+{
+ bool ret = false;
+ bool dwork_active_cpu = true;
+
+ ret = __queue_delayed_work_on(cpu, wq, dwork, delay, dwork_active_cpu);
+ return ret;
+}
+EXPORT_SYMBOL(queue_delayed_work_active_cpu);
+
/**
* mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
* @cpu: CPU number to execute work on
@@ -2584,11 +2653,12 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
{
unsigned long irq_flags;
bool ret;
+ bool dwork_active_cpu = false;
ret = work_grab_pending(&dwork->work, WORK_CANCEL_DELAYED, &irq_flags);
if (!clear_pending_if_disabled(&dwork->work))
- __queue_delayed_work(cpu, wq, dwork, delay);
+ __queue_delayed_work(cpu, wq, dwork, delay, dwork_active_cpu);
local_irq_restore(irq_flags);
return ret;
--
2.43.7
^ permalink raw reply related [flat|nested] 4+ messages in thread