* [PATCH v3] hrtimers: calculate expires_next after all timers are executed
@ 2015-01-20 12:45 Stanislav Fomichev
2015-01-20 23:06 ` Thomas Gleixner
0 siblings, 1 reply; 3+ messages in thread
From: Stanislav Fomichev @ 2015-01-20 12:45 UTC (permalink / raw)
To: tglx, linux-kernel
Cc: john.stultz, viresh.kumar, fweisbec, mingo, cl, stuart.w.hayes
Hi Thomas,
There was a problem awhile ago regarding hrtimer interrupt missing tick
reprogramming: https://lkml.org/lkml/2014/3/17/323
This is the last version of the patch rebased on top of 3.19-rc5 and
lightly tested. Could you please apply it?
---
I think I'm hitting particularly subtle issue with NOHZ_IDLE kernel.
The sequence is as follows:
- CPU enters idle, we disable tick
- hrtimer interrupt fires (for hrtimer_wakeup)
- for clock base #1 (REALTIME) we wake up SCHED_RT thread and
start RT period timer (from start_rt_bandwidth) for clock base #0
(MONOTONIC)
- because we already checked expiry time for clock base #0
we end up programming wrong wake up time (minutes, from
tick_sched_timer)
- then, we exit idle loop and restart tick;
but because tick_sched_timer is not leftmost (the leftmost one
is RT period timer) we don't program it
So in the end, there is working CPU without tick interrupt enabled.
This eventually leads to RCU stall on that CPU: rcu_gp_kthread
is not woken up because there is no tick.
This fix runs expired timers first and only then tries to find
next expiry time for all clocks.
Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
---
include/linux/hrtimer.h | 2 +
kernel/time/hrtimer.c | 162 ++++++++++++++++++++++++++----------------------
2 files changed, 90 insertions(+), 74 deletions(-)
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index a036d058a249..f23ee6edd2e7 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -141,6 +141,7 @@ struct hrtimer_sleeper {
* @get_time: function to retrieve the current time of the clock
* @softirq_time: the time when running the hrtimer queue in the softirq
* @offset: offset of this clock to the monotonic base
+ * @expires_next: absolute time of the next event on this clock base
*/
struct hrtimer_clock_base {
struct hrtimer_cpu_base *cpu_base;
@@ -151,6 +152,7 @@ struct hrtimer_clock_base {
ktime_t (*get_time)(void);
ktime_t softirq_time;
ktime_t offset;
+ ktime_t expires_next;
};
enum hrtimer_base_type {
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 37e50aadd471..63247b2ac72b 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -440,6 +440,36 @@ static inline void debug_deactivate(struct hrtimer *timer)
trace_hrtimer_cancel(timer);
}
+/*
+ * Find the next expiring timer and return the expiry in absolute
+ * CLOCK_MONOTONIC time.
+ */
+static ktime_t hrtimer_get_expires_next(struct hrtimer_cpu_base *cpu_base)
+{
+ int i;
+ ktime_t expires, expires_next = { .tv64 = KTIME_MAX };
+ struct hrtimer_clock_base *base;
+
+ for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
+ if (!(cpu_base->active_bases & (1 << i)))
+ continue;
+
+ base = cpu_base->clock_base + i;
+ /* Adjust to CLOCK_MONOTONIC */
+ expires = ktime_sub(base->expires_next, base->offset);
+
+ if (expires.tv64 < expires_next.tv64)
+ expires_next = expires;
+ }
+ /*
+ * If clock_was_set() has changed base->offset the result
+ * might be negative. Fix it up to prevent a false positive in
+ * clockevents_program_event()
+ */
+ expires.tv64 = 0;
+ return expires_next.tv64 < 0 ? expires : expires_next;
+}
+
/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -488,32 +518,7 @@ static inline int hrtimer_hres_active(void)
static void
hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
{
- int i;
- struct hrtimer_clock_base *base = cpu_base->clock_base;
- ktime_t expires, expires_next;
-
- expires_next.tv64 = KTIME_MAX;
-
- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
- struct hrtimer *timer;
- struct timerqueue_node *next;
-
- next = timerqueue_getnext(&base->active);
- if (!next)
- continue;
- timer = container_of(next, struct hrtimer, node);
-
- expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
- /*
- * clock_was_set() has changed base->offset so the
- * result might be negative. Fix it up to prevent a
- * false positive in clockevents_program_event()
- */
- if (expires.tv64 < 0)
- expires.tv64 = 0;
- if (expires.tv64 < expires_next.tv64)
- expires_next = expires;
- }
+ ktime_t expires_next = hrtimer_get_expires_next(cpu_base);
if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64)
return;
@@ -830,6 +835,8 @@ EXPORT_SYMBOL_GPL(hrtimer_forward);
static int enqueue_hrtimer(struct hrtimer *timer,
struct hrtimer_clock_base *base)
{
+ int leftmost;
+
debug_activate(timer);
timerqueue_add(&base->active, &timer->node);
@@ -841,7 +848,13 @@ static int enqueue_hrtimer(struct hrtimer *timer,
*/
timer->state |= HRTIMER_STATE_ENQUEUED;
- return (&timer->node == base->active.next);
+ leftmost = &timer->node == base->active.next;
+ /*
+ * If the new timer is the leftmost, update clock_base->expires_next.
+ */
+ if (leftmost)
+ base->expires_next = hrtimer_get_expires(timer);
+ return leftmost;
}
/*
@@ -858,27 +871,45 @@ static void __remove_hrtimer(struct hrtimer *timer,
struct hrtimer_clock_base *base,
unsigned long newstate, int reprogram)
{
- struct timerqueue_node *next_timer;
+ struct timerqueue_node *next;
+ struct hrtimer *next_timer;
+ bool first;
+
if (!(timer->state & HRTIMER_STATE_ENQUEUED))
goto out;
- next_timer = timerqueue_getnext(&base->active);
+ /*
+ * If this is not the first timer of the clock base, we just
+ * remove it. No updates, no reprogramming.
+ */
+ first = timerqueue_getnext(&base->active) == &timer->node;
timerqueue_del(&base->active, &timer->node);
- if (&timer->node == next_timer) {
+ if (!first)
+ goto out;
+
+ /*
+ * Update cpu base and clock base. This needs to be done
+ * before calling into hrtimer_force_reprogram() as that
+ * relies on active_bases and expires_next.
+ */
+ next = timerqueue_getnext(&base->active);
+ if (!next) {
+ base->cpu_base->active_bases &= ~(1 << base->index);
+ base->expires_next.tv64 = KTIME_MAX;
+ } else {
+ next_timer = container_of(next, struct hrtimer, node);
+ base->expires_next = hrtimer_get_expires(next_timer);
+ }
+
#ifdef CONFIG_HIGH_RES_TIMERS
- /* Reprogram the clock event device. if enabled */
- if (reprogram && hrtimer_hres_active()) {
- ktime_t expires;
-
- expires = ktime_sub(hrtimer_get_expires(timer),
- base->offset);
- if (base->cpu_base->expires_next.tv64 == expires.tv64)
- hrtimer_force_reprogram(base->cpu_base, 1);
- }
-#endif
+ if (reprogram && hrtimer_hres_active()) {
+ ktime_t expires;
+
+ expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
+ if (base->cpu_base->expires_next.tv64 == expires.tv64)
+ hrtimer_force_reprogram(base->cpu_base, 1);
}
- if (!timerqueue_getnext(&base->active))
- base->cpu_base->active_bases &= ~(1 << base->index);
+#endif
out:
timer->state = newstate;
}
@@ -1104,35 +1135,21 @@ EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
ktime_t hrtimer_get_next_event(void)
{
struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
- struct hrtimer_clock_base *base = cpu_base->clock_base;
- ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
+ ktime_t next, delta = { .tv64 = KTIME_MAX };
unsigned long flags;
- int i;
raw_spin_lock_irqsave(&cpu_base->lock, flags);
if (!hrtimer_hres_active()) {
- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
- struct hrtimer *timer;
- struct timerqueue_node *next;
-
- next = timerqueue_getnext(&base->active);
- if (!next)
- continue;
-
- timer = container_of(next, struct hrtimer, node);
- delta.tv64 = hrtimer_get_expires_tv64(timer);
- delta = ktime_sub(delta, base->get_time());
- if (delta.tv64 < mindelta.tv64)
- mindelta.tv64 = delta.tv64;
- }
+ next = hrtimer_get_expires_next(cpu_base);
+ delta = ktime_sub(next, ktime_get());
+ if (delta.tv64 < 0)
+ delta.tv64 = 0;
}
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
- if (mindelta.tv64 < 0)
- mindelta.tv64 = 0;
- return mindelta;
+ return delta;
}
#endif
@@ -1242,6 +1259,7 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
*/
void hrtimer_interrupt(struct clock_event_device *dev)
{
+ struct hrtimer_clock_base *base;
struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
ktime_t expires_next, now, entry_time, delta;
int i, retries = 0;
@@ -1253,7 +1271,6 @@ void hrtimer_interrupt(struct clock_event_device *dev)
raw_spin_lock(&cpu_base->lock);
entry_time = now = hrtimer_update_base(cpu_base);
retry:
- expires_next.tv64 = KTIME_MAX;
/*
* We set expires_next to KTIME_MAX here with cpu_base->lock
* held to prevent that a timer is enqueued in our queue via
@@ -1264,7 +1281,6 @@ void hrtimer_interrupt(struct clock_event_device *dev)
cpu_base->expires_next.tv64 = KTIME_MAX;
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
- struct hrtimer_clock_base *base;
struct timerqueue_node *node;
ktime_t basenow;
@@ -1292,23 +1308,20 @@ void hrtimer_interrupt(struct clock_event_device *dev)
* timer will have to trigger a wakeup anyway.
*/
- if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
- ktime_t expires;
-
- expires = ktime_sub(hrtimer_get_expires(timer),
- base->offset);
- if (expires.tv64 < 0)
- expires.tv64 = KTIME_MAX;
- if (expires.tv64 < expires_next.tv64)
- expires_next = expires;
+ if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
break;
- }
__run_hrtimer(timer, &basenow);
}
}
/*
+ * We might have dropped cpu_base->lock and the callbacks
+ * might have added timers. Find the timer which expires first.
+ */
+ expires_next = hrtimer_get_expires_next(cpu_base);
+
+ /*
* Store the new expiry value so the migration code can verify
* against it.
*/
@@ -1628,6 +1641,7 @@ static void init_hrtimers_cpu(int cpu)
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
cpu_base->clock_base[i].cpu_base = cpu_base;
timerqueue_init_head(&cpu_base->clock_base[i].active);
+ cpu_base->clock_base[i].expires_next.tv64 = KTIME_MAX;
}
cpu_base->cpu = cpu;
--
2.1.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v3] hrtimers: calculate expires_next after all timers are executed
2015-01-20 12:45 [PATCH v3] hrtimers: calculate expires_next after all timers are executed Stanislav Fomichev
@ 2015-01-20 23:06 ` Thomas Gleixner
2015-01-23 11:21 ` [tip:timers/core] hrtimer: Prevent stale expiry time in hrtimer_interrupt() tip-bot for Thomas Gleixner
0 siblings, 1 reply; 3+ messages in thread
From: Thomas Gleixner @ 2015-01-20 23:06 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: linux-kernel, john.stultz, viresh.kumar, fweisbec, mingo, cl,
stuart.w.hayes
On Tue, 20 Jan 2015, Stanislav Fomichev wrote:
> I think I'm hitting particularly subtle issue with NOHZ_IDLE kernel.
>
> The sequence is as follows:
> - CPU enters idle, we disable tick
> - hrtimer interrupt fires (for hrtimer_wakeup)
> - for clock base #1 (REALTIME) we wake up SCHED_RT thread and
> start RT period timer (from start_rt_bandwidth) for clock base #0
> (MONOTONIC)
> - because we already checked expiry time for clock base #0
> we end up programming wrong wake up time (minutes, from
> tick_sched_timer)
> - then, we exit idle loop and restart tick;
> but because tick_sched_timer is not leftmost (the leftmost one
> is RT period timer) we don't program it
>
> So in the end, there is working CPU without tick interrupt enabled.
> This eventually leads to RCU stall on that CPU: rcu_gp_kthread
> is not woken up because there is no tick.
>
> This fix runs expired timers first and only then tries to find
> next expiry time for all clocks.
I have a few issues with that patch:
1) The extra storage and conditionals in enqueue()
Why do we need the extra clock base storage for the expiry per
clock base if we evaluate all clockbases anyway?
Getting it from the timerqueue is not that much slower than reading
it from clock_base especially as we have to take the offset into
account in any case.
2) We have another problem in that code:
We hold off timer migration by setting cpu_base->expires_next to
KTIME_MAX while we are in hrtimer_interrupt(). Aside of that we
really want to avoid touching the hardware while we are running
hrtimer_interrupt() simply becasue hrtimer_interrupt() is going to
repogramm the hardware anyway.
Now if a timer gets armed from a callback, like in the case you
describe above we end up changing cpu_base->expires_next and
programming the hardware, which of course gets overwritten at the
end of hrtimer_interrupt().
So that's inconsistent behaviour and just works by chance. With
your patch applied we paper over this issue, but we really want to
solve it proper.
We could allow that behaviour with the explicit reevaluation of the
next expiry time, but if we decide to do so then this wants to be
documented and a seperate patch.
Does the patch below fix your problem as well?
Thanks,
tglx
----------------->
Subject: hrtimer: Prevent stale expiry time in hrtimer_interrupt()
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Jan 2015 21:24:10 +0100
hrtimer_interrupt() has the following subtle issue:
hrtimer_interrupt()
lock(cpu_base);
expires_next = KTIME_MAX;
expire_timers(CLOCK_MONOTONIC);
expires = get_next_timer(CLOCK_MONOTONIC);
if (expires < expires_next)
expires_next = expires;
expire_timers(CLOCK_REALTIME);
unlock(cpu_base);
wakeup()
hrtimer_start(CLOCK_MONOTONIC, newtimer);
lock(cpu_base();
expires = get_next_timer(CLOCK_REALTIME);
if (expires < expires_next)
expires_next = expires;
So because we already evaluated the next expiring timer of
CLOCK_MONOTONIC we ignore that the expiry time of newtimer might be
earlier than the overall next expiry time in hrtimer_interrupt().
To solve this, remove the caching of the next expiry value from
hrtimer_interrupt() and reevaluate all active clock bases for the next
expiry value. To avoid another code duplication, create a shared
evaluation function and use it for hrtimer_get_next_event(),
hrtimer_force_reprogram() and hrtimer_interrupt().
There is another subtlety in this mechanism:
While hrtimer_interrupt() is running, we want to avoid to touch the
hardware device because we will reprogram it anyway at the end of
hrtimer_interrupt(). This works nicely for hrtimers which get rearmed
via the HRTIMER_RESTART mechanism, because we drop out when the
callback on that CPU is running. But that fails, if a new timer gets
enqueued like in the example above.
This has another implication: While hrtimer_interrupt() is running we
refuse remote enqueueing of timers - see hrtimer_interrupt() and
hrtimer_check_target().
hrtimer_interrupt() tries to prevent this by setting cpu_base->expires
to KTIME_MAX, but that fails if a new timer gets queued.
Prevent both the hardware access and the remote enqueue
explicitely. We can loosen the restriction on the remote enqueue now
due to reevaluation of the next expiry value, but that needs a
seperate patch.
Reported-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
Based-on-patch-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/hrtimer.h | 2
kernel/time/hrtimer.c | 107 +++++++++++++++++++++---------------------------
2 files changed, 51 insertions(+), 58 deletions(-)
Index: tip/include/linux/hrtimer.h
===================================================================
--- tip.orig/include/linux/hrtimer.h
+++ tip/include/linux/hrtimer.h
@@ -170,6 +170,7 @@ enum hrtimer_base_type {
* @clock_was_set: Indicates that clock was set from irq context.
* @expires_next: absolute time of the next event which was scheduled
* via clock_set_next_event()
+ * @in_hrtirq: hrtimer_interrupt() is currently executing
* @hres_active: State of high resolution mode
* @hang_detected: The last hrtimer interrupt detected a hang
* @nr_events: Total number of hrtimer interrupt events
@@ -185,6 +186,7 @@ struct hrtimer_cpu_base {
unsigned int clock_was_set;
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires_next;
+ int in_hrtirq;
int hres_active;
int hang_detected;
unsigned long nr_events;
Index: tip/kernel/time/hrtimer.c
===================================================================
--- tip.orig/kernel/time/hrtimer.c
+++ tip/kernel/time/hrtimer.c
@@ -440,6 +440,37 @@ static inline void debug_deactivate(stru
trace_hrtimer_cancel(timer);
}
+#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
+ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
+{
+ struct hrtimer_clock_base *base = cpu_base->clock_base;
+ ktime_t expires, expires_next = { .tv64 = KTIME_MAX };
+ int i;
+
+ for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
+ struct timerqueue_node *next;
+ struct hrtimer *timer;
+
+ next = timerqueue_getnext(&base->active);
+ if (!next)
+ continue;
+
+ timer = container_of(next, struct hrtimer, node);
+ expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
+ if (expires.tv64 < expires_next.tv64)
+ expires_next = expires;
+ }
+ /*
+ * clock_was_set() might have changed base->offset of any of
+ * the clock bases so the result might be negative. Fix it up
+ * to prevent a false positive in clockevents_program_event().
+ */
+ if (expires_next.tv64 < 0)
+ expires_next.tv64 = 0;
+ return expires_next;
+}
+#endif
+
/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -488,32 +519,7 @@ static inline int hrtimer_hres_active(vo
static void
hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
{
- int i;
- struct hrtimer_clock_base *base = cpu_base->clock_base;
- ktime_t expires, expires_next;
-
- expires_next.tv64 = KTIME_MAX;
-
- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
- struct hrtimer *timer;
- struct timerqueue_node *next;
-
- next = timerqueue_getnext(&base->active);
- if (!next)
- continue;
- timer = container_of(next, struct hrtimer, node);
-
- expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
- /*
- * clock_was_set() has changed base->offset so the
- * result might be negative. Fix it up to prevent a
- * false positive in clockevents_program_event()
- */
- if (expires.tv64 < 0)
- expires.tv64 = 0;
- if (expires.tv64 < expires_next.tv64)
- expires_next = expires;
- }
+ ktime_t expires_next = __hrtimer_get_next_event(cpu_base);
if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64)
return;
@@ -587,6 +593,15 @@ static int hrtimer_reprogram(struct hrti
return 0;
/*
+ * When the target cpu of the timer is currently executing
+ * hrtimer_interrupt(), then we do not touch the clock event
+ * device. hrtimer_interrupt() will reevaluate all clock bases
+ * before reprogramming the device.
+ */
+ if (cpu_base->in_hrtirq)
+ return 0;
+
+ /*
* If a hang was detected in the last timer interrupt then we
* do not schedule a timer which is earlier than the expiry
* which we enforced in the hang detection. We want the system
@@ -1104,29 +1119,14 @@ EXPORT_SYMBOL_GPL(hrtimer_get_remaining)
ktime_t hrtimer_get_next_event(void)
{
struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
- struct hrtimer_clock_base *base = cpu_base->clock_base;
- ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
+ ktime_t mindelta = { .tv64 = KTIME_MAX };
unsigned long flags;
- int i;
raw_spin_lock_irqsave(&cpu_base->lock, flags);
- if (!hrtimer_hres_active()) {
- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
- struct hrtimer *timer;
- struct timerqueue_node *next;
-
- next = timerqueue_getnext(&base->active);
- if (!next)
- continue;
-
- timer = container_of(next, struct hrtimer, node);
- delta.tv64 = hrtimer_get_expires_tv64(timer);
- delta = ktime_sub(delta, base->get_time());
- if (delta.tv64 < mindelta.tv64)
- mindelta.tv64 = delta.tv64;
- }
- }
+ if (!hrtimer_hres_active())
+ mindelta = ktime_sub(__hrtimer_get_next_event(cpu_base),
+ ktime_get());
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
@@ -1251,9 +1251,9 @@ void hrtimer_interrupt(struct clock_even
dev->next_event.tv64 = KTIME_MAX;
raw_spin_lock(&cpu_base->lock);
+ cpu_base->in_hrtirq = 1;
entry_time = now = hrtimer_update_base(cpu_base);
retry:
- expires_next.tv64 = KTIME_MAX;
/*
* We set expires_next to KTIME_MAX here with cpu_base->lock
* held to prevent that a timer is enqueued in our queue via
@@ -1291,23 +1291,14 @@ retry:
* are right-of a not yet expired timer, because that
* timer will have to trigger a wakeup anyway.
*/
-
- if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
- ktime_t expires;
-
- expires = ktime_sub(hrtimer_get_expires(timer),
- base->offset);
- if (expires.tv64 < 0)
- expires.tv64 = KTIME_MAX;
- if (expires.tv64 < expires_next.tv64)
- expires_next = expires;
+ if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
break;
- }
__run_hrtimer(timer, &basenow);
}
}
-
+ /* Reevaluate the clock bases for the next expiry */
+ expires_next = __hrtimer_get_next_event(cpu_base);
/*
* Store the new expiry value so the migration code can verify
* against it.
^ permalink raw reply [flat|nested] 3+ messages in thread
* [tip:timers/core] hrtimer: Prevent stale expiry time in hrtimer_interrupt()
2015-01-20 23:06 ` Thomas Gleixner
@ 2015-01-23 11:21 ` tip-bot for Thomas Gleixner
0 siblings, 0 replies; 3+ messages in thread
From: tip-bot for Thomas Gleixner @ 2015-01-23 11:21 UTC (permalink / raw)
To: linux-tip-commits; +Cc: mingo, tglx, stfomichev, linux-kernel, hpa
Commit-ID: 9bc7491906b4113b4c5ae442157c7dfc4e10cd14
Gitweb: http://git.kernel.org/tip/9bc7491906b4113b4c5ae442157c7dfc4e10cd14
Author: Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Tue, 20 Jan 2015 21:24:10 +0100
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitDate: Fri, 23 Jan 2015 12:13:20 +0100
hrtimer: Prevent stale expiry time in hrtimer_interrupt()
hrtimer_interrupt() has the following subtle issue:
hrtimer_interrupt()
lock(cpu_base);
expires_next = KTIME_MAX;
expire_timers(CLOCK_MONOTONIC);
expires = get_next_timer(CLOCK_MONOTONIC);
if (expires < expires_next)
expires_next = expires;
expire_timers(CLOCK_REALTIME);
unlock(cpu_base);
wakeup()
hrtimer_start(CLOCK_MONOTONIC, newtimer);
lock(cpu_base();
expires = get_next_timer(CLOCK_REALTIME);
if (expires < expires_next)
expires_next = expires;
So because we already evaluated the next expiring timer of
CLOCK_MONOTONIC we ignore that the expiry time of newtimer might be
earlier than the overall next expiry time in hrtimer_interrupt().
To solve this, remove the caching of the next expiry value from
hrtimer_interrupt() and reevaluate all active clock bases for the next
expiry value. To avoid another code duplication, create a shared
evaluation function and use it for hrtimer_get_next_event(),
hrtimer_force_reprogram() and hrtimer_interrupt().
There is another subtlety in this mechanism:
While hrtimer_interrupt() is running, we want to avoid to touch the
hardware device because we will reprogram it anyway at the end of
hrtimer_interrupt(). This works nicely for hrtimers which get rearmed
via the HRTIMER_RESTART mechanism, because we drop out when the
callback on that CPU is running. But that fails, if a new timer gets
enqueued like in the example above.
This has another implication: While hrtimer_interrupt() is running we
refuse remote enqueueing of timers - see hrtimer_interrupt() and
hrtimer_check_target().
hrtimer_interrupt() tries to prevent this by setting cpu_base->expires
to KTIME_MAX, but that fails if a new timer gets queued.
Prevent both the hardware access and the remote enqueue
explicitely. We can loosen the restriction on the remote enqueue now
due to reevaluation of the next expiry value, but that needs a
seperate patch.
Folded in a fix from Vignesh Radhakrishnan.
Reported-and-tested-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
Based-on-patch-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: vigneshr@codeaurora.org
Cc: john.stultz@linaro.org
Cc: viresh.kumar@linaro.org
Cc: fweisbec@gmail.com
Cc: cl@linux.com
Cc: stuart.w.hayes@gmail.com
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1501202049190.5526@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/hrtimer.h | 2 +
kernel/time/hrtimer.c | 108 ++++++++++++++++++++++--------------------------
2 files changed, 52 insertions(+), 58 deletions(-)
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index a036d05..05f6df1 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -170,6 +170,7 @@ enum hrtimer_base_type {
* @clock_was_set: Indicates that clock was set from irq context.
* @expires_next: absolute time of the next event which was scheduled
* via clock_set_next_event()
+ * @in_hrtirq: hrtimer_interrupt() is currently executing
* @hres_active: State of high resolution mode
* @hang_detected: The last hrtimer interrupt detected a hang
* @nr_events: Total number of hrtimer interrupt events
@@ -185,6 +186,7 @@ struct hrtimer_cpu_base {
unsigned int clock_was_set;
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires_next;
+ int in_hrtirq;
int hres_active;
int hang_detected;
unsigned long nr_events;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 37e50aa..b663653 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -440,6 +440,37 @@ static inline void debug_deactivate(struct hrtimer *timer)
trace_hrtimer_cancel(timer);
}
+#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
+ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
+{
+ struct hrtimer_clock_base *base = cpu_base->clock_base;
+ ktime_t expires, expires_next = { .tv64 = KTIME_MAX };
+ int i;
+
+ for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
+ struct timerqueue_node *next;
+ struct hrtimer *timer;
+
+ next = timerqueue_getnext(&base->active);
+ if (!next)
+ continue;
+
+ timer = container_of(next, struct hrtimer, node);
+ expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
+ if (expires.tv64 < expires_next.tv64)
+ expires_next = expires;
+ }
+ /*
+ * clock_was_set() might have changed base->offset of any of
+ * the clock bases so the result might be negative. Fix it up
+ * to prevent a false positive in clockevents_program_event().
+ */
+ if (expires_next.tv64 < 0)
+ expires_next.tv64 = 0;
+ return expires_next;
+}
+#endif
+
/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -488,32 +519,7 @@ static inline int hrtimer_hres_active(void)
static void
hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
{
- int i;
- struct hrtimer_clock_base *base = cpu_base->clock_base;
- ktime_t expires, expires_next;
-
- expires_next.tv64 = KTIME_MAX;
-
- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
- struct hrtimer *timer;
- struct timerqueue_node *next;
-
- next = timerqueue_getnext(&base->active);
- if (!next)
- continue;
- timer = container_of(next, struct hrtimer, node);
-
- expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
- /*
- * clock_was_set() has changed base->offset so the
- * result might be negative. Fix it up to prevent a
- * false positive in clockevents_program_event()
- */
- if (expires.tv64 < 0)
- expires.tv64 = 0;
- if (expires.tv64 < expires_next.tv64)
- expires_next = expires;
- }
+ ktime_t expires_next = __hrtimer_get_next_event(cpu_base);
if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64)
return;
@@ -587,6 +593,15 @@ static int hrtimer_reprogram(struct hrtimer *timer,
return 0;
/*
+ * When the target cpu of the timer is currently executing
+ * hrtimer_interrupt(), then we do not touch the clock event
+ * device. hrtimer_interrupt() will reevaluate all clock bases
+ * before reprogramming the device.
+ */
+ if (cpu_base->in_hrtirq)
+ return 0;
+
+ /*
* If a hang was detected in the last timer interrupt then we
* do not schedule a timer which is earlier than the expiry
* which we enforced in the hang detection. We want the system
@@ -1104,29 +1119,14 @@ EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
ktime_t hrtimer_get_next_event(void)
{
struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
- struct hrtimer_clock_base *base = cpu_base->clock_base;
- ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
+ ktime_t mindelta = { .tv64 = KTIME_MAX };
unsigned long flags;
- int i;
raw_spin_lock_irqsave(&cpu_base->lock, flags);
- if (!hrtimer_hres_active()) {
- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
- struct hrtimer *timer;
- struct timerqueue_node *next;
-
- next = timerqueue_getnext(&base->active);
- if (!next)
- continue;
-
- timer = container_of(next, struct hrtimer, node);
- delta.tv64 = hrtimer_get_expires_tv64(timer);
- delta = ktime_sub(delta, base->get_time());
- if (delta.tv64 < mindelta.tv64)
- mindelta.tv64 = delta.tv64;
- }
- }
+ if (!hrtimer_hres_active())
+ mindelta = ktime_sub(__hrtimer_get_next_event(cpu_base),
+ ktime_get());
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
@@ -1253,7 +1253,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
raw_spin_lock(&cpu_base->lock);
entry_time = now = hrtimer_update_base(cpu_base);
retry:
- expires_next.tv64 = KTIME_MAX;
+ cpu_base->in_hrtirq = 1;
/*
* We set expires_next to KTIME_MAX here with cpu_base->lock
* held to prevent that a timer is enqueued in our queue via
@@ -1291,28 +1291,20 @@ retry:
* are right-of a not yet expired timer, because that
* timer will have to trigger a wakeup anyway.
*/
-
- if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
- ktime_t expires;
-
- expires = ktime_sub(hrtimer_get_expires(timer),
- base->offset);
- if (expires.tv64 < 0)
- expires.tv64 = KTIME_MAX;
- if (expires.tv64 < expires_next.tv64)
- expires_next = expires;
+ if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
break;
- }
__run_hrtimer(timer, &basenow);
}
}
-
+ /* Reevaluate the clock bases for the next expiry */
+ expires_next = __hrtimer_get_next_event(cpu_base);
/*
* Store the new expiry value so the migration code can verify
* against it.
*/
cpu_base->expires_next = expires_next;
+ cpu_base->in_hrtirq = 0;
raw_spin_unlock(&cpu_base->lock);
/* Reprogramming necessary ? */
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2015-01-23 11:21 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-01-20 12:45 [PATCH v3] hrtimers: calculate expires_next after all timers are executed Stanislav Fomichev
2015-01-20 23:06 ` Thomas Gleixner
2015-01-23 11:21 ` [tip:timers/core] hrtimer: Prevent stale expiry time in hrtimer_interrupt() tip-bot for Thomas Gleixner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox