* [patch 1/8] hrtimer optimize softirq runqueues
2006-03-12 10:37 [patch 0/8] hrtimer updates Thomas Gleixner
@ 2006-03-12 10:37 ` Thomas Gleixner
2006-03-12 10:37 ` [patch 2/8] Pass current time to hrtimer_forward() Thomas Gleixner
` (6 subsequent siblings)
7 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 10:37 UTC (permalink / raw)
To: Andrew Morton; +Cc: LKML, Ingo Molnar, Roman Zippel
[-- Attachment #1: hrtimer-optimize-run-queues.patch --]
[-- Type: text/plain, Size: 4066 bytes --]
The hrtimer softirq is called from the timer softirq every tick.
Retrieve the current time from xtime and wall_to_monotonic instead of
calling base->get_time() for each timer base. Store the time in the
base structure and provide a hook once clock source abstractions are in
place and to keep the code open for new base clocks.
Based on a patch from: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
include/linux/hrtimer.h | 20 ++++++++++++--------
kernel/hrtimer.c | 28 ++++++++++++++++++++++++++--
2 files changed, 38 insertions(+), 10 deletions(-)
Index: linux-2.6.16-updates/kernel/hrtimer.c
===================================================================
--- linux-2.6.16-updates.orig/kernel/hrtimer.c
+++ linux-2.6.16-updates/kernel/hrtimer.c
@@ -123,6 +123,26 @@ void ktime_get_ts(struct timespec *ts)
EXPORT_SYMBOL_GPL(ktime_get_ts);
/*
+ * Get the coarse grained time at the softirq based on xtime and
+ * wall_to_monotonic.
+ */
+static void hrtimer_get_softirq_time(struct hrtimer_base *base)
+{
+ ktime_t xtim, tomono;
+ unsigned long seq;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ xtim = timespec_to_ktime(xtime);
+ tomono = timespec_to_ktime(wall_to_monotonic);
+
+ } while (read_seqretry(&xtime_lock, seq));
+
+ base[CLOCK_REALTIME].softirq_time = xtim;
+ base[CLOCK_MONOTONIC].softirq_time = ktime_add(xtim, tomono);
+}
+
+/*
* Functions and macros which are different for UP/SMP systems are kept in a
* single place
*/
@@ -586,9 +606,11 @@ int hrtimer_get_res(const clockid_t whic
*/
static inline void run_hrtimer_queue(struct hrtimer_base *base)
{
- ktime_t now = base->get_time();
struct rb_node *node;
+ if (base->get_softirq_time)
+ base->softirq_time = base->get_softirq_time();
+
spin_lock_irq(&base->lock);
while ((node = base->first)) {
@@ -598,7 +620,7 @@ static inline void run_hrtimer_queue(str
void *data;
timer = rb_entry(node, struct hrtimer, node);
- if (now.tv64 <= timer->expires.tv64)
+ if (base->softirq_time.tv64 <= timer->expires.tv64)
break;
fn = timer->function;
@@ -641,6 +663,8 @@ void hrtimer_run_queues(void)
struct hrtimer_base *base = __get_cpu_var(hrtimer_bases);
int i;
+ hrtimer_get_softirq_time(base);
+
for (i = 0; i < MAX_HRTIMER_BASES; i++)
run_hrtimer_queue(&base[i]);
}
Index: linux-2.6.16-updates/include/linux/hrtimer.h
===================================================================
--- linux-2.6.16-updates.orig/include/linux/hrtimer.h
+++ linux-2.6.16-updates/include/linux/hrtimer.h
@@ -72,14 +72,16 @@ struct hrtimer {
/**
* struct hrtimer_base - the timer base for a specific clock
*
- * @index: clock type index for per_cpu support when moving a timer
- * to a base on another cpu.
- * @lock: lock protecting the base and associated timers
- * @active: red black tree root node for the active timers
- * @first: pointer to the timer node which expires first
- * @resolution: the resolution of the clock, in nanoseconds
- * @get_time: function to retrieve the current time of the clock
- * @curr_timer: the timer which is executing a callback right now
+ * @index: clock type index for per_cpu support when moving a timer
+ * to a base on another cpu.
+ * @lock: lock protecting the base and associated timers
+ * @active: red black tree root node for the active timers
+ * @first: pointer to the timer node which expires first
+ * @resolution: the resolution of the clock, in nanoseconds
+ * @get_time: function to retrieve the current time of the clock
+ * @get_sofirq_time: function to retrieve the current time from the softirq
+ * @curr_timer: the timer which is executing a callback right now
+ * @softirq_time: the time when running the hrtimer queue in the softirq
*/
struct hrtimer_base {
clockid_t index;
@@ -88,7 +90,9 @@ struct hrtimer_base {
struct rb_node *first;
ktime_t resolution;
ktime_t (*get_time)(void);
+ ktime_t (*get_softirq_time)(void);
struct hrtimer *curr_timer;
+ ktime_t softirq_time;
};
/*
--
^ permalink raw reply [flat|nested] 26+ messages in thread* [patch 2/8] Pass current time to hrtimer_forward()
2006-03-12 10:37 [patch 0/8] hrtimer updates Thomas Gleixner
2006-03-12 10:37 ` [patch 1/8] hrtimer optimize softirq runqueues Thomas Gleixner
@ 2006-03-12 10:37 ` Thomas Gleixner
2006-03-12 10:37 ` [patch 3/8] posix-timer cleanup common_timer_get() Thomas Gleixner
` (5 subsequent siblings)
7 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 10:37 UTC (permalink / raw)
To: Andrew Morton; +Cc: LKML, Ingo Molnar, Roman Zippel
[-- Attachment #1: hrtimer-forward-remove-get-time.patch --]
[-- Type: text/plain, Size: 4225 bytes --]
From: Roman Zippel <zippel@linux-m68k.org>
Pass current time to hrtimer_forward(). This allows to use the softirq
time in the timer base when the forward function is called from the
timer callback. Other places pass current time with a call to
timer->base->get_time().
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
include/linux/hrtimer.h | 3 ++-
kernel/hrtimer.c | 7 +++----
kernel/itimer.c | 3 ++-
kernel/posix-timers.c | 14 ++++++++++----
4 files changed, 17 insertions(+), 10 deletions(-)
Index: linux-2.6.16-updates/include/linux/hrtimer.h
===================================================================
--- linux-2.6.16-updates.orig/include/linux/hrtimer.h
+++ linux-2.6.16-updates/include/linux/hrtimer.h
@@ -130,7 +130,8 @@ static inline int hrtimer_active(const s
}
/* Forward a hrtimer so it expires after now: */
-extern unsigned long hrtimer_forward(struct hrtimer *timer, ktime_t interval);
+extern unsigned long
+hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
/* Precise sleep: */
extern long hrtimer_nanosleep(struct timespec *rqtp,
Index: linux-2.6.16-updates/kernel/hrtimer.c
===================================================================
--- linux-2.6.16-updates.orig/kernel/hrtimer.c
+++ linux-2.6.16-updates/kernel/hrtimer.c
@@ -301,18 +301,17 @@ void unlock_hrtimer_base(const struct hr
* hrtimer_forward - forward the timer expiry
*
* @timer: hrtimer to forward
+ * @now: forward past this time
* @interval: the interval to forward
*
* Forward the timer expiry so it will expire in the future.
* Returns the number of overruns.
*/
unsigned long
-hrtimer_forward(struct hrtimer *timer, ktime_t interval)
+hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
{
unsigned long orun = 1;
- ktime_t delta, now;
-
- now = timer->base->get_time();
+ ktime_t delta;
delta = ktime_sub(now, timer->expires);
Index: linux-2.6.16-updates/kernel/itimer.c
===================================================================
--- linux-2.6.16-updates.orig/kernel/itimer.c
+++ linux-2.6.16-updates/kernel/itimer.c
@@ -136,7 +136,8 @@ int it_real_fn(void *data)
if (tsk->signal->it_real_incr.tv64 != 0) {
hrtimer_forward(&tsk->signal->real_timer,
- tsk->signal->it_real_incr);
+ tsk->signal->real_timer.base->softirq_time,
+ tsk->signal->it_real_incr);
return HRTIMER_RESTART;
}
Index: linux-2.6.16-updates/kernel/posix-timers.c
===================================================================
--- linux-2.6.16-updates.orig/kernel/posix-timers.c
+++ linux-2.6.16-updates/kernel/posix-timers.c
@@ -250,15 +250,18 @@ __initcall(init_posix_timers);
static void schedule_next_timer(struct k_itimer *timr)
{
+ struct hrtimer *timer = &timr->it.real.timer;
+
if (timr->it.real.interval.tv64 == 0)
return;
- timr->it_overrun += hrtimer_forward(&timr->it.real.timer,
+ timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(),
timr->it.real.interval);
+
timr->it_overrun_last = timr->it_overrun;
timr->it_overrun = -1;
++timr->it_requeue_pending;
- hrtimer_restart(&timr->it.real.timer);
+ hrtimer_restart(timer);
}
/*
@@ -333,6 +336,7 @@ EXPORT_SYMBOL_GPL(posix_timer_event);
static int posix_timer_fn(void *data)
{
struct k_itimer *timr = data;
+ struct hrtimer *timer = &timr->it.real.timer;
unsigned long flags;
int si_private = 0;
int ret = HRTIMER_NORESTART;
@@ -350,7 +354,8 @@ static int posix_timer_fn(void *data)
*/
if (timr->it.real.interval.tv64 != 0) {
timr->it_overrun +=
- hrtimer_forward(&timr->it.real.timer,
+ hrtimer_forward(timer,
+ timer->base->softirq_time,
timr->it.real.interval);
ret = HRTIMER_RESTART;
}
@@ -621,7 +626,8 @@ common_timer_get(struct k_itimer *timr,
if (timr->it_requeue_pending & REQUEUE_PENDING ||
(timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
timr->it_overrun +=
- hrtimer_forward(timer, timr->it.real.interval);
+ hrtimer_forward(timer, timer->base->get_time(),
+ timr->it.real.interval);
remaining = hrtimer_get_remaining(timer);
}
calci:
--
^ permalink raw reply [flat|nested] 26+ messages in thread* [patch 3/8] posix-timer cleanup common_timer_get()
2006-03-12 10:37 [patch 0/8] hrtimer updates Thomas Gleixner
2006-03-12 10:37 ` [patch 1/8] hrtimer optimize softirq runqueues Thomas Gleixner
2006-03-12 10:37 ` [patch 2/8] Pass current time to hrtimer_forward() Thomas Gleixner
@ 2006-03-12 10:37 ` Thomas Gleixner
2006-03-12 10:37 ` [patch 4/8] hrtimer simplify nanosleep Thomas Gleixner
` (4 subsequent siblings)
7 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 10:37 UTC (permalink / raw)
To: Andrew Morton; +Cc: LKML, Ingo Molnar, Roman Zippel
[-- Attachment #1: posixtimer-cleanup-common-timer-get.patch --]
[-- Type: text/plain, Size: 2127 bytes --]
From: Roman Zippel <zippel@linux-m68k.org>
Cleanup common_timer_get() a little.
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
kernel/posix-timers.c | 27 ++++++++++++---------------
1 file changed, 12 insertions(+), 15 deletions(-)
Index: linux-2.6.16-updates/kernel/posix-timers.c
===================================================================
--- linux-2.6.16-updates.orig/kernel/posix-timers.c
+++ linux-2.6.16-updates/kernel/posix-timers.c
@@ -606,18 +606,20 @@ static struct k_itimer * lock_timer(time
static void
common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
{
- ktime_t remaining;
+ ktime_t now, remaining;
struct hrtimer *timer = &timr->it.real.timer;
memset(cur_setting, 0, sizeof(struct itimerspec));
- remaining = hrtimer_get_remaining(timer);
- /* Time left ? or timer pending */
- if (remaining.tv64 > 0 || hrtimer_active(timer))
- goto calci;
/* interval timer ? */
- if (timr->it.real.interval.tv64 == 0)
+ if (timr->it.real.interval.tv64 == 0) {
+ cur_setting->it_interval =
+ ktime_to_timespec(timr->it.real.interval);
+ } else if (!hrtimer_active(timer))
return;
+
+ now = timer->base->get_time();
+
/*
* When a requeue is pending or this is a SIGEV_NONE timer
* move the expiry time forward by intervals, so expiry is >
@@ -625,16 +627,11 @@ common_timer_get(struct k_itimer *timr,
*/
if (timr->it_requeue_pending & REQUEUE_PENDING ||
(timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
- timr->it_overrun +=
- hrtimer_forward(timer, timer->base->get_time(),
- timr->it.real.interval);
- remaining = hrtimer_get_remaining(timer);
+ timr->it_overrun += hrtimer_forward(timer, now,
+ timr->it.real.interval);
}
- calci:
- /* interval timer ? */
- if (timr->it.real.interval.tv64 != 0)
- cur_setting->it_interval =
- ktime_to_timespec(timr->it.real.interval);
+
+ remaining = ktime_sub(timer->expires, now);
/* Return 0 only, when the timer is expired and not pending */
if (remaining.tv64 <= 0)
cur_setting->it_value.tv_nsec = 1;
--
^ permalink raw reply [flat|nested] 26+ messages in thread* [patch 4/8] hrtimer simplify nanosleep
2006-03-12 10:37 [patch 0/8] hrtimer updates Thomas Gleixner
` (2 preceding siblings ...)
2006-03-12 10:37 ` [patch 3/8] posix-timer cleanup common_timer_get() Thomas Gleixner
@ 2006-03-12 10:37 ` Thomas Gleixner
2006-03-12 10:37 ` [patch 5/8] hrtimer remove state field Thomas Gleixner
` (3 subsequent siblings)
7 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 10:37 UTC (permalink / raw)
To: Andrew Morton; +Cc: LKML, Ingo Molnar, Roman Zippel
[-- Attachment #1: hrtimers-simplify-nanosleep.patch --]
[-- Type: text/plain, Size: 7024 bytes --]
From: Roman Zippel <zippel@linux-m68k.org>
nanosleep is the only user of the expired state, so let it manage this
itself, which makes the hrtimer code a bit simpler. The remaining time
is also only calculated if requested.
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
include/linux/hrtimer.h | 4 -
kernel/hrtimer.c | 142 ++++++++++++++++++++----------------------------
2 files changed, 63 insertions(+), 83 deletions(-)
Index: linux-2.6.16-updates/include/linux/hrtimer.h
===================================================================
--- linux-2.6.16-updates.orig/include/linux/hrtimer.h
+++ linux-2.6.16-updates/include/linux/hrtimer.h
@@ -38,9 +38,7 @@ enum hrtimer_restart {
* Timer states:
*/
enum hrtimer_state {
- HRTIMER_INACTIVE, /* Timer is inactive */
- HRTIMER_EXPIRED, /* Timer is expired */
- HRTIMER_RUNNING, /* Timer is running the callback function */
+ HRTIMER_INACTIVE, /* Timer is inactive */
HRTIMER_PENDING, /* Timer is pending */
};
Index: linux-2.6.16-updates/kernel/hrtimer.c
===================================================================
--- linux-2.6.16-updates.orig/kernel/hrtimer.c
+++ linux-2.6.16-updates/kernel/hrtimer.c
@@ -625,30 +625,20 @@ static inline void run_hrtimer_queue(str
fn = timer->function;
data = timer->data;
set_curr_timer(base, timer);
- timer->state = HRTIMER_RUNNING;
+ timer->state = HRTIMER_INACTIVE;
__remove_hrtimer(timer, base);
spin_unlock_irq(&base->lock);
- /*
- * fn == NULL is special case for the simplest timer
- * variant - wake up process and do not restart:
- */
- if (!fn) {
- wake_up_process(data);
- restart = HRTIMER_NORESTART;
- } else
- restart = fn(data);
+ restart = fn(data);
spin_lock_irq(&base->lock);
/* Another CPU has added back the timer */
- if (timer->state != HRTIMER_RUNNING)
+ if (timer->state != HRTIMER_INACTIVE)
continue;
- if (restart == HRTIMER_RESTART)
+ if (restart != HRTIMER_NORESTART)
enqueue_hrtimer(timer, base);
- else
- timer->state = HRTIMER_EXPIRED;
}
set_curr_timer(base, NULL);
spin_unlock_irq(&base->lock);
@@ -672,79 +662,70 @@ void hrtimer_run_queues(void)
* Sleep related functions:
*/
-/**
- * schedule_hrtimer - sleep until timeout
- *
- * @timer: hrtimer variable initialized with the correct clock base
- * @mode: timeout value is abs/rel
- *
- * Make the current task sleep until @timeout is
- * elapsed.
- *
- * You can set the task state as follows -
- *
- * %TASK_UNINTERRUPTIBLE - at least @timeout is guaranteed to
- * pass before the routine returns. The routine will return 0
- *
- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task. In this case the remaining time
- * will be returned
- *
- * The current task state is guaranteed to be TASK_RUNNING when this
- * routine returns.
- */
-static ktime_t __sched
-schedule_hrtimer(struct hrtimer *timer, const enum hrtimer_mode mode)
-{
- /* fn stays NULL, meaning single-shot wakeup: */
- timer->data = current;
+struct sleep_hrtimer {
+ struct hrtimer timer;
+ struct task_struct *task;
+ int expired;
+};
- hrtimer_start(timer, timer->expires, mode);
+static int nanosleep_wakeup(void *data)
+{
+ struct sleep_hrtimer *t = data;
- schedule();
- hrtimer_cancel(timer);
+ t->expired = 1;
+ wake_up_process(t->task);
- /* Return the remaining time: */
- if (timer->state != HRTIMER_EXPIRED)
- return ktime_sub(timer->expires, timer->base->get_time());
- else
- return (ktime_t) {.tv64 = 0 };
+ return HRTIMER_NORESTART;
}
-static inline ktime_t __sched
-schedule_hrtimer_interruptible(struct hrtimer *timer,
- const enum hrtimer_mode mode)
+static int __sched do_nanosleep(struct sleep_hrtimer *t, enum hrtimer_mode mode)
{
- set_current_state(TASK_INTERRUPTIBLE);
+ t->timer.function = nanosleep_wakeup;
+ t->timer.data = t;
+ t->task = current;
+ t->expired = 0;
+
+ do {
+ set_current_state(TASK_INTERRUPTIBLE);
+ hrtimer_start(&t->timer, t->timer.expires, mode);
- return schedule_hrtimer(timer, mode);
+ schedule();
+
+ if (unlikely(!t->expired)) {
+ hrtimer_cancel(&t->timer);
+ mode = HRTIMER_ABS;
+ }
+ } while (!t->expired && !signal_pending(current));
+
+ return t->expired;
}
static long __sched nanosleep_restart(struct restart_block *restart)
{
+ struct sleep_hrtimer t;
struct timespec __user *rmtp;
struct timespec tu;
- void *rfn_save = restart->fn;
- struct hrtimer timer;
- ktime_t rem;
+ ktime_t time;
restart->fn = do_no_restart_syscall;
- hrtimer_init(&timer, (clockid_t) restart->arg3, HRTIMER_ABS);
-
- timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0;
+ hrtimer_init(&t.timer, restart->arg3, HRTIMER_ABS);
+ t.timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0;
- rem = schedule_hrtimer_interruptible(&timer, HRTIMER_ABS);
-
- if (rem.tv64 <= 0)
+ if (do_nanosleep(&t, HRTIMER_ABS))
return 0;
rmtp = (struct timespec __user *) restart->arg2;
- tu = ktime_to_timespec(rem);
- if (rmtp && copy_to_user(rmtp, &tu, sizeof(tu)))
- return -EFAULT;
+ if (rmtp) {
+ time = ktime_sub(t.timer.expires, t.timer.base->get_time());
+ if (time.tv64 <= 0)
+ return 0;
+ tu = ktime_to_timespec(time);
+ if (copy_to_user(rmtp, &tu, sizeof(tu)))
+ return -EFAULT;
+ }
- restart->fn = rfn_save;
+ restart->fn = nanosleep_restart;
/* The other values in restart are already filled in */
return -ERESTART_RESTARTBLOCK;
@@ -754,33 +735,34 @@ long hrtimer_nanosleep(struct timespec *
const enum hrtimer_mode mode, const clockid_t clockid)
{
struct restart_block *restart;
- struct hrtimer timer;
+ struct sleep_hrtimer t;
struct timespec tu;
ktime_t rem;
- hrtimer_init(&timer, clockid, mode);
-
- timer.expires = timespec_to_ktime(*rqtp);
-
- rem = schedule_hrtimer_interruptible(&timer, mode);
- if (rem.tv64 <= 0)
+ hrtimer_init(&t.timer, clockid, mode);
+ t.timer.expires = timespec_to_ktime(*rqtp);
+ if (do_nanosleep(&t, mode))
return 0;
/* Absolute timers do not update the rmtp value and restart: */
if (mode == HRTIMER_ABS)
return -ERESTARTNOHAND;
- tu = ktime_to_timespec(rem);
-
- if (rmtp && copy_to_user(rmtp, &tu, sizeof(tu)))
- return -EFAULT;
+ if (rmtp) {
+ rem = ktime_sub(t.timer.expires, t.timer.base->get_time());
+ if (rem.tv64 <= 0)
+ return 0;
+ tu = ktime_to_timespec(rem);
+ if (copy_to_user(rmtp, &tu, sizeof(tu)))
+ return -EFAULT;
+ }
restart = ¤t_thread_info()->restart_block;
restart->fn = nanosleep_restart;
- restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF;
- restart->arg1 = timer.expires.tv64 >> 32;
+ restart->arg0 = t.timer.expires.tv64 & 0xFFFFFFFF;
+ restart->arg1 = t.timer.expires.tv64 >> 32;
restart->arg2 = (unsigned long) rmtp;
- restart->arg3 = (unsigned long) timer.base->index;
+ restart->arg3 = (unsigned long) t.timer.base->index;
return -ERESTART_RESTARTBLOCK;
}
--
^ permalink raw reply [flat|nested] 26+ messages in thread* [patch 5/8] hrtimer remove state field
2006-03-12 10:37 [patch 0/8] hrtimer updates Thomas Gleixner
` (3 preceding siblings ...)
2006-03-12 10:37 ` [patch 4/8] hrtimer simplify nanosleep Thomas Gleixner
@ 2006-03-12 10:37 ` Thomas Gleixner
2006-03-12 12:13 ` Roman Zippel
2006-03-12 10:37 ` [patch 6/8] Remove it_real_value calculation from proc/*/stat Thomas Gleixner
` (2 subsequent siblings)
7 siblings, 1 reply; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 10:37 UTC (permalink / raw)
To: Andrew Morton; +Cc: LKML, Ingo Molnar, Roman Zippel
[-- Attachment #1: hrtimers-remove-state-field.patch --]
[-- Type: text/plain, Size: 3162 bytes --]
From: Roman Zippel <zippel@linux-m68k.org>
Remove the state field and encode this information in the rb_node
similiar to normal timer.
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
include/linux/hrtimer.h | 11 ++---------
kernel/hrtimer.c | 13 ++++---------
2 files changed, 6 insertions(+), 18 deletions(-)
Index: linux-2.6.16-updates/include/linux/hrtimer.h
===================================================================
--- linux-2.6.16-updates.orig/include/linux/hrtimer.h
+++ linux-2.6.16-updates/include/linux/hrtimer.h
@@ -34,13 +34,7 @@ enum hrtimer_restart {
HRTIMER_RESTART,
};
-/*
- * Timer states:
- */
-enum hrtimer_state {
- HRTIMER_INACTIVE, /* Timer is inactive */
- HRTIMER_PENDING, /* Timer is pending */
-};
+#define HRTIMER_INACTIVE ((void *)1UL)
struct hrtimer_base;
@@ -61,7 +55,6 @@ struct hrtimer_base;
struct hrtimer {
struct rb_node node;
ktime_t expires;
- enum hrtimer_state state;
int (*function)(void *);
void *data;
struct hrtimer_base *base;
@@ -124,7 +117,7 @@ extern ktime_t hrtimer_get_next_event(vo
static inline int hrtimer_active(const struct hrtimer *timer)
{
- return timer->state == HRTIMER_PENDING;
+ return timer->node.rb_parent != HRTIMER_INACTIVE;
}
/* Forward a hrtimer so it expires after now: */
Index: linux-2.6.16-updates/kernel/hrtimer.c
===================================================================
--- linux-2.6.16-updates.orig/kernel/hrtimer.c
+++ linux-2.6.16-updates/kernel/hrtimer.c
@@ -374,8 +374,6 @@ static void enqueue_hrtimer(struct hrtim
rb_link_node(&timer->node, parent, link);
rb_insert_color(&timer->node, &base->active);
- timer->state = HRTIMER_PENDING;
-
if (!base->first || timer->expires.tv64 <
rb_entry(base->first, struct hrtimer, node)->expires.tv64)
base->first = &timer->node;
@@ -395,6 +393,7 @@ static void __remove_hrtimer(struct hrti
if (base->first == &timer->node)
base->first = rb_next(&timer->node);
rb_erase(&timer->node, &base->active);
+ timer->node.rb_parent = HRTIMER_INACTIVE;
}
/*
@@ -405,7 +404,6 @@ remove_hrtimer(struct hrtimer *timer, st
{
if (hrtimer_active(timer)) {
__remove_hrtimer(timer, base);
- timer->state = HRTIMER_INACTIVE;
return 1;
}
return 0;
@@ -579,6 +577,7 @@ void hrtimer_init(struct hrtimer *timer,
clock_id = CLOCK_MONOTONIC;
timer->base = &bases[clock_id];
+ timer->node.rb_parent = HRTIMER_INACTIVE;
}
/**
@@ -625,7 +624,6 @@ static inline void run_hrtimer_queue(str
fn = timer->function;
data = timer->data;
set_curr_timer(base, timer);
- timer->state = HRTIMER_INACTIVE;
__remove_hrtimer(timer, base);
spin_unlock_irq(&base->lock);
@@ -633,11 +631,8 @@ static inline void run_hrtimer_queue(str
spin_lock_irq(&base->lock);
- /* Another CPU has added back the timer */
- if (timer->state != HRTIMER_INACTIVE)
- continue;
-
- if (restart != HRTIMER_NORESTART)
+ if (restart != HRTIMER_NORESTART &&
+ !hrtimer_active(timer))
enqueue_hrtimer(timer, base);
}
set_curr_timer(base, NULL);
--
^ permalink raw reply [flat|nested] 26+ messages in thread* Re: [patch 5/8] hrtimer remove state field
2006-03-12 10:37 ` [patch 5/8] hrtimer remove state field Thomas Gleixner
@ 2006-03-12 12:13 ` Roman Zippel
2006-03-12 13:10 ` Thomas Gleixner
0 siblings, 1 reply; 26+ messages in thread
From: Roman Zippel @ 2006-03-12 12:13 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: Andrew Morton, LKML, Ingo Molnar
Hi,
On Sun, 12 Mar 2006, Thomas Gleixner wrote:
> @@ -633,11 +631,8 @@ static inline void run_hrtimer_queue(str
>
> spin_lock_irq(&base->lock);
>
> - /* Another CPU has added back the timer */
> - if (timer->state != HRTIMER_INACTIVE)
> - continue;
> -
> - if (restart != HRTIMER_NORESTART)
> + if (restart != HRTIMER_NORESTART &&
> + !hrtimer_active(timer))
> enqueue_hrtimer(timer, base);
> }
> set_curr_timer(base, NULL);
BTW the active check can be removed again, as it was added for a state
machine problem, I only didn't want to remove it for 2.6.16.
bye, Roman
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [patch 5/8] hrtimer remove state field
2006-03-12 12:13 ` Roman Zippel
@ 2006-03-12 13:10 ` Thomas Gleixner
2006-03-12 13:26 ` Roman Zippel
0 siblings, 1 reply; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 13:10 UTC (permalink / raw)
To: Roman Zippel; +Cc: Andrew Morton, LKML, Ingo Molnar
On Sun, 2006-03-12 at 13:13 +0100, Roman Zippel wrote:
> > spin_lock_irq(&base->lock);
> >
> > - /* Another CPU has added back the timer */
> > - if (timer->state != HRTIMER_INACTIVE)
> > - continue;
> > -
> > - if (restart != HRTIMER_NORESTART)
> > + if (restart != HRTIMER_NORESTART &&
> > + !hrtimer_active(timer))
> > enqueue_hrtimer(timer, base);
> > }
> > set_curr_timer(base, NULL);
>
> BTW the active check can be removed again, as it was added for a state
> machine problem, I only didn't want to remove it for 2.6.16.
The check can not be removed. The reason why it was added is still the
same.
It has nothing to do with a state machine. It's a simple SMP locking
issue.
softirq runs on CPU0
base->lock()
remove_timer(timer);
base->unlock()
signal of previous expiry is delivered on CPU1
timer is reqeued.
requeue = timer->fn();
base->lock()
if (requeue)
enqueue_timer(timer)
--> OOPS
We can not wait in the signal delivery path until the callback has been
executed, as we hold the posix-timer->lock and this would deadlock
timer->fn().
tglx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [patch 5/8] hrtimer remove state field
2006-03-12 13:10 ` Thomas Gleixner
@ 2006-03-12 13:26 ` Roman Zippel
2006-03-12 13:35 ` Thomas Gleixner
0 siblings, 1 reply; 26+ messages in thread
From: Roman Zippel @ 2006-03-12 13:26 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: Andrew Morton, LKML, Ingo Molnar
Hi,
On Sun, 12 Mar 2006, Thomas Gleixner wrote:
> > BTW the active check can be removed again, as it was added for a state
> > machine problem, I only didn't want to remove it for 2.6.16.
>
> The check can not be removed. The reason why it was added is still the
> same.
>
> It has nothing to do with a state machine. It's a simple SMP locking
> issue.
>
> softirq runs on CPU0
> base->lock()
>
> remove_timer(timer);
>
> base->unlock()
> signal of previous expiry is delivered on CPU1
> timer is reqeued.
> requeue = timer->fn();
>
> base->lock()
>
> if (requeue)
> enqueue_timer(timer)
>
> --> OOPS
>
> We can not wait in the signal delivery path until the callback has been
> executed, as we hold the posix-timer->lock and this would deadlock
> timer->fn().
posix_timer either restarts the timer directly or via signal delivery, but
never both, so this case can't happen (unless there is a bug in the
posix_timer).
bye, Roman
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [patch 5/8] hrtimer remove state field
2006-03-12 13:26 ` Roman Zippel
@ 2006-03-12 13:35 ` Thomas Gleixner
2006-03-12 13:55 ` Roman Zippel
0 siblings, 1 reply; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 13:35 UTC (permalink / raw)
To: Roman Zippel; +Cc: Andrew Morton, LKML, Ingo Molnar
Roman,
I looked up the exact problem again.
On Sun, 2006-03-12 at 14:26 +0100, Roman Zippel wrote:
> > softirq runs on CPU0
> > base->lock()
> >
> > remove_timer(timer);
> >
> > base->unlock()
> > signal of previous expiry is delivered on CPU1
> > timer is reqeued.
-------> sig_ignore is set
> > requeue = timer->fn();
> >
> > base->lock()
> >
> > if (requeue)
> > enqueue_timer(timer)
> >
> > --> OOPS
> >
> > We can not wait in the signal delivery path until the callback has been
> > executed, as we hold the posix-timer->lock and this would deadlock
> > timer->fn().
>
> posix_timer either restarts the timer directly or via signal delivery, but
> never both, so this case can't happen (unless there is a bug in the
> posix_timer).
It took quite a time to track this down. Its unlikely, but it _CAN_
happen !
tglx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [patch 5/8] hrtimer remove state field
2006-03-12 13:35 ` Thomas Gleixner
@ 2006-03-12 13:55 ` Roman Zippel
2006-03-12 14:15 ` Thomas Gleixner
0 siblings, 1 reply; 26+ messages in thread
From: Roman Zippel @ 2006-03-12 13:55 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: Andrew Morton, LKML, Ingo Molnar
Hi,
On Sun, 12 Mar 2006, Thomas Gleixner wrote:
> On Sun, 2006-03-12 at 14:26 +0100, Roman Zippel wrote:
> > > softirq runs on CPU0
> > > base->lock()
> > >
> > > remove_timer(timer);
> > >
> > > base->unlock()
> > > signal of previous expiry is delivered on CPU1
> > > timer is reqeued.
>
> -------> sig_ignore is set
??? I can't find any symbol 'sig_ignore'.
> > > requeue = timer->fn();
> > >
> > > base->lock()
> > >
> > > if (requeue)
> > > enqueue_timer(timer)
> > >
What actually happened is that requeue is false and in the meantime the
timer was restarted on another cpu via signal delivery and
run_hrtimer_queue would set the state to expired, while the timer was
still active -> OOPS.
bye, Roman
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [patch 5/8] hrtimer remove state field
2006-03-12 13:55 ` Roman Zippel
@ 2006-03-12 14:15 ` Thomas Gleixner
2006-03-12 14:30 ` Roman Zippel
0 siblings, 1 reply; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 14:15 UTC (permalink / raw)
To: Roman Zippel; +Cc: Andrew Morton, LKML, Ingo Molnar
On Sun, 2006-03-12 at 14:55 +0100, Roman Zippel wrote:
> Hi,
>
> On Sun, 12 Mar 2006, Thomas Gleixner wrote:
>
> > On Sun, 2006-03-12 at 14:26 +0100, Roman Zippel wrote:
> > > > softirq runs on CPU0
> > > > base->lock()
> > > >
> > > > remove_timer(timer);
> > > >
> > > > base->unlock()
> > > > signal of previous expiry is delivered on CPU1
> > > > timer is reqeued.
> >
> > -------> sig_ignore is set
>
> ??? I can't find any symbol 'sig_ignore'.
Oh well. The application sets SIG_IGN for the signal in question, so
send_sigqueue() returns 1 because sig_ignored() returns 1.
Sorry for being imprecise.
tglx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [patch 5/8] hrtimer remove state field
2006-03-12 14:15 ` Thomas Gleixner
@ 2006-03-12 14:30 ` Roman Zippel
2006-03-12 14:54 ` Thomas Gleixner
0 siblings, 1 reply; 26+ messages in thread
From: Roman Zippel @ 2006-03-12 14:30 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: Andrew Morton, LKML, Ingo Molnar
Hi,
On Sun, 12 Mar 2006, Thomas Gleixner wrote:
> > > > > base->unlock()
> > > > > signal of previous expiry is delivered on CPU1
> > > > > timer is reqeued.
> > >
> > > -------> sig_ignore is set
> >
> > ??? I can't find any symbol 'sig_ignore'.
>
> Oh well. The application sets SIG_IGN for the signal in question, so
> send_sigqueue() returns 1 because sig_ignored() returns 1.
In this case no signal is queued and the timer won't be restarted via
signal delivery.
bye, Roman
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [patch 5/8] hrtimer remove state field
2006-03-12 14:30 ` Roman Zippel
@ 2006-03-12 14:54 ` Thomas Gleixner
2006-03-12 15:17 ` Roman Zippel
0 siblings, 1 reply; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 14:54 UTC (permalink / raw)
To: Roman Zippel; +Cc: Andrew Morton, LKML, Ingo Molnar
On Sun, 2006-03-12 at 15:30 +0100, Roman Zippel wrote:
> Hi,
>
> On Sun, 12 Mar 2006, Thomas Gleixner wrote:
>
> > > > > > base->unlock()
> > > > > > signal of previous expiry is delivered on CPU1
> > > > > > timer is reqeued.
> > > >
> > > > -------> sig_ignore is set
> > >
> > > ??? I can't find any symbol 'sig_ignore'.
> >
> > Oh well. The application sets SIG_IGN for the signal in question, so
> > send_sigqueue() returns 1 because sig_ignored() returns 1.
>
> In this case no signal is queued and the timer won't be restarted via
> signal delivery.
Roman,
Interrupts are enable before fn() is called, so an interrupt can
actually delay it long enough that userspace on CPU1 can set SIG_IGN
CPU 0
spin_unlock_irq(base->lock)
CPU1
signal is dequeued
timer is requeued
user space code is executed
user space code sets SIG_IGN
restart = fn();
Now fn() calls send_sigqeue() which returns 1, resulting in ret =
HRTIMER_RESTART which leads to requeueing of an enqueued timer.
tglx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [patch 5/8] hrtimer remove state field
2006-03-12 14:54 ` Thomas Gleixner
@ 2006-03-12 15:17 ` Roman Zippel
2006-03-12 15:41 ` Thomas Gleixner
0 siblings, 1 reply; 26+ messages in thread
From: Roman Zippel @ 2006-03-12 15:17 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: Andrew Morton, LKML, Ingo Molnar
Hi,
On Sun, 12 Mar 2006, Thomas Gleixner wrote:
> > In this case no signal is queued and the timer won't be restarted via
> > signal delivery.
>
> Roman,
>
> Interrupts are enable before fn() is called, so an interrupt can
> actually delay it long enough that userspace on CPU1 can set SIG_IGN
>
> CPU 0
> spin_unlock_irq(base->lock)
> CPU1
> signal is dequeued
> timer is requeued
> user space code is executed
> user space code sets SIG_IGN
> restart = fn();
>
> Now fn() calls send_sigqeue() which returns 1, resulting in ret =
> HRTIMER_RESTART which leads to requeueing of an enqueued timer.
I'm not quite sure I follow, when the timer is running no signal should be
queued, so nothing can be dequeued and no new timer can be requeued.
If that somehow is possible (although I don't see how), you'd found a bug
in the signal/posix timer code, which should not be worked around in the
hrtimer run queue.
bye, Roman
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [patch 5/8] hrtimer remove state field
2006-03-12 15:17 ` Roman Zippel
@ 2006-03-12 15:41 ` Thomas Gleixner
2006-03-12 16:00 ` Roman Zippel
0 siblings, 1 reply; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 15:41 UTC (permalink / raw)
To: Roman Zippel; +Cc: Andrew Morton, LKML, Ingo Molnar
On Sun, 2006-03-12 at 16:17 +0100, Roman Zippel wrote:
> >
> > CPU 0
> > spin_unlock_irq(base->lock)
> > CPU1
> > signal is dequeued
> > timer is requeued
> > user space code is executed
> > user space code sets SIG_IGN
> > restart = fn();
> >
> > Now fn() calls send_sigqeue() which returns 1, resulting in ret =
> > HRTIMER_RESTART which leads to requeueing of an enqueued timer.
>
> I'm not quite sure I follow, when the timer is running no signal should be
> queued, so nothing can be dequeued and no new timer can be requeued.
> If that somehow is possible (although I don't see how), you'd found a bug
> in the signal/posix timer code, which should not be worked around in the
> hrtimer run queue.
How do you want to prevent that a signal is dequeued on one CPU while
the softirq expires the timer on another CPU ? This can not be
prevented.
Of course we can check hrtimer_active() in posix_timer_fn(), but I have
to check if there is a problem the other way round. I cook up a patch.
tglx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [patch 5/8] hrtimer remove state field
2006-03-12 15:41 ` Thomas Gleixner
@ 2006-03-12 16:00 ` Roman Zippel
2006-03-12 16:26 ` Thomas Gleixner
0 siblings, 1 reply; 26+ messages in thread
From: Roman Zippel @ 2006-03-12 16:00 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: Andrew Morton, LKML, Ingo Molnar
Hi,
On Sun, 12 Mar 2006, Thomas Gleixner wrote:
> How do you want to prevent that a signal is dequeued on one CPU while
> the softirq expires the timer on another CPU ? This can not be
> prevented.
This should not be possible in first place, otherwise it's a bug.
The original problem was a broken state machine, is that so hard to
believe? If there is another problem, please provide more details.
bye, Roman
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [patch 5/8] hrtimer remove state field
2006-03-12 16:00 ` Roman Zippel
@ 2006-03-12 16:26 ` Thomas Gleixner
2006-03-12 16:57 ` [patch] hrtimer remove replace state check by BUG_ON Thomas Gleixner
2006-03-16 1:05 ` [patch 5/8] hrtimer remove state field Roman Zippel
0 siblings, 2 replies; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 16:26 UTC (permalink / raw)
To: Roman Zippel; +Cc: Andrew Morton, LKML, Ingo Molnar
On Sun, 2006-03-12 at 17:00 +0100, Roman Zippel wrote:
> Hi,
>
> On Sun, 12 Mar 2006, Thomas Gleixner wrote:
>
> > How do you want to prevent that a signal is dequeued on one CPU while
> > the softirq expires the timer on another CPU ? This can not be
> > prevented.
>
> This should not be possible in first place, otherwise it's a bug.
> The original problem was a broken state machine, is that so hard to
> believe? If there is another problem, please provide more details.
Roman,
there was a state machine problem caused by something similar.
But the problem I described now happened with the current patch queue -
without the hrtimer_active() check. I have no direct access to the
machine which lets this surface and I just tried to reconstruct the
scenario from the sparse information which was provided by the customer.
All I can tell, that it is related to something similar and a requeue
happens where none should happen.
I agree, that it should not be handled in the hrtimer code. It has to be
fixed in the posix-timer code.
I make the check a BUG_ON(!hrtimer_active(timer) so it might show up in
-mm again. Ok ?
tglx
^ permalink raw reply [flat|nested] 26+ messages in thread
* [patch] hrtimer remove replace state check by BUG_ON
2006-03-12 16:26 ` Thomas Gleixner
@ 2006-03-12 16:57 ` Thomas Gleixner
2006-03-16 1:05 ` [patch 5/8] hrtimer remove state field Roman Zippel
1 sibling, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 16:57 UTC (permalink / raw)
To: Roman Zippel; +Cc: Andrew Morton, LKML, Ingo Molnar
The rework of the hrtimer code showed a hard to track sporadic problem
on a testers machine. The !hrtimer_active check in the return path of
the timer callback function prevents requeueing of an enqueued timer.
Replace the check by a BUG_ON to get more info about this not yet
verified problem instead of hiding it away.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Index: linux-2.6.16-updates/kernel/hrtimer.c
===================================================================
--- linux-2.6.16-updates.orig/kernel/hrtimer.c
+++ linux-2.6.16-updates/kernel/hrtimer.c
@@ -631,9 +631,10 @@ static inline void run_hrtimer_queue(str
spin_lock_irq(&base->lock);
- if (restart != HRTIMER_NORESTART &&
- !hrtimer_active(timer))
+ if (restart != HRTIMER_NORESTART) {
+ BUG_ON(hrtimer_active(timer));
enqueue_hrtimer(timer, base);
+ }
}
set_curr_timer(base, NULL);
spin_unlock_irq(&base->lock);
^ permalink raw reply [flat|nested] 26+ messages in thread* Re: [patch 5/8] hrtimer remove state field
2006-03-12 16:26 ` Thomas Gleixner
2006-03-12 16:57 ` [patch] hrtimer remove replace state check by BUG_ON Thomas Gleixner
@ 2006-03-16 1:05 ` Roman Zippel
2006-03-16 9:01 ` Thomas Gleixner
1 sibling, 1 reply; 26+ messages in thread
From: Roman Zippel @ 2006-03-16 1:05 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: Andrew Morton, LKML, Ingo Molnar
Hi,
On Sun, 12 Mar 2006, Thomas Gleixner wrote:
> But the problem I described now happened with the current patch queue -
> without the hrtimer_active() check. I have no direct access to the
> machine which lets this surface and I just tried to reconstruct the
> scenario from the sparse information which was provided by the customer.
> All I can tell, that it is related to something similar and a requeue
> happens where none should happen.
I have an idea what might have happened. You don't advance the pending
state, if the signal isn't queued, so that the pending state is screwed up
afterwards. Although I don't see how it could crash the kernel (it has
only the potential to mess up the timer queue via hrtimer_forward() a
bit), but I don't know what other patches were applied.
> I make the check a BUG_ON(!hrtimer_active(timer) so it might show up in
> -mm again. Ok ?
That's fine.
The point is that it's currently not needed to allow the user this
behaviour. It's a bit unfortunate that such API details were not discussed
before it got merged - users have a tendency to (ab)use a system in the
least expected way (especially if they somehow gotten the idea it's much
better than the old stuff in every way).
For example no current user restarts an active timer, which could be used
to simplify the locking. If we tightened a bit what a user is allowed to
do, we could gain flexibility on the other side, e.g. allow drivers to
create timer sources or how to integrate cpu timer.
bye, Roman
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Index: linux-2.6-git/kernel/posix-timers.c
===================================================================
--- linux-2.6-git.orig/kernel/posix-timers.c 2006-03-14 19:48:21.000000000 +0100
+++ linux-2.6-git/kernel/posix-timers.c 2006-03-15 20:54:38.000000000 +0100
@@ -353,6 +353,7 @@
hrtimer_forward(&timr->it.real.timer,
timr->it.real.interval);
ret = HRTIMER_RESTART;
+ ++timr->it_requeue_pending;
}
}
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [patch 5/8] hrtimer remove state field
2006-03-16 1:05 ` [patch 5/8] hrtimer remove state field Roman Zippel
@ 2006-03-16 9:01 ` Thomas Gleixner
2006-03-17 22:07 ` Roman Zippel
0 siblings, 1 reply; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-16 9:01 UTC (permalink / raw)
To: Roman Zippel; +Cc: Andrew Morton, LKML, Ingo Molnar
Roman,
> I have an idea what might have happened. You don't advance the pending
> state, if the signal isn't queued, so that the pending state is screwed up
> afterwards. Although I don't see how it could crash the kernel (it has
> only the potential to mess up the timer queue via hrtimer_forward() a
> bit), but I don't know what other patches were applied.
Good catch, but I dont see how it would trigger the bug.
> For example no current user restarts an active timer, which could be used
> to simplify the locking.
How does this simplify the locking ? It just removes the
hrtimer_cancel() call in hrtimer_start() and makes the
switch_hrtimer_base() code a bit simpler.
The general locking rules would be still the same and I dont see
increased flexibility at all.
> If we tightened a bit what a user is allowed to
> do, we could gain flexibility on the other side, e.g. allow drivers to
> create timer sources or how to integrate cpu timer.
-ENOPARSE. Can you please explain what "allow drivers to create timer
sources" means and why the above locking is in the way ?
tglx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [patch 5/8] hrtimer remove state field
2006-03-16 9:01 ` Thomas Gleixner
@ 2006-03-17 22:07 ` Roman Zippel
2006-03-18 8:46 ` Thomas Gleixner
0 siblings, 1 reply; 26+ messages in thread
From: Roman Zippel @ 2006-03-17 22:07 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: Andrew Morton, LKML, Ingo Molnar
Hi,
On Thu, 16 Mar 2006, Thomas Gleixner wrote:
> > For example no current user restarts an active timer, which could be used
> > to simplify the locking.
>
> How does this simplify the locking ? It just removes the
> hrtimer_cancel() call in hrtimer_start() and makes the
> switch_hrtimer_base() code a bit simpler.
>
> The general locking rules would be still the same and I dont see
> increased flexibility at all.
Current users already do the serialize the calls into hrtimer themselves
(stopping and restarting), which can make the locking even simpler.
> > If we tightened a bit what a user is allowed to
> > do, we could gain flexibility on the other side, e.g. allow drivers to
> > create timer sources or how to integrate cpu timer.
>
> -ENOPARSE. Can you please explain what "allow drivers to create timer
> sources" means and why the above locking is in the way ?
For example dynamically attaching a timer_base to a clock source (e.g. to
create a monotonic timer independent of NTP adjustments). Right now as
soon as any timer_base is active it cannot be deconfigured again due to
pointers to it from timers, so this would require different locking.
bye, Roman
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [patch 5/8] hrtimer remove state field
2006-03-17 22:07 ` Roman Zippel
@ 2006-03-18 8:46 ` Thomas Gleixner
0 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-18 8:46 UTC (permalink / raw)
To: Roman Zippel; +Cc: Andrew Morton, LKML, Ingo Molnar
On Fri, 2006-03-17 at 23:07 +0100, Roman Zippel wrote:
> > The general locking rules would be still the same and I dont see
> > increased flexibility at all.
>
> Current users already do the serialize the calls into hrtimer themselves
> (stopping and restarting), which can make the locking even simpler.
Serialization of the users does not help for protecting the timer queues
especially not on SMP.
Can you please give a real example how to simplify this. Nobody has any
objections to simplify locking when possible. But unproven claims in
repeated form do not help. After a while they just get annoying.
> > > If we tightened a bit what a user is allowed to
> > > do, we could gain flexibility on the other side, e.g. allow drivers to
> > > create timer sources or how to integrate cpu timer.
> >
> > -ENOPARSE. Can you please explain what "allow drivers to create timer
> > sources" means and why the above locking is in the way ?
>
> For example dynamically attaching a timer_base to a clock source (e.g. to
> create a monotonic timer independent of NTP adjustments). Right now as
> soon as any timer_base is active it cannot be deconfigured again due to
> pointers to it from timers, so this would require different locking.
I do not really understand what you want to achieve.
Timer bases are related to clock sources. You can switch the clock
source of the CLOCK_MONOTONIC timer base at any given time to a
different one which is not NTP adjusted. Where is the problem?
The timer base does not care about the underlying clock source at all.
All it knows of it is the function which reads the current time related
to this clock. When the underlying clock changes the way it generates
current time then the timer base does not care at all.
This is not a problem of hrtimers. Its a problem of the clock source
abstraction layer like John Stultz GTOD framework.
If you want to have timer bases with dynamic clock sources which can go
away then you have to take care of much more than locking. But I really
do not see a need for this.
tglx
^ permalink raw reply [flat|nested] 26+ messages in thread
* [patch 6/8] Remove it_real_value calculation from proc/*/stat
2006-03-12 10:37 [patch 0/8] hrtimer updates Thomas Gleixner
` (4 preceding siblings ...)
2006-03-12 10:37 ` [patch 5/8] hrtimer remove state field Thomas Gleixner
@ 2006-03-12 10:37 ` Thomas Gleixner
2006-03-12 10:37 ` [patch 7/8] Remove DEFINE_KTIME and ktime_to_clock_t() Thomas Gleixner
2006-03-12 10:37 ` [patch 8/8] Remove nsec_t typedef Thomas Gleixner
7 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 10:37 UTC (permalink / raw)
To: Andrew Morton; +Cc: LKML, Ingo Molnar, Roman Zippel
[-- Attachment #1: remove-it-real-value.patch --]
[-- Type: text/plain, Size: 1871 bytes --]
From: Roman Zippel <zippel@linux-m68k.org>
Remove the it_real_value from /proc/*/stat, during 1.2.x was the last
time it returned useful data (as it was directly maintained by the
scheduler), now it's only a waste of time to calculate it. Return 0
instead.
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
fs/proc/array.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
Index: linux-2.6.16-updates/fs/proc/array.c
===================================================================
--- linux-2.6.16-updates.orig/fs/proc/array.c
+++ linux-2.6.16-updates/fs/proc/array.c
@@ -330,7 +330,6 @@ static int do_task_stat(struct task_stru
unsigned long min_flt = 0, maj_flt = 0;
cputime_t cutime, cstime, utime, stime;
unsigned long rsslim = 0;
- DEFINE_KTIME(it_real_value);
struct task_struct *t;
char tcomm[sizeof(task->comm)];
@@ -386,7 +385,6 @@ static int do_task_stat(struct task_stru
utime = cputime_add(utime, task->signal->utime);
stime = cputime_add(stime, task->signal->stime);
}
- it_real_value = task->signal->real_timer.expires;
}
ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
read_unlock(&tasklist_lock);
@@ -413,7 +411,7 @@ static int do_task_stat(struct task_stru
start_time = nsec_to_clock_t(start_time);
res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
-%lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
+%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
task->pid,
tcomm,
@@ -435,7 +433,6 @@ static int do_task_stat(struct task_stru
priority,
nice,
num_threads,
- (long) ktime_to_clock_t(it_real_value),
start_time,
vsize,
mm ? get_mm_rss(mm) : 0,
--
^ permalink raw reply [flat|nested] 26+ messages in thread* [patch 7/8] Remove DEFINE_KTIME and ktime_to_clock_t()
2006-03-12 10:37 [patch 0/8] hrtimer updates Thomas Gleixner
` (5 preceding siblings ...)
2006-03-12 10:37 ` [patch 6/8] Remove it_real_value calculation from proc/*/stat Thomas Gleixner
@ 2006-03-12 10:37 ` Thomas Gleixner
2006-03-12 10:37 ` [patch 8/8] Remove nsec_t typedef Thomas Gleixner
7 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 10:37 UTC (permalink / raw)
To: Andrew Morton; +Cc: LKML, Ingo Molnar, Roman Zippel
[-- Attachment #1: remove-define-ktime.patch --]
[-- Type: text/plain, Size: 2257 bytes --]
From: Roman Zippel <zippel@linux-m68k.org>
Now that it_real_value is gone, the last user of DEFINE_KTIME and
ktime_to_clock_t are also gone, so remove it before someone starts using
it again.
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
include/linux/ktime.h | 20 --------------------
1 file changed, 20 deletions(-)
Index: linux-2.6.16-updates/include/linux/ktime.h
===================================================================
--- linux-2.6.16-updates.orig/include/linux/ktime.h
+++ linux-2.6.16-updates/include/linux/ktime.h
@@ -64,9 +64,6 @@ typedef union {
#if (BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)
-/* Define a ktime_t variable and initialize it to zero: */
-#define DEFINE_KTIME(kt) ktime_t kt = { .tv64 = 0 }
-
/**
* ktime_set - Set a ktime_t variable from a seconds/nanoseconds value
*
@@ -113,9 +110,6 @@ static inline ktime_t timeval_to_ktime(s
/* Map the ktime_t to timeval conversion to ns_to_timeval function */
#define ktime_to_timeval(kt) ns_to_timeval((kt).tv64)
-/* Map the ktime_t to clock_t conversion to the inline in jiffies.h: */
-#define ktime_to_clock_t(kt) nsec_to_clock_t((kt).tv64)
-
/* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */
#define ktime_to_ns(kt) ((kt).tv64)
@@ -136,9 +130,6 @@ static inline ktime_t timeval_to_ktime(s
* tv.sec < 0 and 0 >= tv.nsec < NSEC_PER_SEC
*/
-/* Define a ktime_t variable and initialize it to zero: */
-#define DEFINE_KTIME(kt) ktime_t kt = { .tv64 = 0 }
-
/* Set a ktime_t variable to a value in sec/nsec representation: */
static inline ktime_t ktime_set(const long secs, const unsigned long nsecs)
{
@@ -255,17 +246,6 @@ static inline struct timeval ktime_to_ti
}
/**
- * ktime_to_clock_t - convert a ktime_t variable to clock_t format
- * @kt: the ktime_t variable to convert
- *
- * Returns a clock_t variable with the converted value
- */
-static inline clock_t ktime_to_clock_t(const ktime_t kt)
-{
- return nsec_to_clock_t( (u64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec);
-}
-
-/**
* ktime_to_ns - convert a ktime_t variable to scalar nanoseconds
* @kt: the ktime_t variable to convert
*
--
^ permalink raw reply [flat|nested] 26+ messages in thread* [patch 8/8] Remove nsec_t typedef
2006-03-12 10:37 [patch 0/8] hrtimer updates Thomas Gleixner
` (6 preceding siblings ...)
2006-03-12 10:37 ` [patch 7/8] Remove DEFINE_KTIME and ktime_to_clock_t() Thomas Gleixner
@ 2006-03-12 10:37 ` Thomas Gleixner
7 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2006-03-12 10:37 UTC (permalink / raw)
To: Andrew Morton; +Cc: LKML, Ingo Molnar, Roman Zippel
[-- Attachment #1: remove-nsec_t.patch --]
[-- Type: text/plain, Size: 4085 bytes --]
From: Roman Zippel <zippel@linux-m68k.org>
nsec_t predates ktime_t and has mostly been superseded by it. In the few
places that are left it's better to make it explicit that we're dealing
with 64 bit values here.
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <johnstul@us.ibm.com>
include/linux/time.h | 18 ++++++------------
kernel/time.c | 4 ++--
2 files changed, 8 insertions(+), 14 deletions(-)
Index: linux-2.6.16-updates/include/linux/time.h
===================================================================
--- linux-2.6.16-updates.orig/include/linux/time.h
+++ linux-2.6.16-updates/include/linux/time.h
@@ -73,12 +73,6 @@ extern void set_normalized_timespec(stru
#define timespec_valid(ts) \
(((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC))
-/*
- * 64-bit nanosec type. Large enough to span 292+ years in nanosecond
- * resolution. Ought to be enough for a while.
- */
-typedef s64 nsec_t;
-
extern struct timespec xtime;
extern struct timespec wall_to_monotonic;
extern seqlock_t xtime_lock;
@@ -113,9 +107,9 @@ extern struct timespec timespec_trunc(st
* Returns the scalar nanosecond representation of the timespec
* parameter.
*/
-static inline nsec_t timespec_to_ns(const struct timespec *ts)
+static inline s64 timespec_to_ns(const struct timespec *ts)
{
- return ((nsec_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+ return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
}
/**
@@ -125,9 +119,9 @@ static inline nsec_t timespec_to_ns(cons
* Returns the scalar nanosecond representation of the timeval
* parameter.
*/
-static inline nsec_t timeval_to_ns(const struct timeval *tv)
+static inline s64 timeval_to_ns(const struct timeval *tv)
{
- return ((nsec_t) tv->tv_sec * NSEC_PER_SEC) +
+ return ((s64) tv->tv_sec * NSEC_PER_SEC) +
tv->tv_usec * NSEC_PER_USEC;
}
@@ -137,7 +131,7 @@ static inline nsec_t timeval_to_ns(const
*
* Returns the timespec representation of the nsec parameter.
*/
-extern struct timespec ns_to_timespec(const nsec_t nsec);
+extern struct timespec ns_to_timespec(const s64 nsec);
/**
* ns_to_timeval - Convert nanoseconds to timeval
@@ -145,7 +139,7 @@ extern struct timespec ns_to_timespec(co
*
* Returns the timeval representation of the nsec parameter.
*/
-extern struct timeval ns_to_timeval(const nsec_t nsec);
+extern struct timeval ns_to_timeval(const s64 nsec);
#endif /* __KERNEL__ */
Index: linux-2.6.16-updates/kernel/time.c
===================================================================
--- linux-2.6.16-updates.orig/kernel/time.c
+++ linux-2.6.16-updates/kernel/time.c
@@ -637,7 +637,7 @@ void set_normalized_timespec(struct time
*
* Returns the timespec representation of the nsec parameter.
*/
-struct timespec ns_to_timespec(const nsec_t nsec)
+struct timespec ns_to_timespec(const s64 nsec)
{
struct timespec ts;
@@ -657,7 +657,7 @@ struct timespec ns_to_timespec(const nse
*
* Returns the timeval representation of the nsec parameter.
*/
-struct timeval ns_to_timeval(const nsec_t nsec)
+struct timeval ns_to_timeval(const s64 nsec)
{
struct timespec ts = ns_to_timespec(nsec);
struct timeval tv;
Index: linux-2.6.16-updates/kernel/hrtimer.c
===================================================================
--- linux-2.6.16-updates.orig/kernel/hrtimer.c
+++ linux-2.6.16-updates/kernel/hrtimer.c
@@ -266,7 +266,7 @@ ktime_t ktime_add_ns(const ktime_t kt, u
/*
* Divide a ktime value by a nanosecond value
*/
-static unsigned long ktime_divns(const ktime_t kt, nsec_t div)
+static unsigned long ktime_divns(const ktime_t kt, s64 div)
{
u64 dclc, inc, dns;
int sft = 0;
@@ -322,7 +322,7 @@ hrtimer_forward(struct hrtimer *timer, k
interval.tv64 = timer->base->resolution.tv64;
if (unlikely(delta.tv64 >= interval.tv64)) {
- nsec_t incr = ktime_to_ns(interval);
+ s64 incr = ktime_to_ns(interval);
orun = ktime_divns(delta, incr);
timer->expires = ktime_add_ns(timer->expires, incr * orun);
--
^ permalink raw reply [flat|nested] 26+ messages in thread