* [PATCH] Migration of standard timers
@ 2006-09-14 13:29 Dimitri Sivanich
2006-09-14 14:11 ` Jes Sorensen
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Dimitri Sivanich @ 2006-09-14 13:29 UTC (permalink / raw)
To: linux-kernel; +Cc: akpm, Andi Kleen
This patch allows the user to migrate currently queued
standard timers from one cpu to another, thereby reducing
timer induced latency on the chosen cpu. Timers that
were placed with add_timer_on() are considered to have
'cpu affinity' and are not moved.
The changes in drivers/base/cpu.c provide a clean and
convenient interface for triggering the migration through
sysfs, via writing the destination cpu number to a file
associated with the source cpu.
Note that migrating timers will not, by itself, keep new
timers off of the chosen cpu. But with careful control of
thread affinity, one can control the affinity of new timers
and keep timer induced latencies off of the chosen cpu.
This particular patch does not affect the hrtimers. That
could be addressed later.
Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
Index: linux/kernel/timer.c
===================================================================
--- linux.orig/kernel/timer.c
+++ linux/kernel/timer.c
@@ -147,6 +147,7 @@ void fastcall init_timer(struct timer_li
{
timer->entry.next = NULL;
timer->base = __raw_get_cpu_var(tvec_bases);
+ timer->aff = 0;
}
EXPORT_SYMBOL(init_timer);
@@ -250,6 +251,7 @@ void add_timer_on(struct timer_list *tim
BUG_ON(timer_pending(timer) || !timer->function);
spin_lock_irqsave(&base->lock, flags);
timer->base = base;
+ timer->aff = 1; /* Don't migrate */
internal_add_timer(base, timer);
spin_unlock_irqrestore(&base->lock, flags);
}
@@ -1661,6 +1663,52 @@ static void __devinit migrate_timers(int
}
#endif /* CONFIG_HOTPLUG_CPU */
+static void move_timer_list(tvec_base_t *new_base, struct list_head *head)
+{
+ struct timer_list *timer, *t;
+
+ list_for_each_entry_safe(timer, t, head, entry) {
+ if (timer->aff)
+ continue;
+ detach_timer(timer, 0);
+ timer->base = new_base;
+ internal_add_timer(new_base, timer);
+ }
+}
+
+int move_timers(int cpu, int dest)
+{
+ tvec_base_t *old_base;
+ tvec_base_t *new_base;
+ unsigned long flags;
+ int i;
+
+ if (cpu == dest)
+ return -EINVAL;
+
+ if (!cpu_online(cpu) || !cpu_online(dest))
+ return -EINVAL;
+
+ old_base = per_cpu(tvec_bases, cpu);
+ new_base = per_cpu(tvec_bases, dest);
+
+ spin_lock_irqsave(&new_base->lock, flags);
+ spin_lock(&old_base->lock);
+
+ for (i = 0; i < TVR_SIZE; i++)
+ move_timer_list(new_base, old_base->tv1.vec + i);
+ for (i = 0; i < TVN_SIZE; i++) {
+ move_timer_list(new_base, old_base->tv2.vec + i);
+ move_timer_list(new_base, old_base->tv3.vec + i);
+ move_timer_list(new_base, old_base->tv4.vec + i);
+ move_timer_list(new_base, old_base->tv5.vec + i);
+ }
+
+ spin_unlock(&old_base->lock);
+ spin_unlock_irqrestore(&new_base->lock, flags);
+ return 0;
+}
+
static int __cpuinit timer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
Index: linux/drivers/base/cpu.c
===================================================================
--- linux.orig/drivers/base/cpu.c
+++ linux/drivers/base/cpu.c
@@ -54,6 +54,26 @@ static ssize_t store_online(struct sys_d
}
static SYSDEV_ATTR(online, 0600, show_online, store_online);
+static ssize_t store_migrate(struct sys_device *dev, const char *buf,
+ size_t count)
+{
+ unsigned int cpu = dev->id;
+ unsigned long dest;
+ int rc;
+
+ dest = simple_strtoul(buf, NULL, 10);
+ if (dest > INT_MAX)
+ return -EINVAL;
+
+ rc = move_timers(cpu, dest);
+ if (rc < 0)
+ return rc;
+ else
+ return count;
+}
+
+static SYSDEV_ATTR(timer_migrate, 0200, NULL, store_migrate);
+
static void __devinit register_cpu_control(struct cpu *cpu)
{
sysdev_create_file(&cpu->sysdev, &attr_online);
@@ -62,6 +82,8 @@ void unregister_cpu(struct cpu *cpu)
{
int logical_cpu = cpu->sysdev.id;
+ sysdev_remove_file(&cpu->sysdev, &attr_timer_migrate);
+
unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu));
sysdev_remove_file(&cpu->sysdev, &attr_online);
@@ -124,6 +146,8 @@ int __devinit register_cpu(struct cpu *c
cpu_sys_devices[num] = &cpu->sysdev;
if (!error)
register_cpu_under_node(num, cpu_to_node(num));
+ if (!error)
+ sysdev_create_file(&cpu->sysdev, &attr_timer_migrate);
#ifdef CONFIG_KEXEC
if (!error)
Index: linux/include/linux/timer.h
===================================================================
--- linux.orig/include/linux/timer.h
+++ linux/include/linux/timer.h
@@ -15,6 +15,8 @@ struct timer_list {
unsigned long data;
struct tvec_t_base_s *base;
+
+ short aff;
};
extern struct tvec_t_base_s boot_tvec_bases;
@@ -24,6 +26,7 @@ extern struct tvec_t_base_s boot_tvec_ba
.expires = (_expires), \
.data = (_data), \
.base = &boot_tvec_bases, \
+ .aff = 0, \
}
#define DEFINE_TIMER(_name, _function, _expires, _data) \
@@ -95,6 +98,7 @@ static inline void add_timer(struct time
extern void init_timers(void);
extern void run_local_timers(void);
+extern int move_timers(int, int);
struct hrtimer;
extern int it_real_fn(struct hrtimer *);
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] Migration of standard timers
2006-09-14 13:29 [PATCH] Migration of standard timers Dimitri Sivanich
@ 2006-09-14 14:11 ` Jes Sorensen
2006-09-14 14:30 ` Dimitri Sivanich
2006-09-15 6:06 ` Andrew Morton
2006-09-15 16:39 ` Thomas Gleixner
2 siblings, 1 reply; 11+ messages in thread
From: Jes Sorensen @ 2006-09-14 14:11 UTC (permalink / raw)
To: Dimitri Sivanich; +Cc: linux-kernel, akpm, Andi Kleen
>>>>> "Dimitri" == Dimitri Sivanich <sivanich@sgi.com> writes:
Dimitri> This patch allows the user to migrate currently queued
Dimitri> standard timers from one cpu to another, thereby reducing
Dimitri> timer induced latency on the chosen cpu. Timers that were
Dimitri> placed with add_timer_on() are considered to have 'cpu
Dimitri> affinity' and are not moved.
Dimitri> The changes in drivers/base/cpu.c provide a clean and
Dimitri> convenient interface for triggering the migration through
Dimitri> sysfs, via writing the destination cpu number to a file
Dimitri> associated with the source cpu.
Hi Dimitri,
I just took a quick look at your patch, and at least on the surface it
looks pretty nice to me.
One minor nit, why choose short for the affinity field in struct
timer_list, it seems a strange size to pick for something which is
either 0 or 1. Wouldn't int or char be better? I don't know if all
CPUs have 16 bit stores, but they should have 8 and 32 bit.
The name 'aff' for affinity might not be good either, since we tend to
refer to affinity as a mask specifying where it's locked to, maybe
'locked' would be better?
All in the nit-picking department though.
Cheers,
Jes
Index: linux/include/linux/timer.h
===================================================================
--- linux.orig/include/linux/timer.h
+++ linux/include/linux/timer.h
@@ -15,6 +15,8 @@ struct timer_list {
unsigned long data;
struct tvec_t_base_s *base;
+
+ short aff;
};
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] Migration of standard timers
2006-09-14 14:11 ` Jes Sorensen
@ 2006-09-14 14:30 ` Dimitri Sivanich
0 siblings, 0 replies; 11+ messages in thread
From: Dimitri Sivanich @ 2006-09-14 14:30 UTC (permalink / raw)
To: Jes Sorensen; +Cc: linux-kernel, akpm, Andi Kleen
Hi Jes,
On Thu, Sep 14, 2006 at 10:11:39AM -0400, Jes Sorensen wrote:
> Hi Dimitri,
>
> I just took a quick look at your patch, and at least on the surface it
> looks pretty nice to me.
>
> One minor nit, why choose short for the affinity field in struct
> timer_list, it seems a strange size to pick for something which is
> either 0 or 1. Wouldn't int or char be better? I don't know if all
> CPUs have 16 bit stores, but they should have 8 and 32 bit.
Yes, you're probably right. I would have no problem with this being
changed to a 'char'.
>
> The name 'aff' for affinity might not be good either, since we tend to
> refer to affinity as a mask specifying where it's locked to, maybe
> 'locked' would be better?
A field name of 'locked' would be OK with me.
>
> All in the nit-picking department though.
>
> Cheers,
> Jes
>
>
> Index: linux/include/linux/timer.h
> ===================================================================
> --- linux.orig/include/linux/timer.h
> +++ linux/include/linux/timer.h
> @@ -15,6 +15,8 @@ struct timer_list {
> unsigned long data;
>
> struct tvec_t_base_s *base;
> +
> + short aff;
> };
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] Migration of standard timers
2006-09-14 13:29 [PATCH] Migration of standard timers Dimitri Sivanich
2006-09-14 14:11 ` Jes Sorensen
@ 2006-09-15 6:06 ` Andrew Morton
2006-09-15 16:39 ` Thomas Gleixner
2 siblings, 0 replies; 11+ messages in thread
From: Andrew Morton @ 2006-09-15 6:06 UTC (permalink / raw)
To: Dimitri Sivanich; +Cc: linux-kernel, Andi Kleen
On Thu, 14 Sep 2006 08:29:17 -0500
Dimitri Sivanich <sivanich@sgi.com> wrote:
> This patch allows the user to migrate currently queued
> standard timers from one cpu to another, thereby reducing
> timer induced latency on the chosen cpu.
Need more details, please. Why would a user want to do that?
Performance-related, I assume? If so, some numbers would be nice.
What is the use-case?
Why was a sysfs file chosen as the user interface?
What are the permissions on that sysfs file and why?
Does it need to be available to non-altix^H^H^H^H^HNUMA machines?
The code you have there is suspiciously similar to migrate_timers(). Do we
really need to duplicate it?
> Index: linux/kernel/timer.c
> ===================================================================
> --- linux.orig/kernel/timer.c
> +++ linux/kernel/timer.c
> @@ -147,6 +147,7 @@ void fastcall init_timer(struct timer_li
> {
> timer->entry.next = NULL;
> timer->base = __raw_get_cpu_var(tvec_bases);
> + timer->aff = 0;
As Jes mentioned: `aff' isn't a very clear identifier. Maybe is_bound_to_cpu?
> }
> EXPORT_SYMBOL(init_timer);
>
> @@ -250,6 +251,7 @@ void add_timer_on(struct timer_list *tim
> BUG_ON(timer_pending(timer) || !timer->function);
> spin_lock_irqsave(&base->lock, flags);
> timer->base = base;
> + timer->aff = 1; /* Don't migrate */
> internal_add_timer(base, timer);
> spin_unlock_irqrestore(&base->lock, flags);
> }
> @@ -1661,6 +1663,52 @@ static void __devinit migrate_timers(int
> }
> #endif /* CONFIG_HOTPLUG_CPU */
>
> +static void move_timer_list(tvec_base_t *new_base, struct list_head *head)
> +{
> + struct timer_list *timer, *t;
> +
> + list_for_each_entry_safe(timer, t, head, entry) {
> + if (timer->aff)
> + continue;
> + detach_timer(timer, 0);
> + timer->base = new_base;
> + internal_add_timer(new_base, timer);
> + }
> +}
> +
> +int move_timers(int cpu, int dest)
> +{
Again, unfortunate naming. What's 'dest'? Better would be `source_cpu'
and `dest_cpu', no?
> + tvec_base_t *old_base;
> + tvec_base_t *new_base;
> + unsigned long flags;
> + int i;
> +
> + if (cpu == dest)
> + return -EINVAL;
> +
> + if (!cpu_online(cpu) || !cpu_online(dest))
> + return -EINVAL;
Racy against CPU hotplug. Wrapping it all in preempt_disable() (with a
comment explaining why) would suffice.
> + old_base = per_cpu(tvec_bases, cpu);
> + new_base = per_cpu(tvec_bases, dest);
> + spin_lock_irqsave(&new_base->lock, flags);
> + spin_lock(&old_base->lock);
If one CPU does move_timers(0, 1) and another CPU does move_timers(1, 0) at
the same time, we have an AB/BA deadlock, don't we?
If so, fixes would include:
a) always take the lower-addressed-lock first or
b) wrap the whole operation inside a single global lock.
Either way, lockdep is likely to get upset about this and special
annotations and cursing might be needed.
> +
> + for (i = 0; i < TVR_SIZE; i++)
> + move_timer_list(new_base, old_base->tv1.vec + i);
> + for (i = 0; i < TVN_SIZE; i++) {
> + move_timer_list(new_base, old_base->tv2.vec + i);
> + move_timer_list(new_base, old_base->tv3.vec + i);
> + move_timer_list(new_base, old_base->tv4.vec + i);
> + move_timer_list(new_base, old_base->tv5.vec + i);
> + }
> +
> + spin_unlock(&old_base->lock);
> + spin_unlock_irqrestore(&new_base->lock, flags);
> + return 0;
> +}
> +
> static int __cpuinit timer_cpu_notify(struct notifier_block *self,
> unsigned long action, void *hcpu)
> {
> Index: linux/drivers/base/cpu.c
> ===================================================================
> --- linux.orig/drivers/base/cpu.c
> +++ linux/drivers/base/cpu.c
> @@ -54,6 +54,26 @@ static ssize_t store_online(struct sys_d
> }
> static SYSDEV_ATTR(online, 0600, show_online, store_online);
>
> +static ssize_t store_migrate(struct sys_device *dev, const char *buf,
> + size_t count)
A better name would be `store_timer_migrate'.
> +{
> + unsigned int cpu = dev->id;
> + unsigned long dest;
> + int rc;
> +
> + dest = simple_strtoul(buf, NULL, 10);
> + if (dest > INT_MAX)
> + return -EINVAL;
> +
> + rc = move_timers(cpu, dest);
> + if (rc < 0)
> + return rc;
> + else
> + return count;
> +}
> +
> +static SYSDEV_ATTR(timer_migrate, 0200, NULL, store_migrate);
I'm supposed to point you at Documentation/ABI/, sorry.
> ===================================================================
> --- linux.orig/include/linux/timer.h
> +++ linux/include/linux/timer.h
> @@ -15,6 +15,8 @@ struct timer_list {
> unsigned long data;
>
> struct tvec_t_base_s *base;
> +
> + short aff;
> };
>
> extern struct tvec_t_base_s boot_tvec_bases;
> @@ -24,6 +26,7 @@ extern struct tvec_t_base_s boot_tvec_ba
> .expires = (_expires), \
> .data = (_data), \
> .base = &boot_tvec_bases, \
> + .aff = 0, \
It's not really needed if it's zero.
> }
>
> #define DEFINE_TIMER(_name, _function, _expires, _data) \
> @@ -95,6 +98,7 @@ static inline void add_timer(struct time
>
> extern void init_timers(void);
> extern void run_local_timers(void);
> +extern int move_timers(int, int);
I think it's nice to include the identifiers here (source_cpu and
dest_cpu), as a little bit of documentation.
(And I think it's more idiomatic to put the destination arg first, although
we violate that in rather a lot of places).
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] Migration of standard timers
2006-09-14 13:29 [PATCH] Migration of standard timers Dimitri Sivanich
2006-09-14 14:11 ` Jes Sorensen
2006-09-15 6:06 ` Andrew Morton
@ 2006-09-15 16:39 ` Thomas Gleixner
2006-09-15 16:58 ` Dimitri Sivanich
2 siblings, 1 reply; 11+ messages in thread
From: Thomas Gleixner @ 2006-09-15 16:39 UTC (permalink / raw)
To: Dimitri Sivanich; +Cc: linux-kernel, akpm, Andi Kleen
On Thu, 2006-09-14 at 08:29 -0500, Dimitri Sivanich wrote:
> This patch allows the user to migrate currently queued
> standard timers from one cpu to another, thereby reducing
> timer induced latency on the chosen cpu. Timers that
> were placed with add_timer_on() are considered to have
> 'cpu affinity' and are not moved.
>
> The changes in drivers/base/cpu.c provide a clean and
> convenient interface for triggering the migration through
> sysfs, via writing the destination cpu number to a file
> associated with the source cpu.
>
> Note that migrating timers will not, by itself, keep new
> timers off of the chosen cpu. But with careful control of
> thread affinity, one can control the affinity of new timers
> and keep timer induced latencies off of the chosen cpu.
>
> This particular patch does not affect the hrtimers. That
> could be addressed later.
Are you trying to work around the latencies caused by long running timer
callbacks ? I'm not convinced that this is not curing the symptoms
instead of the root cause.
tglx
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] Migration of standard timers
2006-09-15 16:39 ` Thomas Gleixner
@ 2006-09-15 16:58 ` Dimitri Sivanich
0 siblings, 0 replies; 11+ messages in thread
From: Dimitri Sivanich @ 2006-09-15 16:58 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: linux-kernel, akpm, Andi Kleen
On Fri, Sep 15, 2006 at 06:39:19PM +0200, Thomas Gleixner wrote:
> Are you trying to work around the latencies caused by long running timer
> callbacks ? I'm not convinced that this is not curing the symptoms
> instead of the root cause.
Yes, both latency from long running timer callbacks as well as
potential latency from a temporal grouping of timer callbacks
(those occurring on the same tick).
While I agree that root causes of the former should be addressed,
more latencies of this type can always easily creep in. Timer
migration works as a long term preventative aid, not just a fix
for the problem of the moment. And adding this needn't restrict
anyone from looking at the aforementioned root causes.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] Migration of Standard Timers
@ 2006-09-19 15:29 Dimitri Sivanich
2006-09-19 16:33 ` Lee Revell
2006-09-22 19:16 ` Andrew Morton
0 siblings, 2 replies; 11+ messages in thread
From: Dimitri Sivanich @ 2006-09-19 15:29 UTC (permalink / raw)
To: linux-kernel; +Cc: akpm, Thomas Gleixner, Andi Kleen, Jes Sorensen
I'm reposting this with some suggested changes.
This patch allows the user to migrate currently queued
standard timers from one cpu to another. Migrating
timers off of select cpus allows those cpus to run
time critical threads with minimal timer induced latency
(which can reach 100's of usec for a single timer as shown
on an X86_64 test machine), thereby improving overall
determinance on those selected cpus.
This patch considers timers placed with add_timer_on()
to have 'cpu affinity' and does not move them, unless the
timers are being migrated off of a hotplug cpu that is
going down.
The changes in drivers/base/cpu.c provide a clean and
convenient interface for triggering the migration through
sysfs, via writing the destination cpu number to an owner
writeable file (0200 permissions) associated with the source
cpu. In addition, this functionality is available for kernel
module use.
Note that migrating timers will not, by itself, keep new
timers off of the chosen cpu. But with careful control of
thread affinity, one can control the affinity of new timers
and keep timer induced latencies off of the chosen cpu.
This particular patch does not affect the hrtimers. That
could be addressed later.
Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
Index: linux/kernel/timer.c
===================================================================
--- linux.orig/kernel/timer.c
+++ linux/kernel/timer.c
@@ -147,6 +147,7 @@ void fastcall init_timer(struct timer_li
{
timer->entry.next = NULL;
timer->base = __raw_get_cpu_var(tvec_bases);
+ timer->is_bound_to_cpu = 0;
}
EXPORT_SYMBOL(init_timer);
@@ -250,6 +251,7 @@ void add_timer_on(struct timer_list *tim
BUG_ON(timer_pending(timer) || !timer->function);
spin_lock_irqsave(&base->lock, flags);
timer->base = base;
+ timer->is_bound_to_cpu = 1; /* Don't migrate if cpu online */
internal_add_timer(base, timer);
spin_unlock_irqrestore(&base->lock, flags);
}
@@ -1616,50 +1618,65 @@ static int __devinit init_timers_cpu(int
return 0;
}
-#ifdef CONFIG_HOTPLUG_CPU
-static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
+static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head, int cpu_down)
{
- struct timer_list *timer;
+ struct timer_list *timer, *t;
- while (!list_empty(head)) {
- timer = list_entry(head->next, struct timer_list, entry);
+ list_for_each_entry_safe(timer, t, head, entry) {
+ if (!cpu_down && timer->is_bound_to_cpu)
+ continue;
detach_timer(timer, 0);
timer->base = new_base;
internal_add_timer(new_base, timer);
}
}
-static void __devinit migrate_timers(int cpu)
+int migrate_timers(int dest_cpu, int source_cpu, int cpu_down)
{
tvec_base_t *old_base;
tvec_base_t *new_base;
+ spinlock_t *lock1, *lock2;
+ unsigned long flags;
int i;
- BUG_ON(cpu_online(cpu));
- old_base = per_cpu(tvec_bases, cpu);
- new_base = get_cpu_var(tvec_bases);
-
- local_irq_disable();
- spin_lock(&new_base->lock);
- spin_lock(&old_base->lock);
+ if (source_cpu == dest_cpu)
+ return -EINVAL;
+
+ if (!cpu_online(dest_cpu))
+ return -EINVAL;
+
+ if (cpu_down)
+ BUG_ON(cpu_online(source_cpu));
+ else if (!cpu_online(source_cpu))
+ return -EINVAL;
+
+ old_base = per_cpu(tvec_bases, source_cpu);
+ new_base = per_cpu(tvec_bases, dest_cpu);
- BUG_ON(old_base->running_timer);
+ /* Order locking based on relative cpu number */
+ lock1 = dest_cpu > source_cpu ? &old_base->lock : &new_base->lock;
+ lock2 = dest_cpu > source_cpu ? &new_base->lock : &old_base->lock;
+
+ spin_lock_irqsave(lock1, flags);
+ spin_lock(lock2);
+
+ if (cpu_down)
+ BUG_ON(old_base->running_timer);
for (i = 0; i < TVR_SIZE; i++)
- migrate_timer_list(new_base, old_base->tv1.vec + i);
+ migrate_timer_list(new_base, old_base->tv1.vec + i, cpu_down);
for (i = 0; i < TVN_SIZE; i++) {
- migrate_timer_list(new_base, old_base->tv2.vec + i);
- migrate_timer_list(new_base, old_base->tv3.vec + i);
- migrate_timer_list(new_base, old_base->tv4.vec + i);
- migrate_timer_list(new_base, old_base->tv5.vec + i);
+ migrate_timer_list(new_base, old_base->tv2.vec + i, cpu_down);
+ migrate_timer_list(new_base, old_base->tv3.vec + i, cpu_down);
+ migrate_timer_list(new_base, old_base->tv4.vec + i, cpu_down);
+ migrate_timer_list(new_base, old_base->tv5.vec + i, cpu_down);
}
- spin_unlock(&old_base->lock);
- spin_unlock(&new_base->lock);
- local_irq_enable();
- put_cpu_var(tvec_bases);
+ spin_unlock(lock2);
+ spin_unlock_irqrestore(lock1, flags);
+ return 0;
}
-#endif /* CONFIG_HOTPLUG_CPU */
+EXPORT_SYMBOL_GPL(migrate_timers);
static int __cpuinit timer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
@@ -1672,7 +1689,7 @@ static int __cpuinit timer_cpu_notify(st
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DEAD:
- migrate_timers(cpu);
+ migrate_timers(smp_processor_id(), cpu, 1);
break;
#endif
default:
Index: linux/drivers/base/cpu.c
===================================================================
--- linux.orig/drivers/base/cpu.c
+++ linux/drivers/base/cpu.c
@@ -54,6 +54,25 @@ static ssize_t store_online(struct sys_d
}
static SYSDEV_ATTR(online, 0600, show_online, store_online);
+static ssize_t store_timer_migrate(struct sys_device *dev, const char *buf,
+ size_t count)
+{
+ unsigned int source_cpu = dev->id;
+ unsigned long dest_cpu;
+ int rc;
+
+ dest_cpu = simple_strtoul(buf, NULL, 10);
+ if (dest_cpu > INT_MAX)
+ return -EINVAL;
+
+ rc = migrate_timers(dest_cpu, source_cpu, 0);
+ if (rc < 0)
+ return rc;
+ else
+ return count;
+}
+static SYSDEV_ATTR(timer_migrate, 0200, NULL, store_timer_migrate);
+
static void __devinit register_cpu_control(struct cpu *cpu)
{
sysdev_create_file(&cpu->sysdev, &attr_online);
@@ -62,6 +81,8 @@ void unregister_cpu(struct cpu *cpu)
{
int logical_cpu = cpu->sysdev.id;
+ sysdev_remove_file(&cpu->sysdev, &attr_timer_migrate);
+
unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu));
sysdev_remove_file(&cpu->sysdev, &attr_online);
@@ -124,6 +145,8 @@ int __devinit register_cpu(struct cpu *c
cpu_sys_devices[num] = &cpu->sysdev;
if (!error)
register_cpu_under_node(num, cpu_to_node(num));
+ if (!error)
+ sysdev_create_file(&cpu->sysdev, &attr_timer_migrate);
#ifdef CONFIG_KEXEC
if (!error)
Index: linux/include/linux/timer.h
===================================================================
--- linux.orig/include/linux/timer.h
+++ linux/include/linux/timer.h
@@ -15,6 +15,8 @@ struct timer_list {
unsigned long data;
struct tvec_t_base_s *base;
+
+ char is_bound_to_cpu;
};
extern struct tvec_t_base_s boot_tvec_bases;
@@ -24,6 +26,7 @@ extern struct tvec_t_base_s boot_tvec_ba
.expires = (_expires), \
.data = (_data), \
.base = &boot_tvec_bases, \
+ .is_bound_to_cpu = 0, \
}
#define DEFINE_TIMER(_name, _function, _expires, _data) \
@@ -95,6 +98,7 @@ static inline void add_timer(struct time
extern void init_timers(void);
extern void run_local_timers(void);
+extern int migrate_timers(int dest_cpu, int source_cpu, int cpu_down);
struct hrtimer;
extern int it_real_fn(struct hrtimer *);
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] Migration of Standard Timers
2006-09-19 15:29 [PATCH] Migration of Standard Timers Dimitri Sivanich
@ 2006-09-19 16:33 ` Lee Revell
2006-09-19 16:41 ` Dimitri Sivanich
2006-09-22 19:16 ` Andrew Morton
1 sibling, 1 reply; 11+ messages in thread
From: Lee Revell @ 2006-09-19 16:33 UTC (permalink / raw)
To: Dimitri Sivanich
Cc: linux-kernel, akpm, Thomas Gleixner, Andi Kleen, Jes Sorensen
On Tue, 2006-09-19 at 10:29 -0500, Dimitri Sivanich wrote:
> This patch allows the user to migrate currently queued
> standard timers from one cpu to another. Migrating
> timers off of select cpus allows those cpus to run
> time critical threads with minimal timer induced latency
> (which can reach 100's of usec for a single timer as shown
> on an X86_64 test machine), thereby improving overall
> determinance on those selected cpus.
Which driver or subsystem is doing 100s of usecs of work in a timer?
Shouldn't another mechanism like a workqueue be used instead?
Lee
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] Migration of Standard Timers
2006-09-19 16:33 ` Lee Revell
@ 2006-09-19 16:41 ` Dimitri Sivanich
2006-09-19 16:57 ` Lee Revell
0 siblings, 1 reply; 11+ messages in thread
From: Dimitri Sivanich @ 2006-09-19 16:41 UTC (permalink / raw)
To: Lee Revell; +Cc: linux-kernel, akpm, Thomas Gleixner, Andi Kleen, Jes Sorensen
On Tue, Sep 19, 2006 at 12:33:37PM -0400, Lee Revell wrote:
> Which driver or subsystem is doing 100s of usecs of work in a timer?
The longest one I've captured so far results from:
rsp rip Function (args)
======================= <nmi>
0xffff810257822fd8 0xffffffff803a0e94 rt_check_expire+0x8c
======================= <interrupt>
0xffff81025781fee8 0xffffffff803a0e08 rt_check_expire
0xffff81025781ff08 0xffffffff802386b3 run_timer_softirq+0x133
0xffff81025781ff38 0xffffffff80235262 __do_softirq+0x5e
0xffff81025781ff68 0xffffffff8020a958 call_softirq+0x1c
0xffff81025781ff80 0xffffffff8020bea7 do_softirq+0x2c
0xffff81025781ff90 0xffffffff80235142 irq_exit+0x48
> Shouldn't another mechanism like a workqueue be used instead?
Not quite sure what you're asking here.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] Migration of Standard Timers
2006-09-19 16:41 ` Dimitri Sivanich
@ 2006-09-19 16:57 ` Lee Revell
0 siblings, 0 replies; 11+ messages in thread
From: Lee Revell @ 2006-09-19 16:57 UTC (permalink / raw)
To: Dimitri Sivanich
Cc: linux-kernel, akpm, Thomas Gleixner, Andi Kleen, Jes Sorensen,
Eric Dumazet
On Tue, 2006-09-19 at 11:41 -0500, Dimitri Sivanich wrote:
> On Tue, Sep 19, 2006 at 12:33:37PM -0400, Lee Revell wrote:
> > Which driver or subsystem is doing 100s of usecs of work in a timer?
>
> The longest one I've captured so far results from:
>
> rsp rip Function (args)
> ======================= <nmi>
> 0xffff810257822fd8 0xffffffff803a0e94 rt_check_expire+0x8c
> ======================= <interrupt>
> 0xffff81025781fee8 0xffffffff803a0e08 rt_check_expire
> 0xffff81025781ff08 0xffffffff802386b3 run_timer_softirq+0x133
> 0xffff81025781ff38 0xffffffff80235262 __do_softirq+0x5e
> 0xffff81025781ff68 0xffffffff8020a958 call_softirq+0x1c
> 0xffff81025781ff80 0xffffffff8020bea7 do_softirq+0x2c
> 0xffff81025781ff90 0xffffffff80235142 irq_exit+0x48
>
Ah, I remember that one. Eric Dumazet had some suggestions to fix it
6-12 months ago which never went anywhere - the thread was called "RCU
latency regression in 2.6.16-rc1".
That one is especially annoying as there's no workaround (shrinking the
route cache or reducing the GC interval via net.ipv4.route.* sysctls has
no effect)
Lee
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] Migration of Standard Timers
2006-09-19 15:29 [PATCH] Migration of Standard Timers Dimitri Sivanich
2006-09-19 16:33 ` Lee Revell
@ 2006-09-22 19:16 ` Andrew Morton
1 sibling, 0 replies; 11+ messages in thread
From: Andrew Morton @ 2006-09-22 19:16 UTC (permalink / raw)
To: Dimitri Sivanich; +Cc: linux-kernel, Thomas Gleixner, Andi Kleen, Jes Sorensen
On Tue, 19 Sep 2006 10:29:42 -0500
Dimitri Sivanich <sivanich@sgi.com> wrote:
> This patch allows the user to migrate currently queued
> standard timers from one cpu to another. Migrating
> timers off of select cpus allows those cpus to run
> time critical threads with minimal timer induced latency
> (which can reach 100's of usec for a single timer as shown
> on an X86_64 test machine), thereby improving overall
> determinance on those selected cpus.
>
> This patch considers timers placed with add_timer_on()
> to have 'cpu affinity' and does not move them, unless the
> timers are being migrated off of a hotplug cpu that is
> going down.
>
> The changes in drivers/base/cpu.c provide a clean and
> convenient interface for triggering the migration through
> sysfs, via writing the destination cpu number to an owner
> writeable file (0200 permissions) associated with the source
> cpu. In additon, this functionality is available for kernel
> module use.
>
> Note that migrating timers will not, by itself, keep new
> timers off of the chosen cpu. But with careful control of
> thread affinity, one can control the affinity of new timers
> and keep timer induced latencies off of the chosen cpu.
>
> This particular patch does not affect the hrtimers. That
> could be addressed later.
I can't say I like this, sorry.
- It adds another word to the timer_list structure for a very obscure
application. And a lot of kernel data structures aggregate timer_lists.
- There are places in the kernel which assume that once a timer is added
on a CPU, it will stay there. The timer handler re-arms the timer,
confident in the knowledge that everything stays on this CPU.
I recall working on such code a couple of years ago, but I now forget where
it was.
It is reasonable to do add_timer() within the CPU_ONLINE handler (for
example), in the expectation that that timer will fire on this CPU.
The proposed change permits the administrator to break that assumption.
We would need to hunt down any code which makes that assumption and
convert it to add_timer_on(). And we'd need to be very vigilant in the
future, since people could easily add new code which had the old
assumption which worked just fine for them in testing.
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2006-09-22 19:17 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-09-14 13:29 [PATCH] Migration of standard timers Dimitri Sivanich
2006-09-14 14:11 ` Jes Sorensen
2006-09-14 14:30 ` Dimitri Sivanich
2006-09-15 6:06 ` Andrew Morton
2006-09-15 16:39 ` Thomas Gleixner
2006-09-15 16:58 ` Dimitri Sivanich
-- strict thread matches above, loose matches on Subject: below --
2006-09-19 15:29 [PATCH] Migration of Standard Timers Dimitri Sivanich
2006-09-19 16:33 ` Lee Revell
2006-09-19 16:41 ` Dimitri Sivanich
2006-09-19 16:57 ` Lee Revell
2006-09-22 19:16 ` Andrew Morton
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox