* [RFC][PATCH 0/3] update to cpupri algorithm
@ 2011-07-29 15:13 Steven Rostedt
From: Steven Rostedt @ 2011-07-29 15:13 UTC (permalink / raw)
To: Mike Galbraith; +Cc: Peter Zijlstra, RT
Hi Mike,
Could you try this patch set out. Add the first patch and then
run your tests. The first patch only adds benchmarking, and does not
modify the scheduler algorithm.
Do this:
1. apply first patch, build and boot
2. # mount -t debugfs nodev /sys/kernel/debug
3. # echo 0 > /sys/kernel/debug/cpupri; ./runtest; cat /sys/kernel/debug/cpupri > output
The output will give you the contention of the vector locks in the
cpupri algorithm.
Then apply the second patch and do the same thing.
Then apply the third patch and do the same thing.
After that, could you send me the results of the output file for all
three runs? The final patch should probably be the best overall
results.
Thanks!
-- Steve
* [RFC][PATCH 1/3] cpupri: Add profiling
From: Steven Rostedt @ 2011-07-29 15:13 UTC (permalink / raw)
To: Mike Galbraith; +Cc: Peter Zijlstra, RT
[-- Attachment #1: cpupri-benchmark.patch --]
[-- Type: text/plain, Size: 6377 bytes --]
Index: linux-rt.git/kernel/sched_cpupri.c
===================================================================
--- linux-rt.git.orig/kernel/sched_cpupri.c
+++ linux-rt.git/kernel/sched_cpupri.c
@@ -28,8 +28,83 @@
*/
#include <linux/gfp.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
#include "sched_cpupri.h"
+static DEFINE_PER_CPU(unsigned long long, cpupri_loop_start);
+static DEFINE_PER_CPU(unsigned long long, cpupri_loop_max);
+static DEFINE_PER_CPU(unsigned long long, cpupri_loop_min);
+static DEFINE_PER_CPU(unsigned long long, cpupri_loop_total);
+static DEFINE_PER_CPU(atomic_t, cpupri_loop_count);
+
+static DEFINE_PER_CPU(unsigned long long, cpupri_vec_start);
+static DEFINE_PER_CPU(unsigned long long, cpupri_vec_max);
+static DEFINE_PER_CPU(unsigned long long, cpupri_vec_min);
+static DEFINE_PER_CPU(unsigned long long, cpupri_vec_total);
+static DEFINE_PER_CPU(atomic_t, cpupri_vec_count);
+
+static inline void __cpupri_start(unsigned long long *start)
+{
+ *start = sched_clock();
+}
+
+static inline void __cpupri_stop(unsigned long long *start,
+ unsigned long long *max,
+ unsigned long long *min,
+ unsigned long long *total,
+ atomic_t *count)
+{
+ unsigned long long end, delta;
+
+ end = sched_clock();
+ delta = end - *start;
+
+ /* Init for the first run */
+ if (atomic_inc_return(count) == 1) {
+ *max = delta;
+ *min = delta;
+ *total = delta;
+ return;
+ }
+
+ if (delta > *max)
+ *max = delta;
+
+ if (delta < *min)
+ *min = delta;
+
+ (*total) += delta;
+}
+
+static inline void cpupri_start_loop(void)
+{
+ __cpupri_start(&__get_cpu_var(cpupri_loop_start));
+}
+
+static inline void cpupri_stop_loop(void)
+{
+ __cpupri_stop(&__get_cpu_var(cpupri_loop_start),
+ &__get_cpu_var(cpupri_loop_max),
+ &__get_cpu_var(cpupri_loop_min),
+ &__get_cpu_var(cpupri_loop_total),
+ &__get_cpu_var(cpupri_loop_count));
+}
+
+static inline void cpupri_start_vec(void)
+{
+ __cpupri_start(&__get_cpu_var(cpupri_vec_start));
+}
+
+static inline void cpupri_stop_vec(void)
+{
+ __cpupri_stop(&__get_cpu_var(cpupri_vec_start),
+ &__get_cpu_var(cpupri_vec_max),
+ &__get_cpu_var(cpupri_vec_min),
+ &__get_cpu_var(cpupri_vec_total),
+ &__get_cpu_var(cpupri_vec_count));
+}
+
/* Convert between a 140 based task->prio, and our 102 based cpupri */
static int convert_prio(int prio)
{
@@ -71,6 +146,7 @@ int cpupri_find(struct cpupri *cp, struc
int idx = 0;
int task_pri = convert_prio(p->prio);
+ cpupri_start_loop();
for_each_cpupri_active(cp->pri_active, idx) {
struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
@@ -95,8 +171,10 @@ int cpupri_find(struct cpupri *cp, struc
continue;
}
+ cpupri_stop_loop();
return 1;
}
+ cpupri_stop_loop();
return 0;
}
@@ -124,6 +202,7 @@ void cpupri_set(struct cpupri *cp, int c
if (newpri == oldpri)
return;
+ cpupri_start_vec();
/*
* If the cpu was currently mapped to a different value, we
* need to map it to the new value then remove the old value.
@@ -155,6 +234,7 @@ void cpupri_set(struct cpupri *cp, int c
raw_spin_unlock_irqrestore(&vec->lock, flags);
}
+ cpupri_stop_vec();
*currpri = newpri;
}
@@ -202,3 +282,146 @@ void cpupri_cleanup(struct cpupri *cp)
for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
free_cpumask_var(cp->pri_to_cpu[i].mask);
}
+
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+ if (*pos >= num_online_cpus())
+ return NULL;
+
+ /* Add one, because 0 abort seq_file */
+ return (void *)(*pos)+1;
+}
+
+static void *t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ (*pos)++;
+
+ if (*pos >= num_online_cpus())
+ return NULL;
+
+ return (void *)(*pos)+1;
+}
+
+static void norm(struct seq_file *m, unsigned long long val)
+{
+ unsigned long rem;
+
+ rem = do_div(val, 1000);
+
+ seq_printf(m, "\t%lld.%03ld", val, rem);
+}
+
+static int _t_show(struct seq_file *m, const char *name,
+ unsigned long long max,
+ unsigned long long min,
+ unsigned long long total,
+ unsigned long count)
+{
+ unsigned long long avg = total;
+
+ seq_printf(m, "\t%s\t%ld", name, count);
+
+ if (!count) {
+ seq_printf(m, "\t0\t0\t0\t0\n");
+ return 0;
+ }
+
+ do_div(avg, count);
+ norm(m, max);
+ norm(m, min);
+ norm(m, avg);
+ norm(m, total);
+ seq_putc(m, '\n');
+
+ return 0;
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+ unsigned long cpu = ((unsigned long)v) - 1;
+
+ if (!cpu) {
+ seq_printf(m, "CPU:\tName\tCount\tMax\tMin\tAverage\tTotal\n");
+ seq_printf(m, "----\t----\t-----\t---\t---\t-------\t-----\n");
+ }
+
+ seq_printf(m, "cpu %ld:", cpu);
+
+ _t_show(m, "loop", per_cpu(cpupri_loop_max, cpu),
+ per_cpu(cpupri_loop_min, cpu),
+ per_cpu(cpupri_loop_total, cpu),
+ atomic_read(&per_cpu(cpupri_loop_count, cpu)));
+
+ _t_show(m, "vec", per_cpu(cpupri_vec_max, cpu),
+ per_cpu(cpupri_vec_min, cpu),
+ per_cpu(cpupri_vec_total, cpu),
+ atomic_read(&per_cpu(cpupri_vec_count, cpu)));
+ return 0;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+}
+
+static const struct seq_operations cpupri_seq_ops = {
+ .start = t_start,
+ .next = t_next,
+ .stop = t_stop,
+ .show = t_show,
+};
+
+static int cpupri_open(struct inode *inode, struct file *file)
+{
+ int ret = 0;
+ int cpu;
+
+ if (file->f_mode & FMODE_WRITE) {
+ for_each_online_cpu(cpu) {
+ atomic_set(&per_cpu(cpupri_loop_count, cpu), 0);
+ atomic_set(&per_cpu(cpupri_vec_count, cpu), 0);
+ }
+ }
+ if (file->f_mode & FMODE_READ)
+ ret = seq_open(file, &cpupri_seq_ops);
+
+ return ret;
+}
+
+static ssize_t
+cpupri_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ return cnt;
+}
+
+
+static int cpupri_release(struct inode *inode, struct file *file)
+{
+ if (file->f_mode & FMODE_READ)
+ seq_release(inode, file);
+ return 0;
+}
+
+static const struct file_operations cpupri_fops = {
+ .open = cpupri_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = cpupri_release,
+ .write = cpupri_write,
+};
+
+static __init int debug_cpupri_init(void)
+{
+ struct dentry *dentry;
+
+ if (!debugfs_initialized())
+ return -1;
+
+ dentry = debugfs_create_file("cpupri", 0644, NULL, NULL, &cpupri_fops);
+ if (!dentry)
+ pr_warning("Could not create debugfs cpupri entry\n");
+ return 0;
+}
+
+fs_initcall(debug_cpupri_init);
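A note on driving the interface above: any open of the debugfs file for writing resets the per-cpu statistics (cpupri_open() clears the counts when FMODE_WRITE is set; the bytes written are ignored by cpupri_write()), and norm() scales the sched_clock() deltas from nanoseconds to microseconds, so the Max/Min/Average/Total columns are all in usecs. The echo/run/cat cycle from the cover letter could also be driven from a small user-space helper; a minimal sketch, assuming the usual debugfs mount point (an illustration only, not part of the patch):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd;

	/* Opening for write is what resets the per-cpu counters. */
	fd = open("/sys/kernel/debug/cpupri", O_WRONLY);
	if (fd < 0) {
		perror("open (reset)");
		return 1;
	}
	if (write(fd, "0\n", 2) < 0)	/* content is ignored by cpupri_write() */
		perror("write");
	close(fd);

	/* ... run the RT workload to be measured here ... */

	/* Dump the accumulated per-cpu statistics. */
	fd = open("/sys/kernel/debug/cpupri", O_RDONLY);
	if (fd < 0) {
		perror("open (read)");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}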
* [RFC][PATCH 2/3] cpupri: Remove vector locks and read entire loop
From: Steven Rostedt @ 2011-07-29 15:13 UTC (permalink / raw)
To: Mike Galbraith; +Cc: Peter Zijlstra, RT
[-- Attachment #1: sched-cpupri-nolock.patch --]
[-- Type: text/plain, Size: 2601 bytes --]
Index: linux-rt.git/kernel/sched_cpupri.c
===================================================================
--- linux-rt.git.orig/kernel/sched_cpupri.c
+++ linux-rt.git/kernel/sched_cpupri.c
@@ -122,8 +122,8 @@ static int convert_prio(int prio)
return cpupri;
}
-#define for_each_cpupri_active(array, idx) \
- for_each_set_bit(idx, array, CPUPRI_NR_PRIORITIES)
+#define for_each_cpupri_active(array, idx) \
+ for(idx = 0; (idx) < MAX_RT_PRIO; (idx)++)
/**
* cpupri_find - find the best (lowest-pri) CPU in the system
@@ -153,6 +153,13 @@ int cpupri_find(struct cpupri *cp, struc
if (idx >= task_pri)
break;
+ /*
+ * When a mask is updated, the new prio is set before
+ * the old prio is cleared. This makes sure that we
+ * don't miss this run queue.
+ */
+ smp_rmb();
+
if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
continue;
@@ -193,7 +200,6 @@ void cpupri_set(struct cpupri *cp, int c
{
int *currpri = &cp->cpu_to_pri[cpu];
int oldpri = *currpri;
- unsigned long flags;
newpri = convert_prio(newpri);
@@ -213,26 +219,17 @@ void cpupri_set(struct cpupri *cp, int c
if (likely(newpri != CPUPRI_INVALID)) {
struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
- raw_spin_lock_irqsave(&vec->lock, flags);
-
cpumask_set_cpu(cpu, vec->mask);
- vec->count++;
- if (vec->count == 1)
- set_bit(newpri, cp->pri_active);
-
- raw_spin_unlock_irqrestore(&vec->lock, flags);
}
+ /*
+ * Set the new prio before clearing the old prio so we
+ * don't miss this run queue during the loop.
+ */
+ smp_wmb();
if (likely(oldpri != CPUPRI_INVALID)) {
struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];
- raw_spin_lock_irqsave(&vec->lock, flags);
-
- vec->count--;
- if (!vec->count)
- clear_bit(oldpri, cp->pri_active);
cpumask_clear_cpu(cpu, vec->mask);
-
- raw_spin_unlock_irqrestore(&vec->lock, flags);
}
cpupri_stop_vec();
@@ -255,7 +252,6 @@ int cpupri_init(struct cpupri *cp)
for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
struct cpupri_vec *vec = &cp->pri_to_cpu[i];
- raw_spin_lock_init(&vec->lock);
vec->count = 0;
if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
goto cleanup;
Index: linux-rt.git/kernel/sched_cpupri.h
===================================================================
--- linux-rt.git.orig/kernel/sched_cpupri.h
+++ linux-rt.git/kernel/sched_cpupri.h
@@ -12,7 +12,6 @@
/* values 2-101 are RT priorities 0-99 */
struct cpupri_vec {
- raw_spinlock_t lock;
int count;
cpumask_var_t mask;
};
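The heart of this change is an ordering contract that replaces the vector lock: cpupri_set() publishes the cpu in the new priority's mask before retiring it from the old one (the smp_wmb() above), and cpupri_find() issues smp_rmb() before testing each mask. A rough stand-alone model of that contract, written as user-space C11 with invented names (the fences stand in for the kernel barriers; this is only an illustration of the ordering, not the kernel code):

#include <stdatomic.h>
#include <stdbool.h>

#define NR_PRIO 102			/* mirrors CPUPRI_NR_PRIORITIES */

static atomic_ulong mask[NR_PRIO];	/* one bit per cpu, one word per priority */

/* updater side: move @cpu from @oldpri to @newpri -- new first, then old */
static void prio_move(int cpu, int oldpri, int newpri)
{
	atomic_fetch_or_explicit(&mask[newpri], 1UL << cpu,
				 memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* plays the role of smp_wmb() */
	atomic_fetch_and_explicit(&mask[oldpri], ~(1UL << cpu),
				  memory_order_relaxed);
}

/*
 * reader side: if the clear of the old slot has been observed, the barrier
 * pairing guarantees the set of the new slot is visible too, so the cpu is
 * never missing from both masks at once.
 */
static bool still_visible(int cpu, int oldpri, int newpri)
{
	bool old_gone = !(atomic_load_explicit(&mask[oldpri],
					       memory_order_relaxed) & (1UL << cpu));
	atomic_thread_fence(memory_order_acquire);	/* plays the role of smp_rmb() */
	bool new_seen = (atomic_load_explicit(&mask[newpri],
					      memory_order_relaxed) & (1UL << cpu)) != 0;
	return !old_gone || new_seen;
}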
* [RFC][PATCH 3/3] cpupri: Add atomic vector count to speed up loop
From: Steven Rostedt @ 2011-07-29 15:13 UTC (permalink / raw)
To: Mike Galbraith; +Cc: Peter Zijlstra, RT
[-- Attachment #1: sched-cpupri-atomic.patch --]
[-- Type: text/plain, Size: 3395 bytes --]
Index: linux-rt.git/kernel/sched_cpupri.h
===================================================================
--- linux-rt.git.orig/kernel/sched_cpupri.h
+++ linux-rt.git/kernel/sched_cpupri.h
@@ -12,8 +12,8 @@
/* values 2-101 are RT priorities 0-99 */
struct cpupri_vec {
- int count;
- cpumask_var_t mask;
+ atomic_t count;
+ cpumask_var_t mask;
};
struct cpupri {
Index: linux-rt.git/kernel/sched_cpupri.c
===================================================================
--- linux-rt.git.orig/kernel/sched_cpupri.c
+++ linux-rt.git/kernel/sched_cpupri.c
@@ -122,9 +122,6 @@ static int convert_prio(int prio)
return cpupri;
}
-#define for_each_cpupri_active(array, idx) \
- for(idx = 0; (idx) < MAX_RT_PRIO; (idx)++)
-
/**
* cpupri_find - find the best (lowest-pri) CPU in the system
* @cp: The cpupri context
@@ -147,16 +144,31 @@ int cpupri_find(struct cpupri *cp, struc
int task_pri = convert_prio(p->prio);
cpupri_start_loop();
- for_each_cpupri_active(cp->pri_active, idx) {
- struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
+ if (task_pri >= MAX_RT_PRIO)
+ return 0;
- if (idx >= task_pri)
- break;
+ for (idx = 0; idx < task_pri; idx++) {
+ struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
+ if (!atomic_read(&(vec)->count))
+ continue;
/*
- * When a mask is updated, the new prio is set before
- * the old prio is cleared. This makes sure that we
- * don't miss this run queue.
+ * When looking at the vector, we need to read the counter,
+ * do a memory barrier, then read the mask.
+ *
+ * Note: This is still all racey, but we can deal with it.
+ * Ideally, we only want to look at masks that are set.
+ *
+ * If a mask is not set, then the only thing wrong is that we
+ * did a little more work than necessary.
+ *
+ * If we read a zero count but the mask is set, because of the
+ * memory barriers, that can only happen when the highest prio
+ * task for a run queue has left the run queue, in which case,
+ * it will be followed by a pull. If the task we are processing
+ * fails to find a proper place to go, that pull request will
+ * pull this task if the run queue is running at a lower
+ * priority.
*/
smp_rmb();
@@ -220,15 +232,23 @@ void cpupri_set(struct cpupri *cp, int c
struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
cpumask_set_cpu(cpu, vec->mask);
+ /*
+ * When adding a new vector, we update the mask first,
+ * do a write memory barrier, and then update the count, to
+ * make sure the vector is visible when count is set.
+ */
+ smp_wmb();
+ atomic_inc(&(vec)->count);
}
- /*
- * Set the new prio before clearing the old prio so we
- * don't miss this run queue during the loop.
- */
- smp_wmb();
if (likely(oldpri != CPUPRI_INVALID)) {
struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];
+ /*
+ * When removing from the vector, we decrement the counter first
+ * do a memory barrier and then clear the mask.
+ */
+ atomic_dec(&(vec)->count);
+ smp_wmb();
cpumask_clear_cpu(cpu, vec->mask);
}
cpupri_stop_vec();
@@ -252,7 +272,7 @@ int cpupri_init(struct cpupri *cp)
for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
struct cpupri_vec *vec = &cp->pri_to_cpu[i];
- vec->count = 0;
+ atomic_set(&vec->count, 0);
if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
goto cleanup;
}
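With the count made atomic, the same barrier pairing is applied to the count/mask pair: on the way in the mask is made visible before the count that advertises it, on the way out the count is dropped before the mask is cleared, and the lookup only trusts a mask after a read barrier once it has seen a non-zero count. Another stand-alone sketch of that ordering, again user-space C11 with invented names rather than the kernel code:

#include <stdatomic.h>
#include <stdbool.h>

struct vec {
	atomic_int   count;	/* how many cpus sit at this priority */
	atomic_ulong mask;	/* one bit per cpu */
};

/* add a cpu: mask first, then count, so a non-zero count implies a visible mask */
static void vec_add(struct vec *v, int cpu)
{
	atomic_fetch_or_explicit(&v->mask, 1UL << cpu, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* plays the role of smp_wmb() */
	atomic_fetch_add_explicit(&v->count, 1, memory_order_relaxed);
}

/* remove a cpu: count first, then mask */
static void vec_del(struct vec *v, int cpu)
{
	atomic_fetch_sub_explicit(&v->count, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* plays the role of smp_wmb() */
	atomic_fetch_and_explicit(&v->mask, ~(1UL << cpu), memory_order_relaxed);
}

/* lookup: skip empty vectors cheaply, then trust the mask only after the fence */
static bool vec_has_cpu(struct vec *v, unsigned long allowed)
{
	if (!atomic_load_explicit(&v->count, memory_order_relaxed))
		return false;		/* may race; see the comment in cpupri_find() */
	atomic_thread_fence(memory_order_acquire);	/* plays the role of smp_rmb() */
	return (atomic_load_explicit(&v->mask, memory_order_relaxed) & allowed) != 0;
}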
* Re: [RFC][PATCH 0/3] update to cpupri algorithm
From: Mike Galbraith @ 2011-07-29 15:38 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Peter Zijlstra, RT
On Fri, 2011-07-29 at 11:13 -0400, Steven Rostedt wrote:
> Hi Mike,
>
> Could you try this patch set out. Add the first patch and then
> run your tests. The first patch only adds benchmarking, and does not
> modify the scheduler algorithm.
I will happily do all of the below.
> Do this:
>
> 1. apply first patch, build and boot
> 2. # mount -t debugfs nodev /sys/kernel/debug
> 3. # echo 0 > /sys/kernel/debug/cpupri; ./runtest; cat /sys/kernel/debug/cpupri > output
>
> The output will give you the contention of the vector locks in the
> cpupri algorithm.
>
> Then apply the second patch and do the same thing.
>
> Then apply the third patch and do the same thing.
>
> After that, could you send me the results of the output file for all
> three runs? The final patch should probably be the best overall
> results.
>
> Thanks!
Thank you.
-Mike
* Re: [RFC][PATCH 0/3] update to cpupri algorithm
From: Mike Galbraith @ 2011-07-29 18:24 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Peter Zijlstra, RT
[-- Attachment #1: Type: text/plain, Size: 1879 bytes --]
On Fri, 2011-07-29 at 11:13 -0400, Steven Rostedt wrote:
> Hi Mike,
>
> Could you try this patch set out. Add the first patch and then
> run your tests. The first patch only adds benchmarking, and does not
> modify the scheduler algorithm.
>
> Do this:
>
> 1. apply first patch, build and boot
> 2. # mount -t debugfs nodev /sys/kernel/debug
> 3. # echo 0 > /sys/kernel/debug/cpupri; ./runtest; cat /sys/kernel/debug/cpupri > output
>
> The output will give you the contention of the vector locks in the
> cpupri algorithm.
>
> Then apply the second patch and do the same thing.
>
> Then apply the third patch and do the same thing.
>
> After that, could you send me the results of the output file for all
> three runs? The final patch should probably be the best overall
> results.
Wow.
CPU: Name Count Max Min Average Total
---- ---- ----- --- --- ------- -----
cpu 60: loop 0 0 0 0 0
vec 5410840 277.954 0.084 0.782 4232895.727
cpu 61: loop 0 0 0 0 0
vec 4915648 188.399 0.084 0.570 2803220.301
cpu 62: loop 0 0 0 0 0
vec 5356076 276.417 0.085 0.786 4214544.548
cpu 63: loop 0 0 0 0 0
vec 4891837 170.531 0.085 0.799 3910948.833
vs
cpu 60: loop 0 0 0 0 0
vec 5365118 5.080 0.021 0.063 340490.267
cpu 61: loop 0 0 0 0 0
vec 4898590 1.757 0.019 0.071 347903.615
cpu 62: loop 0 0 0 0 0
vec 5737130 3.067 0.021 0.119 687108.734
cpu 63: loop 0 0 0 0 0
vec 4903228 1.822 0.021 0.071 348506.477
> Thanks!
Thank you. Now to build 2+3, and see how jitter looks with a full box.
-Mike
[-- Attachment #2: output1 --]
[-- Type: text/plain, Size: 4419 bytes --]
CPU: Name Count Max Min Average Total
---- ---- ----- --- --- ------- -----
cpu 0: loop 6 0.297 0.171 0.231 1.390
vec 1216234 1.026 0.086 0.245 298106.414
cpu 1: loop 0 0 0 0 0
vec 1208702 1.001 0.084 0.247 299580.984
cpu 2: loop 9 0.136 0.072 0.110 0.991
vec 1207543 2.848 0.084 0.172 208588.583
cpu 3: loop 0 0 0 0 0
vec 1207745 5.459 0.084 0.171 207572.796
cpu 4: loop 0 0 0 0 0
vec 1207645 0.930 0.084 0.169 204822.965
cpu 5: loop 0 0 0 0 0
vec 1207636 0.748 0.086 0.172 208310.916
cpu 6: loop 0 0 0 0 0
vec 1207636 0.644 0.085 0.168 203043.884
cpu 7: loop 0 0 0 0 0
vec 1207656 0.694 0.086 0.169 205146.436
cpu 8: loop 0 0 0 0 0
vec 6365438 72.036 0.086 0.948 6035188.484
cpu 9: loop 0 0 0 0 0
vec 5004456 23.303 0.086 0.629 3148822.497
cpu 10: loop 0 0 0 0 0
vec 5365920 84.892 0.085 0.856 4595202.057
cpu 11: loop 0 0 0 0 0
vec 4891949 26.536 0.085 0.715 3500040.991
cpu 12: loop 0 0 0 0 0
vec 5907752 93.405 0.083 0.929 5490015.474
cpu 13: loop 0 0 0 0 0
vec 4899184 87.840 0.084 0.794 3892404.964
cpu 14: loop 0 0 0 0 0
vec 5650357 87.713 0.084 0.817 4617404.711
cpu 15: loop 0 0 0 0 0
vec 4900028 101.476 0.085 0.731 3583363.670
cpu 16: loop 0 0 0 0 0
vec 6435043 143.890 0.084 0.821 5286236.674
cpu 17: loop 0 0 0 0 0
vec 5147105 261.153 0.085 0.854 4399617.996
cpu 18: loop 0 0 0 0 0
vec 5372953 170.471 0.084 0.907 4873847.662
cpu 19: loop 0 0 0 0 0
vec 5852034 109.101 0.084 0.598 3499834.934
cpu 20: loop 0 0 0 0 0
vec 5361700 263.053 0.084 0.618 3315747.110
cpu 21: loop 0 0 0 0 0
vec 5992335 104.342 0.084 0.663 3975064.702
cpu 22: loop 0 0 0 0 0
vec 5359525 132.146 0.084 0.756 4053313.648
cpu 23: loop 0 0 0 0 0
vec 5140867 131.715 0.084 0.914 4699033.422
cpu 24: loop 0 0 0 0 0
vec 5357723 151.214 0.084 0.991 5309786.543
cpu 25: loop 0 0 0 0 0
vec 5334728 120.224 0.085 0.707 3773747.739
cpu 26: loop 0 0 0 0 0
vec 5380285 123.677 0.085 0.990 5327517.609
cpu 27: loop 0 0 0 0 0
vec 5397056 121.749 0.084 0.818 4419395.819
cpu 28: loop 0 0 0 0 0
vec 5360682 260.914 0.084 0.707 3792383.788
cpu 29: loop 0 0 0 0 0
vec 4894980 185.310 0.085 0.959 4698950.783
cpu 30: loop 0 0 0 0 0
vec 5359452 170.017 0.085 0.783 4198239.431
cpu 31: loop 0 0 0 0 0
vec 4895734 110.157 0.084 1.015 4973835.751
cpu 32: loop 0 0 0 0 0
vec 5369681 121.182 0.085 1.056 5673959.285
cpu 33: loop 0 0 0 0 0
vec 4895350 176.805 0.085 0.849 4156392.392
cpu 34: loop 0 0 0 0 0
vec 5359142 203.700 0.086 0.899 4822371.587
cpu 35: loop 0 0 0 0 0
vec 4896565 208.669 0.084 0.987 4835912.225
cpu 36: loop 0 0 0 0 0
vec 5356818 209.414 0.085 0.883 4734419.876
cpu 37: loop 0 0 0 0 0
vec 5564435 279.977 0.084 0.776 4323291.950
cpu 38: loop 0 0 0 0 0
vec 5914929 213.197 0.086 0.980 5800294.365
cpu 39: loop 0 0 0 0 0
vec 5747543 278.316 0.085 0.829 4766005.372
cpu 40: loop 0 0 0 0 0
vec 5358126 286.313 0.084 0.906 4856216.257
cpu 41: loop 0 0 0 0 0
vec 4896543 282.441 0.085 0.961 4707080.279
cpu 42: loop 0 0 0 0 0
vec 5357430 265.380 0.085 0.750 4021311.594
cpu 43: loop 0 0 0 0 0
vec 4897014 267.822 0.084 0.846 4147465.000
cpu 44: loop 0 0 0 0 0
vec 5358063 243.259 0.084 0.688 3688890.283
cpu 45: loop 0 0 0 0 0
vec 4896910 206.264 0.084 0.756 3705720.419
cpu 46: loop 0 0 0 0 0
vec 6040919 220.062 0.086 0.852 5147378.527
cpu 47: loop 0 0 0 0 0
vec 4894549 185.927 0.086 1.081 5295612.421
cpu 48: loop 0 0 0 0 0
vec 5477229 148.390 0.085 1.006 5514794.941
cpu 49: loop 0 0 0 0 0
vec 4918826 196.105 0.085 0.789 3881630.724
cpu 50: loop 0 0 0 0 0
vec 5539279 178.413 0.084 0.831 4605915.301
cpu 51: loop 0 0 0 0 0
vec 4896003 199.641 0.085 0.926 4537025.049
cpu 52: loop 0 0 0 0 0
vec 5392644 232.768 0.086 0.790 4262858.944
cpu 53: loop 0 0 0 0 0
vec 4906013 203.663 0.085 0.907 4454027.337
cpu 54: loop 0 0 0 0 0
vec 5356244 209.303 0.085 1.052 5639365.955
cpu 55: loop 0 0 0 0 0
vec 5865460 215.064 0.084 0.981 5755785.977
cpu 56: loop 0 0 0 0 0
vec 6137175 262.377 0.085 0.780 4787490.596
cpu 57: loop 0 0 0 0 0
vec 4898080 270.038 0.086 0.759 3721254.507
cpu 58: loop 0 0 0 0 0
vec 5358704 273.974 0.084 0.862 4621092.667
cpu 59: loop 0 0 0 0 0
vec 4898487 187.645 0.085 0.632 3100122.586
cpu 60: loop 0 0 0 0 0
vec 5410840 277.954 0.084 0.782 4232895.727
cpu 61: loop 0 0 0 0 0
vec 4915648 188.399 0.084 0.570 2803220.301
cpu 62: loop 0 0 0 0 0
vec 5356076 276.417 0.085 0.786 4214544.548
cpu 63: loop 0 0 0 0 0
vec 4891837 170.531 0.085 0.799 3910948.833
[-- Attachment #3: output2 --]
[-- Type: text/plain, Size: 4270 bytes --]
CPU: Name Count Max Min Average Total
---- ---- ----- --- --- ------- -----
cpu 0: loop 3 1.287 0.291 0.630 1.890
vec 1216936 0.298 0.017 0.050 61252.865
cpu 1: loop 0 0 0 0 0
vec 1209075 0.463 0.017 0.046 56631.941
cpu 2: loop 3 1.621 1.289 1.429 4.289
vec 1207927 0.780 0.017 0.042 51633.737
cpu 3: loop 0 0 0 0 0
vec 1208196 0.565 0.017 0.040 49244.733
cpu 4: loop 0 0 0 0 0
vec 1208043 0.319 0.017 0.044 53534.834
cpu 5: loop 0 0 0 0 0
vec 1208053 0.309 0.017 0.044 54289.485
cpu 6: loop 0 0 0 0 0
vec 1208044 0.279 0.017 0.047 56926.443
cpu 7: loop 0 0 0 0 0
vec 1208062 0.142 0.017 0.044 53551.388
cpu 8: loop 0 0 0 0 0
vec 5525752 1.826 0.016 0.131 728502.607
cpu 9: loop 0 0 0 0 0
vec 4906031 2.616 0.016 0.146 719169.712
cpu 10: loop 0 0 0 0 0
vec 5360108 1.775 0.015 0.094 506942.177
cpu 11: loop 0 0 0 0 0
vec 4986367 1.995 0.015 0.148 742125.726
cpu 12: loop 0 0 0 0 0
vec 5585942 2.366 0.015 0.148 827584.999
cpu 13: loop 0 0 0 0 0
vec 4903611 2.500 0.014 0.122 599644.398
cpu 14: loop 0 0 0 0 0
vec 5360667 2.512 0.017 0.142 764770.776
cpu 15: loop 0 0 0 0 0
vec 5032095 2.335 0.015 0.153 770341.226
cpu 16: loop 0 0 0 0 0
vec 5364481 1.960 0.015 0.144 775150.778
cpu 17: loop 0 0 0 0 0
vec 4902803 2.280 0.015 0.093 456265.973
cpu 18: loop 0 0 0 0 0
vec 5398251 2.166 0.016 0.143 776982.539
cpu 19: loop 0 0 0 0 0
vec 4901793 2.054 0.017 0.123 603270.142
cpu 20: loop 0 0 0 0 0
vec 5362554 3.401 0.016 0.106 571069.572
cpu 21: loop 0 0 0 0 0
vec 5472390 2.345 0.015 0.115 634281.717
cpu 22: loop 0 0 0 0 0
vec 5361382 2.020 0.015 0.150 809240.717
cpu 23: loop 0 0 0 0 0
vec 5269458 2.036 0.015 0.150 790750.609
cpu 24: loop 0 0 0 0 0
vec 5917353 6.401 0.016 0.216 1279866.035
cpu 25: loop 0 0 0 0 0
vec 5055842 2.412 0.015 0.187 947468.333
cpu 26: loop 0 0 0 0 0
vec 5399171 2.779 0.016 0.188 1019882.235
cpu 27: loop 0 0 0 0 0
vec 5133356 2.239 0.015 0.134 690775.919
cpu 28: loop 0 0 0 0 0
vec 5361222 2.620 0.015 0.189 1016805.262
cpu 29: loop 0 0 0 0 0
vec 5064672 2.149 0.015 0.167 847190.872
cpu 30: loop 0 0 0 0 0
vec 5368321 1.993 0.014 0.161 869035.667
cpu 31: loop 0 0 0 0 0
vec 4906487 2.897 0.015 0.138 677603.671
cpu 32: loop 0 0 0 0 0
vec 5361503 7.795 0.016 0.134 722242.563
cpu 33: loop 0 0 0 0 0
vec 5191242 8.402 0.015 0.172 896969.628
cpu 34: loop 0 0 0 0 0
vec 5392429 8.528 0.016 0.181 976546.295
cpu 35: loop 0 0 0 0 0
vec 5446240 9.070 0.015 0.192 1047569.644
cpu 36: loop 0 0 0 0 0
vec 5388077 2.454 0.015 0.168 906697.065
cpu 37: loop 0 0 0 0 0
vec 4905785 7.504 0.016 0.145 712048.383
cpu 38: loop 0 0 0 0 0
vec 5425469 8.175 0.016 0.140 761210.354
cpu 39: loop 0 0 0 0 0
vec 5475228 12.966 0.016 0.177 970298.257
cpu 40: loop 0 0 0 0 0
vec 5368469 8.849 0.016 0.201 1083277.884
cpu 41: loop 0 0 0 0 0
vec 4933841 4.991 0.015 0.162 801858.047
cpu 42: loop 0 0 0 0 0
vec 5370134 10.376 0.015 0.137 737222.176
cpu 43: loop 0 0 0 0 0
vec 4965883 4.149 0.015 0.186 925370.213
cpu 44: loop 0 0 0 0 0
vec 5455523 5.794 0.014 0.151 825708.835
cpu 45: loop 0 0 0 0 0
vec 5617715 7.539 0.015 0.156 878782.079
cpu 46: loop 0 0 0 0 0
vec 5365957 7.481 0.015 0.184 988597.601
cpu 47: loop 0 0 0 0 0
vec 5217591 3.775 0.015 0.174 910379.784
cpu 48: loop 0 0 0 0 0
vec 5366578 6.633 0.016 0.187 1006970.047
cpu 49: loop 0 0 0 0 0
vec 4905769 6.678 0.015 0.193 951691.722
cpu 50: loop 0 0 0 0 0
vec 5367344 2.737 0.016 0.113 608111.203
cpu 51: loop 0 0 0 0 0
vec 6037981 4.473 0.014 0.201 1219438.780
cpu 52: loop 0 0 0 0 0
vec 5368694 7.075 0.015 0.192 1034306.592
cpu 53: loop 0 0 0 0 0
vec 4904642 8.612 0.016 0.168 824120.350
cpu 54: loop 0 0 0 0 0
vec 5368770 4.945 0.015 0.190 1024225.428
cpu 55: loop 0 0 0 0 0
vec 4908197 2.023 0.016 0.204 1003857.550
cpu 56: loop 0 0 0 0 0
vec 5388485 11.392 0.016 0.064 348511.438
cpu 57: loop 0 0 0 0 0
vec 5448720 11.463 0.016 0.092 501426.885
cpu 58: loop 0 0 0 0 0
vec 5414077 11.893 0.016 0.053 289594.054
cpu 59: loop 0 0 0 0 0
vec 4909051 13.002 0.016 0.047 235061.419
cpu 60: loop 0 0 0 0 0
vec 5481077 11.948 0.015 0.075 415695.026
cpu 61: loop 0 0 0 0 0
vec 4909176 11.604 0.016 0.045 225017.178
cpu 62: loop 0 0 0 0 0
vec 5368471 12.257 0.015 0.060 324502.009
cpu 63: loop 0 0 0 0 0
vec 5046229 12.114 0.017 0.082 416591.940
[-- Attachment #4: output3 --]
[-- Type: text/plain, Size: 4266 bytes --]
CPU: Name Count Max Min Average Total
---- ---- ----- --- --- ------- -----
cpu 0: loop 7 0.386 0.164 0.272 1.909
vec 1216797 0.602 0.021 0.039 47651.736
cpu 1: loop 0 0 0 0 0
vec 1208629 0.424 0.022 0.041 50264.545
cpu 2: loop 1 0.298 0.298 0.298 0.298
vec 1207764 0.334 0.022 0.041 49928.832
cpu 3: loop 0 0 0 0 0
vec 1207935 0.814 0.021 0.043 52042.976
cpu 4: loop 0 0 0 0 0
vec 1207943 1.000 0.021 0.050 60940.628
cpu 5: loop 0 0 0 0 0
vec 1207849 0.135 0.021 0.049 59245.776
cpu 6: loop 0 0 0 0 0
vec 1207850 0.277 0.022 0.050 61204.400
cpu 7: loop 0 0 0 0 0
vec 1207861 0.171 0.021 0.048 59181.777
cpu 8: loop 0 0 0 0 0
vec 5360664 2.117 0.021 0.180 968644.359
cpu 9: loop 0 0 0 0 0
vec 5020352 2.246 0.021 0.142 717185.281
cpu 10: loop 0 0 0 0 0
vec 5363341 1.621 0.021 0.100 538185.720
cpu 11: loop 0 0 0 0 0
vec 5438058 1.826 0.021 0.120 657667.134
cpu 12: loop 0 0 0 0 0
vec 5368919 2.083 0.021 0.122 658317.239
cpu 13: loop 0 0 0 0 0
vec 5105994 2.017 0.021 0.154 789376.417
cpu 14: loop 0 0 0 0 0
vec 5499093 2.172 0.021 0.130 719157.486
cpu 15: loop 0 0 0 0 0
vec 5339232 2.369 0.019 0.174 931028.799
cpu 16: loop 0 0 0 0 0
vec 5770606 1.649 0.021 0.172 992698.127
cpu 17: loop 0 0 0 0 0
vec 5471560 1.895 0.021 0.180 986788.267
cpu 18: loop 0 0 0 0 0
vec 5649186 1.847 0.021 0.155 877754.176
cpu 19: loop 0 0 0 0 0
vec 4903278 1.735 0.019 0.114 561407.976
cpu 20: loop 0 0 0 0 0
vec 5798082 1.794 0.021 0.187 1089769.811
cpu 21: loop 0 0 0 0 0
vec 4906094 1.838 0.021 0.101 497368.020
cpu 22: loop 0 0 0 0 0
vec 5363804 1.688 0.021 0.128 690648.710
cpu 23: loop 0 0 0 0 0
vec 5428115 2.841 0.021 0.162 880842.616
cpu 24: loop 0 0 0 0 0
vec 5362856 1.822 0.021 0.245 1314292.486
cpu 25: loop 0 0 0 0 0
vec 5423925 2.272 0.021 0.228 1237734.704
cpu 26: loop 0 0 0 0 0
vec 5564428 2.214 0.021 0.123 684849.158
cpu 27: loop 0 0 0 0 0
vec 4903295 1.972 0.019 0.174 856156.422
cpu 28: loop 0 0 0 0 0
vec 5365711 2.158 0.021 0.153 825078.390
cpu 29: loop 0 0 0 0 0
vec 5320397 2.052 0.021 0.236 1259236.014
cpu 30: loop 0 0 0 0 0
vec 5909975 1.628 0.021 0.138 819134.828
cpu 31: loop 0 0 0 0 0
vec 4933454 1.806 0.021 0.217 1074360.020
cpu 32: loop 0 0 0 0 0
vec 5363456 2.803 0.021 0.175 940610.241
cpu 33: loop 0 0 0 0 0
vec 5252733 2.619 0.021 0.120 633890.344
cpu 34: loop 0 0 0 0 0
vec 5363614 2.601 0.021 0.168 901835.513
cpu 35: loop 0 0 0 0 0
vec 4936869 3.680 0.019 0.127 631751.582
cpu 36: loop 0 0 0 0 0
vec 5922339 2.832 0.021 0.211 1255204.987
cpu 37: loop 0 0 0 0 0
vec 4910052 8.747 0.021 0.156 768758.380
cpu 38: loop 0 0 0 0 0
vec 5811067 3.098 0.019 0.130 759899.100
cpu 39: loop 0 0 0 0 0
vec 4903135 3.353 0.021 0.227 1113971.435
cpu 40: loop 0 0 0 0 0
vec 5371182 5.330 0.021 0.186 1000114.521
cpu 41: loop 0 0 0 0 0
vec 4906195 5.382 0.020 0.158 777099.361
cpu 42: loop 0 0 0 0 0
vec 5404715 3.548 0.021 0.137 743475.307
cpu 43: loop 0 0 0 0 0
vec 4901640 2.489 0.021 0.151 744220.605
cpu 44: loop 0 0 0 0 0
vec 5363571 3.523 0.021 0.202 1084684.998
cpu 45: loop 0 0 0 0 0
vec 5319646 2.274 0.021 0.196 1044613.152
cpu 46: loop 0 0 0 0 0
vec 5498500 2.777 0.021 0.205 1128627.536
cpu 47: loop 0 0 0 0 0
vec 4903761 2.528 0.021 0.143 702722.969
cpu 48: loop 0 0 0 0 0
vec 5800814 2.792 0.020 0.226 1312258.287
cpu 49: loop 0 0 0 0 0
vec 5335192 3.189 0.021 0.182 971787.892
cpu 50: loop 0 0 0 0 0
vec 5361204 2.404 0.021 0.204 1098507.789
cpu 51: loop 0 0 0 0 0
vec 4900869 1.833 0.021 0.228 1121984.786
cpu 52: loop 0 0 0 0 0
vec 6116938 4.508 0.019 0.196 1202757.006
cpu 53: loop 0 0 0 0 0
vec 4958737 2.507 0.021 0.175 871568.644
cpu 54: loop 0 0 0 0 0
vec 5393101 1.852 0.021 0.156 845557.350
cpu 55: loop 0 0 0 0 0
vec 5274061 2.021 0.020 0.224 1183107.607
cpu 56: loop 0 0 0 0 0
vec 5394135 5.041 0.021 0.056 305725.022
cpu 57: loop 0 0 0 0 0
vec 4901849 4.027 0.019 0.068 334411.917
cpu 58: loop 0 0 0 0 0
vec 5361518 1.907 0.021 0.051 273485.193
cpu 59: loop 0 0 0 0 0
vec 4899290 5.408 0.021 0.069 341790.762
cpu 60: loop 0 0 0 0 0
vec 5365118 5.080 0.021 0.063 340490.267
cpu 61: loop 0 0 0 0 0
vec 4898590 1.757 0.019 0.071 347903.615
cpu 62: loop 0 0 0 0 0
vec 5737130 3.067 0.021 0.119 687108.734
cpu 63: loop 0 0 0 0 0
vec 4903228 1.822 0.021 0.071 348506.477
* Re: [RFC][PATCH 0/3] update to cpupri algorithm
From: Mike Galbraith @ 2011-07-30 7:12 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Peter Zijlstra, RT
On Fri, 2011-07-29 at 20:24 +0200, Mike Galbraith wrote:
> On Fri, 2011-07-29 at 11:13 -0400, Steven Rostedt wrote:
> > Hi Mike,
> >
> > Could you try this patch set out. Add the first patch and then
> > run your tests. The first patch only adds benchmarking, and does not
> > modify the scheduler algorithm.
> >
> > Do this:
> >
> > 1. apply first patch, build and boot
> > 2. # mount -t debugfs nodev /sys/kernel/debug
> > 3. # echo 0 > /sys/kernel/debug/cpupri; ./runtest; cat /sys/kernel/debug/cpupri > output
> >
> > The output will give you the contention of the vector locks in the
> > cpupri algorithm.
> >
> > Then apply the second patch and do the same thing.
> >
> > Then apply the third patch and do the same thing.
> >
> > After that, could you send me the results of the output file for all
> > three runs? The final patch should probably be the best overall
> > results.
>
> Wow.
>
> CPU: Name Count Max Min Average Total
> ---- ---- ----- --- --- ------- -----
> cpu 60: loop 0 0 0 0 0
> vec 5410840 277.954 0.084 0.782 4232895.727
> cpu 61: loop 0 0 0 0 0
> vec 4915648 188.399 0.084 0.570 2803220.301
> cpu 62: loop 0 0 0 0 0
> vec 5356076 276.417 0.085 0.786 4214544.548
> cpu 63: loop 0 0 0 0 0
> vec 4891837 170.531 0.085 0.799 3910948.833
BTW, that's a _lot_ more usecs than I'm looking for. Neither cyclictest
nor the jitter test proggy's main thread hit that for some reason, must be
worker threads getting nailed or something.
Your patches did improve jitter (of course), but +-30 usecs with a ~full
box isn't achievable yet (oh darn). Cyclictest shows max latency well
within the goal, but jitter still goes over.
My profile looks much better, but jitter proggy using posix-timers on 56
cores warms up a spot you know all about. Lucky me, I know where fixes
for that bugger live.
With your fixes, looks like 3.0.0-rtN should be much better on hefty HW.
1 # dso: [kernel.kallsyms]
2 # Events: 272K cycles
3 #
4 # Overhead Symbol
5 # ........ .................................
6 #
7 11.58% [k] cpupri_set
8 |
9 |--71.03%-- dequeue_rt_stack
10 | dequeue_task_rt
11 | dequeue_task
12 | |
13 | |--99.98%-- deactivate_task
14 | | __schedule
15 | | schedule
16 | | |
17 | | |--35.07%-- run_ksoftirqd
18 | | | kthread
19 | | | kernel_thread_helper
20 | | |
21 | | |--32.23%-- sys_semtimedop
22 | | | system_call_fastpath
23 | | | |
24 | | | |--2.96%-- 0x7fe09af86e37
25 | | | | __semop
...
713 9.67% [k] _raw_spin_lock_irqsave
714 |
715 |--61.75%-- rt_spin_lock_slowlock
716 | |
717 | |--97.54%-- lock_timer (Hi idr_lock, you haven't met Eric yet. Clever fellow, you'll like him)
718 | | do_schedule_next_timer
719 | | dequeue_signal
720 | | sys_rt_sigtimedwait
721 | | system_call_fastpath
722 | | |
723 | | |--6.42%-- 0x7fb4c2ebbf27
724 | | | do_sigwait
* Re: [RFC][PATCH 0/3] update to cpupri algorithm
From: Mike Galbraith @ 2011-07-30 8:16 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Peter Zijlstra, RT
On Sat, 2011-07-30 at 09:12 +0200, Mike Galbraith wrote:
> My profile looks much better, but jitter proggy using posix-timers on 56
> cores warms up a spot you know all about. Lucky me, I know where fixes
> for that bugger live.
JitterChasing::TheEnd() doesn't exist, does it? Aw poo.
30999 0.05% [k] lock_timer
31000 |
31001 --- do_schedule_next_timer
31002 dequeue_signal
31003 |
31004 |--87.94%-- sys_rt_sigtimedwait
31005 | system_call_fastpath
31006 | |
31007 | |--20.24%-- 0x7f0935011f27
31008 | | do_sigwait
# dso: [kernel.kallsyms]
# Events: 118K cycles
#
# Overhead Symbol
# ........ ...................................
#
22.82% [k] _raw_spin_lock_irqsave
|
|--32.46%-- rt_mutex_trylock
| tty_write_lock
| tty_write
| vfs_write
| sys_write
| system_call_fastpath
| |
| |--4.38%-- 0x7f46672a30f0
| | __GI___libc_write
...
|
|--32.20%-- rt_mutex_slowlock
| tty_write_lock
| tty_write
| vfs_write
| sys_write
| system_call_fastpath
| |
| |--4.71%-- 0x7f46672a30f0
| | __GI___libc_write
...
|--25.36%-- rcu_sched_qs
| __schedule
| |
| |--52.52%-- preempt_schedule
| | try_to_wake_up
| | signal_wake_up
| | send_sigqueue
| | posix_timer_event
| | posix_timer_fn
| | run_hrtimer_softirq
| | run_ksoftirqd
| | kthread
| | kernel_thread_helper
| |
| --47.48%-- schedule
| rt_spin_lock_slowlock
| lock_timer
| do_schedule_next_timer
| dequeue_signal
| |
| |--63.61%-- sys_rt_sigtimedwait
| | system_call_fastpath
| | |
| | |--9.26%-- 0x7f3c840acf27
| | | do_sigwait
...
| | | do_sigwait
| | --5.58%-- [...]
| |
| --36.39%-- get_signal_to_deliver
| do_signal
| do_notify_resume
| int_signal
| |
| |--6.17%-- 0x7f1dd90610f0
| | __GI___libc_write
...
|
|--6.54%-- rt_mutex_slowunlock
| tty_write_unlock
| tty_write
| vfs_write
| sys_write
| system_call_fastpath
| |
| |--6.06%-- 0x7f11267610f0
| | __GI___libc_write
* Re: [RFC][PATCH 0/3] update to cpupri algorithm
From: Mike Galbraith @ 2011-07-30 9:19 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Peter Zijlstra, RT
On Fri, 2011-07-29 at 11:13 -0400, Steven Rostedt wrote:
> Hi Mike,
>
> Could you try this patch set out. Add the first patch and then
> run your tests. The first patch only adds benchmarking, and does not
> modify the scheduler algorithm.
>
> Do this:
>
> 1. apply first patch, build and boot
> 2. # mount -t debugfs nodev /sys/kernel/debug
> 3. # echo 0 > /sys/kernel/debug/cpupri; ./runtest; cat /sys/kernel/debug/cpupri > output
>
> The output will give you the contention of the vector locks in the
> cpupri algorithm.
>
> Then apply the second patch and do the same thing.
>
> Then apply the third patch and do the same thing.
>
> After that, could you send me the results of the output file for all
> three runs? The final patch should probably be the best overall
> results.
These patches are RFC, so here's my Comment. Steven rocks.
Below is what cyclictest has to say with the patches listed below
applied, and the simulation model running on 56 isolated cores.
The model squeaks once in a while with jitter ever so slightly over the
30us squeak threshold, so I have yet more perturbation sources, but wrt
your patches, they are manna from heaven for largish boxen ;-)
patches/ipc-sem-Optimize-update_queue-for-bulk-wakeup-calls
patches/ipc-sem-Move-wake_up_process-out-of-the-spinlock-section
patches/ipc-sem-Cacheline-align-the-ipc-spinlock-for-semaphores
patches/ipc-sem-Update-description-of-the-implementation
patches/ipc-sem-Use-ERR_CAST
patches/ipc-sem-Bugfix-for-semop-not-reporting-successful-operation
patches/ipc-sys_semctl-Fix-kernel-stack-leakage
patches/cpupri-2-3-nolock.patch
patches/cpupri-3-3-atomic.patch
patches/posix-timers-RCU-conversion
cyclictest -Smqp 99 -D 30m; ./killit.sh
T: 0 ( 7362) P:99 I:1000 C:1799001 Min: 1 Act: 2 Avg: 1 Max: 19
T: 1 ( 7363) P:99 I:1500 C:1199333 Min: 1 Act: 2 Avg: 1 Max: 4
T: 2 ( 7364) P:99 I:2000 C: 899498 Min: 1 Act: 2 Avg: 1 Max: 4
T: 3 ( 7365) P:99 I:2500 C: 719597 Min: 1 Act: 2 Avg: 1 Max: 4
T: 4 ( 7366) P:99 I:3000 C: 599664 Min: 1 Act: 2 Avg: 1 Max: 4
T: 5 ( 7367) P:99 I:3500 C: 513997 Min: 1 Act: 2 Avg: 1 Max: 5
T: 6 ( 7368) P:99 I:4000 C: 449746 Min: 1 Act: 2 Avg: 1 Max: 4
T: 7 ( 7369) P:99 I:4500 C: 399774 Min: 1 Act: 2 Avg: 1 Max: 4
T: 8 ( 7370) P:99 I:5000 C: 359796 Min: 1 Act: 3 Avg: 2 Max: 5
T: 9 ( 7371) P:99 I:5500 C: 327087 Min: 1 Act: 3 Avg: 2 Max: 7
T:10 ( 7372) P:99 I:6000 C: 299829 Min: 1 Act: 3 Avg: 2 Max: 7
T:11 ( 7373) P:99 I:6500 C: 276765 Min: 1 Act: 3 Avg: 2 Max: 13
T:12 ( 7374) P:99 I:7000 C: 256995 Min: 1 Act: 3 Avg: 2 Max: 6
T:13 ( 7375) P:99 I:7500 C: 239862 Min: 1 Act: 3 Avg: 2 Max: 10
T:14 ( 7376) P:99 I:8000 C: 224870 Min: 1 Act: 2 Avg: 2 Max: 6
T:15 ( 7377) P:99 I:8500 C: 211642 Min: 1 Act: 4 Avg: 2 Max: 7
T:16 ( 7378) P:99 I:9000 C: 199884 Min: 1 Act: 3 Avg: 2 Max: 13
T:17 ( 7379) P:99 I:9500 C: 189364 Min: 1 Act: 4 Avg: 2 Max: 6
T:18 ( 7380) P:99 I:10000 C: 179895 Min: 1 Act: 3 Avg: 2 Max: 5
T:19 ( 7381) P:99 I:10500 C: 171328 Min: 1 Act: 3 Avg: 2 Max: 9
T:20 ( 7382) P:99 I:11000 C: 163540 Min: 1 Act: 4 Avg: 2 Max: 8
T:21 ( 7383) P:99 I:11500 C: 156430 Min: 2 Act: 3 Avg: 2 Max: 7
T:22 ( 7384) P:99 I:12000 C: 149912 Min: 2 Act: 3 Avg: 2 Max: 10
T:23 ( 7385) P:99 I:12500 C: 143915 Min: 2 Act: 3 Avg: 3 Max: 8
T:24 ( 7386) P:99 I:13000 C: 138380 Min: 2 Act: 4 Avg: 3 Max: 7
T:25 ( 7387) P:99 I:13500 C: 133254 Min: 2 Act: 2 Avg: 3 Max: 7
T:26 ( 7388) P:99 I:14000 C: 128495 Min: 1 Act: 3 Avg: 3 Max: 6
T:27 ( 7389) P:99 I:14500 C: 124064 Min: 1 Act: 4 Avg: 3 Max: 10
T:28 ( 7390) P:99 I:15000 C: 119928 Min: 2 Act: 4 Avg: 3 Max: 7
T:29 ( 7391) P:99 I:15500 C: 116059 Min: 2 Act: 3 Avg: 2 Max: 6
T:30 ( 7392) P:99 I:16000 C: 112432 Min: 2 Act: 4 Avg: 3 Max: 6
T:31 ( 7393) P:99 I:16500 C: 109025 Min: 1 Act: 3 Avg: 3 Max: 6
T:32 ( 7394) P:99 I:17000 C: 105818 Min: 2 Act: 4 Avg: 3 Max: 7
T:33 ( 7395) P:99 I:17500 C: 102795 Min: 2 Act: 4 Avg: 3 Max: 7
T:34 ( 7396) P:99 I:18000 C: 99939 Min: 1 Act: 3 Avg: 3 Max: 7
T:35 ( 7397) P:99 I:18500 C: 97238 Min: 2 Act: 4 Avg: 3 Max: 8
T:36 ( 7398) P:99 I:19000 C: 94679 Min: 2 Act: 3 Avg: 3 Max: 7
T:37 ( 7399) P:99 I:19500 C: 92251 Min: 1 Act: 3 Avg: 3 Max: 8
T:38 ( 7400) P:99 I:20000 C: 89945 Min: 2 Act: 4 Avg: 3 Max: 8
T:39 ( 7401) P:99 I:20500 C: 87751 Min: 2 Act: 4 Avg: 3 Max: 13
T:40 ( 7402) P:99 I:21000 C: 85661 Min: 2 Act: 3 Avg: 3 Max: 10
T:41 ( 7403) P:99 I:21500 C: 83669 Min: 1 Act: 3 Avg: 3 Max: 7
T:42 ( 7404) P:99 I:22000 C: 81767 Min: 2 Act: 3 Avg: 3 Max: 10
T:43 ( 7405) P:99 I:22500 C: 79950 Min: 2 Act: 4 Avg: 3 Max: 9
T:44 ( 7406) P:99 I:23000 C: 78212 Min: 1 Act: 2 Avg: 3 Max: 8
T:45 ( 7407) P:99 I:23500 C: 76548 Min: 1 Act: 3 Avg: 3 Max: 7
T:46 ( 7408) P:99 I:24000 C: 74953 Min: 1 Act: 3 Avg: 3 Max: 6
T:47 ( 7409) P:99 I:24500 C: 73423 Min: 1 Act: 3 Avg: 3 Max: 6
T:48 ( 7410) P:99 I:25000 C: 71955 Min: 2 Act: 4 Avg: 3 Max: 7
T:49 ( 7411) P:99 I:25500 C: 70544 Min: 1 Act: 3 Avg: 3 Max: 6
T:50 ( 7412) P:99 I:26000 C: 69187 Min: 2 Act: 4 Avg: 3 Max: 11
T:51 ( 7413) P:99 I:26500 C: 67881 Min: 1 Act: 3 Avg: 3 Max: 6
T:52 ( 7414) P:99 I:27000 C: 66624 Min: 2 Act: 3 Avg: 3 Max: 7
T:53 ( 7415) P:99 I:27500 C: 65413 Min: 1 Act: 4 Avg: 3 Max: 6
T:54 ( 7416) P:99 I:28000 C: 64245 Min: 2 Act: 4 Avg: 3 Max: 6
T:55 ( 7417) P:99 I:28500 C: 63118 Min: 2 Act: 3 Avg: 3 Max: 6
T:56 ( 7418) P:99 I:29000 C: 62029 Min: 2 Act: 3 Avg: 3 Max: 8
T:57 ( 7419) P:99 I:29500 C: 60978 Min: 2 Act: 3 Avg: 2 Max: 5
T:58 ( 7420) P:99 I:30000 C: 59961 Min: 2 Act: 3 Avg: 2 Max: 5
T:59 ( 7421) P:99 I:30500 C: 58978 Min: 2 Act: 3 Avg: 2 Max: 5
T:60 ( 7422) P:99 I:31000 C: 58027 Min: 2 Act: 3 Avg: 3 Max: 10
T:61 ( 7423) P:99 I:31500 C: 57106 Min: 2 Act: 3 Avg: 3 Max: 9
T:62 ( 7424) P:99 I:32000 C: 56213 Min: 2 Act: 2 Avg: 2 Max: 5
T:63 ( 7425) P:99 I:32500 C: 55349 Min: 2 Act: 5 Avg: 3 Max: 6
* Re: [RFC][PATCH 0/3] update to cpupri algorithm
From: Steven Rostedt @ 2011-08-01 14:18 UTC (permalink / raw)
To: Mike Galbraith; +Cc: Peter Zijlstra, RT
On Sat, 2011-07-30 at 11:19 +0200, Mike Galbraith wrote:
> On Fri, 2011-07-29 at 11:13 -0400, Steven Rostedt wrote:
> > Hi Mike,
> >
> > Could you try this patch set out. Add the first patch and then
> > run your tests. The first patch only adds benchmarking, and does not
> > modify the scheduler algorithm.
> >
> > Do this:
> >
> > 1. apply first patch, build and boot
> > 2. # mount -t debugfs nodev /sys/kernel/debug
> > 3. # echo 0 > /sys/kernel/debug/cpupri; ./runtest; cat /sys/kernel/debug/cpupri > output
> >
> > The output will give you the contention of the vector locks in the
> > cpupri algorithm.
> >
> > Then apply the second patch and do the same thing.
> >
> > Then apply the third patch and do the same thing.
> >
> > After that, could you send me the results of the output file for all
> > three runs? The final patch should probably be the best overall
> > results.
>
> These patches are RFC, so here's my Comment. Steven rocks.
/me blushes!
Thanks for testing! I'll redo the patches to remove the logging, and
send them to you again. Could you return back a 'Tested-by' tag
afterward.
I'll also post the numbers from most of those that sent them to me.
>
> Below is what cyclictest has to say with the patches listed below
> applied, and the simulation model running on 56 isolated cores.
>
> The model squeaks once in a while with jitter ever so slightly over the
> 30us squeak threshold, so I have yet more perturbation sources, but wrt
> your patches, they are manna from heaven for largish boxen ;-)
>
> patches/ipc-sem-Optimize-update_queue-for-bulk-wakeup-calls
> patches/ipc-sem-Move-wake_up_process-out-of-the-spinlock-section
> patches/ipc-sem-Cacheline-align-the-ipc-spinlock-for-semaphores
> patches/ipc-sem-Update-description-of-the-implementation
> patches/ipc-sem-Use-ERR_CAST
> patches/ipc-sem-Bugfix-for-semop-not-reporting-successful-operation
> patches/ipc-sys_semctl-Fix-kernel-stack-leakage
> patches/cpupri-2-3-nolock.patch
> patches/cpupri-3-3-atomic.patch
> patches/posix-timers-RCU-conversion
>
> cyclictest -Smqp 99 -D 30m; ./killit.sh
Could you also post the results without the two cpupri patches?
Thanks,
-- Steve
>
> T: 0 ( 7362) P:99 I:1000 C:1799001 Min: 1 Act: 2 Avg: 1 Max: 19
> T: 1 ( 7363) P:99 I:1500 C:1199333 Min: 1 Act: 2 Avg: 1 Max: 4
> T: 2 ( 7364) P:99 I:2000 C: 899498 Min: 1 Act: 2 Avg: 1 Max: 4
> T: 3 ( 7365) P:99 I:2500 C: 719597 Min: 1 Act: 2 Avg: 1 Max: 4
> T: 4 ( 7366) P:99 I:3000 C: 599664 Min: 1 Act: 2 Avg: 1 Max: 4
> T: 5 ( 7367) P:99 I:3500 C: 513997 Min: 1 Act: 2 Avg: 1 Max: 5
> T: 6 ( 7368) P:99 I:4000 C: 449746 Min: 1 Act: 2 Avg: 1 Max: 4
> T: 7 ( 7369) P:99 I:4500 C: 399774 Min: 1 Act: 2 Avg: 1 Max: 4
> T: 8 ( 7370) P:99 I:5000 C: 359796 Min: 1 Act: 3 Avg: 2 Max: 5
> T: 9 ( 7371) P:99 I:5500 C: 327087 Min: 1 Act: 3 Avg: 2 Max: 7
> T:10 ( 7372) P:99 I:6000 C: 299829 Min: 1 Act: 3 Avg: 2 Max: 7
> T:11 ( 7373) P:99 I:6500 C: 276765 Min: 1 Act: 3 Avg: 2 Max: 13
> T:12 ( 7374) P:99 I:7000 C: 256995 Min: 1 Act: 3 Avg: 2 Max: 6
> T:13 ( 7375) P:99 I:7500 C: 239862 Min: 1 Act: 3 Avg: 2 Max: 10
> T:14 ( 7376) P:99 I:8000 C: 224870 Min: 1 Act: 2 Avg: 2 Max: 6
> T:15 ( 7377) P:99 I:8500 C: 211642 Min: 1 Act: 4 Avg: 2 Max: 7
> T:16 ( 7378) P:99 I:9000 C: 199884 Min: 1 Act: 3 Avg: 2 Max: 13
> T:17 ( 7379) P:99 I:9500 C: 189364 Min: 1 Act: 4 Avg: 2 Max: 6
> T:18 ( 7380) P:99 I:10000 C: 179895 Min: 1 Act: 3 Avg: 2 Max: 5
> T:19 ( 7381) P:99 I:10500 C: 171328 Min: 1 Act: 3 Avg: 2 Max: 9
> T:20 ( 7382) P:99 I:11000 C: 163540 Min: 1 Act: 4 Avg: 2 Max: 8
> T:21 ( 7383) P:99 I:11500 C: 156430 Min: 2 Act: 3 Avg: 2 Max: 7
> T:22 ( 7384) P:99 I:12000 C: 149912 Min: 2 Act: 3 Avg: 2 Max: 10
> T:23 ( 7385) P:99 I:12500 C: 143915 Min: 2 Act: 3 Avg: 3 Max: 8
> T:24 ( 7386) P:99 I:13000 C: 138380 Min: 2 Act: 4 Avg: 3 Max: 7
> T:25 ( 7387) P:99 I:13500 C: 133254 Min: 2 Act: 2 Avg: 3 Max: 7
> T:26 ( 7388) P:99 I:14000 C: 128495 Min: 1 Act: 3 Avg: 3 Max: 6
> T:27 ( 7389) P:99 I:14500 C: 124064 Min: 1 Act: 4 Avg: 3 Max: 10
> T:28 ( 7390) P:99 I:15000 C: 119928 Min: 2 Act: 4 Avg: 3 Max: 7
> T:29 ( 7391) P:99 I:15500 C: 116059 Min: 2 Act: 3 Avg: 2 Max: 6
> T:30 ( 7392) P:99 I:16000 C: 112432 Min: 2 Act: 4 Avg: 3 Max: 6
> T:31 ( 7393) P:99 I:16500 C: 109025 Min: 1 Act: 3 Avg: 3 Max: 6
> T:32 ( 7394) P:99 I:17000 C: 105818 Min: 2 Act: 4 Avg: 3 Max: 7
> T:33 ( 7395) P:99 I:17500 C: 102795 Min: 2 Act: 4 Avg: 3 Max: 7
> T:34 ( 7396) P:99 I:18000 C: 99939 Min: 1 Act: 3 Avg: 3 Max: 7
> T:35 ( 7397) P:99 I:18500 C: 97238 Min: 2 Act: 4 Avg: 3 Max: 8
> T:36 ( 7398) P:99 I:19000 C: 94679 Min: 2 Act: 3 Avg: 3 Max: 7
> T:37 ( 7399) P:99 I:19500 C: 92251 Min: 1 Act: 3 Avg: 3 Max: 8
> T:38 ( 7400) P:99 I:20000 C: 89945 Min: 2 Act: 4 Avg: 3 Max: 8
> T:39 ( 7401) P:99 I:20500 C: 87751 Min: 2 Act: 4 Avg: 3 Max: 13
> T:40 ( 7402) P:99 I:21000 C: 85661 Min: 2 Act: 3 Avg: 3 Max: 10
> T:41 ( 7403) P:99 I:21500 C: 83669 Min: 1 Act: 3 Avg: 3 Max: 7
> T:42 ( 7404) P:99 I:22000 C: 81767 Min: 2 Act: 3 Avg: 3 Max: 10
> T:43 ( 7405) P:99 I:22500 C: 79950 Min: 2 Act: 4 Avg: 3 Max: 9
> T:44 ( 7406) P:99 I:23000 C: 78212 Min: 1 Act: 2 Avg: 3 Max: 8
> T:45 ( 7407) P:99 I:23500 C: 76548 Min: 1 Act: 3 Avg: 3 Max: 7
> T:46 ( 7408) P:99 I:24000 C: 74953 Min: 1 Act: 3 Avg: 3 Max: 6
> T:47 ( 7409) P:99 I:24500 C: 73423 Min: 1 Act: 3 Avg: 3 Max: 6
> T:48 ( 7410) P:99 I:25000 C: 71955 Min: 2 Act: 4 Avg: 3 Max: 7
> T:49 ( 7411) P:99 I:25500 C: 70544 Min: 1 Act: 3 Avg: 3 Max: 6
> T:50 ( 7412) P:99 I:26000 C: 69187 Min: 2 Act: 4 Avg: 3 Max: 11
> T:51 ( 7413) P:99 I:26500 C: 67881 Min: 1 Act: 3 Avg: 3 Max: 6
> T:52 ( 7414) P:99 I:27000 C: 66624 Min: 2 Act: 3 Avg: 3 Max: 7
> T:53 ( 7415) P:99 I:27500 C: 65413 Min: 1 Act: 4 Avg: 3 Max: 6
> T:54 ( 7416) P:99 I:28000 C: 64245 Min: 2 Act: 4 Avg: 3 Max: 6
> T:55 ( 7417) P:99 I:28500 C: 63118 Min: 2 Act: 3 Avg: 3 Max: 6
> T:56 ( 7418) P:99 I:29000 C: 62029 Min: 2 Act: 3 Avg: 3 Max: 8
> T:57 ( 7419) P:99 I:29500 C: 60978 Min: 2 Act: 3 Avg: 2 Max: 5
> T:58 ( 7420) P:99 I:30000 C: 59961 Min: 2 Act: 3 Avg: 2 Max: 5
> T:59 ( 7421) P:99 I:30500 C: 58978 Min: 2 Act: 3 Avg: 2 Max: 5
> T:60 ( 7422) P:99 I:31000 C: 58027 Min: 2 Act: 3 Avg: 3 Max: 10
> T:61 ( 7423) P:99 I:31500 C: 57106 Min: 2 Act: 3 Avg: 3 Max: 9
> T:62 ( 7424) P:99 I:32000 C: 56213 Min: 2 Act: 2 Avg: 2 Max: 5
> T:63 ( 7425) P:99 I:32500 C: 55349 Min: 2 Act: 5 Avg: 3 Max: 6
>
* Re: [RFC][PATCH 0/3] update to cpupri algorithm
From: Mike Galbraith @ 2011-08-01 18:41 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Peter Zijlstra, RT
On Mon, 2011-08-01 at 10:18 -0400, Steven Rostedt wrote:
> On Sat, 2011-07-30 at 11:19 +0200, Mike Galbraith wrote:
> > On Fri, 2011-07-29 at 11:13 -0400, Steven Rostedt wrote:
> > > Hi Mike,
> > >
> > > Could you try this patch set out. Add the first patch and then
> > > run your tests. The first patch only adds benchmarking, and does not
> > > modify the scheduler algorithm.
> > >
> > > Do this:
> > >
> > > 1. apply first patch, build and boot
> > > 2. # mount -t debugfs nodev /sys/kernel/debug
> > > 3. # echo 0 > /sys/kernel/debug/cpupri; ./runtest; cat /sys/kernel/debug/cpupri > output
> > >
> > > The output will give you the contention of the vector locks in the
> > > cpupri algorithm.
> > >
> > > Then apply the second patch and do the same thing.
> > >
> > > Then apply the third patch and do the same thing.
> > >
> > > After that, could you send me the results of the output file for all
> > > three runs? The final patch should probably be the best overall
> > > results.
> >
> > These patches are RFC, so here's my Comment. Steven rocks.
>
> /me blushes!
Don't, they're excellent. /me was having one _hell_ of a hard time
trying to convince the box that somewhat tightly constrained realtime really
really should be possible on isolated CPUs.
> Thanks for testing! I'll redo the patches to remove the logging, and
> send them to you again. Could you return back a 'Tested-by' tag
> afterward.
(I did the logging removal; the posted numbers were from that, but..)
Sure. I've been beating on them (heftily), and there have been no
ill effects detected. You can have my..
Tested-by: Mike Galbraith <mgalbraith@suse.de> ||
Tested-by: Mike Galbraith <efault@gmx.de> (the real /me)
..now fwiw, they were the deciding factor here.
> Could you also post the results without the two cpupri patches?
Sure, will do. As noted, the cyclictest numbers were never as nasty as
the benchmark indicated they could (did) get. With this particular test
app, there's a nasty feedback perturbation source, tty. It can
feed on itself if several threads start griping.
While testing your patches, I just let it do its thing with a ~full-up
load it never could handle, and let the chips fall where they may. The
cyclictest numbers I post will be 1:1 with the results posted, i.e. taking
tty out of the picture, so the difference won't be as huge as the lock
benchmark showed it can (did) get.
-Mike
* Re: [RFC][PATCH 0/3] update to cpupri algorithm
From: Steven Rostedt @ 2011-08-01 18:54 UTC (permalink / raw)
To: Mike Galbraith; +Cc: Peter Zijlstra, RT
On Mon, 2011-08-01 at 20:41 +0200, Mike Galbraith wrote:
> > Thanks for testing! I'll redo the patches to remove the logging, and
> > send them to you again. Could you return back a 'Tested-by' tag
> > afterward.
>
> (I did the logging removal, the posted numbers were that, but..)
Could you merge the two patches and diff them against the one I posted
privately. Just to make sure that you really tested the one I plan on
giving to Mainline.
>
> Sure. I've been beating on them (heftily), there are there have been no
> ill effects detected. You can have my..
> Tested-by: Mike Galbraith <mgalbraith@suse.de> ||
> Tested-by: Mike Galbraith <efault@gmx.de> (the real /me)
Heh, I'll probably take the suse.de, just because it's nice to see
multiple companies on a single patch. Especially one that affects the
scheduler.
-- Steve
* Re: [RFC][PATCH 0/3] update to cpupri algorithm
From: Mike Galbraith @ 2011-08-01 19:06 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Peter Zijlstra, RT
On Mon, 2011-08-01 at 14:54 -0400, Steven Rostedt wrote:
> On Mon, 2011-08-01 at 20:41 +0200, Mike Galbraith wrote:
>
> > > Thanks for testing! I'll redo the patches to remove the logging, and
> > > send them to you again. Could you return back a 'Tested-by' tag
> > > afterward.
> >
> > (I did the logging removal, the posted numbers were that, but..)
>
> Could you merge the two patches and diff them against the one I posted
> privately. Just to make sure that you really tested the one I plan on
> giving to Mainline.
Good idea, will do.
-Mike
* Re: [RFC][PATCH 0/3] update to cpupri algorithm
From: Mike Galbraith @ 2011-08-02 8:46 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Peter Zijlstra, RT
On Mon, 2011-08-01 at 10:18 -0400, Steven Rostedt wrote:
> Could you also post the results without the two cpupri patches?
ipc/timer patches still applied, cpupri patches popped.
During this 10-minute, 56-CPU run, there were 230 events > 30us, max
+49.337, min -41.739.
T: 0 ( 8149) P:99 I:1000 C: 599001 Min: 1 Act: 2 Avg: 1 Max: 12
T: 1 ( 8150) P:99 I:1500 C: 399333 Min: 1 Act: 2 Avg: 1 Max: 5
T: 2 ( 8151) P:99 I:2000 C: 299498 Min: 1 Act: 2 Avg: 1 Max: 7
T: 3 ( 8152) P:99 I:2500 C: 239598 Min: 1 Act: 2 Avg: 1 Max: 4
T: 4 ( 8153) P:99 I:3000 C: 199664 Min: 1 Act: 1 Avg: 1 Max: 4
T: 5 ( 8154) P:99 I:3500 C: 171139 Min: 1 Act: 1 Avg: 1 Max: 5
T: 6 ( 8155) P:99 I:4000 C: 149746 Min: 1 Act: 1 Avg: 1 Max: 4
T: 7 ( 8156) P:99 I:4500 C: 133107 Min: 1 Act: 2 Avg: 1 Max: 4
T: 8 ( 8157) P:99 I:5000 C: 119796 Min: 2 Act: 3 Avg: 3 Max: 11
T: 9 ( 8158) P:99 I:5500 C: 108905 Min: 1 Act: 3 Avg: 2 Max: 14
T:10 ( 8159) P:99 I:6000 C: 99829 Min: 1 Act: 5 Avg: 2 Max: 13
T:11 ( 8160) P:99 I:6500 C: 92149 Min: 1 Act: 3 Avg: 2 Max: 10
T:12 ( 8161) P:99 I:7000 C: 85567 Min: 2 Act: 2 Avg: 2 Max: 11
T:13 ( 8162) P:99 I:7500 C: 79862 Min: 1 Act: 3 Avg: 2 Max: 12
T:14 ( 8163) P:99 I:8000 C: 74870 Min: 1 Act: 2 Avg: 2 Max: 11
T:15 ( 8164) P:99 I:8500 C: 70466 Min: 1 Act: 4 Avg: 2 Max: 15
T:16 ( 8165) P:99 I:9000 C: 66551 Min: 2 Act: 3 Avg: 3 Max: 16
T:17 ( 8166) P:99 I:9500 C: 63048 Min: 2 Act: 5 Avg: 3 Max: 15
T:18 ( 8167) P:99 I:10000 C: 59895 Min: 2 Act: 3 Avg: 3 Max: 12
T:19 ( 8168) P:99 I:10500 C: 57043 Min: 1 Act: 3 Avg: 3 Max: 10
T:20 ( 8169) P:99 I:11000 C: 54450 Min: 1 Act: 5 Avg: 3 Max: 15
T:21 ( 8170) P:99 I:11500 C: 52082 Min: 1 Act: 3 Avg: 3 Max: 10
T:22 ( 8171) P:99 I:12000 C: 49912 Min: 2 Act: 2 Avg: 3 Max: 11
T:23 ( 8172) P:99 I:12500 C: 47915 Min: 2 Act: 3 Avg: 3 Max: 10
T:24 ( 8173) P:99 I:13000 C: 46072 Min: 2 Act: 2 Avg: 3 Max: 11
T:25 ( 8174) P:99 I:13500 C: 44365 Min: 2 Act: 3 Avg: 3 Max: 12
T:26 ( 8175) P:99 I:14000 C: 42781 Min: 2 Act: 5 Avg: 4 Max: 12
T:27 ( 8176) P:99 I:14500 C: 41305 Min: 2 Act: 3 Avg: 3 Max: 11
T:28 ( 8177) P:99 I:15000 C: 39928 Min: 2 Act: 3 Avg: 3 Max: 11
T:29 ( 8178) P:99 I:15500 C: 38640 Min: 2 Act: 7 Avg: 3 Max: 12
T:30 ( 8179) P:99 I:16000 C: 37432 Min: 2 Act: 4 Avg: 4 Max: 14
T:31 ( 8180) P:99 I:16500 C: 36298 Min: 2 Act: 4 Avg: 3 Max: 14
T:32 ( 8181) P:99 I:17000 C: 35230 Min: 2 Act: 4 Avg: 3 Max: 10
T:33 ( 8182) P:99 I:17500 C: 34223 Min: 2 Act: 3 Avg: 3 Max: 11
T:34 ( 8183) P:99 I:18000 C: 33273 Min: 2 Act: 4 Avg: 3 Max: 10
T:35 ( 8184) P:99 I:18500 C: 32373 Min: 2 Act: 3 Avg: 3 Max: 10
T:36 ( 8185) P:99 I:19000 C: 31521 Min: 1 Act: 4 Avg: 3 Max: 10
T:37 ( 8186) P:99 I:19500 C: 30713 Min: 2 Act: 7 Avg: 3 Max: 12
T:38 ( 8187) P:99 I:20000 C: 29945 Min: 1 Act: 7 Avg: 6 Max: 11
T:39 ( 8188) P:99 I:20500 C: 29214 Min: 2 Act: 5 Avg: 3 Max: 11
T:40 ( 8189) P:99 I:21000 C: 28519 Min: 2 Act: 5 Avg: 4 Max: 14
T:41 ( 8190) P:99 I:21500 C: 27855 Min: 2 Act: 4 Avg: 3 Max: 9
T:42 ( 8191) P:99 I:22000 C: 27222 Min: 2 Act: 3 Avg: 3 Max: 10
T:43 ( 8192) P:99 I:22500 C: 26617 Min: 2 Act: 3 Avg: 3 Max: 15
T:44 ( 8193) P:99 I:23000 C: 26038 Min: 2 Act: 3 Avg: 3 Max: 11
T:45 ( 8194) P:99 I:23500 C: 25484 Min: 2 Act: 3 Avg: 3 Max: 13
T:46 ( 8195) P:99 I:24000 C: 24953 Min: 2 Act: 4 Avg: 3 Max: 10
T:47 ( 8196) P:99 I:24500 C: 24444 Min: 2 Act: 5 Avg: 3 Max: 11
T:48 ( 8197) P:99 I:25000 C: 23955 Min: 2 Act: 3 Avg: 3 Max: 11
T:49 ( 8198) P:99 I:25500 C: 23485 Min: 3 Act: 5 Avg: 5 Max: 15
T:50 ( 8199) P:99 I:26000 C: 23033 Min: 2 Act: 5 Avg: 3 Max: 12
T:51 ( 8200) P:99 I:26500 C: 22599 Min: 2 Act: 4 Avg: 3 Max: 12
T:52 ( 8201) P:99 I:27000 C: 22180 Min: 2 Act: 4 Avg: 3 Max: 10
T:53 ( 8202) P:99 I:27500 C: 21777 Min: 2 Act: 4 Avg: 3 Max: 11
T:54 ( 8203) P:99 I:28000 C: 21388 Min: 2 Act: 3 Avg: 3 Max: 11
T:55 ( 8204) P:99 I:28500 C: 21012 Min: 2 Act: 4 Avg: 3 Max: 11
T:56 ( 8205) P:99 I:29000 C: 20650 Min: 2 Act: 4 Avg: 3 Max: 10
T:57 ( 8206) P:99 I:29500 C: 20300 Min: 2 Act: 4 Avg: 3 Max: 11
T:58 ( 8207) P:99 I:30000 C: 19961 Min: 2 Act: 2 Avg: 3 Max: 10
T:59 ( 8208) P:99 I:30500 C: 19634 Min: 2 Act: 4 Avg: 3 Max: 11
T:60 ( 8209) P:99 I:31000 C: 19317 Min: 2 Act: 4 Avg: 3 Max: 15
T:61 ( 8210) P:99 I:31500 C: 19011 Min: 2 Act: 4 Avg: 3 Max: 10
T:62 ( 8211) P:99 I:32000 C: 18713 Min: 2 Act: 2 Avg: 2 Max: 10
T:63 ( 8212) P:99 I:32500 C: 18425 Min: 2 Act: 3 Avg: 2 Max: 12
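(Key to the cyclictest columns: T is the measurement thread number with its
PID in parentheses, P the SCHED_FIFO priority, I the wakeup interval in
microseconds, C the number of completed cycles, and Min/Act/Avg/Max the
measured wakeup latencies in microseconds, Act being the most recent sample.
The exact invocation isn't quoted in the thread; a run of roughly this shape,
left going for the ten minutes mentioned above, gives one pinned thread per
CPU at equal priority with a 1000us base interval staggered by 500us per
thread:

  # cyclictest -S -m -p99 -i1000 -d500

-S is cyclictest's standard SMP mode, -m locks memory to avoid page-fault
noise.)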
Hrmph, hiccups don't show up in cyclictest numbers.
Repeating the exact same test with the folded patch re-applied...
T: 0 (12130) P:99 I:1000 C: 599001 Min: 1 Act: 2 Avg: 1 Max: 6
T: 1 (12131) P:99 I:1500 C: 399333 Min: 1 Act: 1 Avg: 1 Max: 5
T: 2 (12132) P:99 I:2000 C: 299498 Min: 1 Act: 1 Avg: 1 Max: 4
T: 3 (12133) P:99 I:2500 C: 239598 Min: 1 Act: 2 Avg: 1 Max: 4
T: 4 (12134) P:99 I:3000 C: 199664 Min: 1 Act: 1 Avg: 1 Max: 4
T: 5 (12135) P:99 I:3500 C: 171139 Min: 1 Act: 1 Avg: 1 Max: 4
T: 6 (12136) P:99 I:4000 C: 149746 Min: 1 Act: 2 Avg: 1 Max: 4
T: 7 (12137) P:99 I:4500 C: 133107 Min: 1 Act: 1 Avg: 1 Max: 5
T: 8 (12138) P:99 I:5000 C: 119796 Min: 1 Act: 4 Avg: 2 Max: 6
T: 9 (12139) P:99 I:5500 C: 108905 Min: 1 Act: 3 Avg: 2 Max: 6
T:10 (12140) P:99 I:6000 C: 99829 Min: 1 Act: 2 Avg: 2 Max: 6
T:11 (12141) P:99 I:6500 C: 92149 Min: 1 Act: 2 Avg: 2 Max: 7
T:12 (12142) P:99 I:7000 C: 85567 Min: 1 Act: 3 Avg: 2 Max: 5
T:13 (12143) P:99 I:7500 C: 79862 Min: 1 Act: 3 Avg: 2 Max: 8
T:14 (12144) P:99 I:8000 C: 74870 Min: 1 Act: 2 Avg: 2 Max: 5
T:15 (12145) P:99 I:8500 C: 70466 Min: 1 Act: 2 Avg: 2 Max: 11
T:16 (12146) P:99 I:9000 C: 66551 Min: 1 Act: 3 Avg: 2 Max: 6
T:17 (12147) P:99 I:9500 C: 63048 Min: 1 Act: 2 Avg: 2 Max: 6
T:18 (12148) P:99 I:10000 C: 59895 Min: 2 Act: 4 Avg: 3 Max: 7
T:19 (12149) P:99 I:10500 C: 57043 Min: 1 Act: 3 Avg: 2 Max: 5
T:20 (12150) P:99 I:11000 C: 54450 Min: 1 Act: 3 Avg: 2 Max: 5
T:21 (12151) P:99 I:11500 C: 52082 Min: 1 Act: 3 Avg: 2 Max: 5
T:22 (12152) P:99 I:12000 C: 49912 Min: 1 Act: 3 Avg: 2 Max: 6
T:23 (12153) P:99 I:12500 C: 47915 Min: 1 Act: 2 Avg: 2 Max: 5
T:24 (12154) P:99 I:13000 C: 46072 Min: 2 Act: 4 Avg: 3 Max: 8
T:25 (12155) P:99 I:13500 C: 44365 Min: 1 Act: 3 Avg: 3 Max: 5
T:26 (12156) P:99 I:14000 C: 42781 Min: 2 Act: 3 Avg: 2 Max: 6
T:27 (12157) P:99 I:14500 C: 41305 Min: 2 Act: 3 Avg: 3 Max: 6
T:28 (12158) P:99 I:15000 C: 39928 Min: 1 Act: 3 Avg: 2 Max: 6
T:29 (12159) P:99 I:15500 C: 38640 Min: 2 Act: 3 Avg: 3 Max: 6
T:30 (12160) P:99 I:16000 C: 37432 Min: 1 Act: 3 Avg: 2 Max: 5
T:31 (12161) P:99 I:16500 C: 36298 Min: 1 Act: 3 Avg: 3 Max: 7
T:32 (12162) P:99 I:17000 C: 35230 Min: 2 Act: 4 Avg: 3 Max: 8
T:33 (12163) P:99 I:17500 C: 34223 Min: 2 Act: 4 Avg: 3 Max: 8
T:34 (12164) P:99 I:18000 C: 33273 Min: 2 Act: 4 Avg: 3 Max: 8
T:35 (12165) P:99 I:18500 C: 32373 Min: 1 Act: 3 Avg: 3 Max: 9
T:36 (12166) P:99 I:19000 C: 31521 Min: 2 Act: 3 Avg: 3 Max: 7
T:37 (12167) P:99 I:19500 C: 30713 Min: 1 Act: 3 Avg: 2 Max: 6
T:38 (12168) P:99 I:20000 C: 29945 Min: 1 Act: 3 Avg: 2 Max: 6
T:39 (12169) P:99 I:20500 C: 29214 Min: 1 Act: 4 Avg: 2 Max: 8
T:40 (12170) P:99 I:21000 C: 28519 Min: 1 Act: 3 Avg: 2 Max: 7
T:41 (12171) P:99 I:21500 C: 27855 Min: 1 Act: 3 Avg: 2 Max: 6
T:42 (12172) P:99 I:22000 C: 27222 Min: 1 Act: 3 Avg: 2 Max: 9
T:43 (12173) P:99 I:22500 C: 26617 Min: 1 Act: 4 Avg: 2 Max: 5
T:44 (12174) P:99 I:23000 C: 26038 Min: 1 Act: 3 Avg: 2 Max: 7
T:45 (12175) P:99 I:23500 C: 25484 Min: 1 Act: 3 Avg: 2 Max: 8
T:46 (12176) P:99 I:24000 C: 24953 Min: 1 Act: 3 Avg: 2 Max: 7
T:47 (12177) P:99 I:24500 C: 24444 Min: 1 Act: 2 Avg: 3 Max: 5
T:48 (12178) P:99 I:25000 C: 23955 Min: 1 Act: 3 Avg: 2 Max: 6
T:49 (12179) P:99 I:25500 C: 23485 Min: 2 Act: 3 Avg: 2 Max: 7
T:50 (12180) P:99 I:26000 C: 23033 Min: 1 Act: 2 Avg: 2 Max: 5
T:51 (12181) P:99 I:26500 C: 22599 Min: 1 Act: 3 Avg: 2 Max: 5
T:52 (12182) P:99 I:27000 C: 22180 Min: 1 Act: 3 Avg: 2 Max: 6
T:53 (12183) P:99 I:27500 C: 21777 Min: 1 Act: 3 Avg: 2 Max: 5
T:54 (12184) P:99 I:28000 C: 21388 Min: 1 Act: 3 Avg: 2 Max: 6
T:55 (12185) P:99 I:28500 C: 21012 Min: 1 Act: 4 Avg: 2 Max: 8
T:56 (12186) P:99 I:29000 C: 20650 Min: 1 Act: 3 Avg: 2 Max: 6
T:57 (12187) P:99 I:29500 C: 20300 Min: 2 Act: 3 Avg: 2 Max: 6
T:58 (12188) P:99 I:30000 C: 19961 Min: 1 Act: 3 Avg: 2 Max: 6
T:59 (12189) P:99 I:30500 C: 19634 Min: 1 Act: 3 Avg: 3 Max: 7
T:60 (12190) P:99 I:31000 C: 19317 Min: 2 Act: 3 Avg: 2 Max: 7
T:61 (12191) P:99 I:31500 C: 19011 Min: 2 Act: 3 Avg: 2 Max: 7
T:62 (12192) P:99 I:32000 C: 18713 Min: 2 Act: 3 Avg: 2 Max: 5
T:63 (12193) P:99 I:32500 C: 18425 Min: 1 Act: 3 Avg: 2 Max: 5
No hiccups after startup, zero, nada. Whatever. Cyclictest is happier,
the jitter test is finally a happy camper, so I'm a happy camper too.
-Mike
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [RFC][PATCH 0/3] update to cpupri algorithm
2011-08-01 18:54 ` Steven Rostedt
2011-08-01 19:06 ` Mike Galbraith
@ 2011-08-02 8:59 ` Mike Galbraith
1 sibling, 0 replies; 15+ messages in thread
From: Mike Galbraith @ 2011-08-02 8:59 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Peter Zijlstra, RT
[-- Attachment #1: Type: text/plain, Size: 724 bytes --]
On Mon, 2011-08-01 at 14:54 -0400, Steven Rostedt wrote:
> On Mon, 2011-08-01 at 20:41 +0200, Mike Galbraith wrote:
>
> > > Thanks for testing! I'll redo the patches to remove the logging, and
> > > send them to you again. Could you send back a 'Tested-by' tag
> > > afterward?
> >
> > (I did the logging removal, the posted numbers reflect that, but..)
>
> Could you merge the two patches and diff them against the one I posted
> privately? Just to make sure that you really tested the one I plan on
> sending to mainline.
Zero difference, but then it's the same guy wedging patches. Attached
is the result of merging your folded patch, then applying it to a virgin
33-rt git repo to generate a pretty=email diff.
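(The exact commands aren't shown here; a sequence of roughly this shape,
with hypothetical branch and file names, turns a folded patch into an
email-formatted diff like the attached one:

  # cd linux-2.6.33.9-rt31
  # git checkout -b cpupri-folded
  # git apply --index sched-cpupri-folded.patch
  # git commit -m "sched/cpupri: remove the vec locks"
  # git log -1 -p --pretty=email > sched-cpupri-Remove-the-vec-lock-2.6.33.9-rt31

git format-patch -1 would produce an equivalent email-format patch.)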
-Mike
[-- Attachment #2: sched-cpupri-Remove-the-vec-lock-2.6.33.9-rt31 --]
[-- Type: application/mbox, Size: 11343 bytes --]
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads: [~2011-08-02 9:00 UTC | newest]
Thread overview: 15+ messages
2011-07-29 15:13 [RFC][PATCH 0/3] update to cpupri algorithm Steven Rostedt
2011-07-29 15:13 ` [RFC][PATCH 1/3] cpupri: Add profiling Steven Rostedt
2011-07-29 15:13 ` [RFC][PATCH 2/3] cpupri: Remove vector locks and read entire loop Steven Rostedt
2011-07-29 15:13 ` [RFC][PATCH 3/3] cpupri: Add atomic vector count to speed up loop Steven Rostedt
2011-07-29 15:38 ` [RFC][PATCH 0/3] update to cpupri algorithm Mike Galbraith
2011-07-29 18:24 ` Mike Galbraith
2011-07-30 7:12 ` Mike Galbraith
2011-07-30 8:16 ` Mike Galbraith
2011-07-30 9:19 ` Mike Galbraith
2011-08-01 14:18 ` Steven Rostedt
2011-08-01 18:41 ` Mike Galbraith
2011-08-01 18:54 ` Steven Rostedt
2011-08-01 19:06 ` Mike Galbraith
2011-08-02 8:59 ` Mike Galbraith
2011-08-02 8:46 ` Mike Galbraith