* [RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency
@ 2010-04-05 19:33 Mike Chan
2010-04-05 19:52 ` Paul Menage
` (2 more replies)
0 siblings, 3 replies; 10+ messages in thread
From: Mike Chan @ 2010-04-05 19:33 UTC (permalink / raw)
Cc: menage, balbir, cpufreq, linux-kernel, Mike Chan
New file: cpuacct.cpufreq when CONFIG_CPU_FREQ_STATS is enabled.
cpuacct.cpufreq accounts for cpu time per-cpu frequency, time is exported
in nano-seconds
We do not know the cpufreq table size at compile time.
So a new config option CONFIG_CPUACCT_CPUFREQ_TABLE_MAX is intruduced,
to determine the cpufreq table per-cpu in the cpuacct struct.
Signed-off-by: Mike Chan <mike@android.com>
---
init/Kconfig | 5 +++
kernel/sched.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 104 insertions(+), 0 deletions(-)
diff --git a/init/Kconfig b/init/Kconfig
index eb77e8c..e1e86df 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -534,6 +534,11 @@ config CGROUP_CPUACCT
Provides a simple Resource Controller for monitoring the
total CPU consumed by the tasks in a cgroup.
+config CPUACCT_CPUFREQ_TABLE_MAX
+ int "Max CPUFREQ table size"
+ depends on CGROUP_CPUACCT && CPU_FREQ_TABLE
+ default 32
+
config RESOURCE_COUNTERS
bool "Resource counters"
help
diff --git a/kernel/sched.c b/kernel/sched.c
index 528a105..a0b56b5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
#include <linux/debugfs.h>
#include <linux/ctype.h>
#include <linux/ftrace.h>
+#include <linux/cpufreq.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
@@ -8817,6 +8818,11 @@ struct cgroup_subsys cpu_cgroup_subsys = {
* (balbir@in.ibm.com).
*/
+#ifdef CONFIG_CPU_FREQ_STAT
+/* The alloc_percpu macro uses typeof so we must define a type here. */
+typedef struct { u64 usage[CONFIG_CPUACCT_CPUFREQ_TABLE_MAX]; } cpufreq_usage_t;
+#endif
+
/* track cpu usage of a group of tasks and its child groups */
struct cpuacct {
struct cgroup_subsys_state css;
@@ -8824,6 +8830,10 @@ struct cpuacct {
u64 __percpu *cpuusage;
struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
struct cpuacct *parent;
+#ifdef CONFIG_CPU_FREQ_STAT
+ /* cpufreq_usage is a pointer to an array of u64-types on every cpu */
+ cpufreq_usage_t *cpufreq_usage;
+#endif
};
struct cgroup_subsys cpuacct_subsys;
@@ -8856,6 +8866,10 @@ static struct cgroup_subsys_state *cpuacct_create(
if (!ca->cpuusage)
goto out_free_ca;
+#ifdef CONFIG_CPU_FREQ_STAT
+ ca->cpufreq_usage = alloc_percpu(cpufreq_usage_t);
+#endif
+
for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
if (percpu_counter_init(&ca->cpustat[i], 0))
goto out_free_counters;
@@ -8888,6 +8902,74 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
kfree(ca);
}
+#ifdef CONFIG_CPU_FREQ_STAT
+static int cpufreq_index;
+static int cpuacct_cpufreq_notify(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ int ret;
+ struct cpufreq_policy *policy;
+ struct cpufreq_freqs *freq = data;
+ struct cpufreq_frequency_table *table;
+
+ if (val != CPUFREQ_POSTCHANGE)
+ return 0;
+
+ /* Update cpufreq_index with current speed */
+ table = cpufreq_frequency_get_table(freq->cpu);
+ policy = cpufreq_cpu_get(freq->cpu);
+ ret = cpufreq_frequency_table_target(policy, table,
+ cpufreq_quick_get(freq->cpu),
+ CPUFREQ_RELATION_L, &cpufreq_index);
+ cpufreq_cpu_put(policy);
+ return ret;
+}
+
+static struct notifier_block cpufreq_notifier = {
+ .notifier_call = cpuacct_cpufreq_notify,
+};
+
+static __init int cpuacct_init(void)
+{
+ return cpufreq_register_notifier(&cpufreq_notifier,
+ CPUFREQ_TRANSITION_NOTIFIER);
+}
+
+module_init(cpuacct_init);
+
+static int cpuacct_cpufreq_show(struct cgroup *cgrp, struct cftype *cft,
+ struct cgroup_map_cb *cb)
+{
+ int i;
+ unsigned int cpu;
+ char buf[32];
+ struct cpuacct *ca = cgroup_ca(cgrp);
+ struct cpufreq_frequency_table *table =
+ cpufreq_frequency_get_table(smp_processor_id());
+
+ for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
+ u64 total = 0;
+
+ if (table[i].frequency == CPUFREQ_ENTRY_INVALID)
+ continue;
+
+ for_each_present_cpu(cpu) {
+ cpufreq_usage_t *cpufreq_usage;
+
+ cpufreq_usage = per_cpu_ptr(ca->cpufreq_usage, cpu);
+ table = cpufreq_frequency_get_table(cpu);
+
+ total += cpufreq_usage->usage[i];
+ }
+
+ snprintf(buf, sizeof(buf), "%d", table[i].frequency);
+ cb->fill(cb, buf, total);
+ }
+
+ return 0;
+}
+#endif
+
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
{
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
@@ -9003,6 +9085,12 @@ static struct cftype files[] = {
.name = "stat",
.read_map = cpuacct_stats_show,
},
+#ifdef CONFIG_CPU_FREQ_STAT
+ {
+ .name = "cpufreq",
+ .read_map = cpuacct_cpufreq_show,
+ },
+#endif
};
static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -9031,6 +9119,17 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
for (; ca; ca = ca->parent) {
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+#ifdef CONFIG_CPU_FREQ_STAT
+ cpufreq_usage_t *cpufreq_usage =
+ per_cpu_ptr(ca->cpufreq_usage, cpu);
+
+ if (cpufreq_index > CONFIG_CPUACCT_CPUFREQ_TABLE_MAX)
+ printk_once(KERN_WARNING "cpuacct_charge: "
+ "cpufreq_index: %d exceeds max table "
+ "size\n", cpufreq_index);
+ else
+ cpufreq_usage->usage[cpufreq_index] += cputime;
+#endif
*cpuusage += cputime;
}
--
1.7.0.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency 2010-04-05 19:33 [RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency Mike Chan @ 2010-04-05 19:52 ` Paul Menage 2010-04-05 21:02 ` Mike Chan 2010-04-06 2:15 ` Balbir Singh 2010-04-06 2:13 ` Balbir Singh 2010-04-06 17:48 ` Daniel Walker 2 siblings, 2 replies; 10+ messages in thread From: Paul Menage @ 2010-04-05 19:52 UTC (permalink / raw) To: Mike Chan; +Cc: balbir, cpufreq, linux-kernel On Mon, Apr 5, 2010 at 12:33 PM, Mike Chan <mike@android.com> wrote: > New file: cpuacct.cpufreq when CONFIG_CPU_FREQ_STATS is enabled. > > cpuacct.cpufreq accounts for cpu time per-cpu frequency, time is exported > in nano-seconds Can you clarify the wording of this (and describe it in the relevant Documentation/... file)? It's not clear. >From the code, it appears that the file reports a breakdown of how much CPU time the cgroup has been consuming at each different CPU frequency level. If so, then you probably want to reword the description to avoid "per-cpu", since that makes it sounds as though it's reporting something, well, "per CPU". Also, what's the motivation here? If it's for power monitoring purposes, might it be simpler to just report a single number, that's the integral of the CPU usage by frequency index (i.e. calculated from the same information that this patch is already gathering in cpuacct_charge()) rather than dumping a whole table on userspace? Paul > > We do not know the cpufreq table size at compile time. > So a new config option CONFIG_CPUACCT_CPUFREQ_TABLE_MAX is intruduced, > to determine the cpufreq table per-cpu in the cpuacct struct. > > Signed-off-by: Mike Chan <mike@android.com> > --- > init/Kconfig | 5 +++ > kernel/sched.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 104 insertions(+), 0 deletions(-) > > diff --git a/init/Kconfig b/init/Kconfig > index eb77e8c..e1e86df 100644 > --- a/init/Kconfig > +++ b/init/Kconfig > @@ -534,6 +534,11 @@ config CGROUP_CPUACCT > Provides a simple Resource Controller for monitoring the > total CPU consumed by the tasks in a cgroup. > > +config CPUACCT_CPUFREQ_TABLE_MAX > + int "Max CPUFREQ table size" > + depends on CGROUP_CPUACCT && CPU_FREQ_TABLE > + default 32 > + > config RESOURCE_COUNTERS > bool "Resource counters" > help > diff --git a/kernel/sched.c b/kernel/sched.c > index 528a105..a0b56b5 100644 > --- a/kernel/sched.c > +++ b/kernel/sched.c > @@ -71,6 +71,7 @@ > #include <linux/debugfs.h> > #include <linux/ctype.h> > #include <linux/ftrace.h> > +#include <linux/cpufreq.h> > > #include <asm/tlb.h> > #include <asm/irq_regs.h> > @@ -8817,6 +8818,11 @@ struct cgroup_subsys cpu_cgroup_subsys = { > * (balbir@in.ibm.com). > */ > > +#ifdef CONFIG_CPU_FREQ_STAT > +/* The alloc_percpu macro uses typeof so we must define a type here. */ > +typedef struct { u64 usage[CONFIG_CPUACCT_CPUFREQ_TABLE_MAX]; } cpufreq_usage_t; > +#endif > + > /* track cpu usage of a group of tasks and its child groups */ > struct cpuacct { > struct cgroup_subsys_state css; > @@ -8824,6 +8830,10 @@ struct cpuacct { > u64 __percpu *cpuusage; > struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; > struct cpuacct *parent; > +#ifdef CONFIG_CPU_FREQ_STAT > + /* cpufreq_usage is a pointer to an array of u64-types on every cpu */ > + cpufreq_usage_t *cpufreq_usage; > +#endif > }; > > struct cgroup_subsys cpuacct_subsys; > @@ -8856,6 +8866,10 @@ static struct cgroup_subsys_state *cpuacct_create( > if (!ca->cpuusage) > goto out_free_ca; > > +#ifdef CONFIG_CPU_FREQ_STAT > + ca->cpufreq_usage = alloc_percpu(cpufreq_usage_t); > +#endif > + > for (i = 0; i < CPUACCT_STAT_NSTATS; i++) > if (percpu_counter_init(&ca->cpustat[i], 0)) > goto out_free_counters; > @@ -8888,6 +8902,74 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) > kfree(ca); > } > > +#ifdef CONFIG_CPU_FREQ_STAT > +static int cpufreq_index; > +static int cpuacct_cpufreq_notify(struct notifier_block *nb, unsigned long val, > + void *data) > +{ > + int ret; > + struct cpufreq_policy *policy; > + struct cpufreq_freqs *freq = data; > + struct cpufreq_frequency_table *table; > + > + if (val != CPUFREQ_POSTCHANGE) > + return 0; > + > + /* Update cpufreq_index with current speed */ > + table = cpufreq_frequency_get_table(freq->cpu); > + policy = cpufreq_cpu_get(freq->cpu); > + ret = cpufreq_frequency_table_target(policy, table, > + cpufreq_quick_get(freq->cpu), > + CPUFREQ_RELATION_L, &cpufreq_index); > + cpufreq_cpu_put(policy); > + return ret; > +} > + > +static struct notifier_block cpufreq_notifier = { > + .notifier_call = cpuacct_cpufreq_notify, > +}; > + > +static __init int cpuacct_init(void) > +{ > + return cpufreq_register_notifier(&cpufreq_notifier, > + CPUFREQ_TRANSITION_NOTIFIER); > +} > + > +module_init(cpuacct_init); > + > +static int cpuacct_cpufreq_show(struct cgroup *cgrp, struct cftype *cft, > + struct cgroup_map_cb *cb) > +{ > + int i; > + unsigned int cpu; > + char buf[32]; > + struct cpuacct *ca = cgroup_ca(cgrp); > + struct cpufreq_frequency_table *table = > + cpufreq_frequency_get_table(smp_processor_id()); > + > + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { > + u64 total = 0; > + > + if (table[i].frequency == CPUFREQ_ENTRY_INVALID) > + continue; > + > + for_each_present_cpu(cpu) { > + cpufreq_usage_t *cpufreq_usage; > + > + cpufreq_usage = per_cpu_ptr(ca->cpufreq_usage, cpu); > + table = cpufreq_frequency_get_table(cpu); > + > + total += cpufreq_usage->usage[i]; > + } > + > + snprintf(buf, sizeof(buf), "%d", table[i].frequency); > + cb->fill(cb, buf, total); > + } > + > + return 0; > +} > +#endif > + > static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) > { > u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); > @@ -9003,6 +9085,12 @@ static struct cftype files[] = { > .name = "stat", > .read_map = cpuacct_stats_show, > }, > +#ifdef CONFIG_CPU_FREQ_STAT > + { > + .name = "cpufreq", > + .read_map = cpuacct_cpufreq_show, > + }, > +#endif > }; > > static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) > @@ -9031,6 +9119,17 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) > > for (; ca; ca = ca->parent) { > u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); > +#ifdef CONFIG_CPU_FREQ_STAT > + cpufreq_usage_t *cpufreq_usage = > + per_cpu_ptr(ca->cpufreq_usage, cpu); > + > + if (cpufreq_index > CONFIG_CPUACCT_CPUFREQ_TABLE_MAX) > + printk_once(KERN_WARNING "cpuacct_charge: " > + "cpufreq_index: %d exceeds max table " > + "size\n", cpufreq_index); > + else > + cpufreq_usage->usage[cpufreq_index] += cputime; > +#endif > *cpuusage += cputime; > } > > -- > 1.7.0.1 > > ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency 2010-04-05 19:52 ` Paul Menage @ 2010-04-05 21:02 ` Mike Chan 2010-04-06 2:15 ` Balbir Singh 1 sibling, 0 replies; 10+ messages in thread From: Mike Chan @ 2010-04-05 21:02 UTC (permalink / raw) To: Paul Menage; +Cc: balbir, cpufreq, linux-kernel On Mon, Apr 5, 2010 at 12:52 PM, Paul Menage <menage@google.com> wrote: > On Mon, Apr 5, 2010 at 12:33 PM, Mike Chan <mike@android.com> wrote: >> New file: cpuacct.cpufreq when CONFIG_CPU_FREQ_STATS is enabled. >> >> cpuacct.cpufreq accounts for cpu time per-cpu frequency, time is exported >> in nano-seconds > > Can you clarify the wording of this (and describe it in the relevant > Documentation/... file)? It's not clear. > > From the code, it appears that the file reports a breakdown of how > much CPU time the cgroup has been consuming at each different CPU > frequency level. If so, then you probably want to reword the > description to avoid "per-cpu", since that makes it sounds as though > it's reporting something, well, "per CPU". > Thanks for the input, I'll clarify in the commit and documentation. > Also, what's the motivation here? If it's for power monitoring > purposes, might it be simpler to just report a single number, that's > the integral of the CPU usage by frequency index (i.e. calculated from > the same information that this patch is already gathering in > cpuacct_charge()) rather than dumping a whole table on userspace? > The motivation is for power monitoring. Exporting an integral is an interesting idea, but this gets tricky since we can't just take the integral of the cpu speed, since each speed has different power measurements, and these are not linear for each cpu speed. We could export power values in the board files for various cpu-frequencies This can also be useful for userspace developers / system admins so they can optimized their cpu utilization and overall cpu speed usage can give insight for cpu max / min speed capping. I liked the integral idea, but I think we lose some useful information that the original patch intended to show. -- Mike > Paul > >> >> We do not know the cpufreq table size at compile time. >> So a new config option CONFIG_CPUACCT_CPUFREQ_TABLE_MAX is intruduced, >> to determine the cpufreq table per-cpu in the cpuacct struct. >> >> Signed-off-by: Mike Chan <mike@android.com> >> --- >> init/Kconfig | 5 +++ >> kernel/sched.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 104 insertions(+), 0 deletions(-) >> >> diff --git a/init/Kconfig b/init/Kconfig >> index eb77e8c..e1e86df 100644 >> --- a/init/Kconfig >> +++ b/init/Kconfig >> @@ -534,6 +534,11 @@ config CGROUP_CPUACCT >> Provides a simple Resource Controller for monitoring the >> total CPU consumed by the tasks in a cgroup. >> >> +config CPUACCT_CPUFREQ_TABLE_MAX >> + int "Max CPUFREQ table size" >> + depends on CGROUP_CPUACCT && CPU_FREQ_TABLE >> + default 32 >> + >> config RESOURCE_COUNTERS >> bool "Resource counters" >> help >> diff --git a/kernel/sched.c b/kernel/sched.c >> index 528a105..a0b56b5 100644 >> --- a/kernel/sched.c >> +++ b/kernel/sched.c >> @@ -71,6 +71,7 @@ >> #include <linux/debugfs.h> >> #include <linux/ctype.h> >> #include <linux/ftrace.h> >> +#include <linux/cpufreq.h> >> >> #include <asm/tlb.h> >> #include <asm/irq_regs.h> >> @@ -8817,6 +8818,11 @@ struct cgroup_subsys cpu_cgroup_subsys = { >> * (balbir@in.ibm.com). >> */ >> >> +#ifdef CONFIG_CPU_FREQ_STAT >> +/* The alloc_percpu macro uses typeof so we must define a type here. */ >> +typedef struct { u64 usage[CONFIG_CPUACCT_CPUFREQ_TABLE_MAX]; } cpufreq_usage_t; >> +#endif >> + >> /* track cpu usage of a group of tasks and its child groups */ >> struct cpuacct { >> struct cgroup_subsys_state css; >> @@ -8824,6 +8830,10 @@ struct cpuacct { >> u64 __percpu *cpuusage; >> struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; >> struct cpuacct *parent; >> +#ifdef CONFIG_CPU_FREQ_STAT >> + /* cpufreq_usage is a pointer to an array of u64-types on every cpu */ >> + cpufreq_usage_t *cpufreq_usage; >> +#endif >> }; >> >> struct cgroup_subsys cpuacct_subsys; >> @@ -8856,6 +8866,10 @@ static struct cgroup_subsys_state *cpuacct_create( >> if (!ca->cpuusage) >> goto out_free_ca; >> >> +#ifdef CONFIG_CPU_FREQ_STAT >> + ca->cpufreq_usage = alloc_percpu(cpufreq_usage_t); >> +#endif >> + >> for (i = 0; i < CPUACCT_STAT_NSTATS; i++) >> if (percpu_counter_init(&ca->cpustat[i], 0)) >> goto out_free_counters; >> @@ -8888,6 +8902,74 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) >> kfree(ca); >> } >> >> +#ifdef CONFIG_CPU_FREQ_STAT >> +static int cpufreq_index; >> +static int cpuacct_cpufreq_notify(struct notifier_block *nb, unsigned long val, >> + void *data) >> +{ >> + int ret; >> + struct cpufreq_policy *policy; >> + struct cpufreq_freqs *freq = data; >> + struct cpufreq_frequency_table *table; >> + >> + if (val != CPUFREQ_POSTCHANGE) >> + return 0; >> + >> + /* Update cpufreq_index with current speed */ >> + table = cpufreq_frequency_get_table(freq->cpu); >> + policy = cpufreq_cpu_get(freq->cpu); >> + ret = cpufreq_frequency_table_target(policy, table, >> + cpufreq_quick_get(freq->cpu), >> + CPUFREQ_RELATION_L, &cpufreq_index); >> + cpufreq_cpu_put(policy); >> + return ret; >> +} >> + >> +static struct notifier_block cpufreq_notifier = { >> + .notifier_call = cpuacct_cpufreq_notify, >> +}; >> + >> +static __init int cpuacct_init(void) >> +{ >> + return cpufreq_register_notifier(&cpufreq_notifier, >> + CPUFREQ_TRANSITION_NOTIFIER); >> +} >> + >> +module_init(cpuacct_init); >> + >> +static int cpuacct_cpufreq_show(struct cgroup *cgrp, struct cftype *cft, >> + struct cgroup_map_cb *cb) >> +{ >> + int i; >> + unsigned int cpu; >> + char buf[32]; >> + struct cpuacct *ca = cgroup_ca(cgrp); >> + struct cpufreq_frequency_table *table = >> + cpufreq_frequency_get_table(smp_processor_id()); >> + >> + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { >> + u64 total = 0; >> + >> + if (table[i].frequency == CPUFREQ_ENTRY_INVALID) >> + continue; >> + >> + for_each_present_cpu(cpu) { >> + cpufreq_usage_t *cpufreq_usage; >> + >> + cpufreq_usage = per_cpu_ptr(ca->cpufreq_usage, cpu); >> + table = cpufreq_frequency_get_table(cpu); >> + >> + total += cpufreq_usage->usage[i]; >> + } >> + >> + snprintf(buf, sizeof(buf), "%d", table[i].frequency); >> + cb->fill(cb, buf, total); >> + } >> + >> + return 0; >> +} >> +#endif >> + >> static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) >> { >> u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); >> @@ -9003,6 +9085,12 @@ static struct cftype files[] = { >> .name = "stat", >> .read_map = cpuacct_stats_show, >> }, >> +#ifdef CONFIG_CPU_FREQ_STAT >> + { >> + .name = "cpufreq", >> + .read_map = cpuacct_cpufreq_show, >> + }, >> +#endif >> }; >> >> static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) >> @@ -9031,6 +9119,17 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) >> >> for (; ca; ca = ca->parent) { >> u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); >> +#ifdef CONFIG_CPU_FREQ_STAT >> + cpufreq_usage_t *cpufreq_usage = >> + per_cpu_ptr(ca->cpufreq_usage, cpu); >> + >> + if (cpufreq_index > CONFIG_CPUACCT_CPUFREQ_TABLE_MAX) >> + printk_once(KERN_WARNING "cpuacct_charge: " >> + "cpufreq_index: %d exceeds max table " >> + "size\n", cpufreq_index); >> + else >> + cpufreq_usage->usage[cpufreq_index] += cputime; >> +#endif >> *cpuusage += cputime; >> } >> >> -- >> 1.7.0.1 >> >> > ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency 2010-04-05 19:52 ` Paul Menage 2010-04-05 21:02 ` Mike Chan @ 2010-04-06 2:15 ` Balbir Singh 2010-04-06 2:40 ` Mike Chan 1 sibling, 1 reply; 10+ messages in thread From: Balbir Singh @ 2010-04-06 2:15 UTC (permalink / raw) To: Paul Menage; +Cc: Mike Chan, cpufreq, linux-kernel * menage@google.com <menage@google.com> [2010-04-05 12:52:57]: > On Mon, Apr 5, 2010 at 12:33 PM, Mike Chan <mike@android.com> wrote: > > New file: cpuacct.cpufreq when CONFIG_CPU_FREQ_STATS is enabled. > > > > cpuacct.cpufreq accounts for cpu time per-cpu frequency, time is exported > > in nano-seconds > > Can you clarify the wording of this (and describe it in the relevant > Documentation/... file)? It's not clear. > > From the code, it appears that the file reports a breakdown of how > much CPU time the cgroup has been consuming at each different CPU > frequency level. If so, then you probably want to reword the > description to avoid "per-cpu", since that makes it sounds as though > it's reporting something, well, "per CPU". > > Also, what's the motivation here? If it's for power monitoring > purposes, might it be simpler to just report a single number, that's > the integral of the CPU usage by frequency index (i.e. calculated from > the same information that this patch is already gathering in > cpuacct_charge()) rather than dumping a whole table on userspace? As utilization increases, won't the integral quickly overflow? BTW, Mike have you looked at the scaled accounting infrastructure we have in taskstats? -- Three Cheers, Balbir ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency 2010-04-06 2:15 ` Balbir Singh @ 2010-04-06 2:40 ` Mike Chan 0 siblings, 0 replies; 10+ messages in thread From: Mike Chan @ 2010-04-06 2:40 UTC (permalink / raw) To: balbir; +Cc: Paul Menage, cpufreq, linux-kernel On Mon, Apr 5, 2010 at 7:15 PM, Balbir Singh <balbir@linux.vnet.ibm.com> wrote: > * menage@google.com <menage@google.com> [2010-04-05 12:52:57]: > >> On Mon, Apr 5, 2010 at 12:33 PM, Mike Chan <mike@android.com> wrote: >> > New file: cpuacct.cpufreq when CONFIG_CPU_FREQ_STATS is enabled. >> > >> > cpuacct.cpufreq accounts for cpu time per-cpu frequency, time is exported >> > in nano-seconds >> >> Can you clarify the wording of this (and describe it in the relevant >> Documentation/... file)? It's not clear. >> >> From the code, it appears that the file reports a breakdown of how >> much CPU time the cgroup has been consuming at each different CPU >> frequency level. If so, then you probably want to reword the >> description to avoid "per-cpu", since that makes it sounds as though >> it's reporting something, well, "per CPU". >> >> Also, what's the motivation here? If it's for power monitoring >> purposes, might it be simpler to just report a single number, that's >> the integral of the CPU usage by frequency index (i.e. calculated from >> the same information that this patch is already gathering in >> cpuacct_charge()) rather than dumping a whole table on userspace? > > As utilization increases, won't the integral quickly overflow? BTW, > Mike have you looked at the scaled accounting infrastructure we have > in taskstats? > I just looked (thanks for pointed that out, it was new to me, thx!). The problem here is that the accounting scales time based of cpu speed. As it currently stands it will not represent power tracking accurately as the power consumption of running at 1ghz / 2 != 500mhz. We would have to register a weight factor for each speed and those would have to be registered from the board files (at least in the ARM world), since voltage levels can vary across projects. -- Mike > -- > Three Cheers, > Balbir > ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency 2010-04-05 19:33 [RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency Mike Chan 2010-04-05 19:52 ` Paul Menage @ 2010-04-06 2:13 ` Balbir Singh 2010-04-06 4:14 ` Mike Chan 2010-04-06 17:48 ` Daniel Walker 2 siblings, 1 reply; 10+ messages in thread From: Balbir Singh @ 2010-04-06 2:13 UTC (permalink / raw) To: Mike Chan; +Cc: menage, cpufreq, linux-kernel * Mike Chan <mike@android.com> [2010-04-05 12:33:24]: > New file: cpuacct.cpufreq when CONFIG_CPU_FREQ_STATS is enabled. > > cpuacct.cpufreq accounts for cpu time per-cpu frequency, time is exported > in nano-seconds > > We do not know the cpufreq table size at compile time. > So a new config option CONFIG_CPUACCT_CPUFREQ_TABLE_MAX is intruduced, > to determine the cpufreq table per-cpu in the cpuacct struct. > > Signed-off-by: Mike Chan <mike@android.com> > --- > init/Kconfig | 5 +++ > kernel/sched.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 104 insertions(+), 0 deletions(-) > > diff --git a/init/Kconfig b/init/Kconfig > index eb77e8c..e1e86df 100644 > --- a/init/Kconfig > +++ b/init/Kconfig > @@ -534,6 +534,11 @@ config CGROUP_CPUACCT > Provides a simple Resource Controller for monitoring the > total CPU consumed by the tasks in a cgroup. > > +config CPUACCT_CPUFREQ_TABLE_MAX > + int "Max CPUFREQ table size" > + depends on CGROUP_CPUACCT && CPU_FREQ_TABLE > + default 32 > + > config RESOURCE_COUNTERS > bool "Resource counters" > help > diff --git a/kernel/sched.c b/kernel/sched.c > index 528a105..a0b56b5 100644 > --- a/kernel/sched.c > +++ b/kernel/sched.c > @@ -71,6 +71,7 @@ > #include <linux/debugfs.h> > #include <linux/ctype.h> > #include <linux/ftrace.h> > +#include <linux/cpufreq.h> > > #include <asm/tlb.h> > #include <asm/irq_regs.h> > @@ -8817,6 +8818,11 @@ struct cgroup_subsys cpu_cgroup_subsys = { > * (balbir@in.ibm.com). > */ > > +#ifdef CONFIG_CPU_FREQ_STAT > +/* The alloc_percpu macro uses typeof so we must define a type here. */ > +typedef struct { u64 usage[CONFIG_CPUACCT_CPUFREQ_TABLE_MAX]; } cpufreq_usage_t; > +#endif > + > /* track cpu usage of a group of tasks and its child groups */ > struct cpuacct { > struct cgroup_subsys_state css; > @@ -8824,6 +8830,10 @@ struct cpuacct { > u64 __percpu *cpuusage; > struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; > struct cpuacct *parent; > +#ifdef CONFIG_CPU_FREQ_STAT > + /* cpufreq_usage is a pointer to an array of u64-types on every cpu */ > + cpufreq_usage_t *cpufreq_usage; > +#endif > }; > > struct cgroup_subsys cpuacct_subsys; > @@ -8856,6 +8866,10 @@ static struct cgroup_subsys_state *cpuacct_create( > if (!ca->cpuusage) > goto out_free_ca; > > +#ifdef CONFIG_CPU_FREQ_STAT > + ca->cpufreq_usage = alloc_percpu(cpufreq_usage_t); > +#endif > + > for (i = 0; i < CPUACCT_STAT_NSTATS; i++) > if (percpu_counter_init(&ca->cpustat[i], 0)) > goto out_free_counters; > @@ -8888,6 +8902,74 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) > kfree(ca); > } > > +#ifdef CONFIG_CPU_FREQ_STAT > +static int cpufreq_index; > +static int cpuacct_cpufreq_notify(struct notifier_block *nb, unsigned long val, > + void *data) > +{ > + int ret; > + struct cpufreq_policy *policy; > + struct cpufreq_freqs *freq = data; > + struct cpufreq_frequency_table *table; > + > + if (val != CPUFREQ_POSTCHANGE) > + return 0; > + > + /* Update cpufreq_index with current speed */ > + table = cpufreq_frequency_get_table(freq->cpu); > + policy = cpufreq_cpu_get(freq->cpu); > + ret = cpufreq_frequency_table_target(policy, table, > + cpufreq_quick_get(freq->cpu), Since you've already done a cpufreq_cpu_get, can't you directly dereference policy->cur here? > + CPUFREQ_RELATION_L, &cpufreq_index); The code is unclear > + cpufreq_cpu_put(policy); > + return ret; > +} > + > +static struct notifier_block cpufreq_notifier = { > + .notifier_call = cpuacct_cpufreq_notify, > +}; > + > +static __init int cpuacct_init(void) > +{ > + return cpufreq_register_notifier(&cpufreq_notifier, > + CPUFREQ_TRANSITION_NOTIFIER); > +} > + > +module_init(cpuacct_init); > + > +static int cpuacct_cpufreq_show(struct cgroup *cgrp, struct cftype *cft, > + struct cgroup_map_cb *cb) > +{ > + int i; > + unsigned int cpu; > + char buf[32]; > + struct cpuacct *ca = cgroup_ca(cgrp); > + struct cpufreq_frequency_table *table = > + cpufreq_frequency_get_table(smp_processor_id()); > + > + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { > + u64 total = 0; > + > + if (table[i].frequency == CPUFREQ_ENTRY_INVALID) > + continue; > + > + for_each_present_cpu(cpu) { Why do we care about all present cpus? A comment would be nice, is this an ABI thing? Do we care about data of offline cpus, etc. > + cpufreq_usage_t *cpufreq_usage; > + > + cpufreq_usage = per_cpu_ptr(ca->cpufreq_usage, cpu); > + table = cpufreq_frequency_get_table(cpu); > + > + total += cpufreq_usage->usage[i]; > + } > + > + snprintf(buf, sizeof(buf), "%d", table[i].frequency); > + cb->fill(cb, buf, total); > + } > + > + return 0; > +} > +#endif > + > static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) > { > u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); > @@ -9003,6 +9085,12 @@ static struct cftype files[] = { > .name = "stat", > .read_map = cpuacct_stats_show, > }, > +#ifdef CONFIG_CPU_FREQ_STAT > + { > + .name = "cpufreq", > + .read_map = cpuacct_cpufreq_show, > + }, > +#endif > }; > > static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) > @@ -9031,6 +9119,17 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) > > for (; ca; ca = ca->parent) { > u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); > +#ifdef CONFIG_CPU_FREQ_STAT > + cpufreq_usage_t *cpufreq_usage = > + per_cpu_ptr(ca->cpufreq_usage, cpu); > + > + if (cpufreq_index > CONFIG_CPUACCT_CPUFREQ_TABLE_MAX) > + printk_once(KERN_WARNING "cpuacct_charge: " > + "cpufreq_index: %d exceeds max table " > + "size\n", cpufreq_index); WARN_ON_ONCE() would be more readable, IMHO > + else > + cpufreq_usage->usage[cpufreq_index] += cputime; > +#endif > *cpuusage += cputime; > } > > -- > 1.7.0.1 > -- Three Cheers, Balbir ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency 2010-04-06 2:13 ` Balbir Singh @ 2010-04-06 4:14 ` Mike Chan 0 siblings, 0 replies; 10+ messages in thread From: Mike Chan @ 2010-04-06 4:14 UTC (permalink / raw) To: balbir; +Cc: menage, cpufreq, linux-kernel On Mon, Apr 5, 2010 at 7:13 PM, Balbir Singh <balbir@linux.vnet.ibm.com> wrote: > * Mike Chan <mike@android.com> [2010-04-05 12:33:24]: > >> New file: cpuacct.cpufreq when CONFIG_CPU_FREQ_STATS is enabled. >> >> cpuacct.cpufreq accounts for cpu time per-cpu frequency, time is exported >> in nano-seconds >> >> We do not know the cpufreq table size at compile time. >> So a new config option CONFIG_CPUACCT_CPUFREQ_TABLE_MAX is intruduced, >> to determine the cpufreq table per-cpu in the cpuacct struct. >> >> Signed-off-by: Mike Chan <mike@android.com> >> --- >> init/Kconfig | 5 +++ >> kernel/sched.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 104 insertions(+), 0 deletions(-) >> >> diff --git a/init/Kconfig b/init/Kconfig >> index eb77e8c..e1e86df 100644 >> --- a/init/Kconfig >> +++ b/init/Kconfig >> @@ -534,6 +534,11 @@ config CGROUP_CPUACCT >> Provides a simple Resource Controller for monitoring the >> total CPU consumed by the tasks in a cgroup. >> >> +config CPUACCT_CPUFREQ_TABLE_MAX >> + int "Max CPUFREQ table size" >> + depends on CGROUP_CPUACCT && CPU_FREQ_TABLE >> + default 32 >> + >> config RESOURCE_COUNTERS >> bool "Resource counters" >> help >> diff --git a/kernel/sched.c b/kernel/sched.c >> index 528a105..a0b56b5 100644 >> --- a/kernel/sched.c >> +++ b/kernel/sched.c >> @@ -71,6 +71,7 @@ >> #include <linux/debugfs.h> >> #include <linux/ctype.h> >> #include <linux/ftrace.h> >> +#include <linux/cpufreq.h> >> >> #include <asm/tlb.h> >> #include <asm/irq_regs.h> >> @@ -8817,6 +8818,11 @@ struct cgroup_subsys cpu_cgroup_subsys = { >> * (balbir@in.ibm.com). >> */ >> >> +#ifdef CONFIG_CPU_FREQ_STAT >> +/* The alloc_percpu macro uses typeof so we must define a type here. */ >> +typedef struct { u64 usage[CONFIG_CPUACCT_CPUFREQ_TABLE_MAX]; } cpufreq_usage_t; >> +#endif >> + >> /* track cpu usage of a group of tasks and its child groups */ >> struct cpuacct { >> struct cgroup_subsys_state css; >> @@ -8824,6 +8830,10 @@ struct cpuacct { >> u64 __percpu *cpuusage; >> struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; >> struct cpuacct *parent; >> +#ifdef CONFIG_CPU_FREQ_STAT >> + /* cpufreq_usage is a pointer to an array of u64-types on every cpu */ >> + cpufreq_usage_t *cpufreq_usage; >> +#endif >> }; >> >> struct cgroup_subsys cpuacct_subsys; >> @@ -8856,6 +8866,10 @@ static struct cgroup_subsys_state *cpuacct_create( >> if (!ca->cpuusage) >> goto out_free_ca; >> >> +#ifdef CONFIG_CPU_FREQ_STAT >> + ca->cpufreq_usage = alloc_percpu(cpufreq_usage_t); >> +#endif >> + >> for (i = 0; i < CPUACCT_STAT_NSTATS; i++) >> if (percpu_counter_init(&ca->cpustat[i], 0)) >> goto out_free_counters; >> @@ -8888,6 +8902,74 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) >> kfree(ca); >> } >> >> +#ifdef CONFIG_CPU_FREQ_STAT >> +static int cpufreq_index; >> +static int cpuacct_cpufreq_notify(struct notifier_block *nb, unsigned long val, >> + void *data) >> +{ >> + int ret; >> + struct cpufreq_policy *policy; >> + struct cpufreq_freqs *freq = data; >> + struct cpufreq_frequency_table *table; >> + >> + if (val != CPUFREQ_POSTCHANGE) >> + return 0; >> + >> + /* Update cpufreq_index with current speed */ >> + table = cpufreq_frequency_get_table(freq->cpu); >> + policy = cpufreq_cpu_get(freq->cpu); >> + ret = cpufreq_frequency_table_target(policy, table, >> + cpufreq_quick_get(freq->cpu), > > Since you've already done a cpufreq_cpu_get, can't you directly > dereference policy->cur here? Yes, we could. I thought I would use the cpufreq public api's from the header just incase there were any core-internal changes in the future. Although its probably safe to use policy->cur. > >> + CPUFREQ_RELATION_L, &cpufreq_index); > > The code is unclear > I can put a comment here about this. Basically when selecting a frequency from the freq_table using the cpufreq, incase the exact frequency you are looking for does not exist in the table you can specify "highest speed at or below requested speed" or "lowest speed at or below requested speed", either of these params work here since the speed we are choosing will always exist in the freq_table. >> + cpufreq_cpu_put(policy); >> + return ret; >> +} >> + >> +static struct notifier_block cpufreq_notifier = { >> + .notifier_call = cpuacct_cpufreq_notify, >> +}; >> + >> +static __init int cpuacct_init(void) >> +{ >> + return cpufreq_register_notifier(&cpufreq_notifier, >> + CPUFREQ_TRANSITION_NOTIFIER); >> +} >> + >> +module_init(cpuacct_init); >> + >> +static int cpuacct_cpufreq_show(struct cgroup *cgrp, struct cftype *cft, >> + struct cgroup_map_cb *cb) >> +{ >> + int i; >> + unsigned int cpu; >> + char buf[32]; >> + struct cpuacct *ca = cgroup_ca(cgrp); >> + struct cpufreq_frequency_table *table = >> + cpufreq_frequency_get_table(smp_processor_id()); >> + >> + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { >> + u64 total = 0; >> + >> + if (table[i].frequency == CPUFREQ_ENTRY_INVALID) >> + continue; >> + >> + for_each_present_cpu(cpu) { > > Why do we care about all present cpus? A comment would be nice, is > this an ABI thing? Do we care about data of offline cpus, etc. If we have a system with cpu hotplug and cpu's come online and offline for the lifetime of the system, we want to include their statistics when you dump the file. If you only care about currently online, you can have a case where Chrome has been running on CPU1, and you hotplug CPU1, immediately after you read cpuacct.cpufreq. I can add a comment here. > >> + cpufreq_usage_t *cpufreq_usage; >> + >> + cpufreq_usage = per_cpu_ptr(ca->cpufreq_usage, cpu); >> + table = cpufreq_frequency_get_table(cpu); >> + >> + total += cpufreq_usage->usage[i]; >> + } >> + >> + snprintf(buf, sizeof(buf), "%d", table[i].frequency); >> + cb->fill(cb, buf, total); >> + } >> + >> + return 0; >> +} >> +#endif >> + >> static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) >> { >> u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); >> @@ -9003,6 +9085,12 @@ static struct cftype files[] = { >> .name = "stat", >> .read_map = cpuacct_stats_show, >> }, >> +#ifdef CONFIG_CPU_FREQ_STAT >> + { >> + .name = "cpufreq", >> + .read_map = cpuacct_cpufreq_show, >> + }, >> +#endif >> }; >> >> static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) >> @@ -9031,6 +9119,17 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) >> >> for (; ca; ca = ca->parent) { >> u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); >> +#ifdef CONFIG_CPU_FREQ_STAT >> + cpufreq_usage_t *cpufreq_usage = >> + per_cpu_ptr(ca->cpufreq_usage, cpu); >> + >> + if (cpufreq_index > CONFIG_CPUACCT_CPUFREQ_TABLE_MAX) >> + printk_once(KERN_WARNING "cpuacct_charge: " >> + "cpufreq_index: %d exceeds max table " >> + "size\n", cpufreq_index); > > WARN_ON_ONCE() would be more readable, IMHO Although I do admit WARN is much easier to spot, WARN gives a stacktrace, which isn't too useful in this case and wastes dmesg buffer. > >> + else >> + cpufreq_usage->usage[cpufreq_index] += cputime; >> +#endif >> *cpuusage += cputime; >> } >> >> -- >> 1.7.0.1 >> > > -- > Three Cheers, > Balbir > ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency 2010-04-05 19:33 [RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency Mike Chan 2010-04-05 19:52 ` Paul Menage 2010-04-06 2:13 ` Balbir Singh @ 2010-04-06 17:48 ` Daniel Walker 2010-04-06 19:43 ` Mike Chan 2 siblings, 1 reply; 10+ messages in thread From: Daniel Walker @ 2010-04-06 17:48 UTC (permalink / raw) To: Mike Chan; +Cc: menage, balbir, cpufreq, linux-kernel On Mon, 2010-04-05 at 12:33 -0700, Mike Chan wrote: > New file: cpuacct.cpufreq when CONFIG_CPU_FREQ_STATS is enabled. > > cpuacct.cpufreq accounts for cpu time per-cpu frequency, time is exported > in nano-seconds > > We do not know the cpufreq table size at compile time. > So a new config option CONFIG_CPUACCT_CPUFREQ_TABLE_MAX is intruduced, > to determine the cpufreq table per-cpu in the cpuacct struct. > > Signed-off-by: Mike Chan <mike@android.com> > --- > init/Kconfig | 5 +++ > kernel/sched.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 104 insertions(+), 0 deletions(-) > > diff --git a/init/Kconfig b/init/Kconfig > index eb77e8c..e1e86df 100644 > --- a/init/Kconfig > +++ b/init/Kconfig > @@ -534,6 +534,11 @@ config CGROUP_CPUACCT > Provides a simple Resource Controller for monitoring the > total CPU consumed by the tasks in a cgroup. > > +config CPUACCT_CPUFREQ_TABLE_MAX > + int "Max CPUFREQ table size" > + depends on CGROUP_CPUACCT && CPU_FREQ_TABLE > + default 32 > + I'd say make it just a regular define unless you can think of a reason why a non-developer would want to touch this value. > config RESOURCE_COUNTERS > bool "Resource counters" > help > diff --git a/kernel/sched.c b/kernel/sched.c > index 528a105..a0b56b5 100644 > --- a/kernel/sched.c > +++ b/kernel/sched.c > @@ -71,6 +71,7 @@ > #include <linux/debugfs.h> > #include <linux/ctype.h> > #include <linux/ftrace.h> > +#include <linux/cpufreq.h> > > #include <asm/tlb.h> > #include <asm/irq_regs.h> > @@ -8817,6 +8818,11 @@ struct cgroup_subsys cpu_cgroup_subsys = { > * (balbir@in.ibm.com). > */ > > +#ifdef CONFIG_CPU_FREQ_STAT > +/* The alloc_percpu macro uses typeof so we must define a type here. */ > +typedef struct { u64 usage[CONFIG_CPUACCT_CPUFREQ_TABLE_MAX]; } cpufreq_usage_t; > +#endif you should be able to send it a regular struct . I found this usage, lport->dev_stats = alloc_percpu(struct fcoe_dev_stats); as an example. Daniel ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency 2010-04-06 17:48 ` Daniel Walker @ 2010-04-06 19:43 ` Mike Chan 2010-04-06 19:58 ` Daniel Walker 0 siblings, 1 reply; 10+ messages in thread From: Mike Chan @ 2010-04-06 19:43 UTC (permalink / raw) To: Daniel Walker; +Cc: menage, balbir, cpufreq, linux-kernel On Tue, Apr 6, 2010 at 10:48 AM, Daniel Walker <dwalker@fifo99.com> wrote: > On Mon, 2010-04-05 at 12:33 -0700, Mike Chan wrote: >> New file: cpuacct.cpufreq when CONFIG_CPU_FREQ_STATS is enabled. >> >> cpuacct.cpufreq accounts for cpu time per-cpu frequency, time is exported >> in nano-seconds >> >> We do not know the cpufreq table size at compile time. >> So a new config option CONFIG_CPUACCT_CPUFREQ_TABLE_MAX is intruduced, >> to determine the cpufreq table per-cpu in the cpuacct struct. >> >> Signed-off-by: Mike Chan <mike@android.com> >> --- >> init/Kconfig | 5 +++ >> kernel/sched.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 104 insertions(+), 0 deletions(-) >> >> diff --git a/init/Kconfig b/init/Kconfig >> index eb77e8c..e1e86df 100644 >> --- a/init/Kconfig >> +++ b/init/Kconfig >> @@ -534,6 +534,11 @@ config CGROUP_CPUACCT >> Provides a simple Resource Controller for monitoring the >> total CPU consumed by the tasks in a cgroup. >> >> +config CPUACCT_CPUFREQ_TABLE_MAX >> + int "Max CPUFREQ table size" >> + depends on CGROUP_CPUACCT && CPU_FREQ_TABLE >> + default 32 >> + > > I'd say make it just a regular define unless you can think of a reason > why a non-developer would want to touch this value. I originally thought about doing this, but my concern here is for future (or existing) cpu's that have more than 32 speed stepping. This will have to be updated as new cpu's are supported in mainline (if they exceed the max). Which might be acceptable or not. Ideally it would be nice to be able to pull the table size from the board-file, or determine the size at run-time instead of compile time. -- Mike > >> config RESOURCE_COUNTERS >> bool "Resource counters" >> help >> diff --git a/kernel/sched.c b/kernel/sched.c >> index 528a105..a0b56b5 100644 >> --- a/kernel/sched.c >> +++ b/kernel/sched.c >> @@ -71,6 +71,7 @@ >> #include <linux/debugfs.h> >> #include <linux/ctype.h> >> #include <linux/ftrace.h> >> +#include <linux/cpufreq.h> >> >> #include <asm/tlb.h> >> #include <asm/irq_regs.h> >> @@ -8817,6 +8818,11 @@ struct cgroup_subsys cpu_cgroup_subsys = { >> * (balbir@in.ibm.com). >> */ >> >> +#ifdef CONFIG_CPU_FREQ_STAT >> +/* The alloc_percpu macro uses typeof so we must define a type here. */ >> +typedef struct { u64 usage[CONFIG_CPUACCT_CPUFREQ_TABLE_MAX]; } cpufreq_usage_t; >> +#endif > > you should be able to send it a regular struct . I found this usage, > > lport->dev_stats = alloc_percpu(struct fcoe_dev_stats); > > as an example. > True, I shouldn't need the typedef. Actually I think I can get away with getting rid of this statement all together and just define an anonymous struct in the alloc_percpu function. ca->cpufreq_usage = alloc_percpu(struct { u64 usage[CONFIG_CPUACCT_CPUFREQ_TABLE_MAX]; }); > Daniel > > ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency 2010-04-06 19:43 ` Mike Chan @ 2010-04-06 19:58 ` Daniel Walker 0 siblings, 0 replies; 10+ messages in thread From: Daniel Walker @ 2010-04-06 19:58 UTC (permalink / raw) To: Mike Chan; +Cc: menage, balbir, cpufreq, linux-kernel On Tue, 2010-04-06 at 12:43 -0700, Mike Chan wrote: > On Tue, Apr 6, 2010 at 10:48 AM, Daniel Walker <dwalker@fifo99.com> wrote: > > On Mon, 2010-04-05 at 12:33 -0700, Mike Chan wrote: > >> New file: cpuacct.cpufreq when CONFIG_CPU_FREQ_STATS is enabled. > >> > >> cpuacct.cpufreq accounts for cpu time per-cpu frequency, time is exported > >> in nano-seconds > >> > >> We do not know the cpufreq table size at compile time. > >> So a new config option CONFIG_CPUACCT_CPUFREQ_TABLE_MAX is intruduced, > >> to determine the cpufreq table per-cpu in the cpuacct struct. > >> > >> Signed-off-by: Mike Chan <mike@android.com> > >> --- > >> init/Kconfig | 5 +++ > >> kernel/sched.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > >> 2 files changed, 104 insertions(+), 0 deletions(-) > >> > >> diff --git a/init/Kconfig b/init/Kconfig > >> index eb77e8c..e1e86df 100644 > >> --- a/init/Kconfig > >> +++ b/init/Kconfig > >> @@ -534,6 +534,11 @@ config CGROUP_CPUACCT > >> Provides a simple Resource Controller for monitoring the > >> total CPU consumed by the tasks in a cgroup. > >> > >> +config CPUACCT_CPUFREQ_TABLE_MAX > >> + int "Max CPUFREQ table size" > >> + depends on CGROUP_CPUACCT && CPU_FREQ_TABLE > >> + default 32 > >> + > > > > I'd say make it just a regular define unless you can think of a reason > > why a non-developer would want to touch this value. > > I originally thought about doing this, but my concern here is for > future (or existing) cpu's that have more than 32 speed stepping. This > will have to be updated as new cpu's are supported in mainline (if > they exceed the max). Which might be acceptable or not. > > Ideally it would be nice to be able to pull the table size from the > board-file, or determine the size at run-time instead of compile time. You should try to discover any current cpu's that have more than 32, if they exist. (32 seems like a lot) Then it should be ok to just allow others to patch up the value as needed, when new cpu's get added with more. There's nothing wrong with that practice AFAIK .. Daniel ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2010-04-06 19:58 UTC | newest] Thread overview: 10+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2010-04-05 19:33 [RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency Mike Chan 2010-04-05 19:52 ` Paul Menage 2010-04-05 21:02 ` Mike Chan 2010-04-06 2:15 ` Balbir Singh 2010-04-06 2:40 ` Mike Chan 2010-04-06 2:13 ` Balbir Singh 2010-04-06 4:14 ` Mike Chan 2010-04-06 17:48 ` Daniel Walker 2010-04-06 19:43 ` Mike Chan 2010-04-06 19:58 ` Daniel Walker
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox