* [PATCH] tracing, perf: add more power related events @ 2010-09-07 7:21 Jean Pihet 2010-09-07 8:01 ` Jean Pihet 2010-09-08 6:53 ` Ingo Molnar 0 siblings, 2 replies; 47+ messages in thread From: Jean Pihet @ 2010-09-07 7:21 UTC (permalink / raw) To: Thomas Renninger, Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown Cc: discuss, linux-pm, linux-omap Hi, Here is a patch proposal for adding new trace events for power management. Note: thread restarted after the initial discussions on LKML. Jean >From 6768b88e8133129fa847dd7a95dc6dd17c0662d2 Mon Sep 17 00:00:00 2001 From: Jean Pihet <jean.pihet@newoldbits.com> Date: Tue, 7 Sep 2010 09:12:48 +0200 Subject: [PATCH] [PATCH] tracing, perf: add more power related events This patch adds new generic events for dynamic power management tracing: - clock events class: used for clock enable/disable and for clock rate change, - power_domain events class: used for power domains transitions. The OMAP architecture is using the new events for PM debugging, however the new events are made generic enough to be used by all platforms. Signed-off-by: Jean Pihet <j-pihet@ti.com> --- arch/arm/mach-omap2/cpuidle34xx.c | 2 + arch/arm/mach-omap2/pm34xx.c | 4 ++ arch/arm/mach-omap2/powerdomain.c | 3 + arch/arm/plat-omap/clock.c | 13 ++++- arch/arm/plat-omap/cpu-omap.c | 5 ++- include/trace/events/power.h | 90 +++++++++++++++++++++++++++++++++++- 6 files changed, 110 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index 3d3d035..6113bd9 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c @@ -24,6 +24,7 @@ #include <linux/sched.h> #include <linux/cpuidle.h> +#include <trace/events/power.h> #include <plat/prcm.h> #include <plat/irqs.h> @@ -130,6 +131,7 @@ static int omap3_enter_idle(struct cpuidle_device *dev, local_irq_disable(); local_fiq_disable(); + trace_power_start(POWER_CSTATE, cx->type, smp_processor_id()); pwrdm_set_next_pwrst(mpu_pd, mpu_state); pwrdm_set_next_pwrst(core_pd, core_state); diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index f25bc3d..7bf8a87 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -28,6 +28,7 @@ #include <linux/clk.h> #include <linux/delay.h> #include <linux/slab.h> +#include <trace/events/power.h> #include <plat/sram.h> #include <plat/clockdomain.h> @@ -588,6 +589,7 @@ static void omap3_pm_idle(void) if (omap_irq_pending() || need_resched()) goto out; + trace_power_start(POWER_CSTATE, 1, smp_processor_id()); omap_sram_idle(); out: @@ -628,6 +630,8 @@ static int omap3_pm_suspend(void) omap2_pm_wakeup_on_timer(wakeup_timer_seconds, wakeup_timer_milliseconds); + trace_power_start(POWER_SSTATE, 1, smp_processor_id()); + /* Read current next_pwrsts */ list_for_each_entry(pwrst, &pwrst_list, node) pwrst->saved_state = pwrdm_read_next_pwrst(pwrst->pwrdm); diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c index 6527ec3..73cbe9a 100644 --- a/arch/arm/mach-omap2/powerdomain.c +++ b/arch/arm/mach-omap2/powerdomain.c @@ -23,6 +23,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/io.h> +#include <trace/events/power.h> #include <asm/atomic.h> @@ -440,6 +441,8 @@ int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 pwrst) pr_debug("powerdomain: setting next powerstate for %s to %0x\n", pwrdm->name, pwrst); + trace_power_domain_target(pwrdm->name, pwrst, smp_processor_id()); + prm_rmw_mod_reg_bits(OMAP_POWERSTATE_MASK, (pwrst << OMAP_POWERSTATE_SHIFT), pwrdm->prcm_offs, pwrstctrl_reg_offs); diff --git a/arch/arm/plat-omap/clock.c b/arch/arm/plat-omap/clock.c index 7190cbd..d6518f5 100644 --- a/arch/arm/plat-omap/clock.c +++ b/arch/arm/plat-omap/clock.c @@ -21,6 +21,7 @@ #include <linux/cpufreq.h> #include <linux/debugfs.h> #include <linux/io.h> +#include <trace/events/power.h> #include <plat/clock.h> @@ -43,8 +44,10 @@ int clk_enable(struct clk *clk) return -EINVAL; spin_lock_irqsave(&clockfw_lock, flags); - if (arch_clock->clk_enable) + if (arch_clock->clk_enable) { + trace_clock_enable(clk->name, 1, smp_processor_id()); ret = arch_clock->clk_enable(clk); + } spin_unlock_irqrestore(&clockfw_lock, flags); return ret; @@ -66,8 +69,10 @@ void clk_disable(struct clk *clk) goto out; } - if (arch_clock->clk_disable) + if (arch_clock->clk_disable) { + trace_clock_disable(clk->name, 0, smp_processor_id()); arch_clock->clk_disable(clk); + } out: spin_unlock_irqrestore(&clockfw_lock, flags); @@ -120,8 +125,10 @@ int clk_set_rate(struct clk *clk, unsigned long rate) return ret; spin_lock_irqsave(&clockfw_lock, flags); - if (arch_clock->clk_set_rate) + if (arch_clock->clk_set_rate) { + trace_clock_set_rate(clk->name, rate, smp_processor_id()); ret = arch_clock->clk_set_rate(clk, rate); + } if (ret == 0) { if (clk->recalc) clk->rate = clk->recalc(clk); diff --git a/arch/arm/plat-omap/cpu-omap.c b/arch/arm/plat-omap/cpu-omap.c index df08829..cc4e41f 100644 --- a/arch/arm/plat-omap/cpu-omap.c +++ b/arch/arm/plat-omap/cpu-omap.c @@ -25,6 +25,7 @@ #include <linux/err.h> #include <linux/clk.h> #include <linux/io.h> +#include <trace/events/power.h> #include <mach/hardware.h> #include <plat/clock.h> @@ -116,8 +117,10 @@ static int omap_target(struct cpufreq_policy *policy, cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); #elif defined(CONFIG_ARCH_OMAP3) && !defined(CONFIG_OMAP_PM_NONE) freq = target_freq * 1000; - if (opp_find_freq_ceil(mpu_dev, &freq)) + if (opp_find_freq_ceil(mpu_dev, &freq)) { + trace_power_frequency(POWER_PSTATE, freq, smp_processor_id()); omap_pm_cpu_set_freq(freq); + } #endif return ret; } diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 35a2a6e..286784d 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -10,12 +10,17 @@ #ifndef _TRACE_POWER_ENUM_ #define _TRACE_POWER_ENUM_ enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, + POWER_NONE = 0, + POWER_CSTATE = 1, /* C-State */ + POWER_PSTATE = 2, /* Fequency change or DVFS */ + POWER_SSTATE = 3, /* Suspend */ }; #endif +/* + * The power events are used for cpuidle & suspend (power_start, power_end) + * and for cpufreq (power_frequency) + */ DECLARE_EVENT_CLASS(power, TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), @@ -70,6 +75,85 @@ TRACE_EVENT(power_end, ); +/* + * The clock events are used for clock enable/disable and for + * clock rate change + */ +DECLARE_EVENT_CLASS(clock, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id), + + TP_STRUCT__entry( + __string( name, name ) + __field( u64, state ) + __field( u64, cpu_id ) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->state = state; + __entry->cpu_id = cpu_id; + ), + + TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), + (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) +); + +DEFINE_EVENT(clock, clock_enable, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + +DEFINE_EVENT(clock, clock_disable, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + +DEFINE_EVENT(clock, clock_set_rate, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + +/* + * The power domain events are used for power domains transitions + */ +DECLARE_EVENT_CLASS(power_domain, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id), + + TP_STRUCT__entry( + __string( name, name ) + __field( u64, state ) + __field( u64, cpu_id ) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->state = state; + __entry->cpu_id = cpu_id; +), + + TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), + (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) +); + +DEFINE_EVENT(power_domain, power_domain_target, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + #endif /* _TRACE_POWER_H */ /* This part must be outside protection */ -- 1.7.1 ^ permalink raw reply related [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-07 7:21 [PATCH] tracing, perf: add more power related events Jean Pihet @ 2010-09-07 8:01 ` Jean Pihet 2010-09-08 6:53 ` Ingo Molnar 1 sibling, 0 replies; 47+ messages in thread From: Jean Pihet @ 2010-09-07 8:01 UTC (permalink / raw) To: Thomas Renninger, Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown Cc: discuss, linux-pm, linux-omap Here is some more information about the patch: Initial discussion on LKML: cf. http://marc.info/?l=linux-kernel&m=128195697205096&w=4, PM debug and profiling wiki page with rationale, todo, patches, tools screenshots ...: http://omappedia.org/wiki/Power_Management_Debug_and_Profiling On Tue, Sep 7, 2010 at 9:21 AM, Jean Pihet <jean.pihet@newoldbits.com> wrote: > Hi, > > Here is a patch proposal for adding new trace events for power management. > Note: thread restarted after the initial discussions on LKML. Sorry the thread did not get restarted because I am using the same e-mail subject. Jean ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-07 7:21 [PATCH] tracing, perf: add more power related events Jean Pihet 2010-09-07 8:01 ` Jean Pihet @ 2010-09-08 6:53 ` Ingo Molnar 2010-09-09 7:15 ` Jean Pihet 1 sibling, 1 reply; 47+ messages in thread From: Ingo Molnar @ 2010-09-08 6:53 UTC (permalink / raw) To: Jean Pihet Cc: Thomas Renninger, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap * Jean Pihet <jean.pihet@newoldbits.com> wrote: > Hi, > > Here is a patch proposal for adding new trace events for power management. > Note: thread restarted after the initial discussions on LKML. Also mind including the ACPI tracepoints we talked about? That way a lot more people could test it, etc. Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-08 6:53 ` Ingo Molnar @ 2010-09-09 7:15 ` Jean Pihet 2010-09-09 7:54 ` Ingo Molnar 0 siblings, 1 reply; 47+ messages in thread From: Jean Pihet @ 2010-09-09 7:15 UTC (permalink / raw) To: Ingo Molnar Cc: Thomas Renninger, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap On Wed, Sep 8, 2010 at 8:53 AM, Ingo Molnar <mingo@elte.hu> wrote: > > * Jean Pihet <jean.pihet@newoldbits.com> wrote: > >> Hi, >> >> Here is a patch proposal for adding new trace events for power management. >> Note: thread restarted after the initial discussions on LKML. > > Also mind including the ACPI tracepoints we talked about? That way a lot > more people could test it, etc. I think the ACPI tracepoints can be added on top of the proposed patch. Is that ok? Jean ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-09 7:15 ` Jean Pihet @ 2010-09-09 7:54 ` Ingo Molnar 2010-09-15 15:45 ` Jean Pihet 0 siblings, 1 reply; 47+ messages in thread From: Ingo Molnar @ 2010-09-09 7:54 UTC (permalink / raw) To: Jean Pihet Cc: Thomas Renninger, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap * Jean Pihet <jean.pihet@newoldbits.com> wrote: > On Wed, Sep 8, 2010 at 8:53 AM, Ingo Molnar <mingo@elte.hu> wrote: > > > > * Jean Pihet <jean.pihet@newoldbits.com> wrote: > > > >> Hi, > >> > >> Here is a patch proposal for adding new trace events for power management. > >> Note: thread restarted after the initial discussions on LKML. > > > > Also mind including the ACPI tracepoints we talked about? That way a lot > > more people could test it, etc. > > I think the ACPI tracepoints can be added on top of the proposed > patch. Is that ok? Yeah - and the OMAP thing can be split up too if the OMAP folks prefer it that way, but we still want to _see_ all the patches in this thread together so that we have a critical mass of people interested in all this ... Thanks, Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-09 7:54 ` Ingo Molnar @ 2010-09-15 15:45 ` Jean Pihet 2010-09-17 13:08 ` Thomas Renninger 0 siblings, 1 reply; 47+ messages in thread From: Jean Pihet @ 2010-09-15 15:45 UTC (permalink / raw) To: Ingo Molnar Cc: Thomas Renninger, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap On Thu, Sep 9, 2010 at 9:54 AM, Ingo Molnar <mingo@elte.hu> wrote: >> I think the ACPI tracepoints can be added on top of the proposed >> patch. Is that ok? > > Yeah - and the OMAP thing can be split up too if the OMAP folks prefer > it that way, but we still want to _see_ all the patches in this thread > together so that we have a critical mass of people interested in all > this ... Any chance to get the patch merged in? What is needed wrt to ACPI support? > > Thanks, > > Ingo Thanks, Jean ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-15 15:45 ` Jean Pihet @ 2010-09-17 13:08 ` Thomas Renninger 2010-09-17 14:05 ` Jean Pihet 0 siblings, 1 reply; 47+ messages in thread From: Thomas Renninger @ 2010-09-17 13:08 UTC (permalink / raw) To: Jean Pihet Cc: Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users Hi, I had a quick look at this and it's amazing how broken the whole power event tracing interfaces are. It's not your fault, Jean, they always were and adding your stuff is fine. Some questions, maybe I've overseen something: Why does this event: DEFINE_EVENT(power, power_frequency, exist and takes a C-/P-state, now also an S-state type as argument? It should be named more generic, like: DEFINE_EVENT(power, power_switch, then it could get invoked when any P-/C-/S-/X- state switch happened. What kind of hack is this: TRACE_EVENT(power_end, showing up as: power_end: dummy=65535 What ends here? I know it's a workaround/hack for userspace apps to be able to detect leaving of a sleep state, but how would someone know or how would someone describe this in a proper API documentation? Apropos documentation..., are the power trace events documented somewhere? At least the state should still be passed, then the _start/_end thing can be reused by something else than C-states. I can't see the use of having _start/_end events at all. You are always in a state, having one: power_switch as suggested above, is enough to track everything. Examples: Today's C-state tracking: power_start: type=1 state=1 -> C1 entered power_end: dummy=65535 -> C-state left power_start: type=1 state=2 -> C2 entered power_end: dummy=65535 -> C-state left would then be: power_switch: type=1 state=1 -> C1 entered power_switch: type=1 state=0 -> C0 entered -> means C1 left... power_switch: type=1 state=2 -> C2 entered power_switch: type=1 state=0 -> C0 entered -> means C2 left... ... Todays P-state tracking: power_frequency: type=2 state=125000000 -> P-state change to 125 MHz power_frequency: type=2 state=90000000 -> P-state change to 90 MHz would then be: power_switch: type=2 state=125000000 -> P-state change to 125 MHz power_switch: type=2 state=90000000 -> P-state change to 90 MHz The S-state and T-state tracking would fit into that without modification. Thinking one step further, a possibility to track D-states would need an additional field, a pointer to the device, best a sysfs path or similar. Jean, I do not think tracing the S-state with power_start is a good idea. Best would be the power_frequency gets renamed (yes, breaks userspace, but those have to be adjusted) and used for P- and S- state tracking (and C-state tracking as well, but this would need additional userspace modifications). How do you track when the S-states end? Thomas ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-17 13:08 ` Thomas Renninger @ 2010-09-17 14:05 ` Jean Pihet 2010-09-17 14:24 ` Ingo Molnar ` (5 more replies) 0 siblings, 6 replies; 47+ messages in thread From: Jean Pihet @ 2010-09-17 14:05 UTC (permalink / raw) To: Thomas Renninger Cc: Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users Hi Thomas, On Fri, Sep 17, 2010 at 3:08 PM, Thomas Renninger <trenn@suse.de> wrote: > Hi, > > I had a quick look at this and it's amazing how broken > the whole power event tracing interfaces are. > It's not your fault, Jean, they always were and adding your stuff is > fine. That is the whole point! This code needs a serious clean-up and reorganization. The patch I submitted is now merged in the tip tree, but the can be corrected once the new power trace API is agreed on. > Some questions, maybe I've overseen something: > > Why does this event: > DEFINE_EVENT(power, power_frequency, > exist and takes a C-/P-state, now also an S-state type as argument? There is no clear link between the POWER_ types enum and the API, that needs to change. > It should be named more generic, like: > DEFINE_EVENT(power, power_switch, > then it could get invoked when any P-/C-/S-/X- state > switch happened. Agree. We need some better definitions for the events and at the same time a more flexible way to pass extra arguments (e.g. like a return code or the real HW state in case the desired state is not reached). Also it is required to be able to track sub-states. The idea is to identify the C-states latency in the low power entry and exit paths, using 2 new macros in the enum and a sub state argument. Cf. http://omappedia.org/images/b/b9/Pytimechart_screenshot9_rs.jpg (from http://omappedia.org/wiki/Power_Management_Debug_and_Profiling). > What kind of hack is this: > TRACE_EVENT(power_end, > showing up as: > power_end: dummy=65535 > What ends here? > I know it's a workaround/hack for userspace apps to be > able to detect leaving of a sleep state, but how would someone > know or how would someone describe this in a proper API documentation? That was a hack, it is now gone. The new power_end only takes the cpu_id argument. > Apropos documentation..., are the power trace events documented > somewhere? No. We need something like Documentation/trace/events-kmem.txt. I can write that with for the new power API. > At least the state should still be passed, then the _start/_end thing > can be reused by something else than C-states. > > I can't see the use of having _start/_end events at all. > You are always in a state, having one: > power_switch > as suggested above, is enough to track everything. Most of the events can be converted to power_switch, but not all. Example: you need to know when the suspend ends. > > Examples: > > Today's C-state tracking: > power_start: type=1 state=1 -> C1 entered > power_end: dummy=65535 -> C-state left > power_start: type=1 state=2 -> C2 entered > power_end: dummy=65535 -> C-state left > would then be: > power_switch: type=1 state=1 -> C1 entered > power_switch: type=1 state=0 -> C0 entered -> means C1 left... > power_switch: type=1 state=2 -> C2 entered > power_switch: type=1 state=0 -> C0 entered -> means C2 left... > ... > > Todays P-state tracking: > power_frequency: type=2 state=125000000 -> P-state change to 125 MHz > power_frequency: type=2 state=90000000 -> P-state change to 90 MHz > would then be: > power_switch: type=2 state=125000000 -> P-state change to 125 MHz > power_switch: type=2 state=90000000 -> P-state change to 90 MHz > > The S-state and T-state tracking would fit into that without > modification. > Thinking one step further, a possibility to track D-states would What are the T- and D- states? > need an additional field, a pointer to the device, best a sysfs path > or similar. Agree on that. As said above I would like to have extra args. > Jean, I do not think tracing the S-state with power_start is a > good idea. Best would be the power_frequency gets renamed >(yes, breaks > userspace, but those have to be adjusted) BTW I can patch pytimechart. What other tools have to change? powertop ...? > and used for P- and S- > state tracking (and C-state tracking as well, but this would need > additional userspace modifications). OK > How do you track when the S-states end? Two options: 1) have new arguments that indicates enter/exit and a sub-state; 2) a new event fot the exit. I am in favor of 1 which is more generic. Ok thanks for those remarks and suggetions. Let me come back soon with a new power API proposal. > > Thomas > Jean ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-17 14:05 ` Jean Pihet @ 2010-09-17 14:24 ` Ingo Molnar 2010-09-17 15:36 ` Thomas Renninger 2010-09-17 15:29 ` Thomas Renninger ` (4 subsequent siblings) 5 siblings, 1 reply; 47+ messages in thread From: Ingo Molnar @ 2010-09-17 14:24 UTC (permalink / raw) To: Jean Pihet Cc: Thomas Renninger, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users * Jean Pihet <jean.pihet@newoldbits.com> wrote: > > Apropos documentation..., are the power trace events documented > > somewhere? > > No. We need something like Documentation/trace/events-kmem.txt. I can > write that with for the new power API. Such a patch introducing events-power.txt would be most welcome! Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-17 14:24 ` Ingo Molnar @ 2010-09-17 15:36 ` Thomas Renninger 2010-09-17 16:24 ` Ingo Molnar 0 siblings, 1 reply; 47+ messages in thread From: Thomas Renninger @ 2010-09-17 15:36 UTC (permalink / raw) To: Ingo Molnar Cc: Jean Pihet, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Friday 17 September 2010 16:24:59 Ingo Molnar wrote: > > * Jean Pihet <jean.pihet@newoldbits.com> wrote: > > > > Apropos documentation..., are the power trace events documented > > > somewhere? > > > > No. We need something like Documentation/trace/events-kmem.txt. I > > can write that with for the new power API. > Such a patch introducing events-power.txt would be most welcome! Better wait a bit... As soon as it exists, things are harder to change. As long as there isn't an API documented, there doesn't exist one (yes we want to have perf timechart, etc. still functional/compatible, but still, better let's do not the same mistake with some quick shots). Whatabout adding perf/trace interfaces to: Documentation/ABI/*/perf-xy Thomas ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-17 15:36 ` Thomas Renninger @ 2010-09-17 16:24 ` Ingo Molnar 2010-09-17 22:26 ` Thomas Renninger 0 siblings, 1 reply; 47+ messages in thread From: Ingo Molnar @ 2010-09-17 16:24 UTC (permalink / raw) To: Thomas Renninger Cc: Jean Pihet, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users * Thomas Renninger <trenn@suse.de> wrote: > On Friday 17 September 2010 16:24:59 Ingo Molnar wrote: > > > > * Jean Pihet <jean.pihet@newoldbits.com> wrote: > > > > > > Apropos documentation..., are the power trace events documented > > > > somewhere? > > > > > > No. We need something like Documentation/trace/events-kmem.txt. I > > > can write that with for the new power API. > > Such a patch introducing events-power.txt would be most welcome! > Better wait a bit... > As soon as it exists, things are harder to change. > As long as there isn't an API documented, there doesn't exist one > (yes we want to have perf timechart, etc. still functional/compatible, > but still, better let's do not the same mistake with some quick shots). Music to my ears ;-) I was waiting for ACPI side feedback to all this, so whatever we do it would be nice to cover ARM and x86 as well - and in a single coherent framework. [ You dont even have to document it, as good code is self-explanatory ;-) ] Thanks, Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-17 16:24 ` Ingo Molnar @ 2010-09-17 22:26 ` Thomas Renninger 2010-09-22 15:31 ` Jean Pihet 0 siblings, 1 reply; 47+ messages in thread From: Thomas Renninger @ 2010-09-17 22:26 UTC (permalink / raw) To: Ingo Molnar Cc: Jean Pihet, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Friday 17 September 2010 18:24:12 Ingo Molnar wrote: > > * Thomas Renninger <trenn@suse.de> wrote: > > > On Friday 17 September 2010 16:24:59 Ingo Molnar wrote: > [ You dont even have to document it, as good code is self-explanatory ;-) ] I recently posted a patch exporting some things through /sys/kernel/debug/... Greg complained that a file for Documentation/ABI/{testing,stable}/* is missing and I fully agree. If different userspace apps should make use of this (in above case nobody than my debug userspace tool will...) and this should be called something like an API, it should be documented and if something changes, it should first be marked deprecated, etc. Thomas ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-17 22:26 ` Thomas Renninger @ 2010-09-22 15:31 ` Jean Pihet 2010-09-22 15:33 ` Arjan van de Ven 2010-09-22 18:49 ` Rafael J. Wysocki 0 siblings, 2 replies; 47+ messages in thread From: Jean Pihet @ 2010-09-22 15:31 UTC (permalink / raw) To: Thomas Renninger Cc: Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users Hi, Here is a patch that redefines the power events API. The advantages are: easier maintainance of the kernel and the user space tools, a cleaner and more generic interface, more parameters for fine tracing and even documentation! Thomas, this patch has your patch above merged in ('power-trace: Use power_switch_state instead of power_start and power_end'). The revised ACPI patch is coming asap. The trace points for x86 and OMAP are also udated accordingly. The pytimechart tool needs an update for the new API. This can be done as soon as the kernel code gets merged in. Please note the point below about the existing code in builtin-timechart.c. On Sat, Sep 18, 2010 at 12:26 AM, Thomas Renninger <trenn@suse.de> wrote: > On Friday 17 September 2010 18:24:12 Ingo Molnar wrote: >> >> * Thomas Renninger <trenn@suse.de> wrote: >> >> > On Friday 17 September 2010 16:24:59 Ingo Molnar wrote: > >> [ You dont even have to document it, as good code is self-explanatory ;-) ] > I recently posted a patch exporting some things through /sys/kernel/debug/... > Greg complained that a file for Documentation/ABI/{testing,stable}/* is missing > and I fully agree. The proposed patch has the documentation in Documentation/trace/events-power.txt. > If different userspace apps should make use of this (in above case nobody > than my debug userspace tool will...) and this should be called something like an API, > it should be documented and if something changes, it should > first be marked deprecated, etc. Note: the exsiting code in tools/perf/builtin-timechart.c needs an update for the new events API. Is this code still maintained? I not, could pytimechart be merged in instead? Feedback is welcome! > > Thomas > Thanks, Jean --- From f37d1875050d33273b10e99497b4059b37e6680d Mon Sep 17 00:00:00 2001 From: Jean Pihet <jean.pihet@newoldbits.com> Date: Wed, 22 Sep 2010 17:10:47 +0200 Subject: [PATCH] tools, perf: redefine the power events API Redefine the API with: - power_switch_state for C-, P- and S-states, - clock and power_domain events The new API allows easier maintainance of the kernel and the user space tools, a cleaner and more generic interface, more parameters for fine tracing and even documentation. The new events are used by the x86 and OMAP platforms. Signed-off-by: Jean Pihet <j-pihet@ti.com> --- Documentation/trace/events-power.txt | 70 +++++++++++++++++++++++++ arch/arm/mach-omap2/cpuidle34xx.c | 3 + arch/arm/mach-omap2/pm34xx.c | 11 ++++ arch/arm/mach-omap2/powerdomain.c | 3 + arch/arm/plat-omap/clock.c | 13 ++++- arch/arm/plat-omap/cpu-omap.c | 3 + arch/x86/kernel/process.c | 13 +++-- arch/x86/kernel/process_32.c | 3 +- arch/x86/kernel/process_64.c | 3 +- drivers/cpufreq/cpufreq.c | 3 +- drivers/cpuidle/cpuidle.c | 2 +- drivers/idle/intel_idle.c | 2 +- include/trace/events/power.h | 95 ++++++++++++++++------------------ kernel/trace/power-traces.c | 2 - 14 files changed, 161 insertions(+), 65 deletions(-) create mode 100644 Documentation/trace/events-power.txt diff --git a/Documentation/trace/events-power.txt b/Documentation/trace/events-power.txt new file mode 100644 index 0000000..967f842 --- /dev/null +++ b/Documentation/trace/events-power.txt @@ -0,0 +1,70 @@ + + Subsystem Trace Points: power + +The power tracing system captures events related to power transitions +within the kernel. Broadly speaking there are three major subheadings: + + o Power state switch which reports events related to suspend (S-states), + cpuidle (C-states) and cpufreq (P-states) + o System clock related changes + o Power domains related changes and transitions + +This document describes what each of the tracepoints is and why they +might be useful. + +Cf. include/trace/events/power.h for the events definitions. + +1. Power state switch events +============================ + +power_switch_state type=%lu state=%lu sub_state=%lu cpu_id=%lu + +The 'type' parameter takes one of those macros: + . POWER_NONE = 0, + . POWER_CSTATE = 1, /* C-State */ + . POWER_PSTATE = 2, /* Fequency change or DVFS */ + . POWER_SSTATE = 3, /* Suspend */ + +The 'state' parameter is set depending on the type: + . Target C-state for type=POWER_CSTATE, + . Target frequency for type=POWER_PSTATE, + . Target suspend state for type=POWER_SSTATE + +Note: the value of '0' for state means an exit from the current state, +i.e. power_switch_state(POWER_SSTATE, 1, 0, cpu) means 'the system enters +the suspend state' while power_switch_state(POWER_SSTATE, 0, 0, cpu) means +'the system exits the suspend state). + +The event which has 'state=0' is very important to the user space tools +which are using it to detect the end of the current state, and so to correctly +draw the states diagrams and to calculate accurate statistics etc. + +The 'sub_state' parameter is optional and is used as a sub state in the low +power mode enter or exit, a return code or the real state the system went into etc. + +2. Clocks events +================ +The clock events are used for clock enable/disable and for +clock rate change. + +clock_enable %s state=%lu sub_state=%lu cpu_id=%lu +clock_disable %s state=%lu sub_state=%lu cpu_id=%lu +clock_set_rate %s state=%lu sub_state=%lu cpu_id=%lu + +The first parameter gives the clock name (e.g. "gpio1_iclk"). +The second parameter is '1' for enable, '0' for disable, the target +clock rate for set_rate. +The 'sub_state' parameter is optional and is used as a sub state in the low +power mode enter or exit, a return code, the real state the system went into etc. + +3. Power domains events +======================= +The power domain events are used for power domains transitions + +power_domain_target "%s state=%lu sub_state=%lu cpu_id=%lu" + +The first parameter gives the power domain name (e.g. "mpu_pwrdm"). +The second parameter is the power domain target state. +The 'sub_state' parameter is optional and is used as a sub state in the low +power mode enter or exit, a return code, the real state the system went into etc. + diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index 3d3d035..8fa317c 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c @@ -24,6 +24,7 @@ #include <linux/sched.h> #include <linux/cpuidle.h> +#include <trace/events/power.h> #include <plat/prcm.h> #include <plat/irqs.h> @@ -122,6 +123,8 @@ static int omap3_enter_idle(struct cpuidle_device *dev, struct timespec ts_preidle, ts_postidle, ts_idle; u32 mpu_state = cx->mpu_state, core_state = cx->core_state; + trace_power_switch_state(POWER_CSTATE, cx->type, 0, smp_processor_id()); + current_cx_state = *cx; /* Used to keep track of the total time in idle */ diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 7b03426..8b774b1 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -28,6 +28,7 @@ #include <linux/clk.h> #include <linux/delay.h> #include <linux/slab.h> +#include <trace/events/power.h> #include <plat/sram.h> #include <plat/clockdomain.h> @@ -557,6 +558,8 @@ static void omap3_pm_idle(void) if (omap_irq_pending() || need_resched()) goto out; + trace_power_switch_state(POWER_CSTATE, 1, 0, smp_processor_id()); + omap_sram_idle(); out: @@ -595,6 +598,8 @@ static int omap3_pm_suspend(void) struct power_state *pwrst; int state, ret = 0; + trace_power_switch_state(POWER_SSTATE, 1, 0, smp_processor_id()); + if (wakeup_timer_seconds || wakeup_timer_milliseconds) omap2_pm_wakeup_on_timer(wakeup_timer_seconds, wakeup_timer_milliseconds); @@ -623,6 +628,10 @@ restore: printk(KERN_INFO "Powerdomain (%s) didn't enter " "target state %d\n", pwrst->pwrdm->name, pwrst->next_state); + trace_power_domain_target(pwrst->pwrdm->name, + pwrst->next_state, + state, + smp_processor_id()); ret = -1; } set_pwrdm_state(pwrst->pwrdm, pwrst->saved_state); @@ -633,6 +642,8 @@ restore: printk(KERN_INFO "Successfully put all powerdomains " "to target state\n"); + trace_power_switch_state(POWER_SSTATE, 0, 0, smp_processor_id()); + return ret; } diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c index 6527ec3..03d286e 100644 --- a/arch/arm/mach-omap2/powerdomain.c +++ b/arch/arm/mach-omap2/powerdomain.c @@ -23,6 +23,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/io.h> +#include <trace/events/power.h> #include <asm/atomic.h> @@ -440,6 +441,8 @@ int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 pwrst) pr_debug("powerdomain: setting next powerstate for %s to %0x\n", pwrdm->name, pwrst); + trace_power_domain_target(pwrdm->name, pwrst, 0, smp_processor_id()); + prm_rmw_mod_reg_bits(OMAP_POWERSTATE_MASK, (pwrst << OMAP_POWERSTATE_SHIFT), pwrdm->prcm_offs, pwrstctrl_reg_offs); diff --git a/arch/arm/plat-omap/clock.c b/arch/arm/plat-omap/clock.c index 7190cbd..d42739d 100644 --- a/arch/arm/plat-omap/clock.c +++ b/arch/arm/plat-omap/clock.c @@ -21,6 +21,7 @@ #include <linux/cpufreq.h> #include <linux/debugfs.h> #include <linux/io.h> +#include <trace/events/power.h> #include <plat/clock.h> @@ -43,8 +44,10 @@ int clk_enable(struct clk *clk) return -EINVAL; spin_lock_irqsave(&clockfw_lock, flags); - if (arch_clock->clk_enable) + if (arch_clock->clk_enable) { + trace_clock_enable(clk->name, 1, 0, smp_processor_id()); ret = arch_clock->clk_enable(clk); + } spin_unlock_irqrestore(&clockfw_lock, flags); return ret; @@ -66,8 +69,10 @@ void clk_disable(struct clk *clk) goto out; } - if (arch_clock->clk_disable) + if (arch_clock->clk_disable) { + trace_clock_disable(clk->name, 0, 0, smp_processor_id()); arch_clock->clk_disable(clk); + } out: spin_unlock_irqrestore(&clockfw_lock, flags); @@ -120,8 +125,10 @@ int clk_set_rate(struct clk *clk, unsigned long rate) return ret; spin_lock_irqsave(&clockfw_lock, flags); - if (arch_clock->clk_set_rate) + if (arch_clock->clk_set_rate) { + trace_clock_set_rate(clk->name, rate, 0, smp_processor_id()); ret = arch_clock->clk_set_rate(clk, rate); + } if (ret == 0) { if (clk->recalc) clk->rate = clk->recalc(clk); diff --git a/arch/arm/plat-omap/cpu-omap.c b/arch/arm/plat-omap/cpu-omap.c index 6d3d333..5b6aa61 100644 --- a/arch/arm/plat-omap/cpu-omap.c +++ b/arch/arm/plat-omap/cpu-omap.c @@ -21,6 +21,7 @@ #include <linux/err.h> #include <linux/clk.h> #include <linux/io.h> +#include <trace/events/power.h> #include <mach/hardware.h> #include <plat/clock.h> @@ -95,6 +96,8 @@ static int omap_target(struct cpufreq_policy *policy, printk(KERN_DEBUG "cpufreq-omap: transition: %u --> %u\n", freqs.old, freqs.new); #endif + trace_power_switch_state(POWER_PSTATE, freqs.new, 0, + smp_processor_id()); ret = clk_set_rate(mpu_clk, freqs.new * 1000); cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e8a5ad1..496d4af 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -372,7 +372,8 @@ static inline int hlt_use_halt(void) void default_idle(void) { if (hlt_use_halt()) { - trace_power_start(POWER_CSTATE, 1, smp_processor_id()); + trace_power_switch_state(POWER_CSTATE, 1, 0, + smp_processor_id()); current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we @@ -442,7 +443,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { - trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); + trace_power_switch_state(POWER_CSTATE, (ax>>4)+1, 0, + smp_processor_id()); if (!need_resched()) { if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -458,7 +460,8 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) static void mwait_idle(void) { if (!need_resched()) { - trace_power_start(POWER_CSTATE, 1, smp_processor_id()); + trace_power_switch_state(POWER_CSTATE, 1, 0, + smp_processor_id()); if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -479,11 +482,11 @@ static void mwait_idle(void) */ static void poll_idle(void) { - trace_power_start(POWER_CSTATE, 0, smp_processor_id()); + trace_power_switch_state(POWER_CSTATE, 0, 0, smp_processor_id()); local_irq_enable(); while (!need_resched()) cpu_relax(); - trace_power_end(0); + trace_power_switch_state(POWER_CSTATE, 0, 0, smp_processor_id()); } /* diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 96586c3..b2415b2 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -114,7 +114,8 @@ void cpu_idle(void) pm_idle(); start_critical_timings(); - trace_power_end(smp_processor_id()); + trace_power_switch_state(POWER_CSTATE, 0, 0, + smp_processor_id()); } tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b3d7a3a..64279b1 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -141,7 +141,8 @@ void cpu_idle(void) pm_idle(); start_critical_timings(); - trace_power_end(smp_processor_id()); + trace_power_switch_state(POWER_CSTATE, 0, 0, + smp_processor_id()); /* In many cases the interrupt that ended idle has already called exit_idle. But some idle diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 199dcb9..013c274 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -354,7 +354,8 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); dprintk("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new, (unsigned long)freqs->cpu); - trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu); + trace_power_switch_state(POWER_PSTATE, freqs->new, freqs->cpu, + smp_processor_id()); srcu_notifier_call_chain(&cpufreq_transition_notifier_list, CPUFREQ_POSTCHANGE, freqs); if (likely(policy) && likely(policy->cpu == freqs->cpu)) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index a507108..301261a 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -106,7 +106,7 @@ static void cpuidle_idle_call(void) /* give the governor an opportunity to reflect on the outcome */ if (cpuidle_curr_governor->reflect) cpuidle_curr_governor->reflect(dev); - trace_power_end(smp_processor_id()); + trace_power_switch_state(POWER_CSTATE, 0, 0, smp_processor_id()); } /** diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 065c804..2b122f1 100755 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -185,7 +185,7 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) stop_critical_timings(); #ifndef MODULE - trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu); + trace_power_switch_state(POWER_CSTATE, (eax >> 4) + 1, 0, cpu); #endif if (!need_resched()) { diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 286784d..192543a 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -18,61 +18,42 @@ enum { #endif /* - * The power events are used for cpuidle & suspend (power_start, power_end) - * and for cpufreq (power_frequency) + * The power events are used for cpuidle & suspend and for cpufreq */ DECLARE_EVENT_CLASS(power, - TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), + TP_PROTO(unsigned int type, unsigned int state, + unsigned int sub_state, unsigned int cpu_id), - TP_ARGS(type, state, cpu_id), + TP_ARGS(type, state, sub_state, cpu_id), TP_STRUCT__entry( __field( u64, type ) __field( u64, state ) + __field( u64, sub_state ) __field( u64, cpu_id ) ), TP_fast_assign( __entry->type = type; __entry->state = state; + __entry->sub_state = sub_state; __entry->cpu_id = cpu_id; ), - TP_printk("type=%lu state=%lu cpu_id=%lu", (unsigned long)__entry->type, - (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) + TP_printk("type=%lu state=%lu sub_state=%lu cpu_id=%lu", + (unsigned long)__entry->type, + (unsigned long)__entry->state, + (unsigned long)__entry->sub_state, + (unsigned long)__entry->cpu_id) ); -DEFINE_EVENT(power, power_start, +DEFINE_EVENT(power, power_switch_state, - TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), - - TP_ARGS(type, state, cpu_id) -); - -DEFINE_EVENT(power, power_frequency, - - TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), - - TP_ARGS(type, state, cpu_id) -); - -TRACE_EVENT(power_end, - - TP_PROTO(unsigned int cpu_id), - - TP_ARGS(cpu_id), - - TP_STRUCT__entry( - __field( u64, cpu_id ) - ), - - TP_fast_assign( - __entry->cpu_id = cpu_id; - ), - - TP_printk("cpu_id=%lu", (unsigned long)__entry->cpu_id) + TP_PROTO(unsigned int type, unsigned int state, + unsigned int sub_state, unsigned int cpu_id), + TP_ARGS(type, state, sub_state, cpu_id) ); /* @@ -81,45 +62,53 @@ TRACE_EVENT(power_end, */ DECLARE_EVENT_CLASS(clock, - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + TP_PROTO(const char *name, unsigned int state, + unsigned int sub_state, unsigned int cpu_id), - TP_ARGS(name, state, cpu_id), + TP_ARGS(name, state, sub_state, cpu_id), TP_STRUCT__entry( __string( name, name ) __field( u64, state ) + __field( u64, sub_state ) __field( u64, cpu_id ) ), TP_fast_assign( __assign_str(name, name); __entry->state = state; + __entry->sub_state = sub_state; __entry->cpu_id = cpu_id; ), - TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), - (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) + TP_printk("%s state=%lu sub_state=%lu cpu_id=%lu", __get_str(name), + (unsigned long)__entry->state, + (unsigned long)__entry->sub_state, + (unsigned long)__entry->cpu_id) ); DEFINE_EVENT(clock, clock_enable, - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + TP_PROTO(const char *name, unsigned int state, + unsigned int sub_state, unsigned int cpu_id), - TP_ARGS(name, state, cpu_id) + TP_ARGS(name, state, sub_state, cpu_id) ); DEFINE_EVENT(clock, clock_disable, - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + TP_PROTO(const char *name, unsigned int state, + unsigned int sub_state, unsigned int cpu_id), - TP_ARGS(name, state, cpu_id) + TP_ARGS(name, state, sub_state, cpu_id) ); DEFINE_EVENT(clock, clock_set_rate, - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + TP_PROTO(const char *name, unsigned int state, + unsigned int sub_state, unsigned int cpu_id), - TP_ARGS(name, state, cpu_id) + TP_ARGS(name, state, sub_state, cpu_id) ); /* @@ -127,31 +116,37 @@ DEFINE_EVENT(clock, clock_set_rate, */ DECLARE_EVENT_CLASS(power_domain, - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + TP_PROTO(const char *name, unsigned int state, + unsigned int sub_state, unsigned int cpu_id), - TP_ARGS(name, state, cpu_id), + TP_ARGS(name, state, sub_state, cpu_id), TP_STRUCT__entry( __string( name, name ) __field( u64, state ) + __field( u64, sub_state ) __field( u64, cpu_id ) ), TP_fast_assign( __assign_str(name, name); __entry->state = state; + __entry->sub_state = sub_state; __entry->cpu_id = cpu_id; ), - TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), - (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) + TP_printk("%s state=%lu sub_state=%lu cpu_id=%lu", __get_str(name), + (unsigned long)__entry->state, + (unsigned long)__entry->sub_state, + (unsigned long)__entry->cpu_id) ); DEFINE_EVENT(power_domain, power_domain_target, - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + TP_PROTO(const char *name, unsigned int state, + unsigned int sub_state, unsigned int cpu_id), - TP_ARGS(name, state, cpu_id) + TP_ARGS(name, state, sub_state, cpu_id) ); #endif /* _TRACE_POWER_H */ diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index a22582a..7da72b0 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -13,5 +13,3 @@ #define CREATE_TRACE_POINTS #include <trace/events/power.h> -EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); - -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-omap" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply related [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 15:31 ` Jean Pihet @ 2010-09-22 15:33 ` Arjan van de Ven 2010-09-22 15:36 ` Jean Pihet ` (2 more replies) 2010-09-22 18:49 ` Rafael J. Wysocki 1 sibling, 3 replies; 47+ messages in thread From: Arjan van de Ven @ 2010-09-22 15:33 UTC (permalink / raw) To: Jean Pihet Cc: Thomas Renninger, Ingo Molnar, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users On 9/22/2010 8:31 AM, Jean Pihet wrote: > Hi, > > Here is a patch that redefines the power events API. The advantages > are: easier maintainance of the kernel and the > user space tools, a cleaner and more generic interface, more > parameters for fine tracing and even documentation! > > Thomas, this patch has your patch above merged in ('power-trace: Use > power_switch_state instead of power_start and power_end'). The revised > ACPI patch is coming asap. > > The trace points for x86 and OMAP are also udated accordingly. > > The pytimechart tool needs an update for the new API. This can be done > as soon as the kernel code gets merged in. unfortunately this code is changing a userspace ABI... we really shouldn't do that if we can avoid it, and here we can avoid it. applications ARE using this stuff! ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 15:33 ` Arjan van de Ven @ 2010-09-22 15:36 ` Jean Pihet 2010-09-22 16:32 ` Arjan van de Ven 2010-09-22 17:57 ` Thomas Renninger 2010-09-22 18:09 ` Rafael J. Wysocki 2 siblings, 1 reply; 47+ messages in thread From: Jean Pihet @ 2010-09-22 15:36 UTC (permalink / raw) To: Arjan van de Ven Cc: Thomas Renninger, Ingo Molnar, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Wed, Sep 22, 2010 at 5:33 PM, Arjan van de Ven <arjan@linux.intel.com> wrote: > On 9/22/2010 8:31 AM, Jean Pihet wrote: >> >> Hi, >> >> Here is a patch that redefines the power events API. The advantages >> are: easier maintainance of the kernel and the >> user space tools, a cleaner and more generic interface, more >> parameters for fine tracing and even documentation! >> >> Thomas, this patch has your patch above merged in ('power-trace: Use >> power_switch_state instead of power_start and power_end'). The revised >> ACPI patch is coming asap. >> >> The trace points for x86 and OMAP are also udated accordingly. >> >> The pytimechart tool needs an update for the new API. This can be done >> as soon as the kernel code gets merged in. > > unfortunately this code is changing a userspace ABI... we really shouldn't > do that if we can avoid it, > and here we can avoid it. > > applications ARE using this stuff! What are the apps that are using it? I know about builtin-timechart, pytimechart. Is powertop using this as well? Is it better to go for a 3 steps approach (add new API, change tools, deprecate old API) like proposed above? Thanks, Jean -- To unsubscribe from this list: send the line "unsubscribe linux-trace-users" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 15:36 ` Jean Pihet @ 2010-09-22 16:32 ` Arjan van de Ven 2010-09-22 16:43 ` Peter Zijlstra 2010-09-22 16:47 ` Peter Zijlstra 0 siblings, 2 replies; 47+ messages in thread From: Arjan van de Ven @ 2010-09-22 16:32 UTC (permalink / raw) To: Jean Pihet Cc: Thomas Renninger, Ingo Molnar, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users On 9/22/2010 8:36 AM, Jean Pihet wrote: > On Wed, Sep 22, 2010 at 5:33 PM, Arjan van de Ven<arjan@linux.intel.com> wrote: >> On 9/22/2010 8:31 AM, Jean Pihet wrote: >>> Hi, >>> >>> Here is a patch that redefines the power events API. The advantages >>> are: easier maintainance of the kernel and the >>> user space tools, a cleaner and more generic interface, more >>> parameters for fine tracing and even documentation! >>> >>> Thomas, this patch has your patch above merged in ('power-trace: Use >>> power_switch_state instead of power_start and power_end'). The revised >>> ACPI patch is coming asap. >>> >>> The trace points for x86 and OMAP are also udated accordingly. >>> >>> The pytimechart tool needs an update for the new API. This can be done >>> as soon as the kernel code gets merged in. >> unfortunately this code is changing a userspace ABI... we really shouldn't >> do that if we can avoid it, >> and here we can avoid it. >> >> applications ARE using this stuff! > What are the apps that are using it? I know about builtin-timechart, > pytimechart. Is powertop using this as well? powertop 2.x codebase is as well. and a bunch of tools we have internal here at Intel. the thing with ABIs is that you don't know how many users you have.. at least here you know the lower bound is 3 different tools that are open source. .... and likely many local tools that aren't. ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 16:32 ` Arjan van de Ven @ 2010-09-22 16:43 ` Peter Zijlstra 2010-09-22 17:06 ` Arjan van de Ven 2010-09-22 16:47 ` Peter Zijlstra 1 sibling, 1 reply; 47+ messages in thread From: Peter Zijlstra @ 2010-09-22 16:43 UTC (permalink / raw) To: Arjan van de Ven Cc: Jean Pihet, Thomas Renninger, Ingo Molnar, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Wed, 2010-09-22 at 09:32 -0700, Arjan van de Ven wrote: > > What are the apps that are using it? I know about builtin-timechart, > > pytimechart. Is powertop using this as well? > > powertop 2.x codebase is as well. > > and a bunch of tools we have internal here at Intel. > > the thing with ABIs is that you don't know how many users you have.. at > least here you know the lower bound is 3 different tools that are open > source. > .... and likely many local tools that aren't. These tools should be smart enough to look up the tracepoint name, fail it its not available, read the tracepoint format, again fail if not compatible. I really object to treating tracepoints as ABI and being tied to any implementation details due to that. Tools really should be flexible and allow for change. ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 16:43 ` Peter Zijlstra @ 2010-09-22 17:06 ` Arjan van de Ven 2010-09-22 17:30 ` Peter Zijlstra 2010-09-22 17:36 ` Thomas Renninger 0 siblings, 2 replies; 47+ messages in thread From: Arjan van de Ven @ 2010-09-22 17:06 UTC (permalink / raw) To: Peter Zijlstra Cc: Jean Pihet, Thomas Renninger, Ingo Molnar, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users On 9/22/2010 9:43 AM, Peter Zijlstra wrote: > On Wed, 2010-09-22 at 09:32 -0700, Arjan van de Ven wrote: >>> What are the apps that are using it? I know about builtin-timechart, >>> pytimechart. Is powertop using this as well? >> powertop 2.x codebase is as well. >> >> and a bunch of tools we have internal here at Intel. >> >> the thing with ABIs is that you don't know how many users you have.. at >> least here you know the lower bound is 3 different tools that are open >> source. >> .... and likely many local tools that aren't. > These tools should be smart enough to look up the tracepoint name, fail > it its not available, read the tracepoint format, again fail if not > compatible. it's not very useful if none of the critical information is available. you can't at the one hand push people to use perf for critical pieces (like machine checks etc etc) and on the other hand say that it's not ABI stable and should not be used really. In this case we're talking about basically a suprious rename of something that isn't really an improvement (because it makes it harder to subscribe to only one type of event)... that's not a good thing. ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 17:06 ` Arjan van de Ven @ 2010-09-22 17:30 ` Peter Zijlstra 2010-09-22 18:15 ` Steven Rostedt 2010-09-22 17:36 ` Thomas Renninger 1 sibling, 1 reply; 47+ messages in thread From: Peter Zijlstra @ 2010-09-22 17:30 UTC (permalink / raw) To: Arjan van de Ven Cc: Jean Pihet, Thomas Renninger, Ingo Molnar, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Wed, 2010-09-22 at 10:06 -0700, Arjan van de Ven wrote: > In this case we're talking about basically a suprious rename of > something that isn't really an improvement > (because it makes it harder to subscribe to only one type of event)... > that's not a good thing. People have been talking about more/more comprehensive power tracepoints for a while, and I think that's a valid goal, if its really a rename I'm sure you can work it out. That said, I really didn't read this discussion much, but your stance seems to be that any tracepoint you use must stay valid, and I object to that. What will do you do when we include a new scheduling policy and all the scheduler tracepoints need to change? (yes that's really going to happen) I'm not going to carry double tracepoints, and I'm not going to not merge that policy. ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 17:30 ` Peter Zijlstra @ 2010-09-22 18:15 ` Steven Rostedt 2010-09-22 18:23 ` Ingo Molnar 2010-09-22 18:26 ` Peter Zijlstra 0 siblings, 2 replies; 47+ messages in thread From: Steven Rostedt @ 2010-09-22 18:15 UTC (permalink / raw) To: Peter Zijlstra Cc: Arjan van de Ven, Jean Pihet, Thomas Renninger, Ingo Molnar, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Wed, 2010-09-22 at 19:30 +0200, Peter Zijlstra wrote: > On Wed, 2010-09-22 at 10:06 -0700, Arjan van de Ven wrote: > > That said, I really didn't read this discussion much, but your stance > seems to be that any tracepoint you use must stay valid, and I object to > that. We could add a TRACE_EVENT_ABI() as Ingo has been suggesting. If anything, it could mean that the given tracepoint will always have the same name. And perhaps the data it holds will always be there, but may also be extended. > > What will do you do when we include a new scheduling policy and all the > scheduler tracepoints need to change? (yes that's really going to > happen) The tracepoint sched_switch should stay the same. We may add more data, but the comm, pid, prio => comm, pid, prio, I don't see going away. > > I'm not going to carry double tracepoints, and I'm not going to not > merge that policy. Not sure what you mean by "double tracepoints" -- Steve ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 18:15 ` Steven Rostedt @ 2010-09-22 18:23 ` Ingo Molnar 2010-09-22 18:30 ` Peter Zijlstra 2010-09-22 18:26 ` Peter Zijlstra 1 sibling, 1 reply; 47+ messages in thread From: Ingo Molnar @ 2010-09-22 18:23 UTC (permalink / raw) To: Steven Rostedt Cc: Peter Zijlstra, Arjan van de Ven, Jean Pihet, Thomas Renninger, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users * Steven Rostedt <rostedt@goodmis.org> wrote: > We could add a TRACE_EVENT_ABI() as Ingo has been suggesting. [...] That would be rather useful. We could still be flexible about experimental tracepoints (they dont hurt), but apps should stick to ABI bits - or at least not complain when non-ABI tracepoints change for good reasons. (arbitrary, avoidable changes are still bad obviously, regardless of the type of the tracepoint) We'd be more stringent with marking tracepoints an ABI. So yes, this would be lovely to have. Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 18:23 ` Ingo Molnar @ 2010-09-22 18:30 ` Peter Zijlstra 0 siblings, 0 replies; 47+ messages in thread From: Peter Zijlstra @ 2010-09-22 18:30 UTC (permalink / raw) To: Ingo Molnar Cc: Steven Rostedt, Arjan van de Ven, Jean Pihet, Thomas Renninger, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Wed, 2010-09-22 at 20:23 +0200, Ingo Molnar wrote: > * Steven Rostedt <rostedt@goodmis.org> wrote: > > > We could add a TRACE_EVENT_ABI() as Ingo has been suggesting. [...] > > That would be rather useful. > > We could still be flexible about experimental tracepoints (they dont > hurt), but apps should stick to ABI bits - or at least not complain when > non-ABI tracepoints change for good reasons. (arbitrary, avoidable > changes are still bad obviously, regardless of the type of the > tracepoint) > > We'd be more stringent with marking tracepoints an ABI. > > So yes, this would be lovely to have. Why? As long as we mandate that apps need to fully parse the format stuff anyway, it doesn't buy us much. ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 18:15 ` Steven Rostedt 2010-09-22 18:23 ` Ingo Molnar @ 2010-09-22 18:26 ` Peter Zijlstra 2010-09-22 18:36 ` Steven Rostedt 2010-09-22 19:14 ` Frank Ch. Eigler 1 sibling, 2 replies; 47+ messages in thread From: Peter Zijlstra @ 2010-09-22 18:26 UTC (permalink / raw) To: Steven Rostedt Cc: Arjan van de Ven, Jean Pihet, Thomas Renninger, Ingo Molnar, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Wed, 2010-09-22 at 14:15 -0400, Steven Rostedt wrote: > On Wed, 2010-09-22 at 19:30 +0200, Peter Zijlstra wrote: > > On Wed, 2010-09-22 at 10:06 -0700, Arjan van de Ven wrote: > > > > > That said, I really didn't read this discussion much, but your stance > > seems to be that any tracepoint you use must stay valid, and I object to > > that. > > We could add a TRACE_EVENT_ABI() as Ingo has been suggesting. If > anything, it could mean that the given tracepoint will always have the > same name. And perhaps the data it holds will always be there, but may > also be extended. I still don't see why you need TRACE_EVENT_ABI for that, if its the same name and the format can be extended you get the same results with what we've got. Apps need to read/parse the format thing anyway. > > > > What will do you do when we include a new scheduling policy and all the > > scheduler tracepoints need to change? (yes that's really going to > > happen) > > The tracepoint sched_switch should stay the same. We may add more data, > but the comm, pid, prio => comm, pid, prio, I don't see going away. Right, it would need additional fields. Preferably not only at the end. > > I'm not going to carry double tracepoints, and I'm not going to not > > merge that policy. > > Not sure what you mean by "double tracepoints" Two different tracepoints in the same location. ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 18:26 ` Peter Zijlstra @ 2010-09-22 18:36 ` Steven Rostedt 2010-09-22 18:43 ` Peter Zijlstra 2010-09-22 19:14 ` Frank Ch. Eigler 1 sibling, 1 reply; 47+ messages in thread From: Steven Rostedt @ 2010-09-22 18:36 UTC (permalink / raw) To: Peter Zijlstra Cc: Arjan van de Ven, Jean Pihet, Thomas Renninger, Ingo Molnar, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Wed, 2010-09-22 at 20:26 +0200, Peter Zijlstra wrote: > On Wed, 2010-09-22 at 14:15 -0400, Steven Rostedt wrote: > > On Wed, 2010-09-22 at 19:30 +0200, Peter Zijlstra wrote: > > > On Wed, 2010-09-22 at 10:06 -0700, Arjan van de Ven wrote: > > > > > > > > That said, I really didn't read this discussion much, but your stance > > > seems to be that any tracepoint you use must stay valid, and I object to > > > that. > > > > We could add a TRACE_EVENT_ABI() as Ingo has been suggesting. If > > anything, it could mean that the given tracepoint will always have the > > same name. And perhaps the data it holds will always be there, but may > > also be extended. > > I still don't see why you need TRACE_EVENT_ABI for that, if its the same > name and the format can be extended you get the same results with what > we've got. Apps need to read/parse the format thing anyway. Just a marker that these trace points are being used by apps. > > > > > > > What will do you do when we include a new scheduling policy and all the > > > scheduler tracepoints need to change? (yes that's really going to > > > happen) > > > > The tracepoint sched_switch should stay the same. We may add more data, > > but the comm, pid, prio => comm, pid, prio, I don't see going away. > > Right, it would need additional fields. Preferably not only at the end. Why not at the end? The tools should easily be able to represent them in anyway they want. The print-fmt field helps in this regard too. But then again, we present the fields in the data. The tools should use a parse library (which a generic one will soon be out too). This way, we don't need them at the "end" but the parsing tools could find the fields no matter where they are in the record. > > > > I'm not going to carry double tracepoints, and I'm not going to not "not going to not" eeek! double negative! > > > merge that policy. > > > > Not sure what you mean by "double tracepoints" > > Two different tracepoints in the same location. Agreed, that is wrong to have. -- Steve ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 18:36 ` Steven Rostedt @ 2010-09-22 18:43 ` Peter Zijlstra 0 siblings, 0 replies; 47+ messages in thread From: Peter Zijlstra @ 2010-09-22 18:43 UTC (permalink / raw) To: Steven Rostedt Cc: Arjan van de Ven, Jean Pihet, Thomas Renninger, Ingo Molnar, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Wed, 2010-09-22 at 14:36 -0400, Steven Rostedt wrote: > > I still don't see why you need TRACE_EVENT_ABI for that, if its the same > > name and the format can be extended you get the same results with what > > we've got. Apps need to read/parse the format thing anyway. > > Just a marker that these trace points are being used by apps. I really don't see any additional value in that. Its not like we won't change them if we have to. > But then again, we present the fields in the data. The tools should use > a parse library (which a generic one will soon be out too). This way, we > don't need them at the "end" but the parsing tools could find the fields > no matter where they are in the record. Right. They should use the full format specification. ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 18:26 ` Peter Zijlstra 2010-09-22 18:36 ` Steven Rostedt @ 2010-09-22 19:14 ` Frank Ch. Eigler 1 sibling, 0 replies; 47+ messages in thread From: Frank Ch. Eigler @ 2010-09-22 19:14 UTC (permalink / raw) To: Peter Zijlstra Cc: Steven Rostedt, Arjan van de Ven, Jean Pihet, Thomas Renninger, Ingo Molnar, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users Hi - On Wed, Sep 22, 2010 at 08:26:40PM +0200, Peter Zijlstra wrote: > [...] > > Not sure what you mean by "double tracepoints" > Two different tracepoints in the same location. Another possibility may be to provide a backward-compatibility module that maps new tracepoints to old ones. On demand, it could attach to the new tracepoints, and export those callbacks as clones of the old pseudo-ABI: same old names & parameters. If the new ones supply a superset of events & data, this is technically possible. - FChE ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 17:06 ` Arjan van de Ven 2010-09-22 17:30 ` Peter Zijlstra @ 2010-09-22 17:36 ` Thomas Renninger 1 sibling, 0 replies; 47+ messages in thread From: Thomas Renninger @ 2010-09-22 17:36 UTC (permalink / raw) To: Arjan van de Ven Cc: Peter Zijlstra, Jean Pihet, Ingo Molnar, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Wednesday 22 September 2010 19:06:54 Arjan van de Ven wrote: > On 9/22/2010 9:43 AM, Peter Zijlstra wrote: > > On Wed, 2010-09-22 at 09:32 -0700, Arjan van de Ven wrote: > >>> What are the apps that are using it? I know about builtin-timechart, > >>> pytimechart. Is powertop using this as well? > >> powertop 2.x codebase is as well. > >> > >> and a bunch of tools we have internal here at Intel. > >> > >> the thing with ABIs is that you don't know how many users you have.. at > >> least here you know the lower bound is 3 different tools that are open > >> source. > >> .... and likely many local tools that aren't. > > These tools should be smart enough to look up the tracepoint name, fail > > it its not available, read the tracepoint format, again fail if not > > compatible. > it's not very useful if none of the critical information is available. > > you can't at the one hand push people to use perf for critical pieces > (like machine checks etc etc) and on > the other hand say that it's not ABI stable and should not be used really. I provided an ABI stable solution, by marking the broken parts deprecated, etc. I'll rework the CONFIG_DEPRECATED_POWER_EVENTS (or similar config). > In this case we're talking about basically a suprious rename of > something that isn't really an improvement ... > (because it makes it harder to subscribe to only one type of event)... > that's not a good thing. Finally there is some talking about the details. You say they should be differed by the name and not the type? Why does the type= attribute exist at all then? /sys/kernel/debug/tracing/:[0]# cat available_events |grep power power:power_start power:power_frequency power:power_end So which one do I set to get C-, processor sleep, state traces? What you mean is that instead of passing the type= as attribute, an additional macro layer could be put in or each type is statically defined. The type itself needs not to get passed anymore then, because this info is already in the name and you get: power:power_cstates power:power_pstates power:power_sstates or power:power_processor_sleep power:power_processor_frequency power:power_machine_suspend ... right? This more looks like an interface that can get exposed to userspace... Thomas ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 16:32 ` Arjan van de Ven 2010-09-22 16:43 ` Peter Zijlstra @ 2010-09-22 16:47 ` Peter Zijlstra 1 sibling, 0 replies; 47+ messages in thread From: Peter Zijlstra @ 2010-09-22 16:47 UTC (permalink / raw) To: Arjan van de Ven Cc: Jean Pihet, Thomas Renninger, Ingo Molnar, Len Brown, arjan, Kevin Hilman, LKML, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Wed, 2010-09-22 at 09:32 -0700, Arjan van de Ven wrote: Also, please don't cross-post to subscriber only lists, that's annoying as hell. (removed the discuss@lesswatts list) ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 15:33 ` Arjan van de Ven 2010-09-22 15:36 ` Jean Pihet @ 2010-09-22 17:57 ` Thomas Renninger 2010-09-22 18:13 ` Arjan van de Ven 2010-09-22 18:09 ` Rafael J. Wysocki 2 siblings, 1 reply; 47+ messages in thread From: Thomas Renninger @ 2010-09-22 17:57 UTC (permalink / raw) To: Arjan van de Ven, linux-pm Cc: Jean Pihet, Ingo Molnar, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-omap, linux-perf-users, linux-trace-users On Wednesday 22 September 2010 17:33:01 Arjan van de Ven wrote: > On 9/22/2010 8:31 AM, Jean Pihet wrote: > > Hi, > > > > Here is a patch that redefines the power events API. The advantages > > are: easier maintainance of the kernel and the > > user space tools, a cleaner and more generic interface, more > > parameters for fine tracing and even documentation! > > > > Thomas, this patch has your patch above merged in ('power-trace: Use > > power_switch_state instead of power_start and power_end'). The revised > > ACPI patch is coming asap. > > > > The trace points for x86 and OMAP are also udated accordingly. > > > > The pytimechart tool needs an update for the new API. This can be done > > as soon as the kernel code gets merged in. > > unfortunately this code is changing a userspace ABI... we really > shouldn't do that if we can avoid it, > and here we can avoid it. Wow, this is your first post on this thread and you needed 1 min, 20 secs to reply and comment on work that probably took several weeks and which is quite an improvement. You did not even start to read the documentation/patch in this minute, did you? > applications ARE using this stuff! The current interfaces do not make sense at all. Things must be changed now before even more ARE using the broken stuff and things get more complicated. You should be happy that someone is helping/doing it. Thomas ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 17:57 ` Thomas Renninger @ 2010-09-22 18:13 ` Arjan van de Ven 2010-09-22 18:34 ` Thomas Renninger 0 siblings, 1 reply; 47+ messages in thread From: Arjan van de Ven @ 2010-09-22 18:13 UTC (permalink / raw) To: Thomas Renninger Cc: linux-pm, Jean Pihet, Ingo Molnar, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-omap, linux-perf-users, linux-trace-users On 9/22/2010 10:57 AM, Thomas Renninger wrote: > On Wed >> unfortunately this code is changing a userspace ABI... we really >> shouldn't do that if we can avoid it, >> and here we can avoid it. [do you really need to get personal?] > Wow, this is your first post on this thread it isn't. (but I've been traveling too much in the last few weeks and have been rather overwhelmed in mail as a result) ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 18:13 ` Arjan van de Ven @ 2010-09-22 18:34 ` Thomas Renninger 0 siblings, 0 replies; 47+ messages in thread From: Thomas Renninger @ 2010-09-22 18:34 UTC (permalink / raw) To: Arjan van de Ven Cc: linux-pm, Jean Pihet, Ingo Molnar, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-omap, linux-perf-users, linux-trace-users On Wednesday 22 September 2010 20:13:19 Arjan van de Ven wrote: > On 9/22/2010 10:57 AM, Thomas Renninger wrote: > > On Wed > >> unfortunately this code is changing a userspace ABI... we really > >> shouldn't do that if we can avoid it, > >> and here we can avoid it. > > [do you really need to get personal?] Is that personal already? But discussing details makes more sense and I have to appologize for the needless comment. > (but I've been traveling too much in the last few weeks and have been > rather overwhelmed in mail as a result) Yes, time is short. Sorry about that one. And thanks for your input, even your time is probably shorter than mine, Thomas ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 15:33 ` Arjan van de Ven 2010-09-22 15:36 ` Jean Pihet 2010-09-22 17:57 ` Thomas Renninger @ 2010-09-22 18:09 ` Rafael J. Wysocki 2 siblings, 0 replies; 47+ messages in thread From: Rafael J. Wysocki @ 2010-09-22 18:09 UTC (permalink / raw) To: Arjan van de Ven Cc: Jean Pihet, Thomas Renninger, Ingo Molnar, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Wednesday, September 22, 2010, Arjan van de Ven wrote: > On 9/22/2010 8:31 AM, Jean Pihet wrote: > > Hi, > > > > Here is a patch that redefines the power events API. The advantages > > are: easier maintainance of the kernel and the > > user space tools, a cleaner and more generic interface, more > > parameters for fine tracing and even documentation! > > > > Thomas, this patch has your patch above merged in ('power-trace: Use > > power_switch_state instead of power_start and power_end'). The revised > > ACPI patch is coming asap. > > > > The trace points for x86 and OMAP are also udated accordingly. > > > > The pytimechart tool needs an update for the new API. This can be done > > as soon as the kernel code gets merged in. > > unfortunately this code is changing a userspace ABI... we really > shouldn't do that if we can avoid it, > and here we can avoid it. > > applications ARE using this stuff! Apart from this, could we rename things like POWER_CSTATE to CPU_POWER_CSTATE and state clearly in the docs that this whole thing is about CPU power? Thanks, Rafael ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 15:31 ` Jean Pihet 2010-09-22 15:33 ` Arjan van de Ven @ 2010-09-22 18:49 ` Rafael J. Wysocki 2010-09-28 8:35 ` Jean Pihet 1 sibling, 1 reply; 47+ messages in thread From: Rafael J. Wysocki @ 2010-09-22 18:49 UTC (permalink / raw) To: Jean Pihet Cc: Thomas Renninger, Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users [Dropping discuss@lesswatts.org from the CC list.] On Wednesday, September 22, 2010, Jean Pihet wrote: > Hi, > > Here is a patch that redefines the power events API. The advantages > are: easier maintainance of the kernel and the > user space tools, a cleaner and more generic interface, more > parameters for fine tracing and even documentation! > > Thomas, this patch has your patch above merged in ('power-trace: Use > power_switch_state instead of power_start and power_end'). The revised > ACPI patch is coming asap. > > The trace points for x86 and OMAP are also udated accordingly. > > The pytimechart tool needs an update for the new API. This can be done > as soon as the kernel code gets merged in. > Please note the point below about the existing code in builtin-timechart.c. > > On Sat, Sep 18, 2010 at 12:26 AM, Thomas Renninger <trenn@suse.de> wrote: > > On Friday 17 September 2010 18:24:12 Ingo Molnar wrote: > >> > >> * Thomas Renninger <trenn@suse.de> wrote: > >> > >> > On Friday 17 September 2010 16:24:59 Ingo Molnar wrote: > > > >> [ You dont even have to document it, as good code is self-explanatory ;-) ] > > I recently posted a patch exporting some things through /sys/kernel/debug/... > > Greg complained that a file for Documentation/ABI/{testing,stable}/* is missing > > and I fully agree. > The proposed patch has the documentation in > Documentation/trace/events-power.txt. > > > If different userspace apps should make use of this (in above case nobody > > than my debug userspace tool will...) and this should be called something like an API, > > it should be documented and if something changes, it should > > first be marked deprecated, etc. > Note: the exsiting code in tools/perf/builtin-timechart.c needs an > update for the new events API. Is this code still maintained? I not, > could pytimechart be merged in instead? > > Feedback is welcome! > > > > > Thomas > > > > Thanks, > Jean > > --- > From f37d1875050d33273b10e99497b4059b37e6680d Mon Sep 17 00:00:00 2001 > From: Jean Pihet <jean.pihet@newoldbits.com> > Date: Wed, 22 Sep 2010 17:10:47 +0200 > Subject: [PATCH] tools, perf: redefine the power events API > > Redefine the API with: > - power_switch_state for C-, P- and S-states, > - clock and power_domain events > > The new API allows easier maintainance of the kernel and the > user space tools, a cleaner and more generic interface, > more parameters for fine tracing and even documentation. > > The new events are used by the x86 and OMAP platforms. > > Signed-off-by: Jean Pihet <j-pihet@ti.com> > --- > Documentation/trace/events-power.txt | 70 +++++++++++++++++++++++++ > arch/arm/mach-omap2/cpuidle34xx.c | 3 + > arch/arm/mach-omap2/pm34xx.c | 11 ++++ > arch/arm/mach-omap2/powerdomain.c | 3 + > arch/arm/plat-omap/clock.c | 13 ++++- > arch/arm/plat-omap/cpu-omap.c | 3 + > arch/x86/kernel/process.c | 13 +++-- > arch/x86/kernel/process_32.c | 3 +- > arch/x86/kernel/process_64.c | 3 +- > drivers/cpufreq/cpufreq.c | 3 +- > drivers/cpuidle/cpuidle.c | 2 +- > drivers/idle/intel_idle.c | 2 +- > include/trace/events/power.h | 95 ++++++++++++++++------------------ > kernel/trace/power-traces.c | 2 - > 14 files changed, 161 insertions(+), 65 deletions(-) > create mode 100644 Documentation/trace/events-power.txt > > diff --git a/Documentation/trace/events-power.txt > b/Documentation/trace/events-power.txt > new file mode 100644 > index 0000000..967f842 > --- /dev/null > +++ b/Documentation/trace/events-power.txt > @@ -0,0 +1,70 @@ > + > + Subsystem Trace Points: power > + > +The power tracing system captures events related to power transitions > +within the kernel. Broadly speaking there are three major subheadings: > + > + o Power state switch which reports events related to suspend (S-states), > + cpuidle (C-states) and cpufreq (P-states) > + o System clock related changes > + o Power domains related changes and transitions > + > +This document describes what each of the tracepoints is and why they > +might be useful. > + > +Cf. include/trace/events/power.h for the events definitions. > + > +1. Power state switch events > +============================ > + > +power_switch_state type=%lu state=%lu sub_state=%lu cpu_id=%lu > + > +The 'type' parameter takes one of those macros: > + . POWER_NONE = 0, > + . POWER_CSTATE = 1, /* C-State */ > + . POWER_PSTATE = 2, /* Fequency change or DVFS */ > + . POWER_SSTATE = 3, /* Suspend */ This POWER_SSTATE thing seems to be totally artificial and omap-specific. Why do you want it to be done this way? Or is the ACPI handling added in the ACPI patch? In which case, why don't you put that power_switch_state(POWER_SSTATE, 1, 0, cpu) into kernel/power/suspend.c:suspend_enter() (and analogously for power_switch_state(POWER_SSTATE, 0, 0, cpu)). Moreover, why is the cpu argument necessary for POWER_SSTATE at all? Thanks, Rafael ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-22 18:49 ` Rafael J. Wysocki @ 2010-09-28 8:35 ` Jean Pihet 2010-09-28 21:22 ` Rafael J. Wysocki 0 siblings, 1 reply; 47+ messages in thread From: Jean Pihet @ 2010-09-28 8:35 UTC (permalink / raw) To: Rafael J. Wysocki Cc: Thomas Renninger, Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users Hi, Here is what I am proposing, in reply to all your comments: 1) rename the events to match Thomas's proposal: power:power_cpu_cstate power:power_cpu_pstate power:power_cpu_sstate ... 2) introduce a new Kconfig option CONFIG_DEPRECATED_POWER_EVENTS and conditionally map a subset of the new events to the old ones for backward compatibility with the existing user apps. The apps should be converted to the new API asap, 3) update documentation Other remarks here below: On Wed, Sep 22, 2010 at 8:49 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote: ... > This POWER_SSTATE thing seems to be totally artificial and omap-specific. > > Why do you want it to be done this way? > > Or is the ACPI handling added in the ACPI patch? In which case, why don't you > put that power_switch_state(POWER_SSTATE, 1, 0, cpu) into > kernel/power/suspend.c:suspend_enter() (and analogously for > power_switch_state(POWER_SSTATE, 0, 0, cpu)). The ACPI code is not using the SSTATE event. Indeed inserting a tracepoint at kernel/power/suspend.c:suspend_enter() is more generic. I will correct this. > Moreover, why is the cpu argument necessary for POWER_SSTATE at all? The cpu_id parameter is present in all events prototypes. This is not needed. I will correct this. > > Thanks, > Rafael > A new patch should be ready in the coming days. I would like to have your opinion on the proposed rework before re-doing it all again. Thanks, Jean -- To unsubscribe from this list: send the line "unsubscribe linux-omap" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-28 8:35 ` Jean Pihet @ 2010-09-28 21:22 ` Rafael J. Wysocki 2010-09-28 21:43 ` Jean Pihet 2010-09-28 21:45 ` Arjan van de Ven 0 siblings, 2 replies; 47+ messages in thread From: Rafael J. Wysocki @ 2010-09-28 21:22 UTC (permalink / raw) To: Jean Pihet Cc: Thomas Renninger, Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Tuesday, September 28, 2010, Jean Pihet wrote: > Hi, Hi, > Here is what I am proposing, in reply to all your comments: > > 1) rename the events to match Thomas's proposal: > power:power_cpu_cstate > power:power_cpu_pstate > power:power_cpu_sstate If that sstate thing is going to mean "suspend", then please drop it. "Suspend" is not a state, let alone a CPU state. It is a procedure by which the (entire) system is put into a sleep state (that is not confined to CPUs). > ... > > 2) introduce a new Kconfig option CONFIG_DEPRECATED_POWER_EVENTS and > conditionally map a subset of the new events to the old ones for > backward compatibility with the existing user apps. The apps should be > converted to the new API asap, > > 3) update documentation Sounds reasonable. > Other remarks here below: > > On Wed, Sep 22, 2010 at 8:49 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote: > ... > > This POWER_SSTATE thing seems to be totally artificial and omap-specific. > > > > Why do you want it to be done this way? > > > > Or is the ACPI handling added in the ACPI patch? In which case, why don't you > > put that power_switch_state(POWER_SSTATE, 1, 0, cpu) into > > kernel/power/suspend.c:suspend_enter() (and analogously for > > power_switch_state(POWER_SSTATE, 0, 0, cpu)). > The ACPI code is not using the SSTATE event. > Indeed inserting a tracepoint at > kernel/power/suspend.c:suspend_enter() is more generic. I will correct > this. OK > > Moreover, why is the cpu argument necessary for POWER_SSTATE at all? > The cpu_id parameter is present in all events prototypes. This is not > needed. I will correct this. OK Thanks, Rafael ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-28 21:22 ` Rafael J. Wysocki @ 2010-09-28 21:43 ` Jean Pihet 2010-09-28 22:05 ` Rafael J. Wysocki 2010-09-28 21:45 ` Arjan van de Ven 1 sibling, 1 reply; 47+ messages in thread From: Jean Pihet @ 2010-09-28 21:43 UTC (permalink / raw) To: Rafael J. Wysocki Cc: Thomas Renninger, Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users Hi, On Tue, Sep 28, 2010 at 11:22 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote: > On Tuesday, September 28, 2010, Jean Pihet wrote: >> Hi, > > Hi, > >> Here is what I am proposing, in reply to all your comments: >> >> 1) rename the events to match Thomas's proposal: >> power:power_cpu_cstate >> power:power_cpu_pstate >> power:power_cpu_sstate > > If that sstate thing is going to mean "suspend", then please drop it. > "Suspend" is not a state, let alone a CPU state. It is a procedure by which > the (entire) system is put into a sleep state (that is not confined to CPUs). Since suspend is tied to the power management of the system, is it ok to rename it to e.g. power:system_suspend? > >> ... >> >> 2) introduce a new Kconfig option CONFIG_DEPRECATED_POWER_EVENTS and >> conditionally map a subset of the new events to the old ones for >> backward compatibility with the existing user apps. The apps should be >> converted to the new API asap, >> >> 3) update documentation > > Sounds reasonable. OK > >> Other remarks here below: >> >> On Wed, Sep 22, 2010 at 8:49 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote: >> ... >> > This POWER_SSTATE thing seems to be totally artificial and omap-specific. >> > >> > Why do you want it to be done this way? >> > >> > Or is the ACPI handling added in the ACPI patch? In which case, why don't you >> > put that power_switch_state(POWER_SSTATE, 1, 0, cpu) into >> > kernel/power/suspend.c:suspend_enter() (and analogously for >> > power_switch_state(POWER_SSTATE, 0, 0, cpu)). >> The ACPI code is not using the SSTATE event. >> Indeed inserting a tracepoint at >> kernel/power/suspend.c:suspend_enter() is more generic. I will correct >> this. > > OK > >> > Moreover, why is the cpu argument necessary for POWER_SSTATE at all? >> The cpu_id parameter is present in all events prototypes. This is not >> needed. I will correct this. > > OK > > Thanks, > Rafael > Thanks, Jean -- To unsubscribe from this list: send the line "unsubscribe linux-trace-users" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-28 21:43 ` Jean Pihet @ 2010-09-28 22:05 ` Rafael J. Wysocki 0 siblings, 0 replies; 47+ messages in thread From: Rafael J. Wysocki @ 2010-09-28 22:05 UTC (permalink / raw) To: Jean Pihet Cc: Thomas Renninger, Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Tuesday, September 28, 2010, Jean Pihet wrote: > Hi, > > On Tue, Sep 28, 2010 at 11:22 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote: > > On Tuesday, September 28, 2010, Jean Pihet wrote: > >> Hi, > > > > Hi, > > > >> Here is what I am proposing, in reply to all your comments: > >> > >> 1) rename the events to match Thomas's proposal: > >> power:power_cpu_cstate > >> power:power_cpu_pstate > >> power:power_cpu_sstate > > > > If that sstate thing is going to mean "suspend", then please drop it. > > "Suspend" is not a state, let alone a CPU state. It is a procedure by which > > the (entire) system is put into a sleep state (that is not confined to CPUs). > > Since suspend is tied to the power management of the system, is it ok > to rename it to e.g. power:system_suspend? I think so. Thanks, Rafael ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-28 21:22 ` Rafael J. Wysocki 2010-09-28 21:43 ` Jean Pihet @ 2010-09-28 21:45 ` Arjan van de Ven 2010-09-28 22:05 ` Rafael J. Wysocki ` (2 more replies) 1 sibling, 3 replies; 47+ messages in thread From: Arjan van de Ven @ 2010-09-28 21:45 UTC (permalink / raw) To: Rafael J. Wysocki Cc: Jean Pihet, Thomas Renninger, Ingo Molnar, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users On 9/28/2010 2:22 PM, Rafael J. Wysocki wrote: > On Tuesday, September 28, 2010, Jean Pihet wrote: >> Hi, > Hi, > >> Here is what I am proposing, in reply to all your comments: >> >> 1) rename the events to match Thomas's proposal: >> power:power_cpu_cstate >> power:power_cpu_pstate >> power:power_cpu_sstate > If that sstate thing is going to mean "suspend", then please drop it. > "Suspend" is not a state, let alone a CPU state. It is a procedure by which > the (entire) system is put into a sleep state (that is not confined to CPUs). there are also non-suspend S states, like S0i1 and S0i3 (supported in the current Intel "Moorestown" platform) so it's slightly more complex than "just" suspend :) ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-28 21:45 ` Arjan van de Ven @ 2010-09-28 22:05 ` Rafael J. Wysocki 2010-09-29 7:49 ` Thomas Renninger 2010-10-04 17:55 ` [linux-pm] " Pavel Machek 2 siblings, 0 replies; 47+ messages in thread From: Rafael J. Wysocki @ 2010-09-28 22:05 UTC (permalink / raw) To: Arjan van de Ven Cc: Jean Pihet, Thomas Renninger, Ingo Molnar, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Tuesday, September 28, 2010, Arjan van de Ven wrote: > On 9/28/2010 2:22 PM, Rafael J. Wysocki wrote: > > On Tuesday, September 28, 2010, Jean Pihet wrote: > >> Hi, > > Hi, > > > >> Here is what I am proposing, in reply to all your comments: > >> > >> 1) rename the events to match Thomas's proposal: > >> power:power_cpu_cstate > >> power:power_cpu_pstate > >> power:power_cpu_sstate > > If that sstate thing is going to mean "suspend", then please drop it. > > "Suspend" is not a state, let alone a CPU state. It is a procedure by which > > the (entire) system is put into a sleep state (that is not confined to CPUs). > > there are also non-suspend S states, like S0i1 and S0i3 (supported in > the current Intel "Moorestown" platform) > > so it's slightly more complex than "just" suspend :) That's exactly why I used the conditional above. :-) ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-28 21:45 ` Arjan van de Ven 2010-09-28 22:05 ` Rafael J. Wysocki @ 2010-09-29 7:49 ` Thomas Renninger 2010-09-29 9:25 ` Jean Pihet 2010-10-04 17:55 ` [linux-pm] " Pavel Machek 2 siblings, 1 reply; 47+ messages in thread From: Thomas Renninger @ 2010-09-29 7:49 UTC (permalink / raw) To: Arjan van de Ven Cc: Rafael J. Wysocki, Jean Pihet, Ingo Molnar, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Tuesday 28 September 2010 23:45:24 Arjan van de Ven wrote: > On 9/28/2010 2:22 PM, Rafael J. Wysocki wrote: > > On Tuesday, September 28, 2010, Jean Pihet wrote: > >> Hi, > > Hi, > > > >> Here is what I am proposing, in reply to all your comments: > >> > >> 1) rename the events to match Thomas's proposal: > >> power:power_cpu_cstate > >> power:power_cpu_pstate > >> power:power_cpu_sstate I'd not name it that X86/ACPI specific. power:processor_sleep power:processor_frequency power:system_suspend This would map with X86/ACPI c/p/s states but the name would also match fine with ARM and other archs. > > If that sstate thing is going to mean "suspend", then please drop > > it. > > "Suspend" is not a state, let alone a CPU state. It is a procedure > > by which the (entire) system is put into a sleep state (that is not > > confined to CPUs). > > there are also non-suspend S states, like S0i1 and S0i3 (supported in > the current Intel "Moorestown" platform) > > so it's slightly more complex than "just" suspend :) Something specific for this arch could get introduced, similar as Jean did for the ARM specifics, e.g.: power:moorestown_suspend Intel probably invented three names for this new technique, one might fit as an event name? Depending whether extra info needs passed through this event it could also use power:system_suspend and pass a suspend state of #define S0i1 0x100, #define S0i2 0x101... I try to find time to come up with another cleanup patch. I also want to look at perf timechart then where I remember some ugly hacks with C-state accounting and the broken state_start, state_end and frequency_switch events. Hope it won't get too ugly and perf timechart can support both, the old and the cleaned up events for a while. Thomas ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-29 7:49 ` Thomas Renninger @ 2010-09-29 9:25 ` Jean Pihet 0 siblings, 0 replies; 47+ messages in thread From: Jean Pihet @ 2010-09-29 9:25 UTC (permalink / raw) To: Thomas Renninger Cc: Arjan van de Ven, Rafael J. Wysocki, Ingo Molnar, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, linux-pm, linux-omap, linux-perf-users, linux-trace-users Hi, On Wed, Sep 29, 2010 at 9:49 AM, Thomas Renninger <trenn@suse.de> wrote: > On Tuesday 28 September 2010 23:45:24 Arjan van de Ven wrote: >> On 9/28/2010 2:22 PM, Rafael J. Wysocki wrote: >> > On Tuesday, September 28, 2010, Jean Pihet wrote: >> >> Hi, >> > Hi, >> > >> >> Here is what I am proposing, in reply to all your comments: >> >> >> >> 1) rename the events to match Thomas's proposal: >> >> power:power_cpu_cstate >> >> power:power_cpu_pstate >> >> power:power_cpu_sstate > I'd not name it that X86/ACPI specific. > power:processor_sleep > power:processor_frequency > power:system_suspend > This would map with X86/ACPI c/p/s states but the name would > also match fine with ARM and other archs. Good for me! > >> > If that sstate thing is going to mean "suspend", then please drop >> > it. >> > "Suspend" is not a state, let alone a CPU state. It is a procedure >> > by which the (entire) system is put into a sleep state (that is not >> > confined to CPUs). >> >> there are also non-suspend S states, like S0i1 and S0i3 (supported in >> the current Intel "Moorestown" platform) >> >> so it's slightly more complex than "just" suspend :) > Something specific for this arch could get introduced, similar as > Jean did for the ARM specifics, e.g.: > power:moorestown_suspend I would not introduce arch specific events. Your other proposal below is more generic. > Intel probably invented three names for this new technique, one > might fit as an event name? > Depending whether extra info needs passed through this event it > could also use > power:system_suspend > and pass a suspend state of #define S0i1 0x100, #define S0i2 0x101... I am OK with that. > > I try to find time to come up with another cleanup patch. > I also want to look at perf timechart then where I remember some ugly > hacks with C-state accounting and the broken state_start, state_end and > frequency_switch events. Hope it won't get too ugly and perf timechart > can support both, the old and the cleaned up events for a while. About pytimechart, I think it should be updated with the support for the new events only. The current version is not perfect but supports the current events correctly. I am planning to celan up and upgrade that tool when the new API is out. If that could force people to upgrade to the new events API asap... > > Thomas > I know you want to see real code. Let me come with a respin of the patch asap (it is a matter of days). Thanks, Jean -- To unsubscribe from this list: send the line "unsubscribe linux-omap" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [linux-pm] [PATCH] tracing, perf: add more power related events 2010-09-28 21:45 ` Arjan van de Ven 2010-09-28 22:05 ` Rafael J. Wysocki 2010-09-29 7:49 ` Thomas Renninger @ 2010-10-04 17:55 ` Pavel Machek 2 siblings, 0 replies; 47+ messages in thread From: Pavel Machek @ 2010-10-04 17:55 UTC (permalink / raw) To: Arjan van de Ven Cc: Rafael J. Wysocki, Len Brown, linux-trace-users, Peter Zijlstra, Jean Pihet, linux-kernel, linux-perf-users, linux-pm, Ingo Molnar, linux-omap, arjan Hi! > >> Here is what I am proposing, in reply to all your comments: > >> > >> 1) rename the events to match Thomas's proposal: > >> power:power_cpu_cstate > >> power:power_cpu_pstate > >> power:power_cpu_sstate > > If that sstate thing is going to mean "suspend", then please drop it. > > "Suspend" is not a state, let alone a CPU state. It is a procedure by which > > the (entire) system is put into a sleep state (that is not confined to CPUs). > > there are also non-suspend S states, like S0i1 and S0i3 (supported in > the current Intel "Moorestown" platform) Where can one learn more? Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-17 14:05 ` Jean Pihet 2010-09-17 14:24 ` Ingo Molnar @ 2010-09-17 15:29 ` Thomas Renninger 2010-09-17 21:58 ` Thomas Renninger ` (3 subsequent siblings) 5 siblings, 0 replies; 47+ messages in thread From: Thomas Renninger @ 2010-09-17 15:29 UTC (permalink / raw) To: Jean Pihet Cc: Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users On Friday 17 September 2010 16:05:51 Jean Pihet wrote: > Hi Thomas, > > On Fri, Sep 17, 2010 at 3:08 PM, Thomas Renninger <trenn@suse.de> wrote: > > Hi, > > > > I had a quick look at this and it's amazing how broken > > the whole power event tracing interfaces are. > > It's not your fault, Jean, they always were and adding your stuff is > > fine. > That is the whole point! This code needs a serious clean-up and reorganization. > The patch I submitted is now merged in the tip tree, but the can be > corrected once the new power trace API is agreed on. Your generic patch is ok. Not that sure about the clock stuff. How many different clocks do exist on ARM/OMAP? Possibly this can all go into a: power_switch_state(type=, state=) interface and you define some ARM specific static ones like: + POWER_NONE = 0, + POWER_CSTATE = 1, /* C-State */ + POWER_PSTATE = 2, /* Fequency change or DVFS */ + POWER_SSTATE = 3, /* Suspend */ + POWER_OMAP_CLOCK_1 = 4, /* OMAP clock XY */ + POWER_OMAP_CLOCK_2 = 5, /* OMAP clock XYZ */ ... > > > Some questions, maybe I've overseen something: > > > > Why does this event: > > DEFINE_EVENT(power, power_frequency, > > exist and takes a C-/P-state, now also an S-state type as argument? > There is no clear link between the POWER_ types enum and the API, that > needs to change. Yep. At least for the frequency (P-state) and idle (C-state) states I have a reasonable solution, see below. > > > It should be named more generic, like: > > DEFINE_EVENT(power, power_switch, > > then it could get invoked when any P-/C-/S-/X- state > > switch happened. > Agree. We need some better definitions for the events and at the same > time a more flexible way to pass extra arguments (e.g. like a return > code or the real HW state in case the desired state is not reached). > Also it is required to be able to track sub-states. > The idea is to identify the C-states latency in the low power entry > and exit paths, using 2 new macros in the enum and a sub state > argument. Cf. http://omappedia.org/images/b/b9/Pytimechart_screenshot9_rs.jpg > (from http://omappedia.org/wiki/Power_Management_Debug_and_Profiling). IMO one does not need an exit path, see below. > > What kind of hack is this: > > TRACE_EVENT(power_end, > > showing up as: > > power_end: dummy=65535 > > What ends here? > > I know it's a workaround/hack for userspace apps to be > > able to detect leaving of a sleep state, but how would someone > > know or how would someone describe this in a proper API documentation? > That was a hack, it is now gone. The new power_end only takes the > cpu_id argument. The CPU/machine/device always is in a C-/P-/T-/S- whatever state. Typically 0 means it's in a non-power saving state. So issueing a power_switch_state(type=X, state=0) would be the exit path. There is no need for a separate: power_end(type=X) call. Compare with my S-state ACPI example, I'll send in a minute. > > Apropos documentation..., are the power trace events documented > > somewhere? > No. We need something like Documentation/trace/events-kmem.txt. I can > write that with for the new power API. > > > At least the state should still be passed, then the _start/_end thing > > can be reused by something else than C-states. > > > > I can't see the use of having _start/_end events at all. > > You are always in a state, having one: > > power_switch > > as suggested above, is enough to track everything. > Most of the events can be converted to power_switch, but not all. > Example: you need to know when the suspend ends. power_switch_state(type=POWER_SSTATE, state=0) > > > > Examples: > > > > Today's C-state tracking: > > power_start: type=1 state=1 -> C1 entered > > power_end: dummy=65535 -> C-state left > > power_start: type=1 state=2 -> C2 entered > > power_end: dummy=65535 -> C-state left > > would then be: > > power_switch: type=1 state=1 -> C1 entered > > power_switch: type=1 state=0 -> C0 entered -> means C1 left... > > power_switch: type=1 state=2 -> C2 entered > > power_switch: type=1 state=0 -> C0 entered -> means C2 left... > > ... > > > > Todays P-state tracking: > > power_frequency: type=2 state=125000000 -> P-state change to 125 MHz > > power_frequency: type=2 state=90000000 -> P-state change to 90 MHz > > would then be: > > power_switch: type=2 state=125000000 -> P-state change to 125 MHz > > power_switch: type=2 state=90000000 -> P-state change to 90 MHz > > > > The S-state and T-state tracking would fit into that without > > modification. > > Thinking one step further, a possibility to track D-states would > What are the T- and D- states? T-states are processor throttling. Should only get used in critical thermal conditions, fits nice into this interface. D-states are device states, defined in ACPI from D0 (no power savings active, up to D3 (max powersavings active). It's up to the device what it's doing with it. This one does not fit into a generic power_switch_state, because: - you want a connection to the device exported - possibly you want to be smarter than ACPI and tell what exactly a device switches off. I'd leave device specific events out of the discussion for now. > > need an additional field, a pointer to the device, best a sysfs path > > or similar. > Agree on that. As said above I would like to have extra args. > > > Jean, I do not think tracing the S-state with power_start is a > > good idea. Best would be the power_frequency gets renamed > > >(yes, breaks > > userspace, but those have to be adjusted) > BTW I can patch pytimechart. What other tools have to change? powertop ...? That would be great. AFAIK the code use trace/perf events in powertop is not yet released. > > and used for P- and S- > > state tracking (and C-state tracking as well, but this would need > > additional userspace modifications). > OK > > > How do you track when the S-states end? > Two options: 1) have new arguments that indicates enter/exit and a > sub-state; 2) a new event fot the exit. > I am in favor of 1 which is more generic. See above, I'd use: power_switch_state(type=POWER_SSTATE, state=0) I'll post patches to get C-states and S-states making use of a generic: power_switch_state(...) function with still providing old events to not break existing tools. Thomas ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-17 14:05 ` Jean Pihet 2010-09-17 14:24 ` Ingo Molnar 2010-09-17 15:29 ` Thomas Renninger @ 2010-09-17 21:58 ` Thomas Renninger 2010-09-17 22:04 ` Thomas Renninger ` (2 subsequent siblings) 5 siblings, 0 replies; 47+ messages in thread From: Thomas Renninger @ 2010-09-17 21:58 UTC (permalink / raw) To: Jean Pihet Cc: Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users Some patches for cleanup... compile tested only... Should not break existing user space apps, but they should get converted asap to use power_swtich_state... --- power-trace: Rename power frequency to power_switch_state this interface/function is not intended for frequency changes only, but should get used for any P- (processor frequency), C- (processor sleep), T- (processor throttling), or S- (machine sleep) state. Since it's used in cpufreq.c which must be compiled in and not in acpi-cpufreq.c anymore there is no need to export it for modules. Signed-off-by: Thomas Renninger <trenn@suse.de> --- drivers/cpufreq/cpufreq.c | 1 + include/trace/events/power.h | 7 +++++++ kernel/trace/power-traces.c | 1 - 3 files changed, 8 insertions(+), 1 deletion(-) Index: linux-2.6.35-master/drivers/cpufreq/cpufreq.c =================================================================== --- linux-2.6.35-master.orig/drivers/cpufreq/cpufreq.c +++ linux-2.6.35-master/drivers/cpufreq/cpufreq.c @@ -354,6 +354,7 @@ void cpufreq_notify_transition(struct cp adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); dprintk("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new, (unsigned long)freqs->cpu); + trace_power_switch_state(POWER_PSTATE, freqs->new, freqs->cpu); trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu); srcu_notifier_call_chain(&cpufreq_transition_notifier_list, CPUFREQ_POSTCHANGE, freqs); Index: linux-2.6.35-master/include/trace/events/power.h =================================================================== --- linux-2.6.35-master.orig/include/trace/events/power.h +++ linux-2.6.35-master/include/trace/events/power.h @@ -38,6 +38,13 @@ DECLARE_EVENT_CLASS(power, (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) ); +DEFINE_EVENT(power, power_switch_state, + + TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), + + TP_ARGS(type, state, cpu_id) +); + DEFINE_EVENT(power, power_start, TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), Index: linux-2.6.35-master/kernel/trace/power-traces.c =================================================================== --- linux-2.6.35-master.orig/kernel/trace/power-traces.c +++ linux-2.6.35-master/kernel/trace/power-traces.c @@ -13,5 +13,4 @@ #define CREATE_TRACE_POINTS #include <trace/events/power.h> -EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-17 14:05 ` Jean Pihet ` (2 preceding siblings ...) 2010-09-17 21:58 ` Thomas Renninger @ 2010-09-17 22:04 ` Thomas Renninger 2010-09-17 22:10 ` Thomas Renninger 2010-09-17 22:17 ` Thomas Renninger 5 siblings, 0 replies; 47+ messages in thread From: Thomas Renninger @ 2010-09-17 22:04 UTC (permalink / raw) To: Jean Pihet Cc: Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users power-trace: Use power_switch_state instead of power_start and power_end No need to have power_start and power_end. power_switch_state of state=0 means we exited power saving state. Userspace has all the information it needs to detect power enter/exit case. Export it, so that intel_idle can make use of it, even if compiled as module. Signed-off-by: Thomas Renninger <trenn@suse.de> --- arch/x86/kernel/process.c | 9 +++++++-- drivers/cpuidle/cpuidle.c | 3 +++ drivers/idle/intel_idle.c | 3 +++ kernel/trace/power-traces.c | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) Index: linux-2.6.35-master/arch/x86/kernel/process.c =================================================================== --- linux-2.6.35-master.orig/arch/x86/kernel/process.c +++ linux-2.6.35-master/arch/x86/kernel/process.c @@ -375,7 +375,10 @@ static inline int hlt_use_halt(void) void default_idle(void) { if (hlt_use_halt()) { + /* trace_power_start is deprecated, remove it after a while */ trace_power_start(POWER_CSTATE, 1, smp_processor_id()); + trace_power_switch_state(POWER_CSTATE, 1, smp_processor_id()); + current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we @@ -446,6 +449,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); + trace_power_switch_state(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); + if (!need_resched()) { if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -462,6 +467,8 @@ static void mwait_idle(void) { if (!need_resched()) { trace_power_start(POWER_CSTATE, 1, smp_processor_id()); + trace_power_switch_state(POWER_CSTATE, 1, smp_processor_id()); + if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -482,11 +489,9 @@ static void mwait_idle(void) */ static void poll_idle(void) { - trace_power_start(POWER_CSTATE, 0, smp_processor_id()); local_irq_enable(); while (!need_resched()) cpu_relax(); - trace_power_end(0); } /* Index: linux-2.6.35-master/drivers/cpuidle/cpuidle.c =================================================================== --- linux-2.6.35-master.orig/drivers/cpuidle/cpuidle.c +++ linux-2.6.35-master/drivers/cpuidle/cpuidle.c @@ -106,7 +106,10 @@ static void cpuidle_idle_call(void) /* give the governor an opportunity to reflect on the outcome */ if (cpuidle_curr_governor->reflect) cpuidle_curr_governor->reflect(dev); + + /* trace_power_end is deprecated, remove it after a while */ trace_power_end(smp_processor_id()); + trace_power_switch_state(POWER_CSTATE, 0, smp_processor_id()); } /** Index: linux-2.6.35-master/drivers/idle/intel_idle.c =================================================================== --- linux-2.6.35-master.orig/drivers/idle/intel_idle.c +++ linux-2.6.35-master/drivers/idle/intel_idle.c @@ -192,8 +192,11 @@ static int intel_idle(struct cpuidle_dev stop_critical_timings(); #ifndef MODULE + /* trace_power_start is deprecated, remove it after a while */ trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu); #endif + trace_power_switch_state(POWER_CSTATE, (eax >> 4) + 1, smp_processor_id()); + if (!need_resched()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); Index: linux-2.6.35-master/kernel/trace/power-traces.c =================================================================== --- linux-2.6.35-master.orig/kernel/trace/power-traces.c +++ linux-2.6.35-master/kernel/trace/power-traces.c @@ -13,4 +13,4 @@ #define CREATE_TRACE_POINTS #include <trace/events/power.h> - +EXPORT_TRACEPOINT_SYMBOL_GPL(power_switch_state); ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-17 14:05 ` Jean Pihet ` (3 preceding siblings ...) 2010-09-17 22:04 ` Thomas Renninger @ 2010-09-17 22:10 ` Thomas Renninger 2010-09-17 22:17 ` Thomas Renninger 5 siblings, 0 replies; 47+ messages in thread From: Thomas Renninger @ 2010-09-17 22:10 UTC (permalink / raw) To: Jean Pihet Cc: Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users power-trace: Add x86 ACPI S- (machine sleep) state events. Signed-off-by: Thomas Renninger <trenn@suse.de> --- drivers/acpi/sleep.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) Index: linux-2.6.35-master/drivers/acpi/sleep.c =================================================================== --- linux-2.6.35-master.orig/drivers/acpi/sleep.c +++ linux-2.6.35-master/drivers/acpi/sleep.c @@ -17,6 +17,8 @@ #include <linux/suspend.h> #include <linux/reboot.h> +#include <trace/events/power.h> + #include <asm/io.h> #include <acpi/acpi_bus.h> @@ -249,14 +251,19 @@ static int acpi_suspend_enter(suspend_st unsigned long flags = 0; u32 acpi_state = acpi_target_sleep_state; + trace_power_switch_state(POWER_SSTATE, acpi_state, 0); + ACPI_FLUSH_CPU_CACHE(); /* Do arch specific saving of state. */ if (acpi_state == ACPI_STATE_S3) { int error = acpi_save_state_mem(); - if (error) + if (error) { + trace_power_switch_state(POWER_SSTATE, + ACPI_STATE_S0, 0); return error; + } } local_irq_save(flags); @@ -309,6 +316,8 @@ static int acpi_suspend_enter(suspend_st suspend_nvs_restore(); + trace_power_switch_state(POWER_SSTATE, ACPI_STATE_S0, 0); + return ACPI_SUCCESS(status) ? 0 : -EFAULT; } ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH] tracing, perf: add more power related events 2010-09-17 14:05 ` Jean Pihet ` (4 preceding siblings ...) 2010-09-17 22:10 ` Thomas Renninger @ 2010-09-17 22:17 ` Thomas Renninger 5 siblings, 0 replies; 47+ messages in thread From: Thomas Renninger @ 2010-09-17 22:17 UTC (permalink / raw) To: Jean Pihet Cc: Ingo Molnar, Arjan van de Ven, Peter Zijlstra, Len Brown, arjan, Kevin Hilman, linux-kernel, discuss, linux-pm, linux-omap, linux-perf-users, linux-trace-users DO NOT APPLY THIS ONE!!! The others should go into a mainline tree if Jean is ok with them. This one does not work, due to some include dependencies or whatever else I can't see right now. The idea: Provide old trace power interfaces via .config option to not break existing perf/powertop/.. binaries. Not sure whether it's worth looking at it further. It shouldn't be needed... Thomas --- include/trace/events/power.h | 11 +++++++++++ kernel/trace/Kconfig | 4 ++++ 2 files changed, 15 insertions(+) Index: linux-2.6.35-master/kernel/trace/Kconfig =================================================================== --- linux-2.6.35-master.orig/kernel/trace/Kconfig +++ linux-2.6.35-master/kernel/trace/Kconfig @@ -64,6 +64,10 @@ config EVENT_TRACING select CONTEXT_SWITCH_TRACER bool +config DEPRECATED_POWER_EVENT_TRACING + bool + default n + config CONTEXT_SWITCH_TRACER bool Index: linux-2.6.35-master/include/trace/events/power.h =================================================================== --- linux-2.6.35-master.orig/include/trace/events/power.h +++ linux-2.6.35-master/include/trace/events/power.h @@ -50,6 +50,7 @@ DEFINE_EVENT(power, power_switch_state, TP_ARGS(type, state, cpu_id) ); +#ifdef CONFIG_DEPRECATED_POWER_EVENT_TRACING DEFINE_EVENT(power, power_start, TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), @@ -81,6 +82,16 @@ TRACE_EVENT(power_end, TP_printk("cpu_id=%lu", (unsigned long)__entry->cpu_id) ); +#else +inline void trace_power_end(unsigned int cpu_id) +{} +inline void trace_power_start(unsigned int type, unsigned int state, + unsigned int cpu_id) +{} +inline void trace_power_frequency(unsigned int type, unsigned int state, + unsigned int cpu_id) +{} +#endif /* * The clock events are used for clock enable/disable and for ^ permalink raw reply [flat|nested] 47+ messages in thread
end of thread, other threads:[~2010-10-04 17:55 UTC | newest] Thread overview: 47+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2010-09-07 7:21 [PATCH] tracing, perf: add more power related events Jean Pihet 2010-09-07 8:01 ` Jean Pihet 2010-09-08 6:53 ` Ingo Molnar 2010-09-09 7:15 ` Jean Pihet 2010-09-09 7:54 ` Ingo Molnar 2010-09-15 15:45 ` Jean Pihet 2010-09-17 13:08 ` Thomas Renninger 2010-09-17 14:05 ` Jean Pihet 2010-09-17 14:24 ` Ingo Molnar 2010-09-17 15:36 ` Thomas Renninger 2010-09-17 16:24 ` Ingo Molnar 2010-09-17 22:26 ` Thomas Renninger 2010-09-22 15:31 ` Jean Pihet 2010-09-22 15:33 ` Arjan van de Ven 2010-09-22 15:36 ` Jean Pihet 2010-09-22 16:32 ` Arjan van de Ven 2010-09-22 16:43 ` Peter Zijlstra 2010-09-22 17:06 ` Arjan van de Ven 2010-09-22 17:30 ` Peter Zijlstra 2010-09-22 18:15 ` Steven Rostedt 2010-09-22 18:23 ` Ingo Molnar 2010-09-22 18:30 ` Peter Zijlstra 2010-09-22 18:26 ` Peter Zijlstra 2010-09-22 18:36 ` Steven Rostedt 2010-09-22 18:43 ` Peter Zijlstra 2010-09-22 19:14 ` Frank Ch. Eigler 2010-09-22 17:36 ` Thomas Renninger 2010-09-22 16:47 ` Peter Zijlstra 2010-09-22 17:57 ` Thomas Renninger 2010-09-22 18:13 ` Arjan van de Ven 2010-09-22 18:34 ` Thomas Renninger 2010-09-22 18:09 ` Rafael J. Wysocki 2010-09-22 18:49 ` Rafael J. Wysocki 2010-09-28 8:35 ` Jean Pihet 2010-09-28 21:22 ` Rafael J. Wysocki 2010-09-28 21:43 ` Jean Pihet 2010-09-28 22:05 ` Rafael J. Wysocki 2010-09-28 21:45 ` Arjan van de Ven 2010-09-28 22:05 ` Rafael J. Wysocki 2010-09-29 7:49 ` Thomas Renninger 2010-09-29 9:25 ` Jean Pihet 2010-10-04 17:55 ` [linux-pm] " Pavel Machek 2010-09-17 15:29 ` Thomas Renninger 2010-09-17 21:58 ` Thomas Renninger 2010-09-17 22:04 ` Thomas Renninger 2010-09-17 22:10 ` Thomas Renninger 2010-09-17 22:17 ` Thomas Renninger
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).