* [PATCH] cpufreq: fix conservative/ondemand behaviour with ignore_nice_load
@ 2009-11-06 16:27 Alexander Miller
2009-11-10 19:42 ` Pallipadi, Venkatesh
[not found] ` <B5B0CFF685D7DF46A05CF1678CFB42ED20E0C63D@orsmsx505.amr.corp.intel.com>
0 siblings, 2 replies; 6+ messages in thread
From: Alexander Miller @ 2009-11-06 16:27 UTC (permalink / raw)
To: linux-kernel; +Cc: cpufreq, Dave Jones
Remove conversion of nice load to microseconds which caused addition
of times measured in different units and thus unreasonable behaviour
with both governors.
Signed-off-by: Alexander Miller <Miller@fmi.uni-stuttgart.de>
---
diff -uprN linux-2.6.git/drivers/cpufreq/cpufreq_conservative.c linux/drivers/cpufreq/cpufreq_conservative.c
--- linux-2.6.git/drivers/cpufreq/cpufreq_conservative.c 2009-10-10 15:56:58.010595257 +0200
+++ linux/drivers/cpufreq/cpufreq_conservative.c 2009-10-10 20:56:52.194598889 +0200
@@ -400,20 +400,10 @@ static void dbs_check_cpu(struct cpu_dbs
j_dbs_info->prev_cpu_idle = cur_idle_time;
if (dbs_tuners_ins.ignore_nice) {
- cputime64_t cur_nice;
- unsigned long cur_nice_jiffies;
-
- cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
+ idle_time += (unsigned int) cputime64_sub(
+ kstat_cpu(j).cpustat.nice,
j_dbs_info->prev_cpu_nice);
- /*
- * Assumption: nice time between sampling periods will
- * be less than 2^32 jiffies for 32 bit sys
- */
- cur_nice_jiffies = (unsigned long)
- cputime64_to_jiffies64(cur_nice);
-
j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
- idle_time += jiffies_to_usecs(cur_nice_jiffies);
}
if (unlikely(!wall_time || wall_time < idle_time))
diff -uprN linux-2.6.git/drivers/cpufreq/cpufreq_ondemand.c linux/drivers/cpufreq/cpufreq_ondemand.c
--- linux-2.6.git/drivers/cpufreq/cpufreq_ondemand.c 2009-10-10 16:00:40.380595816 +0200
+++ linux/drivers/cpufreq/cpufreq_ondemand.c 2009-10-10 20:56:52.195596654 +0200
@@ -488,20 +488,10 @@ static void dbs_check_cpu(struct cpu_dbs
j_dbs_info->prev_cpu_idle = cur_idle_time;
if (dbs_tuners_ins.ignore_nice) {
- cputime64_t cur_nice;
- unsigned long cur_nice_jiffies;
-
- cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
+ idle_time += (unsigned int) cputime64_sub(
+ kstat_cpu(j).cpustat.nice,
j_dbs_info->prev_cpu_nice);
- /*
- * Assumption: nice time between sampling periods will
- * be less than 2^32 jiffies for 32 bit sys
- */
- cur_nice_jiffies = (unsigned long)
- cputime64_to_jiffies64(cur_nice);
-
j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
- idle_time += jiffies_to_usecs(cur_nice_jiffies);
}
if (unlikely(!wall_time || wall_time < idle_time))
^ permalink raw reply [flat|nested] 6+ messages in thread* RE: [PATCH] cpufreq: fix conservative/ondemand behaviour with ignore_nice_load 2009-11-06 16:27 [PATCH] cpufreq: fix conservative/ondemand behaviour with ignore_nice_load Alexander Miller @ 2009-11-10 19:42 ` Pallipadi, Venkatesh [not found] ` <B5B0CFF685D7DF46A05CF1678CFB42ED20E0C63D@orsmsx505.amr.corp.intel.com> 1 sibling, 0 replies; 6+ messages in thread From: Pallipadi, Venkatesh @ 2009-11-10 19:42 UTC (permalink / raw) To: Alexander Miller, linux-kernel@vger.kernel.org Cc: cpufreq@vger.kernel.org, Dave Jones >-----Original Message----- >From: cpufreq-owner@vger.kernel.org >[mailto:cpufreq-owner@vger.kernel.org] On Behalf Of Alexander Miller >Sent: Friday, November 06, 2009 8:27 AM >To: linux-kernel@vger.kernel.org >Cc: cpufreq@vger.kernel.org; Dave Jones >Subject: [PATCH] cpufreq: fix conservative/ondemand behaviour >with ignore_nice_load > >Remove conversion of nice load to microseconds which caused addition >of times measured in different units and thus unreasonable behaviour >with both governors. Can you describe the "unreasonable behavior" you are seeing? Is it with NO_HZ enabled or disabled? I see there can be a problem with this code when NO_HZ is disabled. But, the patch below is not the right solution as it will result in adding times in different units with NO_HZ enabled. 
Thanks, Venki > >Signed-off-by: Alexander Miller <Miller@fmi.uni-stuttgart.de> >--- >diff -uprN >linux-2.6.git/drivers/cpufreq/cpufreq_conservative.c >linux/drivers/cpufreq/cpufreq_conservative.c >--- linux-2.6.git/drivers/cpufreq/cpufreq_conservative.c >2009-10-10 15:56:58.010595257 +0200 >+++ linux/drivers/cpufreq/cpufreq_conservative.c >2009-10-10 20:56:52.194598889 +0200 >@@ -400,20 +400,10 @@ static void dbs_check_cpu(struct cpu_dbs > j_dbs_info->prev_cpu_idle = cur_idle_time; > > if (dbs_tuners_ins.ignore_nice) { >- cputime64_t cur_nice; >- unsigned long cur_nice_jiffies; >- >- cur_nice = >cputime64_sub(kstat_cpu(j).cpustat.nice, >+ idle_time += (unsigned int) cputime64_sub( >+ kstat_cpu(j).cpustat.nice, > j_dbs_info->prev_cpu_nice); >- /* >- * Assumption: nice time between >sampling periods will >- * be less than 2^32 jiffies for 32 bit sys >- */ >- cur_nice_jiffies = (unsigned long) >- >cputime64_to_jiffies64(cur_nice); >- > j_dbs_info->prev_cpu_nice = >kstat_cpu(j).cpustat.nice; >- idle_time += jiffies_to_usecs(cur_nice_jiffies); > } > > if (unlikely(!wall_time || wall_time < idle_time)) >diff -uprN linux-2.6.git/drivers/cpufreq/cpufreq_ondemand.c >linux/drivers/cpufreq/cpufreq_ondemand.c >--- linux-2.6.git/drivers/cpufreq/cpufreq_ondemand.c >2009-10-10 16:00:40.380595816 +0200 >+++ linux/drivers/cpufreq/cpufreq_ondemand.c 2009-10-10 >20:56:52.195596654 +0200 >@@ -488,20 +488,10 @@ static void dbs_check_cpu(struct cpu_dbs > j_dbs_info->prev_cpu_idle = cur_idle_time; > > if (dbs_tuners_ins.ignore_nice) { >- cputime64_t cur_nice; >- unsigned long cur_nice_jiffies; >- >- cur_nice = >cputime64_sub(kstat_cpu(j).cpustat.nice, >+ idle_time += (unsigned int) cputime64_sub( >+ kstat_cpu(j).cpustat.nice, > j_dbs_info->prev_cpu_nice); >- /* >- * Assumption: nice time between >sampling periods will >- * be less than 2^32 jiffies for 32 bit sys >- */ >- cur_nice_jiffies = (unsigned long) >- >cputime64_to_jiffies64(cur_nice); >- > j_dbs_info->prev_cpu_nice = 
>kstat_cpu(j).cpustat.nice; >- idle_time += jiffies_to_usecs(cur_nice_jiffies); > } > > if (unlikely(!wall_time || wall_time < idle_time)) >-- >To unsubscribe from this list: send the line "unsubscribe cpufreq" in >the body of a message to majordomo@vger.kernel.org >More majordomo info at http://vger.kernel.org/majordomo-info.html > ^ permalink raw reply [flat|nested] 6+ messages in thread
[parent not found: <B5B0CFF685D7DF46A05CF1678CFB42ED20E0C63D@orsmsx505.amr.corp.intel.com>]
* Re: [PATCH] cpufreq: fix conservative/ondemand behaviour with ignore_nice_load [not found] ` <B5B0CFF685D7DF46A05CF1678CFB42ED20E0C63D@orsmsx505.amr.corp.intel.com> @ 2009-11-10 22:20 ` Pallipadi, Venkatesh 2009-11-11 22:10 ` Alexander Miller 0 siblings, 1 reply; 6+ messages in thread From: Pallipadi, Venkatesh @ 2009-11-10 22:20 UTC (permalink / raw) To: Alexander Miller Cc: linux-kernel@vger.kernel.org, cpufreq@vger.kernel.org, Dave Jones On Tue, Nov 10, 2009 at 11:42:02AM -0800, Pallipadi, Venkatesh wrote: > >-----Original Message----- > >From: cpufreq-owner@vger.kernel.org > >[mailto:cpufreq-owner@vger.kernel.org] On Behalf Of Alexander Miller > >Sent: Friday, November 06, 2009 8:27 AM > >To: linux-kernel@vger.kernel.org > >Cc: cpufreq@vger.kernel.org; Dave Jones > >Subject: [PATCH] cpufreq: fix conservative/ondemand behaviour > >with ignore_nice_load > > > >Remove conversion of nice load to microseconds which caused addition > >of times measured in different units and thus unreasonable behaviour > >with both governors. > > Can you describe the "unresonable behavior" you are seeing. Is it > with NO_HZ enabled or disabled? > > I see there can be a problem with this code when NO_HZ is disabled. > But, the patch below is not the right solution as it will result in > Adding times in different units with NO_HZ enabled. > > Thanks, > Venki Does the below test patch (only compile tested) resolve the problem you are seeing? 
Thanks, Venki --- drivers/cpufreq/cpufreq_conservative.c | 4 ++-- drivers/cpufreq/cpufreq_ondemand.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index bc33ddc..c7b081b 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -116,9 +116,9 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, idle_time = cputime64_sub(cur_wall_time, busy_time); if (wall) - *wall = cur_wall_time; + *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); - return idle_time; + return (cputime64_t)jiffies_to_usecs(idle_time);; } static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 071699d..4b34ade 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -133,9 +133,9 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, idle_time = cputime64_sub(cur_wall_time, busy_time); if (wall) - *wall = cur_wall_time; + *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); - return idle_time; + return (cputime64_t)jiffies_to_usecs(idle_time); } static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) -- 1.6.0.6 ^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] cpufreq: fix conservative/ondemand behaviour with ignore_nice_load 2009-11-10 22:20 ` Pallipadi, Venkatesh @ 2009-11-11 22:10 ` Alexander Miller 2009-11-12 0:20 ` Pallipadi, Venkatesh 2009-11-12 0:50 ` [PATCH] Resolve time unit thinko in ondemand/conservative govs Pallipadi, Venkatesh 0 siblings, 2 replies; 6+ messages in thread From: Alexander Miller @ 2009-11-11 22:10 UTC (permalink / raw) To: Pallipadi, Venkatesh Cc: linux-kernel@vger.kernel.org, cpufreq@vger.kernel.org, Dave Jones On Tue, Nov 10, 2009, Pallipadi, Venkatesh wrote: > > Can you describe the "unresonable behavior" you are seeing. Is it > > with NO_HZ enabled or disabled? $ zgrep NO_HZ /proc/config.gz # CONFIG_NO_HZ is not set When there are two cpu-intense processes, one with nice 19 and the other with nice 0, then the latter will use almost 100% cpu time, of course. But the cpu has been stuck at the lowest frequency without the patch. To be exact, it would change the freq sometimes, but return to the lowest freq within a fraction of a second. I would expect it to select a freq such that the non-nice processes take <80% or the highest freq (which it does with the patch). > > I see there can be a problem with this code when NO_HZ is disabled. > > But, the patch below is not the right solution as it will result in > > Adding times in different units with NO_HZ enabled. Yes, you are right. Looks like I've patched the wrong half of the inconsistency :-( I think it's a bit irritating you are using cputime64_t to store microseconds. At least it fooled me (I'm no kernel guy though) into thinking that get_cpu_idle_time_jiffy() returning jiffies was the intended behaviour. > Does the below test patch (only compile tested) resolve the problem you > are seeing? I've just rebooted the machine with the new patched kernel, and it looks good. 
Thank you, Alex > --- > drivers/cpufreq/cpufreq_conservative.c | 4 ++-- > drivers/cpufreq/cpufreq_ondemand.c | 4 ++-- > 2 files changed, 4 insertions(+), 4 deletions(-) > > diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c > index bc33ddc..c7b081b 100644 > --- a/drivers/cpufreq/cpufreq_conservative.c > +++ b/drivers/cpufreq/cpufreq_conservative.c > @@ -116,9 +116,9 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, > > idle_time = cputime64_sub(cur_wall_time, busy_time); > if (wall) > - *wall = cur_wall_time; > + *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); > > - return idle_time; > + return (cputime64_t)jiffies_to_usecs(idle_time);; > } > > static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) > diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c > index 071699d..4b34ade 100644 > --- a/drivers/cpufreq/cpufreq_ondemand.c > +++ b/drivers/cpufreq/cpufreq_ondemand.c > @@ -133,9 +133,9 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, > > idle_time = cputime64_sub(cur_wall_time, busy_time); > if (wall) > - *wall = cur_wall_time; > + *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); > > - return idle_time; > + return (cputime64_t)jiffies_to_usecs(idle_time); > } > > static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) > -- > 1.6.0.6 ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] cpufreq: fix conservative/ondemand behaviour with ignore_nice_load 2009-11-11 22:10 ` Alexander Miller @ 2009-11-12 0:20 ` Pallipadi, Venkatesh 2009-11-12 0:50 ` [PATCH] Resolve time unit thinko in ondemand/conservative govs Pallipadi, Venkatesh 1 sibling, 0 replies; 6+ messages in thread From: Pallipadi, Venkatesh @ 2009-11-12 0:20 UTC (permalink / raw) To: Alexander Miller Cc: linux-kernel@vger.kernel.org, cpufreq@vger.kernel.org, Dave Jones On Wed, 2009-11-11 at 14:10 -0800, Alexander Miller wrote: > On Tue, Nov 10, 2009, Pallipadi, Venkatesh wrote: > > > Can you describe the "unresonable behavior" you are seeing. Is it > > > with NO_HZ enabled or disabled? > $ zgrep NO_HZ /proc/config.gz > # CONFIG_NO_HZ is not set > > When there are two cpu-intense processes, one with nice 19 and the other > with nice 0, then the latter will use almost 100% cpu time, of course. > But the cpu has been stuck at the lowest frequency without the patch. > To be exact, it would change the freq sometimes, but return to the > lowest freq within a fraction of a second. > I would expect it to select a freq such that the non-nice processes > take <80% or the highest freq (which it does with the patch). > > > > I see there can be a problem with this code when NO_HZ is disabled. > > > But, the patch below is not the right solution as it will result in > > > Adding times in different units with NO_HZ enabled. > > Yes, you are right. Looks like I've patched the wrong half of the > inconsistency :-( > I think it's a bit irritating you are using cputime64_t to store > microseconds. At least it fooled me (I'm no kernel guy though) into > thinking that get_cpu_idle_time_jiffy() returning jiffies was the > intended behaviour. Agreed. That cputime64 is ugly. There is also some confusion with naming of get_cpu_idle_time_jiffy and get_cpu_idle_time_us and these routines being redundantly repeated across 2 files. Will add them to my todo queue. 
> > > Does the below test patch (only compile tested) resolve the problem you > > are seeing? > > I've just rebooted the machine with the new patched kernel, and > it looks good. Ok. Thanks for reporting (and diagnosing as well :)) and verifying the patch. I will resend the patch with a bit more description and your Reported/Tested-by. Thanks, Venki ^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH] Resolve time unit thinko in ondemand/conservative govs 2009-11-11 22:10 ` Alexander Miller 2009-11-12 0:20 ` Pallipadi, Venkatesh @ 2009-11-12 0:50 ` Pallipadi, Venkatesh 1 sibling, 0 replies; 6+ messages in thread From: Pallipadi, Venkatesh @ 2009-11-12 0:50 UTC (permalink / raw) To: Dave Jones Cc: linux-kernel@vger.kernel.org, cpufreq@vger.kernel.org, Alexander Miller Dave, Here is the fix for the bug reported on this thread. Please Apply. Looks to be a stable candidate as well. Thanks, Venki ondemand and conservative governors are messing up time units in the code path where NO_HZ is not enabled and ignore_nice is set. The walltime idletime stored is in jiffies and nice time calculation is happening in microseconds. The problem was reported and diagnosed by Alexander here. http://marc.info/?l=linux-kernel&m=125752550404513&w=2 The patch below fixes this thinko. Reported-by: Alexander Miller <Miller@fmi.uni-stuttgart.de> Tested-by: Alexander Miller <Miller@fmi.uni-stuttgart.de> Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> --- drivers/cpufreq/cpufreq_conservative.c | 4 ++-- drivers/cpufreq/cpufreq_ondemand.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index bc33ddc..c7b081b 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -116,9 +116,9 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, idle_time = cputime64_sub(cur_wall_time, busy_time); if (wall) - *wall = cur_wall_time; + *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); - return idle_time; + return (cputime64_t)jiffies_to_usecs(idle_time);; } static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 071699d..4b34ade 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ 
b/drivers/cpufreq/cpufreq_ondemand.c @@ -133,9 +133,9 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, idle_time = cputime64_sub(cur_wall_time, busy_time); if (wall) - *wall = cur_wall_time; + *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); - return idle_time; + return (cputime64_t)jiffies_to_usecs(idle_time); } static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) -- 1.6.0.6 ^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2009-11-12 0:50 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2009-11-06 16:27 [PATCH] cpufreq: fix conservative/ondemand behaviour with ignore_nice_load Alexander Miller
2009-11-10 19:42 ` Pallipadi, Venkatesh
[not found] ` <B5B0CFF685D7DF46A05CF1678CFB42ED20E0C63D@orsmsx505.amr.corp.intel.com>
2009-11-10 22:20 ` Pallipadi, Venkatesh
2009-11-11 22:10 ` Alexander Miller
2009-11-12 0:20 ` Pallipadi, Venkatesh
2009-11-12 0:50 ` [PATCH] Resolve time unit thinko in ondemand/conservative govs Pallipadi, Venkatesh
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.