public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] cpufreq: fix conservative/ondemand behaviour with ignore_nice_load
@ 2009-11-06 16:27 Alexander Miller
  2009-11-10 19:42 ` Pallipadi, Venkatesh
       [not found] ` <B5B0CFF685D7DF46A05CF1678CFB42ED20E0C63D@orsmsx505.amr.corp.intel.com>
  0 siblings, 2 replies; 6+ messages in thread
From: Alexander Miller @ 2009-11-06 16:27 UTC (permalink / raw)
  To: linux-kernel; +Cc: cpufreq, Dave Jones

Remove conversion of nice load to microseconds which caused addition
of times measured in different units and thus unreasonable behaviour
with both governors.                                                  

Signed-off-by: Alexander Miller <Miller@fmi.uni-stuttgart.de>
---
diff -uprN linux-2.6.git/drivers/cpufreq/cpufreq_conservative.c linux/drivers/cpufreq/cpufreq_conservative.c
--- linux-2.6.git/drivers/cpufreq/cpufreq_conservative.c	2009-10-10 15:56:58.010595257 +0200
+++ linux/drivers/cpufreq/cpufreq_conservative.c	2009-10-10 20:56:52.194598889 +0200
@@ -400,20 +400,10 @@ static void dbs_check_cpu(struct cpu_dbs
 		j_dbs_info->prev_cpu_idle = cur_idle_time;
 
 		if (dbs_tuners_ins.ignore_nice) {
-			cputime64_t cur_nice;
-			unsigned long cur_nice_jiffies;
-
-			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
+			idle_time += (unsigned int) cputime64_sub(
+					 kstat_cpu(j).cpustat.nice,
 					 j_dbs_info->prev_cpu_nice);
-			/*
-			 * Assumption: nice time between sampling periods will
-			 * be less than 2^32 jiffies for 32 bit sys
-			 */
-			cur_nice_jiffies = (unsigned long)
-					cputime64_to_jiffies64(cur_nice);
-
 			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
-			idle_time += jiffies_to_usecs(cur_nice_jiffies);
 		}
 
 		if (unlikely(!wall_time || wall_time < idle_time))
diff -uprN linux-2.6.git/drivers/cpufreq/cpufreq_ondemand.c linux/drivers/cpufreq/cpufreq_ondemand.c
--- linux-2.6.git/drivers/cpufreq/cpufreq_ondemand.c	2009-10-10 16:00:40.380595816 +0200
+++ linux/drivers/cpufreq/cpufreq_ondemand.c	2009-10-10 20:56:52.195596654 +0200
@@ -488,20 +488,10 @@ static void dbs_check_cpu(struct cpu_dbs
 		j_dbs_info->prev_cpu_idle = cur_idle_time;
 
 		if (dbs_tuners_ins.ignore_nice) {
-			cputime64_t cur_nice;
-			unsigned long cur_nice_jiffies;
-
-			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
+			idle_time += (unsigned int) cputime64_sub(
+					 kstat_cpu(j).cpustat.nice,
 					 j_dbs_info->prev_cpu_nice);
-			/*
-			 * Assumption: nice time between sampling periods will
-			 * be less than 2^32 jiffies for 32 bit sys
-			 */
-			cur_nice_jiffies = (unsigned long)
-					cputime64_to_jiffies64(cur_nice);
-
 			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
-			idle_time += jiffies_to_usecs(cur_nice_jiffies);
 		}
 
 		if (unlikely(!wall_time || wall_time < idle_time))

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH] cpufreq: fix conservative/ondemand behaviour with ignore_nice_load
  2009-11-06 16:27 [PATCH] cpufreq: fix conservative/ondemand behaviour with ignore_nice_load Alexander Miller
@ 2009-11-10 19:42 ` Pallipadi, Venkatesh
       [not found] ` <B5B0CFF685D7DF46A05CF1678CFB42ED20E0C63D@orsmsx505.amr.corp.intel.com>
  1 sibling, 0 replies; 6+ messages in thread
From: Pallipadi, Venkatesh @ 2009-11-10 19:42 UTC (permalink / raw)
  To: Alexander Miller, linux-kernel@vger.kernel.org
  Cc: cpufreq@vger.kernel.org, Dave Jones

>-----Original Message-----
>From: cpufreq-owner@vger.kernel.org 
>[mailto:cpufreq-owner@vger.kernel.org] On Behalf Of Alexander Miller
>Sent: Friday, November 06, 2009 8:27 AM
>To: linux-kernel@vger.kernel.org
>Cc: cpufreq@vger.kernel.org; Dave Jones
>Subject: [PATCH] cpufreq: fix conservative/ondemand behaviour 
>with ignore_nice_load
>
>Remove conversion of nice load to microseconds which caused addition
>of times measured in different units and thus unreasonable behaviour
>with both governors.                                                  

Can you describe the "unreasonable behavior" you are seeing? Is it
with NO_HZ enabled or disabled?

I see there can be a problem with this code when NO_HZ is disabled.
But the patch below is not the right solution, as it will result in
adding times in different units with NO_HZ enabled.

Thanks,
Venki

>
>Signed-off-by: Alexander Miller <Miller@fmi.uni-stuttgart.de>
>---
>diff -uprN 
>linux-2.6.git/drivers/cpufreq/cpufreq_conservative.c 
>linux/drivers/cpufreq/cpufreq_conservative.c
>--- linux-2.6.git/drivers/cpufreq/cpufreq_conservative.c	
>2009-10-10 15:56:58.010595257 +0200
>+++ linux/drivers/cpufreq/cpufreq_conservative.c	
>2009-10-10 20:56:52.194598889 +0200
>@@ -400,20 +400,10 @@ static void dbs_check_cpu(struct cpu_dbs
> 		j_dbs_info->prev_cpu_idle = cur_idle_time;
> 
> 		if (dbs_tuners_ins.ignore_nice) {
>-			cputime64_t cur_nice;
>-			unsigned long cur_nice_jiffies;
>-
>-			cur_nice = 
>cputime64_sub(kstat_cpu(j).cpustat.nice,
>+			idle_time += (unsigned int) cputime64_sub(
>+					 kstat_cpu(j).cpustat.nice,
> 					 j_dbs_info->prev_cpu_nice);
>-			/*
>-			 * Assumption: nice time between 
>sampling periods will
>-			 * be less than 2^32 jiffies for 32 bit sys
>-			 */
>-			cur_nice_jiffies = (unsigned long)
>-					
>cputime64_to_jiffies64(cur_nice);
>-
> 			j_dbs_info->prev_cpu_nice = 
>kstat_cpu(j).cpustat.nice;
>-			idle_time += jiffies_to_usecs(cur_nice_jiffies);
> 		}
> 
> 		if (unlikely(!wall_time || wall_time < idle_time))
>diff -uprN linux-2.6.git/drivers/cpufreq/cpufreq_ondemand.c 
>linux/drivers/cpufreq/cpufreq_ondemand.c
>--- linux-2.6.git/drivers/cpufreq/cpufreq_ondemand.c	
>2009-10-10 16:00:40.380595816 +0200
>+++ linux/drivers/cpufreq/cpufreq_ondemand.c	2009-10-10 
>20:56:52.195596654 +0200
>@@ -488,20 +488,10 @@ static void dbs_check_cpu(struct cpu_dbs
> 		j_dbs_info->prev_cpu_idle = cur_idle_time;
> 
> 		if (dbs_tuners_ins.ignore_nice) {
>-			cputime64_t cur_nice;
>-			unsigned long cur_nice_jiffies;
>-
>-			cur_nice = 
>cputime64_sub(kstat_cpu(j).cpustat.nice,
>+			idle_time += (unsigned int) cputime64_sub(
>+					 kstat_cpu(j).cpustat.nice,
> 					 j_dbs_info->prev_cpu_nice);
>-			/*
>-			 * Assumption: nice time between 
>sampling periods will
>-			 * be less than 2^32 jiffies for 32 bit sys
>-			 */
>-			cur_nice_jiffies = (unsigned long)
>-					
>cputime64_to_jiffies64(cur_nice);
>-
> 			j_dbs_info->prev_cpu_nice = 
>kstat_cpu(j).cpustat.nice;
>-			idle_time += jiffies_to_usecs(cur_nice_jiffies);
> 		}
> 
> 		if (unlikely(!wall_time || wall_time < idle_time))
>--
>To unsubscribe from this list: send the line "unsubscribe cpufreq" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] cpufreq: fix conservative/ondemand behaviour with ignore_nice_load
       [not found] ` <B5B0CFF685D7DF46A05CF1678CFB42ED20E0C63D@orsmsx505.amr.corp.intel.com>
@ 2009-11-10 22:20   ` Pallipadi, Venkatesh
  2009-11-11 22:10     ` Alexander Miller
  0 siblings, 1 reply; 6+ messages in thread
From: Pallipadi, Venkatesh @ 2009-11-10 22:20 UTC (permalink / raw)
  To: Alexander Miller
  Cc: linux-kernel@vger.kernel.org, cpufreq@vger.kernel.org, Dave Jones

On Tue, Nov 10, 2009 at 11:42:02AM -0800, Pallipadi, Venkatesh wrote:
> >-----Original Message-----
> >From: cpufreq-owner@vger.kernel.org 
> >[mailto:cpufreq-owner@vger.kernel.org] On Behalf Of Alexander Miller
> >Sent: Friday, November 06, 2009 8:27 AM
> >To: linux-kernel@vger.kernel.org
> >Cc: cpufreq@vger.kernel.org; Dave Jones
> >Subject: [PATCH] cpufreq: fix conservative/ondemand behaviour 
> >with ignore_nice_load
> >
> >Remove conversion of nice load to microseconds which caused addition
> >of times measured in different units and thus unreasonable behaviour
> >with both governors.                                                  
> 
> Can you describe the "unreasonable behavior" you are seeing? Is it
> with NO_HZ enabled or disabled?
> 
> I see there can be a problem with this code when NO_HZ is disabled.
> But the patch below is not the right solution, as it will result in
> adding times in different units with NO_HZ enabled.
> 
> Thanks,
> Venki

Does the below test patch (only compile tested) resolve the problem you
are seeing?

Thanks,
Venki

---
 drivers/cpufreq/cpufreq_conservative.c |    4 ++--
 drivers/cpufreq/cpufreq_ondemand.c     |    4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index bc33ddc..c7b081b 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -116,9 +116,9 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
 
 	idle_time = cputime64_sub(cur_wall_time, busy_time);
 	if (wall)
-		*wall = cur_wall_time;
+		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
 
-	return idle_time;
+	return (cputime64_t)jiffies_to_usecs(idle_time);;
 }
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 071699d..4b34ade 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -133,9 +133,9 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
 
 	idle_time = cputime64_sub(cur_wall_time, busy_time);
 	if (wall)
-		*wall = cur_wall_time;
+		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
 
-	return idle_time;
+	return (cputime64_t)jiffies_to_usecs(idle_time);
 }
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] cpufreq: fix conservative/ondemand behaviour with ignore_nice_load
  2009-11-10 22:20   ` Pallipadi, Venkatesh
@ 2009-11-11 22:10     ` Alexander Miller
  2009-11-12  0:20       ` Pallipadi, Venkatesh
  2009-11-12  0:50       ` [PATCH] Resolve time unit thinko in ondemand/conservative govs Pallipadi, Venkatesh
  0 siblings, 2 replies; 6+ messages in thread
From: Alexander Miller @ 2009-11-11 22:10 UTC (permalink / raw)
  To: Pallipadi, Venkatesh
  Cc: linux-kernel@vger.kernel.org, cpufreq@vger.kernel.org, Dave Jones

On Tue, Nov 10, 2009, Pallipadi, Venkatesh wrote:
> > Can you describe the "unresonable behavior" you are seeing. Is it
> > with NO_HZ enabled or disabled?
$ zgrep NO_HZ /proc/config.gz
# CONFIG_NO_HZ is not set

When there are two cpu-intense processes, one with nice 19 and the other
with nice 0, then the latter will use almost 100% cpu time, of course.
But the cpu has been stuck at the lowest frequency without the patch.
To be exact, it would change the freq sometimes, but return to the
lowest freq within a fraction of a second.
I would expect it to select a freq such that the non-nice processes
take <80% or the highest freq (which it does with the patch).

> > I see there can be a problem with this code when NO_HZ is disabled.
> > But, the patch below is not the right solution as it will result in
> > Adding times in different units with NO_HZ enabled.

Yes, you are right. Looks like I've patched the wrong half of the
inconsistency :-(
I think it's a bit confusing that you are using cputime64_t to store
microseconds. At least it fooled me (I'm no kernel guy though) into
thinking that get_cpu_idle_time_jiffy() returning jiffies was the
intended behaviour.

> Does the below test patch (only compile tested) resolve the problem you
> are seeing?

I've just rebooted the machine with the new patched kernel, and
it looks good.

Thank you,
Alex

> ---
>  drivers/cpufreq/cpufreq_conservative.c |    4 ++--
>  drivers/cpufreq/cpufreq_ondemand.c     |    4 ++--
>  2 files changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
> index bc33ddc..c7b081b 100644
> --- a/drivers/cpufreq/cpufreq_conservative.c
> +++ b/drivers/cpufreq/cpufreq_conservative.c
> @@ -116,9 +116,9 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
>  
>  	idle_time = cputime64_sub(cur_wall_time, busy_time);
>  	if (wall)
> -		*wall = cur_wall_time;
> +		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
>  
> -	return idle_time;
> +	return (cputime64_t)jiffies_to_usecs(idle_time);;
>  }
>  
>  static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
> diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
> index 071699d..4b34ade 100644
> --- a/drivers/cpufreq/cpufreq_ondemand.c
> +++ b/drivers/cpufreq/cpufreq_ondemand.c
> @@ -133,9 +133,9 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
>  
>  	idle_time = cputime64_sub(cur_wall_time, busy_time);
>  	if (wall)
> -		*wall = cur_wall_time;
> +		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
>  
> -	return idle_time;
> +	return (cputime64_t)jiffies_to_usecs(idle_time);
>  }
>  
>  static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
> -- 
> 1.6.0.6

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] cpufreq: fix conservative/ondemand behaviour with ignore_nice_load
  2009-11-11 22:10     ` Alexander Miller
@ 2009-11-12  0:20       ` Pallipadi, Venkatesh
  2009-11-12  0:50       ` [PATCH] Resolve time unit thinko in ondemand/conservative govs Pallipadi, Venkatesh
  1 sibling, 0 replies; 6+ messages in thread
From: Pallipadi, Venkatesh @ 2009-11-12  0:20 UTC (permalink / raw)
  To: Alexander Miller
  Cc: linux-kernel@vger.kernel.org, cpufreq@vger.kernel.org, Dave Jones

On Wed, 2009-11-11 at 14:10 -0800, Alexander Miller wrote:
> On Tue, Nov 10, 2009, Pallipadi, Venkatesh wrote:
> > > Can you describe the "unresonable behavior" you are seeing. Is it
> > > with NO_HZ enabled or disabled?
> $ zgrep NO_HZ /proc/config.gz
> # CONFIG_NO_HZ is not set
> 
> When there are two cpu-intense processes, one with nice 19 and the other
> with nice 0, then the latter will use almost 100% cpu time, of course.
> But the cpu has been stuck at the lowest frequency without the patch.
> To be exact, it would change the freq sometimes, but return to the
> lowest freq within a fraction of a second.
> I would expect it to select a freq such that the non-nice processes
> take <80% or the highest freq (which it does with the patch).
> 
> > > I see there can be a problem with this code when NO_HZ is disabled.
> > > But, the patch below is not the right solution as it will result in
> > > Adding times in different units with NO_HZ enabled.
> 
> Yes, you are right. Looks like I've patched the wrong half of the
> inconsistency :-(
> I think it's a bit irritating you are using cputime64_t to store
> microseconds. At least it fooled me (I'm no kernel guy though) into
> thinking that get_cpu_idle_time_jiffy() returning jiffies was the
> intended behaviour.

Agreed. That cputime64 is ugly. There is also some confusion with naming
of get_cpu_idle_time_jiffy and get_cpu_idle_time_us and these routines
being redundantly repeated across 2 files. Will add them to my todo
queue.

> 
> > Does the below test patch (only compile tested) resolve the problem you
> > are seeing?
> 
> I've just rebooted the machine with the new patched kernel, and
> it looks good.

Ok. Thanks for reporting (and diagnosing as well :)) and verifying the
patch. I will resend the patch with a bit more description and your
Reported/Tested-by.

Thanks,
Venki



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] Resolve time unit thinko in ondemand/conservative govs
  2009-11-11 22:10     ` Alexander Miller
  2009-11-12  0:20       ` Pallipadi, Venkatesh
@ 2009-11-12  0:50       ` Pallipadi, Venkatesh
  1 sibling, 0 replies; 6+ messages in thread
From: Pallipadi, Venkatesh @ 2009-11-12  0:50 UTC (permalink / raw)
  To: Dave Jones
  Cc: linux-kernel@vger.kernel.org, cpufreq@vger.kernel.org,
	Alexander Miller


Dave,

Here is the fix for the bug reported on this thread. Please apply. It looks
to be a stable candidate as well.

Thanks,
Venki

The ondemand and conservative governors mix up time units in the
code path where NO_HZ is not enabled and ignore_nice is set: the wall time
and idle time are stored in jiffies, while the nice time is calculated in
microseconds.

The problem was reported and diagnosed by Alexander here.
http://marc.info/?l=linux-kernel&m=125752550404513&w=2

The patch below fixes this thinko.

Reported-by: Alexander Miller <Miller@fmi.uni-stuttgart.de>
Tested-by: Alexander Miller <Miller@fmi.uni-stuttgart.de>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
---
 drivers/cpufreq/cpufreq_conservative.c |    4 ++--
 drivers/cpufreq/cpufreq_ondemand.c     |    4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index bc33ddc..c7b081b 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -116,9 +116,9 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
 
 	idle_time = cputime64_sub(cur_wall_time, busy_time);
 	if (wall)
-		*wall = cur_wall_time;
+		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
 
-	return idle_time;
+	return (cputime64_t)jiffies_to_usecs(idle_time);;
 }
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 071699d..4b34ade 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -133,9 +133,9 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
 
 	idle_time = cputime64_sub(cur_wall_time, busy_time);
 	if (wall)
-		*wall = cur_wall_time;
+		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
 
-	return idle_time;
+	return (cputime64_t)jiffies_to_usecs(idle_time);
 }
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2009-11-12  0:50 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-06 16:27 [PATCH] cpufreq: fix conservative/ondemand behaviour with ignore_nice_load Alexander Miller
2009-11-10 19:42 ` Pallipadi, Venkatesh
     [not found] ` <B5B0CFF685D7DF46A05CF1678CFB42ED20E0C63D@orsmsx505.amr.corp.intel.com>
2009-11-10 22:20   ` Pallipadi, Venkatesh
2009-11-11 22:10     ` Alexander Miller
2009-11-12  0:20       ` Pallipadi, Venkatesh
2009-11-12  0:50       ` [PATCH] Resolve time unit thinko in ondemand/conservative govs Pallipadi, Venkatesh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox