* Possible CPUFreq governor
@ 2005-04-26 23:39 Mark Bidewell
2005-04-27 10:53 ` Bruno Ducrot
0 siblings, 1 reply; 21+ messages in thread
From: Mark Bidewell @ 2005-04-26 23:39 UTC (permalink / raw)
To: cpufreq
[-- Attachment #1: Type: text/plain, Size: 350 bytes --]
I am attaching a Linux kernel patch which supplies a new CPUFreq governor
for the CPUFreq interface. It allows a user or daemon to reduce the
thermal output of a mobile processor by reducing the frequency at which
non-interactive processes are run. It has been extensively tested on the
2.6.11.x kernels on a P4-M 3.2 GHz machine.
Mark Bidewell
[-- Attachment #2: tempscale-2.6.11-sysfs.patch --]
[-- Type: application/octet-stream, Size: 8945 bytes --]
diff -Nur linux-2.6.11.6/drivers/cpufreq/cpufreq_tempscale.c linux-sysfs/drivers/cpufreq/cpufreq_tempscale.c
--- linux-2.6.11.6/drivers/cpufreq/cpufreq_tempscale.c 1969-12-31 19:00:00.000000000 -0500
+++ linux-sysfs/drivers/cpufreq/cpufreq_tempscale.c 2005-04-06 22:56:36.000000000 -0400
@@ -0,0 +1,199 @@
+/*
+ * linux/drivers/cpufreq/cpufreq_tempscale.c
+ *
+ * Copyright (C) 2005 Mark Bidewell <mark.bidewell@alumni.clemson.edu
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufreq.h>
+#include <linux/init.h>
+
+#define define_one_ro(_name) \
+static struct freq_attr _name = \
+__ATTR(_name, 0444, show_##_name, NULL)
+
+#define define_one_rw(_name) \
+static struct freq_attr _name = \
+__ATTR(_name, 0644, show_##_name,store_##_name)
+
+#define TRACK_NUM 5000
+
+void set_proc_stats(struct task_struct *t);
+
+struct task_param{
+ unsigned int pid;
+ unsigned int speed;
+};
+
+unsigned compute_spd=0;
+struct cpufreq_policy *cpupolicy;
+static char enabled=0;
+
+
+
+void scale_proc_freq(struct task_struct *t, int tsk_intr)
+{
+ if(!enabled) return;
+ if(!cpupolicy) return;
+ if(t->prev_state & TASK_UNINTERRUPTIBLE){
+ __cpufreq_driver_target(cpupolicy, cpupolicy->max, CPUFREQ_RELATION_H);
+ return;
+ }
+ if(!tsk_intr){
+ if(compute_spd && cpupolicy->cur != compute_spd){
+ __cpufreq_driver_target(cpupolicy, compute_spd, CPUFREQ_RELATION_H);
+ compute_spd = cpupolicy->cur;
+ }
+ }
+ else {
+ if(cpupolicy->cur <= compute_spd){
+ __cpufreq_driver_target(cpupolicy, cpupolicy->max, CPUFREQ_RELATION_H);
+ }
+ }
+ #ifdef CONFIG_CPU_FREQ_GOV_TEMPSCALE_DBG
+ set_proc_stats(t);
+ #endif
+ return;
+}
+
+/*
+ * SysFS tuning knobs and information
+ */
+
+static ssize_t show_computebound_setspeed(struct cpufreq_policy *unused, char *buf)
+{
+ return sprintf (buf, "%u\n", compute_spd);
+}
+static ssize_t store_computebound_setspeed(struct cpufreq_policy *unused, const char *buf, size_t count)
+{
+ sscanf(buf, "%u", &compute_spd);
+ return count;
+}
+
+#ifdef CONFIG_CPU_FREQ_GOV_TEMPSCALE_DBG
+
+ struct task_param tasks[TRACK_NUM];
+ unsigned int curtask=0;
+
+ void set_proc_stats(struct task_struct *t)
+ {
+ struct task_param *tsk = &tasks[curtask];
+ curtask = (curtask+1) % TRACK_NUM;
+ tsk->pid = t->pid;
+ tsk->speed = cpufreq_get(0);
+ }
+
+ static ssize_t show_tempscale_stats(struct cpufreq_policy *unused, char *buf){
+ int i;
+ int numtasks=curtask;
+ int sz=0;
+ unsigned maxspeed=0, minspeed=0xFFFFFFFF;
+ unsigned nummax=0, nummin=0;
+
+ for(i=0; i < numtasks; i++){
+ if(tasks[i].speed >= maxspeed){
+ maxspeed = tasks[i].speed;
+ nummax++;
+ }
+ if(tasks[i].speed <= minspeed){
+ minspeed = tasks[i].speed;
+ nummin++;
+ }
+ }
+ curtask=0;
+ sz = sprintf(buf, "Number of tasks: %u\n", numtasks);
+ sz += sprintf(buf, "%sMax speed: %u\n", buf, maxspeed);
+ sz += sprintf(buf, "%sMin speed: %u\n", buf, minspeed);
+ sz += sprintf(buf, "%sNumber max speed: %u\n", buf, nummax);
+ sz += sprintf(buf, "%sNumber min speed: %u\n", buf, nummin);
+ return sz;
+ }
+ static ssize_t show_tempscale_data(struct cpufreq_policy *unused, char *buf){
+ int i;
+ int numtasks=curtask;
+ int sz=0;
+ unsigned maxspeed=0, minspeed=0xFFFFFFFF;
+ unsigned nummax=0, nummin=0;
+ sz=sprintf(buf, "PID\tFREQ\n");
+ sz+=sprintf(buf, "%s------\t------\n", buf);
+ if(numtasks > 10) {
+ i = numtasks-10;
+ }
+ else{
+ i=0;
+ }
+ for(; i < numtasks; i++){
+ sz+=sprintf(buf, "%s%u\t%u\n",buf, tasks[i].pid, tasks[i].speed);
+ }
+ return sz;
+ }
+ define_one_ro(tempscale_stats);
+ define_one_ro(tempscale_data);
+#endif
+define_one_rw(computebound_setspeed);
+
+static struct attribute * tempscale_attributes[] = {
+ &computebound_setspeed.attr,
+ #ifdef CONFIG_CPU_FREQ_GOV_TEMPSCALE_DBG
+ &tempscale_stats.attr,
+ &tempscale_data.attr,
+ #endif
+ NULL
+};
+
+static struct attribute_group tempscale_attr_group = {
+ .attrs = tempscale_attributes,
+ .name = "tempscale",
+};
+
+
+static int cpufreq_governor_tempscale(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ compute_spd=policy->max;
+ cpupolicy = policy;
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ enabled=1;
+ sysfs_create_group(&policy->kobj, &tempscale_attr_group);
+ break;
+ case CPUFREQ_GOV_STOP:
+ enabled=0;
+ sysfs_remove_group(&policy->kobj, &tempscale_attr_group);
+ __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
+ break;
+ }
+ return 0;
+}
+
+static struct cpufreq_governor cpufreq_gov_tempscale = {
+ .name = "tempscale",
+ .governor = cpufreq_governor_tempscale,
+ .owner = THIS_MODULE,
+};
+
+
+static int __init cpufreq_gov_tempscale_init(void)
+{
+ return cpufreq_register_governor(&cpufreq_gov_tempscale);
+}
+
+
+static void __exit cpufreq_gov_tempscale_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_tempscale);
+}
+
+
+MODULE_AUTHOR("Mark Bidewell <mark.bidewell@alumni.clemson.edu");
+MODULE_DESCRIPTION("CPUfreq policy governor 'tempscale'");
+MODULE_LICENSE("GPL");
+
+module_init(cpufreq_gov_tempscale_init);
+module_exit(cpufreq_gov_tempscale_exit);
diff -Nur linux-2.6.11.6/drivers/cpufreq/Kconfig linux-sysfs/drivers/cpufreq/Kconfig
--- linux-2.6.11.6/drivers/cpufreq/Kconfig 2005-03-25 22:28:36.000000000 -0500
+++ linux-sysfs/drivers/cpufreq/Kconfig 2005-04-06 22:56:36.000000000 -0400
@@ -116,3 +116,14 @@
For details, take a look at linux/Documentation/cpu-freq.
If in doubt, say N.
+config CPU_FREQ_GOV_TEMPSCALE
+ bool "Temperature Aware Scheduling support"
+ depends on CPU_FREQ
+ help
+ Enables system calls which allow scaling performance between
+ Interactive and non-interactive processes.
+config CPU_FREQ_GOV_TEMPSCALE_DBG
+ bool "Statistics for temperature scaling"
+ depends on CPU_FREQ && CPU_FREQ_GOV_TEMPSCALE
+ help
+ Collect statistics on CPU temperature scaling
diff -Nur linux-2.6.11.6/drivers/cpufreq/Makefile linux-sysfs/drivers/cpufreq/Makefile
--- linux-2.6.11.6/drivers/cpufreq/Makefile 2005-03-25 22:28:39.000000000 -0500
+++ linux-sysfs/drivers/cpufreq/Makefile 2005-04-06 22:56:36.000000000 -0400
@@ -8,7 +8,7 @@
obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o
obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o
obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o
-
+obj-$(CONFIG_CPU_FREQ_GOV_TEMPSCALE) += cpufreq_tempscale.o
# CPUfreq cross-arch helpers
obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o
diff -Nur linux-2.6.11.6/include/linux/sched.h linux-sysfs/include/linux/sched.h
--- linux-2.6.11.6/include/linux/sched.h 2005-03-25 22:28:15.000000000 -0500
+++ linux-sysfs/include/linux/sched.h 2005-04-06 22:55:44.000000000 -0400
@@ -112,14 +112,12 @@
#define EXIT_DEAD 32
#define __set_task_state(tsk, state_value) \
- do { (tsk)->state = (state_value); } while (0)
-#define set_task_state(tsk, state_value) \
- set_mb((tsk)->state, (state_value))
+ do {(tsk)->prev_state=(tsk)->state; (tsk)->state = (state_value); } while (0)
#define __set_current_state(state_value) \
- do { current->state = (state_value); } while (0)
-#define set_current_state(state_value) \
- set_mb(current->state, (state_value))
+ do {current->prev_state=current->state; current->state = (state_value); } while (0)
+
+
/* Task command name length */
#define TASK_COMM_LEN 16
@@ -526,7 +524,7 @@
struct mempolicy;
struct task_struct {
- volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
+ volatile long prev_state, state; /* -1 unrunnable, 0 runnable, >0 stopped */
struct thread_info *thread_info;
atomic_t usage;
unsigned long flags; /* per process flags, defined below */
@@ -1216,4 +1214,13 @@
#endif /* CONFIG_PM */
#endif /* __KERNEL__ */
+/*Change by Mark Bidewell*/
+inline static void set_task_state(struct task_struct *tsk, unsigned state_value){
+ (tsk)->prev_state=(tsk)->state; set_mb((tsk)->state, (state_value));
+}
+/*Change by Mark Bidewell*/
+inline static void set_current_state(unsigned state_value){
+ current->prev_state=current->state; set_mb(current->state, (state_value));
+}
+
#endif
diff -Nur linux-2.6.11.6/kernel/sched.c linux-sysfs/kernel/sched.c
--- linux-2.6.11.6/kernel/sched.c 2005-03-25 22:28:26.000000000 -0500
+++ linux-sysfs/kernel/sched.c 2005-04-06 22:55:33.000000000 -0400
@@ -2822,6 +2822,12 @@
prev = current;
if (unlikely(reacquire_kernel_lock(prev) < 0))
goto need_resched_nonpreemptible;
+ /*
+ * Put thermal stuff here
+ */
+ #ifdef CONFIG_CPU_FREQ_GOV_TEMPSCALE
+ scale_proc_freq(next, TASK_INTERACTIVE(next));
+ #endif
preempt_enable_no_resched();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;
[-- Attachment #3: Type: text/plain, Size: 147 bytes --]
_______________________________________________
Cpufreq mailing list
Cpufreq@lists.linux.org.uk
http://lists.linux.org.uk/mailman/listinfo/cpufreq
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: Possible CPUFreq governor
2005-04-26 23:39 Possible CPUFreq governor Mark Bidewell
@ 2005-04-27 10:53 ` Bruno Ducrot
2005-04-27 11:02 ` Bruno Ducrot
` (2 more replies)
0 siblings, 3 replies; 21+ messages in thread
From: Bruno Ducrot @ 2005-04-27 10:53 UTC (permalink / raw)
To: Mark Bidewell; +Cc: cpufreq
Hi,
On Tue, Apr 26, 2005 at 07:39:29PM -0400, Mark Bidewell wrote:
> I am attaching a Linux kernel patch which supplies a new CPUFreq governor
> for the CPUFreq interface. It allows a user or daemon to reduce the
> thermal output of a mobile processor by reducing the frequency at which
> non-interactive processes are run. It has been extensively tested on the
> 2.6.11.x kernels on a P4-M 3.2 GHz machine.
BTW how temperature is computed?
I dont see any difference (apart the fact that you are setting frequency
only for non-interactive processes) with the userspace governor. If
really you have to get this feature, I think you should consider to
modify the userspace governor.
Some comments:
+unsigned compute_spd=0;
+struct cpufreq_policy *cpupolicy;
+static char enabled=0;
+
I think cpupolicy should be declared static. Same for compute_spd.
...
+void scale_proc_freq(struct task_struct *t, int tsk_intr)
+{
+ if(!enabled) return;
^
a space is missing.
+ if(!cpupolicy) return;
^
ditto
also its likely you should write
if (!cpupolicy)
return;
(kernel coding style)
...
+#ifdef CONFIG_CPU_FREQ_GOV_TEMPSCALE_DBG
+
+ struct task_param tasks[TRACK_NUM];
+ unsigned int curtask=0;
Why those indentations?
+
+ void set_proc_stats(struct task_struct *t)
+ {
+ struct task_param *tsk = &tasks[curtask];
+ curtask = (curtask+1) % TRACK_NUM;
+ tsk->pid = t->pid;
+ tsk->speed = cpufreq_get(0);
+ }
+
+ static ssize_t show_tempscale_stats(struct cpufreq_policy *unused, char *buf){
+ int i;
+ int numtasks=curtask;
+ int sz=0;
^^^
You have to use a ssize_t here.
...
diff -Nur linux-2.6.11.6/include/linux/sched.h linux-sysfs/include/linux/sched.h
--- linux-2.6.11.6/include/linux/sched.h 2005-03-25 22:28:15.000000000 -0500
+++ linux-sysfs/include/linux/sched.h 2005-04-06 22:55:44.000000000 -0400
@@ -112,14 +112,12 @@
#define EXIT_DEAD 32
#define __set_task_state(tsk, state_value) \
- do { (tsk)->state = (state_value); } while (0)
-#define set_task_state(tsk, state_value) \
- set_mb((tsk)->state, (state_value))
Look a little bit more the macro you just removed here.
Its expected to return a value, namely 'state_value'.
But you are redefining those on other file latter as:
+/*Change by Mark Bidewell*/ <-- Why this comment? Its useless.
+inline static void set_task_state(struct task_struct *tsk, unsigned state_value){
+ (tsk)->prev_state=(tsk)->state; set_mb((tsk)->state, (state_value));
+}
This is now an inlined function returning void.
+/*Change by Mark Bidewell*/
+inline static void set_current_state(unsigned state_value){
+ current->prev_state=current->state; set_mb(current->state, (state_value));
+}
Ditto
Cheers,
--
Bruno Ducrot
-- Which is worse: ignorance or apathy?
-- Don't know. Don't care.
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: Possible CPUFreq governor
2005-04-27 10:53 ` Bruno Ducrot
@ 2005-04-27 11:02 ` Bruno Ducrot
2005-04-27 11:08 ` Ivor Hewitt
2005-04-27 12:25 ` Mark Bidewell
2 siblings, 0 replies; 21+ messages in thread
From: Bruno Ducrot @ 2005-04-27 11:02 UTC (permalink / raw)
To: Mark Bidewell; +Cc: cpufreq
On Wed, Apr 27, 2005 at 12:53:50PM +0200, Bruno Ducrot wrote:
> #define __set_task_state(tsk, state_value) \
> - do { (tsk)->state = (state_value); } while (0)
> -#define set_task_state(tsk, state_value) \
> - set_mb((tsk)->state, (state_value))
>
> Look a little bit more the macro you just removed here.
> Its expected to return a value, namely 'state_value'.
Oops, its me that need to read this more carrefully.. Sorry.
--
Bruno Ducrot
-- Which is worse: ignorance or apathy?
-- Don't know. Don't care.
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: Possible CPUFreq governor
2005-04-27 10:53 ` Bruno Ducrot
2005-04-27 11:02 ` Bruno Ducrot
@ 2005-04-27 11:08 ` Ivor Hewitt
2005-04-27 12:30 ` Mark Bidewell
2005-04-27 12:25 ` Mark Bidewell
2 siblings, 1 reply; 21+ messages in thread
From: Ivor Hewitt @ 2005-04-27 11:08 UTC (permalink / raw)
To: Bruno Ducrot; +Cc: cpufreq
> Hi,
>
> On Tue, Apr 26, 2005 at 07:39:29PM -0400, Mark Bidewell wrote:
>> I am attaching a Linux kernel patch which supplies a new CPUFreq governor
>> for the CPUFreq interface. It allows a user or daemon to reduce the
>> thermal output of a mobile processor by reducing the frequency at which
>> non-interactive processes are run. It has been extensively tested on the
>> 2.6.11.x kernels on a P4-M 3.2 GHz machine.
>
> BTW how temperature is computed?
>
I think the idea is to reduce cpuspeed, with the intention of having the side
effect of reducing temperature. Rather than to throttle based on temperature
(I may be wrong). I believe P4's will throttle themselves automatically when
they get too hot anyway.
Cheers,
Ivor.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: Possible CPUFreq governor
2005-04-27 11:08 ` Ivor Hewitt
@ 2005-04-27 12:30 ` Mark Bidewell
2005-04-27 14:07 ` Bruno Ducrot
0 siblings, 1 reply; 21+ messages in thread
From: Mark Bidewell @ 2005-04-27 12:30 UTC (permalink / raw)
To: ivor; +Cc: cpufreq
You are correct, The primary differences are that
1) the clock modulation only cuts in after dangerous temperatures have
been detected. This code prevents those high temperatures.
2) This code uses the OS Scheduler to make targeted performance cuts on
certain applications. This reduces the performance impact of throttling
to non-interactive processes.
>I think the idea is to reduce cpuspeed, with the intention of having the side
>effect of reducing temperature. Rather than to throttle based on temperature
>(I may be wrong). I believe P4's will throttle themselves automatically when
>they get too hot anyway.
>
>Cheers,
>Ivor.
>
>
>
>
>
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: Possible CPUFreq governor
2005-04-27 12:30 ` Mark Bidewell
@ 2005-04-27 14:07 ` Bruno Ducrot
2005-04-27 16:20 ` Mark Bidewell
0 siblings, 1 reply; 21+ messages in thread
From: Bruno Ducrot @ 2005-04-27 14:07 UTC (permalink / raw)
To: Mark Bidewell; +Cc: cpufreq
On Wed, Apr 27, 2005 at 08:30:45AM -0400, Mark Bidewell wrote:
> You are correct, The primary differences are that
> 1) the clock modulation only cuts in after dangerous temperatures have
> been detected. This code prevents those high temperatures.
> 2) This code uses the OS Scheduler to make targeted performance cuts on
> certain applications. This reduces the performance impact of throttling
> to non-interactive processes.
I disagree. If processors are under an overheat situation, we should
not consider performance anymore.
--
Bruno Ducrot
-- Which is worse: ignorance or apathy?
-- Don't know. Don't care.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: Possible CPUFreq governor
2005-04-27 14:07 ` Bruno Ducrot
@ 2005-04-27 16:20 ` Mark Bidewell
2005-04-27 17:38 ` Bruno Ducrot
0 siblings, 1 reply; 21+ messages in thread
From: Mark Bidewell @ 2005-04-27 16:20 UTC (permalink / raw)
To: Bruno Ducrot; +Cc: cpufreq
Bruno Ducrot wrote:
>On Wed, Apr 27, 2005 at 08:30:45AM -0400, Mark Bidewell wrote:
>
>
>>You are correct, The primary differences are that
>>1) the clock modulation only cuts in after dangerous temperatures have
>>been detected. This code prevents those high temperatures.
>>2) This code uses the OS Scheduler to make targeted performance cuts on
>>certain applications. This reduces the performance impact of throttling
>>to non-interactive processes.
>>
>>
>
>I disagree. If processors are under an overheat situation, we should
>not consider performance anymore.
>
>
>
I am in agrement that performance doesn't matter in a CPU thermal
emergency. In fact no software throttling would be reliable enough (or
fast enough) to prevent problems consistently. I see this governor as a
preventative measure to reduce day-to-day thermal stress not as an an
emergency stopgap. The possible advantages of the governor I see are:
1) The higher the temperature at which a CPU runs the lower its life.
A common rule of thumb is that increasing operating temperature by 10C
cuts CPU life in half. CPU throttling does not cut in until an
emergency occurs (which can be often on a laptop). It would be useful
to reduce this stress
2) Quieter operation by reducing the amount of time fans have to run.
3) Reducing the effect of heat on other components of a system
(particularly the hard drive in a laptop) which are near the CPU.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: Possible CPUFreq governor
2005-04-27 16:20 ` Mark Bidewell
@ 2005-04-27 17:38 ` Bruno Ducrot
2005-04-27 18:18 ` Mark Bidewell
0 siblings, 1 reply; 21+ messages in thread
From: Bruno Ducrot @ 2005-04-27 17:38 UTC (permalink / raw)
To: Mark Bidewell; +Cc: cpufreq
On Wed, Apr 27, 2005 at 12:20:01PM -0400, Mark Bidewell wrote:
> Bruno Ducrot wrote:
>
> >On Wed, Apr 27, 2005 at 08:30:45AM -0400, Mark Bidewell wrote:
> >
> >
> >>You are correct, The primary differences are that
> >>1) the clock modulation only cuts in after dangerous temperatures have
> >>been detected. This code prevents those high temperatures.
> >>2) This code uses the OS Scheduler to make targeted performance cuts on
> >>certain applications. This reduces the performance impact of throttling
> >>to non-interactive processes.
> >>
> >>
> >
> >I disagree. If processors are under an overheat situation, we should
> >not consider performance anymore.
> >
> >
> >
> I am in agrement that performance doesn't matter in a CPU thermal
> emergency. In fact no software throttling would be reliable enough (or
> fast enough) to prevent problems consistently. I see this governor as a
> preventative measure to reduce day-to-day thermal stress not as an an
> emergency stopgap. The possible advantages of the governor I see are:
>
> 1) The higher the temperature at which a CPU runs the lower its life.
> A common rule of thumb is that increasing operating temperature by 10C
> cuts CPU life in half. CPU throttling does not cut in until an
> emergency occurs (which can be often on a laptop). It would be useful
> to reduce this stress
> 2) Quieter operation by reducing the amount of time fans have to run.
> 3) Reducing the effect of heat on other components of a system
> (particularly the hard drive in a laptop) which are near the CPU.
For point 1 I'm not expert enough on processors so I can't tell (though
I tend to trust you). Of course I agree with points 2 and 3, and anyway
we need a generic solution that must be independant of ACPI thermal
passive cooling.
I have to think a little bit more about your solution though. In
theory, the ondemand governor (or any other dynamic governors)
with some kind of control of ->max for the policy should be OK (in
kernel as under ACPI thermal subsystem or in user space by tweaking
the scaling_max_freq via a daemon).
But if we have to consider interactive/non-interactive process,
the problem would be that we'll update policy when max change too
often maybe? So its have to be done by the governor you submit?
I need to think at that and I have to make some tests.
Thanks,
--
Bruno Ducrot
-- Which is worse: ignorance or apathy?
-- Don't know. Don't care.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: Possible CPUFreq governor
2005-04-27 17:38 ` Bruno Ducrot
@ 2005-04-27 18:18 ` Mark Bidewell
2005-04-27 19:04 ` Bruno Ducrot
0 siblings, 1 reply; 21+ messages in thread
From: Mark Bidewell @ 2005-04-27 18:18 UTC (permalink / raw)
To: Bruno Ducrot; +Cc: cpufreq
It might help to explain how the governor is used. I have a user-mode
daemon which samples the ACPI temperature every 2 seconds and communicates
the speed at which non-interactive processes should be run via the sysfs
interface. The scheduler will then determine a processes interactivity
and change the CPU speed accordingly. In theory the processor could
change speeds every process switch.
This governor is indeed similar to the ondemand governor. The distinction
is that while the ondemand governor expands CPU performance as load
increases, tempscale must limit performance. In my opinion, the ultimate
solution would be a combination of the two governors. That is, a governor
which would react in the following way:
1) If the processor is idle, run at low speeds to conserve power (ondemand).
2) Increase performance with demand as long a temperature is low (ondemand)
3) If temperatures get to high, throttle compute-bound processes (tempscale)
I have done some limited experiments along this line using a user-mode
daemon to switch governors (as well as speeds) between tempscale and
ondemand as temperatures change.
Mark Bidewell
> For point 1 I'm not expert enough on processors so I can't tell (though
> I tend to trust you). Of course I agree with points 2 and 3, and anyway
> we need a generic solution that must be independant of ACPI thermal
> passive cooling.
>
> I have to think a little bit more about your solution though. In
> theory, the ondemand governor (or any other dynamic governors)
> with some kind of control of ->max for the policy should be OK (in
> kernel as under ACPI thermal subsystem or in user space by tweaking
> the scaling_max_freq via a daemon).
>
> But if we have to consider interactive/non-interactive process,
> the problem would be that we'll update policy when max change too
> often maybe? So its have to be done by the governor you submit?
> I need to think at that and I have to make some tests.
>
> Thanks,
>
> --
> Bruno Ducrot
>
> -- Which is worse: ignorance or apathy?
> -- Don't know. Don't care.
>
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: Possible CPUFreq governor
2005-04-27 18:18 ` Mark Bidewell
@ 2005-04-27 19:04 ` Bruno Ducrot
2005-05-02 12:51 ` Mark Bidewell
0 siblings, 1 reply; 21+ messages in thread
From: Bruno Ducrot @ 2005-04-27 19:04 UTC (permalink / raw)
To: Mark Bidewell; +Cc: cpufreq
On Wed, Apr 27, 2005 at 02:18:27PM -0400, Mark Bidewell wrote:
> It might help to explain how the governor is used. I have a user-mode
> daemon which samples the ACPI temperature every 2 seconds and communicates
> the speed at which non-interactive processes should be run via the sysfs
> interface. The scheduler will then determine a processes interactivity
> and change the CPU speed accordingly. In theory the processor could
> change speeds every process switch.
That why I have to do tests in case there is a lot of those process
switch but when modifiying max frequency instead.
> This governor is indeed similar to the ondemand governor. The distinction
> is that while the ondemand governor expands CPU performance as load
> increases, tempscale must limit performance. In my opinion, the ultimate
> solution would be a combination of the two governors. That is, a governor
> which would react in the following way:
>
> 1) If the processor is idle, run at low speeds to conserve power (ondemand).
> 2) Increase performance with demand as long a temperature is low (ondemand)
> 3) If temperatures get to high, throttle compute-bound processes (tempscale)
>
> I have done some limited experiments along this line using a user-mode
> daemon to switch governors (as well as speeds) between tempscale and
> ondemand as temperatures change.
That's why I suggest to touch max frequency, not the actual frequency.
This must be done outside a governor, so that you can have both actually.
I'll send you a patch after writing it for testing purpose so that you will
see what I have in mind.
--
Bruno Ducrot
-- Which is worse: ignorance or apathy?
-- Don't know. Don't care.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: Possible CPUFreq governor
2005-04-27 19:04 ` Bruno Ducrot
@ 2005-05-02 12:51 ` Mark Bidewell
0 siblings, 0 replies; 21+ messages in thread
From: Mark Bidewell @ 2005-05-02 12:51 UTC (permalink / raw)
To: Bruno Ducrot; +Cc: cpufreq
I have been examining the structure and design of the ondemand govenor
further. Would it be accurate to characterize your concern as basically
that under heavy process load, the p-state switching latency becomes
longer than the timeslices and thus could dominate the CPU? Or is there
an issue with processor damage?
Bruno Ducrot wrote:
>On Wed, Apr 27, 2005 at 02:18:27PM -0400, Mark Bidewell wrote:
>
>
>>It might help to explain how the governor is used. I have a user-mode
>>daemon which samples the ACPI temperature every 2 seconds and communicates
>>the speed at which non-interactive processes should be run via the sysfs
>>interface. The scheduler will then determine a processes interactivity
>>and change the CPU speed accordingly. In theory the processor could
>>change speeds every process switch.
>>
>>
>
>That why I have to do tests in case there is a lot of those process
>switch but when modifiying max frequency instead.
>
>
>
>>This governor is indeed similar to the ondemand governor. The distinction
>>is that while the ondemand governor expands CPU performance as load
>>increases, tempscale must limit performance. In my opinion, the ultimate
>>solution would be a combination of the two governors. That is, a governor
>>which would react in the following way:
>>
>>1) If the processor is idle, run at low speeds to conserve power (ondemand).
>>2) Increase performance with demand as long a temperature is low (ondemand)
>>3) If temperatures get to high, throttle compute-bound processes (tempscale)
>>
>>I have done some limited experiments along this line using a user-mode
>>daemon to switch governors (as well as speeds) between tempscale and
>>ondemand as temperatures change.
>>
>>
>
>That's why I suggest to touch max frequency, not the actual frequency.
>
>This must be done outside a governor, so that you can have both actually.
>I'll send you a patch after writing it for testing purpose so that you will
>see what I have in mind.
>
>
>
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: Possible CPUFreq governor
2005-04-27 10:53 ` Bruno Ducrot
2005-04-27 11:02 ` Bruno Ducrot
2005-04-27 11:08 ` Ivor Hewitt
@ 2005-04-27 12:25 ` Mark Bidewell
2005-04-27 13:54 ` Bruno Ducrot
2 siblings, 1 reply; 21+ messages in thread
From: Mark Bidewell @ 2005-04-27 12:25 UTC (permalink / raw)
To: Bruno Ducrot; +Cc: cpufreq
Thank you very much for your imput. I will make those adjustments in
the next patch. The temperature was obtained by reading
/proc/acpi/thermal_zone/THM/temperature for a usermode daemon.
The primary problem I see with integrating this code as part of
userspace is the fact that userspace can be a module. This code
includes the addition of a function call in the schedule() function:
*diff -Nur linux-2.6.11.6/kernel/sched.c linux-sysfs/kernel/sched.c
--- linux-2.6.11.6/kernel/sched.c 2005-03-25 22:28:26.000000000 -0500
+++ linux-sysfs/kernel/sched.c 2005-04-06 22:55:33.000000000 -0400
@@ -2822,6 +2822,12 @@
prev = current;
if (unlikely(reacquire_kernel_lock(prev) < 0))
goto need_resched_nonpreemptible;
+ /*
+ * Put thermal stuff here
+ */
+ #ifdef CONFIG_CPU_FREQ_GOV_TEMPSCALE
+ scale_proc_freq(next, TASK_INTERACTIVE(next));
+ #endif
preempt_enable_no_resched();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;
*
If the tempscale code were pulled out of the kernel (via rmmod), I think
that would invalidate the call. A second problem would be that the call
would not be valid prior to module loading. I have not been able to
come up with any workarounds for this. Are you aware of any?
Bruno Ducrot wrote:
>Hi,
>
>On Tue, Apr 26, 2005 at 07:39:29PM -0400, Mark Bidewell wrote:
>
>
>>I am attaching a Linux kernel patch which supplies a new CPUFreq governor
>>for the CPUFreq interface. It allows a user or daemon to reduce the
>>thermal output of a mobile processor by reducing the frequency at which
>>non-interactive processes are run. It has been extensively tested on the
>>2.6.11.x kernels on a P4-M 3.2 GHz machine.
>>
>>
>
>BTW how temperature is computed?
>
>I dont see any difference (apart the fact that you are setting frequency
>only for non-interactive processes) with the userspace governor. If
>really you have to get this feature, I think you should consider to
>modify the userspace governor.
>
>
>
>
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: Possible CPUFreq governor
2005-04-27 12:25 ` Mark Bidewell
@ 2005-04-27 13:54 ` Bruno Ducrot
2005-04-27 16:10 ` Mark Bidewell
0 siblings, 1 reply; 21+ messages in thread
From: Bruno Ducrot @ 2005-04-27 13:54 UTC (permalink / raw)
To: Mark Bidewell; +Cc: cpufreq
On Wed, Apr 27, 2005 at 08:25:12AM -0400, Mark Bidewell wrote:
> Thank you very much for your imput. I will make those adjustments in
> the next patch. The temperature was obtained by reading
> /proc/acpi/thermal_zone/THM/temperature for a usermode daemon.
>
You mean you cant use ACPI thermal stuff in order to get it right?
--
Bruno Ducrot
-- Which is worse: ignorance or apathy?
-- Don't know. Don't care.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: Possible CPUFreq governor
2005-04-27 13:54 ` Bruno Ducrot
@ 2005-04-27 16:10 ` Mark Bidewell
2005-04-27 17:03 ` Bruno Ducrot
0 siblings, 1 reply; 21+ messages in thread
From: Mark Bidewell @ 2005-04-27 16:10 UTC (permalink / raw)
To: Bruno Ducrot; +Cc: cpufreq
Bruno Ducrot wrote:
>On Wed, Apr 27, 2005 at 08:25:12AM -0400, Mark Bidewell wrote:
>
>
>>Thank you very much for your imput. I will make those adjustments in
>>the next patch. The temperature was obtained by reading
>>/proc/acpi/thermal_zone/THM/temperature for a usermode daemon.
>>
>>
>>
>
>You mean you cant use ACPI thermal stuff in order to get it right?
>
>
>
If I understand your question right, I have looked into this. The functions which extract the raw temperature data use ACPI control structures which appear to be created automagically as part of the file open process. I have not been able to determine were they are created and so have not been able to call them directly (I am no expert on this by any means so I could be totally off base here). This would leave opening the /proc file inside the kernel as the only other option and I am not sure that making those kind of calls from kernel mode would be a good idea.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: Possible CPUFreq governor
2005-04-27 16:10 ` Mark Bidewell
@ 2005-04-27 17:03 ` Bruno Ducrot
2005-04-27 17:19 ` Mark Bidewell
0 siblings, 1 reply; 21+ messages in thread
From: Bruno Ducrot @ 2005-04-27 17:03 UTC (permalink / raw)
To: Mark Bidewell; +Cc: cpufreq
On Wed, Apr 27, 2005 at 12:10:37PM -0400, Mark Bidewell wrote:
> Bruno Ducrot wrote:
>
> >On Wed, Apr 27, 2005 at 08:25:12AM -0400, Mark Bidewell wrote:
> >
> >
> >>Thank you very much for your imput. I will make those adjustments in
> >>the next patch. The temperature was obtained by reading
> >>/proc/acpi/thermal_zone/THM/temperature for a usermode daemon.
> >>
> >>
> >>
> >
> >You mean you cant use ACPI thermal stuff in order to get it right?
> >
> >
> >
>
> If I understand your question right, I have looked into this. The
> functions which extract the raw temperature data use ACPI control
> structures which appear to be created automagically as part of the file
> open process. I have not been able to determine were they are created and
> so have not been able to call them directly (I am no expert on this by any
> means so I could be totally off base here). This would leave opening the
> /proc file inside the kernel as the only other option and I am not sure
> that making those kind of calls from kernel mode would be a good idea.
ACPI define two modes for cooling a processor. By default, active
mode will be used. You have to enable passive mode via
/proc/acpi/thermal/*/cooling_mode and you may have to define a polling
frequency via /proc/acpi/thermal/*/polling_frequency. If also the
cpufreq infrastructure is working, the ACPI thermal subsystem will
change automagically
/sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq
based upon the temperature of the processor attached to that thermal
zone.
Actually this may not work on certain situation.
This was just a single question.
--
Bruno Ducrot
-- Which is worse: ignorance or apathy?
-- Don't know. Don't care.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: Possible CPUFreq governor
2005-04-27 17:03 ` Bruno Ducrot
@ 2005-04-27 17:19 ` Mark Bidewell
2005-04-27 17:45 ` Bruno Ducrot
0 siblings, 1 reply; 21+ messages in thread
From: Mark Bidewell @ 2005-04-27 17:19 UTC (permalink / raw)
To: Bruno Ducrot; +Cc: cpufreq
I was not aware of this. However, when I cat
/proc/acpi/thermal_zone/THM/cooling_mode I get:
<setting not supported>
cooling mode: critical
Does the Processor P-States driver need to be installed?
One distinction between my code and the ACPI code would appear to be that
tempscale only throttles non-interactive processes as opposed to a global
cutback.
> ACPI define two modes for cooling a processor. By default, active
> mode will be used. You have to enable passive mode via
> /proc/acpi/thermal/*/cooling_mode and you may have to define a polling
> frequency via /proc/acpi/thermal/*/polling_frequency. If also the
> cpufreq infrastructure is working, the ACPI thermal subsystem will
> change automagically
> /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq
> based upon the temperature of the processor attached to that thermal
> zone.
>
> Actually this may not work on certain situation.
> This was just a single question.
>
> --
> Bruno Ducrot
>
> -- Which is worse: ignorance or apathy?
> -- Don't know. Don't care.
>
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: Possible CPUFreq governor
2005-04-27 17:19 ` Mark Bidewell
@ 2005-04-27 17:45 ` Bruno Ducrot
0 siblings, 0 replies; 21+ messages in thread
From: Bruno Ducrot @ 2005-04-27 17:45 UTC (permalink / raw)
To: Mark Bidewell; +Cc: cpufreq
On Wed, Apr 27, 2005 at 01:19:44PM -0400, Mark Bidewell wrote:
> I was not aware of this. However, when I cat
> /proc/acpi/thermal_zone/THM/cooling_mode I get:
>
> <setting not supported>
> cooling mode: critical
>
> Does the Processor P-States driver need to be installed?
No. It should work with cpufreq and then acpi_throttle. The processor
P-state driver is just another cpufreq driver and is not required.
The bios writer do not defined enough objects under the
thermal zone (in the DSDT), and then ACPI report that
'passive cooling' is not available :(
> One distinction between my code and the ACPI code would appear to be that
> tempscale only throttles non-interactive processes as opposed to a global
> cutback.
That what I begin to understand.
--
Bruno Ducrot
-- Which is worse: ignorance or apathy?
-- Don't know. Don't care.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Possible CPUFreq governor
@ 2005-04-27 0:12 Mark Bidewell
0 siblings, 0 replies; 21+ messages in thread
From: Mark Bidewell @ 2005-04-27 0:12 UTC (permalink / raw)
To: cpufreq
[-- Attachment #1: Type: text/plain, Size: 351 bytes --]
I am attaching a Linux kernel patch which supplies a new CPUFreq governor
for the CPUFreq interface. It allows a user or daemon to reduce the
thermal output of a mobile processor by reducing the frequency at which
non-interactive processes are run. It has been extensively tested on the
2.6.11.x kernels on a P4-M 3.2 GHz machine.
Mark Bidewell
[-- Attachment #2: tempscale-2.6.11-sysfs.patch --]
[-- Type: application/octet-stream, Size: 8945 bytes --]
diff -Nur linux-2.6.11.6/drivers/cpufreq/cpufreq_tempscale.c linux-sysfs/drivers/cpufreq/cpufreq_tempscale.c
--- linux-2.6.11.6/drivers/cpufreq/cpufreq_tempscale.c 1969-12-31 19:00:00.000000000 -0500
+++ linux-sysfs/drivers/cpufreq/cpufreq_tempscale.c 2005-04-06 22:56:36.000000000 -0400
@@ -0,0 +1,199 @@
+/*
+ * linux/drivers/cpufreq/cpufreq_tempscale.c
+ *
+ * Copyright (C) 2005 Mark Bidewell <mark.bidewell@alumni.clemson.edu
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufreq.h>
+#include <linux/init.h>
+
+#define define_one_ro(_name) \
+static struct freq_attr _name = \
+__ATTR(_name, 0444, show_##_name, NULL)
+
+#define define_one_rw(_name) \
+static struct freq_attr _name = \
+__ATTR(_name, 0644, show_##_name,store_##_name)
+
+#define TRACK_NUM 5000
+
+void set_proc_stats(struct task_struct *t);
+
+struct task_param{
+ unsigned int pid;
+ unsigned int speed;
+};
+
+unsigned compute_spd=0;
+struct cpufreq_policy *cpupolicy;
+static char enabled=0;
+
+
+
+void scale_proc_freq(struct task_struct *t, int tsk_intr)
+{
+ if(!enabled) return;
+ if(!cpupolicy) return;
+ if(t->prev_state & TASK_UNINTERRUPTIBLE){
+ __cpufreq_driver_target(cpupolicy, cpupolicy->max, CPUFREQ_RELATION_H);
+ return;
+ }
+ if(!tsk_intr){
+ if(compute_spd && cpupolicy->cur != compute_spd){
+ __cpufreq_driver_target(cpupolicy, compute_spd, CPUFREQ_RELATION_H);
+ compute_spd = cpupolicy->cur;
+ }
+ }
+ else {
+ if(cpupolicy->cur <= compute_spd){
+ __cpufreq_driver_target(cpupolicy, cpupolicy->max, CPUFREQ_RELATION_H);
+ }
+ }
+ #ifdef CONFIG_CPU_FREQ_GOV_TEMPSCALE_DBG
+ set_proc_stats(t);
+ #endif
+ return;
+}
+
+/*
+ * SysFS tuning knobs and information
+ */
+
+static ssize_t show_computebound_setspeed(struct cpufreq_policy *unused, char *buf)
+{
+ return sprintf (buf, "%u\n", compute_spd);
+}
+static ssize_t store_computebound_setspeed(struct cpufreq_policy *unused, const char *buf, size_t count)
+{
+ sscanf(buf, "%u", &compute_spd);
+ return count;
+}
+
+#ifdef CONFIG_CPU_FREQ_GOV_TEMPSCALE_DBG
+
+ struct task_param tasks[TRACK_NUM];
+ unsigned int curtask=0;
+
+ void set_proc_stats(struct task_struct *t)
+ {
+ struct task_param *tsk = &tasks[curtask];
+ curtask = (curtask+1) % TRACK_NUM;
+ tsk->pid = t->pid;
+ tsk->speed = cpufreq_get(0);
+ }
+
+ static ssize_t show_tempscale_stats(struct cpufreq_policy *unused, char *buf){
+ int i;
+ int numtasks=curtask;
+ int sz=0;
+ unsigned maxspeed=0, minspeed=0xFFFFFFFF;
+ unsigned nummax=0, nummin=0;
+
+ for(i=0; i < numtasks; i++){
+ if(tasks[i].speed >= maxspeed){
+ maxspeed = tasks[i].speed;
+ nummax++;
+ }
+ if(tasks[i].speed <= minspeed){
+ minspeed = tasks[i].speed;
+ nummin++;
+ }
+ }
+ curtask=0;
+ sz = sprintf(buf, "Number of tasks: %u\n", numtasks);
+ sz += sprintf(buf, "%sMax speed: %u\n", buf, maxspeed);
+ sz += sprintf(buf, "%sMin speed: %u\n", buf, minspeed);
+ sz += sprintf(buf, "%sNumber max speed: %u\n", buf, nummax);
+ sz += sprintf(buf, "%sNumber min speed: %u\n", buf, nummin);
+ return sz;
+ }
+ static ssize_t show_tempscale_data(struct cpufreq_policy *unused, char *buf){
+ int i;
+ int numtasks=curtask;
+ int sz=0;
+ unsigned maxspeed=0, minspeed=0xFFFFFFFF;
+ unsigned nummax=0, nummin=0;
+ sz=sprintf(buf, "PID\tFREQ\n");
+ sz+=sprintf(buf, "%s------\t------\n", buf);
+ if(numtasks > 10) {
+ i = numtasks-10;
+ }
+ else{
+ i=0;
+ }
+ for(; i < numtasks; i++){
+ sz+=sprintf(buf, "%s%u\t%u\n",buf, tasks[i].pid, tasks[i].speed);
+ }
+ return sz;
+ }
+ define_one_ro(tempscale_stats);
+ define_one_ro(tempscale_data);
+#endif
+define_one_rw(computebound_setspeed);
+
+static struct attribute * tempscale_attributes[] = {
+ &computebound_setspeed.attr,
+ #ifdef CONFIG_CPU_FREQ_GOV_TEMPSCALE_DBG
+ &tempscale_stats.attr,
+ &tempscale_data.attr,
+ #endif
+ NULL
+};
+
+static struct attribute_group tempscale_attr_group = {
+ .attrs = tempscale_attributes,
+ .name = "tempscale",
+};
+
+
+static int cpufreq_governor_tempscale(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ compute_spd=policy->max;
+ cpupolicy = policy;
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ enabled=1;
+ sysfs_create_group(&policy->kobj, &tempscale_attr_group);
+ break;
+ case CPUFREQ_GOV_STOP:
+ enabled=0;
+ sysfs_remove_group(&policy->kobj, &tempscale_attr_group);
+ __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
+ break;
+ }
+ return 0;
+}
+
+static struct cpufreq_governor cpufreq_gov_tempscale = {
+ .name = "tempscale",
+ .governor = cpufreq_governor_tempscale,
+ .owner = THIS_MODULE,
+};
+
+
+static int __init cpufreq_gov_tempscale_init(void)
+{
+ return cpufreq_register_governor(&cpufreq_gov_tempscale);
+}
+
+
+static void __exit cpufreq_gov_tempscale_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_tempscale);
+}
+
+
+MODULE_AUTHOR("Mark Bidewell <mark.bidewell@alumni.clemson.edu");
+MODULE_DESCRIPTION("CPUfreq policy governor 'tempscale'");
+MODULE_LICENSE("GPL");
+
+module_init(cpufreq_gov_tempscale_init);
+module_exit(cpufreq_gov_tempscale_exit);
diff -Nur linux-2.6.11.6/drivers/cpufreq/Kconfig linux-sysfs/drivers/cpufreq/Kconfig
--- linux-2.6.11.6/drivers/cpufreq/Kconfig 2005-03-25 22:28:36.000000000 -0500
+++ linux-sysfs/drivers/cpufreq/Kconfig 2005-04-06 22:56:36.000000000 -0400
@@ -116,3 +116,14 @@
For details, take a look at linux/Documentation/cpu-freq.
If in doubt, say N.
+config CPU_FREQ_GOV_TEMPSCALE
+ bool "Temperature Aware Scheduling support"
+ depends on CPU_FREQ
+ help
+ Enables system calls which allow scaling performance between
+ Interactive and non-interactive processes.
+config CPU_FREQ_GOV_TEMPSCALE_DBG
+ bool "Statistics for temperature scaling"
+ depends on CPU_FREQ && CPU_FREQ_GOV_TEMPSCALE
+ help
+ Collect statistics on CPU temperature scaling
diff -Nur linux-2.6.11.6/drivers/cpufreq/Makefile linux-sysfs/drivers/cpufreq/Makefile
--- linux-2.6.11.6/drivers/cpufreq/Makefile 2005-03-25 22:28:39.000000000 -0500
+++ linux-sysfs/drivers/cpufreq/Makefile 2005-04-06 22:56:36.000000000 -0400
@@ -8,7 +8,7 @@
obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o
obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o
obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o
-
+obj-$(CONFIG_CPU_FREQ_GOV_TEMPSCALE) += cpufreq_tempscale.o
# CPUfreq cross-arch helpers
obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o
diff -Nur linux-2.6.11.6/include/linux/sched.h linux-sysfs/include/linux/sched.h
--- linux-2.6.11.6/include/linux/sched.h 2005-03-25 22:28:15.000000000 -0500
+++ linux-sysfs/include/linux/sched.h 2005-04-06 22:55:44.000000000 -0400
@@ -112,14 +112,12 @@
#define EXIT_DEAD 32
#define __set_task_state(tsk, state_value) \
- do { (tsk)->state = (state_value); } while (0)
-#define set_task_state(tsk, state_value) \
- set_mb((tsk)->state, (state_value))
+ do {(tsk)->prev_state=(tsk)->state; (tsk)->state = (state_value); } while (0)
#define __set_current_state(state_value) \
- do { current->state = (state_value); } while (0)
-#define set_current_state(state_value) \
- set_mb(current->state, (state_value))
+ do {current->prev_state=current->state; current->state = (state_value); } while (0)
+
+
/* Task command name length */
#define TASK_COMM_LEN 16
@@ -526,7 +524,7 @@
struct mempolicy;
struct task_struct {
- volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
+ volatile long prev_state, state; /* -1 unrunnable, 0 runnable, >0 stopped */
struct thread_info *thread_info;
atomic_t usage;
unsigned long flags; /* per process flags, defined below */
@@ -1216,4 +1214,13 @@
#endif /* CONFIG_PM */
#endif /* __KERNEL__ */
+/*Change by Mark Bidewell*/
+inline static void set_task_state(struct task_struct *tsk, unsigned state_value){
+ (tsk)->prev_state=(tsk)->state; set_mb((tsk)->state, (state_value));
+}
+/*Change by Mark Bidewell*/
+inline static void set_current_state(unsigned state_value){
+ current->prev_state=current->state; set_mb(current->state, (state_value));
+}
+
#endif
diff -Nur linux-2.6.11.6/kernel/sched.c linux-sysfs/kernel/sched.c
--- linux-2.6.11.6/kernel/sched.c 2005-03-25 22:28:26.000000000 -0500
+++ linux-sysfs/kernel/sched.c 2005-04-06 22:55:33.000000000 -0400
@@ -2822,6 +2822,12 @@
prev = current;
if (unlikely(reacquire_kernel_lock(prev) < 0))
goto need_resched_nonpreemptible;
+ /*
+ * Put thermal stuff here
+ */
+ #ifdef CONFIG_CPU_FREQ_GOV_TEMPSCALE
+ scale_proc_freq(next, TASK_INTERACTIVE(next));
+ #endif
preempt_enable_no_resched();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;
[-- Attachment #3: Type: text/plain, Size: 147 bytes --]
_______________________________________________
Cpufreq mailing list
Cpufreq@lists.linux.org.uk
http://lists.linux.org.uk/mailman/listinfo/cpufreq
^ permalink raw reply [flat|nested] 21+ messages in thread* Possible CPUFreq governor
@ 2005-04-27 0:13 Mark Bidewell
0 siblings, 0 replies; 21+ messages in thread
From: Mark Bidewell @ 2005-04-27 0:13 UTC (permalink / raw)
To: cpufreq
I am attaching a Linux kernel patch which supplies a new CPUFreq governor
for the CPUFreq interface. It allows a user or daemon to reduce the
thermal output of a mobile processor by reducing the frequency at which
non-interactive processes are run. It has been extensively tested on the
2.6.11.x kernels on a P4-M 3.2 GHz machine.
Mark Bidewell
diff -Nur linux-2.6.11.6/drivers/cpufreq/cpufreq_tempscale.c
linux-sysfs/drivers/cpufreq/cpufreq_tempscale.c
--- linux-2.6.11.6/drivers/cpufreq/cpufreq_tempscale.c 1969-12-31
19:00:00.000000000 -0500
+++ linux-sysfs/drivers/cpufreq/cpufreq_tempscale.c 2005-04-06
22:56:36.000000000 -0400
@@ -0,0 +1,199 @@
+/*
+ * linux/drivers/cpufreq/cpufreq_tempscale.c
+ *
+ * Copyright (C) 2005 Mark Bidewell <mark.bidewell@alumni.clemson.edu
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufreq.h>
+#include <linux/init.h>
+
+#define define_one_ro(_name) \
+static struct freq_attr _name = \
+__ATTR(_name, 0444, show_##_name, NULL)
+
+#define define_one_rw(_name) \
+static struct freq_attr _name = \
+__ATTR(_name, 0644, show_##_name,store_##_name)
+
+#define TRACK_NUM 5000
+
+void set_proc_stats(struct task_struct *t);
+
+struct task_param{
+ unsigned int pid;
+ unsigned int speed;
+};
+
+unsigned compute_spd=0;
+struct cpufreq_policy *cpupolicy;
+static char enabled=0;
+
+
+
+void scale_proc_freq(struct task_struct *t, int tsk_intr)
+{
+ if(!enabled) return;
+ if(!cpupolicy) return;
+ if(t->prev_state & TASK_UNINTERRUPTIBLE){
+ __cpufreq_driver_target(cpupolicy, cpupolicy->max, CPUFREQ_RELATION_H);
+ return;
+ }
+ if(!tsk_intr){
+ if(compute_spd && cpupolicy->cur != compute_spd){
+ __cpufreq_driver_target(cpupolicy, compute_spd, CPUFREQ_RELATION_H);
+ compute_spd = cpupolicy->cur;
+ }
+ }
+ else {
+ if(cpupolicy->cur <= compute_spd){
+ __cpufreq_driver_target(cpupolicy, cpupolicy->max, CPUFREQ_RELATION_H);
+ }
+ }
+ #ifdef CONFIG_CPU_FREQ_GOV_TEMPSCALE_DBG
+ set_proc_stats(t);
+ #endif
+ return;
+}
+
+/*
+ * SysFS tuning knobs and information
+ */
+
+static ssize_t show_computebound_setspeed(struct cpufreq_policy *unused,
char *buf)
+{
+ return sprintf (buf, "%u\n", compute_spd);
+}
+static ssize_t store_computebound_setspeed(struct cpufreq_policy *unused,
const char *buf, size_t count)
+{
+ sscanf(buf, "%u", &compute_spd);
+ return count;
+}
+
+#ifdef CONFIG_CPU_FREQ_GOV_TEMPSCALE_DBG
+
+ struct task_param tasks[TRACK_NUM];
+ unsigned int curtask=0;
+
+ void set_proc_stats(struct task_struct *t)
+ {
+ struct task_param *tsk = &tasks[curtask];
+ curtask = (curtask+1) % TRACK_NUM;
+ tsk->pid = t->pid;
+ tsk->speed = cpufreq_get(0);
+ }
+
+ static ssize_t show_tempscale_stats(struct cpufreq_policy *unused, char
*buf){
+ int i;
+ int numtasks=curtask;
+ int sz=0;
+ unsigned maxspeed=0, minspeed=0xFFFFFFFF;
+ unsigned nummax=0, nummin=0;
+
+ for(i=0; i < numtasks; i++){
+ if(tasks[i].speed >= maxspeed){
+ maxspeed = tasks[i].speed;
+ nummax++;
+ }
+ if(tasks[i].speed <= minspeed){
+ minspeed = tasks[i].speed;
+ nummin++;
+ }
+ }
+ curtask=0;
+ sz = sprintf(buf, "Number of tasks: %u\n", numtasks);
+ sz += sprintf(buf, "%sMax speed: %u\n", buf, maxspeed);
+ sz += sprintf(buf, "%sMin speed: %u\n", buf, minspeed);
+ sz += sprintf(buf, "%sNumber max speed: %u\n", buf, nummax);
+ sz += sprintf(buf, "%sNumber min speed: %u\n", buf, nummin);
+ return sz;
+ }
+ static ssize_t show_tempscale_data(struct cpufreq_policy *unused, char
*buf){
+ int i;
+ int numtasks=curtask;
+ int sz=0;
+ unsigned maxspeed=0, minspeed=0xFFFFFFFF;
+ unsigned nummax=0, nummin=0;
+ sz=sprintf(buf, "PID\tFREQ\n");
+ sz+=sprintf(buf, "%s------\t------\n", buf);
+ if(numtasks > 10) {
+ i = numtasks-10;
+ }
+ else{
+ i=0;
+ }
+ for(; i < numtasks; i++){
+ sz+=sprintf(buf, "%s%u\t%u\n",buf, tasks[i].pid, tasks[i].speed);
+ }
+ return sz;
+ }
+ define_one_ro(tempscale_stats);
+ define_one_ro(tempscale_data);
+#endif
+define_one_rw(computebound_setspeed);
+
+static struct attribute * tempscale_attributes[] = {
+ &computebound_setspeed.attr,
+ #ifdef CONFIG_CPU_FREQ_GOV_TEMPSCALE_DBG
+ &tempscale_stats.attr,
+ &tempscale_data.attr,
+ #endif
+ NULL
+};
+
+static struct attribute_group tempscale_attr_group = {
+ .attrs = tempscale_attributes,
+ .name = "tempscale",
+};
+
+
+static int cpufreq_governor_tempscale(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ compute_spd=policy->max;
+ cpupolicy = policy;
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ enabled=1;
+ sysfs_create_group(&policy->kobj, &tempscale_attr_group);
+ break;
+ case CPUFREQ_GOV_STOP:
+ enabled=0;
+ sysfs_remove_group(&policy->kobj, &tempscale_attr_group);
+ __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
+ break;
+ }
+ return 0;
+}
+
+static struct cpufreq_governor cpufreq_gov_tempscale = {
+ .name = "tempscale",
+ .governor = cpufreq_governor_tempscale,
+ .owner = THIS_MODULE,
+};
+
+
+static int __init cpufreq_gov_tempscale_init(void)
+{
+ return cpufreq_register_governor(&cpufreq_gov_tempscale);
+}
+
+
+static void __exit cpufreq_gov_tempscale_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_tempscale);
+}
+
+
+MODULE_AUTHOR("Mark Bidewell <mark.bidewell@alumni.clemson.edu");
+MODULE_DESCRIPTION("CPUfreq policy governor 'tempscale'");
+MODULE_LICENSE("GPL");
+
+module_init(cpufreq_gov_tempscale_init);
+module_exit(cpufreq_gov_tempscale_exit);
diff -Nur linux-2.6.11.6/drivers/cpufreq/Kconfig
linux-sysfs/drivers/cpufreq/Kconfig
--- linux-2.6.11.6/drivers/cpufreq/Kconfig 2005-03-25 22:28:36.000000000
-0500
+++ linux-sysfs/drivers/cpufreq/Kconfig 2005-04-06 22:56:36.000000000 -0400
@@ -116,3 +116,14 @@
For details, take a look at linux/Documentation/cpu-freq.
If in doubt, say N.
+config CPU_FREQ_GOV_TEMPSCALE
+ bool "Temperature Aware Scheduling support"
+ depends on CPU_FREQ
+ help
+ Enables system calls which allow scaling performance between
+ Interactive and non-interactive processes.
+config CPU_FREQ_GOV_TEMPSCALE_DBG
+ bool "Statistics for temperature scaling"
+ depends on CPU_FREQ && CPU_FREQ_GOV_TEMPSCALE
+ help
+ Collect statistics on CPU temperature scaling
diff -Nur linux-2.6.11.6/drivers/cpufreq/Makefile
linux-sysfs/drivers/cpufreq/Makefile
--- linux-2.6.11.6/drivers/cpufreq/Makefile 2005-03-25 22:28:39.000000000
-0500
+++ linux-sysfs/drivers/cpufreq/Makefile 2005-04-06 22:56:36.000000000 -0400
@@ -8,7 +8,7 @@
obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o
obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o
obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o
-
+obj-$(CONFIG_CPU_FREQ_GOV_TEMPSCALE) += cpufreq_tempscale.o
# CPUfreq cross-arch helpers
obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o
diff -Nur linux-2.6.11.6/include/linux/sched.h
linux-sysfs/include/linux/sched.h
--- linux-2.6.11.6/include/linux/sched.h 2005-03-25 22:28:15.000000000 -0500
+++ linux-sysfs/include/linux/sched.h 2005-04-06 22:55:44.000000000 -0400
@@ -112,14 +112,12 @@
#define EXIT_DEAD 32
#define __set_task_state(tsk, state_value) \
- do { (tsk)->state = (state_value); } while (0)
-#define set_task_state(tsk, state_value) \
- set_mb((tsk)->state, (state_value))
+ do {(tsk)->prev_state=(tsk)->state; (tsk)->state = (state_value); }
while (0)
#define __set_current_state(state_value) \
- do { current->state = (state_value); } while (0)
-#define set_current_state(state_value) \
- set_mb(current->state, (state_value))
+ do {current->prev_state=current->state; current->state = (state_value);
} while (0)
+
+
/* Task command name length */
#define TASK_COMM_LEN 16
@@ -526,7 +524,7 @@
struct mempolicy;
struct task_struct {
- volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
+ volatile long prev_state, state; /* -1 unrunnable, 0 runnable, >0
stopped */
struct thread_info *thread_info;
atomic_t usage;
unsigned long flags; /* per process flags, defined below */
@@ -1216,4 +1214,13 @@
#endif /* CONFIG_PM */
#endif /* __KERNEL__ */
+/*Change by Mark Bidewell*/
+inline static void set_task_state(struct task_struct *tsk, unsigned
state_value){
+ (tsk)->prev_state=(tsk)->state; set_mb((tsk)->state, (state_value));
+}
+/*Change by Mark Bidewell*/
+inline static void set_current_state(unsigned state_value){
+ current->prev_state=current->state; set_mb(current->state, (state_value));
+}
+
#endif
diff -Nur linux-2.6.11.6/kernel/sched.c linux-sysfs/kernel/sched.c
--- linux-2.6.11.6/kernel/sched.c 2005-03-25 22:28:26.000000000 -0500
+++ linux-sysfs/kernel/sched.c 2005-04-06 22:55:33.000000000 -0400
@@ -2822,6 +2822,12 @@
prev = current;
if (unlikely(reacquire_kernel_lock(prev) < 0))
goto need_resched_nonpreemptible;
+ /*
+ * Put thermal stuff here
+ */
+ #ifdef CONFIG_CPU_FREQ_GOV_TEMPSCALE
+ scale_proc_freq(next, TASK_INTERACTIVE(next));
+ #endif
preempt_enable_no_resched();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;
^ permalink raw reply [flat|nested] 21+ messages in thread* RE: Possible CPUFreq governor
@ 2005-05-02 14:02 Pallipadi, Venkatesh
2005-05-02 14:30 ` Mark Bidewell
0 siblings, 1 reply; 21+ messages in thread
From: Pallipadi, Venkatesh @ 2005-05-02 14:02 UTC (permalink / raw)
To: Mark Bidewell, Bruno Ducrot; +Cc: cpufreq
>-----Original Message-----
>From: cpufreq-bounces@lists.linux.org.uk
>[mailto:cpufreq-bounces@lists.linux.org.uk] On Behalf Of Mark Bidewell
>Sent: Monday, May 02, 2005 5:51 AM
>To: Bruno Ducrot
>Cc: cpufreq@lists.linux.org.uk
>Subject: Re: Possible CPUFreq governor
>
>I have been examining the structure and design of the ondemand govenor
>further. Would it be accurate to characterize your concern as
>basically
>that under heavy process load, the p-state switching latency becomes
>longer than the timeslices and thus could dominate the CPU?
>Or is there
>an issue with processor damage?
>
P-state switching latency will be same at all times, irrespective
of processor load. IIUC, the issue here is adding the knowledge of CPU
temperature to the CPU frequency governor.
For that, As Bruno potins out, one can have a shell script/program on
top of ondemand governor, that monitors the temperature in /proc/...
And changes ondemand governor's /sys/..../cpufreq/scaling_max_freq
Thanks,
Venki
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: Possible CPUFreq governor
2005-05-02 14:02 Pallipadi, Venkatesh
@ 2005-05-02 14:30 ` Mark Bidewell
0 siblings, 0 replies; 21+ messages in thread
From: Mark Bidewell @ 2005-05-02 14:30 UTC (permalink / raw)
To: Pallipadi, Venkatesh; +Cc: cpufreq
Thank you very much for your reply. Barring a thermal emergency,
wouldn't CPU demand automatically be high if temperature was also high?
The basic intent of my code was to specifically target prcesses which
were non-interactive for performance degradation rather that the whole
system. The intent being to improve interactive performace while
reducing temperature. I'm not clear on how scripting ondemand achieves
this goal?
Mark Bidewell
Pallipadi, Venkatesh wrote:
>
>
>
>
>>-----Original Message-----
>>From: cpufreq-bounces@lists.linux.org.uk
>>[mailto:cpufreq-bounces@lists.linux.org.uk] On Behalf Of Mark Bidewell
>>Sent: Monday, May 02, 2005 5:51 AM
>>To: Bruno Ducrot
>>Cc: cpufreq@lists.linux.org.uk
>>Subject: Re: Possible CPUFreq governor
>>
>>I have been examining the structure and design of the ondemand govenor
>>further. Would it be accurate to characterize your concern as
>>basically
>>that under heavy process load, the p-state switching latency becomes
>>longer than the timeslices and thus could dominate the CPU?
>>Or is there
>>an issue with processor damage?
>>
>>
>>
>
>P-state switching latency will be same at all times, irrespective
>of processor load. IIUC, the issue here is adding the knowledge of CPU
>temperature to the CPU frequency governor.
>
>For that, As Bruno potins out, one can have a shell script/program on
>top of ondemand governor, that monitors the temperature in /proc/...
>And changes ondemand governor's /sys/..../cpufreq/scaling_max_freq
>
>Thanks,
>Venki
>
>
>
^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2005-05-02 14:30 UTC | newest]
Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-04-26 23:39 Possible CPUFreq governor Mark Bidewell
2005-04-27 10:53 ` Bruno Ducrot
2005-04-27 11:02 ` Bruno Ducrot
2005-04-27 11:08 ` Ivor Hewitt
2005-04-27 12:30 ` Mark Bidewell
2005-04-27 14:07 ` Bruno Ducrot
2005-04-27 16:20 ` Mark Bidewell
2005-04-27 17:38 ` Bruno Ducrot
2005-04-27 18:18 ` Mark Bidewell
2005-04-27 19:04 ` Bruno Ducrot
2005-05-02 12:51 ` Mark Bidewell
2005-04-27 12:25 ` Mark Bidewell
2005-04-27 13:54 ` Bruno Ducrot
2005-04-27 16:10 ` Mark Bidewell
2005-04-27 17:03 ` Bruno Ducrot
2005-04-27 17:19 ` Mark Bidewell
2005-04-27 17:45 ` Bruno Ducrot
-- strict thread matches above, loose matches on Subject: below --
2005-04-27 0:12 Mark Bidewell
2005-04-27 0:13 Mark Bidewell
2005-05-02 14:02 Pallipadi, Venkatesh
2005-05-02 14:30 ` Mark Bidewell
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox