From: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>,
Benjamin Herrenschmidt <benh@kernel.crashing.org>,
Ingo Molnar <mingo@elte.hu>,
Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>,
Dipankar Sarma <dipankar@in.ibm.com>,
Balbir Singh <balbir@in.ibm.com>,
Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: linux-kernel@vger.kernel.org, Arun Bharadwaj <arun@linux.vnet.ibm.com>
Subject: [v11 PATCH 4/9]: x86: refactor x86 idle power management code, remove all instances of pm_idle
Date: Tue, 9 Feb 2010 14:02:36 +0530 [thread overview]
Message-ID: <20100209083236.GE29988@linux.vnet.ibm.com> (raw)
In-Reply-To: <20100209082815.GA29988@linux.vnet.ibm.com>
* Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2010-02-09 13:58:16]:
This patch removes all instances of pm_idle from the x86 code.
pm_idle, which was previously called from the cpu_idle() idle loop,
is replaced by cpuidle_idle_call().
x86 also registers with cpuidle when the idle routine is selected,
by populating the cpuidle_device data structure for each CPU.
The same registration is done for the APM module and for Xen, which also used pm_idle.
Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
---
arch/x86/kernel/apm_32.c | 45 ++++++++++++++++++++++--
arch/x86/kernel/process.c | 78 +++++++++++++++++++++++++++++++-----------
arch/x86/kernel/process_32.c | 3 +
arch/x86/kernel/process_64.c | 3 +
arch/x86/xen/setup.c | 29 +++++++++++++++
drivers/acpi/processor_core.c | 8 ++--
drivers/acpi/processor_idle.c | 44 ++++++++++-------------
7 files changed, 157 insertions(+), 53 deletions(-)
Index: linux.trees.git/arch/x86/kernel/process.c
===================================================================
--- linux.trees.git.orig/arch/x86/kernel/process.c
+++ linux.trees.git/arch/x86/kernel/process.c
@@ -14,6 +14,8 @@
#include <linux/utsname.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
+#include <linux/cpuidle.h>
+
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
@@ -329,12 +331,6 @@ long sys_execve(char __user *name, char
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-
#ifdef CONFIG_X86_32
/*
* This halt magic was a workaround for ancient floppy DMA
@@ -414,17 +410,15 @@ static void do_nothing(void *unused)
}
/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
+ * cpu_idle_wait - Required while changing idle routine handler on SMP systems.
*
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
+ * Caller must have changed idle routine to the new value before the call. Old
+ * value will not be used by any CPU after the return of this function.
*/
void cpu_idle_wait(void)
{
smp_mb();
- /* kick all the CPUs so that they exit out of pm_idle */
+ /* kick all the CPUs so that they exit out of idle loop */
smp_call_function(do_nothing, NULL, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -603,15 +597,57 @@ static void c1e_idle(void)
default_idle();
}
+static void (*local_idle)(void);
+
+#ifndef CONFIG_CPU_IDLE
+void cpuidle_idle_call(void)
+{
+ if (local_idle)
+ local_idle();
+ else
+ default_idle();
+}
+#endif
+
+DEFINE_PER_CPU(struct cpuidle_device, idle_devices);
+
+struct cpuidle_driver cpuidle_default_driver = {
+ .name = "cpuidle_default",
+};
+
+static void local_idle_loop(struct cpuidle_device *dev,
+ struct cpuidle_state *st)
+{
+ local_idle();
+}
+
+static int setup_cpuidle_simple(void)
+{
+ struct cpuidle_device *dev;
+ int cpu;
+
+ cpuidle_register_driver(&cpuidle_default_driver);
+
+ for_each_online_cpu(cpu) {
+ dev = &per_cpu(idle_devices, cpu);
+ dev->cpu = cpu;
+ dev->states[0].enter = local_idle_loop;
+ dev->state_count = 1;
+ cpuidle_register_device(dev);
+ }
+ return 0;
+}
+device_initcall(setup_cpuidle_simple);
+
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
- if (pm_idle == poll_idle && smp_num_siblings > 1) {
+ if (local_idle == poll_idle && smp_num_siblings > 1) {
printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
" performance may degrade.\n");
}
#endif
- if (pm_idle)
+ if (local_idle)
return;
if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
@@ -619,18 +655,20 @@ void __cpuinit select_idle_routine(const
* One CPU supports mwait => All CPUs supports mwait
*/
printk(KERN_INFO "using mwait in idle threads.\n");
- pm_idle = mwait_idle;
+ local_idle = mwait_idle;
} else if (check_c1e_idle(c)) {
printk(KERN_INFO "using C1E aware idle routine\n");
- pm_idle = c1e_idle;
+ local_idle = c1e_idle;
} else
- pm_idle = default_idle;
+ local_idle = default_idle;
+
+ return;
}
void __init init_c1e_mask(void)
{
/* If we're using c1e_idle, we need to allocate c1e_mask. */
- if (pm_idle == c1e_idle)
+ if (local_idle == c1e_idle)
zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
}
@@ -641,7 +679,7 @@ static int __init idle_setup(char *str)
if (!strcmp(str, "poll")) {
printk("using polling idle threads.\n");
- pm_idle = poll_idle;
+ local_idle = poll_idle;
} else if (!strcmp(str, "mwait"))
force_mwait = 1;
else if (!strcmp(str, "halt")) {
@@ -652,7 +690,7 @@ static int __init idle_setup(char *str)
* To continue to load the CPU idle driver, don't touch
* the boot_option_idle_override.
*/
- pm_idle = default_idle;
+ local_idle = default_idle;
idle_halt = 1;
return 0;
} else if (!strcmp(str, "nomwait")) {
Index: linux.trees.git/arch/x86/kernel/process_32.c
===================================================================
--- linux.trees.git.orig/arch/x86/kernel/process_32.c
+++ linux.trees.git/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
+#include <linux/cpuidle.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -110,7 +111,7 @@ void cpu_idle(void)
local_irq_disable();
/* Don't trace irqs off for idle */
stop_critical_timings();
- pm_idle();
+ cpuidle_idle_call();
start_critical_timings();
}
tick_nohz_restart_sched_tick();
Index: linux.trees.git/arch/x86/kernel/process_64.c
===================================================================
--- linux.trees.git.orig/arch/x86/kernel/process_64.c
+++ linux.trees.git/arch/x86/kernel/process_64.c
@@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
+#include <linux/cpuidle.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -137,7 +138,7 @@ void cpu_idle(void)
enter_idle();
/* Don't trace irqs off for idle */
stop_critical_timings();
- pm_idle();
+ cpuidle_idle_call();
start_critical_timings();
/* In many cases the interrupt that ended idle
has already called exit_idle. But some idle
Index: linux.trees.git/arch/x86/kernel/apm_32.c
===================================================================
--- linux.trees.git.orig/arch/x86/kernel/apm_32.c
+++ linux.trees.git/arch/x86/kernel/apm_32.c
@@ -227,6 +227,7 @@
#include <linux/suspend.h>
#include <linux/kthread.h>
#include <linux/jiffies.h>
+#include <linux/cpuidle.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -2255,6 +2256,45 @@ static struct dmi_system_id __initdata a
{ }
};
+DEFINE_PER_CPU(struct cpuidle_device, apm_idle_devices);
+
+struct cpuidle_driver cpuidle_apm_driver = {
+ .name = "cpuidle_apm",
+};
+
+static void apm_idle_loop(struct cpuidle_device *dev, struct cpuidle_state *st)
+{
+ apm_cpu_idle();
+}
+
+static void setup_cpuidle_apm(void)
+{
+ struct cpuidle_device *dev;
+ int cpu;
+
+ cpuidle_register_driver(&cpuidle_apm_driver);
+
+ for_each_online_cpu(cpu) {
+ dev = &per_cpu(apm_idle_devices, cpu);
+ dev->cpu = cpu;
+ dev->states[0].enter = apm_idle_loop;
+ dev->state_count = 1;
+ cpuidle_register_device(dev);
+ }
+}
+
+void exit_cpuidle_apm(void)
+{
+ struct cpuidle_device *dev;
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ dev = &per_cpu(apm_idle_devices, cpu);
+ cpuidle_unregister_device(dev);
+ }
+}
+
+
/*
* Just start the APM thread. We do NOT want to do APM BIOS
* calls from anything but the APM thread, if for no other reason
@@ -2392,8 +2432,7 @@ static int __init apm_init(void)
if (HZ != 100)
idle_period = (idle_period * HZ) / 100;
if (idle_threshold < 100) {
- original_pm_idle = pm_idle;
- pm_idle = apm_cpu_idle;
+ setup_cpuidle_apm();
set_pm_idle = 1;
}
@@ -2405,7 +2444,7 @@ static void __exit apm_exit(void)
int error;
if (set_pm_idle) {
- pm_idle = original_pm_idle;
+ exit_cpuidle_apm();
/*
* We are about to unload the current idle thread pm callback
* (pm_idle), Wait for all processors to update cached/local
Index: linux.trees.git/arch/x86/xen/setup.c
===================================================================
--- linux.trees.git.orig/arch/x86/xen/setup.c
+++ linux.trees.git/arch/x86/xen/setup.c
@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pm.h>
+#include <linux/cpuidle.h>
#include <asm/elf.h>
#include <asm/vdso.h>
@@ -151,6 +152,32 @@ void __cpuinit xen_enable_syscall(void)
#endif /* CONFIG_X86_64 */
}
+DEFINE_PER_CPU(struct cpuidle_device, xen_idle_devices);
+struct cpuidle_driver cpuidle_xen_driver = {
+ .name = "cpuidle_xen",
+};
+
+static void xen_idle_loop(struct cpuidle_device *dev, struct cpuidle_state *st)
+{
+ xen_idle();
+}
+
+static void setup_cpuidle_xen(void)
+{
+ struct cpuidle_device *dev;
+ int cpu;
+
+ cpuidle_register_driver(&cpuidle_xen_driver);
+
+ for_each_online_cpu(cpu) {
+ dev = &per_cpu(xen_idle_devices, cpu);
+ dev->cpu = cpu;
+ dev->states[0].enter = xen_idle_loop;
+ dev->state_count = 1;
+ cpuidle_register_device(dev);
+ }
+}
+
void __init xen_arch_setup(void)
{
struct physdev_set_iopl set_iopl;
@@ -186,7 +213,7 @@ void __init xen_arch_setup(void)
MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
- pm_idle = xen_idle;
+ setup_cpuidle_xen();
paravirt_disable_iospace();
Index: linux.trees.git/drivers/acpi/processor_idle.c
===================================================================
--- linux.trees.git.orig/drivers/acpi/processor_idle.c
+++ linux.trees.git/drivers/acpi/processor_idle.c
@@ -814,18 +814,16 @@ static inline void acpi_idle_do_entry(st
*
* This is equivalent to the HALT instruction.
*/
-static int acpi_idle_enter_c1(struct cpuidle_device *dev,
+static void acpi_idle_enter_c1(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
- ktime_t kt1, kt2;
- s64 idle_time;
struct acpi_processor *pr;
struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
pr = __get_cpu_var(processors);
if (unlikely(!pr))
- return 0;
+ return;
local_irq_disable();
@@ -833,20 +831,15 @@ static int acpi_idle_enter_c1(struct cpu
if (acpi_idle_suspend) {
local_irq_enable();
cpu_relax();
- return 0;
+ return;
}
lapic_timer_state_broadcast(pr, cx, 1);
- kt1 = ktime_get_real();
acpi_idle_do_entry(cx);
- kt2 = ktime_get_real();
- idle_time = ktime_to_us(ktime_sub(kt2, kt1));
local_irq_enable();
cx->usage++;
lapic_timer_state_broadcast(pr, cx, 0);
-
- return idle_time;
}
/**
@@ -854,7 +847,7 @@ static int acpi_idle_enter_c1(struct cpu
* @dev: the target CPU
* @state: the state data
*/
-static int acpi_idle_enter_simple(struct cpuidle_device *dev,
+static void acpi_idle_enter_simple(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
struct acpi_processor *pr;
@@ -866,10 +859,12 @@ static int acpi_idle_enter_simple(struct
pr = __get_cpu_var(processors);
if (unlikely(!pr))
- return 0;
+ return;
- if (acpi_idle_suspend)
- return(acpi_idle_enter_c1(dev, state));
+ if (acpi_idle_suspend) {
+ acpi_idle_enter_c1(dev, state);
+ return;
+ }
local_irq_disable();
current_thread_info()->status &= ~TS_POLLING;
@@ -882,7 +877,7 @@ static int acpi_idle_enter_simple(struct
if (unlikely(need_resched())) {
current_thread_info()->status |= TS_POLLING;
local_irq_enable();
- return 0;
+ return;
}
/*
@@ -913,7 +908,6 @@ static int acpi_idle_enter_simple(struct
lapic_timer_state_broadcast(pr, cx, 0);
cx->time += sleep_ticks;
- return idle_time;
}
static int c3_cpu_count;
@@ -926,7 +920,7 @@ static DEFINE_SPINLOCK(c3_lock);
*
* If BM is detected, the deepest non-C3 idle state is entered instead.
*/
-static int acpi_idle_enter_bm(struct cpuidle_device *dev,
+static void acpi_idle_enter_bm(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
struct acpi_processor *pr;
@@ -939,20 +933,23 @@ static int acpi_idle_enter_bm(struct cpu
pr = __get_cpu_var(processors);
if (unlikely(!pr))
- return 0;
+ return;
- if (acpi_idle_suspend)
- return(acpi_idle_enter_c1(dev, state));
+ if (acpi_idle_suspend) {
+ acpi_idle_enter_c1(dev, state);
+ return;
+ }
if (acpi_idle_bm_check()) {
if (dev->safe_state) {
dev->last_state = dev->safe_state;
- return dev->safe_state->enter(dev, dev->safe_state);
+ dev->safe_state->enter(dev, dev->safe_state);
+ return;
} else {
local_irq_disable();
acpi_safe_halt();
local_irq_enable();
- return 0;
+ return;
}
}
@@ -967,7 +964,7 @@ static int acpi_idle_enter_bm(struct cpu
if (unlikely(need_resched())) {
current_thread_info()->status |= TS_POLLING;
local_irq_enable();
- return 0;
+ return;
}
acpi_unlazy_tlb(smp_processor_id());
@@ -1025,7 +1022,6 @@ static int acpi_idle_enter_bm(struct cpu
lapic_timer_state_broadcast(pr, cx, 0);
cx->time += sleep_ticks;
- return idle_time;
}
struct cpuidle_driver acpi_idle_driver = {
Index: linux.trees.git/drivers/acpi/processor_core.c
===================================================================
--- linux.trees.git.orig/drivers/acpi/processor_core.c
+++ linux.trees.git/drivers/acpi/processor_core.c
@@ -1081,9 +1081,11 @@ static int __init acpi_processor_init(vo
if (!acpi_processor_dir)
return -ENOMEM;
#endif
- result = cpuidle_register_driver(&acpi_idle_driver);
- if (result < 0)
- goto out_proc;
+ if (!boot_option_idle_override) {
+ result = cpuidle_register_driver(&acpi_idle_driver);
+ if (result < 0)
+ goto out_proc;
+ }
result = acpi_bus_register_driver(&acpi_processor_driver);
if (result < 0)
next prev parent reply other threads:[~2010-02-09 8:32 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-02-09 8:28 [v11 PATCH 0/9]: cpuidle: cleanup cpuidle/ introduce cpuidle to POWER Arun R Bharadwaj
2010-02-09 8:29 ` [v11 PATCH 1/9]: cpuidle: Design documentation patch Arun R Bharadwaj
2010-02-09 17:00 ` Randy Dunlap
2010-02-10 2:13 ` Arun R Bharadwaj
2010-02-09 8:30 ` [v11 PATCH 2/9]: cpuidle: cleanup drivers/cpuidle/cpuidle.c Arun R Bharadwaj
2010-02-09 8:31 ` [v11 PATCH 3/9]: cpuidle: implement a list based approach to register a set of idle routines Arun R Bharadwaj
2010-02-09 8:32 ` Arun R Bharadwaj [this message]
2010-02-09 8:33 ` [v11 PATCH 5/9]: POWER: enable cpuidle for POWER Arun R Bharadwaj
2010-02-09 8:34 ` [v11 PATCH 6/9]: pSeries/cpuidle: refactor pseries idle loops Arun R Bharadwaj
2010-02-09 8:34 ` [v11 PATCH 7/9]: POWER: add a default_idle idle loop for POWER Arun R Bharadwaj
2010-02-09 8:35 ` [v11 PATCH 8/9]: pSeries: implement pSeries processor idle module Arun R Bharadwaj
2010-02-09 8:36 ` [v11 PATCH 9/9]: POWER: Enable default_idle when power_save=off Arun R Bharadwaj
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100209083236.GE29988@linux.vnet.ibm.com \
--to=arun@linux.vnet.ibm.com \
--cc=a.p.zijlstra@chello.nl \
--cc=balbir@in.ibm.com \
--cc=benh@kernel.crashing.org \
--cc=dipankar@in.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=svaidy@linux.vnet.ibm.com \
--cc=venkatesh.pallipadi@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.