* [RFC PATCH V1 1/7] cpuidle: create bootparam "cpuidle.off=1"
2011-06-07 16:29 [RFC PATCH V1 0/7] cpuidle: (POWER) cpuidle driver for pSeries Trinabh Gupta
@ 2011-06-07 16:29 ` Trinabh Gupta
2011-06-17 4:29 ` Benjamin Herrenschmidt
2011-06-07 16:29 ` [RFC PATCH V1 2/7] cpuidle: replace xen access to x86 pm_idle and default_idle Trinabh Gupta
` (5 subsequent siblings)
6 siblings, 1 reply; 15+ messages in thread
From: Trinabh Gupta @ 2011-06-07 16:29 UTC (permalink / raw)
To: linux-pm, linuxppc-dev; +Cc: linux-kernel
From: Len Brown <len.brown@intel.com>
useful for disabling cpuidle to fall back
to architecture-default idle loop
cpuidle drivers and governors will fail to register.
on x86 they'll say so:
intel_idle: intel_idle yielding to (null)
ACPI: acpi_idle yielding to (null)
Signed-off-by: Len Brown <len.brown@intel.com>
---
Documentation/kernel-parameters.txt | 3 +++
drivers/cpuidle/cpuidle.c | 10 ++++++++++
drivers/cpuidle/cpuidle.h | 1 +
drivers/cpuidle/driver.c | 3 +++
drivers/cpuidle/governor.c | 3 +++
5 files changed, 20 insertions(+), 0 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index d9a203b..5697faf 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -546,6 +546,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
/proc/<pid>/coredump_filter.
See also Documentation/filesystems/proc.txt.
+ cpuidle.off=1 [CPU_IDLE]
+ disable the cpuidle sub-system
+
cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver
Format:
<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 406be83..a171b9e 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -28,6 +28,12 @@ LIST_HEAD(cpuidle_detected_devices);
static void (*pm_idle_old)(void);
static int enabled_devices;
+static int off __read_mostly;
+
+int cpuidle_disabled(void)
+{
+ return off;
+}
#if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
static void cpuidle_kick_cpus(void)
@@ -397,6 +403,9 @@ static int __init cpuidle_init(void)
{
int ret;
+ if (cpuidle_disabled())
+ return -ENODEV;
+
pm_idle_old = pm_idle;
ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
@@ -408,4 +417,5 @@ static int __init cpuidle_init(void)
return 0;
}
+module_param(off, int, 0444);
core_initcall(cpuidle_init);
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
index 33e50d5..38c3fd8 100644
--- a/drivers/cpuidle/cpuidle.h
+++ b/drivers/cpuidle/cpuidle.h
@@ -13,6 +13,7 @@ extern struct list_head cpuidle_governors;
extern struct list_head cpuidle_detected_devices;
extern struct mutex cpuidle_lock;
extern spinlock_t cpuidle_driver_lock;
+extern int cpuidle_disabled(void);
/* idle loop */
extern void cpuidle_install_idle_handler(void);
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 33e3189..284d7af 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -50,6 +50,9 @@ int cpuidle_register_driver(struct cpuidle_driver *drv)
if (!drv)
return -EINVAL;
+ if (cpuidle_disabled())
+ return -ENODEV;
+
spin_lock(&cpuidle_driver_lock);
if (cpuidle_curr_driver) {
spin_unlock(&cpuidle_driver_lock);
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
index 724c164..ea2f8e7 100644
--- a/drivers/cpuidle/governor.c
+++ b/drivers/cpuidle/governor.c
@@ -81,6 +81,9 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
if (!gov || !gov->select)
return -EINVAL;
+ if (cpuidle_disabled())
+ return -ENODEV;
+
mutex_lock(&cpuidle_lock);
if (__cpuidle_find_governor(gov->name) == NULL) {
ret = 0;
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [RFC PATCH V1 1/7] cpuidle: create bootparam "cpuidle.off=1"
2011-06-07 16:29 ` [RFC PATCH V1 1/7] cpuidle: create bootparam "cpuidle.off=1" Trinabh Gupta
@ 2011-06-17 4:29 ` Benjamin Herrenschmidt
2011-06-21 4:36 ` Trinabh Gupta
0 siblings, 1 reply; 15+ messages in thread
From: Benjamin Herrenschmidt @ 2011-06-17 4:29 UTC (permalink / raw)
To: Trinabh Gupta; +Cc: linuxppc-dev, linux-pm, linux-kernel
On Tue, 2011-06-07 at 21:59 +0530, Trinabh Gupta wrote:
> From: Len Brown <len.brown@intel.com>
>
> useful for disabling cpuidle to fall back
> to architecture-default idle loop
>
> cpuidle drivers and governors will fail to register.
> on x86 they'll say so:
>
> intel_idle: intel_idle yielding to (null)
> ACPI: acpi_idle yielding to (null)
>
> Signed-off-by: Len Brown <len.brown@intel.com>
> ---
When you carry over somebody's patch like this you need to also add your
own signed-off-by.
Have those generic changes been reviewed by whoever is in charge of that
cpuidle framework ?
Cheers,
Ben.
> Documentation/kernel-parameters.txt | 3 +++
> drivers/cpuidle/cpuidle.c | 10 ++++++++++
> drivers/cpuidle/cpuidle.h | 1 +
> drivers/cpuidle/driver.c | 3 +++
> drivers/cpuidle/governor.c | 3 +++
> 5 files changed, 20 insertions(+), 0 deletions(-)
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index d9a203b..5697faf 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -546,6 +546,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
> /proc/<pid>/coredump_filter.
> See also Documentation/filesystems/proc.txt.
>
> + cpuidle.off=1 [CPU_IDLE]
> + disable the cpuidle sub-system
> +
> cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver
> Format:
> <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index 406be83..a171b9e 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -28,6 +28,12 @@ LIST_HEAD(cpuidle_detected_devices);
> static void (*pm_idle_old)(void);
>
> static int enabled_devices;
> +static int off __read_mostly;
> +
> +int cpuidle_disabled(void)
> +{
> + return off;
> +}
>
> #if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
> static void cpuidle_kick_cpus(void)
> @@ -397,6 +403,9 @@ static int __init cpuidle_init(void)
> {
> int ret;
>
> + if (cpuidle_disabled())
> + return -ENODEV;
> +
> pm_idle_old = pm_idle;
>
> ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
> @@ -408,4 +417,5 @@ static int __init cpuidle_init(void)
> return 0;
> }
>
> +module_param(off, int, 0444);
> core_initcall(cpuidle_init);
> diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
> index 33e50d5..38c3fd8 100644
> --- a/drivers/cpuidle/cpuidle.h
> +++ b/drivers/cpuidle/cpuidle.h
> @@ -13,6 +13,7 @@ extern struct list_head cpuidle_governors;
> extern struct list_head cpuidle_detected_devices;
> extern struct mutex cpuidle_lock;
> extern spinlock_t cpuidle_driver_lock;
> +extern int cpuidle_disabled(void);
>
> /* idle loop */
> extern void cpuidle_install_idle_handler(void);
> diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
> index 33e3189..284d7af 100644
> --- a/drivers/cpuidle/driver.c
> +++ b/drivers/cpuidle/driver.c
> @@ -50,6 +50,9 @@ int cpuidle_register_driver(struct cpuidle_driver *drv)
> if (!drv)
> return -EINVAL;
>
> + if (cpuidle_disabled())
> + return -ENODEV;
> +
> spin_lock(&cpuidle_driver_lock);
> if (cpuidle_curr_driver) {
> spin_unlock(&cpuidle_driver_lock);
> diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
> index 724c164..ea2f8e7 100644
> --- a/drivers/cpuidle/governor.c
> +++ b/drivers/cpuidle/governor.c
> @@ -81,6 +81,9 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
> if (!gov || !gov->select)
> return -EINVAL;
>
> + if (cpuidle_disabled())
> + return -ENODEV;
> +
> mutex_lock(&cpuidle_lock);
> if (__cpuidle_find_governor(gov->name) == NULL) {
> ret = 0;
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [RFC PATCH V1 1/7] cpuidle: create bootparam "cpuidle.off=1"
2011-06-17 4:29 ` Benjamin Herrenschmidt
@ 2011-06-21 4:36 ` Trinabh Gupta
0 siblings, 0 replies; 15+ messages in thread
From: Trinabh Gupta @ 2011-06-21 4:36 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-pm, linux-kernel
On 06/17/2011 09:59 AM, Benjamin Herrenschmidt wrote:
> On Tue, 2011-06-07 at 21:59 +0530, Trinabh Gupta wrote:
>> From: Len Brown<len.brown@intel.com>
>>
>> useful for disabling cpuidle to fall back
>> to architecture-default idle loop
>>
>> cpuidle drivers and governors will fail to register.
>> on x86 they'll say so:
>>
>> intel_idle: intel_idle yielding to (null)
>> ACPI: acpi_idle yielding to (null)
>>
>> Signed-off-by: Len Brown<len.brown@intel.com>
>> ---
Hi Ben,
Thanks for the review.
>
> When you carry over somebody's patch like this you need to also add your
> own signed-off-by.
Ok, thanks
>
> Have those generic changes been reviewed by whoever is in charge of that
> cpuidle framework ?
These patches were posted by Len Brown himself who is ACPI, Intel Idle
cpuidle driver maintainer. He pulled in most of the patches that were
part of that series (https://lkml.org/lkml/2011/4/2/8)
in 3.0-rc1, but these few patches are still out there. These changes
(removal of pm_idle) have already been agreed upon as they were
initially reported by Peter Zijlstra himself
(http://lkml.org/lkml/2009/8/28/43).
Thanks
-Trinabh
>
> Cheers,
> Ben.
>
>> Documentation/kernel-parameters.txt | 3 +++
>> drivers/cpuidle/cpuidle.c | 10 ++++++++++
>> drivers/cpuidle/cpuidle.h | 1 +
>> drivers/cpuidle/driver.c | 3 +++
>> drivers/cpuidle/governor.c | 3 +++
>> 5 files changed, 20 insertions(+), 0 deletions(-)
>>
>> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
>> index d9a203b..5697faf 100644
>> --- a/Documentation/kernel-parameters.txt
>> +++ b/Documentation/kernel-parameters.txt
>> @@ -546,6 +546,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
>> /proc/<pid>/coredump_filter.
>> See also Documentation/filesystems/proc.txt.
>>
>> + cpuidle.off=1 [CPU_IDLE]
>> + disable the cpuidle sub-system
>> +
>> cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver
>> Format:
>> <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
>> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
>> index 406be83..a171b9e 100644
>> --- a/drivers/cpuidle/cpuidle.c
>> +++ b/drivers/cpuidle/cpuidle.c
>> @@ -28,6 +28,12 @@ LIST_HEAD(cpuidle_detected_devices);
>> static void (*pm_idle_old)(void);
>>
>> static int enabled_devices;
>> +static int off __read_mostly;
>> +
>> +int cpuidle_disabled(void)
>> +{
>> + return off;
>> +}
>>
>> #if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
>> static void cpuidle_kick_cpus(void)
>> @@ -397,6 +403,9 @@ static int __init cpuidle_init(void)
>> {
>> int ret;
>>
>> + if (cpuidle_disabled())
>> + return -ENODEV;
>> +
>> pm_idle_old = pm_idle;
>>
>> ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
>> @@ -408,4 +417,5 @@ static int __init cpuidle_init(void)
>> return 0;
>> }
>>
>> +module_param(off, int, 0444);
>> core_initcall(cpuidle_init);
>> diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
>> index 33e50d5..38c3fd8 100644
>> --- a/drivers/cpuidle/cpuidle.h
>> +++ b/drivers/cpuidle/cpuidle.h
>> @@ -13,6 +13,7 @@ extern struct list_head cpuidle_governors;
>> extern struct list_head cpuidle_detected_devices;
>> extern struct mutex cpuidle_lock;
>> extern spinlock_t cpuidle_driver_lock;
>> +extern int cpuidle_disabled(void);
>>
>> /* idle loop */
>> extern void cpuidle_install_idle_handler(void);
>> diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
>> index 33e3189..284d7af 100644
>> --- a/drivers/cpuidle/driver.c
>> +++ b/drivers/cpuidle/driver.c
>> @@ -50,6 +50,9 @@ int cpuidle_register_driver(struct cpuidle_driver *drv)
>> if (!drv)
>> return -EINVAL;
>>
>> + if (cpuidle_disabled())
>> + return -ENODEV;
>> +
>> spin_lock(&cpuidle_driver_lock);
>> if (cpuidle_curr_driver) {
>> spin_unlock(&cpuidle_driver_lock);
>> diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
>> index 724c164..ea2f8e7 100644
>> --- a/drivers/cpuidle/governor.c
>> +++ b/drivers/cpuidle/governor.c
>> @@ -81,6 +81,9 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
>> if (!gov || !gov->select)
>> return -EINVAL;
>>
>> + if (cpuidle_disabled())
>> + return -ENODEV;
>> +
>> mutex_lock(&cpuidle_lock);
>> if (__cpuidle_find_governor(gov->name) == NULL) {
>> ret = 0;
>>
>> _______________________________________________
>> Linuxppc-dev mailing list
>> Linuxppc-dev@lists.ozlabs.org
>> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [RFC PATCH V1 2/7] cpuidle: replace xen access to x86 pm_idle and default_idle
2011-06-07 16:29 [RFC PATCH V1 0/7] cpuidle: (POWER) cpuidle driver for pSeries Trinabh Gupta
2011-06-07 16:29 ` [RFC PATCH V1 1/7] cpuidle: create bootparam "cpuidle.off=1" Trinabh Gupta
@ 2011-06-07 16:29 ` Trinabh Gupta
2011-06-07 16:29 ` [RFC PATCH V1 3/7] cpuidle: stop using pm_idle Trinabh Gupta
` (4 subsequent siblings)
6 siblings, 0 replies; 15+ messages in thread
From: Trinabh Gupta @ 2011-06-07 16:29 UTC (permalink / raw)
To: linux-pm, linuxppc-dev; +Cc: linux-kernel
From: Len Brown <len.brown@intel.com>
When a Xen Dom0 kernel boots on a hypervisor, it gets access
to the raw-hardware ACPI tables. While it parses the idle tables
for the hypervisor's benefit, it uses HLT for its own idle.
Rather than have xen scribble on pm_idle and access default_idle,
have it simply disable_cpuidle() so acpi_idle will not load and
architecture default HLT will be used.
cc: xen-devel@lists.xensource.com
Signed-off-by: Len Brown <len.brown@intel.com>
---
arch/x86/xen/setup.c | 3 ++-
drivers/cpuidle/cpuidle.c | 4 ++++
include/linux/cpuidle.h | 2 ++
3 files changed, 8 insertions(+), 1 deletions(-)
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index be1a464..ab1a916 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/memblock.h>
+#include <linux/cpuidle.h>
#include <asm/elf.h>
#include <asm/vdso.h>
@@ -424,7 +425,7 @@ void __init xen_arch_setup(void)
#ifdef CONFIG_X86_32
boot_cpu_data.hlt_works_ok = 1;
#endif
- pm_idle = default_idle;
+ disable_cpuidle();
boot_option_idle_override = IDLE_HALT;
fiddle_vdso();
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index a171b9e..8d7303b 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -34,6 +34,10 @@ int cpuidle_disabled(void)
{
return off;
}
+void disable_cpuidle(void)
+{
+ off = 1;
+}
#if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
static void cpuidle_kick_cpus(void)
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 1e85538..2786787 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -127,6 +127,7 @@ struct cpuidle_driver {
};
#ifdef CONFIG_CPU_IDLE
+extern void disable_cpuidle(void);
extern int cpuidle_register_driver(struct cpuidle_driver *drv);
struct cpuidle_driver *cpuidle_get_driver(void);
@@ -140,6 +141,7 @@ extern int cpuidle_enable_device(struct cpuidle_device *dev);
extern void cpuidle_disable_device(struct cpuidle_device *dev);
#else
+static inline void disable_cpuidle(void) { }
static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
{return -ENODEV; }
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [RFC PATCH V1 3/7] cpuidle: stop using pm_idle
2011-06-07 16:29 [RFC PATCH V1 0/7] cpuidle: (POWER) cpuidle driver for pSeries Trinabh Gupta
2011-06-07 16:29 ` [RFC PATCH V1 1/7] cpuidle: create bootparam "cpuidle.off=1" Trinabh Gupta
2011-06-07 16:29 ` [RFC PATCH V1 2/7] cpuidle: replace xen access to x86 pm_idle and default_idle Trinabh Gupta
@ 2011-06-07 16:29 ` Trinabh Gupta
2011-08-03 17:45 ` Len Brown
2011-06-07 16:30 ` [RFC PATCH V1 4/7] cpuidle: (powerpc) Add cpu_idle_wait() to allow switching idle routines Trinabh Gupta
` (3 subsequent siblings)
6 siblings, 1 reply; 15+ messages in thread
From: Trinabh Gupta @ 2011-06-07 16:29 UTC (permalink / raw)
To: linux-pm, linuxppc-dev; +Cc: linux-kernel
From: Len Brown <len.brown@intel.com>
pm_idle does not scale as an idle handler registration mechanism.
Don't use it for cpuidle. Instead, call cpuidle directly, and
allow architectures to use pm_idle as an arch-specific default
if they need it. ie.
cpu_idle()
...
if(cpuidle_call_idle())
pm_idle();
cc: x86@kernel.org
cc: Kevin Hilman <khilman@deeprootsystems.com>
cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Len Brown <len.brown@intel.com>
---
arch/arm/kernel/process.c | 4 +++-
arch/sh/kernel/idle.c | 6 ++++--
arch/x86/kernel/process_32.c | 4 +++-
arch/x86/kernel/process_64.c | 4 +++-
drivers/cpuidle/cpuidle.c | 39 ++++++++++++++++++---------------------
include/linux/cpuidle.h | 2 ++
6 files changed, 33 insertions(+), 26 deletions(-)
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 5e1e541..d7ee0d4 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -30,6 +30,7 @@
#include <linux/uaccess.h>
#include <linux/random.h>
#include <linux/hw_breakpoint.h>
+#include <linux/cpuidle.h>
#include <asm/cacheflush.h>
#include <asm/leds.h>
@@ -196,7 +197,8 @@ void cpu_idle(void)
cpu_relax();
} else {
stop_critical_timings();
- pm_idle();
+ if (cpuidle_call_idle())
+ pm_idle();
start_critical_timings();
/*
* This will eventually be removed - pm_idle
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index 425d604..9c7099e 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -16,12 +16,13 @@
#include <linux/thread_info.h>
#include <linux/irqflags.h>
#include <linux/smp.h>
+#include <linux/cpuidle.h>
#include <asm/pgalloc.h>
#include <asm/system.h>
#include <asm/atomic.h>
#include <asm/smp.h>
-void (*pm_idle)(void) = NULL;
+static void (*pm_idle)(void);
static int hlt_counter;
@@ -100,7 +101,8 @@ void cpu_idle(void)
local_irq_disable();
/* Don't trace irqs off for idle */
stop_critical_timings();
- pm_idle();
+ if (cpuidle_call_idle())
+ pm_idle();
/*
* Sanity check to ensure that pm_idle() returns
* with IRQs enabled
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d12878..61fadbe 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
+#include <linux/cpuidle.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -109,7 +110,8 @@ void cpu_idle(void)
local_irq_disable();
/* Don't trace irqs off for idle */
stop_critical_timings();
- pm_idle();
+ if (cpuidle_idle_call())
+ pm_idle();
start_critical_timings();
}
tick_nohz_restart_sched_tick();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6c9dd92..62c219a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
+#include <linux/cpuidle.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -136,7 +137,8 @@ void cpu_idle(void)
enter_idle();
/* Don't trace irqs off for idle */
stop_critical_timings();
- pm_idle();
+ if (cpuidle_idle_call())
+ pm_idle();
start_critical_timings();
/* In many cases the interrupt that ended idle
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 8d7303b..304e378 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -25,10 +25,10 @@ DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
DEFINE_MUTEX(cpuidle_lock);
LIST_HEAD(cpuidle_detected_devices);
-static void (*pm_idle_old)(void);
static int enabled_devices;
static int off __read_mostly;
+static int initialized __read_mostly;
int cpuidle_disabled(void)
{
@@ -56,27 +56,24 @@ static int __cpuidle_register_device(struct cpuidle_device *dev);
* cpuidle_idle_call - the main idle loop
*
* NOTE: no locks or semaphores should be used here
+ * return non-zero on failure
*/
-static void cpuidle_idle_call(void)
+int cpuidle_idle_call(void)
{
struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
struct cpuidle_driver *drv = cpuidle_get_driver();
struct cpuidle_state *target_state;
int next_state, entered_state;
- /* check if the device is ready */
- if (!dev || !dev->enabled) {
- if (pm_idle_old)
- pm_idle_old();
- else
-#if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
- default_idle();
-#else
- local_irq_enable();
-#endif
- return;
- }
+ if (off)
+ return -ENODEV;
+
+ if (!initialized)
+ return -ENODEV;
+ /* check if the device is ready */
+ if (!dev || !dev->enabled)
+ return -EBUSY;
#if 0
/* shows regressions, re-enable for 2.6.29 */
/*
@@ -90,7 +87,7 @@ static void cpuidle_idle_call(void)
next_state = cpuidle_curr_governor->select(drv, dev);
if (need_resched()) {
local_irq_enable();
- return;
+ return 0;
}
target_state = &drv->states[next_state];
@@ -116,6 +113,8 @@ static void cpuidle_idle_call(void)
/* give the governor an opportunity to reflect on the outcome */
if (cpuidle_curr_governor->reflect)
cpuidle_curr_governor->reflect(dev, entered_state);
+
+ return 0;
}
/**
@@ -123,10 +122,10 @@ static void cpuidle_idle_call(void)
*/
void cpuidle_install_idle_handler(void)
{
- if (enabled_devices && (pm_idle != cpuidle_idle_call)) {
+ if (enabled_devices) {
/* Make sure all changes finished before we switch to new idle */
smp_wmb();
- pm_idle = cpuidle_idle_call;
+ initialized = 1;
}
}
@@ -135,8 +134,8 @@ void cpuidle_install_idle_handler(void)
*/
void cpuidle_uninstall_idle_handler(void)
{
- if (enabled_devices && pm_idle_old && (pm_idle != pm_idle_old)) {
- pm_idle = pm_idle_old;
+ if (enabled_devices) {
+ initialized = 0;
cpuidle_kick_cpus();
}
}
@@ -410,8 +409,6 @@ static int __init cpuidle_init(void)
if (cpuidle_disabled())
return -ENODEV;
- pm_idle_old = pm_idle;
-
ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
if (ret)
return ret;
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 2786787..c904188 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -128,6 +128,7 @@ struct cpuidle_driver {
#ifdef CONFIG_CPU_IDLE
extern void disable_cpuidle(void);
+extern int cpuidle_idle_call(void);
extern int cpuidle_register_driver(struct cpuidle_driver *drv);
struct cpuidle_driver *cpuidle_get_driver(void);
@@ -142,6 +143,7 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev);
#else
static inline void disable_cpuidle(void) { }
+static inline int cpuidle_idle_call(void) { return -ENODEV; }
static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
{return -ENODEV; }
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [RFC PATCH V1 3/7] cpuidle: stop using pm_idle
2011-06-07 16:29 ` [RFC PATCH V1 3/7] cpuidle: stop using pm_idle Trinabh Gupta
@ 2011-08-03 17:45 ` Len Brown
0 siblings, 0 replies; 15+ messages in thread
From: Len Brown @ 2011-08-03 17:45 UTC (permalink / raw)
To: Trinabh Gupta; +Cc: linuxppc-dev, linux-pm, linux-kernel
On Tue, 7 Jun 2011, Trinabh Gupta wrote:
> From: Len Brown <len.brown@intel.com>
>
> pm_idle does not scale as an idle handler registration mechanism.
> Don't use it for cpuidle. Instead, call cpuidle directly, and
> allow architectures to use pm_idle as an arch-specific default
> if they need it. ie.
>
> cpu_idle()
> ...
> if(cpuidle_call_idle())
Looks like you forgot to correct my typo that you pointed out earlier,
s/cpuidle_call_idle/cpuidle_idle_call/
both in the comment here and for arm and sh below.
Thanks for including the From: above, that is correct form.
But note in the future that when you modify somebody else's patch,
you should append a note about what you changed,
and also add your signed-off-by, so we can
track the changes.
thanks,
-Len
> pm_idle();
>
> cc: x86@kernel.org
> cc: Kevin Hilman <khilman@deeprootsystems.com>
> cc: Paul Mundt <lethal@linux-sh.org>
> Signed-off-by: Len Brown <len.brown@intel.com>
>
> ---
>
> arch/arm/kernel/process.c | 4 +++-
> arch/sh/kernel/idle.c | 6 ++++--
> arch/x86/kernel/process_32.c | 4 +++-
> arch/x86/kernel/process_64.c | 4 +++-
> drivers/cpuidle/cpuidle.c | 39 ++++++++++++++++++---------------------
> include/linux/cpuidle.h | 2 ++
> 6 files changed, 33 insertions(+), 26 deletions(-)
>
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 5e1e541..d7ee0d4 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -30,6 +30,7 @@
> #include <linux/uaccess.h>
> #include <linux/random.h>
> #include <linux/hw_breakpoint.h>
> +#include <linux/cpuidle.h>
>
> #include <asm/cacheflush.h>
> #include <asm/leds.h>
> @@ -196,7 +197,8 @@ void cpu_idle(void)
> cpu_relax();
> } else {
> stop_critical_timings();
> - pm_idle();
> + if (cpuidle_call_idle())
> + pm_idle();
> start_critical_timings();
> /*
> * This will eventually be removed - pm_idle
> diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
> index 425d604..9c7099e 100644
> --- a/arch/sh/kernel/idle.c
> +++ b/arch/sh/kernel/idle.c
> @@ -16,12 +16,13 @@
> #include <linux/thread_info.h>
> #include <linux/irqflags.h>
> #include <linux/smp.h>
> +#include <linux/cpuidle.h>
> #include <asm/pgalloc.h>
> #include <asm/system.h>
> #include <asm/atomic.h>
> #include <asm/smp.h>
>
> -void (*pm_idle)(void) = NULL;
> +static void (*pm_idle)(void);
>
> static int hlt_counter;
>
> @@ -100,7 +101,8 @@ void cpu_idle(void)
> local_irq_disable();
> /* Don't trace irqs off for idle */
> stop_critical_timings();
> - pm_idle();
> + if (cpuidle_call_idle())
> + pm_idle();
> /*
> * Sanity check to ensure that pm_idle() returns
> * with IRQs enabled
> diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
> index 8d12878..61fadbe 100644
> --- a/arch/x86/kernel/process_32.c
> +++ b/arch/x86/kernel/process_32.c
> @@ -38,6 +38,7 @@
> #include <linux/uaccess.h>
> #include <linux/io.h>
> #include <linux/kdebug.h>
> +#include <linux/cpuidle.h>
>
> #include <asm/pgtable.h>
> #include <asm/system.h>
> @@ -109,7 +110,8 @@ void cpu_idle(void)
> local_irq_disable();
> /* Don't trace irqs off for idle */
> stop_critical_timings();
> - pm_idle();
> + if (cpuidle_idle_call())
> + pm_idle();
> start_critical_timings();
> }
> tick_nohz_restart_sched_tick();
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index 6c9dd92..62c219a 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -37,6 +37,7 @@
> #include <linux/uaccess.h>
> #include <linux/io.h>
> #include <linux/ftrace.h>
> +#include <linux/cpuidle.h>
>
> #include <asm/pgtable.h>
> #include <asm/system.h>
> @@ -136,7 +137,8 @@ void cpu_idle(void)
> enter_idle();
> /* Don't trace irqs off for idle */
> stop_critical_timings();
> - pm_idle();
> + if (cpuidle_idle_call())
> + pm_idle();
> start_critical_timings();
>
> /* In many cases the interrupt that ended idle
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index 8d7303b..304e378 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -25,10 +25,10 @@ DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
>
> DEFINE_MUTEX(cpuidle_lock);
> LIST_HEAD(cpuidle_detected_devices);
> -static void (*pm_idle_old)(void);
>
> static int enabled_devices;
> static int off __read_mostly;
> +static int initialized __read_mostly;
>
> int cpuidle_disabled(void)
> {
> @@ -56,27 +56,24 @@ static int __cpuidle_register_device(struct cpuidle_device *dev);
> * cpuidle_idle_call - the main idle loop
> *
> * NOTE: no locks or semaphores should be used here
> + * return non-zero on failure
> */
> -static void cpuidle_idle_call(void)
> +int cpuidle_idle_call(void)
> {
> struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
> struct cpuidle_driver *drv = cpuidle_get_driver();
> struct cpuidle_state *target_state;
> int next_state, entered_state;
>
> - /* check if the device is ready */
> - if (!dev || !dev->enabled) {
> - if (pm_idle_old)
> - pm_idle_old();
> - else
> -#if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
> - default_idle();
> -#else
> - local_irq_enable();
> -#endif
> - return;
> - }
> + if (off)
> + return -ENODEV;
> +
> + if (!initialized)
> + return -ENODEV;
>
> + /* check if the device is ready */
> + if (!dev || !dev->enabled)
> + return -EBUSY;
> #if 0
> /* shows regressions, re-enable for 2.6.29 */
> /*
> @@ -90,7 +87,7 @@ static void cpuidle_idle_call(void)
> next_state = cpuidle_curr_governor->select(drv, dev);
> if (need_resched()) {
> local_irq_enable();
> - return;
> + return 0;
> }
>
> target_state = &drv->states[next_state];
> @@ -116,6 +113,8 @@ static void cpuidle_idle_call(void)
> /* give the governor an opportunity to reflect on the outcome */
> if (cpuidle_curr_governor->reflect)
> cpuidle_curr_governor->reflect(dev, entered_state);
> +
> + return 0;
> }
>
> /**
> @@ -123,10 +122,10 @@ static void cpuidle_idle_call(void)
> */
> void cpuidle_install_idle_handler(void)
> {
> - if (enabled_devices && (pm_idle != cpuidle_idle_call)) {
> + if (enabled_devices) {
> /* Make sure all changes finished before we switch to new idle */
> smp_wmb();
> - pm_idle = cpuidle_idle_call;
> + initialized = 1;
> }
> }
>
> @@ -135,8 +134,8 @@ void cpuidle_install_idle_handler(void)
> */
> void cpuidle_uninstall_idle_handler(void)
> {
> - if (enabled_devices && pm_idle_old && (pm_idle != pm_idle_old)) {
> - pm_idle = pm_idle_old;
> + if (enabled_devices) {
> + initialized = 0;
> cpuidle_kick_cpus();
> }
> }
> @@ -410,8 +409,6 @@ static int __init cpuidle_init(void)
> if (cpuidle_disabled())
> return -ENODEV;
>
> - pm_idle_old = pm_idle;
> -
> ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
> if (ret)
> return ret;
> diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
> index 2786787..c904188 100644
> --- a/include/linux/cpuidle.h
> +++ b/include/linux/cpuidle.h
> @@ -128,6 +128,7 @@ struct cpuidle_driver {
>
> #ifdef CONFIG_CPU_IDLE
> extern void disable_cpuidle(void);
> +extern int cpuidle_idle_call(void);
>
> extern int cpuidle_register_driver(struct cpuidle_driver *drv);
> struct cpuidle_driver *cpuidle_get_driver(void);
> @@ -142,6 +143,7 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev);
>
> #else
> static inline void disable_cpuidle(void) { }
> +static inline int cpuidle_idle_call(void) { return -ENODEV; }
>
> static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
> {return -ENODEV; }
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [RFC PATCH V1 4/7] cpuidle: (powerpc) Add cpu_idle_wait() to allow switching idle routines
2011-06-07 16:29 [RFC PATCH V1 0/7] cpuidle: (POWER) cpuidle driver for pSeries Trinabh Gupta
` (2 preceding siblings ...)
2011-06-07 16:29 ` [RFC PATCH V1 3/7] cpuidle: stop using pm_idle Trinabh Gupta
@ 2011-06-07 16:30 ` Trinabh Gupta
2011-06-17 4:32 ` Benjamin Herrenschmidt
2011-06-07 16:30 ` [RFC PATCH V1 5/7] cpuidle: (POWER) cpuidle driver for pSeries Trinabh Gupta
` (2 subsequent siblings)
6 siblings, 1 reply; 15+ messages in thread
From: Trinabh Gupta @ 2011-06-07 16:30 UTC (permalink / raw)
To: linux-pm, linuxppc-dev; +Cc: linux-kernel
This patch provides cpu_idle_wait() routine required
by the cpuidle subsystem. Almost all the code is borrowed
from x86.
Signed-off-by: Trinabh Gupta <trinabh@linux.vnet.ibm.com>
Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
---
arch/powerpc/Kconfig | 4 ++++
arch/powerpc/include/asm/system.h | 1 +
arch/powerpc/kernel/idle.c | 18 ++++++++++++++++++
3 files changed, 23 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2729c66..518beda 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -87,6 +87,10 @@ config ARCH_HAS_ILOG2_U64
bool
default y if 64BIT
+config ARCH_HAS_CPU_IDLE_WAIT
+ bool
+ default y
+
config GENERIC_HWEIGHT
bool
default y
diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h
index 2dc595d..811cdf1 100644
--- a/arch/powerpc/include/asm/system.h
+++ b/arch/powerpc/include/asm/system.h
@@ -222,6 +222,7 @@ extern unsigned long klimit;
extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
extern int powersave_nap; /* set if nap mode can be used in idle loop */
+void cpu_idle_wait(void);
/*
* Atomic exchange
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 39a2baa..932392b 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -102,6 +102,24 @@ void cpu_idle(void)
}
}
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - make every other CPU leave the old idle routine by
+ * running the empty do_nothing() callback on it and waiting for completion
+ * (smp_call_function() with wait=1). Needed when switching idle handlers on
+ * SMP; the caller must have installed the new handler before calling this.
+ */
+void cpu_idle_wait(void)
+{
+ smp_mb();
+ /* kick all the CPUs so that they exit out of old idle routine */
+ smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
int powersave_nap;
#ifdef CONFIG_SYSCTL
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [RFC PATCH V1 4/7] cpuidle: (powerpc) Add cpu_idle_wait() to allow switching idle routines
2011-06-07 16:30 ` [RFC PATCH V1 4/7] cpuidle: (powerpc) Add cpu_idle_wait() to allow switching idle routines Trinabh Gupta
@ 2011-06-17 4:32 ` Benjamin Herrenschmidt
2011-06-21 6:00 ` Trinabh Gupta
0 siblings, 1 reply; 15+ messages in thread
From: Benjamin Herrenschmidt @ 2011-06-17 4:32 UTC (permalink / raw)
To: Trinabh Gupta; +Cc: linuxppc-dev, linux-pm, linux-kernel
On Tue, 2011-06-07 at 22:00 +0530, Trinabh Gupta wrote:
> diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
> index 39a2baa..932392b 100644
> --- a/arch/powerpc/kernel/idle.c
> +++ b/arch/powerpc/kernel/idle.c
> @@ -102,6 +102,24 @@ void cpu_idle(void)
> }
> }
>
> +static void do_nothing(void *unused)
> +{
> +}
> +
> +/*
> + * cpu_idle_wait - Used to ensure that all the CPUs come out of the old
> + * idle loop and start using the new idle loop.
> + * Required while changing idle handler on SMP systems.
> + * Caller must have changed idle handler to the new value before the call.
> + */
> +void cpu_idle_wait(void)
> +{
> + smp_mb();
> + /* kick all the CPUs so that they exit out of old idle routine */
> + smp_call_function(do_nothing, NULL, 1);
> +}
> +EXPORT_SYMBOL_GPL(cpu_idle_wait);
> +
> int powersave_nap;
>
> #ifdef CONFIG_SYSCTL
This is gross :-)
Do you need to absolutely ensure the idle task has changed or just
kicking it with a send reschedule is enough ?
Cheers,
Ben.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [RFC PATCH V1 4/7] cpuidle: (powerpc) Add cpu_idle_wait() to allow switching idle routines
2011-06-17 4:32 ` Benjamin Herrenschmidt
@ 2011-06-21 6:00 ` Trinabh Gupta
0 siblings, 0 replies; 15+ messages in thread
From: Trinabh Gupta @ 2011-06-21 6:00 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-pm, linux-kernel
On 06/17/2011 10:02 AM, Benjamin Herrenschmidt wrote:
> On Tue, 2011-06-07 at 22:00 +0530, Trinabh Gupta wrote:
>
>> diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
>> index 39a2baa..932392b 100644
>> --- a/arch/powerpc/kernel/idle.c
>> +++ b/arch/powerpc/kernel/idle.c
>> @@ -102,6 +102,24 @@ void cpu_idle(void)
>> }
>> }
>>
>> +static void do_nothing(void *unused)
>> +{
>> +}
>> +
>> +/*
>> + * cpu_idle_wait - Used to ensure that all the CPUs come out of the old
>> + * idle loop and start using the new idle loop.
>> + * Required while changing idle handler on SMP systems.
>> + * Caller must have changed idle handler to the new value before the call.
>> + */
>> +void cpu_idle_wait(void)
>> +{
>> + smp_mb();
>> + /* kick all the CPUs so that they exit out of old idle routine */
>> + smp_call_function(do_nothing, NULL, 1);
>> +}
>> +EXPORT_SYMBOL_GPL(cpu_idle_wait);
>> +
>> int powersave_nap;
>>
>> #ifdef CONFIG_SYSCTL
>
> This is gross :-)
Well, this is what exists today for x86, so I didn't think too
much about it. Maybe there is a cleaner way. The requirement
is to completely exit the idle loop and call cpuidle_idle_call()
again. I think sending a reschedule may be enough.
With respect to the current implementation, the arch-independent cpuidle
code needs a cpu_idle_wait() function for any architecture where
CONFIG_SMP is defined. This cpu_idle_wait() function is called
whenever we have to pause usage of cpuidle, e.g. to switch driver
or governor. So maybe there is a cleaner implementation of
cpu_idle_wait() than smp_call_function(do_nothing...); sending a
reschedule may work.
Thanks
-Trinabh
>
> Do you need to absolutely ensure the idle task has changed or just
> kicking it with a send reschedule is enough ?
>
> Cheers,
> Ben.
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [RFC PATCH V1 5/7] cpuidle: (POWER) cpuidle driver for pSeries
2011-06-07 16:29 [RFC PATCH V1 0/7] cpuidle: (POWER) cpuidle driver for pSeries Trinabh Gupta
` (3 preceding siblings ...)
2011-06-07 16:30 ` [RFC PATCH V1 4/7] cpuidle: (powerpc) Add cpu_idle_wait() to allow switching idle routines Trinabh Gupta
@ 2011-06-07 16:30 ` Trinabh Gupta
2011-06-17 4:36 ` Benjamin Herrenschmidt
2011-06-07 16:30 ` [RFC PATCH V1 6/7] cpuidle: (POWER) Enable cpuidle and directly call cpuidle_idle_call() " Trinabh Gupta
2011-06-07 16:30 ` [RFC PATCH V1 7/7] cpuidle: (POWER) Handle power_save=off Trinabh Gupta
6 siblings, 1 reply; 15+ messages in thread
From: Trinabh Gupta @ 2011-06-07 16:30 UTC (permalink / raw)
To: linux-pm, linuxppc-dev; +Cc: linux-kernel
This patch implements a cpuidle driver for pSeries based on
routines pseries_dedicated_idle_loop and pseries_shared_idle_loop.
The driver is built only if CONFIG_CPU_IDLE is set. This
cpuidle driver uses global registration of idle states and
not per-cpu.
Signed-off-by: Trinabh Gupta <trinabh@linux.vnet.ibm.com>
Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/system.h | 8 +
arch/powerpc/kernel/sysfs.c | 2
arch/powerpc/platforms/pseries/Kconfig | 9 +
arch/powerpc/platforms/pseries/Makefile | 1
arch/powerpc/platforms/pseries/processor_idle.c | 331 +++++++++++++++++++++++
arch/powerpc/platforms/pseries/pseries.h | 3
arch/powerpc/platforms/pseries/setup.c | 3
arch/powerpc/platforms/pseries/smp.c | 1
8 files changed, 355 insertions(+), 3 deletions(-)
create mode 100644 arch/powerpc/platforms/pseries/processor_idle.c
diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h
index 811cdf1..b5b4fc4 100644
--- a/arch/powerpc/include/asm/system.h
+++ b/arch/powerpc/include/asm/system.h
@@ -224,6 +224,14 @@ extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
extern int powersave_nap; /* set if nap mode can be used in idle loop */
void cpu_idle_wait(void);
+#ifdef CONFIG_PSERIES_IDLE
+extern void update_smt_snooze_delay(int snooze);
+extern int pseries_notify_cpuidle_add_cpu(int cpu);
+#else	/* driver not built: no-op stubs so callers need no #ifdef */
+static inline void update_smt_snooze_delay(int snooze) {}
+static inline int pseries_notify_cpuidle_add_cpu(int cpu) { return 0; }
+#endif
+
/*
* Atomic exchange
*
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index f0f2199..fbb666f 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -18,6 +18,7 @@
#include <asm/machdep.h>
#include <asm/smp.h>
#include <asm/pmc.h>
+#include <asm/system.h>
#include "cacheinfo.h"
@@ -51,6 +52,7 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev,
return -EINVAL;
per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze;
+ update_smt_snooze_delay(snooze);
return count;
}
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 71af4c5..877bac6 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -118,3 +118,12 @@ config DTL
which are accessible through a debugfs file.
Say N if you are unsure.
+
+config PSERIES_IDLE
+	bool "Cpuidle driver for pSeries platforms"
+	depends on CPU_IDLE
+	depends on PPC_PSERIES
+	default y
+	help
+	  Select this option to enable processor idle state management
+	  through cpuidle subsystem.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 3556e40..236db46 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_DTL) += dtl.o
obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o
+obj-$(CONFIG_PSERIES_IDLE) += processor_idle.o
ifeq ($(CONFIG_PPC_PSERIES),y)
obj-$(CONFIG_SUSPEND) += suspend.o
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c
new file mode 100644
index 0000000..ff44b49
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/processor_idle.c
@@ -0,0 +1,331 @@
+/*
+ * processor_idle - idle state cpuidle driver.
+ * Adapted from drivers/idle/intel_idle.c and
+ * drivers/acpi/processor_idle.c
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/cpuidle.h>
+#include <linux/cpu.h>
+
+#include <asm/paca.h>
+#include <asm/reg.h>
+#include <asm/system.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+
+#include "plpar_wrappers.h"
+#include "pseries.h"
+
+struct cpuidle_driver pseries_idle_driver = {
+ .name = "pseries_idle",
+ .owner = THIS_MODULE,
+};
+
+#define MAX_IDLE_STATE_COUNT 2 /* number of slots in each state table below */
+
+static int max_cstate = MAX_IDLE_STATE_COUNT - 1; /* highest table index offered; 0 makes pseries_idle_probe() bail out */
+static struct cpuidle_device __percpu *pseries_idle_cpuidle_devices; /* allocated in pseries_idle_devices_init() */
+static struct cpuidle_state *cpuidle_state_table; /* shared_states or dedicated_states, set by pseries_idle_probe() */
+
+void update_smt_snooze_delay(int snooze)
+{ /* called by store_smt_snooze_delay() after it updates the per-cpu smt_snooze_delay */
+ struct cpuidle_driver *drv = cpuidle_get_driver();
+ if (drv) /* presumably NULL when no cpuidle driver is registered -- verify */
+ drv->states[0].target_residency = snooze; /* NOTE(review): slot 0 is whatever state was copied first, incl. "Shared Cede" -- confirm intended */
+}
+
+/* Snooze: spin at low SMT priority until a reschedule is due; returns @index. */
+static int snooze_loop(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv, int index)
+{
+	unsigned long in_purr, out_purr;
+	ktime_t kt_before, kt_after;
+	s64 usec_delta;
+
+	/* Timestamp first so the residency covers the whole idle period. */
+	kt_before = ktime_get_real();
+
+	/*
+	 * Indicate to the HV that we are idle. Now would be
+	 * a good time to find other work to dispatch.
+	 */
+	get_lppaca()->idle = 1;
+	get_lppaca()->donate_dedicated_cpu = 1;
+	in_purr = mfspr(SPRN_PURR);
+
+	local_irq_enable();
+	set_thread_flag(TIF_POLLING_NRFLAG);
+	while (!need_resched()) {
+		ppc64_runlatch_off();
+		HMT_low();
+		HMT_very_low();
+	}
+	HMT_medium();
+	clear_thread_flag(TIF_POLLING_NRFLAG);
+	smp_mb();
+	local_irq_disable();
+
+	kt_after = ktime_get_real();
+	usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
+
+	out_purr = mfspr(SPRN_PURR);
+	get_lppaca()->wait_state_cycles += out_purr - in_purr;
+	get_lppaca()->donate_dedicated_cpu = 0;
+	get_lppaca()->idle = 0;
+
+	dev->last_residency = (int)usec_delta;	/* microseconds spent idle */
+	return index;
+}
+
+/* Dedicated-partition CEDE: give the CPU up via cede_processor(); returns @index. */
+static int dedicated_cede_loop(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv, int index)
+{
+	unsigned long in_purr, out_purr;
+	ktime_t kt_before, kt_after;
+	s64 usec_delta;
+
+	/* Timestamp first so the residency covers the whole idle period. */
+	kt_before = ktime_get_real();
+
+	/*
+	 * Indicate to the HV that we are idle. Now would be
+	 * a good time to find other work to dispatch.
+	 */
+	get_lppaca()->idle = 1;
+	get_lppaca()->donate_dedicated_cpu = 1;
+	in_purr = mfspr(SPRN_PURR);
+
+	ppc64_runlatch_off();
+	HMT_medium();
+	cede_processor();
+
+	kt_after = ktime_get_real();
+	usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
+
+	out_purr = mfspr(SPRN_PURR);
+	get_lppaca()->wait_state_cycles += out_purr - in_purr;
+	get_lppaca()->donate_dedicated_cpu = 0;
+	get_lppaca()->idle = 0;
+
+	dev->last_residency = (int)usec_delta;	/* microseconds spent idle */
+	return index;
+}
+
+/* Shared-partition idle: yield to the HV via cede_processor(); returns @index. */
+static int shared_cede_loop(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv, int index)
+{
+	unsigned long in_purr, out_purr;
+	ktime_t kt_before, kt_after;
+	s64 usec_delta;
+
+	/* Timestamp first so the residency covers the whole idle period. */
+	kt_before = ktime_get_real();
+
+	/*
+	 * Indicate to the HV that we are idle. Now would be
+	 * a good time to find other work to dispatch.
+	 */
+	get_lppaca()->idle = 1;
+	get_lppaca()->donate_dedicated_cpu = 1;
+	in_purr = mfspr(SPRN_PURR);
+
+	/*
+	 * Yield the processor to the hypervisor. We return if
+	 * an external interrupt occurs (which are driven prior
+	 * to returning here) or if a prod occurs from another
+	 * processor. When returning here, external interrupts
+	 * are enabled.
+	 */
+	cede_processor();
+
+	kt_after = ktime_get_real();
+	usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
+
+	out_purr = mfspr(SPRN_PURR);
+	get_lppaca()->wait_state_cycles += out_purr - in_purr;
+	get_lppaca()->donate_dedicated_cpu = 0;
+	get_lppaca()->idle = 0;
+
+	dev->last_residency = (int)usec_delta;	/* microseconds spent idle */
+	return index;
+}
+
+/*
+ * States for dedicated partition case (picked by pseries_idle_probe() when get_lppaca()->shared_proc is clear).
+ */
+static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = {
+ { /* Snooze */
+ .name = "snooze",
+ .desc = "snooze",
+ .flags = CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 0,
+ .target_residency = 0,
+ .enter = &snooze_loop },
+ { /* CEDE */
+ .name = "CEDE",
+ .desc = "CEDE",
+ .flags = CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 1,
+ .target_residency = 10,
+ .enter = &dedicated_cede_loop },
+};
+
+/*
+ * States for shared partition case; the second slot is left zeroed, so its .enter is NULL.
+ */
+static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
+ { /* Shared Cede */
+ .name = "Shared Cede",
+ .desc = "Shared Cede",
+ .flags = CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 0,
+ .target_residency = 0,
+ .enter = &shared_cede_loop },
+};
+
+int pseries_notify_cpuidle_add_cpu(int cpu)
+{ /* called from smp_xics_setup_cpu() for the cpu being set up */
+ struct cpuidle_device *dev =
+ per_cpu_ptr(pseries_idle_cpuidle_devices, cpu);
+ if (dev && cpuidle_get_driver()) { /* NOTE(review): per_cpu_ptr() of a NULL base is presumably non-NULL -- confirm this guard */
+ cpuidle_disable_device(dev); /* disable, then re-enable, this cpu's device */
+ cpuidle_enable_device(dev);
+ }
+ return 0; /* always 0; results of the calls above are not checked */
+}
+
+/*
+ * pseries_idle_cpuidle_driver_init() - copy usable states from cpuidle_state_table into the driver; always returns 0
+ */
+static int pseries_idle_cpuidle_driver_init(void)
+{
+ int cstate;
+ struct cpuidle_driver *drv = &pseries_idle_driver;
+
+ drv->state_count = 0;
+
+ for (cstate = 0; cstate < MAX_IDLE_STATE_COUNT; ++cstate) {
+
+ if (cstate > max_cstate) /* states above the max_cstate limit are not offered */
+ break;
+
+ /* is the state not enabled? */
+ if (cpuidle_state_table[cstate].enter == NULL)
+ continue;
+
+ drv->states[drv->state_count] = /* structure copy */
+ cpuidle_state_table[cstate];
+
+ if (cpuidle_state_table == dedicated_states) /* dedicated: each copied state's target_residency is replaced */
+ drv->states[drv->state_count].target_residency =
+ __get_cpu_var(smt_snooze_delay); /* value of the CPU running this init */
+
+ drv->state_count += 1;
+ }
+
+ return 0;
+}
+
+/* pseries_idle_devices_uninit(void)
+ * unregister cpuidle devices and de-allocate memory; does nothing when the
+ * per-cpu array was never allocated (e.g. alloc_percpu() failed in init).
+ */
+static void pseries_idle_devices_uninit(void)
+{
+	int i;
+
+	if (!pseries_idle_cpuidle_devices)
+		return;
+	for_each_possible_cpu(i)
+		cpuidle_unregister_device(
+			per_cpu_ptr(pseries_idle_cpuidle_devices, i));
+	free_percpu(pseries_idle_cpuidle_devices);
+	pseries_idle_cpuidle_devices = NULL;	/* no dangling per-cpu base */
+}
+
+/* pseries_idle_devices_init()
+ * allocate and register one cpuidle device per possible CPU; returns 0, -ENOMEM or -EIO
+ */
+static int pseries_idle_devices_init(void)
+{
+ int i;
+ struct cpuidle_driver *drv = &pseries_idle_driver;
+ struct cpuidle_device *dev;
+
+ pseries_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
+ if (pseries_idle_cpuidle_devices == NULL)
+ return -ENOMEM;
+
+ for_each_possible_cpu(i) {
+ dev = per_cpu_ptr(pseries_idle_cpuidle_devices, i);
+ dev->state_count = drv->state_count;
+ dev->cpu = i;
+ if (cpuidle_register_device(dev)) {
+ printk(KERN_DEBUG "cpuidle_register_device %d failed!\n",
+ i);
+ return -EIO; /* real error code is dropped; earlier devices stay registered for the caller to undo */
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * pseries_idle_probe() - choose the state table for a shared versus dedicated partition.
+ * Returns 0, -EPERM if max_cstate == 0, or -ENODEV without FW_FEATURE_SPLPAR.
+ */
+static int pseries_idle_probe(void)
+{
+ if (max_cstate == 0) {
+ printk(KERN_DEBUG "pseries processor idle disabled.\n");
+ return -EPERM;
+ }
+
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ printk(KERN_DEBUG "Using default idle\n");
+ return -ENODEV;
+ }
+
+ if (get_lppaca()->shared_proc)
+ cpuidle_state_table = shared_states;
+ else
+ cpuidle_state_table = dedicated_states;
+
+ return 0;
+}
+
+static int __init pseries_processor_idle_init(void)
+{ /* initcall: probe, fill and register the driver, then register the per-cpu devices */
+ int retval;
+
+ retval = pseries_idle_probe();
+ if (retval)
+ return retval;
+
+ pseries_idle_cpuidle_driver_init(); /* fills drv->states before registration; its return value (always 0) is ignored */
+ retval = cpuidle_register_driver(&pseries_idle_driver);
+ if (retval) {
+ printk(KERN_DEBUG "Registration of pseries driver failed.\n");
+ return retval;
+ }
+
+ retval = pseries_idle_devices_init();
+ if (retval) {
+ pseries_idle_devices_uninit(); /* NOTE(review): also reached when alloc_percpu() failed -- confirm uninit copes with a NULL base */
+ cpuidle_unregister_driver(&pseries_idle_driver);
+ return retval;
+ }
+
+ printk(KERN_DEBUG "pseries_idle_driver registered\n");
+
+ return 0;
+}
+
+device_initcall(pseries_processor_idle_init);
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index e9f6d28..7c60380 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -56,4 +56,7 @@ extern struct device_node *dlpar_configure_connector(u32);
extern int dlpar_attach_node(struct device_node *);
extern int dlpar_detach_node(struct device_node *);
+/* Snooze Delay, pseries_idle */
+DECLARE_PER_CPU(long, smt_snooze_delay);
+
#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 593acce..6893a0c 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -584,9 +584,6 @@ static int __init pSeries_probe(void)
return 1;
}
-
-DECLARE_PER_CPU(long, smt_snooze_delay);
-
static void pseries_dedicated_idle_sleep(void)
{
unsigned int cpu = smp_processor_id();
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index fbffd7e..2e46883 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -150,6 +150,7 @@ static void __devinit smp_xics_setup_cpu(int cpu)
set_cpu_current_state(cpu, CPU_STATE_ONLINE);
set_default_offline_state(cpu);
#endif
+ pseries_notify_cpuidle_add_cpu(cpu);
}
static int __devinit smp_pSeries_kick_cpu(int nr)
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [RFC PATCH V1 5/7] cpuidle: (POWER) cpuidle driver for pSeries
2011-06-07 16:30 ` [RFC PATCH V1 5/7] cpuidle: (POWER) cpuidle driver for pSeries Trinabh Gupta
@ 2011-06-17 4:36 ` Benjamin Herrenschmidt
2011-06-21 9:00 ` Trinabh Gupta
0 siblings, 1 reply; 15+ messages in thread
From: Benjamin Herrenschmidt @ 2011-06-17 4:36 UTC (permalink / raw)
To: Trinabh Gupta; +Cc: linuxppc-dev, linux-pm, linux-kernel
On Tue, 2011-06-07 at 22:00 +0530, Trinabh Gupta wrote:
> +static int snooze_loop(struct cpuidle_device *dev,
> + struct cpuidle_driver *drv,
> + int index)
> +{
> + unsigned long in_purr, out_purr;
> + ktime_t kt_before, kt_after;
> + s64 usec_delta;
> +
> + /*
> + * Indicate to the HV that we are idle. Now would be
> + * a good time to find other work to dispatch.
> + */
> + get_lppaca()->idle = 1;
> + get_lppaca()->donate_dedicated_cpu = 1;
> + in_purr = mfspr(SPRN_PURR);
> +
> + kt_before = ktime_get_real();
Don't you want to timestamp before you tell the HV that you are idle ?
Or is the above stuff only polled by phyp when partition interrupts are
enabled ?
> + local_irq_enable();
> + set_thread_flag(TIF_POLLING_NRFLAG);
> + while (!need_resched()) {
> + ppc64_runlatch_off();
> + HMT_low();
> + HMT_very_low();
> + }
> + HMT_medium();
> + clear_thread_flag(TIF_POLLING_NRFLAG);
> + smp_mb();
> + local_irq_disable();
> +
> + kt_after = ktime_get_real();
> + usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
> +
> + out_purr = mfspr(SPRN_PURR);
> + get_lppaca()->wait_state_cycles += out_purr - in_purr;
> + get_lppaca()->donate_dedicated_cpu = 0;
> + get_lppaca()->idle = 0;
> +
> + dev->last_residency = (int)usec_delta;
> +
> + return index;
> +}
> +
> +static int dedicated_cede_loop(struct cpuidle_device *dev,
> + struct cpuidle_driver *drv,
> + int index)
> +{
> + unsigned long in_purr, out_purr;
> + ktime_t kt_before, kt_after;
> + s64 usec_delta;
> +
> + /*
> + * Indicate to the HV that we are idle. Now would be
> + * a good time to find other work to dispatch.
> + */
> + get_lppaca()->idle = 1;
> + get_lppaca()->donate_dedicated_cpu = 1;
> + in_purr = mfspr(SPRN_PURR);
> +
> + kt_before = ktime_get_real();
There's a bit too much code duplication for my taste here between the
two functions. Not sure if it can be helped, maybe with some inlines
for the prolog/epilogue ... Looks like stuff that's easy to "fix" in one
place and forget the other...
> + ppc64_runlatch_off();
> + HMT_medium();
> + cede_processor();
> +
> + kt_after = ktime_get_real();
> + usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
> +
> + out_purr = mfspr(SPRN_PURR);
> + get_lppaca()->wait_state_cycles += out_purr - in_purr;
> + get_lppaca()->donate_dedicated_cpu = 0;
> + get_lppaca()->idle = 0;
> +
> + dev->last_residency = (int)usec_delta;
> +
> + return index;
> +}
> +
> +static int shared_cede_loop(struct cpuidle_device *dev,
> + struct cpuidle_driver *drv,
> + int index)
> +{
> + unsigned long in_purr, out_purr;
> + ktime_t kt_before, kt_after;
> + s64 usec_delta;
> +
> + /*
> + * Indicate to the HV that we are idle. Now would be
> + * a good time to find other work to dispatch.
> + */
> + get_lppaca()->idle = 1;
> + get_lppaca()->donate_dedicated_cpu = 1;
> + in_purr = mfspr(SPRN_PURR);
> +
> + kt_before = ktime_get_real();
> + /*
> + * Yield the processor to the hypervisor. We return if
> + * an external interrupt occurs (which are driven prior
> + * to returning here) or if a prod occurs from another
> + * processor. When returning here, external interrupts
> + * are enabled.
> + */
> + cede_processor();
> +
> + kt_after = ktime_get_real();
> +
> + usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
> +
> + out_purr = mfspr(SPRN_PURR);
> + get_lppaca()->wait_state_cycles += out_purr - in_purr;
> + get_lppaca()->donate_dedicated_cpu = 0;
> + get_lppaca()->idle = 0;
> +
> + dev->last_residency = (int)usec_delta;
> +
> + return index;
> +}
> +
> +/*
> + * States for dedicated partition case.
> + */
> +static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = {
> + { /* Snooze */
> + .name = "snooze",
> + .desc = "snooze",
> + .flags = CPUIDLE_FLAG_TIME_VALID,
> + .exit_latency = 0,
> + .target_residency = 0,
> + .enter = &snooze_loop },
> + { /* CEDE */
> + .name = "CEDE",
> + .desc = "CEDE",
> + .flags = CPUIDLE_FLAG_TIME_VALID,
> + .exit_latency = 1,
> + .target_residency = 10,
> + .enter = &dedicated_cede_loop },
> +};
> +
> +/*
> + * States for shared partition case.
> + */
> +static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
> + { /* Shared Cede */
> + .name = "Shared Cede",
> + .desc = "Shared Cede",
> + .flags = CPUIDLE_FLAG_TIME_VALID,
> + .exit_latency = 0,
> + .target_residency = 0,
> + .enter = &shared_cede_loop },
> +};
> +
> +int pseries_notify_cpuidle_add_cpu(int cpu)
> +{
> + struct cpuidle_device *dev =
> + per_cpu_ptr(pseries_idle_cpuidle_devices, cpu);
> + if (dev && cpuidle_get_driver()) {
> + cpuidle_disable_device(dev);
> + cpuidle_enable_device(dev);
> + }
> + return 0;
> +}
> +
> +/*
> + * pseries_idle_cpuidle_driver_init()
> + */
> +static int pseries_idle_cpuidle_driver_init(void)
> +{
> + int cstate;
> + struct cpuidle_driver *drv = &pseries_idle_driver;
> +
> + drv->state_count = 0;
> +
> + for (cstate = 0; cstate < MAX_IDLE_STATE_COUNT; ++cstate) {
> +
> + if (cstate > max_cstate)
> + break;
> +
> + /* is the state not enabled? */
> + if (cpuidle_state_table[cstate].enter == NULL)
> + continue;
> +
> + drv->states[drv->state_count] = /* structure copy */
> + cpuidle_state_table[cstate];
> +
> + if (cpuidle_state_table == dedicated_states)
> + drv->states[drv->state_count].target_residency =
> + __get_cpu_var(smt_snooze_delay);
> +
> + drv->state_count += 1;
> + }
> +
> + return 0;
> +}
> +
> +/* pseries_idle_devices_uninit(void)
> + * unregister cpuidle devices and de-allocate memory
> + */
> +static void pseries_idle_devices_uninit(void)
> +{
> + int i;
> + struct cpuidle_device *dev;
> +
> + for_each_possible_cpu(i) {
> + dev = per_cpu_ptr(pseries_idle_cpuidle_devices, i);
> + cpuidle_unregister_device(dev);
> + }
> +
> + free_percpu(pseries_idle_cpuidle_devices);
> + return;
> +}
> +
> +/* pseries_idle_devices_init()
> + * allocate, initialize and register cpuidle device
> + */
> +static int pseries_idle_devices_init(void)
> +{
> + int i;
> + struct cpuidle_driver *drv = &pseries_idle_driver;
> + struct cpuidle_device *dev;
> +
> + pseries_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
> + if (pseries_idle_cpuidle_devices == NULL)
> + return -ENOMEM;
> +
> + for_each_possible_cpu(i) {
> + dev = per_cpu_ptr(pseries_idle_cpuidle_devices, i);
> + dev->state_count = drv->state_count;
> + dev->cpu = i;
> + if (cpuidle_register_device(dev)) {
> + printk(KERN_DEBUG "cpuidle_register_device %d failed!\n",
> + i);
> + return -EIO;
> + }
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * pseries_idle_probe()
> + * Choose state table for shared versus dedicated partition
> + */
> +static int pseries_idle_probe(void)
> +{
> + if (max_cstate == 0) {
> + printk(KERN_DEBUG "pseries processor idle disabled.\n");
> + return -EPERM;
> + }
> +
> + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) {
> + printk(KERN_DEBUG "Using default idle\n");
> + return -ENODEV;
> + }
> +
> + if (get_lppaca()->shared_proc)
> + cpuidle_state_table = shared_states;
> + else
> + cpuidle_state_table = dedicated_states;
> +
> + return 0;
> +}
> +
> +static int __init pseries_processor_idle_init(void)
> +{
> + int retval;
> +
> + retval = pseries_idle_probe();
> + if (retval)
> + return retval;
> +
> + pseries_idle_cpuidle_driver_init();
> + retval = cpuidle_register_driver(&pseries_idle_driver);
> + if (retval) {
> + printk(KERN_DEBUG "Registration of pseries driver failed.\n");
> + return retval;
> + }
> +
> + retval = pseries_idle_devices_init();
> + if (retval) {
> + pseries_idle_devices_uninit();
> + cpuidle_unregister_driver(&pseries_idle_driver);
> + return retval;
> + }
> +
> + printk(KERN_DEBUG "pseries_idle_driver registered\n");
> +
> + return 0;
> +}
> +
> +device_initcall(pseries_processor_idle_init);
> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
> index e9f6d28..7c60380 100644
> --- a/arch/powerpc/platforms/pseries/pseries.h
> +++ b/arch/powerpc/platforms/pseries/pseries.h
> @@ -56,4 +56,7 @@ extern struct device_node *dlpar_configure_connector(u32);
> extern int dlpar_attach_node(struct device_node *);
> extern int dlpar_detach_node(struct device_node *);
>
> +/* Snooze Delay, pseries_idle */
> +DECLARE_PER_CPU(long, smt_snooze_delay);
> +
> #endif /* _PSERIES_PSERIES_H */
> diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
> index 593acce..6893a0c 100644
> --- a/arch/powerpc/platforms/pseries/setup.c
> +++ b/arch/powerpc/platforms/pseries/setup.c
> @@ -584,9 +584,6 @@ static int __init pSeries_probe(void)
> return 1;
> }
>
> -
> -DECLARE_PER_CPU(long, smt_snooze_delay);
> -
> static void pseries_dedicated_idle_sleep(void)
> {
> unsigned int cpu = smp_processor_id();
> diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
> index fbffd7e..2e46883 100644
> --- a/arch/powerpc/platforms/pseries/smp.c
> +++ b/arch/powerpc/platforms/pseries/smp.c
> @@ -150,6 +150,7 @@ static void __devinit smp_xics_setup_cpu(int cpu)
> set_cpu_current_state(cpu, CPU_STATE_ONLINE);
> set_default_offline_state(cpu);
> #endif
> + pseries_notify_cpuidle_add_cpu(cpu);
> }
>
> static int __devinit smp_pSeries_kick_cpu(int nr)
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [RFC PATCH V1 5/7] cpuidle: (POWER) cpuidle driver for pSeries
2011-06-17 4:36 ` Benjamin Herrenschmidt
@ 2011-06-21 9:00 ` Trinabh Gupta
0 siblings, 0 replies; 15+ messages in thread
From: Trinabh Gupta @ 2011-06-21 9:00 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-pm, linux-kernel
On 06/17/2011 10:06 AM, Benjamin Herrenschmidt wrote:
> On Tue, 2011-06-07 at 22:00 +0530, Trinabh Gupta wrote:
>
>> +static int snooze_loop(struct cpuidle_device *dev,
>> + struct cpuidle_driver *drv,
>> + int index)
>> +{
>> + unsigned long in_purr, out_purr;
>> + ktime_t kt_before, kt_after;
>> + s64 usec_delta;
>> +
>> + /*
>> + * Indicate to the HV that we are idle. Now would be
>> + * a good time to find other work to dispatch.
>> + */
>> + get_lppaca()->idle = 1;
>> + get_lppaca()->donate_dedicated_cpu = 1;
>> + in_purr = mfspr(SPRN_PURR);
>> +
>> + kt_before = ktime_get_real();
>
> Don't you want to timestamp before you tell the HV that you are idle ?
> Or is the above stuff only polled by phyp when partition interrupts are
> enabled ?
Hi Ben,
Yes, the timestamp should be taken before telling the HV that we are idle. Thanks
>
>> + local_irq_enable();
>> + set_thread_flag(TIF_POLLING_NRFLAG);
>> + while (!need_resched()) {
>> + ppc64_runlatch_off();
>> + HMT_low();
>> + HMT_very_low();
>> + }
>> + HMT_medium();
>> + clear_thread_flag(TIF_POLLING_NRFLAG);
>> + smp_mb();
>> + local_irq_disable();
>> +
>> + kt_after = ktime_get_real();
>> + usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
>> +
>> + out_purr = mfspr(SPRN_PURR);
>> + get_lppaca()->wait_state_cycles += out_purr - in_purr;
>> + get_lppaca()->donate_dedicated_cpu = 0;
>> + get_lppaca()->idle = 0;
>> +
>> + dev->last_residency = (int)usec_delta;
>> +
>> + return index;
>> +}
>> +
>> +static int dedicated_cede_loop(struct cpuidle_device *dev,
>> + struct cpuidle_driver *drv,
>> + int index)
>> +{
>> + unsigned long in_purr, out_purr;
>> + ktime_t kt_before, kt_after;
>> + s64 usec_delta;
>> +
>> + /*
>> + * Indicate to the HV that we are idle. Now would be
>> + * a good time to find other work to dispatch.
>> + */
>> + get_lppaca()->idle = 1;
>> + get_lppaca()->donate_dedicated_cpu = 1;
>> + in_purr = mfspr(SPRN_PURR);
>> +
>> + kt_before = ktime_get_real();
>
> There's a bit too much code duplication for my taste here between the
> two functions. Not sure if it can be helped, maybe with some inlines
> for the prolog/epilogue ... Looks like stuff that's easy to "fix" in one
> place and forget the other...
>
Yes, I agree that there is too much code duplication in these idle
routines; will fix this.
Thanks
-Trinabh
>> + ppc64_runlatch_off();
>> + HMT_medium();
>> + cede_processor();
>> +
>> + kt_after = ktime_get_real();
>> + usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
>> +
>> + out_purr = mfspr(SPRN_PURR);
>> + get_lppaca()->wait_state_cycles += out_purr - in_purr;
>> + get_lppaca()->donate_dedicated_cpu = 0;
>> + get_lppaca()->idle = 0;
>> +
>> + dev->last_residency = (int)usec_delta;
>> +
>> + return index;
>> +}
>> +
>> +static int shared_cede_loop(struct cpuidle_device *dev,
>> + struct cpuidle_driver *drv,
>> + int index)
>> +{
>> + unsigned long in_purr, out_purr;
>> + ktime_t kt_before, kt_after;
>> + s64 usec_delta;
>> +
>> + /*
>> + * Indicate to the HV that we are idle. Now would be
>> + * a good time to find other work to dispatch.
>> + */
>> + get_lppaca()->idle = 1;
>> + get_lppaca()->donate_dedicated_cpu = 1;
>> + in_purr = mfspr(SPRN_PURR);
>> +
>> + kt_before = ktime_get_real();
>> + /*
>> + * Yield the processor to the hypervisor. We return if
>> + * an external interrupt occurs (which are driven prior
>> + * to returning here) or if a prod occurs from another
>> + * processor. When returning here, external interrupts
>> + * are enabled.
>> + */
>> + cede_processor();
>> +
>> + kt_after = ktime_get_real();
>> +
>> + usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
>> +
>> + out_purr = mfspr(SPRN_PURR);
>> + get_lppaca()->wait_state_cycles += out_purr - in_purr;
>> + get_lppaca()->donate_dedicated_cpu = 0;
>> + get_lppaca()->idle = 0;
>> +
>> + dev->last_residency = (int)usec_delta;
>> +
>> + return index;
>> +}
>> +
>> +/*
>> + * States for dedicated partition case.
>> + */
>> +static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = {
>> + { /* Snooze */
>> + .name = "snooze",
>> + .desc = "snooze",
>> + .flags = CPUIDLE_FLAG_TIME_VALID,
>> + .exit_latency = 0,
>> + .target_residency = 0,
>> + .enter =&snooze_loop },
>> + { /* CEDE */
>> + .name = "CEDE",
>> + .desc = "CEDE",
>> + .flags = CPUIDLE_FLAG_TIME_VALID,
>> + .exit_latency = 1,
>> + .target_residency = 10,
>> + .enter =&dedicated_cede_loop },
>> +};
>> +
>> +/*
>> + * States for shared partition case.
>> + */
>> +static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
>> + { /* Shared Cede */
>> + .name = "Shared Cede",
>> + .desc = "Shared Cede",
>> + .flags = CPUIDLE_FLAG_TIME_VALID,
>> + .exit_latency = 0,
>> + .target_residency = 0,
>> + .enter =&shared_cede_loop },
>> +};
>> +
>> +int pseries_notify_cpuidle_add_cpu(int cpu)
>> +{
>> + struct cpuidle_device *dev =
>> + per_cpu_ptr(pseries_idle_cpuidle_devices, cpu);
>> + if (dev&& cpuidle_get_driver()) {
>> + cpuidle_disable_device(dev);
>> + cpuidle_enable_device(dev);
>> + }
>> + return 0;
>> +}
>> +
>> +/*
>> + * pseries_idle_cpuidle_driver_init()
>> + */
>> +static int pseries_idle_cpuidle_driver_init(void)
>> +{
>> + int cstate;
>> + struct cpuidle_driver *drv =&pseries_idle_driver;
>> +
>> + drv->state_count = 0;
>> +
>> + for (cstate = 0; cstate< MAX_IDLE_STATE_COUNT; ++cstate) {
>> +
>> + if (cstate> max_cstate)
>> + break;
>> +
>> + /* is the state not enabled? */
>> + if (cpuidle_state_table[cstate].enter == NULL)
>> + continue;
>> +
>> + drv->states[drv->state_count] = /* structure copy */
>> + cpuidle_state_table[cstate];
>> +
>> + if (cpuidle_state_table == dedicated_states)
>> + drv->states[drv->state_count].target_residency =
>> + __get_cpu_var(smt_snooze_delay);
>> +
>> + drv->state_count += 1;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +/* pseries_idle_devices_uninit(void)
>> + * unregister cpuidle devices and de-allocate memory
>> + */
>> +static void pseries_idle_devices_uninit(void)
>> +{
>> + int i;
>> + struct cpuidle_device *dev;
>> +
>> + for_each_possible_cpu(i) {
>> + dev = per_cpu_ptr(pseries_idle_cpuidle_devices, i);
>> + cpuidle_unregister_device(dev);
>> + }
>> +
>> + free_percpu(pseries_idle_cpuidle_devices);
>> + return;
>> +}
>> +
>> +/* pseries_idle_devices_init()
>> + * allocate, initialize and register cpuidle device
>> + */
>> +static int pseries_idle_devices_init(void)
>> +{
>> + int i;
>> + struct cpuidle_driver *drv =&pseries_idle_driver;
>> + struct cpuidle_device *dev;
>> +
>> + pseries_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
>> + if (pseries_idle_cpuidle_devices == NULL)
>> + return -ENOMEM;
>> +
>> + for_each_possible_cpu(i) {
>> + dev = per_cpu_ptr(pseries_idle_cpuidle_devices, i);
>> + dev->state_count = drv->state_count;
>> + dev->cpu = i;
>> + if (cpuidle_register_device(dev)) {
>> + printk(KERN_DEBUG "cpuidle_register_device %d failed!\n",
>> + i);
>> + return -EIO;
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +/*
>> + * pseries_idle_probe()
>> + * Choose state table for shared versus dedicated partition
>> + */
>> +static int pseries_idle_probe(void)
>> +{
>> + if (max_cstate == 0) {
>> + printk(KERN_DEBUG "pseries processor idle disabled.\n");
>> + return -EPERM;
>> + }
>> +
>> + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) {
>> + printk(KERN_DEBUG "Using default idle\n");
>> + return -ENODEV;
>> + }
>> +
>> + if (get_lppaca()->shared_proc)
>> + cpuidle_state_table = shared_states;
>> + else
>> + cpuidle_state_table = dedicated_states;
>> +
>> + return 0;
>> +}
>> +
>> +static int __init pseries_processor_idle_init(void)
>> +{
>> + int retval;
>> +
>> + retval = pseries_idle_probe();
>> + if (retval)
>> + return retval;
>> +
>> + pseries_idle_cpuidle_driver_init();
>> + retval = cpuidle_register_driver(&pseries_idle_driver);
>> + if (retval) {
>> + printk(KERN_DEBUG "Registration of pseries driver failed.\n");
>> + return retval;
>> + }
>> +
>> + retval = pseries_idle_devices_init();
>> + if (retval) {
>> + pseries_idle_devices_uninit();
>> + cpuidle_unregister_driver(&pseries_idle_driver);
>> + return retval;
>> + }
>> +
>> + printk(KERN_DEBUG "pseries_idle_driver registered\n");
>> +
>> + return 0;
>> +}
>> +
>> +device_initcall(pseries_processor_idle_init);
>> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
>> index e9f6d28..7c60380 100644
>> --- a/arch/powerpc/platforms/pseries/pseries.h
>> +++ b/arch/powerpc/platforms/pseries/pseries.h
>> @@ -56,4 +56,7 @@ extern struct device_node *dlpar_configure_connector(u32);
>> extern int dlpar_attach_node(struct device_node *);
>> extern int dlpar_detach_node(struct device_node *);
>>
>> +/* Snooze Delay, pseries_idle */
>> +DECLARE_PER_CPU(long, smt_snooze_delay);
>> +
>> #endif /* _PSERIES_PSERIES_H */
>> diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
>> index 593acce..6893a0c 100644
>> --- a/arch/powerpc/platforms/pseries/setup.c
>> +++ b/arch/powerpc/platforms/pseries/setup.c
>> @@ -584,9 +584,6 @@ static int __init pSeries_probe(void)
>> return 1;
>> }
>>
>> -
>> -DECLARE_PER_CPU(long, smt_snooze_delay);
>> -
>> static void pseries_dedicated_idle_sleep(void)
>> {
>> unsigned int cpu = smp_processor_id();
>> diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
>> index fbffd7e..2e46883 100644
>> --- a/arch/powerpc/platforms/pseries/smp.c
>> +++ b/arch/powerpc/platforms/pseries/smp.c
>> @@ -150,6 +150,7 @@ static void __devinit smp_xics_setup_cpu(int cpu)
>> set_cpu_current_state(cpu, CPU_STATE_ONLINE);
>> set_default_offline_state(cpu);
>> #endif
>> + pseries_notify_cpuidle_add_cpu(cpu);
>> }
>>
>> static int __devinit smp_pSeries_kick_cpu(int nr)
>>
>> _______________________________________________
>> Linuxppc-dev mailing list
>> Linuxppc-dev@lists.ozlabs.org
>> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [RFC PATCH V1 6/7] cpuidle: (POWER) Enable cpuidle and directly call cpuidle_idle_call() for pSeries
2011-06-07 16:29 [RFC PATCH V1 0/7] cpuidle: (POWER) cpuidle driver for pSeries Trinabh Gupta
` (4 preceding siblings ...)
2011-06-07 16:30 ` [RFC PATCH V1 5/7] cpuidle: (POWER) cpuidle driver for pSeries Trinabh Gupta
@ 2011-06-07 16:30 ` Trinabh Gupta
2011-06-07 16:30 ` [RFC PATCH V1 7/7] cpuidle: (POWER) Handle power_save=off Trinabh Gupta
6 siblings, 0 replies; 15+ messages in thread
From: Trinabh Gupta @ 2011-06-07 16:30 UTC (permalink / raw)
To: linux-pm, linuxppc-dev; +Cc: linux-kernel
This patch enables cpuidle for pSeries and calls cpuidle_idle_call()
directly from the idle loop. As a result, the pseries_idle cpuidle
driver registered with the cpuidle subsystem comes into action. This
patch also removes the routines pseries_shared_idle_sleep and
pseries_dedicated_idle_sleep, as they are now implemented as part of
the pseries_idle cpuidle driver.
Signed-off-by: Trinabh Gupta <trinabh@linux.vnet.ibm.com>
Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
---
arch/powerpc/platforms/Kconfig | 6 ++
arch/powerpc/platforms/pseries/Kconfig | 2 -
arch/powerpc/platforms/pseries/setup.c | 86 +-------------------------------
include/linux/cpuidle.h | 2 -
4 files changed, 9 insertions(+), 87 deletions(-)
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index f970ca2..80e3592 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -206,6 +206,12 @@ config PPC_PASEMI_CPUFREQ
endmenu
+menu "CPUIdle driver"
+
+source "drivers/cpuidle/Kconfig"
+
+endmenu
+
config PPC601_SYNC_FIX
bool "Workarounds for PPC601 bugs"
depends on 6xx && (PPC_PREP || PPC_PMAC)
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 877bac6..9729086 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -121,7 +121,7 @@ config DTL
config PSERIES_IDLE
tristate "Cpuidle driver for pSeries platforms"
- depends on CPU_IDLE
+ select CPU_IDLE
depends on PPC_PSERIES
default y
help
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 6893a0c..75d024b 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -39,6 +39,7 @@
#include <linux/irq.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
+#include <linux/cpuidle.h>
#include <asm/mmu.h>
#include <asm/processor.h>
@@ -74,9 +75,6 @@ EXPORT_SYMBOL(CMO_PageSize);
int fwnmi_active; /* TRUE if an FWNMI handler is present */
-static void pseries_shared_idle_sleep(void);
-static void pseries_dedicated_idle_sleep(void);
-
static struct device_node *pSeries_mpic_node;
static void pSeries_show_cpuinfo(struct seq_file *m)
@@ -373,18 +371,9 @@ static void __init pSeries_setup_arch(void)
pSeries_nvram_init();
- /* Choose an idle loop */
if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
vpa_init(boot_cpuid);
- if (get_lppaca()->shared_proc) {
- printk(KERN_DEBUG "Using shared processor idle loop\n");
- ppc_md.power_save = pseries_shared_idle_sleep;
- } else {
- printk(KERN_DEBUG "Using dedicated idle loop\n");
- ppc_md.power_save = pseries_dedicated_idle_sleep;
- }
- } else {
- printk(KERN_DEBUG "Using default idle loop\n");
+ ppc_md.power_save = (void *)cpuidle_idle_call;
}
if (firmware_has_feature(FW_FEATURE_LPAR))
@@ -584,77 +573,6 @@ static int __init pSeries_probe(void)
return 1;
}
-static void pseries_dedicated_idle_sleep(void)
-{
- unsigned int cpu = smp_processor_id();
- unsigned long start_snooze;
- unsigned long in_purr, out_purr;
- long snooze = __get_cpu_var(smt_snooze_delay);
-
- /*
- * Indicate to the HV that we are idle. Now would be
- * a good time to find other work to dispatch.
- */
- get_lppaca()->idle = 1;
- get_lppaca()->donate_dedicated_cpu = 1;
- in_purr = mfspr(SPRN_PURR);
-
- /*
- * We come in with interrupts disabled, and need_resched()
- * has been checked recently. If we should poll for a little
- * while, do so.
- */
- if (snooze) {
- start_snooze = get_tb() + snooze * tb_ticks_per_usec;
- local_irq_enable();
- set_thread_flag(TIF_POLLING_NRFLAG);
-
- while ((snooze < 0) || (get_tb() < start_snooze)) {
- if (need_resched() || cpu_is_offline(cpu))
- goto out;
- ppc64_runlatch_off();
- HMT_low();
- HMT_very_low();
- }
-
- HMT_medium();
- clear_thread_flag(TIF_POLLING_NRFLAG);
- smp_mb();
- local_irq_disable();
- if (need_resched() || cpu_is_offline(cpu))
- goto out;
- }
-
- cede_processor();
-
-out:
- HMT_medium();
- out_purr = mfspr(SPRN_PURR);
- get_lppaca()->wait_state_cycles += out_purr - in_purr;
- get_lppaca()->donate_dedicated_cpu = 0;
- get_lppaca()->idle = 0;
-}
-
-static void pseries_shared_idle_sleep(void)
-{
- /*
- * Indicate to the HV that we are idle. Now would be
- * a good time to find other work to dispatch.
- */
- get_lppaca()->idle = 1;
-
- /*
- * Yield the processor to the hypervisor. We return if
- * an external interrupt occurs (which are driven prior
- * to returning here) or if a prod occurs from another
- * processor. When returning here, external interrupts
- * are enabled.
- */
- cede_processor();
-
- get_lppaca()->idle = 0;
-}
-
static int pSeries_pci_probe_mode(struct pci_bus *bus)
{
if (firmware_has_feature(FW_FEATURE_LPAR))
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index c904188..701bc9b 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -129,7 +129,6 @@ struct cpuidle_driver {
#ifdef CONFIG_CPU_IDLE
extern void disable_cpuidle(void);
extern int cpuidle_idle_call(void);
-
extern int cpuidle_register_driver(struct cpuidle_driver *drv);
struct cpuidle_driver *cpuidle_get_driver(void);
extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
@@ -144,7 +143,6 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev);
#else
static inline void disable_cpuidle(void) { }
static inline int cpuidle_idle_call(void) { return -ENODEV; }
-
static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
{return -ENODEV; }
static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; }
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [RFC PATCH V1 7/7] cpuidle: (POWER) Handle power_save=off
2011-06-07 16:29 [RFC PATCH V1 0/7] cpuidle: (POWER) cpuidle driver for pSeries Trinabh Gupta
` (5 preceding siblings ...)
2011-06-07 16:30 ` [RFC PATCH V1 6/7] cpuidle: (POWER) Enable cpuidle and directly call cpuidle_idle_call() " Trinabh Gupta
@ 2011-06-07 16:30 ` Trinabh Gupta
6 siblings, 0 replies; 15+ messages in thread
From: Trinabh Gupta @ 2011-06-07 16:30 UTC (permalink / raw)
To: linux-pm, linuxppc-dev; +Cc: linux-kernel
This patch prevents pseries_idle_driver from being registered when the
powersave=off kernel boot option is specified. To do this,
boot_option_idle_override is used, similar to how it is used on x86.
Signed-off-by: Trinabh Gupta <trinabh@linux.vnet.ibm.com>
Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/processor.h | 3 +++
arch/powerpc/kernel/idle.c | 4 ++++
arch/powerpc/platforms/pseries/processor_idle.c | 4 ++++
3 files changed, 11 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index d50c2b6..0ce167e 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -377,6 +377,9 @@ static inline unsigned long get_clean_sp(struct pt_regs *regs, int is_32)
}
#endif
+extern unsigned long boot_option_idle_override;
+enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
+
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PROCESSOR_H */
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 932392b..61515f4 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -39,9 +39,13 @@
#define cpu_should_die() 0
#endif
+unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
+EXPORT_SYMBOL(boot_option_idle_override);
+
static int __init powersave_off(char *arg)
{
ppc_md.power_save = NULL;
+ boot_option_idle_override = IDLE_POWERSAVE_OFF;
return 0;
}
__setup("powersave=off", powersave_off);
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c
index ff44b49..c4c3383 100644
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ b/arch/powerpc/platforms/pseries/processor_idle.c
@@ -288,6 +288,10 @@ static int pseries_idle_probe(void)
return -EPERM;
}
+ if (boot_option_idle_override != IDLE_NO_OVERRIDE) {
+ return -ENODEV;
+ }
+
if (!firmware_has_feature(FW_FEATURE_SPLPAR)) {
printk(KERN_DEBUG "Using default idle\n");
return -ENODEV;
^ permalink raw reply related [flat|nested] 15+ messages in thread