* [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
2009-02-22 17:37 [RFC][PATCH 0/2] Rework disabling " Rafael J. Wysocki
@ 2009-02-22 17:39 ` Rafael J. Wysocki
[not found] ` <200902221839.50357.rjw@sisk.pl>
1 sibling, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-22 17:39 UTC (permalink / raw)
To: LKML
Cc: Jeremy Fitzhardinge, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, Linus Torvalds, pm list
From: Rafael J. Wysocki <rjw@sisk.pl>
Introduce two helper functions allowing us to disable device
interrupts (at the IO-APIC level) during suspend or hibernation
and enable them during the subsequent resume, respectively, so that
the timer interrupts are enabled while "late" suspend callbacks and
"early" resume callbacks provided by device drivers are being
executed.
Use these functions to rework the handling of interrupts during
suspend (hibernation) and resume. Namely, interrupts will only be
disabled on the CPU right before suspending sysdevs, while device
interrupts will be disabled (at the IO-APIC level), with the help of
the new helper function, before calling "late" suspend callbacks
provided by device drivers and analogously during resume.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
arch/x86/kernel/apm_32.c | 20 ++++++++--
drivers/xen/manage.c | 37 ++++++++++++--------
include/linux/interrupt.h | 3 +
kernel/irq/manage.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++
kernel/kexec.c | 11 ++++-
kernel/power/disk.c | 56 +++++++++++++++++++++++++++---
kernel/power/main.c | 27 +++++++++++---
7 files changed, 208 insertions(+), 31 deletions(-)
Index: linux-2.6/kernel/irq/manage.c
===================================================================
--- linux-2.6.orig/kernel/irq/manage.c
+++ linux-2.6/kernel/irq/manage.c
@@ -746,3 +746,88 @@ int request_irq(unsigned int irq, irq_ha
return retval;
}
EXPORT_SYMBOL(request_irq);
+
+#ifdef CONFIG_PM_SLEEP
+struct disabled_irq {
+ struct list_head list;
+ int irq;
+};
+
+static LIST_HEAD(resume_irqs_list);
+
+/**
+ * enable_device_irqs - enable interrupts disabled by disable_device_irqs()
+ *
+ * Enable all interrupt lines previously disabled by disable_device_irqs()
+ * that are on resume_irqs_list.
+ */
+void enable_device_irqs(void)
+{
+ struct disabled_irq *resume_irq, *tmp;
+
+ list_for_each_entry_safe(resume_irq, tmp, &resume_irqs_list, list) {
+ enable_irq(resume_irq->irq);
+ list_del(&resume_irq->list);
+ kfree(resume_irq);
+ }
+}
+
+/**
+ * disable_device_irqs - disable all enabled interrupt lines
+ *
+ * During system-wide suspend or hibernation device interrupts need to be
+ * disabled at the chip level and this function is provided for this
+ * purpose. It disables all interrupt lines that are enabled at the
+ * moment and saves their numbers for enable_device_irqs().
+ */
+int disable_device_irqs(void)
+{
+ struct irq_desc *desc;
+ int irq;
+
+ for_each_irq_desc(irq, desc) {
+ unsigned long flags;
+ struct disabled_irq *resume_irq;
+ struct irqaction *action;
+ bool is_timer_irq;
+
+ resume_irq = kzalloc(sizeof(*resume_irq), GFP_NOIO);
+ if (!resume_irq) {
+ enable_device_irqs();
+ return -ENOMEM;
+ }
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ is_timer_irq = false;
+ action = desc->action;
+ while (action) {
+ if (action->flags | IRQF_TIMER) {
+ is_timer_irq = true;
+ break;
+ }
+ action = action->next;
+ }
+
+ if (!is_timer_irq && !desc->depth) {
+ desc->depth++;
+ desc->status |= IRQ_DISABLED;
+ desc->chip->disable(irq);
+ } else {
+ spin_unlock_irqrestore(&desc->lock, flags);
+ kfree(resume_irq);
+ continue;
+ }
+
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ if (desc->action)
+ synchronize_irq(irq);
+
+ resume_irq->irq = irq;
+ list_add(&resume_irq->list, &resume_irqs_list);
+ }
+
+ return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
Index: linux-2.6/include/linux/interrupt.h
===================================================================
--- linux-2.6.orig/include/linux/interrupt.h
+++ linux-2.6/include/linux/interrupt.h
@@ -470,4 +470,7 @@ extern int early_irq_init(void);
extern int arch_early_irq_init(void);
extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
+extern int disable_device_irqs(void);
+extern void enable_device_irqs(void);
+
#endif
Index: linux-2.6/kernel/power/main.c
===================================================================
--- linux-2.6.orig/kernel/power/main.c
+++ linux-2.6/kernel/power/main.c
@@ -22,6 +22,7 @@
#include <linux/freezer.h>
#include <linux/vmstat.h>
#include <linux/syscalls.h>
+#include <linux/interrupt.h>
#include "power.h"
@@ -287,17 +288,25 @@ void __attribute__ ((weak)) arch_suspend
*/
static int suspend_enter(suspend_state_t state)
{
- int error = 0;
+ int error;
device_pm_lock();
- arch_suspend_disable_irqs();
- BUG_ON(!irqs_disabled());
- if ((error = device_power_down(PMSG_SUSPEND))) {
+ error = disable_device_irqs();
+ if (error) {
+ printk(KERN_ERR "PM: Failed to disable device interrupts\n");
+ goto Unlock;
+ }
+
+ error = device_power_down(PMSG_SUSPEND);
+ if (error) {
printk(KERN_ERR "PM: Some devices failed to power down\n");
goto Done;
}
+ arch_suspend_disable_irqs();
+ BUG_ON(!irqs_disabled());
+
error = sysdev_suspend(PMSG_SUSPEND);
if (!error) {
if (!suspend_test(TEST_CORE))
@@ -305,11 +314,17 @@ static int suspend_enter(suspend_state_t
sysdev_resume();
}
- device_power_up(PMSG_RESUME);
- Done:
arch_suspend_enable_irqs();
BUG_ON(irqs_disabled());
+
+ device_power_up(PMSG_RESUME);
+
+ Done:
+ enable_device_irqs();
+
+ Unlock:
device_pm_unlock();
+
return error;
}
Index: linux-2.6/kernel/power/disk.c
===================================================================
--- linux-2.6.orig/kernel/power/disk.c
+++ linux-2.6/kernel/power/disk.c
@@ -22,6 +22,7 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
+#include <linux/interrupt.h>
#include "power.h"
@@ -214,7 +215,13 @@ static int create_image(int platform_mod
return error;
device_pm_lock();
- local_irq_disable();
+
+ error = disable_device_irqs();
+ if (error) {
+ printk(KERN_ERR "PM: Failed to disable device interrupts\n");
+ goto Unlock;
+ }
+
/* At this point, device_suspend() has been called, but *not*
* device_power_down(). We *must* call device_power_down() now.
* Otherwise, drivers for some devices (e.g. interrupt controllers)
@@ -227,6 +234,9 @@ static int create_image(int platform_mod
"aborting hibernation\n");
goto Enable_irqs;
}
+
+ local_irq_disable();
+
sysdev_suspend(PMSG_FREEZE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
@@ -252,11 +262,17 @@ static int create_image(int platform_mod
/* NOTE: device_power_up() is just a resume() for devices
* that suspended with irqs off ... no overall powerup.
*/
+
Power_up_devices:
+ local_irq_enable();
+
device_power_up(in_suspend ?
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
+
Enable_irqs:
- local_irq_enable();
+ enable_device_irqs();
+
+ Unlock:
device_pm_unlock();
return error;
}
@@ -336,13 +352,22 @@ static int resume_target_kernel(void)
int error;
device_pm_lock();
- local_irq_disable();
+
+ error = disable_device_irqs();
+ if (error) {
+ printk(KERN_ERR "PM: Failed to disable device interrupts\n");
+ goto Unlock;
+ }
+
error = device_power_down(PMSG_QUIESCE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting resume\n");
goto Enable_irqs;
}
+
+ local_irq_disable();
+
sysdev_suspend(PMSG_QUIESCE);
/* We'll ignore saved state, but this gets preempt count (etc) right */
save_processor_state();
@@ -366,11 +391,19 @@ static int resume_target_kernel(void)
swsusp_free();
restore_processor_state();
touch_softlockup_watchdog();
+
sysdev_resume();
+
+ local_irq_enable();
+
device_power_up(PMSG_RECOVER);
+
Enable_irqs:
- local_irq_enable();
+ enable_device_irqs();
+
+ Unlock:
device_pm_unlock();
+
return error;
}
@@ -447,15 +480,23 @@ int hibernation_platform_enter(void)
goto Finish;
device_pm_lock();
- local_irq_disable();
+
+ error = disable_device_irqs();
+ if (error)
+ goto Unlock;
+
error = device_power_down(PMSG_HIBERNATE);
if (!error) {
+ local_irq_disable();
sysdev_suspend(PMSG_HIBERNATE);
hibernation_ops->enter();
/* We should never get here */
while (1);
}
- local_irq_enable();
+
+ enable_device_irqs();
+
+ Unlock:
device_pm_unlock();
/*
@@ -464,12 +505,15 @@ int hibernation_platform_enter(void)
*/
Finish:
hibernation_ops->finish();
+
Resume_devices:
entering_platform_hibernation = false;
device_resume(PMSG_RESTORE);
resume_console();
+
Close:
hibernation_ops->end();
+
return error;
}
Index: linux-2.6/arch/x86/kernel/apm_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apm_32.c
+++ linux-2.6/arch/x86/kernel/apm_32.c
@@ -228,6 +228,7 @@
#include <linux/suspend.h>
#include <linux/kthread.h>
#include <linux/jiffies.h>
+#include <linux/interrupt.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -1190,8 +1191,11 @@ static int suspend(int vetoable)
struct apm_user *as;
device_suspend(PMSG_SUSPEND);
- local_irq_disable();
+
+ disable_device_irqs();
device_power_down(PMSG_SUSPEND);
+
+ local_irq_disable();
sysdev_suspend(PMSG_SUSPEND);
local_irq_enable();
@@ -1209,9 +1213,13 @@ static int suspend(int vetoable)
if (err != APM_SUCCESS)
apm_error("suspend", err);
err = (err == APM_SUCCESS) ? 0 : -EIO;
+
sysdev_resume();
- device_power_up(PMSG_RESUME);
local_irq_enable();
+
+ device_power_up(PMSG_RESUME);
+ enable_device_irqs();
+
device_resume(PMSG_RESUME);
queue_event(APM_NORMAL_RESUME, NULL);
spin_lock(&user_list_lock);
@@ -1228,8 +1236,10 @@ static void standby(void)
{
int err;
- local_irq_disable();
+ disable_device_irqs();
device_power_down(PMSG_SUSPEND);
+
+ local_irq_disable();
sysdev_suspend(PMSG_SUSPEND);
local_irq_enable();
@@ -1239,8 +1249,10 @@ static void standby(void)
local_irq_disable();
sysdev_resume();
- device_power_up(PMSG_RESUME);
local_irq_enable();
+
+ device_power_up(PMSG_RESUME);
+ enable_device_irqs();
}
static apm_event_t get_event(void)
Index: linux-2.6/drivers/xen/manage.c
===================================================================
--- linux-2.6.orig/drivers/xen/manage.c
+++ linux-2.6/drivers/xen/manage.c
@@ -39,12 +39,6 @@ static int xen_suspend(void *data)
BUG_ON(!irqs_disabled());
- err = device_power_down(PMSG_SUSPEND);
- if (err) {
- printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n",
- err);
- return err;
- }
err = sysdev_suspend(PMSG_SUSPEND);
if (err) {
printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
@@ -69,13 +63,6 @@ static int xen_suspend(void *data)
xen_mm_unpin_all();
sysdev_resume();
- device_power_up(PMSG_RESUME);
-
- if (!*cancelled) {
- xen_irq_resume();
- xen_console_resume();
- xen_timer_resume();
- }
return 0;
}
@@ -108,6 +95,18 @@ static void do_suspend(void)
/* XXX use normal device tree? */
xenbus_suspend();
+ err = disable_device_irqs();
+ if (err) {
+ printk(KERN_ERR "disable_device_irqs failed: %d\n", err);
+ goto resume_devices;
+ }
+
+ err = device_power_down(PMSG_SUSPEND);
+ if (err) {
+ printk(KERN_ERR "device_power_down failed: %d\n", err);
+ goto enable_irqs;
+ }
+
err = stop_machine(xen_suspend, &cancelled, &cpumask_of_cpu(0));
if (err) {
printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
@@ -120,6 +119,18 @@ static void do_suspend(void)
} else
xenbus_suspend_cancel();
+ device_power_up(PMSG_RESUME);
+
+ if (!cancelled) {
+ xen_irq_resume();
+ xen_console_resume();
+ xen_timer_resume();
+ }
+
+enable_irqs:
+ enable_device_irqs();
+
+resume_devices:
device_resume(PMSG_RESUME);
/* Make sure timer events get retriggered on all CPUs */
Index: linux-2.6/kernel/kexec.c
===================================================================
--- linux-2.6.orig/kernel/kexec.c
+++ linux-2.6/kernel/kexec.c
@@ -1454,7 +1454,11 @@ int kernel_kexec(void)
if (error)
goto Resume_devices;
device_pm_lock();
- local_irq_disable();
+
+ error = disable_device_irqs();
+ if (error)
+ goto Unlock_pm;
+
/* At this point, device_suspend() has been called,
* but *not* device_power_down(). We *must*
* device_power_down() now. Otherwise, drivers for
@@ -1466,6 +1470,7 @@ int kernel_kexec(void)
if (error)
goto Enable_irqs;
+ local_irq_disable();
/* Suspend system devices */
error = sysdev_suspend(PMSG_FREEZE);
if (error)
@@ -1484,9 +1489,11 @@ int kernel_kexec(void)
if (kexec_image->preserve_context) {
sysdev_resume();
Power_up_devices:
+ local_irq_enable();
device_power_up(PMSG_RESTORE);
Enable_irqs:
- local_irq_enable();
+ enable_device_irqs();
+ Unlock_pm:
device_pm_unlock();
enable_nonboot_cpus();
Resume_devices:
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902221839.50357.rjw@sisk.pl>
@ 2009-02-22 18:01 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902220946070.3111@localhost.localdomain>
` (2 subsequent siblings)
3 siblings, 0 replies; 77+ messages in thread
From: Linus Torvalds @ 2009-02-22 18:01 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Sun, 22 Feb 2009, Rafael J. Wysocki wrote:
>
> Use these functions to rework the handling of interrupts during
> suspend (hibernation) and resume. Namely, interrupts will only be
> disabled on the CPU right before suspending sysdevs, while device
> interrupts will be disabled (at the IO-APIC level), with the help of
> the new helper function, before calling "late" suspend callbacks
> provided by device drivers and analogously during resume.
I think this patch is actually a bit too complicated.
> +struct disabled_irq {
> + struct list_head list;
> + int irq;
> +};
> +
> +static LIST_HEAD(resume_irqs_list);
> +
> +/**
> + * enable_device_irqs - enable interrupts disabled by disable_device_irqs()
> + *
> + * Enable all interrupt lines previously disabled by disable_device_irqs()
> + * that are on resume_irqs_list.
> + */
> +void enable_device_irqs(void)
> +{
> + struct disabled_irq *resume_irq, *tmp;
> +
> + list_for_each_entry_safe(resume_irq, tmp, &resume_irqs_list, list) {
> + enable_irq(resume_irq->irq);
> + list_del(&resume_irq->list);
> + kfree(resume_irq);
> + }
> +}
Don't do this whole separate list. Instead, just add a per-irq-descriptor
flag to the desc->status field that says "suspended". IOW, just do
something like
diff --git a/include/linux/irq.h b/include/linux/irq.h
index f899b50..7bc2a31 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -65,6 +65,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq,
#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
#define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/
+#define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */
#ifdef CONFIG_IRQ_PER_CPU
# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
and then just make the suspend sequence do
for_each_irq_desc(irq, desc) {
.. check desc if we should disable it ..
disable_irq(irq);
desc->status |= IRQ_SUSPENDED;
}
and the resume sequence do
for_each_irq_desc(irq, desc) {
if (!(desc->status & IRQ_SUSPENDED))
continue;
desc->status &= ~IRQ_SUSPENDED;
enabled_irq(irq);
}
And that simplifcation then gets rid of
> +/**
> + * disable_device_irqs - disable all enabled interrupt lines
> + *
> + * During system-wide suspend or hibernation device interrupts need to be
> + * disabled at the chip level and this function is provided for this
> + * purpose. It disables all interrupt lines that are enabled at the
> + * moment and saves their numbers for enable_device_irqs().
> + */
> +int disable_device_irqs(void)
> +{
> + struct irq_desc *desc;
> + int irq;
> +
> + for_each_irq_desc(irq, desc) {
> + unsigned long flags;
> + struct disabled_irq *resume_irq;
> + struct irqaction *action;
> + bool is_timer_irq;
> +
> + resume_irq = kzalloc(sizeof(*resume_irq), GFP_NOIO);
> + if (!resume_irq) {
> + enable_device_irqs();
> + return -ENOMEM;
> + }
this just goes away.
> + is_timer_irq = false;
> + action = desc->action;
> + while (action) {
> + if (action->flags | IRQF_TIMER) {
> + is_timer_irq = true;
> + break;
> + }
> + action = action->next;
> + }
This is also pointless and wrong (and buggy). You should use '&' to
test that flag, not '|', but more importantly, if you share interrupts
with a timer irq, there's nothing sane the irq layer can do ANYWAY, so
just ignore the whole problem. Just look at the first one, don't try to be
clever, because your clever code doesn't buy anything at all.
So get rid of the loop, and just do
if (desc->action && !(desc->action->flags & IRQF_TIMER)) {
desc->depth++;
desc->status |= IRQ_DISABLED | IRQ_SUSPENDED;
desc->chip->disable(irq);
}
spin_unlock_irqrestore(&desc->lock, flags);
and you're done.
Also, I'd actually suggest that the whole "synchronize_irq()" be handled
in a separate loop after the main one, so make that one just be
for_each_irq_desc(irq, desc) {
if (desc->status & IRQ_SUSPENDED)
serialize_irq(irq);
}
at the end. No need for desc->lock, since the IRQ_SUSPENDED bit is stable.
Finally:
> +extern int disable_device_irqs(void);
> +extern void enable_device_irqs(void);
I think the naming is not great. It's not about disable/enable, it's very
much about suspend/resume. In your version, it had that global
"disabled_irq" list, and in mine it has that IRQ_SUSPENDED bit - and in
both cases you can't nest things, and you can't consider them in any way
"generic" enable/disable things, they are very specialized "shut up
everything but the timer irq".
I also don't think there is any reasonable error case, so just make the
"suspend" thing return 'void', and don't complicate the caller. We don't
error out on the simple "disable_irq()" either. It's a imperative
statement, not a "please can you try to do that" thing.
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <alpine.LFD.2.00.0902220946070.3111@localhost.localdomain>
@ 2009-02-22 22:42 ` Rafael J. Wysocki
[not found] ` <200902222342.08285.rjw@sisk.pl>
1 sibling, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-22 22:42 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Sunday 22 February 2009, Linus Torvalds wrote:
>
> On Sun, 22 Feb 2009, Rafael J. Wysocki wrote:
> >
> > Use these functions to rework the handling of interrupts during
> > suspend (hibernation) and resume. Namely, interrupts will only be
> > disabled on the CPU right before suspending sysdevs, while device
> > interrupts will be disabled (at the IO-APIC level), with the help of
> > the new helper function, before calling "late" suspend callbacks
> > provided by device drivers and analogously during resume.
>
> I think this patch is actually a bit too complicated.
>
> > +struct disabled_irq {
> > + struct list_head list;
> > + int irq;
> > +};
> > +
> > +static LIST_HEAD(resume_irqs_list);
> > +
> > +/**
> > + * enable_device_irqs - enable interrupts disabled by disable_device_irqs()
> > + *
> > + * Enable all interrupt lines previously disabled by disable_device_irqs()
> > + * that are on resume_irqs_list.
> > + */
> > +void enable_device_irqs(void)
> > +{
> > + struct disabled_irq *resume_irq, *tmp;
> > +
> > + list_for_each_entry_safe(resume_irq, tmp, &resume_irqs_list, list) {
> > + enable_irq(resume_irq->irq);
> > + list_del(&resume_irq->list);
> > + kfree(resume_irq);
> > + }
> > +}
>
> Don't do this whole separate list. Instead, just add a per-irq-descriptor
> flag to the desc->status field that says "suspended". IOW, just do
> something like
OK
> diff --git a/include/linux/irq.h b/include/linux/irq.h
> index f899b50..7bc2a31 100644
> --- a/include/linux/irq.h
> +++ b/include/linux/irq.h
> @@ -65,6 +65,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq,
> #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
> #define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
> #define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/
> +#define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */
>
> #ifdef CONFIG_IRQ_PER_CPU
> # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
>
> and then just make the suspend sequence do
>
> for_each_irq_desc(irq, desc) {
> .. check desc if we should disable it ..
> disable_irq(irq);
> desc->status |= IRQ_SUSPENDED;
> }
>
> and the resume sequence do
>
> for_each_irq_desc(irq, desc) {
> if (!(desc->status & IRQ_SUSPENDED))
> continue;
> desc->status &= ~IRQ_SUSPENDED;
> enabled_irq(irq);
> }
>
> And that simplifcation then gets rid of
>
> > +/**
> > + * disable_device_irqs - disable all enabled interrupt lines
> > + *
> > + * During system-wide suspend or hibernation device interrupts need to be
> > + * disabled at the chip level and this function is provided for this
> > + * purpose. It disables all interrupt lines that are enabled at the
> > + * moment and saves their numbers for enable_device_irqs().
> > + */
> > +int disable_device_irqs(void)
> > +{
> > + struct irq_desc *desc;
> > + int irq;
> > +
> > + for_each_irq_desc(irq, desc) {
> > + unsigned long flags;
> > + struct disabled_irq *resume_irq;
> > + struct irqaction *action;
> > + bool is_timer_irq;
> > +
> > + resume_irq = kzalloc(sizeof(*resume_irq), GFP_NOIO);
> > + if (!resume_irq) {
> > + enable_device_irqs();
> > + return -ENOMEM;
> > + }
>
> this just goes away.
>
> > + is_timer_irq = false;
> > + action = desc->action;
> > + while (action) {
> > + if (action->flags | IRQF_TIMER) {
> > + is_timer_irq = true;
> > + break;
> > + }
> > + action = action->next;
> > + }
>
> This is also pointless and wrong (and buggy). You should use '&' to
> test that flag, not '|',
Ouch, sorry.
> but more importantly, if you share interrupts with a timer irq, there's
> nothing sane the irq layer can do ANYWAY, so just ignore the whole problem.
> Just look at the first one, don't try to be clever, because your clever code
> doesn't buy anything at all.
>
> So get rid of the loop, and just do
>
> if (desc->action && !(desc->action->flags & IRQF_TIMER)) {
> desc->depth++;
> desc->status |= IRQ_DISABLED | IRQ_SUSPENDED;
> desc->chip->disable(irq);
> }
> spin_unlock_irqrestore(&desc->lock, flags);
>
> and you're done.
OK
> Also, I'd actually suggest that the whole "synchronize_irq()" be handled
> in a separate loop after the main one, so make that one just be
>
> for_each_irq_desc(irq, desc) {
> if (desc->status & IRQ_SUSPENDED)
> serialize_irq(irq);
> }
>
> at the end. No need for desc->lock, since the IRQ_SUSPENDED bit is stable.
OK
> Finally:
>
> > +extern int disable_device_irqs(void);
> > +extern void enable_device_irqs(void);
>
> I think the naming is not great. It's not about disable/enable, it's very
> much about suspend/resume. In your version, it had that global
> "disabled_irq" list, and in mine it has that IRQ_SUSPENDED bit - and in
> both cases you can't nest things, and you can't consider them in any way
> "generic" enable/disable things, they are very specialized "shut up
> everything but the timer irq".
OK, would
extern void suspend_device_irqs(void);
extern void resume_device_irqs(void);
be better?
> I also don't think there is any reasonable error case, so just make the
> "suspend" thing return 'void', and don't complicate the caller. We don't
> error out on the simple "disable_irq()" either. It's a imperative
> statement, not a "please can you try to do that" thing.
The error is there just because the memory allocation can fail. With the
IRQ_SUSPENDED flag as per your suggestion it won't be necessary any more.
Thanks a lot for your comments, I'll send an updated patch shortly.
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902222342.08285.rjw@sisk.pl>
@ 2009-02-22 23:48 ` Rafael J. Wysocki
[not found] ` <200902230048.33635.rjw@sisk.pl>
1 sibling, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-22 23:48 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Sunday 22 February 2009, Rafael J. Wysocki wrote:
> On Sunday 22 February 2009, Linus Torvalds wrote:
> >
> > On Sun, 22 Feb 2009, Rafael J. Wysocki wrote:
[--snip--]
>
> Thanks a lot for your comments, I'll send an updated patch shortly.
The updated patch is appended.
It has been initially tested, but requires more testing, especially with APM,
XEN, kexec jump etc.
Thanks,
Rafael
---
From: Rafael J. Wysocki <rjw@sisk.pl>
Subject: PM: Rework handling of interrupts during suspend-resume (rev. 2)
Introduce two helper functions allowing us to disable device
interrupts (at the IO-APIC level) during suspend or hibernation
and enable them during the subsequent resume, respectively, so that
the timer interrupts are enabled while "late" suspend callbacks and
"early" resume callbacks provided by device drivers are being
executed.
Use these functions to rework the handling of interrupts during
suspend (hibernation) and resume. Namely, interrupts will only be
disabled on the CPU right before suspending sysdevs, while device
interrupts will be disabled (at the IO-APIC level), with the help of
the new helper function, before calling "late" suspend callbacks
provided by device drivers and analogously during resume.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
arch/x86/kernel/apm_32.c | 20 ++++++++++++----
drivers/xen/manage.c | 32 +++++++++++++++----------
include/linux/interrupt.h | 3 ++
include/linux/irq.h | 1
kernel/irq/manage.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++
kernel/kexec.c | 10 ++++----
kernel/power/disk.c | 46 +++++++++++++++++++++++++++++--------
kernel/power/main.c | 20 +++++++++++-----
8 files changed, 152 insertions(+), 37 deletions(-)
Index: linux-2.6/kernel/irq/manage.c
===================================================================
--- linux-2.6.orig/kernel/irq/manage.c
+++ linux-2.6/kernel/irq/manage.c
@@ -746,3 +746,60 @@ int request_irq(unsigned int irq, irq_ha
return retval;
}
EXPORT_SYMBOL(request_irq);
+
+#ifdef CONFIG_PM_SLEEP
+/**
+ * suspend_device_irqs - disable all currently enabled interrupt lines
+ *
+ * During system-wide suspend or hibernation device interrupts need to be
+ * disabled at the chip level and this function is provided for this
+ * purpose. It disables all interrupt lines that are enabled at the
+ * moment and sets the IRQ_SUSPENDED flag for them.
+ */
+void suspend_device_irqs(void)
+{
+ struct irq_desc *desc;
+ int irq;
+
+ for_each_irq_desc(irq, desc) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ if (!desc->depth && desc->action
+ && !(desc->action->flags & IRQF_TIMER)) {
+ desc->depth++;
+ desc->status |= IRQ_DISABLED | IRQ_SUSPENDED;
+ desc->chip->disable(irq);
+ }
+
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+
+ for_each_irq_desc(irq, desc) {
+ if (desc->status & IRQ_SUSPENDED)
+ synchronize_irq(irq);
+ }
+}
+EXPORT_SYMBOL_GPL(suspend_device_irqs);
+
+/**
+ * resume_device_irqs - enable interrupts disabled by suspend_device_irqs()
+ *
+ * Enable all interrupt lines previously disabled by suspend_device_irqs()
+ * that have the IRQ_SUSPENDED flag set.
+ */
+void resume_device_irqs(void)
+{
+ struct irq_desc *desc;
+ int irq;
+
+ for_each_irq_desc(irq, desc) {
+ if (!(desc->status & IRQ_SUSPENDED))
+ continue;
+ desc->status &= ~IRQ_SUSPENDED;
+ enable_irq(irq);
+ }
+}
+EXPORT_SYMBOL_GPL(resume_device_irqs);
+#endif /* CONFIG_PM_SLEEP */
Index: linux-2.6/include/linux/interrupt.h
===================================================================
--- linux-2.6.orig/include/linux/interrupt.h
+++ linux-2.6/include/linux/interrupt.h
@@ -470,4 +470,7 @@ extern int early_irq_init(void);
extern int arch_early_irq_init(void);
extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
+extern void suspend_device_irqs(void);
+extern void resume_device_irqs(void);
+
#endif
Index: linux-2.6/kernel/power/main.c
===================================================================
--- linux-2.6.orig/kernel/power/main.c
+++ linux-2.6/kernel/power/main.c
@@ -22,6 +22,7 @@
#include <linux/freezer.h>
#include <linux/vmstat.h>
#include <linux/syscalls.h>
+#include <linux/interrupt.h>
#include "power.h"
@@ -287,17 +288,20 @@ void __attribute__ ((weak)) arch_suspend
*/
static int suspend_enter(suspend_state_t state)
{
- int error = 0;
+ int error;
device_pm_lock();
- arch_suspend_disable_irqs();
- BUG_ON(!irqs_disabled());
+ suspend_device_irqs();
- if ((error = device_power_down(PMSG_SUSPEND))) {
+ error = device_power_down(PMSG_SUSPEND);
+ if (error) {
printk(KERN_ERR "PM: Some devices failed to power down\n");
goto Done;
}
+ arch_suspend_disable_irqs();
+ BUG_ON(!irqs_disabled());
+
error = sysdev_suspend(PMSG_SUSPEND);
if (!error) {
if (!suspend_test(TEST_CORE))
@@ -305,11 +309,15 @@ static int suspend_enter(suspend_state_t
sysdev_resume();
}
- device_power_up(PMSG_RESUME);
- Done:
arch_suspend_enable_irqs();
BUG_ON(irqs_disabled());
+
+ device_power_up(PMSG_RESUME);
+
+ Done:
+ resume_device_irqs();
device_pm_unlock();
+
return error;
}
Index: linux-2.6/kernel/power/disk.c
===================================================================
--- linux-2.6.orig/kernel/power/disk.c
+++ linux-2.6/kernel/power/disk.c
@@ -22,6 +22,7 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
+#include <linux/interrupt.h>
#include "power.h"
@@ -214,7 +215,8 @@ static int create_image(int platform_mod
return error;
device_pm_lock();
- local_irq_disable();
+ suspend_device_irqs();
+
/* At this point, device_suspend() has been called, but *not*
* device_power_down(). We *must* call device_power_down() now.
* Otherwise, drivers for some devices (e.g. interrupt controllers)
@@ -225,8 +227,11 @@ static int create_image(int platform_mod
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting hibernation\n");
- goto Enable_irqs;
+ goto Unlock;
}
+
+ local_irq_disable();
+
sysdev_suspend(PMSG_FREEZE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
@@ -252,12 +257,17 @@ static int create_image(int platform_mod
/* NOTE: device_power_up() is just a resume() for devices
* that suspended with irqs off ... no overall powerup.
*/
+
Power_up_devices:
+ local_irq_enable();
+
device_power_up(in_suspend ?
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
- Enable_irqs:
- local_irq_enable();
+
+ Unlock:
+ resume_device_irqs();
device_pm_unlock();
+
return error;
}
@@ -336,13 +346,17 @@ static int resume_target_kernel(void)
int error;
device_pm_lock();
- local_irq_disable();
+ suspend_device_irqs();
+
error = device_power_down(PMSG_QUIESCE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting resume\n");
- goto Enable_irqs;
+ goto Unlock;
}
+
+ local_irq_disable();
+
sysdev_suspend(PMSG_QUIESCE);
/* We'll ignore saved state, but this gets preempt count (etc) right */
save_processor_state();
@@ -366,11 +380,17 @@ static int resume_target_kernel(void)
swsusp_free();
restore_processor_state();
touch_softlockup_watchdog();
+
sysdev_resume();
- device_power_up(PMSG_RECOVER);
- Enable_irqs:
+
local_irq_enable();
+
+ device_power_up(PMSG_RECOVER);
+
+ Unlock:
+ resume_device_irqs();
device_pm_unlock();
+
return error;
}
@@ -447,15 +467,18 @@ int hibernation_platform_enter(void)
goto Finish;
device_pm_lock();
- local_irq_disable();
+ suspend_device_irqs();
+
error = device_power_down(PMSG_HIBERNATE);
if (!error) {
+ local_irq_disable();
sysdev_suspend(PMSG_HIBERNATE);
hibernation_ops->enter();
/* We should never get here */
while (1);
}
- local_irq_enable();
+
+ resume_device_irqs();
device_pm_unlock();
/*
@@ -464,12 +487,15 @@ int hibernation_platform_enter(void)
*/
Finish:
hibernation_ops->finish();
+
Resume_devices:
entering_platform_hibernation = false;
device_resume(PMSG_RESTORE);
resume_console();
+
Close:
hibernation_ops->end();
+
return error;
}
Index: linux-2.6/arch/x86/kernel/apm_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apm_32.c
+++ linux-2.6/arch/x86/kernel/apm_32.c
@@ -228,6 +228,7 @@
#include <linux/suspend.h>
#include <linux/kthread.h>
#include <linux/jiffies.h>
+#include <linux/interrupt.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -1190,8 +1191,11 @@ static int suspend(int vetoable)
struct apm_user *as;
device_suspend(PMSG_SUSPEND);
- local_irq_disable();
+
+ suspend_device_irqs();
device_power_down(PMSG_SUSPEND);
+
+ local_irq_disable();
sysdev_suspend(PMSG_SUSPEND);
local_irq_enable();
@@ -1209,9 +1213,13 @@ static int suspend(int vetoable)
if (err != APM_SUCCESS)
apm_error("suspend", err);
err = (err == APM_SUCCESS) ? 0 : -EIO;
+
sysdev_resume();
- device_power_up(PMSG_RESUME);
local_irq_enable();
+
+ device_power_up(PMSG_RESUME);
+ resume_device_irqs();
+
device_resume(PMSG_RESUME);
queue_event(APM_NORMAL_RESUME, NULL);
spin_lock(&user_list_lock);
@@ -1228,8 +1236,10 @@ static void standby(void)
{
int err;
- local_irq_disable();
+ suspend_device_irqs();
device_power_down(PMSG_SUSPEND);
+
+ local_irq_disable();
sysdev_suspend(PMSG_SUSPEND);
local_irq_enable();
@@ -1239,8 +1249,10 @@ static void standby(void)
local_irq_disable();
sysdev_resume();
- device_power_up(PMSG_RESUME);
local_irq_enable();
+
+ device_power_up(PMSG_RESUME);
+ resume_device_irqs();
}
static apm_event_t get_event(void)
Index: linux-2.6/drivers/xen/manage.c
===================================================================
--- linux-2.6.orig/drivers/xen/manage.c
+++ linux-2.6/drivers/xen/manage.c
@@ -39,12 +39,6 @@ static int xen_suspend(void *data)
BUG_ON(!irqs_disabled());
- err = device_power_down(PMSG_SUSPEND);
- if (err) {
- printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n",
- err);
- return err;
- }
err = sysdev_suspend(PMSG_SUSPEND);
if (err) {
printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
@@ -69,13 +63,6 @@ static int xen_suspend(void *data)
xen_mm_unpin_all();
sysdev_resume();
- device_power_up(PMSG_RESUME);
-
- if (!*cancelled) {
- xen_irq_resume();
- xen_console_resume();
- xen_timer_resume();
- }
return 0;
}
@@ -108,6 +95,14 @@ static void do_suspend(void)
/* XXX use normal device tree? */
xenbus_suspend();
+ suspend_device_irqs();
+
+ err = device_power_down(PMSG_SUSPEND);
+ if (err) {
+ printk(KERN_ERR "device_power_down failed: %d\n", err);
+ goto resume_devices;
+ }
+
err = stop_machine(xen_suspend, &cancelled, &cpumask_of_cpu(0));
if (err) {
printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
@@ -120,6 +115,17 @@ static void do_suspend(void)
} else
xenbus_suspend_cancel();
+ device_power_up(PMSG_RESUME);
+
+ if (!cancelled) {
+ xen_irq_resume();
+ xen_console_resume();
+ xen_timer_resume();
+ }
+
+resume_devices:
+ resume_device_irqs();
+
device_resume(PMSG_RESUME);
/* Make sure timer events get retriggered on all CPUs */
Index: linux-2.6/kernel/kexec.c
===================================================================
--- linux-2.6.orig/kernel/kexec.c
+++ linux-2.6/kernel/kexec.c
@@ -1454,7 +1454,7 @@ int kernel_kexec(void)
if (error)
goto Resume_devices;
device_pm_lock();
- local_irq_disable();
+ suspend_device_irqs();
/* At this point, device_suspend() has been called,
* but *not* device_power_down(). We *must*
* device_power_down() now. Otherwise, drivers for
@@ -1464,8 +1464,9 @@ int kernel_kexec(void)
*/
error = device_power_down(PMSG_FREEZE);
if (error)
- goto Enable_irqs;
+ goto Resume_irqs;
+ local_irq_disable();
/* Suspend system devices */
error = sysdev_suspend(PMSG_FREEZE);
if (error)
@@ -1484,9 +1485,10 @@ int kernel_kexec(void)
if (kexec_image->preserve_context) {
sysdev_resume();
Power_up_devices:
- device_power_up(PMSG_RESTORE);
- Enable_irqs:
local_irq_enable();
+ device_power_up(PMSG_RESTORE);
+ Resume_irqs:
+ resume_device_irqs();
device_pm_unlock();
enable_nonboot_cpus();
Resume_devices:
Index: linux-2.6/include/linux/irq.h
===================================================================
--- linux-2.6.orig/include/linux/irq.h
+++ linux-2.6/include/linux/irq.h
@@ -65,6 +65,7 @@ typedef void (*irq_flow_handler_t)(unsig
#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
#define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/
+#define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */
#ifdef CONFIG_IRQ_PER_CPU
# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902230048.33635.rjw@sisk.pl>
@ 2009-02-23 0:05 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902221603440.3111@localhost.localdomain>
` (4 subsequent siblings)
5 siblings, 0 replies; 77+ messages in thread
From: Linus Torvalds @ 2009-02-23 0:05 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Mon, 23 Feb 2009, Rafael J. Wysocki wrote:
>
> The updated patch is appended.
Ok, looks sane to me. I'll try it on my poor eeepc, although right now
Fedora-11 rawhide (that poor laptop gets _all_ the crazy stuff thrown at
it, and runs btrfs to boot) has broken something in X by enabling DRI2, so
I may not get around to it today.
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <alpine.LFD.2.00.0902221603440.3111@localhost.localdomain>
@ 2009-02-23 1:23 ` Linus Torvalds
0 siblings, 0 replies; 77+ messages in thread
From: Linus Torvalds @ 2009-02-23 1:23 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Sun, 22 Feb 2009, Linus Torvalds wrote:
>
> Ok, looks sane to me. I'll try it on my poor eeepc, although right now
> Fedora-11 rawhide (that poor laptop gets _all_ the crazy stuff thrown at
> it, and runs btrfs to boot) has broken something in X by enabling DRI2, so
> I may not get around to it today.
Well, the suspend/resume part seems to work for me. My X issues keep my
from testing it with compiz, but here's an ack for v2 of the 2/2 patch at
least on my EeePC.
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902230048.33635.rjw@sisk.pl>
2009-02-23 0:05 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902221603440.3111@localhost.localdomain>
@ 2009-02-23 3:04 ` Eric W. Biederman
2009-02-23 8:36 ` Ingo Molnar
` (2 subsequent siblings)
5 siblings, 0 replies; 77+ messages in thread
From: Eric W. Biederman @ 2009-02-23 3:04 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Ingo Molnar, Linus Torvalds, pm list
"Rafael J. Wysocki" <rjw@sisk.pl> writes:
> On Sunday 22 February 2009, Rafael J. Wysocki wrote:
>> On Sunday 22 February 2009, Linus Torvalds wrote:
>> >
>> > On Sun, 22 Feb 2009, Rafael J. Wysocki wrote:
> [--snip--]
>>
>> Thanks a lot for your comments, I'll send an updated patch shortly.
>
> The updated patch is appended.
>
> It has been initially tested, but requires more testing, especially with APM,
> XEN, kexec jump etc.
>
> Thanks,
> Rafael
>
> ---
> From: Rafael J. Wysocki <rjw@sisk.pl>
> Subject: PM: Rework handling of interrupts during suspend-resume (rev. 2)
>
> Introduce two helper functions allowing us to disable device
> interrupts (at the IO-APIC level) during suspend or hibernation
> and enable them during the subsequent resume, respectively, so that
> the timer interrupts are enabled while "late" suspend callbacks and
> "early" resume callbacks provided by device drivers are being
> executed.
>
> Use these functions to rework the handling of interrupts during
> suspend (hibernation) and resume. Namely, interrupts will only be
> disabled on the CPU right before suspending sysdevs, while device
> interrupts will be disabled (at the IO-APIC level), with the help of
> the new helper function, before calling "late" suspend callbacks
> provided by device drivers and analogously during resume.
I don't have an issue with the code, but I do have an issue with
this description of it.
Calling disable especially for ioapics does nothing directly.
It simply arranges for the irq to be marked pending and for the
irq to be masked if the irq happens.
So what you are doing is arranging so that no interrupts will be
delivered to drivers. Not really disabling interrupts at the IO-APIC
level.
In addition not all interrupts (even on x86) go through an IO-APIC anymore
so describing the patch in terms of an IO-APIC makes it a bit hard to
understand what your intent actually is.
Eric
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902230048.33635.rjw@sisk.pl>
` (2 preceding siblings ...)
2009-02-23 3:04 ` Eric W. Biederman
@ 2009-02-23 8:36 ` Ingo Molnar
[not found] ` <m1ab8ddfbr.fsf@fess.ebiederm.org>
[not found] ` <20090223083645.GA9582@elte.hu>
5 siblings, 0 replies; 77+ messages in thread
From: Ingo Molnar @ 2009-02-23 8:36 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Linus Torvalds, Thomas Gleixner
* Rafael J. Wysocki <rjw@sisk.pl> wrote:
> On Sunday 22 February 2009, Rafael J. Wysocki wrote:
> > On Sunday 22 February 2009, Linus Torvalds wrote:
> > >
> > > On Sun, 22 Feb 2009, Rafael J. Wysocki wrote:
> [--snip--]
> >
> > Thanks a lot for your comments, I'll send an updated patch shortly.
>
> The updated patch is appended.
>
> It has been initially tested, but requires more testing,
> especially with APM, XEN, kexec jump etc.
> arch/x86/kernel/apm_32.c | 20 ++++++++++++----
> drivers/xen/manage.c | 32 +++++++++++++++----------
> include/linux/interrupt.h | 3 ++
> include/linux/irq.h | 1
> kernel/irq/manage.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++
> kernel/kexec.c | 10 ++++----
> kernel/power/disk.c | 46 +++++++++++++++++++++++++++++--------
> kernel/power/main.c | 20 +++++++++++-----
> 8 files changed, 152 insertions(+), 37 deletions(-)
>
> Index: linux-2.6/kernel/irq/manage.c
> ===================================================================
> --- linux-2.6.orig/kernel/irq/manage.c
> +++ linux-2.6/kernel/irq/manage.c
> @@ -746,3 +746,60 @@ int request_irq(unsigned int irq, irq_ha
> return retval;
> }
> EXPORT_SYMBOL(request_irq);
> +
> +#ifdef CONFIG_PM_SLEEP
> +/**
> + * suspend_device_irqs - disable all currently enabled interrupt lines
Code placement nit: please dont put new #ifdef blocks into the
core IRQ code, add a kernel/irq/power.c file instead and make
the kbuild rule depend on PM_SLEEP.
The new suspend_device_irqs() and resume_device_irqs() doesnt
use any manage.c internals so this should work straight away.
> + *
> + * During system-wide suspend or hibernation device interrupts need to be
> + * disabled at the chip level and this function is provided for this
> + * purpose. It disables all interrupt lines that are enabled at the
> + * moment and sets the IRQ_SUSPENDED flag for them.
> + */
> +void suspend_device_irqs(void)
> +{
> + struct irq_desc *desc;
> + int irq;
> +
> + for_each_irq_desc(irq, desc) {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&desc->lock, flags);
> +
> + if (!desc->depth && desc->action
> + && !(desc->action->flags & IRQF_TIMER)) {
> + desc->depth++;
> + desc->status |= IRQ_DISABLED | IRQ_SUSPENDED;
> + desc->chip->disable(irq);
> + }
> +
> + spin_unlock_irqrestore(&desc->lock, flags);
> + }
> +
> + for_each_irq_desc(irq, desc) {
> + if (desc->status & IRQ_SUSPENDED)
> + synchronize_irq(irq);
> + }
Optimization/code-flow nit: a possibility might be to do a
single loop, i.e. i think it's safe to couple the disable+sync
bits [as in 99.99% of the cases there will be no in-execution
irq handlers when we execute this.]
Something like:
int do_sync = 0;
spin_lock_irqsave(&desc->lock, flags);
if (!desc->depth && desc->action
&& !(desc->action->flags & IRQF_TIMER)) {
desc->depth++;
desc->status |= IRQ_DISABLED | IRQ_SUSPENDED;
desc->chip->disable(irq);
do_sync = 1;
}
spin_unlock_irqrestore(&desc->lock, flags);
if (do_sync)
synchronize_irq(irq);
In fact i'd suggest to factor out this logic into a separate
__suspend_irq(irq) / __resume_irq(irq) inline helper functions.
(They should be inline for the time being as they are not
shared-irq-safe so they shouldnt really be exposed to drivers in
such a singular capacity.)
Doing so will also fix the line-break ugliness of the first
branch - as in a standalone function the condition fits into a
single line.
There's a performance reason as well: especially when we have a
lot of IRQ descriptors that will be about twice as fast. (with a
large iteration scope this function is cachemiss-limited and
doing this passes doubles the cachemiss rate.)
> +}
> +EXPORT_SYMBOL_GPL(suspend_device_irqs);
> +
> +/**
> + * resume_device_irqs - enable interrupts disabled by suspend_device_irqs()
> + *
> + * Enable all interrupt lines previously disabled by suspend_device_irqs()
> + * that have the IRQ_SUSPENDED flag set.
> + */
> +void resume_device_irqs(void)
> +{
> + struct irq_desc *desc;
> + int irq;
> +
> + for_each_irq_desc(irq, desc) {
> + if (!(desc->status & IRQ_SUSPENDED))
> + continue;
> + desc->status &= ~IRQ_SUSPENDED;
> + enable_irq(irq);
> + }
Robustness+optimization nit: this will work but could be done in
a nicer way: enable_irq() should auto-clear IRQ_SUSPENDED. (We
already clear flags there so it's even a tiny bit faster this
way.)
We definitely dont want IRQ_SUSPENDED to 'leak' out into an
enabled line, should something call enable_irq() on a suspended
line. So either make it auto-unsuspend in enable_irq(), or add
an extra WARN_ON() to enable_irq(), to make sure IRQ_SUSPENDED
is always off by that time.
> + arch_suspend_disable_irqs();
> + BUG_ON(!irqs_disabled());
Please. We just disabled all devices - a BUG_ON() is a very
counter-productive thing to do here - chances are the user will
never see anything but a hang. So please turn this into a nice
WARN_ONCE().
> --- linux-2.6.orig/include/linux/interrupt.h
> +++ linux-2.6/include/linux/interrupt.h
> @@ -470,4 +470,7 @@ extern int early_irq_init(void);
> extern int arch_early_irq_init(void);
> extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
>
> +extern void suspend_device_irqs(void);
> +extern void resume_device_irqs(void);
Header cleanliness nit: please dont just throw new prototypes to
the tail of headers, but think about where they fit in best,
logically.
These two new prototypes should go straight after the normal irq
line state management functions:
extern void disable_irq_nosync(unsigned int irq);
extern void disable_irq(unsigned int irq);
extern void enable_irq(unsigned int irq);
Perhaps also with a comment like this:
/*
* Note: dont use these functions in driver code - they are for
* core kernel use only.
*/
> +++ linux-2.6/kernel/power/main.c
[...]
> +
> + Unlock:
> + resume_device_irqs();
Small drive-by style nit: while at it could you please fix the
capitalization and the naming of the label (and all labels in
this file)? The standard label is "out_unlock". [and
"err_unlock" for failure cases - but this isnt a failure case.]
There's 43 such bad label names in kernel/power/*.c, see the
output of:
git grep '^ [A-Z][a-z].*:$' kernel/power/
> Index: linux-2.6/arch/x86/kernel/apm_32.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/apm_32.c
> +++ linux-2.6/arch/x86/kernel/apm_32.c
> +
> + suspend_device_irqs();
> device_power_down(PMSG_SUSPEND);
> +
> + local_irq_disable();
hm, this is a very repetitive pattern, all around the various
suspend/resume variants. Might make sense to make:
device_power_down(PMSG_SUSPEND);
do the irq line disabling plus the local irq disabling
automatically. That also means it cannot be forgotten. The
symmetric action should happen for PMSG_RESUME.
Is there ever a case where we want a different pattern?
> Index: linux-2.6/drivers/xen/manage.c
> ===================================================================
> --- linux-2.6.orig/drivers/xen/manage.c
> +++ linux-2.6/drivers/xen/manage.c
> @@ -39,12 +39,6 @@ static int xen_suspend(void *data)
> - if (!*cancelled) {
> - xen_irq_resume();
> - xen_console_resume();
> - xen_timer_resume();
This change needs a second look. xen_suspend() is a
stop_machine() handler and as such executes on specific CPUs,
and your change modifies this. OTOH, i had a look at these
handlers and it all looks safe. Jeremy?
> +resume_devices:
> + resume_device_irqs();
Small style nit: labels should start with a space character.
I.e. it should be:
> + resume_devices:
> + resume_device_irqs();
> +++ linux-2.6/kernel/kexec.c
> @@ -1454,7 +1454,7 @@ int kernel_kexec(void)
> if (error)
> goto Resume_devices;
> device_pm_lock();
> - local_irq_disable();
> + suspend_device_irqs();
> /* At this point, device_suspend() has been called,
> * but *not* device_power_down(). We *must*
> * device_power_down() now. Otherwise, drivers for
> @@ -1464,8 +1464,9 @@ int kernel_kexec(void)
> */
> error = device_power_down(PMSG_FREEZE);
> if (error)
> - goto Enable_irqs;
> + goto Resume_irqs;
>
> + local_irq_disable();
> /* Suspend system devices */
> error = sysdev_suspend(PMSG_FREEZE);
> if (error)
> @@ -1484,9 +1485,10 @@ int kernel_kexec(void)
> if (kexec_image->preserve_context) {
> sysdev_resume();
> Power_up_devices:
> - device_power_up(PMSG_RESTORE);
> - Enable_irqs:
> local_irq_enable();
> + device_power_up(PMSG_RESTORE);
> + Resume_irqs:
> + resume_device_irqs();
> device_pm_unlock();
> enable_nonboot_cpus();
> Resume_devices:
(same comment about label style applies here too.)
> Index: linux-2.6/include/linux/irq.h
> ===================================================================
> --- linux-2.6.orig/include/linux/irq.h
> +++ linux-2.6/include/linux/irq.h
> @@ -65,6 +65,7 @@ typedef void (*irq_flow_handler_t)(unsig
> #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
> #define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
> #define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/
> +#define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */
>
> #ifdef CONFIG_IRQ_PER_CPU
> # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
Note, you should probably make PM_SLEEP depend on
GENERIC_HARDIRQS - as this change will break the build on all
non-genirq architectures. (sparc, alpha, etc.)
Ingo
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <m1ab8ddfbr.fsf@fess.ebiederm.org>
@ 2009-02-23 8:44 ` Ingo Molnar
[not found] ` <20090223084400.GB9582@elte.hu>
1 sibling, 0 replies; 77+ messages in thread
From: Ingo Molnar @ 2009-02-23 8:44 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, pm list, Linus Torvalds,
Thomas Gleixner
* Eric W. Biederman <ebiederm@xmission.com> wrote:
> "Rafael J. Wysocki" <rjw@sisk.pl> writes:
>
> > On Sunday 22 February 2009, Rafael J. Wysocki wrote:
> >> On Sunday 22 February 2009, Linus Torvalds wrote:
> >> >
> >> > On Sun, 22 Feb 2009, Rafael J. Wysocki wrote:
> > [--snip--]
> >>
> >> Thanks a lot for your comments, I'll send an updated patch shortly.
> >
> > The updated patch is appended.
> >
> > It has been initially tested, but requires more testing, especially with APM,
> > XEN, kexec jump etc.
> >
> > Thanks,
> > Rafael
> >
> > ---
> > From: Rafael J. Wysocki <rjw@sisk.pl>
> > Subject: PM: Rework handling of interrupts during suspend-resume (rev. 2)
> >
> > Introduce two helper functions allowing us to disable device
> > interrupts (at the IO-APIC level) during suspend or
> > hibernation and enable them during the subsequent resume,
> > respectively, so that the timer interrupts are enabled while
> > "late" suspend callbacks and "early" resume callbacks
> > provided by device drivers are being executed.
> >
> > Use these functions to rework the handling of interrupts
> > during suspend (hibernation) and resume. Namely, interrupts
> > will only be disabled on the CPU right before suspending
> > sysdevs, while device interrupts will be disabled (at the
> > IO-APIC level), with the help of the new helper function,
> > before calling "late" suspend callbacks provided by device
> > drivers and analogously during resume.
>
> I don't have an issue with the code, but I do have an issue
> with this description of it.
>
> Calling disable especially for ioapics does nothing directly.
> It simply arranges for the irq to be marked pending and for
> the irq to be masked if the irq happens.
>
> So what you are doing is arranging so that no interrupts will
> be delivered to drivers. Not really disabling interrupts at
> the IO-APIC level.
>
> In addition not all interrupts (even on x86) go through an
> IO-APIC anymore so describing the patch in terms of an IO-APIC
> makes it a bit hard to understand what your intent actually
> is.
I think this aspect has been well-understood during the
discussion of this topic and it's just a slightly misleading
changelog.
The new suspend code does not rely on truly disabling IRQs on
the low level. The purpose is to not get IRQs to drivers - which
might crash/hang/race/misbehave.
Still, it might make sense to not just use the ->disable
sequence but primarily the ->shutdown irqchip method (when it's
available in the irqchip).
While we obviously cannot turn off the PIC that delivers timer
IRQs at this stage - there's no theoretical reason why the
suspend sequence couldnt power down some secondary PICs as well
- in some arch code, or maybe even in the generic driver suspend
sequence if the device tree is structured carefully enough so
that the PIC gets turned off last.
So turning off all device IRQs in the most lowlevel way possible
would be prudent. I.e. the suspend stage should do:
if (desc->chip->shutdown)
desc->chip->shutdown(irq);
else
desc->chip->disable(irq);
(there's no change needed for the resume stage)
Ingo
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <20090223084400.GB9582@elte.hu>
@ 2009-02-23 9:22 ` Eric W. Biederman
[not found] ` <m17i3ha4nk.fsf@fess.ebiederm.org>
1 sibling, 0 replies; 77+ messages in thread
From: Eric W. Biederman @ 2009-02-23 9:22 UTC (permalink / raw)
To: Ingo Molnar
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, pm list, Linus Torvalds,
Thomas Gleixner
Ingo Molnar <mingo@elte.hu> writes:
> I think this aspect has been well-understood during the
> discussion of this topic and it's just a slightly misleading
> changelog.
As I was a member of that discussion I did not see that.
It took me several passes through the patches to realize
the goal is to allow drivers to be able to sleep while they
are in their late pm shutdown routines.
Why we want this I don't know. But it seems simple enough
to implement, and it makes it harder to get the late pm
suspend routines wrong, which is always good.
> The new suspend code does not rely on truly disabling IRQs on
> the low level. The purpose is to not get IRQs to drivers - which
> might crash/hang/race/misbehave.
Reasonable. I expect one of the problems with drivers getting it
wrong is that the interface is too complex for mortal humans to
understand.
> Still, it might make sense to not just use the ->disable
> sequence but primarily the ->shutdown irqchip method (when it's
> available in the irqchip).
Disable seems fine to me. This is interesting in the context
of all of the irqs that will when masked show up somewhere
else (think boot interrupts).
> While we obviously cannot turn off the PIC that delivers timer
> IRQs at this stage - there's no theoretical reason why the
> suspend sequence couldnt power down some secondary PICs as well
> - in some arch code, or maybe even in the generic driver suspend
> sequence if the device tree is structured carefully enough so
> that the PIC gets turned off last.
If the point is simply to prevent deliver of irqs to the drivers
I don't see the point of anything more than what the patch does
now.
Eric
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <m17i3ha4nk.fsf@fess.ebiederm.org>
@ 2009-02-23 9:44 ` Ingo Molnar
2009-02-23 10:13 ` Benjamin Herrenschmidt
[not found] ` <20090223094441.GM9582@elte.hu>
2 siblings, 0 replies; 77+ messages in thread
From: Ingo Molnar @ 2009-02-23 9:44 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, pm list, Linus Torvalds,
Thomas Gleixner
* Eric W. Biederman <ebiederm@xmission.com> wrote:
> Ingo Molnar <mingo@elte.hu> writes:
>
> > I think this aspect has been well-understood during the
> > discussion of this topic and it's just a slightly misleading
> > changelog.
>
> As I was a member of that discussion I did not see that.
>
> It took me several passes through the patches to realize the
> goal is to allow drivers to be able to sleep while they are in
> their late pm shutdown routines.
>
> Why we want this I don't know. But it seems simple enough to
> implement, and it makes it harder to get the late pm suspend
> routines wrong, which is always good.
That's not the only goal. The other goal is to further shrink a
particular window of suspend fragility: the irqs-disabled stage
of the suspend/resume sequence.
Since suspend/resume is a mini-reboot sequence, there's a large
amount of code executed - and the variety of code is large as
well. We had repeat cases of random drivers re-enabling
interrupts and thus breaking other drivers - and these are nasty
to debug.
So this patchset disables device IRQs centrally and serializes
with pending work - so there's no races with pending IRQs
anymore.
The fact that we keep the timer irq running is two-fold: firstly
the timer code is special and not really part of the regular
suspend/resume sequence.
Drivers want to take timestamps, sometimes they even want to do
a small usleep(), etc. Ideally the suspend/resume code is pretty
much _the same_ as a regular bootup (and shutdown) code - so we
want to provide a similar environment to how drivers initialize
and deinitialize, and we want to enable them to share code
between bootup/shutdown and suspend/resume agressively.
So the more generic kernel environment we give these fragile
handlers, the better we are off in the end. Since we already had
IRQS_TIMER, that was just the natural thing to do.
> > The new suspend code does not rely on truly disabling IRQs
> > on the low level. The purpose is to not get IRQs to drivers
> > - which might crash/hang/race/misbehave.
>
> Reasonable. I expect one of the problems with drivers getting
> it wrong is that the interface is too complex for mortal
> humans to understand.
The suspend/resume state machine certainly used to be a piece of
code that makes a seasoned kernel developer weep in fear.
That has changed drastically in the past few months. The
suspend+hibernation logic got unified (at least as far as driver
methods go), and all the flow and ordering has been cleaned up
and has been made more robust.
What makes s2ram fragile is not human failure but the
combination of a handful of physical property:
1) Psychology: shutting the lid or pushing the suspend button is
a deceivingly 'simple' action to the user. But under the
hood, a ton of stuff happens: we deinitialize a lot of
things, we go through _all hardware state_, and we do so in a
serial fashion. If just one piece fails to do the right
thing, the box might not resume. Still, the user expects this
'simple' thing to just work, all the time. No excuses
accepted.
2) Length of code: To get a successful s2ram sequence the kernel
runs through tens of thousands of lines of code. Code which
never gets executed on a normal box - only if we s2ram. If
just one step fails, we get a hung box.
3) Debuggability: a lot of s2ram code runs with the console off,
making any bugs hard to debug. Furthermore we have no
meaningful persistent storage either for kernel bug messages.
The RTC trick of PM_DEBUG works but is a very narrow channel
of information and it takes a lot of time to debug a bug via
that method.
The combination of these factors really makes up for a perfect
storm in terms of kernel technology: we have this
very-deceivingly-simple-looking but complex-and-rarely-executed
piece of code, which is very hard to debug.
Even just one of these factors would be enough to make an
otherwise healthy subsystem fragile - no wonder s2ram has been a
problem ever since it existed in the upstream kernel.
So now we need just one thing: patience and more of the same
good stuff that happened lately.
> > Still, it might make sense to not just use the ->disable
> > sequence but primarily the ->shutdown irqchip method (when
> > it's available in the irqchip).
>
> Disable seems fine to me. This is interesting in the context
> of all of the irqs that will when masked show up somewhere
> else (think boot interrupts).
>
> > While we obviously cannot turn off the PIC that delivers
> > timer IRQs at this stage - there's no theoretical reason why
> > the suspend sequence couldnt power down some secondary PICs
> > as well - in some arch code, or maybe even in the generic
> > driver suspend sequence if the device tree is structured
> > carefully enough so that the PIC gets turned off last.
>
> If the point is simply to prevent deliver of irqs to the
> drivers I don't see the point of anything more than what the
> patch does now.
... except for the usecase i described above. Say some PIC sits
on a piece of silicon which gets turned off. I'm not talking
about x86 but some custom device. We really dont want that IRQ
line to send half of an IRQ message (un-ACK-ed) when it gets
turned off. So physically 'suspending' all IRQ lines does make a
certain level of long-term sense.
Especially if it's just 3 extra lines of code to the existing
patch.
There _might_ be one downside: overhead of ->shutdown() methods.
With a typical IRQ count on the typical netbook i doubt it's
more than ~50 usecs combined.
Ingo
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <m17i3ha4nk.fsf@fess.ebiederm.org>
2009-02-23 9:44 ` Ingo Molnar
@ 2009-02-23 10:13 ` Benjamin Herrenschmidt
[not found] ` <20090223094441.GM9582@elte.hu>
2 siblings, 0 replies; 77+ messages in thread
From: Benjamin Herrenschmidt @ 2009-02-23 10:13 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Ingo Molnar, Linus Torvalds, pm list
On Mon, 2009-02-23 at 01:22 -0800, Eric W. Biederman wrote:
> Ingo Molnar <mingo@elte.hu> writes:
>
>
> > I think this aspect has been well-understood during the
> > discussion of this topic and it's just a slightly misleading
> > changelog.
>
> As I was a member of that discussion I did not see that.
>
> It took me several passes through the patches to realize
> the goal is to allow drivers to be able to sleep while they
> are in their late pm shutdown routines.
>
> Why we want this I don't know. But it seems simple enough
> to implement, and it makes it harder to get the late pm
> suspend routines wrong, which is always good.
To simplify (it's really all in the discussion we had the last few
weeks) It boils down to being able to do the proper ACPI calls (which
require core interrupts to be on, ie, ACPI uses mutexes, sleeps, etc...)
after we have saved and before we restore the PCI config space, in the
late suspend or early resume stages of devices.
Cheers,
Ben.
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <20090223094441.GM9582@elte.hu>
@ 2009-02-23 10:42 ` Eric W. Biederman
[not found] ` <m1bpst77te.fsf@fess.ebiederm.org>
1 sibling, 0 replies; 77+ messages in thread
From: Eric W. Biederman @ 2009-02-23 10:42 UTC (permalink / raw)
To: Ingo Molnar
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, pm list, Linus Torvalds,
Thomas Gleixner
Ingo Molnar <mingo@elte.hu> writes:
> * Eric W. Biederman <ebiederm@xmission.com> wrote:
>
>> Ingo Molnar <mingo@elte.hu> writes:
>>
>> > I think this aspect has been well-understood during the
>> > discussion of this topic and it's just a slightly misleading
>> > changelog.
>>
>> As I was a member of that discussion I did not see that.
>>
>> It took me several passes through the patches to realize the
>> goal is to allow drivers to be able to sleep while they are in
>> their late pm shutdown routines.
>>
>> Why we want this I don't know. But it seems simple enough to
>> implement, and it makes it harder to get the late pm suspend
>> routines wrong, which is always good.
>
> That's not the only goal. The other goal is to further shrink a
> particular window of suspend fragility: the irqs-disabled stage
> of the suspend/resume sequence.
>
> Since suspend/resume is a mini-reboot sequence, there's a large
> amount of code executed - and the variety of code is large as
> well. We had repeat cases of random drivers re-enabling
> interrupts and thus breaking other drivers - and these are nasty
> to debug.
>
> So this patchset disables device IRQs centrally and serializes
> with pending work - so there's no races with pending IRQs
> anymore.
>
> The fact that we keep the timer irq running is two-fold: firstly
> the timer code is special and not really part of the regular
> suspend/resume sequence.
>
> Drivers want to take timestamps, sometimes they even want to do
> a small usleep(), etc. Ideally the suspend/resume code is pretty
> much _the same_ as a regular bootup (and shutdown) code - so we
> want to provide a similar environment to how drivers initialize
> and deinitialize, and we want to enable them to share code
> between bootup/shutdown and suspend/resume agressively.
>
> So the more generic kernel environment we give these fragile
> handlers, the better we are off in the end. Since we already had
> IRQS_TIMER, that was just the natural thing to do.
I am all for sharing code, especially if we can factor if
we can find common factors that do the same thing.
I don't know how many times I have found drivers doing something
weird in their shutdown routines that they don't know how
to get the device out of. The e1000 driver has shown up several
times because it likes to suspend the device on shutdown.
The fact that the methods exposed to drivers were only defined
to be usable on the s2ram/hibernate path is something I have
brought up on more than one occasion as a bad choice.
I'm really not convinced that the rational for separating
out the shutdown methods from the remove methods has
been very good. That of we don't need to clean up the in-kernel
data structures on reboot so why do something extra that can
introduce instability.
So having been watching a smaller form of this drama on the
reboot path for several years. Having had a device method
with fixed semantics, and not the dwm sematics of the historical
suspend routing. I expect there is still a ways to go before
it is simple and easy for drivers to figure out what they need
to implement out of the confusing variety of possible device
methods.
>> > The new suspend code does not rely on truly disabling IRQs
>> > on the low level. The purpose is to not get IRQs to drivers
>> > - which might crash/hang/race/misbehave.
>>
>> Reasonable. I expect one of the problems with drivers getting
>> it wrong is that the interface is too complex for mortal
>> humans to understand.
>
> The suspend/resume state machine certainly used to be a piece of
> code that makes a seasoned kernel developer weep in fear.
>
> That has changed drastically in the past few months. The
> suspend+hibernation logic got unified (at least as far as driver
> methods go), and all the flow and ordering has been cleaned up
> and has been made more robust.
I will have to look again. My impression is that overloading
a single method is part of what got us into this mess in the
first place.
No that I don't see things getting better.
> What makes s2ram fragile is not human failure but the
> combination of a handful of physical property:
>
> 1) Psychology: shutting the lid or pushing the suspend button is
> a deceivingly 'simple' action to the user. But under the
> hood, a ton of stuff happens: we deinitialize a lot of
> things, we go through _all hardware state_, and we do so in a
> serial fashion. If just one piece fails to do the right
> thing, the box might not resume. Still, the user expects this
> 'simple' thing to just work, all the time. No excuses
> accepted.
>
> 2) Length of code: To get a successful s2ram sequence the kernel
> runs through tens of thousands of lines of code. Code which
> never gets executed on a normal box - only if we s2ram. If
> just one step fails, we get a hung box.
>
> 3) Debuggability: a lot of s2ram code runs with the console off,
> making any bugs hard to debug. Furthermore we have no
> meaningful persistent storage either for kernel bug messages.
> The RTC trick of PM_DEBUG works but is a very narrow channel
> of information and it takes a lot of time to debug a bug via
> that method.
Yep that is an issue.
> The combination of these factors really makes up for a perfect
> storm in terms of kernel technology: we have this
> very-deceivingly-simple-looking but complex-and-rarely-executed
> piece of code, which is very hard to debug.
And much of this as you are finding with this piece of code
is how the software was designed rather then how the software
needed to be.
> Even just one of these factors would be enough to make an
> otherwise healthy subsystem fragile - no wonder s2ram has been a
> problem ever since it existed in the upstream kernel.
>
> So now we need just one thing: patience and more of the same
> good stuff that happened lately.
I think there has been some good progress, and so I am happy
to be patient. I will still mention on occasion what it
seems we are doing wrong. Unfortunately I don't have time
to do a lot more than that.
>> > Still, it might make sense to not just use the ->disable
>> > sequence but primarily the ->shutdown irqchip method (when
>> > it's available in the irqchip).
>>
>> Disable seems fine to me. This is interesting in the context
>> of all of the irqs that will when masked show up somewhere
>> else (think boot interrupts).
>>
>> > While we obviously cannot turn off the PIC that delivers
>> > timer IRQs at this stage - there's no theoretical reason why
>> > the suspend sequence couldnt power down some secondary PICs
>> > as well - in some arch code, or maybe even in the generic
>> > driver suspend sequence if the device tree is structured
>> > carefully enough so that the PIC gets turned off last.
>>
>> If the point is simply to prevent deliver of irqs to the
>> drivers I don't see the point of anything more than what the
>> patch does now.
>
> ... except for the usecase i described above. Say some PIC sits
> on a piece of silicon which gets turned off. I'm not talking
> about x86 but some custom device. We really dont want that IRQ
> line to send half of an IRQ message (un-ACK-ed) when it gets
> turned off. So physically 'suspending' all IRQ lines does make a
> certain level of long-term sense.
Good point. We will loose both level and edge triggered events
that occur between suspending the irqs and restoring them but
that is inevitable. So we might as well call shutdown and totally
turn off the irqs if we can.
I don't know where in the state machine this is getting called but
I would suggest doing this before we shutdown cpus. We are quickly
reaching the point where laptops will exceed the 8 core limit, of
lowest priority delivery mode. And only in lowest priority delivery
mode is it possible to migrate irqs outside of the interrupt handlers.
That plus if we suspend the irqs before shutting down the cpus it means
we can safely support more vectors than a single cpu can catch.
I was a little a worried about the shutdown code path because it
requires in the worst case acking a level triggered irq when we have
it disabled, but looking at ack_apic_level that appears to be a well
tested code path. We just can't reprogram the vector.
> There _might_ be one downside: overhead of ->shutdown() methods.
> With a typical IRQ count on the typical netbook i doubt it's
> more than ~50 usecs combined.
Eric
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <m1bpst77te.fsf@fess.ebiederm.org>
@ 2009-02-23 11:03 ` Rafael J. Wysocki
2009-02-23 11:04 ` Ingo Molnar
` (2 subsequent siblings)
3 siblings, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-23 11:03 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Ingo Molnar, Linus Torvalds, pm list
On Monday 23 February 2009, Eric W. Biederman wrote:
> Ingo Molnar <mingo@elte.hu> writes:
>
> > * Eric W. Biederman <ebiederm@xmission.com> wrote:
> >
> >> Ingo Molnar <mingo@elte.hu> writes:
> >>
> >> > I think this aspect has been well-understood during the
> >> > discussion of this topic and it's just a slightly misleading
> >> > changelog.
> >>
> >> As I was a member of that discussion I did not see that.
> >>
> >> It took me several passes through the patches to realize the
> >> goal is to allow drivers to be able to sleep while they are in
> >> their late pm shutdown routines.
> >>
> >> Why we want this I don't know. But it seems simple enough to
> >> implement, and it makes it harder to get the late pm suspend
> >> routines wrong, which is always good.
> >
> > That's not the only goal. The other goal is to further shrink a
> > particular window of suspend fragility: the irqs-disabled stage
> > of the suspend/resume sequence.
> >
> > Since suspend/resume is a mini-reboot sequence, there's a large
> > amount of code executed - and the variety of code is large as
> > well. We had repeat cases of random drivers re-enabling
> > interrupts and thus breaking other drivers - and these are nasty
> > to debug.
> >
> > So this patchset disables device IRQs centrally and serializes
> > with pending work - so there's no races with pending IRQs
> > anymore.
> >
> > The fact that we keep the timer irq running is two-fold: firstly
> > the timer code is special and not really part of the regular
> > suspend/resume sequence.
> >
> > Drivers want to take timestamps, sometimes they even want to do
> > a small usleep(), etc. Ideally the suspend/resume code is pretty
> > much _the same_ as a regular bootup (and shutdown) code - so we
> > want to provide a similar environment to how drivers initialize
> > and deinitialize, and we want to enable them to share code
> > between bootup/shutdown and suspend/resume agressively.
> >
> > So the more generic kernel environment we give these fragile
> > handlers, the better we are off in the end. Since we already had
> > IRQS_TIMER, that was just the natural thing to do.
>
> I am all for sharing code, especially if we can factor if
> we can find common factors that do the same thing.
>
> I don't know how many times I have found drivers doing something
> weird in their shutdown routines that they don't know how
> to get the device out of. The e1000 driver has shown up several
> times because it likes to suspend the device on shutdown.
>
> The fact that the methods exposed to drivers were only defined
> to be usable on the s2ram/hibernate path is something I have
> brought up on more than one occasion as a bad choice.
>
> I'm really not convinced that the rational for separating
> out the shutdown methods from the remove methods has
> been very good. That of we don't need to clean up the in-kernel
> data structures on reboot so why do something extra that can
> introduce instability.
>
> So having been watching a smaller form of this drama on the
> reboot path for several years. Having had a device method
> with fixed semantics, and not the dwm sematics of the historical
> suspend routing. I expect there is still a ways to go before
> it is simple and easy for drivers to figure out what they need
> to implement out of the confusing variety of possible device
> methods.
>
> >> > The new suspend code does not rely on truly disabling IRQs
> >> > on the low level. The purpose is to not get IRQs to drivers
> >> > - which might crash/hang/race/misbehave.
> >>
> >> Reasonable. I expect one of the problems with drivers getting
> >> it wrong is that the interface is too complex for mortal
> >> humans to understand.
> >
> > The suspend/resume state machine certainly used to be a piece of
> > code that makes a seasoned kernel developer weep in fear.
> >
> > That has changed drastically in the past few months. The
> > suspend+hibernation logic got unified (at least as far as driver
> > methods go), and all the flow and ordering has been cleaned up
> > and has been made more robust.
>
> I will have to look again. My impression is that overloading
> a single method is part of what got us into this mess in the
> first place.
>
> No that I don't see things getting better.
>
> > What makes s2ram fragile is not human failure but the
> > combination of a handful of physical property:
> >
> > 1) Psychology: shutting the lid or pushing the suspend button is
> > a deceivingly 'simple' action to the user. But under the
> > hood, a ton of stuff happens: we deinitialize a lot of
> > things, we go through _all hardware state_, and we do so in a
> > serial fashion. If just one piece fails to do the right
> > thing, the box might not resume. Still, the user expects this
> > 'simple' thing to just work, all the time. No excuses
> > accepted.
> >
> > 2) Length of code: To get a successful s2ram sequence the kernel
> > runs through tens of thousands of lines of code. Code which
> > never gets executed on a normal box - only if we s2ram. If
> > just one step fails, we get a hung box.
> >
> > 3) Debuggability: a lot of s2ram code runs with the console off,
> > making any bugs hard to debug. Furthermore we have no
> > meaningful persistent storage either for kernel bug messages.
> > The RTC trick of PM_DEBUG works but is a very narrow channel
> > of information and it takes a lot of time to debug a bug via
> > that method.
>
> Yep that is an issue.
>
> > The combination of these factors really makes up for a perfect
> > storm in terms of kernel technology: we have this
> > very-deceivingly-simple-looking but complex-and-rarely-executed
> > piece of code, which is very hard to debug.
>
> And much of this as you are finding with this piece of code
> is how the software was designed rather then how the software
> needed to be.
>
> > Even just one of these factors would be enough to make an
> > otherwise healthy subsystem fragile - no wonder s2ram has been a
> > problem ever since it existed in the upstream kernel.
> >
> > So now we need just one thing: patience and more of the same
> > good stuff that happened lately.
>
> I think there has been some good progress, and so I am happy
> to be patient. I will still mention on occasion what it
> seems we are doing wrong. Unfortunately I don't have time
> to do a lot more than that.
>
> >> > Still, it might make sense to not just use the ->disable
> >> > sequence but primarily the ->shutdown irqchip method (when
> >> > it's available in the irqchip).
> >>
> >> Disable seems fine to me. This is interesting in the context
> >> of all of the irqs that will when masked show up somewhere
> >> else (think boot interrupts).
> >>
> >> > While we obviously cannot turn off the PIC that delivers
> >> > timer IRQs at this stage - there's no theoretical reason why
> >> > the suspend sequence couldnt power down some secondary PICs
> >> > as well - in some arch code, or maybe even in the generic
> >> > driver suspend sequence if the device tree is structured
> >> > carefully enough so that the PIC gets turned off last.
> >>
> >> If the point is simply to prevent deliver of irqs to the
> >> drivers I don't see the point of anything more than what the
> >> patch does now.
> >
> > ... except for the usecase i described above. Say some PIC sits
> > on a piece of silicon which gets turned off. I'm not talking
> > about x86 but some custom device. We really dont want that IRQ
> > line to send half of an IRQ message (un-ACK-ed) when it gets
> > turned off. So physically 'suspending' all IRQ lines does make a
> > certain level of long-term sense.
>
> Good point. We will loose both level and edge triggered events
> that occur between suspending the irqs and restoring them but
> that is inevitable. So we might as well call shutdown and totally
> turn off the irqs if we can.
>
> I don't know where in the state machine this is getting called but
> I would suggest doing this before we shutdown cpus.
This is the plan. In fact, I'm going to do this in the next patch after the
$subject one has been tested and found acceptable.
Thanks,
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <m1bpst77te.fsf@fess.ebiederm.org>
2009-02-23 11:03 ` Rafael J. Wysocki
@ 2009-02-23 11:04 ` Ingo Molnar
[not found] ` <20090223110433.GA17312@elte.hu>
[not found] ` <200902231203.09082.rjw@sisk.pl>
3 siblings, 0 replies; 77+ messages in thread
From: Ingo Molnar @ 2009-02-23 11:04 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, pm list, Linus Torvalds,
Thomas Gleixner
* Eric W. Biederman <ebiederm@xmission.com> wrote:
> > What makes s2ram fragile is not human failure but the
> > combination of a handful of physical property:
> >
> > 1) Psychology: shutting the lid or pushing the suspend button is
> > a deceivingly 'simple' action to the user. But under the
> > hood, a ton of stuff happens: we deinitialize a lot of
> > things, we go through _all hardware state_, and we do so in a
> > serial fashion. If just one piece fails to do the right
> > thing, the box might not resume. Still, the user expects this
> > 'simple' thing to just work, all the time. No excuses
> > accepted.
> >
> > 2) Length of code: To get a successful s2ram sequence the kernel
> > runs through tens of thousands of lines of code. Code which
> > never gets executed on a normal box - only if we s2ram. If
> > just one step fails, we get a hung box.
> >
> > 3) Debuggability: a lot of s2ram code runs with the console off,
> > making any bugs hard to debug. Furthermore we have no
> > meaningful persistent storage either for kernel bug messages.
> > The RTC trick of PM_DEBUG works but is a very narrow channel
> > of information and it takes a lot of time to debug a bug via
> > that method.
>
> Yep that is an issue.
I'd also like to add #4:
4) One more thing that makes s2ram special is that when the
resume path finds hardware often in an even more
deinitialized form than during normal bootup. During
normal bootup the BIOS/firmware has at least done some
minimal bootstrap (to get the kernel loaded), which
makes life easier for the kernel.
At s2ram stage we've got a completely pure hardware
init state, with very minimal firmware activation. So
many of the init and deinit problems and bugs we only
hit in the s2ram path - which dynamics is again not
helpful.
> > The combination of these factors really makes up for a
> > perfect storm in terms of kernel technology: we have this
> > very-deceivingly-simple-looking but
> > complex-and-rarely-executed piece of code, which is very
> > hard to debug.
>
> And much of this as you are finding with this piece of code is
> how the software was designed rather then how the software
> needed to be.
Well most of the 4 problems above are externalities and cannot
go away just by fixing the kernel.
#1 will always be with us.
#3 needs the hardware to change. It's happening, but slowly.
#4 will be with us as long as there's non-Linux BIOSes
#2 is the only thing where we can make a realistic difference,
but there's just so much we can do there.
And that still leaves the other three items: each of which is
powerful enough of a force to give a bad name to any normal
subsystem.
Ingo
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <20090223083645.GA9582@elte.hu>
@ 2009-02-23 11:29 ` Rafael J. Wysocki
[not found] ` <200902231229.58743.rjw@sisk.pl>
1 sibling, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-23 11:29 UTC (permalink / raw)
To: Ingo Molnar, Johannes Berg
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Linus Torvalds, Thomas Gleixner
On Monday 23 February 2009, Ingo Molnar wrote:
>
> * Rafael J. Wysocki <rjw@sisk.pl> wrote:
>
> > On Sunday 22 February 2009, Rafael J. Wysocki wrote:
> > > On Sunday 22 February 2009, Linus Torvalds wrote:
> > > >
> > > > On Sun, 22 Feb 2009, Rafael J. Wysocki wrote:
> > [--snip--]
> > >
> > > Thanks a lot for your comments, I'll send an updated patch shortly.
> >
> > The updated patch is appended.
> >
> > It has been initially tested, but requires more testing,
> > especially with APM, XEN, kexec jump etc.
>
> > arch/x86/kernel/apm_32.c | 20 ++++++++++++----
> > drivers/xen/manage.c | 32 +++++++++++++++----------
> > include/linux/interrupt.h | 3 ++
> > include/linux/irq.h | 1
> > kernel/irq/manage.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++
> > kernel/kexec.c | 10 ++++----
> > kernel/power/disk.c | 46 +++++++++++++++++++++++++++++--------
> > kernel/power/main.c | 20 +++++++++++-----
> > 8 files changed, 152 insertions(+), 37 deletions(-)
> >
> > Index: linux-2.6/kernel/irq/manage.c
> > ===================================================================
> > --- linux-2.6.orig/kernel/irq/manage.c
> > +++ linux-2.6/kernel/irq/manage.c
> > @@ -746,3 +746,60 @@ int request_irq(unsigned int irq, irq_ha
> > return retval;
> > }
> > EXPORT_SYMBOL(request_irq);
> > +
> > +#ifdef CONFIG_PM_SLEEP
> > +/**
> > + * suspend_device_irqs - disable all currently enabled interrupt lines
>
> Code placement nit: please dont put new #ifdef blocks into the
> core IRQ code, add a kernel/irq/power.c file instead and make
> the kbuild rule depend on PM_SLEEP.
>
> The new suspend_device_irqs() and resume_device_irqs() doesnt
> use any manage.c internals so this should work straight away.
OK, I'll do that.
> > + *
> > + * During system-wide suspend or hibernation device interrupts need to be
> > + * disabled at the chip level and this function is provided for this
> > + * purpose. It disables all interrupt lines that are enabled at the
> > + * moment and sets the IRQ_SUSPENDED flag for them.
> > + */
> > +void suspend_device_irqs(void)
> > +{
> > + struct irq_desc *desc;
> > + int irq;
> > +
> > + for_each_irq_desc(irq, desc) {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&desc->lock, flags);
> > +
> > + if (!desc->depth && desc->action
> > + && !(desc->action->flags & IRQF_TIMER)) {
> > + desc->depth++;
> > + desc->status |= IRQ_DISABLED | IRQ_SUSPENDED;
> > + desc->chip->disable(irq);
> > + }
> > +
> > + spin_unlock_irqrestore(&desc->lock, flags);
> > + }
> > +
> > + for_each_irq_desc(irq, desc) {
> > + if (desc->status & IRQ_SUSPENDED)
> > + synchronize_irq(irq);
> > + }
>
> Optimization/code-flow nit: a possibility might be to do a
> single loop, i.e. i think it's safe to couple the disable+sync
> bits [as in 99.99% of the cases there will be no in-execution
> irq handlers when we execute this.]
Well, Linus suggested to do it in a separate loop. I'm fine with both ways.
> Something like:
>
> int do_sync = 0;
>
> spin_lock_irqsave(&desc->lock, flags);
>
> if (!desc->depth && desc->action
> && !(desc->action->flags & IRQF_TIMER)) {
>
> desc->depth++;
> desc->status |= IRQ_DISABLED | IRQ_SUSPENDED;
> desc->chip->disable(irq);
> do_sync = 1;
> }
>
> spin_unlock_irqrestore(&desc->lock, flags);
>
> if (do_sync)
> synchronize_irq(irq);
>
> In fact i'd suggest to factor out this logic into a separate
> __suspend_irq(irq) / __resume_irq(irq) inline helper functions.
> (They should be inline for the time being as they are not
> shared-irq-safe so they shouldnt really be exposed to drivers in
> such a singular capacity.)
Good idea, I'll do it.
> Doing so will also fix the line-break ugliness of the first
> branch - as in a standalone function the condition fits into a
> single line.
>
> There's a performance reason as well: especially when we have a
> lot of IRQ descriptors that will be about twice as fast. (with a
> large iteration scope this function is cachemiss-limited and
> doing this passes doubles the cachemiss rate.)
>
> > +}
> > +EXPORT_SYMBOL_GPL(suspend_device_irqs);
> > +
> > +/**
> > + * resume_device_irqs - enable interrupts disabled by suspend_device_irqs()
> > + *
> > + * Enable all interrupt lines previously disabled by suspend_device_irqs()
> > + * that have the IRQ_SUSPENDED flag set.
> > + */
> > +void resume_device_irqs(void)
> > +{
> > + struct irq_desc *desc;
> > + int irq;
> > +
> > + for_each_irq_desc(irq, desc) {
> > + if (!(desc->status & IRQ_SUSPENDED))
> > + continue;
> > + desc->status &= ~IRQ_SUSPENDED;
> > + enable_irq(irq);
> > + }
>
> Robustness+optimization nit: this will work but could be done in
> a nicer way: enable_irq() should auto-clear IRQ_SUSPENDED. (We
> already clear flags there so it's even a tiny bit faster this
> way.)
OK
> We definitely dont want IRQ_SUSPENDED to 'leak' out into an
> enabled line, should something call enable_irq() on a suspended
> line. So either make it auto-unsuspend in enable_irq(), or add
> an extra WARN_ON() to enable_irq(), to make sure IRQ_SUSPENDED
> is always off by that time.
>
> > + arch_suspend_disable_irqs();
> > + BUG_ON(!irqs_disabled());
>
> Please. We just disabled all devices - a BUG_ON() is a very
> counter-productive thing to do here - chances are the user will
> never see anything but a hang. So please turn this into a nice
> WARN_ONCE().
This is just moving code. Also, the BUG_ON() can only affect powerpc and it's
there on purpose AFAICS (Johannes?). Anyway, changing that would be a separate
patch.
> > --- linux-2.6.orig/include/linux/interrupt.h
> > +++ linux-2.6/include/linux/interrupt.h
> > @@ -470,4 +470,7 @@ extern int early_irq_init(void);
> > extern int arch_early_irq_init(void);
> > extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
> >
> > +extern void suspend_device_irqs(void);
> > +extern void resume_device_irqs(void);
>
> Header cleanliness nit: please dont just throw new prototypes to
> the tail of headers, but think about where they fit in best,
> logically.
>
> These two new prototypes should go straight after the normal irq
> line state management functions:
>
> extern void disable_irq_nosync(unsigned int irq);
> extern void disable_irq(unsigned int irq);
> extern void enable_irq(unsigned int irq);
>
> Perhaps also with a comment like this:
>
> /*
> * Note: dont use these functions in driver code - they are for
> * core kernel use only.
> */
OK, I'll put them in there.
> > +++ linux-2.6/kernel/power/main.c
> [...]
> > +
> > + Unlock:
> > + resume_device_irqs();
>
> Small drive-by style nit: while at it could you please fix the
> capitalization and the naming of the label (and all labels in
> this file)?
I don't think they are wrong. They are uniform accross the file and it's
clear what they mean.
> The standard label is "out_unlock". [and "err_unlock" for failure cases
> - but this isnt a failure case.]
Where exactly is this standard defined?
> There's 43 such bad label names in kernel/power/*.c, see the
> output of:
>
> git grep '^ [A-Z][a-z].*:$' kernel/power/
If you think they are bad, please send a patch to change them.
> > Index: linux-2.6/arch/x86/kernel/apm_32.c
> > ===================================================================
> > --- linux-2.6.orig/arch/x86/kernel/apm_32.c
> > +++ linux-2.6/arch/x86/kernel/apm_32.c
>
> > +
> > + suspend_device_irqs();
> > device_power_down(PMSG_SUSPEND);
> > +
> > + local_irq_disable();
>
> hm, this is a very repetitive pattern, all around the various
> suspend/resume variants. Might make sense to make:
>
> device_power_down(PMSG_SUSPEND);
>
> do the irq line disabling plus the local irq disabling
> automatically. That also means it cannot be forgotten. The
> symmetric action should happen for PMSG_RESUME.
>
> Is there ever a case where we want a different pattern?
Even if there's no such case, I prefer to call local_irq_disable() explicitly
in here, so that it's clearly known where it happens to anyone reading this
code.
Doing the "late" suspend of devices and disabling interrupts on the CPU
are separate logical steps.
> > Index: linux-2.6/drivers/xen/manage.c
> > ===================================================================
> > --- linux-2.6.orig/drivers/xen/manage.c
> > +++ linux-2.6/drivers/xen/manage.c
> > @@ -39,12 +39,6 @@ static int xen_suspend(void *data)
>
> > - if (!*cancelled) {
> > - xen_irq_resume();
> > - xen_console_resume();
> > - xen_timer_resume();
>
> This change needs a second look. xen_suspend() is a
> stop_machine() handler and as such executes on specific CPUs,
> and your change modifies this. OTOH, i had a look at these
> handlers and it all looks safe. Jeremy?
>
> > +resume_devices:
> > + resume_device_irqs();
>
> Small style nit: labels should start with a space character.
> I.e. it should be:
I know, but the second label in there starts without a space character and
IMO keeping a uniform coding style i a single file is more important than
trying to adjust it to a broader set of rules FWIW. I also think that coding
style changes shouldn't be mixed with functional changes as far as reasonably
possible.
> > + resume_devices:
> > + resume_device_irqs();
>
> > +++ linux-2.6/kernel/kexec.c
> > @@ -1454,7 +1454,7 @@ int kernel_kexec(void)
> > if (error)
> > goto Resume_devices;
> > device_pm_lock();
> > - local_irq_disable();
> > + suspend_device_irqs();
> > /* At this point, device_suspend() has been called,
> > * but *not* device_power_down(). We *must*
> > * device_power_down() now. Otherwise, drivers for
> > @@ -1464,8 +1464,9 @@ int kernel_kexec(void)
> > */
> > error = device_power_down(PMSG_FREEZE);
> > if (error)
> > - goto Enable_irqs;
> > + goto Resume_irqs;
> >
> > + local_irq_disable();
> > /* Suspend system devices */
> > error = sysdev_suspend(PMSG_FREEZE);
> > if (error)
> > @@ -1484,9 +1485,10 @@ int kernel_kexec(void)
> > if (kexec_image->preserve_context) {
> > sysdev_resume();
> > Power_up_devices:
> > - device_power_up(PMSG_RESTORE);
> > - Enable_irqs:
> > local_irq_enable();
> > + device_power_up(PMSG_RESTORE);
> > + Resume_irqs:
> > + resume_device_irqs();
> > device_pm_unlock();
> > enable_nonboot_cpus();
> > Resume_devices:
>
> (same comment about label style applies here too.)
>
> > Index: linux-2.6/include/linux/irq.h
> > ===================================================================
> > --- linux-2.6.orig/include/linux/irq.h
> > +++ linux-2.6/include/linux/irq.h
> > @@ -65,6 +65,7 @@ typedef void (*irq_flow_handler_t)(unsig
> > #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
> > #define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
> > #define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/
> > +#define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */
> >
> > #ifdef CONFIG_IRQ_PER_CPU
> > # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
>
> Note, you should probably make PM_SLEEP depend on
> GENERIC_HARDIRQS - as this change will break the build on all
> non-genirq architectures. (sparc, alpha, etc.)
PM_SLEEP depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE, which
I don't think is set on these architectures.
Thanlks a lot for your comments.
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902231229.58743.rjw@sisk.pl>
@ 2009-02-23 12:28 ` Ingo Molnar
2009-02-23 12:45 ` Ingo Molnar
` (5 subsequent siblings)
6 siblings, 0 replies; 77+ messages in thread
From: Ingo Molnar @ 2009-02-23 12:28 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Johannes Berg, Linus Torvalds, pm list
* Rafael J. Wysocki <rjw@sisk.pl> wrote:
> > > Index: linux-2.6/arch/x86/kernel/apm_32.c
> > > ===================================================================
> > > --- linux-2.6.orig/arch/x86/kernel/apm_32.c
> > > +++ linux-2.6/arch/x86/kernel/apm_32.c
> >
> > > +
> > > + suspend_device_irqs();
> > > device_power_down(PMSG_SUSPEND);
> > > +
> > > + local_irq_disable();
> >
> > hm, this is a very repetitive pattern, all around the various
> > suspend/resume variants. Might make sense to make:
> >
> > device_power_down(PMSG_SUSPEND);
> >
> > do the irq line disabling plus the local irq disabling
> > automatically. That also means it cannot be forgotten. The
> > symmetric action should happen for PMSG_RESUME.
> >
> > Is there ever a case where we want a different pattern?
>
> Even if there's no such case, I prefer to call
> local_irq_disable() explicitly in here, so that it's clearly
> known where it happens to anyone reading this code.
That property can be implied in the function name:
device_power_down_irq_disable(PMSG_SUSPEND);
Open-coding it, if it looks the same in all the cases just
increases the chances that someone somewhere copies them
incorrectly.
Ingo
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902231229.58743.rjw@sisk.pl>
2009-02-23 12:28 ` Ingo Molnar
@ 2009-02-23 12:45 ` Ingo Molnar
[not found] ` <20090223122832.GB31427@elte.hu>
` (4 subsequent siblings)
6 siblings, 0 replies; 77+ messages in thread
From: Ingo Molnar @ 2009-02-23 12:45 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Johannes Berg, Linus Torvalds, pm list
* Rafael J. Wysocki <rjw@sisk.pl> wrote:
> > > +resume_devices:
> > > + resume_device_irqs();
> >
> > Small style nit: labels should start with a space character.
> > I.e. it should be:
>
> I know, but the second label in there starts without a space
> character and IMO keeping a uniform coding style i a single
> file is more important than trying to adjust it to a broader
> set of rules FWIW. [...]
Even though it's just a very small and insignificant detail
(nowhere described in the CodingStyle), barely worth the mention
(and i already regret having brought it up at all), what you say
is wrong on a conceptual level and that alarms me a bit ;-)
It is exactly these kinds of "my code, my style!" world view
that results in a crappy overall kernel style.
For a single file to look consistent is just the first (and
required) step, what matters even more is for files to have
similar coding patterns, to make the style as helpful to the
general kernel developer/reviewer/bug-fixer/maintainer as
possible.
"code with a helpful style" here means two things:
1) it should understand and adhere to basic style principles.
This is just an (often arbitrary) subset of the infinite set
of reasonable style guides. The most common-sense ones are
written down in Documentation/CodingStyle. There's a lot of
leeway, as long as the basic principle of "be helpful" is
understood and followed.
2) it should carry meta information outside of the language
syntax and it should build expectations about a code's
purpose and general structure.
That is essential so that we can find bugs during review.
If each file has a slightly different style to express labels
then that means we insert extra entropy and degrades and
obfuscates the true meat of the code and hurts the overall
reviewability of the code.
In practical terms: i noticed that weird label - otherwise i
would not have commented on it. I noticed it because it had the
pattern of a comment block (most comment blocks start with
capital letters, and for that good reason).
It was completely unnecessary for me to notice that label - it
carries no information about the patch itself. Ergo, it would be
better in the long run if code does not raise unnecessary mental
exceptions. We have a limited set of exceptions we are able to
handle during review, lets make sure we use them sparingly.
Sure, there will always be borderline cases where we'll have to
agree to disagree, even if we agree about the general principle.
But this is not one of those cases - having a "Suspend:"
capitalized label is not something you added to enhance the
basic coding style - it is something very uncommon and
self-serving which you added in _spite_ of the general
principles i believe. It has no other message beyond "I do this
because i can!".
I.e. it is not helpful at all. When it comes to coding style the
kernel is not a democracy at all.
> [...] I also think that coding style changes shouldn't be
> mixed with functional changes as far as reasonably possible.
Sure, you got that drive-by review for free, by virtue of
context diffs ;-)
Ingo
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <20090223110433.GA17312@elte.hu>
@ 2009-02-23 14:45 ` Rafael J. Wysocki
[not found] ` <200902231545.26250.rjw@sisk.pl>
1 sibling, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-23 14:45 UTC (permalink / raw)
To: Ingo Molnar
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Linus Torvalds, Thomas Gleixner
On Monday 23 February 2009, Ingo Molnar wrote:
>
> * Eric W. Biederman <ebiederm@xmission.com> wrote:
>
> > > What makes s2ram fragile is not human failure but the
> > > combination of a handful of physical property:
> > >
> > > 1) Psychology: shutting the lid or pushing the suspend button is
> > > a deceivingly 'simple' action to the user. But under the
> > > hood, a ton of stuff happens: we deinitialize a lot of
> > > things, we go through _all hardware state_, and we do so in a
> > > serial fashion. If just one piece fails to do the right
> > > thing, the box might not resume. Still, the user expects this
> > > 'simple' thing to just work, all the time. No excuses
> > > accepted.
> > >
> > > 2) Length of code: To get a successful s2ram sequence the kernel
> > > runs through tens of thousands of lines of code. Code which
> > > never gets executed on a normal box - only if we s2ram. If
> > > just one step fails, we get a hung box.
> > >
> > > 3) Debuggability: a lot of s2ram code runs with the console off,
> > > making any bugs hard to debug. Furthermore we have no
> > > meaningful persistent storage either for kernel bug messages.
> > > The RTC trick of PM_DEBUG works but is a very narrow channel
> > > of information and it takes a lot of time to debug a bug via
> > > that method.
> >
> > Yep that is an issue.
>
> I'd also like to add #4:
>
> 4) One more thing that makes s2ram special is that when the
> resume path finds hardware often in an even more
> deinitialized form than during normal bootup. During
> normal bootup the BIOS/firmware has at least done some
> minimal bootstrap (to get the kernel loaded), which
> makes life easier for the kernel.
>
> At s2ram stage we've got a completely pure hardware
> init state, with very minimal firmware activation.
This is very true and at least in some cases done on purpose, AFAICS, due to
some timing constraints forced on HW vendors by M$, for example.
> So many of the init and deinit problems and bugs we only
> hit in the s2ram path - which dynamics is again not
> helpful.
Plus ACPI requires us to do additional things during suspend-resume that
are not done on boot-shutdown and which have their own ordering requirements
(not necessarily stated directly, but such that we have do discover
experimentally). That also change from one BIOS to another.
> > > The combination of these factors really makes up for a
> > > perfect storm in terms of kernel technology: we have this
> > > very-deceivingly-simple-looking but
> > > complex-and-rarely-executed piece of code, which is very
> > > hard to debug.
> >
> > And much of this as you are finding with this piece of code is
> > how the software was designed rather then how the software
> > needed to be.
>
> Well most of the 4 problems above are externalities and cannot
> go away just by fixing the kernel.
>
> #1 will always be with us.
> #3 needs the hardware to change. It's happening, but slowly.
> #4 will be with us as long as there's non-Linux BIOSes
>
> #2 is the only thing where we can make a realistic difference,
> but there's just so much we can do there.
>
> And that still leaves the other three items: each of which is
> powerful enough of a force to give a bad name to any normal
> subsystem.
Agreed.
Thanks,
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <20090223122832.GB31427@elte.hu>
@ 2009-02-23 14:48 ` Rafael J. Wysocki
2009-02-23 20:49 ` Benjamin Herrenschmidt
1 sibling, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-23 14:48 UTC (permalink / raw)
To: Ingo Molnar
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Johannes Berg, Linus Torvalds, pm list
On Monday 23 February 2009, Ingo Molnar wrote:
>
> * Rafael J. Wysocki <rjw@sisk.pl> wrote:
>
> > > > Index: linux-2.6/arch/x86/kernel/apm_32.c
> > > > ===================================================================
> > > > --- linux-2.6.orig/arch/x86/kernel/apm_32.c
> > > > +++ linux-2.6/arch/x86/kernel/apm_32.c
> > >
> > > > +
> > > > + suspend_device_irqs();
> > > > device_power_down(PMSG_SUSPEND);
> > > > +
> > > > + local_irq_disable();
> > >
> > > hm, this is a very repetitive pattern, all around the various
> > > suspend/resume variants. Might make sense to make:
> > >
> > > device_power_down(PMSG_SUSPEND);
> > >
> > > do the irq line disabling plus the local irq disabling
> > > automatically. That also means it cannot be forgotten. The
> > > symmetric action should happen for PMSG_RESUME.
> > >
> > > Is there ever a case where we want a different pattern?
> >
> > Even if there's no such case, I prefer to call
> > local_irq_disable() explicitly in here, so that it's clearly
> > known where it happens to anyone reading this code.
>
> That property can be implied in the function name:
>
> device_power_down_irq_disable(PMSG_SUSPEND);
>
> Open-coding it, if it looks the same in all the cases just
> increases the chances that someone somewhere copies them
> incorrectly.
Well, I see your point, but in that case I'd rather couple the disabling of
local interrupts on the CPU with sysdev_suspend and the disabling (or whatever
Eric would like to call that) of device interrupts with device_power_down().
Thanks,
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902231545.26250.rjw@sisk.pl>
@ 2009-02-23 15:06 ` Ingo Molnar
2009-02-23 21:59 ` Rafael J. Wysocki
0 siblings, 1 reply; 77+ messages in thread
From: Ingo Molnar @ 2009-02-23 15:06 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Linus Torvalds, Thomas Gleixner
* Rafael J. Wysocki <rjw@sisk.pl> wrote:
> On Monday 23 February 2009, Ingo Molnar wrote:
> >
> > * Eric W. Biederman <ebiederm@xmission.com> wrote:
> >
> > > > What makes s2ram fragile is not human failure but the
> > > > combination of a handful of physical property:
> > > >
> > > > 1) Psychology: shutting the lid or pushing the suspend button is
> > > > a deceivingly 'simple' action to the user. But under the
> > > > hood, a ton of stuff happens: we deinitialize a lot of
> > > > things, we go through _all hardware state_, and we do so in a
> > > > serial fashion. If just one piece fails to do the right
> > > > thing, the box might not resume. Still, the user expects this
> > > > 'simple' thing to just work, all the time. No excuses
> > > > accepted.
> > > >
> > > > 2) Length of code: To get a successful s2ram sequence the kernel
> > > > runs through tens of thousands of lines of code. Code which
> > > > never gets executed on a normal box - only if we s2ram. If
> > > > just one step fails, we get a hung box.
> > > >
> > > > 3) Debuggability: a lot of s2ram code runs with the console off,
> > > > making any bugs hard to debug. Furthermore we have no
> > > > meaningful persistent storage either for kernel bug messages.
> > > > The RTC trick of PM_DEBUG works but is a very narrow channel
> > > > of information and it takes a lot of time to debug a bug via
> > > > that method.
> > >
> > > Yep that is an issue.
> >
> > I'd also like to add #4:
> >
> > 4) One more thing that makes s2ram special is that when the
> > resume path finds hardware often in an even more
> > deinitialized form than during normal bootup. During
> > normal bootup the BIOS/firmware has at least done some
> > minimal bootstrap (to get the kernel loaded), which
> > makes life easier for the kernel.
> >
> > At s2ram stage we've got a completely pure hardware
> > init state, with very minimal firmware activation.
>
> This is very true and at least in some cases done on purpose,
> AFAICS, due to some timing constraints forced on HW vendors by
> M$, for example.
IMHO i think it's the technically sane thing to do. Personally i
trust the quirks of bare metal much more than the combined
quirks of firmware _and_ bare metal.
> > So many of the init and deinit problems and bugs we
> > only hit in the s2ram path - which dynamics is again
> > not helpful.
>
> Plus ACPI requires us to do additional things during
> suspend-resume that are not done on boot-shutdown and which
> have their own ordering requirements (not necessarily stated
> directly, but such that we have do discover experimentally).
> That also change from one BIOS to another.
We could perhaps do a few things here to trigger bugs sooner.
For example at driver init, instead of executing just
->driver_open(), we could execute:
->driver_open()
->driver_suspend()
->driver_resume()
I.e. we'd simulate a suspend+resume mini-step. This makes it
sure that the basic driver callbacks are sane. It is also
supposed to work because the driver is just being initialized.
This way certain types of bugs would not show up as difficult to
debug s2ram regressions - but would show up as 'boot hang' or
'boot crash' bugs.
This does not simulate the "big picture" resume machinery (the
dependencies, etc.), nor does it trigger any of the "hardware
got really turned off" effects that true resume will trigger -
but at least it offloads a portion of the testing space from
's2ram' to 'bootup' testing.
What's your feeling - what percentage of all s2ram regressions
in the last year or so could have been triggered this way? Lets
assume we had 100 regressions in that timeframe - would it be in
the 10 bugs range? Or much lower or much higher?
Ingo
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <20090223124503.GC31427@elte.hu>
@ 2009-02-23 15:07 ` Rafael J. Wysocki
0 siblings, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-23 15:07 UTC (permalink / raw)
To: Ingo Molnar
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Johannes Berg, Linus Torvalds, pm list
On Monday 23 February 2009, Ingo Molnar wrote:
>
> * Rafael J. Wysocki <rjw@sisk.pl> wrote:
>
> > > > +resume_devices:
> > > > + resume_device_irqs();
> > >
> > > Small style nit: labels should start with a space character.
> > > I.e. it should be:
> >
> > I know, but the second label in there starts without a space
> > character and IMO keeping a uniform coding style i a single
> > file is more important than trying to adjust it to a broader
> > set of rules FWIW. [...]
>
> Even though it's just a very small and insignificant detail
> (nowhere described in the CodingStyle), barely worth the mention
> (and i already regret having brought it up at all), what you say
> is wrong on a conceptual level and that alarms me a bit ;-)
>
> It is exactly these kinds of "my code, my style!" world view
> that results in a crappy overall kernel style.
>
> For a single file to look consistent is just the first (and
> required) step, what matters even more is for files to have
> similar coding patterns, to make the style as helpful to the
> general kernel developer/reviewer/bug-fixer/maintainer as
> possible.
>
> "code with a helpful style" here means two things:
>
> 1) it should understand and adhere to basic style principles.
> This is just an (often arbitrary) subset of the infinite set
> of reasonable style guides. The most common-sense ones are
> written down in Documentation/CodingStyle. There's a lot of
> leeway, as long as the basic principle of "be helpful" is
> understood and followed.
>
> 2) it should carry meta information outside of the language
> syntax and it should build expectations about a code's
> purpose and general structure.
>
> That is essential so that we can find bugs during review.
>
> If each file has a slightly different style to express labels
> then that means we insert extra entropy and degrades and
> obfuscates the true meat of the code and hurts the overall
> reviewability of the code.
>
> In practical terms: i noticed that weird label - otherwise i
> would not have commented on it. I noticed it because it had the
> pattern of a comment block (most comment blocks start with
> capital letters, and for that good reason).
>
> It was completely unnecessary for me to notice that label - it
> carries no information about the patch itself. Ergo, it would be
> better in the long run if code does not raise unnecessary mental
> exceptions. We have a limited set of exceptions we are able to
> handle during review, lets make sure we use them sparingly.
Just to clarify, I have nothing against labels that are not capitalized etc.,
actually I can live with whatever style of labels is considered as appropriate
and/or helpful.
However, if specific style of labels was chosen for given file in the past and
it is consistent over the entire file, I don't think it should be changed in a
patch that does a different thing, regardless of who's maintaining the file
or who's written the code in question. It should be changed in a separate
patch with a changelog describing why this change is being made. I don't
really have the time to write such a patch at the moment and I don't really
think it's _that_ important. YMMV.
> Sure, there will always be borderline cases where we'll have to
> agree to disagree, even if we agree about the general principle.
>
> But this is not one of those cases - having a "Suspend:"
> capitalized label is not something you added to enhance the
> basic coding style - it is something very uncommon and
> self-serving which you added in _spite_ of the general
> principles i believe. It has no other message beyond "I do this
> because i can!".
>
> I.e. it is not helpful at all. When it comes to coding style the
> kernel is not a democracy at all.
>
> > [...] I also think that coding style changes shouldn't be
> > mixed with functional changes as far as reasonably possible.
>
> Sure, you got that drive-by review for free, by virtue of
> context diffs ;-)
Well, OK. :-)
Still, IMHO it's more helpful if the comments related to the changes that
belong to the patch in question are not mixed with comments related to the
coding style of the files being modified. Perhaps I'm picky ...
Thanks,
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902231203.09082.rjw@sisk.pl>
@ 2009-02-23 15:28 ` Eric W. Biederman
[not found] ` <m18wnx41h5.fsf@fess.ebiederm.org>
1 sibling, 0 replies; 77+ messages in thread
From: Eric W. Biederman @ 2009-02-23 15:28 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Ingo Molnar, Linus Torvalds, pm list
"Rafael J. Wysocki" <rjw@sisk.pl> writes:
>> I don't know where in the state machine this is getting called but
>> I would suggest doing this before we shutdown cpus.
>
> This is the plan. In fact, I'm going to do this in the next patch after the
> $subject one has been tested and found acceptable.
Good to hear. Then let's please get a version of the irq disable that calls
shutdown, so we can be certain we don't have hardware irqs in flight.
For the drivers it should not matter for clean cpu shutdown it will.
Eric
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902231229.58743.rjw@sisk.pl>
` (3 preceding siblings ...)
[not found] ` <20090223124503.GC31427@elte.hu>
@ 2009-02-23 15:52 ` Johannes Berg
2009-02-23 17:16 ` Ingo Molnar
[not found] ` <20090223171630.GA28651@elte.hu>
6 siblings, 0 replies; 77+ messages in thread
From: Johannes Berg @ 2009-02-23 15:52 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, Linus Torvalds, pm list
[-- Attachment #1.1: Type: text/plain, Size: 1047 bytes --]
On Mon, 2009-02-23 at 12:29 +0100, Rafael J. Wysocki wrote:
> > > + arch_suspend_disable_irqs();
> > > + BUG_ON(!irqs_disabled());
> >
> > Please. We just disabled all devices - a BUG_ON() is a very
> > counter-productive thing to do here - chances are the user will
> > never see anything but a hang. So please turn this into a nice
> > WARN_ONCE().
>
> This is just moving code. Also, the BUG_ON() can only affect powerpc and it's
> there on purpose AFAICS (Johannes?). Anyway, changing that would be a separate
> patch.
It can affect any platform that overrides the weak symbol
arch_suspend_disable_irqs(), and I think that if you're writing this
low-level code you better have a way to debug. As such, I don't think it
needs changing, because you can only ever see that while implementing
arch_suspend_disable_irqs(). OTOH, since it can only trigger then, a
WARN_ON is probably fine as well since you'll be getting your machine
into inconsistent states all the time while implementing this ;)
johannes
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #2: Type: text/plain, Size: 0 bytes --]
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902231229.58743.rjw@sisk.pl>
` (4 preceding siblings ...)
2009-02-23 15:52 ` Johannes Berg
@ 2009-02-23 17:16 ` Ingo Molnar
[not found] ` <20090223171630.GA28651@elte.hu>
6 siblings, 0 replies; 77+ messages in thread
From: Ingo Molnar @ 2009-02-23 17:16 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Johannes Berg, Linus Torvalds, pm list
* Rafael J. Wysocki <rjw@sisk.pl> wrote:
> > > +void suspend_device_irqs(void)
> > > +{
> > > + struct irq_desc *desc;
> > > + int irq;
> > > +
> > > + for_each_irq_desc(irq, desc) {
> > > + unsigned long flags;
> > > +
> > > + spin_lock_irqsave(&desc->lock, flags);
> > > +
> > > + if (!desc->depth && desc->action
> > > + && !(desc->action->flags & IRQF_TIMER)) {
> > > + desc->depth++;
> > > + desc->status |= IRQ_DISABLED | IRQ_SUSPENDED;
> > > + desc->chip->disable(irq);
> > > + }
> > > +
> > > + spin_unlock_irqrestore(&desc->lock, flags);
> > > + }
> > > +
> > > + for_each_irq_desc(irq, desc) {
> > > + if (desc->status & IRQ_SUSPENDED)
> > > + synchronize_irq(irq);
> > > + }
> >
> > Optimization/code-flow nit: a possibility might be to do a
> > single loop, i.e. i think it's safe to couple the
> > disable+sync bits [as in 99.99% of the cases there will be
> > no in-execution irq handlers when we execute this.]
>
> Well, Linus suggested to do it in a separate loop. I'm fine
> with both ways.
Linus, do you have a strong opinion about which variant we
should use?
The two approaches are not completely equivalent, the variant
suggested by Linus is a bit more 'atomic' - in that it first
turns off everything, then looks for everything that needs to be
synchronized.
OTOH, it _shouldnt_ make much of a difference on a correctly
working system - we ought to be able to disable the irqs one by
one and wait on any pending ones on the spot. Maybe if there was
some implicit dependency between irq sources it would be more
robust to do Linus's version.
Dunno ... no strong feelings either way.
Ingo
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <20090223171630.GA28651@elte.hu>
@ 2009-02-23 17:28 ` Linus Torvalds
2009-02-23 22:11 ` Rafael J. Wysocki
0 siblings, 1 reply; 77+ messages in thread
From: Linus Torvalds @ 2009-02-23 17:28 UTC (permalink / raw)
To: Ingo Molnar
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Johannes Berg, pm list
On Mon, 23 Feb 2009, Ingo Molnar wrote:
>
> Linus, do you have a strong opinion about which variant we
> should use?
Strong? No. I think mine is better just because _if_ another CPU is busy
handling an interrupt that we're just now disabling, we'll just go on to
the next interrupt. Waiting for them all at the end is always more
efficient.
But does it really matter? No. In this case I think we've shut down all
other CPU's anyway, so the whole "serialize_irq()" should probably not
even be needed.
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <20090223122832.GB31427@elte.hu>
2009-02-23 14:48 ` Rafael J. Wysocki
@ 2009-02-23 20:49 ` Benjamin Herrenschmidt
1 sibling, 0 replies; 77+ messages in thread
From: Benjamin Herrenschmidt @ 2009-02-23 20:49 UTC (permalink / raw)
To: Ingo Molnar
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Johannes Berg, Linus Torvalds, pm list
> That property can be implied in the function name:
>
> device_power_down_irq_disable(PMSG_SUSPEND);
>
> Open-coding it, if it looks the same in all the cases just
> increases the chances that someone somewhere copies them
> incorrectly.
No. Some archs need to do "special" things at the irq disable point,
leave it open coded in the caller please.
Cheers,
Ben.
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <m18wnx41h5.fsf@fess.ebiederm.org>
@ 2009-02-23 21:39 ` Rafael J. Wysocki
[not found] ` <200902232239.05926.rjw@sisk.pl>
1 sibling, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-23 21:39 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Ingo Molnar, Linus Torvalds, pm list
On Monday 23 February 2009, Eric W. Biederman wrote:
> "Rafael J. Wysocki" <rjw@sisk.pl> writes:
>
> >> I don't know where in the state machine this is getting called but
> >> I would suggest doing this before we shutdown cpus.
> >
> > This is the plan. In fact, I'm going to do this in the next patch after the
> > $subject one has been tested and found acceptable.
>
> Good to hear. Then let's please get a version of the irq disable that calls
> shutdown, so we can be certain we don't have hardware irqs in flight.
>
> For the drivers it should not matter for clean cpu shutdown it will.
OK, I will.
Thanks,
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
2009-02-23 15:06 ` Ingo Molnar
@ 2009-02-23 21:59 ` Rafael J. Wysocki
0 siblings, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-23 21:59 UTC (permalink / raw)
To: Ingo Molnar
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Linus Torvalds, Thomas Gleixner
On Monday 23 February 2009, Ingo Molnar wrote:
>
> * Rafael J. Wysocki <rjw@sisk.pl> wrote:
>
> > On Monday 23 February 2009, Ingo Molnar wrote:
> > >
> > > * Eric W. Biederman <ebiederm@xmission.com> wrote:
> > >
> > > > > What makes s2ram fragile is not human failure but the
> > > > > combination of a handful of physical property:
> > > > >
> > > > > 1) Psychology: shutting the lid or pushing the suspend button is
> > > > > a deceivingly 'simple' action to the user. But under the
> > > > > hood, a ton of stuff happens: we deinitialize a lot of
> > > > > things, we go through _all hardware state_, and we do so in a
> > > > > serial fashion. If just one piece fails to do the right
> > > > > thing, the box might not resume. Still, the user expects this
> > > > > 'simple' thing to just work, all the time. No excuses
> > > > > accepted.
> > > > >
> > > > > 2) Length of code: To get a successful s2ram sequence the kernel
> > > > > runs through tens of thousands of lines of code. Code which
> > > > > never gets executed on a normal box - only if we s2ram. If
> > > > > just one step fails, we get a hung box.
> > > > >
> > > > > 3) Debuggability: a lot of s2ram code runs with the console off,
> > > > > making any bugs hard to debug. Furthermore we have no
> > > > > meaningful persistent storage either for kernel bug messages.
> > > > > The RTC trick of PM_DEBUG works but is a very narrow channel
> > > > > of information and it takes a lot of time to debug a bug via
> > > > > that method.
> > > >
> > > > Yep that is an issue.
> > >
> > > I'd also like to add #4:
> > >
> > > 4) One more thing that makes s2ram special is that when the
> > > resume path finds hardware often in an even more
> > > deinitialized form than during normal bootup. During
> > > normal bootup the BIOS/firmware has at least done some
> > > minimal bootstrap (to get the kernel loaded), which
> > > makes life easier for the kernel.
> > >
> > > At s2ram stage we've got a completely pure hardware
> > > init state, with very minimal firmware activation.
> >
> > This is very true and at least in some cases done on purpose,
> > AFAICS, due to some timing constraints forced on HW vendors by
> > M$, for example.
>
> IMHO i think it's the technically sane thing to do. Personally i
> trust the quirks of bare metal much more than the combined
> quirks of firmware _and_ bare metal.
>
> > > So many of the init and deinit problems and bugs we
> > > only hit in the s2ram path - which dynamics is again
> > > not helpful.
> >
> > Plus ACPI requires us to do additional things during
> > suspend-resume that are not done on boot-shutdown and which
> > have their own ordering requirements (not necessarily stated
> > directly, but such that we have do discover experimentally).
> > That also change from one BIOS to another.
>
> We could perhaps do a few things here to trigger bugs sooner.
>
> For example at driver init, instead of executing just
> ->driver_open(), we could execute:
>
> ->driver_open()
> ->driver_suspend()
> ->driver_resume()
I'm not sure. On PCI we run some code apart from the driver's suspend and
resume callbacks, especially in the new framework, and the bus type executes
the driver callbacks.
> I.e. we'd simulate a suspend+resume mini-step. This makes it
> sure that the basic driver callbacks are sane. It is also
> supposed to work because the driver is just being initialized.
>
> This way certain types of bugs would not show up as difficult to
> debug s2ram regressions - but would show up as 'boot hang' or
> 'boot crash' bugs.
There is a testing facility exactly for this (/sys/power/pm_test) that allows
you to simulate the entire suspend sequence without suspending as well as some
separate pieces of it. Still, it doesn't work very well, because the
conditions in which the resume callbacks are being run differ substantially
from the conditions right after we get control from the BIOS.
For one example, if ->suspend() puts the device into D3, then your simulated
->resume() will get the device in D3, while the BIOS would probably put it into
D0 (at least as far as PCI devices are concerned).
> This does not simulate the "big picture" resume machinery (the
> dependencies, etc.), nor does it trigger any of the "hardware
> got really turned off" effects that true resume will trigger -
> but at least it offloads a portion of the testing space from
> 's2ram' to 'bootup' testing.
>
> What's your feeling - what percentage of all s2ram regressions
> in the last year or so could have been triggered this way? Lets
> assume we had 100 regressions in that timeframe - would it be in
> the 10 bugs range? Or much lower or much higher?
Very small number of actual bugs with rather a lot of false positives.
IMO there are three basic sources of recent suspend regressions:
1) Arch-dependent changes (x86 mostly) and low-level changes affecting suspend
(like PCI bus enumeration, IOMMU etc.), where people didn't realize their
modifications would have a broader effect.
2) PM core changes where we weren't sure what was the best way to go (probably
I'm to blame for the majority of these).
3) Changes related to graphics (this has always been difficult, but is getting
much better now).
Driver regressions, other than the graphics-related, are really a very small
fraction.
Well, there still are some known problems unsolved, but that's a different
matter.
Thanks,
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902221839.50357.rjw@sisk.pl>
2009-02-22 18:01 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902220946070.3111@localhost.localdomain>
@ 2009-02-23 22:11 ` Arve Hjønnevåg
[not found] ` <d6200be20902231411s654f8b3csf1388a97f3f94ac4@mail.gmail.com>
3 siblings, 0 replies; 77+ messages in thread
From: Arve Hjønnevåg @ 2009-02-23 22:11 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, Linus Torvalds, pm list
On Sun, Feb 22, 2009 at 9:39 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> From: Rafael J. Wysocki <rjw@sisk.pl>
>
> Introduce two helper functions allowing us to disable device
> interrupts (at the IO-APIC level) during suspend or hibernation
> and enable them during the subsequent resume, respectively, so that
> the timer interrupts are enabled while "late" suspend callbacks and
> "early" resume callbacks provided by device drivers are being
> executed.
>
> Use these functions to rework the handling of interrupts during
> suspend (hibernation) and resume. Namely, interrupts will only be
> disabled on the CPU right before suspending sysdevs, while device
> interrupts will be disabled (at the IO-APIC level), with the help of
> the new helper function, before calling "late" suspend callbacks
> provided by device drivers and analogously during resume.
>
What impact does this have on wakeup interrupts? Unless you add a
check, after masking all interrupt at the CPU, to abort suspend if any
wakeup interrupt has IRQ_PENDING set I think you will loose wakeup
interrupts (at least for irqs that use default_disable).
--
Arve Hjønnevåg
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
2009-02-23 17:28 ` Linus Torvalds
@ 2009-02-23 22:11 ` Rafael J. Wysocki
0 siblings, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-23 22:11 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, pm list, Ingo Molnar, Johannes Berg
On Monday 23 February 2009, Linus Torvalds wrote:
>
> On Mon, 23 Feb 2009, Ingo Molnar wrote:
> >
> > Linus, do you have a strong opinion about which variant we
> > should use?
>
> Strong? No. I think mine is better just because _if_ another CPU is busy
> handling an interrupt that we're just now disabling, we'll just go on to
> the next interrupt. Waiting for them all at the end is always more
> efficient.
>
> But does it really matter? No. In this case I think we've shut down all
> other CPU's anyway, so the whole "serialize_irq()" should probably not
> even be needed.
But we're going to move the shutting down of the other CPUs after this point.
Finally, the sequence is going to be:
- "normal" suspend of devices
- disable device interrupts
- "late" suspend of devices
- _PTS
- disable nonboot CPUs
- local_irq_disable
- sysdev_suspend
[This is because ACPI wants us to put devices into low power states before
doing the _PTS, which in turn is supposed to be done before the disabling of
nonboot CPUs, and we want to put devices into low power states during "late"
suspend. Of course, analogously for the resume part.]
So, I think your version is _really_ better. :-)
Thanks,
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <d6200be20902231411s654f8b3csf1388a97f3f94ac4@mail.gmail.com>
@ 2009-02-23 22:23 ` Rafael J. Wysocki
2009-02-23 22:44 ` Arve Hjønnevåg
0 siblings, 1 reply; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-23 22:23 UTC (permalink / raw)
To: Arve Hjønnevåg
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, Linus Torvalds, pm list
On Monday 23 February 2009, Arve Hjønnevåg wrote:
> On Sun, Feb 22, 2009 at 9:39 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> > From: Rafael J. Wysocki <rjw@sisk.pl>
> >
> > Introduce two helper functions allowing us to disable device
> > interrupts (at the IO-APIC level) during suspend or hibernation
> > and enable them during the subsequent resume, respectively, so that
> > the timer interrupts are enabled while "late" suspend callbacks and
> > "early" resume callbacks provided by device drivers are being
> > executed.
> >
> > Use these functions to rework the handling of interrupts during
> > suspend (hibernation) and resume. Namely, interrupts will only be
> > disabled on the CPU right before suspending sysdevs, while device
> > interrupts will be disabled (at the IO-APIC level), with the help of
> > the new helper function, before calling "late" suspend callbacks
> > provided by device drivers and analogously during resume.
> >
>
> What impact does this have on wakeup interrupts? Unless you add a
> check, after masking all interrupt at the CPU, to abort suspend if any
> wakeup interrupt has IRQ_PENDING set I think you will loose wakeup
> interrupts (at least for irqs that use default_disable).
I _think_ they would have to be reenabled after we've called
local_irq_disable().
Thanks,
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
2009-02-23 22:23 ` Rafael J. Wysocki
@ 2009-02-23 22:44 ` Arve Hjønnevåg
0 siblings, 0 replies; 77+ messages in thread
From: Arve Hjønnevåg @ 2009-02-23 22:44 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, Linus Torvalds, pm list
On Mon, Feb 23, 2009 at 2:23 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> On Monday 23 February 2009, Arve Hjønnevåg wrote:
>> On Sun, Feb 22, 2009 at 9:39 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
>> > From: Rafael J. Wysocki <rjw@sisk.pl>
>> >
>> > Introduce two helper functions allowing us to disable device
>> > interrupts (at the IO-APIC level) during suspend or hibernation
>> > and enable them during the subsequent resume, respectively, so that
>> > the timer interrupts are enabled while "late" suspend callbacks and
>> > "early" resume callbacks provided by device drivers are being
>> > executed.
>> >
>> > Use these functions to rework the handling of interrupts during
>> > suspend (hibernation) and resume. Namely, interrupts will only be
>> > disabled on the CPU right before suspending sysdevs, while device
>> > interrupts will be disabled (at the IO-APIC level), with the help of
>> > the new helper function, before calling "late" suspend callbacks
>> > provided by device drivers and analogously during resume.
>> >
>>
>> What impact does this have on wakeup interrupts? Unless you add a
>> check, after masking all interrupt at the CPU, to abort suspend if any
>> wakeup interrupt has IRQ_PENDING set I think you will loose wakeup
>> interrupts (at least for irqs that use default_disable).
>
> I _think_ they would have to be reenabled after we've called
> local_irq_disable().
Are you talking about the irq_chip switching from enabled interrupts
to wake interrupts? It is not enough for the irq_chip to reenable the
hardware interrupt. If the interrupt is edge triggered and occurred
after you disabled it, but before local_irq_disable, the only record
of it is the IRQ_PENDING flag.
--
Arve Hjønnevåg
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902232239.05926.rjw@sisk.pl>
@ 2009-02-24 3:30 ` Eric W. Biederman
[not found] ` <m1iqn01ph3.fsf@fess.ebiederm.org>
1 sibling, 0 replies; 77+ messages in thread
From: Eric W. Biederman @ 2009-02-24 3:30 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Ingo Molnar, Linus Torvalds, pm list
"Rafael J. Wysocki" <rjw@sisk.pl> writes:
> On Monday 23 February 2009, Eric W. Biederman wrote:
>> "Rafael J. Wysocki" <rjw@sisk.pl> writes:
>>
>> >> I don't know where in the state machine this is getting called but
>> >> I would suggest doing this before we shutdown cpus.
>> >
>> > This is the plan. In fact, I'm going to do this in the next patch after the
>> > $subject one has been tested and found acceptable.
>>
>> Good to hear. Then let's please get a version of the irq disable that calls
>> shutdown, so we can be certain we don't have hardware irqs in flight.
>>
>> For the drivers it should not matter for clean cpu shutdown it will.
>
> OK, I will.
My apologies I was wrong. Calling shutdown is not safe.
I just remembered that masking an ioapic from anywhere besides the
irq handler can lock the ioapic state machine, and lead to non-recoverable
interrupts. It is rare but I have seen it happen. I wanted to figure out
how to migrate interrupts outside of interrupt context and this was what
prevented me. A suspend/resume cycle might be enough of a reset to
get the ioapic out of that state but I don't know.
The only safe way on x86 to shutdown a level triggered ioapic irq
outside of irq context is for the driver to program the hardware to
not generate an irq.
Therefore doing anything with the irqs at the point where we are
suspending them is a formality, and perhaps simply code that ensures
in-flight irqs don't make it past a certain point.
I believe we just need to call disable() and print a big nasty warning
if any irq comes in after the suspend stage.
Eric
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <m1iqn01ph3.fsf@fess.ebiederm.org>
@ 2009-02-24 22:42 ` Rafael J. Wysocki
[not found] ` <200902242342.07721.rjw@sisk.pl>
1 sibling, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-24 22:42 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Ingo Molnar, Linus Torvalds, pm list
On Tuesday 24 February 2009, Eric W. Biederman wrote:
> "Rafael J. Wysocki" <rjw@sisk.pl> writes:
>
> > On Monday 23 February 2009, Eric W. Biederman wrote:
> >> "Rafael J. Wysocki" <rjw@sisk.pl> writes:
> >>
> >> >> I don't know where in the state machine this is getting called but
> >> >> I would suggest doing this before we shutdown cpus.
> >> >
> >> > This is the plan. In fact, I'm going to do this in the next patch after the
> >> > $subject one has been tested and found acceptable.
> >>
> >> Good to hear. Then let's please get a version of the irq disable that calls
> >> shutdown, so we can be certain we don't have hardware irqs in flight.
> >>
> >> For the drivers it should not matter for clean cpu shutdown it will.
> >
> > OK, I will.
>
> My apologies I was wrong. Calling shutdown is not safe.
>
> I just remembered that masking an ioapic from anywhere besides the
> irq handler can lock the ioapic state machine, and lead to non-recoverable
> interrupts. It is rare but I have seen it happen. I wanted to figure out
> how to migrate interrupts outside of interrupt context and this was what
> prevented me. A suspend/resume cycle might be enough of a reset to
> get the ioapic out of that state but I don't know.
>
> The only safe way on x86 to shutdown a level triggered ioapic irq
> outside of irq context is for the driver to program the hardware to
> not generate an irq.
Well, that changes things quite a bit, because it means we can't change the
suspend-resume sequence in a way we thought we could without fixing all
drivers first, but this is exactly what we'd like to avoid by changing the
core.
I think the most important source of level triggered interrupts are PCI
devices, so perhaps we can make the PCI PM core use bit 10 of the PCI Device
Control register to prevent devices from generating INTx after the drivers'
suspend routines have been executed?
> Therefore doing anything with the irqs at the point where we are
> suspending them is a formality, and perhaps simply code that ensures
> in-flight irqs don't make it past a certain point.
>
> I believe we just need to call disable() and print a big nasty warning
> if any irq comes in after the suspend stage.
At the moment we're safe, since PCI devices are put into low power states
in the suspend stage. However, we'd like to make that happen in the "late
suspend" stage to avoid a problem with a shared interrupt occuring after one
of the devices using it has been suspended and its driver's irq handler can't
cope with that.
Thanks,
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902242342.07721.rjw@sisk.pl>
@ 2009-02-24 22:51 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902241449221.3111@localhost.localdomain>
2009-02-25 15:32 ` Alan Stern
2 siblings, 0 replies; 77+ messages in thread
From: Linus Torvalds @ 2009-02-24 22:51 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Tue, 24 Feb 2009, Rafael J. Wysocki wrote:
>
> > The only safe way on x86 to shutdown a level triggered ioapic irq
> > outside of irq context is for the driver to program the hardware to
> > not generate an irq.
>
> Well, that changes things quite a bit, because it means we can't change the
> suspend-resume sequence in a way we thought we could without fixing all
> drivers first, but this is exactly what we'd like to avoid by changing the
> core.
Calling "disable_irq()" is perfectly fine.
What is not possible on that broken IO-APIC (among other things) is to
actually turn the interrupts off at the apic (ie the whole ->shutdown()
thing). But that's not what we even want to do. What we care about is
just disabling the interrupt from a drievr perspective.
IOW, the patches I have seen are fine, and all the comments from Eric are
just confusion about what we want done.
WE DO NOT WANT TO TURN OFF THE IO-APIC. That may or may happen later, but
that's totally unrelated to this whole "suspend_device_irq()" thing.
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <alpine.LFD.2.00.0902241449221.3111@localhost.localdomain>
@ 2009-02-24 23:07 ` Rafael J. Wysocki
[not found] ` <200902250007.13069.rjw@sisk.pl>
` (2 subsequent siblings)
3 siblings, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-24 23:07 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Tuesday 24 February 2009, Linus Torvalds wrote:
>
> On Tue, 24 Feb 2009, Rafael J. Wysocki wrote:
> >
> > > The only safe way on x86 to shutdown a level triggered ioapic irq
> > > outside of irq context is for the driver to program the hardware to
> > > not generate an irq.
> >
> > Well, that changes things quite a bit, because it means we can't change the
> > suspend-resume sequence in a way we thought we could without fixing all
> > drivers first, but this is exactly what we'd like to avoid by changing the
> > core.
>
> Calling "disable_irq()" is perfectly fine.
>
> What is not possible on that broken IO-APIC (among other things) is to
> actually turn the interrupts off at the apic (ie the whole ->shutdown()
> thing). But that's not what we even want to do. What we care about is
> just disabling the interrupt from a drievr perspective.
>
> IOW, the patches I have seen are fine, and all the comments from Eric are
> just confusion about what we want done.
Ah, OK. Thanks for the explanation, I got confused too.
> WE DO NOT WANT TO TURN OFF THE IO-APIC. That may or may happen later, but
> that's totally unrelated to this whole "suspend_device_irq()" thing.
Yeah.
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902250007.13069.rjw@sisk.pl>
@ 2009-02-24 23:09 ` Ingo Molnar
[not found] ` <20090224230935.GA15165@elte.hu>
1 sibling, 0 replies; 77+ messages in thread
From: Ingo Molnar @ 2009-02-24 23:09 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Linus Torvalds, Thomas Gleixner
* Rafael J. Wysocki <rjw@sisk.pl> wrote:
> On Tuesday 24 February 2009, Linus Torvalds wrote:
> >
> > On Tue, 24 Feb 2009, Rafael J. Wysocki wrote:
> > >
> > > > The only safe way on x86 to shutdown a level triggered ioapic irq
> > > > outside of irq context is for the driver to program the hardware to
> > > > not generate an irq.
> > >
> > > Well, that changes things quite a bit, because it means we can't change the
> > > suspend-resume sequence in a way we thought we could without fixing all
> > > drivers first, but this is exactly what we'd like to avoid by changing the
> > > core.
> >
> > Calling "disable_irq()" is perfectly fine.
> >
> > What is not possible on that broken IO-APIC (among other
> > things) is to actually turn the interrupts off at the apic
> > (ie the whole ->shutdown() thing). But that's not what we
> > even want to do. What we care about is just disabling the
> > interrupt from a drievr perspective.
> >
> > IOW, the patches I have seen are fine, and all the comments
> > from Eric are just confusion about what we want done.
>
> Ah, OK. Thanks for the explanation, I got confused too.
>
> > WE DO NOT WANT TO TURN OFF THE IO-APIC. That may or may
> > happen later, but that's totally unrelated to this whole
> > "suspend_device_irq()" thing.
>
> Yeah.
We definitely dont want to turn off x86 IO-APICs - the timer IRQ
goes via one of them.
Ingo
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <20090224230935.GA15165@elte.hu>
@ 2009-02-24 23:29 ` Rafael J. Wysocki
[not found] ` <200902250029.16107.rjw@sisk.pl>
1 sibling, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-24 23:29 UTC (permalink / raw)
To: Ingo Molnar
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Linus Torvalds, Thomas Gleixner
On Wednesday 25 February 2009, Ingo Molnar wrote:
>
> * Rafael J. Wysocki <rjw@sisk.pl> wrote:
>
> > On Tuesday 24 February 2009, Linus Torvalds wrote:
> > >
> > > On Tue, 24 Feb 2009, Rafael J. Wysocki wrote:
> > > >
> > > > > The only safe way on x86 to shutdown a level triggered ioapic irq
> > > > > outside of irq context is for the driver to program the hardware to
> > > > > not generate an irq.
> > > >
> > > > Well, that changes things quite a bit, because it means we can't change the
> > > > suspend-resume sequence in a way we thought we could without fixing all
> > > > drivers first, but this is exactly what we'd like to avoid by changing the
> > > > core.
> > >
> > > Calling "disable_irq()" is perfectly fine.
> > >
> > > What is not possible on that broken IO-APIC (among other
> > > things) is to actually turn the interrupts off at the apic
> > > (ie the whole ->shutdown() thing). But that's not what we
> > > even want to do. What we care about is just disabling the
> > > interrupt from a drievr perspective.
> > >
> > > IOW, the patches I have seen are fine, and all the comments
> > > from Eric are just confusion about what we want done.
> >
> > Ah, OK. Thanks for the explanation, I got confused too.
> >
> > > WE DO NOT WANT TO TURN OFF THE IO-APIC. That may or may
> > > happen later, but that's totally unrelated to this whole
> > > "suspend_device_irq()" thing.
> >
> > Yeah.
>
> We definitely dont want to turn off x86 IO-APICs - the timer IRQ
> goes via one of them.
No, we don't. At least not at this point.
BTW, appended is the current (3rd) version of the $subject patch with some
of your comments taken into account. In particular, I did the following:
- moved [suspend|resume]_device_irqs() to a separate file (pm.c)
- fixed interrupt.h so that their headers are at a better place
- made enable_irq() clear IRQ_SUSPENDED
- made device_power_down() and device_power_up() call
suspend_device_irqs() and resume_device_irqs(), respectively, which
simplified the callers quite a bit (it changed the Xen code ordering, though,
but I _think_ it still should work).
Please have a look.
Thanks,
Rafael
---
From: Rafael J. Wysocki <rjw@sisk.pl>
Subject: PM: Rework handling of interrupts during suspend-resume (rev. 3)
Introduce two helper functions allowing us to prevent device drivers
from getting any interrupts (without disabling interrupts on the CPU)
during suspend (or hibernation) and to make them start to receive
interrupts again during the subsequent resume, respectively. These
functions make it possible to keep timer interrupts enabled while the
"late" suspend and "early" resume callbacks provided by device
drivers are being executed.
Use these functions to rework the handling of interrupts during
suspend (hibernation) and resume. Namely, interrupts will only be
disabled on the CPU right before suspending sysdevs, while device
drivers will be prevented from receiving interrupts, with the help of
the new helper function, before their "late" suspend callbacks run
(and analogously during resume).
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
arch/x86/kernel/apm_32.c | 15 ++++++++--
drivers/base/power/main.c | 20 ++++++++------
drivers/xen/manage.c | 16 ++++++-----
include/linux/interrupt.h | 4 ++
include/linux/irq.h | 1
kernel/irq/Makefile | 1
kernel/irq/manage.c | 3 +-
kernel/irq/pm.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++
kernel/kexec.c | 8 ++---
kernel/power/disk.c | 39 +++++++++++++++++++++-------
kernel/power/main.c | 17 ++++++++----
11 files changed, 146 insertions(+), 41 deletions(-)
Index: linux-2.6/include/linux/interrupt.h
===================================================================
--- linux-2.6.orig/include/linux/interrupt.h
+++ linux-2.6/include/linux/interrupt.h
@@ -106,6 +106,10 @@ extern void disable_irq_nosync(unsigned
extern void disable_irq(unsigned int irq);
extern void enable_irq(unsigned int irq);
+/* The following two functions are for the core kernel use only. */
+extern void suspend_device_irqs(void);
+extern void resume_device_irqs(void);
+
#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
extern cpumask_var_t irq_default_affinity;
Index: linux-2.6/kernel/power/main.c
===================================================================
--- linux-2.6.orig/kernel/power/main.c
+++ linux-2.6/kernel/power/main.c
@@ -287,17 +287,19 @@ void __attribute__ ((weak)) arch_suspend
*/
static int suspend_enter(suspend_state_t state)
{
- int error = 0;
+ int error;
device_pm_lock();
- arch_suspend_disable_irqs();
- BUG_ON(!irqs_disabled());
- if ((error = device_power_down(PMSG_SUSPEND))) {
+ error = device_power_down(PMSG_SUSPEND);
+ if (error) {
printk(KERN_ERR "PM: Some devices failed to power down\n");
goto Done;
}
+ arch_suspend_disable_irqs();
+ BUG_ON(!irqs_disabled());
+
error = sysdev_suspend(PMSG_SUSPEND);
if (!error) {
if (!suspend_test(TEST_CORE))
@@ -305,11 +307,14 @@ static int suspend_enter(suspend_state_t
sysdev_resume();
}
- device_power_up(PMSG_RESUME);
- Done:
arch_suspend_enable_irqs();
BUG_ON(irqs_disabled());
+
+ device_power_up(PMSG_RESUME);
+
+ Done:
device_pm_unlock();
+
return error;
}
Index: linux-2.6/kernel/power/disk.c
===================================================================
--- linux-2.6.orig/kernel/power/disk.c
+++ linux-2.6/kernel/power/disk.c
@@ -214,7 +214,7 @@ static int create_image(int platform_mod
return error;
device_pm_lock();
- local_irq_disable();
+
/* At this point, device_suspend() has been called, but *not*
* device_power_down(). We *must* call device_power_down() now.
* Otherwise, drivers for some devices (e.g. interrupt controllers)
@@ -225,8 +225,11 @@ static int create_image(int platform_mod
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting hibernation\n");
- goto Enable_irqs;
+ goto Unlock;
}
+
+ local_irq_disable();
+
sysdev_suspend(PMSG_FREEZE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
@@ -252,12 +255,16 @@ static int create_image(int platform_mod
/* NOTE: device_power_up() is just a resume() for devices
* that suspended with irqs off ... no overall powerup.
*/
+
Power_up_devices:
+ local_irq_enable();
+
device_power_up(in_suspend ?
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
- Enable_irqs:
- local_irq_enable();
+
+ Unlock:
device_pm_unlock();
+
return error;
}
@@ -336,13 +343,16 @@ static int resume_target_kernel(void)
int error;
device_pm_lock();
- local_irq_disable();
+
error = device_power_down(PMSG_QUIESCE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting resume\n");
- goto Enable_irqs;
+ goto Unlock;
}
+
+ local_irq_disable();
+
sysdev_suspend(PMSG_QUIESCE);
/* We'll ignore saved state, but this gets preempt count (etc) right */
save_processor_state();
@@ -366,11 +376,16 @@ static int resume_target_kernel(void)
swsusp_free();
restore_processor_state();
touch_softlockup_watchdog();
+
sysdev_resume();
- device_power_up(PMSG_RECOVER);
- Enable_irqs:
+
local_irq_enable();
+
+ device_power_up(PMSG_RECOVER);
+
+ Unlock:
device_pm_unlock();
+
return error;
}
@@ -447,15 +462,16 @@ int hibernation_platform_enter(void)
goto Finish;
device_pm_lock();
- local_irq_disable();
+
error = device_power_down(PMSG_HIBERNATE);
if (!error) {
+ local_irq_disable();
sysdev_suspend(PMSG_HIBERNATE);
hibernation_ops->enter();
/* We should never get here */
while (1);
}
- local_irq_enable();
+
device_pm_unlock();
/*
@@ -464,12 +480,15 @@ int hibernation_platform_enter(void)
*/
Finish:
hibernation_ops->finish();
+
Resume_devices:
entering_platform_hibernation = false;
device_resume(PMSG_RESTORE);
resume_console();
+
Close:
hibernation_ops->end();
+
return error;
}
Index: linux-2.6/arch/x86/kernel/apm_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apm_32.c
+++ linux-2.6/arch/x86/kernel/apm_32.c
@@ -1190,8 +1190,10 @@ static int suspend(int vetoable)
struct apm_user *as;
device_suspend(PMSG_SUSPEND);
- local_irq_disable();
+
device_power_down(PMSG_SUSPEND);
+
+ local_irq_disable();
sysdev_suspend(PMSG_SUSPEND);
local_irq_enable();
@@ -1209,9 +1211,12 @@ static int suspend(int vetoable)
if (err != APM_SUCCESS)
apm_error("suspend", err);
err = (err == APM_SUCCESS) ? 0 : -EIO;
+
sysdev_resume();
- device_power_up(PMSG_RESUME);
local_irq_enable();
+
+ device_power_up(PMSG_RESUME);
+
device_resume(PMSG_RESUME);
queue_event(APM_NORMAL_RESUME, NULL);
spin_lock(&user_list_lock);
@@ -1228,8 +1233,9 @@ static void standby(void)
{
int err;
- local_irq_disable();
device_power_down(PMSG_SUSPEND);
+
+ local_irq_disable();
sysdev_suspend(PMSG_SUSPEND);
local_irq_enable();
@@ -1239,8 +1245,9 @@ static void standby(void)
local_irq_disable();
sysdev_resume();
- device_power_up(PMSG_RESUME);
local_irq_enable();
+
+ device_power_up(PMSG_RESUME);
}
static apm_event_t get_event(void)
Index: linux-2.6/drivers/xen/manage.c
===================================================================
--- linux-2.6.orig/drivers/xen/manage.c
+++ linux-2.6/drivers/xen/manage.c
@@ -39,12 +39,6 @@ static int xen_suspend(void *data)
BUG_ON(!irqs_disabled());
- err = device_power_down(PMSG_SUSPEND);
- if (err) {
- printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n",
- err);
- return err;
- }
err = sysdev_suspend(PMSG_SUSPEND);
if (err) {
printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
@@ -69,7 +63,6 @@ static int xen_suspend(void *data)
xen_mm_unpin_all();
sysdev_resume();
- device_power_up(PMSG_RESUME);
if (!*cancelled) {
xen_irq_resume();
@@ -108,6 +101,12 @@ static void do_suspend(void)
/* XXX use normal device tree? */
xenbus_suspend();
+ err = device_power_down(PMSG_SUSPEND);
+ if (err) {
+ printk(KERN_ERR "device_power_down failed: %d\n", err);
+ goto resume_devices;
+ }
+
err = stop_machine(xen_suspend, &cancelled, &cpumask_of_cpu(0));
if (err) {
printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
@@ -120,6 +119,9 @@ static void do_suspend(void)
} else
xenbus_suspend_cancel();
+ device_power_up(PMSG_RESUME);
+
+resume_devices:
device_resume(PMSG_RESUME);
/* Make sure timer events get retriggered on all CPUs */
Index: linux-2.6/kernel/kexec.c
===================================================================
--- linux-2.6.orig/kernel/kexec.c
+++ linux-2.6/kernel/kexec.c
@@ -1454,7 +1454,6 @@ int kernel_kexec(void)
if (error)
goto Resume_devices;
device_pm_lock();
- local_irq_disable();
/* At this point, device_suspend() has been called,
* but *not* device_power_down(). We *must*
* device_power_down() now. Otherwise, drivers for
@@ -1464,8 +1463,9 @@ int kernel_kexec(void)
*/
error = device_power_down(PMSG_FREEZE);
if (error)
- goto Enable_irqs;
+ goto Unlock_pm;
+ local_irq_disable();
/* Suspend system devices */
error = sysdev_suspend(PMSG_FREEZE);
if (error)
@@ -1484,9 +1484,9 @@ int kernel_kexec(void)
if (kexec_image->preserve_context) {
sysdev_resume();
Power_up_devices:
- device_power_up(PMSG_RESTORE);
- Enable_irqs:
local_irq_enable();
+ device_power_up(PMSG_RESTORE);
+ Unlock_pm:
device_pm_unlock();
enable_nonboot_cpus();
Resume_devices:
Index: linux-2.6/include/linux/irq.h
===================================================================
--- linux-2.6.orig/include/linux/irq.h
+++ linux-2.6/include/linux/irq.h
@@ -65,6 +65,7 @@ typedef void (*irq_flow_handler_t)(unsig
#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
#define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/
+#define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */
#ifdef CONFIG_IRQ_PER_CPU
# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
Index: linux-2.6/kernel/irq/pm.c
===================================================================
--- /dev/null
+++ linux-2.6/kernel/irq/pm.c
@@ -0,0 +1,63 @@
+/*
+ * linux/kernel/irq/pm.c
+ *
+ * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file contains power management functions related to interrupts.
+ */
+
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+
+/**
+ * suspend_device_irqs - disable all currently enabled interrupt lines
+ *
+ * During system-wide suspend or hibernation device interrupts need to be
+ * disabled at the chip level and this function is provided for this
+ * purpose. It disables all interrupt lines that are enabled at the
+ * moment and sets the IRQ_SUSPENDED flag for them.
+ */
+void suspend_device_irqs(void)
+{
+ struct irq_desc *desc;
+ int irq;
+
+ for_each_irq_desc(irq, desc) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ if (!desc->depth && !(desc->status & IRQ_WAKEUP)
+ && desc->action && !(desc->action->flags & IRQF_TIMER)) {
+ desc->depth++;
+ desc->status |= IRQ_DISABLED | IRQ_SUSPENDED;
+ desc->chip->disable(irq);
+ }
+
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+
+ for_each_irq_desc(irq, desc) {
+ if (desc->status & IRQ_SUSPENDED)
+ synchronize_irq(irq);
+ }
+}
+EXPORT_SYMBOL_GPL(suspend_device_irqs);
+
+/**
+ * resume_device_irqs - enable interrupts disabled by suspend_device_irqs()
+ *
+ * Enable all interrupt lines previously disabled by suspend_device_irqs()
+ * that have the IRQ_SUSPENDED flag set.
+ */
+void resume_device_irqs(void)
+{
+ struct irq_desc *desc;
+ int irq;
+
+ for_each_irq_desc(irq, desc)
+ if (desc->status & IRQ_SUSPENDED)
+ enable_irq(irq);
+}
+EXPORT_SYMBOL_GPL(resume_device_irqs);
Index: linux-2.6/kernel/irq/Makefile
===================================================================
--- linux-2.6.orig/kernel/irq/Makefile
+++ linux-2.6/kernel/irq/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_GENERIC_IRQ_PROBE) += autop
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o
+obj-$(CONFIG_PM_SLEEP) += pm.o
Index: linux-2.6/kernel/irq/manage.c
===================================================================
--- linux-2.6.orig/kernel/irq/manage.c
+++ linux-2.6/kernel/irq/manage.c
@@ -222,8 +222,9 @@ static void __enable_irq(struct irq_desc
WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq);
break;
case 1: {
- unsigned int status = desc->status & ~IRQ_DISABLED;
+ unsigned int status;
+ status = desc->status & ~(IRQ_DISABLED | IRQ_SUSPENDED);
/* Prevent probing on this irq: */
desc->status = status | IRQ_NOPROBE;
check_irq_resend(desc, irq);
Index: linux-2.6/drivers/base/power/main.c
===================================================================
--- linux-2.6.orig/drivers/base/power/main.c
+++ linux-2.6/drivers/base/power/main.c
@@ -23,6 +23,7 @@
#include <linux/pm.h>
#include <linux/resume-trace.h>
#include <linux/rwsem.h>
+#include <linux/interrupt.h>
#include "../base.h"
#include "power.h"
@@ -305,7 +306,8 @@ static int resume_device_noirq(struct de
* Execute the appropriate "noirq resume" callback for all devices marked
* as DPM_OFF_IRQ.
*
- * Must be called with interrupts disabled and only one CPU running.
+ * Must be called under dpm_list_mtx. Device drivers should not receive
+ * interrupts while it's being executed.
*/
static void dpm_power_up(pm_message_t state)
{
@@ -326,14 +328,13 @@ static void dpm_power_up(pm_message_t st
* device_power_up - Turn on all devices that need special attention.
* @state: PM transition of the system being carried out.
*
- * Power on system devices, then devices that required we shut them down
- * with interrupts disabled.
- *
- * Must be called with interrupts disabled.
+ * Call the "early" resume handlers and enable device drivers to receive
+ * interrupts.
*/
void device_power_up(pm_message_t state)
{
dpm_power_up(state);
+ resume_device_irqs();
}
EXPORT_SYMBOL_GPL(device_power_up);
@@ -558,16 +559,17 @@ static int suspend_device_noirq(struct d
* device_power_down - Shut down special devices.
* @state: PM transition of the system being carried out.
*
- * Power down devices that require interrupts to be disabled.
- * Then power down system devices.
+ * Prevent device drivers from receiving interrupts and call the "late"
+ * suspend handlers.
*
- * Must be called with interrupts disabled and only one CPU running.
+ * Must be called under dpm_list_mtx.
*/
int device_power_down(pm_message_t state)
{
struct device *dev;
int error = 0;
+ suspend_device_irqs();
list_for_each_entry_reverse(dev, &dpm_list, power.entry) {
error = suspend_device_noirq(dev, state);
if (error) {
@@ -577,7 +579,7 @@ int device_power_down(pm_message_t state
dev->power.status = DPM_OFF_IRQ;
}
if (error)
- dpm_power_up(resume_event(state));
+ device_power_up(resume_event(state));
return error;
}
EXPORT_SYMBOL_GPL(device_power_down);
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <alpine.LFD.2.00.0902241449221.3111@localhost.localdomain>
2009-02-24 23:07 ` Rafael J. Wysocki
[not found] ` <200902250007.13069.rjw@sisk.pl>
@ 2009-02-25 4:16 ` Eric W. Biederman
[not found] ` <m18wnvup53.fsf@fess.ebiederm.org>
3 siblings, 0 replies; 77+ messages in thread
From: Eric W. Biederman @ 2009-02-25 4:16 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Ingo Molnar, pm list
Linus Torvalds <torvalds@linux-foundation.org> writes:
> On Tue, 24 Feb 2009, Rafael J. Wysocki wrote:
>>
>> > The only safe way on x86 to shutdown a level triggered ioapic irq
>> > outside of irq context is for the driver to program the hardware to
>> > not generate an irq.
>>
>> Well, that changes things quite a bit, because it means we can't change the
>> suspend-resume sequence in a way we thought we could without fixing all
>> drivers first, but this is exactly what we'd like to avoid by changing the
>> core.
>
> Calling "disable_irq()" is perfectly fine.
Agreed, I did not mean to indicate otherwise.
> What is not possible on that broken IO-APIC (among other things) is to
> actually turn the interrupts off at the apic (ie the whole ->shutdown()
> thing). But that's not what we even want to do. What we care about is
> just disabling the interrupt from a drievr perspective.
>
> IOW, the patches I have seen are fine, and all the comments from Eric are
> just confusion about what we want done.
Largely yes.
> WE DO NOT WANT TO TURN OFF THE IO-APIC. That may or may happen later, but
> that's totally unrelated to this whole "suspend_device_irq()" thing.
Right.
The question I was asking is:
Can we get the broken cpu hotunplug code out of the suspend path?
If we can get the devices into a low power state and not generating
interrupts by the time we disable cpus then we do not need to migrate
irqs from process context and risk hitting the ioapic bugs.
While related safely suspending cpus is a different problem and a
different patch.
Eric
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <m18wnvup53.fsf@fess.ebiederm.org>
@ 2009-02-25 4:26 ` Linus Torvalds
2009-02-25 4:59 ` Eric W. Biederman
0 siblings, 1 reply; 77+ messages in thread
From: Linus Torvalds @ 2009-02-25 4:26 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Ingo Molnar, pm list
On Tue, 24 Feb 2009, Eric W. Biederman wrote:
> The question I was asking is:
> Can we get the broken cpu hotunplug code out of the suspend path?
I think we can move it around. I don't think we can get rid of it.
> If we can get the devices into a low power state and not generating
> interrupts by the time we disable cpus then we do not need to migrate
> irqs from process context and risk hitting the ioapic bugs.
At least one issue is that the actual final "go to sleep" is something
that has to happen on just one CPU. And I'm pretty sure the others have to
have gone through the shutdown sequence before that.
And knowing ACPI, the ordering requirements will boil down to something
insane, like "you have to turn off the other CPU's _before_ you turn off
some od the core devices, because turning off the other CPU's may involve
them".
So if what you would _want_ to do is to move the "turn off CPU's" into the
very innermost layer, so that different architectures can then decide
whether they even need to go through that whole thing or not (because
turning off one core will automatically turn off all the others, simply
because the power was turned off), I suspect the answer is "no".
So you were probably hoping to never have to have that whole horrible
issue with moving interrupts around. I'm afraid I'm not seeing it happen.
But maybe we can have it happen after we've disabled all the non-system
devices, so that in practice there simply won't be any new interrupts
coming in any more.
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
2009-02-25 4:26 ` Linus Torvalds
@ 2009-02-25 4:59 ` Eric W. Biederman
0 siblings, 0 replies; 77+ messages in thread
From: Eric W. Biederman @ 2009-02-25 4:59 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Ingo Molnar, pm list
Linus Torvalds <torvalds@linux-foundation.org> writes:
> On Tue, 24 Feb 2009, Eric W. Biederman wrote:
>> The question I was asking is:
>> Can we get the broken cpu hotunplug code out of the suspend path?
>
> I think we can move it around. I don't think we can get rid of it.
>
>> If we can get the devices into a low power state and not generating
>> interrupts by the time we disable cpus then we do not need to migrate
>> irqs from process context and risk hitting the ioapic bugs.
>
> At least one issue is that the actual final "go to sleep" is something
> that has to happen on just one CPU. And I'm pretty sure the others have to
> have gone through the shutdown sequence before that.
>
> And knowing ACPI, the ordering requirements will boil down to something
> insane, like "you have to turn off the other CPU's _before_ you turn off
> some od the core devices, because turning off the other CPU's may involve
> them".
>
> So if what you would _want_ to do is to move the "turn off CPU's" into the
> very innermost layer, so that different architectures can then decide
> whether they even need to go through that whole thing or not (because
> turning off one core will automatically turn off all the others, simply
> because the power was turned off), I suspect the answer is "no".
>
> So you were probably hoping to never have to have that whole horrible
> issue with moving interrupts around. I'm afraid I'm not seeing it happen.
> But maybe we can have it happen after we've disabled all the non-system
> devices, so that in practice there simply won't be any new interrupts
> coming in any more.
Right. That is what I am hoping for. No device interrupts coming into the
cpus at the time we turn them off.
We can disable the devices and thus disable the interrupts the devices
are sending before we disable the cpus. That should make cpu disable
on suspend much easier to get solid then general x86 cpu hot-unplug.
Eric
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902250029.16107.rjw@sisk.pl>
@ 2009-02-25 13:23 ` Ingo Molnar
2009-02-26 1:17 ` Arve Hjønnevåg
[not found] ` <d6200be20902251717r531c022bl4c25fcb902201188@mail.gmail.com>
2 siblings, 0 replies; 77+ messages in thread
From: Ingo Molnar @ 2009-02-25 13:23 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Linus Torvalds, Thomas Gleixner
* Rafael J. Wysocki <rjw@sisk.pl> wrote:
> On Wednesday 25 February 2009, Ingo Molnar wrote:
> >
> > * Rafael J. Wysocki <rjw@sisk.pl> wrote:
> >
> > > On Tuesday 24 February 2009, Linus Torvalds wrote:
> > > >
> > > > On Tue, 24 Feb 2009, Rafael J. Wysocki wrote:
> > > > >
> > > > > > The only safe way on x86 to shutdown a level triggered ioapic irq
> > > > > > outside of irq context is for the driver to program the hardware to
> > > > > > not generate an irq.
> > > > >
> > > > > Well, that changes things quite a bit, because it means we can't change the
> > > > > suspend-resume sequence in a way we thought we could without fixing all
> > > > > drivers first, but this is exactly what we'd like to avoid by changing the
> > > > > core.
> > > >
> > > > Calling "disable_irq()" is perfectly fine.
> > > >
> > > > What is not possible on that broken IO-APIC (among other
> > > > things) is to actually turn the interrupts off at the apic
> > > > (ie the whole ->shutdown() thing). But that's not what we
> > > > even want to do. What we care about is just disabling the
> > > > interrupt from a drievr perspective.
> > > >
> > > > IOW, the patches I have seen are fine, and all the comments
> > > > from Eric are just confusion about what we want done.
> > >
> > > Ah, OK. Thanks for the explanation, I got confused too.
> > >
> > > > WE DO NOT WANT TO TURN OFF THE IO-APIC. That may or may
> > > > happen later, but that's totally unrelated to this whole
> > > > "suspend_device_irq()" thing.
> > >
> > > Yeah.
> >
> > We definitely dont want to turn off x86 IO-APICs - the timer IRQ
> > goes via one of them.
>
> No, we don't. At least not at this point.
>
> BTW, appended is the current (3rd) version of the $subject patch with some
> of your comments taken into account. In particular, I did the following:
> - moved [suspend|resume]_device_irqs() to a separate file (pm.c)
> - fixed interrupt.h so that their headers are at a better place
> - made enable_irq() clear IRQ_SUSPENDED
> - made device_power_down() and device_power_up() call
> suspend_device_irqs() and resume_device_irqs(), respectively, which
> simplified the callers quite a bit (it changed the Xen code ordering, though,
> but I _think_ it still should work).
>
> Please have a look.
Looks good, thanks Rafael!
Acked-by: Ingo Molnar <mingo@elte.hu>
Ingo
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902242342.07721.rjw@sisk.pl>
2009-02-24 22:51 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902241449221.3111@localhost.localdomain>
@ 2009-02-25 15:32 ` Alan Stern
2 siblings, 0 replies; 77+ messages in thread
From: Alan Stern @ 2009-02-25 15:32 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Ingo Molnar, Linus Torvalds, Thomas Gleixner
On Tue, 24 Feb 2009, Rafael J. Wysocki wrote:
> I think the most important source of level triggered interrupts are PCI
> devices, so perhaps we can make the PCI PM core use bit 10 of the PCI Device
> Control register to prevent devices from generating INTx after the drivers'
> suspend routines have been executed?
I wish that were true. As I recall, the original PCI specification did
not define this bit, and older PCI devices don't support it. So you
can't count on being able to supress interrupt generation this way.
Alan Stern
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] <Pine.LNX.4.44L0.0902251027270.3373-100000@iolanthe.rowland.org>
@ 2009-02-25 16:19 ` Linus Torvalds
0 siblings, 0 replies; 77+ messages in thread
From: Linus Torvalds @ 2009-02-25 16:19 UTC (permalink / raw)
To: Alan Stern
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Thomas Gleixner, Ingo Molnar
On Wed, 25 Feb 2009, Alan Stern wrote:
> On Tue, 24 Feb 2009, Rafael J. Wysocki wrote:
>
> > I think the most important source of level triggered interrupts are PCI
> > devices, so perhaps we can make the PCI PM core use bit 10 of the PCI Device
> > Control register to prevent devices from generating INTx after the drivers'
> > suspend routines have been executed?
>
> I wish that were true. As I recall, the original PCI specification did
> not define this bit, and older PCI devices don't support it. So you
> can't count on being able to supress interrupt generation this way.
It's definitely a new feature. In fact, I think even the current one makes
it optional, so even for "new" devices it's very unclear how many of them
actually support that bit.
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902250029.16107.rjw@sisk.pl>
2009-02-25 13:23 ` Ingo Molnar
@ 2009-02-26 1:17 ` Arve Hjønnevåg
[not found] ` <d6200be20902251717r531c022bl4c25fcb902201188@mail.gmail.com>
2 siblings, 0 replies; 77+ messages in thread
From: Arve Hjønnevåg @ 2009-02-26 1:17 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, Linus Torvalds, pm list
On Tue, Feb 24, 2009 at 3:29 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> BTW, appended is the current (3rd) version of the $subject patch with some
> of your comments taken into account. In particular, I did the following:
> - moved [suspend|resume]_device_irqs() to a separate file (pm.c)
> - fixed interrupt.h so that their headers are at a better place
> - made enable_irq() clear IRQ_SUSPENDED
> - made device_power_down() and device_power_up() call
> suspend_device_irqs() and resume_device_irqs(), respectively, which
> simplified the callers quite a bit (it changed the Xen code ordering, though,
> but I _think_ it still should work).
Do you plan to fix edge triggered wakeup interrupts? It still looks
like edge triggered wakeup interrupts that occur between
suspend_device_irqs and local_irq_disable will not cause a wakeup.
--
Arve Hjønnevåg
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <d6200be20902251717r531c022bl4c25fcb902201188@mail.gmail.com>
@ 2009-02-26 1:27 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902251724220.3111@localhost.localdomain>
` (2 subsequent siblings)
3 siblings, 0 replies; 77+ messages in thread
From: Linus Torvalds @ 2009-02-26 1:27 UTC (permalink / raw)
To: Arve Hjønnevåg
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Wed, 25 Feb 2009, Arve Hjønnevåg wrote:
>
> Do you plan to fix edge triggered wakeup interrupts? It still looks
> like edge triggered wakeup interrupts that occur between
> suspend_device_irqs and local_irq_disable will not cause a wakeup.
IF we ever see this as a real issue, we can either see it in the
IRQ_PENDING flag, or we can mark such interrupts specially. So it would be
solvable. That said, I haven't actually heard any real usage cases. Normal
wakeup events are _not_ interrupts in the regular "device interrupt
controller" sense.
So can you actually point to an explicit example of something where this
is a real issue?
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <alpine.LFD.2.00.0902251724220.3111@localhost.localdomain>
@ 2009-02-26 2:13 ` Arve Hjønnevåg
[not found] ` <d6200be20902251813o3ff4a065nb0a54321e27c512d@mail.gmail.com>
1 sibling, 0 replies; 77+ messages in thread
From: Arve Hjønnevåg @ 2009-02-26 2:13 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Wed, Feb 25, 2009 at 5:27 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
>
> On Wed, 25 Feb 2009, Arve Hjønnevåg wrote:
>>
>> Do you plan to fix edge triggered wakeup interrupts? It still looks
>> like edge triggered wakeup interrupts that occur between
>> suspend_device_irqs and local_irq_disable will not cause a wakeup.
>
> IF we ever see this as a real issue, we can either see it in the
> IRQ_PENDING flag, or we can mark such interrupts specially. So it would be
> solvable. That said, I haven't actually heard any real usage cases. Normal
> wakeup events are _not_ interrupts in the regular "device interrupt
> controller" sense.
>
> So can you actually point to an explicit example of something where this
> is a real issue?
On the msm platform the keyboard driver currently leave the interrupts
enabled when suspended. If the interrupt handler is called, we use a
wakelock to abort suspend (without wakelocks you would need to set a
flag and abort in suspend_late instead). If the interrupt occurs after
local_irq_disable, it will still be pending when we get to the suspend
enter hook and suspend will be aborted there.
As far as I can tell, this change breaks this. If you press a key at
the right time, it will be ignored.
--
Arve Hjønnevåg
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <d6200be20902251813o3ff4a065nb0a54321e27c512d@mail.gmail.com>
@ 2009-02-26 2:51 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902251848120.3111@localhost.localdomain>
1 sibling, 0 replies; 77+ messages in thread
From: Linus Torvalds @ 2009-02-26 2:51 UTC (permalink / raw)
To: Arve Hjønnevåg
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Wed, 25 Feb 2009, Arve Hjønnevåg wrote:
>
> On the msm platform the keyboard driver currently leave the interrupts
> enabled when suspended. If the interrupt handler is called, we use a
> wakelock to abort suspend (without wakelocks you would need to set a
> flag and abort in suspend_late instead). If the interrupt occurs after
> local_irq_disable, it will still be pending when we get to the suspend
> enter hook and suspend will be aborted there.
>
> As far as I can tell, this change breaks this. If you press a key at
> the right time, it will be ignored.
Is the irq on a private non-shared interrupt line? If so, you could just
mark it as IRQF_TIMER, and the irq disable logic won't touch it.
What keyboard driver does this mfm thing, btw?
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <alpine.LFD.2.00.0902251848120.3111@localhost.localdomain>
@ 2009-02-26 3:00 ` Ingo Molnar
[not found] ` <20090226030050.GA3361@elte.hu>
1 sibling, 0 replies; 77+ messages in thread
From: Ingo Molnar @ 2009-02-26 3:00 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Thomas Gleixner
* Linus Torvalds <torvalds@linux-foundation.org> wrote:
>
>
> On Wed, 25 Feb 2009, Arve Hjønnevåg wrote:
> >
> > On the msm platform the keyboard driver currently leave the interrupts
> > enabled when suspended. If the interrupt handler is called, we use a
> > wakelock to abort suspend (without wakelocks you would need to set a
> > flag and abort in suspend_late instead). If the interrupt occurs after
> > local_irq_disable, it will still be pending when we get to the suspend
> > enter hook and suspend will be aborted there.
> >
> > As far as I can tell, this change breaks this. If you press a key at
> > the right time, it will be ignored.
>
> Is the irq on a private non-shared interrupt line? If so, you
> could just mark it as IRQF_TIMER, and the irq disable logic
> won't touch it.
Hm, if that solves the problem then it would be nice to have a
new IRQF_NO_SUSPEND flag for it, in addition to IRQF_TIMER:
./interrupt.h: * IRQF_TIMER - Flag to mark this interrupt as timer interrupt
./interrupt.h:#define IRQF_TIMER 0x00000200
to express such quirks cleanly.
and the suspend code can check the (IRQF_TIMER |
IRQF_NO_SUSPEND) mask - so no extra cost.
Right now we have a clean enumeration of timer interrupts, would
be nice to keep that.
Ingo
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <20090226030050.GA3361@elte.hu>
@ 2009-02-26 3:31 ` Arve Hjønnevåg
[not found] ` <d6200be20902251931lf563c2aw43d5266e9677e168@mail.gmail.com>
1 sibling, 0 replies; 77+ messages in thread
From: Arve Hjønnevåg @ 2009-02-26 3:31 UTC (permalink / raw)
To: Ingo Molnar
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Linus Torvalds, Thomas Gleixner
On Wed, Feb 25, 2009 at 7:00 PM, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Linus Torvalds <torvalds@linux-foundation.org> wrote:
>
>>
>>
>> On Wed, 25 Feb 2009, Arve Hjønnevåg wrote:
>> >
>> > On the msm platform the keyboard driver currently leave the interrupts
>> > enabled when suspended. If the interrupt handler is called, we use a
>> > wakelock to abort suspend (without wakelocks you would need to set a
>> > flag and abort in suspend_late instead). If the interrupt occurs after
>> > local_irq_disable, it will still be pending when we get to the suspend
>> > enter hook and suspend will be aborted there.
>> >
>> > As far as I can tell, this change breaks this. If you press a key at
>> > the right time, it will be ignored.
>>
>> Is the irq on a private non-shared interrupt line? If so, you
>> could just mark it as IRQF_TIMER, and the irq disable logic
>> won't touch it.
That would not work without wakelocks support, since the interrupt
could occur after suspend_late which is the last chance for the driver
to abort sleep. (The patch also breaks my current wakelock
implementation since I use a suspend_late hook to abort sleep, but
this should be easy to fix)
> Hm, if that solves the problem then it would be nice to have a
> new IRQF_NO_SUSPEND flag for it, in addition to IRQF_TIMER:
I think the right fix is for any interrupt that has IRQ_WAKEUP set to
abort suspend if it is pending. I don't know if anyone relies on these
interrupts being dropped now though.
--
Arve Hjønnevåg
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <d6200be20902251931lf563c2aw43d5266e9677e168@mail.gmail.com>
@ 2009-02-26 3:37 ` Linus Torvalds
2009-02-26 3:50 ` Arve Hjønnevåg
[not found] ` <d6200be20902251950l34d5968q4b553ce8305ebd4b@mail.gmail.com>
0 siblings, 2 replies; 77+ messages in thread
From: Linus Torvalds @ 2009-02-26 3:37 UTC (permalink / raw)
To: Arve Hjønnevåg
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Wed, 25 Feb 2009, Arve Hjønnevåg wrote:
>
> That would not work without wakelocks support, since the interrupt
> could occur after suspend_late which is the last chance for the driver
> to abort sleep. (The patch also breaks my current wakelock
> implementation since I use a suspend_late hook to abort sleep, but
> this should be easy to fix)
Since this must be some very deep arch-specific thing anyway, just make
the dang thing be a "sysdev". At that point, its "suspend" function gets
called way later (at which point CPU interrupts are off).
> > Hm, if that solves the problem then it would be nice to have a
> > new IRQF_NO_SUSPEND flag for it, in addition to IRQF_TIMER:
>
> I think the right fix is for any interrupt that has IRQ_WAKEUP set to
> abort suspend if it is pending. I don't know if anyone relies on these
> interrupts being dropped now though.
We could add something like that, but quite frankly, I'd hate to unless
there is some seriously common case. If it's just an oddball hacky special
case, it's easier to just say "hey, you have that crazy system device, you
handle it yourself".
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
2009-02-26 3:37 ` Linus Torvalds
@ 2009-02-26 3:50 ` Arve Hjønnevåg
[not found] ` <d6200be20902251950l34d5968q4b553ce8305ebd4b@mail.gmail.com>
1 sibling, 0 replies; 77+ messages in thread
From: Arve Hjønnevåg @ 2009-02-26 3:50 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Wed, Feb 25, 2009 at 7:37 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
>
> On Wed, 25 Feb 2009, Arve Hjønnevåg wrote:
>>
>> That would not work without wakelocks support, since the interrupt
>> could occur after suspend_late which is the last chance for the driver
>> to abort sleep. (The patch also breaks my current wakelock
>> implementation since I use a suspend_late hook to abort sleep, but
>> this should be easy to fix)
>
> Since this must be some very deep arch-specific thing anyway, just make
> the dang thing be a "sysdev". At that point, its "suspend" function gets
> called way later (at which point CPU interrupts are off).
Wakelocks can use a sysdev, but I don't think a keyboard driver should
be a sysdev.
>
>> > Hm, if that solves the problem then it would be nice to have a
>> > new IRQF_NO_SUSPEND flag for it, in addition to IRQF_TIMER:
>>
>> I think the right fix is for any interrupt that has IRQ_WAKEUP set to
>> abort suspend if it is pending. I don't know if anyone relies on these
>> interrupts being dropped now though.
>
> We could add something like that, but quite frankly, I'd hate to unless
> there is some seriously common case. If it's just an oddball hacky special
> case, it's easier to just say "hey, you have that crazy system device, you
> handle it yourself".
I don't think this is a oddball case. It is very common to connect
keys or keypads to gpios. If these keys are wakeup keys, it is not OK
to loose interrupts during the suspend phase.
--
Arve Hjønnevåg
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <d6200be20902251950l34d5968q4b553ce8305ebd4b@mail.gmail.com>
@ 2009-02-26 3:57 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902251956450.3111@localhost.localdomain>
1 sibling, 0 replies; 77+ messages in thread
From: Linus Torvalds @ 2009-02-26 3:57 UTC (permalink / raw)
To: Arve Hjønnevåg
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Wed, 25 Feb 2009, Arve Hjønnevåg wrote:
>
> I don't think this is a oddball case. It is very common to connect
> keys or keypads to gpios. If these keys are wakeup keys, it is not OK
> to loose interrupts during the suspend phase.
.. and how many drivers is that? Is it one or two "gpio input drivers" or
is it a hundred?
The "common" is not so much about "how many machines", but "in how many
drivers would you actually do this".
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <alpine.LFD.2.00.0902251956450.3111@localhost.localdomain>
@ 2009-02-26 4:13 ` Arve Hjønnevåg
[not found] ` <d6200be20902252013v366ce771w8a129e9f3b75927f@mail.gmail.com>
1 sibling, 0 replies; 77+ messages in thread
From: Arve Hjønnevåg @ 2009-02-26 4:13 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Wed, Feb 25, 2009 at 7:57 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
>
> On Wed, 25 Feb 2009, Arve Hjønnevåg wrote:
>>
>> I don't think this is a oddball case. It is very common to connect
>> keys or keypads to gpios. If these keys are wakeup keys, it is not OK
>> to loose interrupts during the suspend phase.
>
> .. and how many drivers is that? Is it one or two "gpio input drivers" or
> is it a hundred?
>
> The "common" is not so much about "how many machines", but "in how many
> drivers would you actually do this".
We only have one gpio input driver, but I don't think is good to loose
any wakeup interrupts. Any driver that needs an edge triggered wakeup
interrupt will have problems if the hardware does not regenerate the
interrupt when the host does not respond.
It is not hard to work around this problem in the platform specific
interrupt code, but I think it is a generic problem worth fixing for
every platform.
--
Arve Hjønnevåg
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <d6200be20902252013v366ce771w8a129e9f3b75927f@mail.gmail.com>
@ 2009-02-26 4:20 ` Eric W. Biederman
[not found] ` <m1y6vtkewc.fsf@fess.ebiederm.org>
1 sibling, 0 replies; 77+ messages in thread
From: Eric W. Biederman @ 2009-02-26 4:20 UTC (permalink / raw)
To: Arve Hjønnevåg
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Ingo Molnar, Linus Torvalds, pm list
Arve Hjønnevåg <arve@android.com> writes:
> We only have one gpio input driver, but I don't think is good to loose
> any wakeup interrupts. Any driver that needs an edge triggered wakeup
> interrupt will have problems if the hardware does not regenerate the
> interrupt when the host does not respond.
We are not loosing interrupts. The normal implementation of disable
is a software disable and sets IRQ_PENDING to ensure we don't loose
interrupts when the interrupt is disabled.
Eric
_______________________________________________
linux-pm mailing list
linux-pm@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/linux-pm
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <m1y6vtkewc.fsf@fess.ebiederm.org>
@ 2009-02-26 4:24 ` Arve Hjønnevåg
0 siblings, 0 replies; 77+ messages in thread
From: Arve Hjønnevåg @ 2009-02-26 4:24 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Ingo Molnar, Linus Torvalds, pm list
On Wed, Feb 25, 2009 at 8:20 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Arve Hjønnevåg <arve@android.com> writes:
>
>> We only have one gpio input driver, but I don't think is good to loose
>> any wakeup interrupts. Any driver that needs an edge triggered wakeup
>> interrupt will have problems if the hardware does not regenerate the
>> interrupt when the host does not respond.
>
> We are not loosing interrupts. The normal implementation of disable
> is a software disable and sets IRQ_PENDING to ensure we don't loose
> interrupts when the interrupt is disabled.
We loose the wakeup, but yes, the interrupt will be delivered if the
system wakes up for any other reason.
--
Arve Hjønnevåg
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <d6200be20902251717r531c022bl4c25fcb902201188@mail.gmail.com>
2009-02-26 1:27 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902251724220.3111@localhost.localdomain>
@ 2009-02-26 9:50 ` Rafael J. Wysocki
[not found] ` <200902261050.50531.rjw@sisk.pl>
3 siblings, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-26 9:50 UTC (permalink / raw)
To: Arve Hjønnevåg
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, Linus Torvalds, pm list
On Thursday 26 February 2009, Arve Hjønnevåg wrote:
> On Tue, Feb 24, 2009 at 3:29 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> > BTW, appended is the current (3rd) version of the $subject patch with some
> > of your comments taken into account. In particular, I did the following:
> > - moved [suspend|resume]_device_irqs() to a separate file (pm.c)
> > - fixed interrupt.h so that their headers are at a better place
> > - made enable_irq() clear IRQ_SUSPENDED
> > - made device_power_down() and device_power_up() call
> > suspend_device_irqs() and resume_device_irqs(), respectively, which
> > simplified the callers quite a bit (it changed the Xen code ordering, though,
> > but I _think_ it still should work).
>
> Do you plan to fix edge triggered wakeup interrupts? It still looks
> like edge triggered wakeup interrupts that occur between
> suspend_device_irqs and local_irq_disable will not cause a wakeup.
In the current version of the patch the interrupts that have IRQ_WAKEUP set
in status are not disabled. Is this not enough?
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902261050.50531.rjw@sisk.pl>
@ 2009-02-26 20:34 ` Arve Hjønnevåg
[not found] ` <d6200be20902261234i5a3896fo78a7f56d16a247c7@mail.gmail.com>
1 sibling, 0 replies; 77+ messages in thread
From: Arve Hjønnevåg @ 2009-02-26 20:34 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, Linus Torvalds, pm list
On Thu, Feb 26, 2009 at 1:50 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> On Thursday 26 February 2009, Arve Hjønnevåg wrote:
>> On Tue, Feb 24, 2009 at 3:29 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
>> > BTW, appended is the current (3rd) version of the $subject patch with some
>> > of your comments taken into account. In particular, I did the following:
>> > - moved [suspend|resume]_device_irqs() to a separate file (pm.c)
>> > - fixed interrupt.h so that their headers are at a better place
>> > - made enable_irq() clear IRQ_SUSPENDED
>> > - made device_power_down() and device_power_up() call
>> > suspend_device_irqs() and resume_device_irqs(), respectively, which
>> > simplified the callers quite a bit (it changed the Xen code ordering, though,
>> > but I _think_ it still should work).
>>
>> Do you plan to fix edge triggered wakeup interrupts? It still looks
>> like edge triggered wakeup interrupts that occur between
>> suspend_device_irqs and local_irq_disable will not cause a wakeup.
>
> In the current version of the patch the interrupts that have IRQ_WAKEUP set
> in status are not disabled. Is this not enough?
That is enough for drivers that use wakelocks to abort suspend (if I
fix the wakelock code to not use a platform device as its last abort
point). It is not enough if you don't have wakelocks, since the
interrupt can occur after suspend_late has been called and the driver
has no way to abort suspend.
--
Arve Hjønnevåg
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <d6200be20902261234i5a3896fo78a7f56d16a247c7@mail.gmail.com>
@ 2009-02-26 20:57 ` Benjamin Herrenschmidt
2009-02-26 21:20 ` Arve Hjønnevåg
[not found] ` <d6200be20902261320q58a347e5s1af790e56b186c81@mail.gmail.com>
2009-02-26 21:58 ` Rafael J. Wysocki
[not found] ` <200902262258.55835.rjw@sisk.pl>
2 siblings, 2 replies; 77+ messages in thread
From: Benjamin Herrenschmidt @ 2009-02-26 20:57 UTC (permalink / raw)
To: Arve Hjønnevåg
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, Linus Torvalds, pm list
On Thu, 2009-02-26 at 12:34 -0800, Arve Hjønnevåg wrote:
> That is enough for drivers that use wakelocks to abort suspend (if I
> fix the wakelock code to not use a platform device as its last abort
> point). It is not enough if you don't have wakelocks, since the
> interrupt can occur after suspend_late has been called and the driver
> has no way to abort suspend.
>
I still don't quite see how you deal with the race anyway. Ie. Even
without Rafael patch, what if the interrupt occurs after your sysdev
suspend ?
In general, unless they are level sensitive, wakeup interrupts tend to
always be somewhat racy.
Cheers,
Ben.
_______________________________________________
linux-pm mailing list
linux-pm@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/linux-pm
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
2009-02-26 20:57 ` Benjamin Herrenschmidt
@ 2009-02-26 21:20 ` Arve Hjønnevåg
[not found] ` <d6200be20902261320q58a347e5s1af790e56b186c81@mail.gmail.com>
1 sibling, 0 replies; 77+ messages in thread
From: Arve Hjønnevåg @ 2009-02-26 21:20 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, Linus Torvalds, pm list
On Thu, Feb 26, 2009 at 12:57 PM, Benjamin Herrenschmidt
<benh@kernel.crashing.org> wrote:
> On Thu, 2009-02-26 at 12:34 -0800, Arve Hjønnevåg wrote:
>> That is enough for drivers that use wakelocks to abort suspend (if I
>> fix the wakelock code to not use a platform device as its last abort
>> point). It is not enough if you don't have wakelocks, since the
>> interrupt can occur after suspend_late has been called and the driver
>> has no way to abort suspend.
>>
> I still don't quite see how you deal with the race anyway. Ie. Even
> without Rafael patch, what if the interrupt occurs after your sysdev
> suspend ?
After local_irq_disable has been called, the interrupt will no longer
be cleared by Linux when it occurs. This means that is still pending
when you get to the low level suspend code which will prevent suspend.
> In general, unless they are level sensitive, wakeup interrupts tend to
> always be somewhat racy.
They don't have to be. If you have a separate hardware component that
tracks wakeup interrupts, you need to start this before you stop the
main interrupt controller. If any interrupts are pending at this time
you abort suspend. After a wakeup you do the reverse.
--
Arve Hjønnevåg
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <d6200be20902261320q58a347e5s1af790e56b186c81@mail.gmail.com>
@ 2009-02-26 21:49 ` Benjamin Herrenschmidt
0 siblings, 0 replies; 77+ messages in thread
From: Benjamin Herrenschmidt @ 2009-02-26 21:49 UTC (permalink / raw)
To: Arve Hjønnevåg
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, Linus Torvalds, pm list
On Thu, 2009-02-26 at 13:20 -0800, Arve Hjønnevåg wrote:
> On Thu, Feb 26, 2009 at 12:57 PM, Benjamin Herrenschmidt
> <benh@kernel.crashing.org> wrote:
> > On Thu, 2009-02-26 at 12:34 -0800, Arve Hjønnevåg wrote:
> >> That is enough for drivers that use wakelocks to abort suspend (if I
> >> fix the wakelock code to not use a platform device as its last abort
> >> point). It is not enough if you don't have wakelocks, since the
> >> interrupt can occur after suspend_late has been called and the driver
> >> has no way to abort suspend.
> >>
> > I still don't quite see how you deal with the race anyway. Ie. Even
> > without Rafael patch, what if the interrupt occurs after your sysdev
> > suspend ?
>
> After local_irq_disable has been called, the interrupt will no longer
> be cleared by Linux when it occurs. This means that is still pending
> when you get to the low level suspend code which will prevent suspend.
Ok so you want this interrupt to stay pending at the PIC level ? So just
marking it so the kernel doesn't disable it should do the trick.
> > In general, unless they are level sensitive, wakeup interrupts tend to
> > always be somewhat racy.
>
> They don't have to be. If you have a separate hardware component that
> tracks wakeup interrupts, you need to start this before you stop the
> main interrupt controller. If any interrupts are pending at this time
> you abort suspend. After a wakeup you do the reverse.
Right but then you can start this earlier and there is no problem. But
if you do want the interrupt to remaining pending in the PIC, then you
probably need to set that magic flag so we don't disable it, that should
do the trick just fine no ?
It's hard to tell without more detailed HW specs of course...
Ben.
_______________________________________________
linux-pm mailing list
linux-pm@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/linux-pm
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <d6200be20902261234i5a3896fo78a7f56d16a247c7@mail.gmail.com>
2009-02-26 20:57 ` Benjamin Herrenschmidt
@ 2009-02-26 21:58 ` Rafael J. Wysocki
[not found] ` <200902262258.55835.rjw@sisk.pl>
2 siblings, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-26 21:58 UTC (permalink / raw)
To: Arve Hjønnevåg
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, Linus Torvalds, pm list
On Thursday 26 February 2009, Arve Hjønnevåg wrote:
> On Thu, Feb 26, 2009 at 1:50 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> > On Thursday 26 February 2009, Arve Hjønnevåg wrote:
> >> On Tue, Feb 24, 2009 at 3:29 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> >> > BTW, appended is the current (3rd) version of the $subject patch with some
> >> > of your comments taken into account. In particular, I did the following:
> >> > - moved [suspend|resume]_device_irqs() to a separate file (pm.c)
> >> > - fixed interrupt.h so that their headers are at a better place
> >> > - made enable_irq() clear IRQ_SUSPENDED
> >> > - made device_power_down() and device_power_up() call
> >> > suspend_device_irqs() and resume_device_irqs(), respectively, which
> >> > simplified the callers quite a bit (it changed the Xen code ordering, though,
> >> > but I _think_ it still should work).
> >>
> >> Do you plan to fix edge triggered wakeup interrupts? It still looks
> >> like edge triggered wakeup interrupts that occur between
> >> suspend_device_irqs and local_irq_disable will not cause a wakeup.
> >
> > In the current version of the patch the interrupts that have IRQ_WAKEUP set
> > in status are not disabled. Is this not enough?
>
> That is enough for drivers that use wakelocks to abort suspend (if I
> fix the wakelock code to not use a platform device as its last abort
> point). It is not enough if you don't have wakelocks, since the
> interrupt can occur after suspend_late has been called and the driver
> has no way to abort suspend.
Well, how exactly the $subject patch does cause this problem to happen?
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902262258.55835.rjw@sisk.pl>
@ 2009-02-26 22:10 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902261404580.3111@localhost.localdomain>
1 sibling, 0 replies; 77+ messages in thread
From: Linus Torvalds @ 2009-02-26 22:10 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Thu, 26 Feb 2009, Rafael J. Wysocki wrote:
>
> Well, how exactly the $subject patch does cause this problem to happen?
Rafael, the problem is that if an interrupt happens while it's disabled -
but before the CPU has actually turned all interrupts off - the CPU will
ACK the interrupt (but just set a flag for it being PENDING), so now the
chipset logic around it will not see it as pending any more, so now the
chipset won't auto-wake the CPU immediately (or more likely, it won't
even suspend it).
It's trivial to fix multiple ways, so I wouldn't worry. The most trivial
way is to just have some sysdev drievr code simply do something like
static int sysdev_suspend()
{
for_each_irq(irq,desc) {
if (!(desc->flags & IRQF_WAKE))
continue;
if (desc->flags & IRQ_PENDING)
return -EBUSY;
}
return 0;
}
and that should automatically mean that if any irq is pending, the suspend
will fail and we'll immediately wake up again.
It looks trivial, and I don't understand why Arve can't just do the sysdev
thing.
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <alpine.LFD.2.00.0902261404580.3111@localhost.localdomain>
@ 2009-02-26 22:30 ` Arve Hjønnevåg
2009-02-26 22:30 ` Rafael J. Wysocki
[not found] ` <d6200be20902261430w19ea3e6fye014f8443d9ab5ca@mail.gmail.com>
2 siblings, 0 replies; 77+ messages in thread
From: Arve Hjønnevåg @ 2009-02-26 22:30 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Thu, Feb 26, 2009 at 2:10 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
>
> On Thu, 26 Feb 2009, Rafael J. Wysocki wrote:
>>
>> Well, how exactly the $subject patch does cause this problem to happen?
>
> Rafael, the problem is that if an interrupt happens while it's disabled -
> but before the CPU has actually turned all interrupts off - the CPU will
> ACK the interrupt (but just set a flag for it being PENDING), so now the
> chipset logic around it will not see it as pending any more, so now the
> chipset won't auto-wake the CPU immediately (or more likely, it won't
> even suspend it).
>
> It's trivial to fix multiple ways, so I wouldn't worry. The most trivial
> way is to just have some sysdev drievr code simply do something like
>
> static int sysdev_suspend()
> {
> for_each_irq(irq,desc) {
> if (!(desc->flags & IRQF_WAKE))
> continue;
> if (desc->flags & IRQ_PENDING)
> return -EBUSY;
> }
> return 0;
> }
>
> and that should automatically mean that if any irq is pending, the suspend
> will fail and we'll immediately wake up again.
>
> It looks trivial, and I don't understand why Arve can't just do the sysdev
> thing.
I can. My point is that the patch breaks our existing code. If anyone
else uses edge triggered wakeup interrupt it may break from them as
well. The main question if this should be fixed separately for every
platform that needs it, or if pending wakeup interrupts should always
abort sleep.
--
Arve Hjønnevåg
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <alpine.LFD.2.00.0902261404580.3111@localhost.localdomain>
2009-02-26 22:30 ` Arve Hjønnevåg
@ 2009-02-26 22:30 ` Rafael J. Wysocki
[not found] ` <d6200be20902261430w19ea3e6fye014f8443d9ab5ca@mail.gmail.com>
2 siblings, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-26 22:30 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Thursday 26 February 2009, Linus Torvalds wrote:
>
> On Thu, 26 Feb 2009, Rafael J. Wysocki wrote:
> >
> > Well, how exactly the $subject patch does cause this problem to happen?
>
> Rafael, the problem is that if an interrupt happens while it's disabled -
> but before the CPU has actually turned all interrupts off - the CPU will
> ACK the interrupt (but just set a flag for it being PENDING), so now the
> chipset logic around it will not see it as pending any more, so now the
> chipset won't auto-wake the CPU immediately (or more likely, it won't
> even suspend it).
Ah, I see now, thanks.
> It's trivial to fix multiple ways, so I wouldn't worry. The most trivial
> way is to just have some sysdev drievr code simply do something like
>
> static int sysdev_suspend()
> {
> for_each_irq(irq,desc) {
> if (!(desc->flags & IRQF_WAKE))
> continue;
> if (desc->flags & IRQ_PENDING)
> return -EBUSY;
> }
> return 0;
> }
>
> and that should automatically mean that if any irq is pending, the suspend
> will fail and we'll immediately wake up again.
Yeah.
Thanks,
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <d6200be20902261430w19ea3e6fye014f8443d9ab5ca@mail.gmail.com>
@ 2009-02-26 23:10 ` Rafael J. Wysocki
[not found] ` <200902270010.38291.rjw@sisk.pl>
1 sibling, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-26 23:10 UTC (permalink / raw)
To: Arve Hjønnevåg
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, Linus Torvalds, pm list
On Thursday 26 February 2009, Arve Hjønnevåg wrote:
> On Thu, Feb 26, 2009 at 2:10 PM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> >
> > On Thu, 26 Feb 2009, Rafael J. Wysocki wrote:
> >>
> >> Well, how exactly the $subject patch does cause this problem to happen?
> >
> > Rafael, the problem is that if an interrupt happens while it's disabled -
> > but before the CPU has actually turned all interrupts off - the CPU will
> > ACK the interrupt (but just set a flag for it being PENDING), so now the
> > chipset logic around it will not see it as pending any more, so now the
> > chipset won't auto-wake the CPU immediately (or more likely, it won't
> > even suspend it).
> >
> > It's trivial to fix multiple ways, so I wouldn't worry. The most trivial
> > way is to just have some sysdev drievr code simply do something like
> >
> > static int sysdev_suspend()
> > {
> > for_each_irq(irq,desc) {
> > if (!(desc->flags & IRQF_WAKE))
> > continue;
> > if (desc->flags & IRQ_PENDING)
> > return -EBUSY;
> > }
> > return 0;
> > }
> >
> > and that should automatically mean that if any irq is pending, the suspend
> > will fail and we'll immediately wake up again.
> >
> > It looks trivial, and I don't understand why Arve can't just do the sysdev
> > thing.
>
> I can. My point is that the patch breaks our existing code.
Is that a mainline kernel code?
> If anyone else uses edge triggered wakeup interrupt it may break from them as
> well. The main question if this should be fixed separately for every
> platform that needs it, or if pending wakeup interrupts should always
> abort sleep.
Well, I'm not really sure if this is the problem. In fact the problem is that
you have a regular device the interrupt of which can be a wake-up one. I think
the problem wouldn't have existed at all if it had been a sysdev. Is that
correct?
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902270010.38291.rjw@sisk.pl>
@ 2009-02-27 0:00 ` Arve Hjønnevåg
[not found] ` <d6200be20902261600t7531568ekd9a7aa9215f53263@mail.gmail.com>
1 sibling, 0 replies; 77+ messages in thread
From: Arve Hjønnevåg @ 2009-02-27 0:00 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, Linus Torvalds, pm list
On Thu, Feb 26, 2009 at 3:10 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> On Thursday 26 February 2009, Arve Hjønnevåg wrote:
>> On Thu, Feb 26, 2009 at 2:10 PM, Linus Torvalds
>> <torvalds@linux-foundation.org> wrote:
>> >
>> >
>> > On Thu, 26 Feb 2009, Rafael J. Wysocki wrote:
>> >>
>> >> Well, how exactly the $subject patch does cause this problem to happen?
>> >
>> > Rafael, the problem is that if an interrupt happens while it's disabled -
>> > but before the CPU has actually turned all interrupts off - the CPU will
>> > ACK the interrupt (but just set a flag for it being PENDING), so now the
>> > chipset logic around it will not see it as pending any more, so now the
>> > chipset won't auto-wake the CPU immediately (or more likely, it won't
>> > even suspend it).
>> >
>> > It's trivial to fix multiple ways, so I wouldn't worry. The most trivial
>> > way is to just have some sysdev drievr code simply do something like
>> >
>> > static int sysdev_suspend()
>> > {
>> > for_each_irq(irq,desc) {
>> > if (!(desc->flags & IRQF_WAKE))
>> > continue;
>> > if (desc->flags & IRQ_PENDING)
>> > return -EBUSY;
>> > }
>> > return 0;
>> > }
>> >
>> > and that should automatically mean that if any irq is pending, the suspend
>> > will fail and we'll immediately wake up again.
>> >
>> > It looks trivial, and I don't understand why Arve can't just do the sysdev
>> > thing.
>>
>> I can. My point is that the patch breaks our existing code.
>
> Is that a mainline kernel code?
No, the msm suspend support has not been merged.
>
>> If anyone else uses edge triggered wakeup interrupt it may break from them as
>> well. The main question if this should be fixed separately for every
>> platform that needs it, or if pending wakeup interrupts should always
>> abort sleep.
>
> Well, I'm not really sure if this is the problem. In fact the problem is that
> you have a regular device the interrupt of which can be a wake-up one. I think
Is that not a common case and what enable_irq_wake is for?
> the problem wouldn't have existed at all if it had been a sysdev. Is that
> correct?
How many sysdevs use interrupts?
I found may drivers in the mainline kernel that use enable_irq_wake,
but I did not see any that handle this race condition.
--
Arve Hjønnevåg
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <d6200be20902261600t7531568ekd9a7aa9215f53263@mail.gmail.com>
@ 2009-02-27 0:27 ` Linus Torvalds
0 siblings, 0 replies; 77+ messages in thread
From: Linus Torvalds @ 2009-02-27 0:27 UTC (permalink / raw)
To: Arve Hjønnevåg
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Thomas Gleixner,
Eric W. Biederman, Ingo Molnar, pm list
On Thu, 26 Feb 2009, Arve Hjønnevåg wrote:
>
> How many sysdevs use interrupts?
>
> I found may drivers in the mainline kernel that use enable_irq_wake,
> but I did not see any that handle this race condition.
The _only_ driver that does enable_irq_wake() on x86 is the cmos timer
driver, and even there it actually doesn't use irq_wake, but ACPI. Why?
Because I don't think irq wakeup even _works_ on x86.
So the whole enable_irq_wake is largely some embedded ARM platform issue,
and a very special case, and doesn't exist anywhere else.
Maybe I'm missing something, but it's definitely not the normal case.
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] <alpine.LFD.2.00.0902261620370.3111@localhost.localdomain>
@ 2009-02-27 3:20 ` Alan Stern
0 siblings, 0 replies; 77+ messages in thread
From: Alan Stern @ 2009-02-27 3:20 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Thomas Gleixner, Ingo Molnar
On Thu, 26 Feb 2009, Linus Torvalds wrote:
> The _only_ driver that does enable_irq_wake() on x86 is the cmos timer
> driver, and even there it actually doesn't use irq_wake, but ACPI. Why?
> Because I don't think irq wakeup even _works_ on x86.
>
> So the whole enable_irq_wake is largely some embedded ARM platform issue,
> and a very special case, and doesn't exist anywhere else.
>
> Maybe I'm missing something, but it's definitely not the normal case.
What you're missing is that the embedded world is quite a large one.
As any member of CELF will tell you, there are lots more embedded
systems around than there are desktop/laptop computers. (I admit, I
don't know what the ratio is if you restrict your attention to systems
running Linux.) We can't afford to regard them as second-class
citizens.
Plenty of embedded systems use normal interrupts from GPIO lines as
wakeup sources. Don't discount the need for this just because desktop
systems don't use them that way. It may not be "normal" in the
circles you're accustomed to, but it _is_ normal elsewhere.
Alan Stern
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] <Pine.LNX.4.44L0.0902262211090.22001-100000@netrider.rowland.org>
@ 2009-02-27 4:43 ` Linus Torvalds
2009-02-27 14:59 ` Alan Stern
0 siblings, 1 reply; 77+ messages in thread
From: Linus Torvalds @ 2009-02-27 4:43 UTC (permalink / raw)
To: Alan Stern
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Thomas Gleixner, Ingo Molnar
On Thu, 26 Feb 2009, Alan Stern wrote:
>
> What you're missing is that the embedded world is quite a large one.
I'm gpoing to give you one more clue, and if you don't stop sending out
these IDIOTIC emails, I'm going to put you into my killfile.
Got it?
So listen up:
- the number of ARM chips sold doesn't matter one F*CKING WHIT.
- You need to add ONE SINGLE "sysdev" entry for ARM to take care of this
FOR EVERY DAMN SINGLE ONE.
- Your inane whining about this AFTER I HAVE TOLD YOU MULTIPLE TIMES HOW
TO DO IT, AND AFTER I HAVE TOLD YOU THAT IT'S A SPECIAL CASE, IS
F*CKING IRRITATING.
Got it?
I _grepped_ for that enable_irq_wake() use. It looks like it's only used
on ARM and maybe BF. Add the five lines of code (just cut and paste them
from my earlier email) to your architecture already, AND STOP WHINING.
It's not a generic case. It's not a problem. You can damn well fix it in
the ONE SINGLE ARCHITECTURE (or maybe two) that cares. I've told you how.
Why is it so damn hard for you to just accept?
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
2009-02-27 4:43 ` Linus Torvalds
@ 2009-02-27 14:59 ` Alan Stern
0 siblings, 0 replies; 77+ messages in thread
From: Alan Stern @ 2009-02-27 14:59 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Thomas Gleixner, Ingo Molnar
On Thu, 26 Feb 2009, Linus Torvalds wrote:
> On Thu, 26 Feb 2009, Alan Stern wrote:
> >
> > What you're missing is that the embedded world is quite a large one.
>
> I'm gpoing to give you one more clue, and if you don't stop sending out
> these IDIOTIC emails, I'm going to put you into my killfile.
>
> Got it?
Whoa!! Hold on there! You got too angry too quickly. I'm Alan Stern,
not Arve Hjønnevåg; that was the first email I've sent on this topic.
And while perhaps it was idiotic, you shouldn't put the blame for it on
Arve.
> So listen up:
> - the number of ARM chips sold doesn't matter one F*CKING WHIT.
> - You need to add ONE SINGLE "sysdev" entry for ARM to take care of this
> FOR EVERY DAMN SINGLE ONE.
> - Your inane whining about this AFTER I HAVE TOLD YOU MULTIPLE TIMES HOW
> TO DO IT, AND AFTER I HAVE TOLD YOU THAT IT'S A SPECIAL CASE, IS
> F*CKING IRRITATING.
>
> Got it?
>
> I _grepped_ for that enable_irq_wake() use. It looks like it's only used
> on ARM and maybe BF. Add the five lines of code (just cut and paste them
> from my earlier email) to your architecture already, AND STOP WHINING.
Really? Let's see (this is using Greg KH's development tree):
$ find . -name '*.[ch]' | xargs grep enable_irq_wake
./drivers/serial/serial_core.c: enable_irq_wake(port->irq);
./drivers/usb/gadget/at91_udc.c: enable_irq_wake(udc->udp_irq);
./drivers/usb/gadget/at91_udc.c: enable_irq_wake(udc->board.vbus_pin);
./drivers/usb/musb/musb_core.c: if (enable_irq_wake(nIrq) == 0) {
./drivers/usb/host/ohci-at91.c: enable_irq_wake(hcd->irq);
./drivers/input/serio/sa1111ps2.c: enable_irq_wake(ps2if->dev->irq[0]);
./drivers/input/keyboard/gpio_keys.c: enable_irq_wake(irq);
./drivers/input/keyboard/pxa27x_keypad.c: enable_irq_wake(keypad->irq);
./drivers/input/keyboard/bf54x-keys.c: enable_irq_wake(bf54x_kpad->irq);
./drivers/pcmcia/at91_cf.c: enable_irq_wake(board->det_pin);
./drivers/pcmcia/at91_cf.c: enable_irq_wake(board->irq_pin);
./drivers/mmc/host/at91_mci.c: enable_irq_wake(host->board->det_pin);
./drivers/mfd/htc-egpio.c: enable_irq_wake(ei->chained_irq);
./drivers/mfd/pcf50633-core.c: if (enable_irq_wake(client->irq) < 0)
./drivers/rtc/rtc-sa1100.c: enable_irq_wake(IRQ_RTCAlrm);
./drivers/rtc/rtc-omap.c: enable_irq_wake(omap_rtc_alarm);
./drivers/rtc/rtc-s3c.c: enable_irq_wake(s3c_rtc_alarmno);
./drivers/rtc/rtc-at91rm9200.c: enable_irq_wake(AT91_ID_SYS);
./drivers/rtc/rtc-cmos.c: enable_irq_wake(cmos->irq);
./drivers/rtc/rtc-bfin.c: enable_irq_wake(IRQ_RTC);
./drivers/rtc/rtc-ds1374.c: enable_irq_wake(client->irq);
./drivers/rtc/rtc-at91sam9.c: enable_irq_wake(AT91_ID_SYS);
./drivers/rtc/rtc-pxa.c: enable_irq_wake(pxa_rtc->irq_Alrm);
./drivers/power/pda_power.c: ac_wakeup_enabled = !enable_irq_wake(ac_irq->start);
./drivers/power/pda_power.c: usb_wakeup_enabled = !enable_irq_wake(usb_irq->start);
./arch/arm/mach-sa1100/neponset.c: enable_irq_wake(IRQ_GPIO25);
./arch/arm/mach-s3c2410/mach-amlm5900.c: enable_irq_wake(IRQ_EINT9);
./arch/arm/mach-omap1/board-osk.c: enable_irq_wake(irq);
./arch/arm/mach-omap1/serial.c: enable_irq_wake(gpio_to_irq(gpio_nr));
./arch/arm/plat-omap/gpio.c: enable_irq_wake(bank->irq);
./arch/arm/plat-omap/gpio.c: enable_irq_wake(bank->irq);
./arch/arm/plat-omap/gpio.c:/* Use disable_irq_wake() and enable_irq_wake() functions from drivers */
./include/linux/interrupt.h:static inline int enable_irq_wake(unsigned int irq)
./include/linux/interrupt.h:static inline int enable_irq_wake(unsigned int irq)
Perhaps these aren't all the sort of usage you're talking about, but I
bet most of them are. It certainly looks like more than just ARM.
Maybe not all that much more, but definitely more. And the number will
only grow in the future.
> It's not a generic case. It's not a problem. You can damn well fix it in
> the ONE SINGLE ARCHITECTURE (or maybe two) that cares. I've told you how.
I'm not arguing with your suggestion; I'm merely disagreeing with your
statement that wakeup interrupts are "definitely not the normal case".
Alan Stern
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] <Pine.LNX.4.44L0.0902270948540.2926-100000@iolanthe.rowland.org>
@ 2009-02-27 20:30 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902271221360.3111@localhost.localdomain>
1 sibling, 0 replies; 77+ messages in thread
From: Linus Torvalds @ 2009-02-27 20:30 UTC (permalink / raw)
To: Alan Stern
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Thomas Gleixner, Ingo Molnar
On Fri, 27 Feb 2009, Alan Stern wrote:
>
> Perhaps these aren't all the sort of usage you're talking about, but I
> bet most of them are. It certainly looks like more than just ARM.
> Maybe not all that much more, but definitely more. And the number will
> only grow in the future.
Are you really sure? Because it can't be x86. I'm pretty sure that that is
simply not how x86 wake events _work_ - they're not interrupts.
And that's the big point that people seem to be missing here: the whole
"wake up interrupt" thing is not some generic model in the first place. I
strongly suspect that it literally only works on certain architectures.
In other words, I'm getting damn tired of people who CLEARLY DON'T EVEN
KNOW HOW THE HARDWARE WORKS arguing over this.
Here's another hint: that whole "enable_irq_wake()" - have you possibly
spent even five seconds to look at what it actually does? I bet you
haven't. Because what it does is to call the irq controller "set_wake"
function.
Now, grep for that. Whay do you find? Like maybe ARM and BlackFin? Oh, and
I note one MIPS platform.
The point is, that whole "irq wake" really is system dependent. We can do
helper functions for it, but anybody who thinks it's anything "generic" is
totally mistaken.
In other words, making it a sysdev thing is the CORRECT thing to do. It
really is not just "here's how you work around something". It really is
"this is how the hardware FUNDAMENTALLY WORKS".
Please, stop arguing. At least argue only after you understand what the
physical hardware actyally does.
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <alpine.LFD.2.00.0902271221360.3111@localhost.localdomain>
@ 2009-02-28 3:54 ` Arve Hjønnevåg
[not found] ` <d6200be20902271954q16697282ia056d4f40ab49184@mail.gmail.com>
1 sibling, 0 replies; 77+ messages in thread
From: Arve Hjønnevåg @ 2009-02-28 3:54 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Thomas Gleixner, Ingo Molnar
On Fri, Feb 27, 2009 at 12:30 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
>
> On Fri, 27 Feb 2009, Alan Stern wrote:
>>
>> Perhaps these aren't all the sort of usage you're talking about, but I
>> bet most of them are. It certainly looks like more than just ARM.
>> Maybe not all that much more, but definitely more. And the number will
>> only grow in the future.
>
> Are you really sure? Because it can't be x86. I'm pretty sure that that is
> simply not how x86 wake events _work_ - they're not interrupts.
They are not interrupts on every arm platform that implements set_wake
either, but it is useful to pretend that they are. If the platform
code reads the wakeup status and marks the corresponding interrupt
pending, the driver does not need to know if the event occurred before
or after the system entered the low power state. I don't know if this
can be implemented on x86, but it might be worth looking into.
>
> And that's the big point that people seem to be missing here: the whole
> "wake up interrupt" thing is not some generic model in the first place. I
> strongly suspect that it literally only works on certain architectures.
My point was that it was not specific to our platform. I don't have a
problem fixing our platform if this patch is merged, but this is case
where a change to the generic code breaks some platforms. I don't
think there is a good reason to make the fix arm specific, trivial or
not, since any platform implementing set_wake may run into the race
condition that this patch introduced. If the platform does not
implement set_wake, IRQ_WAKEUP never gets set, and the fix should not
have any effect.
--
Arve Hjønnevåg
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <d6200be20902271954q16697282ia056d4f40ab49184@mail.gmail.com>
@ 2009-02-28 10:06 ` Rafael J. Wysocki
[not found] ` <200902281106.41793.rjw@sisk.pl>
1 sibling, 0 replies; 77+ messages in thread
From: Rafael J. Wysocki @ 2009-02-28 10:06 UTC (permalink / raw)
To: Arve Hjønnevåg, Linus Torvalds
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Thomas Gleixner, Ingo Molnar
On Saturday 28 February 2009, Arve Hjønnevåg wrote:
> On Fri, Feb 27, 2009 at 12:30 PM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> >
> > On Fri, 27 Feb 2009, Alan Stern wrote:
> >>
> >> Perhaps these aren't all the sort of usage you're talking about, but I
> >> bet most of them are. It certainly looks like more than just ARM.
> >> Maybe not all that much more, but definitely more. And the number will
> >> only grow in the future.
> >
> > Are you really sure? Because it can't be x86. I'm pretty sure that that is
> > simply not how x86 wake events _work_ - they're not interrupts.
>
> They are not interrupts on every arm platform that implements set_wake
> either, but it is useful to pretend that they are. If the platform
> code reads the wakeup status and marks the corresponding interrupt
> pending, the driver does not need to know if the event occurred before
> or after the system entered the low power state. I don't know if this
> can be implemented on x86, but it might be worth looking into.
That would have been a new feature, no? And I don't think anyone except for
you does it. So, what you're saying boils down to "please don't break my new
feature that hasn't been merged yet".
> > And that's the big point that people seem to be missing here: the whole
> > "wake up interrupt" thing is not some generic model in the first place. I
> > strongly suspect that it literally only works on certain architectures.
>
> My point was that it was not specific to our platform. I don't have a
> problem fixing our platform if this patch is merged, but this is case
> where a change to the generic code breaks some platforms.
Quite frankly, I don't really think it will break anything else than your
platform.
Still, if Linus agrees, I can put the loop suggested by him directly into
sysdev_suspend(). Linus?
> I don't think there is a good reason to make the fix arm specific, trivial or
> not, since any platform implementing set_wake may run into the race
> condition that this patch introduced. If the platform does not
> implement set_wake, IRQ_WAKEUP never gets set, and the fix should not
> have any effect.
The point is, if we put anything like this into the generic code, platforms
start to rely on this and it will become more and more difficult to change at
the generic level if need be.
The fact that your platform relies on the generic code to disable IRQs on
the CPU at a particular point shows the mechanism very well. :-)
Thanks,
Rafael
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902281106.41793.rjw@sisk.pl>
@ 2009-02-28 17:03 ` Linus Torvalds
2009-02-28 22:15 ` Arve Hjønnevåg
1 sibling, 0 replies; 77+ messages in thread
From: Linus Torvalds @ 2009-02-28 17:03 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Thomas Gleixner, Ingo Molnar
On Sat, 28 Feb 2009, Rafael J. Wysocki wrote:
>
> Still, if Linus agrees, I can put the loop suggested by him directly into
> sysdev_suspend(). Linus?
I don't much care - it's going to be a no-op on architectures that don't
have that kind of "turn an interrupt into a wakeup event" capability. So
it's not going to break for things like x86, and it's not like going over
the irq list one more time is going to be so expensive as to be
noticeable, even if that architecture doesn't ever get any advantage of
it.
However - my main worry is that we will notice that different
architectures (and possibly even different platforms _within_ the same
architecture - depending on which kind of interrupt/pm controller they
have) will want to do different things, and actually do something to the
interrupt controller itself too at that point.
But we can certainly try starting out with just the generic "if a wakeup
interrupt is pending, sysdev_suspend() returns an error immediately".
Linus
^ permalink raw reply [flat|nested] 77+ messages in thread
* Re: [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume
[not found] ` <200902281106.41793.rjw@sisk.pl>
2009-02-28 17:03 ` Linus Torvalds
@ 2009-02-28 22:15 ` Arve Hjønnevåg
1 sibling, 0 replies; 77+ messages in thread
From: Arve Hjønnevåg @ 2009-02-28 22:15 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jeremy Fitzhardinge, LKML, Jesse Barnes, Eric W. Biederman,
pm list, Thomas Gleixner, Linus Torvalds, Ingo Molnar
On Sat, Feb 28, 2009 at 2:06 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> On Saturday 28 February 2009, Arve Hjønnevåg wrote:
>> On Fri, Feb 27, 2009 at 12:30 PM, Linus Torvalds
>> <torvalds@linux-foundation.org> wrote:
>> >
>> >
>> > On Fri, 27 Feb 2009, Alan Stern wrote:
>> >>
>> >> Perhaps these aren't all the sort of usage you're talking about, but I
>> >> bet most of them are. It certainly looks like more than just ARM.
>> >> Maybe not all that much more, but definitely more. And the number will
>> >> only grow in the future.
>> >
>> > Are you really sure? Because it can't be x86. I'm pretty sure that that is
>> > simply not how x86 wake events _work_ - they're not interrupts.
>>
>> They are not interrupts on every arm platform that implements set_wake
>> either, but it is useful to pretend that they are. If the platform
>> code reads the wakeup status and marks the corresponding interrupt
>> pending, the driver does not need to know if the event occurred before
>> or after the system entered the low power state. I don't know if this
>> can be implemented on x86, but it might be worth looking into.
>
> That would have been a new feature, no? And I don't think anyone except for
> you does it. So, what you're saying boils down to "please don't break my new
> feature that hasn't been merged yet".
I was not referring to our platform, so not this is not a new feature.
>> > And that's the big point that people seem to be missing here: the whole
>> > "wake up interrupt" thing is not some generic model in the first place. I
>> > strongly suspect that it literally only works on certain architectures.
>>
>> My point was that it was not specific to our platform. I don't have a
>> problem fixing our platform if this patch is merged, but this is case
>> where a change to the generic code breaks some platforms.
>
> Quite frankly, I don't really think it will break anything else than your
> platform.
That is quite possible, but do other platforms not break because they
are already broken? I saw no attempt to avoid race conditions on
suspend in the drivers I looked at.
> Still, if Linus agrees, I can put the loop suggested by him directly into
> sysdev_suspend(). Linus?
I vote for this.
>
>> I don't think there is a good reason to make the fix arm specific, trivial or
>> not, since any platform implementing set_wake may run into the race
>> condition that this patch introduced. If the platform does not
>> implement set_wake, IRQ_WAKEUP never gets set, and the fix should not
>> have any effect.
>
> The point is, if we put anything like this into the generic code, platforms
> start to rely on this and it will become more and more difficult to change at
> the generic level if need be.
>
> The fact that your platform relies on the generic code to disable IRQs on
> the CPU at a particular point shows the mechanism very well. :-)
If the generic code did not clear the interrupt I think this would be
a stronger point. Since our hardware does not have a mask register,
only enable, I find this feature (leave the interrupt enabled, and
mask it only if it triggers) of the generic interrupt code quite
useful. If the generic code did not do this, the platform code would
have to.
--
Arve Hjønnevåg
^ permalink raw reply [flat|nested] 77+ messages in thread
end of thread, other threads:[~2009-02-28 22:15 UTC | newest]
Thread overview: 77+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <Pine.LNX.4.44L0.0902251027270.3373-100000@iolanthe.rowland.org>
2009-02-25 16:19 ` [RFC][PATCH 2/2] PM: Rework handling of interrupts during suspend-resume Linus Torvalds
[not found] <Pine.LNX.4.44L0.0902270948540.2926-100000@iolanthe.rowland.org>
2009-02-27 20:30 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902271221360.3111@localhost.localdomain>
2009-02-28 3:54 ` Arve Hjønnevåg
[not found] ` <d6200be20902271954q16697282ia056d4f40ab49184@mail.gmail.com>
2009-02-28 10:06 ` Rafael J. Wysocki
[not found] ` <200902281106.41793.rjw@sisk.pl>
2009-02-28 17:03 ` Linus Torvalds
2009-02-28 22:15 ` Arve Hjønnevåg
[not found] <Pine.LNX.4.44L0.0902262211090.22001-100000@netrider.rowland.org>
2009-02-27 4:43 ` Linus Torvalds
2009-02-27 14:59 ` Alan Stern
[not found] <alpine.LFD.2.00.0902261620370.3111@localhost.localdomain>
2009-02-27 3:20 ` Alan Stern
2009-02-22 17:37 [RFC][PATCH 0/2] Rework disabling " Rafael J. Wysocki
2009-02-22 17:39 ` [RFC][PATCH 2/2] PM: Rework handling " Rafael J. Wysocki
[not found] ` <200902221839.50357.rjw@sisk.pl>
2009-02-22 18:01 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902220946070.3111@localhost.localdomain>
2009-02-22 22:42 ` Rafael J. Wysocki
[not found] ` <200902222342.08285.rjw@sisk.pl>
2009-02-22 23:48 ` Rafael J. Wysocki
[not found] ` <200902230048.33635.rjw@sisk.pl>
2009-02-23 0:05 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902221603440.3111@localhost.localdomain>
2009-02-23 1:23 ` Linus Torvalds
2009-02-23 3:04 ` Eric W. Biederman
2009-02-23 8:36 ` Ingo Molnar
[not found] ` <m1ab8ddfbr.fsf@fess.ebiederm.org>
2009-02-23 8:44 ` Ingo Molnar
[not found] ` <20090223084400.GB9582@elte.hu>
2009-02-23 9:22 ` Eric W. Biederman
[not found] ` <m17i3ha4nk.fsf@fess.ebiederm.org>
2009-02-23 9:44 ` Ingo Molnar
2009-02-23 10:13 ` Benjamin Herrenschmidt
[not found] ` <20090223094441.GM9582@elte.hu>
2009-02-23 10:42 ` Eric W. Biederman
[not found] ` <m1bpst77te.fsf@fess.ebiederm.org>
2009-02-23 11:03 ` Rafael J. Wysocki
2009-02-23 11:04 ` Ingo Molnar
[not found] ` <20090223110433.GA17312@elte.hu>
2009-02-23 14:45 ` Rafael J. Wysocki
[not found] ` <200902231545.26250.rjw@sisk.pl>
2009-02-23 15:06 ` Ingo Molnar
2009-02-23 21:59 ` Rafael J. Wysocki
[not found] ` <200902231203.09082.rjw@sisk.pl>
2009-02-23 15:28 ` Eric W. Biederman
[not found] ` <m18wnx41h5.fsf@fess.ebiederm.org>
2009-02-23 21:39 ` Rafael J. Wysocki
[not found] ` <200902232239.05926.rjw@sisk.pl>
2009-02-24 3:30 ` Eric W. Biederman
[not found] ` <m1iqn01ph3.fsf@fess.ebiederm.org>
2009-02-24 22:42 ` Rafael J. Wysocki
[not found] ` <200902242342.07721.rjw@sisk.pl>
2009-02-24 22:51 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902241449221.3111@localhost.localdomain>
2009-02-24 23:07 ` Rafael J. Wysocki
[not found] ` <200902250007.13069.rjw@sisk.pl>
2009-02-24 23:09 ` Ingo Molnar
[not found] ` <20090224230935.GA15165@elte.hu>
2009-02-24 23:29 ` Rafael J. Wysocki
[not found] ` <200902250029.16107.rjw@sisk.pl>
2009-02-25 13:23 ` Ingo Molnar
2009-02-26 1:17 ` Arve Hjønnevåg
[not found] ` <d6200be20902251717r531c022bl4c25fcb902201188@mail.gmail.com>
2009-02-26 1:27 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902251724220.3111@localhost.localdomain>
2009-02-26 2:13 ` Arve Hjønnevåg
[not found] ` <d6200be20902251813o3ff4a065nb0a54321e27c512d@mail.gmail.com>
2009-02-26 2:51 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902251848120.3111@localhost.localdomain>
2009-02-26 3:00 ` Ingo Molnar
[not found] ` <20090226030050.GA3361@elte.hu>
2009-02-26 3:31 ` Arve Hjønnevåg
[not found] ` <d6200be20902251931lf563c2aw43d5266e9677e168@mail.gmail.com>
2009-02-26 3:37 ` Linus Torvalds
2009-02-26 3:50 ` Arve Hjønnevåg
[not found] ` <d6200be20902251950l34d5968q4b553ce8305ebd4b@mail.gmail.com>
2009-02-26 3:57 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902251956450.3111@localhost.localdomain>
2009-02-26 4:13 ` Arve Hjønnevåg
[not found] ` <d6200be20902252013v366ce771w8a129e9f3b75927f@mail.gmail.com>
2009-02-26 4:20 ` Eric W. Biederman
[not found] ` <m1y6vtkewc.fsf@fess.ebiederm.org>
2009-02-26 4:24 ` Arve Hjønnevåg
2009-02-26 9:50 ` Rafael J. Wysocki
[not found] ` <200902261050.50531.rjw@sisk.pl>
2009-02-26 20:34 ` Arve Hjønnevåg
[not found] ` <d6200be20902261234i5a3896fo78a7f56d16a247c7@mail.gmail.com>
2009-02-26 20:57 ` Benjamin Herrenschmidt
2009-02-26 21:20 ` Arve Hjønnevåg
[not found] ` <d6200be20902261320q58a347e5s1af790e56b186c81@mail.gmail.com>
2009-02-26 21:49 ` Benjamin Herrenschmidt
2009-02-26 21:58 ` Rafael J. Wysocki
[not found] ` <200902262258.55835.rjw@sisk.pl>
2009-02-26 22:10 ` Linus Torvalds
[not found] ` <alpine.LFD.2.00.0902261404580.3111@localhost.localdomain>
2009-02-26 22:30 ` Arve Hjønnevåg
2009-02-26 22:30 ` Rafael J. Wysocki
[not found] ` <d6200be20902261430w19ea3e6fye014f8443d9ab5ca@mail.gmail.com>
2009-02-26 23:10 ` Rafael J. Wysocki
[not found] ` <200902270010.38291.rjw@sisk.pl>
2009-02-27 0:00 ` Arve Hjønnevåg
[not found] ` <d6200be20902261600t7531568ekd9a7aa9215f53263@mail.gmail.com>
2009-02-27 0:27 ` Linus Torvalds
2009-02-25 4:16 ` Eric W. Biederman
[not found] ` <m18wnvup53.fsf@fess.ebiederm.org>
2009-02-25 4:26 ` Linus Torvalds
2009-02-25 4:59 ` Eric W. Biederman
2009-02-25 15:32 ` Alan Stern
[not found] ` <20090223083645.GA9582@elte.hu>
2009-02-23 11:29 ` Rafael J. Wysocki
[not found] ` <200902231229.58743.rjw@sisk.pl>
2009-02-23 12:28 ` Ingo Molnar
2009-02-23 12:45 ` Ingo Molnar
[not found] ` <20090223122832.GB31427@elte.hu>
2009-02-23 14:48 ` Rafael J. Wysocki
2009-02-23 20:49 ` Benjamin Herrenschmidt
[not found] ` <20090223124503.GC31427@elte.hu>
2009-02-23 15:07 ` Rafael J. Wysocki
2009-02-23 15:52 ` Johannes Berg
2009-02-23 17:16 ` Ingo Molnar
[not found] ` <20090223171630.GA28651@elte.hu>
2009-02-23 17:28 ` Linus Torvalds
2009-02-23 22:11 ` Rafael J. Wysocki
2009-02-23 22:11 ` Arve Hjønnevåg
[not found] ` <d6200be20902231411s654f8b3csf1388a97f3f94ac4@mail.gmail.com>
2009-02-23 22:23 ` Rafael J. Wysocki
2009-02-23 22:44 ` Arve Hjønnevåg
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox