[update, rev. 6] Re: [PATCH 1/10] PM: Rework handling of interrupts during suspend-resume (rev. 5)

From: Rafael J. Wysocki
Date: Thu Mar 12 2009 - 17:44:04 EST


On Thursday 12 March 2009, Rafael J. Wysocki wrote:
> On Wednesday 11 March 2009, Thomas Gleixner wrote:
> > On Wed, 11 Mar 2009, Thomas Gleixner wrote:
> > > On Wed, 11 Mar 2009, Rafael J. Wysocki wrote:
> > > > On Wednesday 11 March 2009, Thomas Gleixner wrote:
> > > > > > +EXPORT_SYMBOL_GPL(suspend_device_irqs);
> > > > >
> > > > > I'm not too enthusiastic about this open coded implementation of
> > > > > disable_irq() with slightly different semantics.
> > > >
> > > > The difference in semantics is important IMO, otherwise I woulndn't have
> > > > done that. In particular, IMO, the condition should be under the spinlock IMO
> > > > and I'd rather not synchronize all interrupts we don't really disable here.
> > >
> > > I don't say that the difference is not relevant. But the code is
> > > almost the same and disable_irq() could have the sync_irq optimization
> > > as well.
> >
> > Thought more about that. Avoiding the sync_irq() for irqs which have
> > no action associated is fine, but you need to catch the following case
> > as well:
> >
> > driver code calls disable_irq_nosyc() from the handler (which is
> > still running)
> >
> > suspend code skips the sync due to depth > 0
> >
> > The sync operation is not that expensive.
>
> OK, what about this (untested, irrelevant parts skipped)?

Well, I guess I need to assume that no reaction means it's fine. ;-)

Below is the complete patch. Thomas, Ingo, please let me know it it is fine
with you.

Thanks,
Rafael

---
From: Rafael J. Wysocki <rjw@xxxxxxx>
Subject: PM: Rework handling of interrupts during suspend-resume (rev. 6)

Introduce two helper functions allowing us to prevent device drivers
from getting any interrupts (without disabling interrupts on the CPU)
during suspend (or hibernation) and to make them start to receive
interrupts again during the subsequent resume, respectively. These
functions make it possible to keep timer interrupts enabled while the
"late" suspend and "early" resume callbacks provided by device
drivers are being executed.

Use these functions to rework the handling of interrupts during
suspend (hibernation) and resume. Namely, interrupts will only be
disabled on the CPU right before suspending sysdevs, while device
drivers will be prevented from receiving interrupts, with the help of
the new helper function, before their "late" suspend callbacks run
(and analogously during resume).

In addition, since the device interrups are now disabled before the
CPU has turned all interrupts off and the CPU will ACK the interrupts
setting the IRQ_PENDING bit for them, check in sysdev_suspend() if
any wake-up interrupts are pending and abort suspend if that's the
case.

Signed-off-by: Rafael J. Wysocki <rjw@xxxxxxx>
---
arch/x86/kernel/apm_32.c | 15 ++++++--
drivers/base/power/main.c | 20 ++++++-----
drivers/base/sys.c | 8 ++++
drivers/xen/manage.c | 16 +++++----
include/linux/interrupt.h | 5 ++
include/linux/irq.h | 1
kernel/irq/Makefile | 1
kernel/irq/internals.h | 2 +
kernel/irq/manage.c | 31 +++++++++++++-----
kernel/irq/pm.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++
kernel/kexec.c | 8 ++--
kernel/power/disk.c | 39 ++++++++++++++++------
kernel/power/main.c | 17 ++++++---
13 files changed, 195 insertions(+), 47 deletions(-)

Index: linux-2.6/include/linux/interrupt.h
===================================================================
--- linux-2.6.orig/include/linux/interrupt.h
+++ linux-2.6/include/linux/interrupt.h
@@ -106,6 +106,11 @@ extern void disable_irq_nosync(unsigned
extern void disable_irq(unsigned int irq);
extern void enable_irq(unsigned int irq);

+/* The following three functions are for the core kernel use only. */
+extern void suspend_device_irqs(void);
+extern void resume_device_irqs(void);
+extern int check_wakeup_irqs(void);
+
#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)

extern cpumask_var_t irq_default_affinity;
Index: linux-2.6/kernel/power/main.c
===================================================================
--- linux-2.6.orig/kernel/power/main.c
+++ linux-2.6/kernel/power/main.c
@@ -287,17 +287,19 @@ void __attribute__ ((weak)) arch_suspend
*/
static int suspend_enter(suspend_state_t state)
{
- int error = 0;
+ int error;

device_pm_lock();
- arch_suspend_disable_irqs();
- BUG_ON(!irqs_disabled());

- if ((error = device_power_down(PMSG_SUSPEND))) {
+ error = device_power_down(PMSG_SUSPEND);
+ if (error) {
printk(KERN_ERR "PM: Some devices failed to power down\n");
goto Done;
}

+ arch_suspend_disable_irqs();
+ BUG_ON(!irqs_disabled());
+
error = sysdev_suspend(PMSG_SUSPEND);
if (!error) {
if (!suspend_test(TEST_CORE))
@@ -305,11 +307,14 @@ static int suspend_enter(suspend_state_t
sysdev_resume();
}

- device_power_up(PMSG_RESUME);
- Done:
arch_suspend_enable_irqs();
BUG_ON(irqs_disabled());
+
+ device_power_up(PMSG_RESUME);
+
+ Done:
device_pm_unlock();
+
return error;
}

Index: linux-2.6/kernel/power/disk.c
===================================================================
--- linux-2.6.orig/kernel/power/disk.c
+++ linux-2.6/kernel/power/disk.c
@@ -214,7 +214,7 @@ static int create_image(int platform_mod
return error;

device_pm_lock();
- local_irq_disable();
+
/* At this point, device_suspend() has been called, but *not*
* device_power_down(). We *must* call device_power_down() now.
* Otherwise, drivers for some devices (e.g. interrupt controllers)
@@ -225,8 +225,11 @@ static int create_image(int platform_mod
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting hibernation\n");
- goto Enable_irqs;
+ goto Unlock;
}
+
+ local_irq_disable();
+
sysdev_suspend(PMSG_FREEZE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
@@ -252,12 +255,16 @@ static int create_image(int platform_mod
/* NOTE: device_power_up() is just a resume() for devices
* that suspended with irqs off ... no overall powerup.
*/
+
Power_up_devices:
+ local_irq_enable();
+
device_power_up(in_suspend ?
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
- Enable_irqs:
- local_irq_enable();
+
+ Unlock:
device_pm_unlock();
+
return error;
}

@@ -336,13 +343,16 @@ static int resume_target_kernel(void)
int error;

device_pm_lock();
- local_irq_disable();
+
error = device_power_down(PMSG_QUIESCE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting resume\n");
- goto Enable_irqs;
+ goto Unlock;
}
+
+ local_irq_disable();
+
sysdev_suspend(PMSG_QUIESCE);
/* We'll ignore saved state, but this gets preempt count (etc) right */
save_processor_state();
@@ -366,11 +376,16 @@ static int resume_target_kernel(void)
swsusp_free();
restore_processor_state();
touch_softlockup_watchdog();
+
sysdev_resume();
- device_power_up(PMSG_RECOVER);
- Enable_irqs:
+
local_irq_enable();
+
+ device_power_up(PMSG_RECOVER);
+
+ Unlock:
device_pm_unlock();
+
return error;
}

@@ -447,15 +462,16 @@ int hibernation_platform_enter(void)
goto Finish;

device_pm_lock();
- local_irq_disable();
+
error = device_power_down(PMSG_HIBERNATE);
if (!error) {
+ local_irq_disable();
sysdev_suspend(PMSG_HIBERNATE);
hibernation_ops->enter();
/* We should never get here */
while (1);
}
- local_irq_enable();
+
device_pm_unlock();

/*
@@ -464,12 +480,15 @@ int hibernation_platform_enter(void)
*/
Finish:
hibernation_ops->finish();
+
Resume_devices:
entering_platform_hibernation = false;
device_resume(PMSG_RESTORE);
resume_console();
+
Close:
hibernation_ops->end();
+
return error;
}

Index: linux-2.6/arch/x86/kernel/apm_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apm_32.c
+++ linux-2.6/arch/x86/kernel/apm_32.c
@@ -1190,8 +1190,10 @@ static int suspend(int vetoable)
struct apm_user *as;

device_suspend(PMSG_SUSPEND);
- local_irq_disable();
+
device_power_down(PMSG_SUSPEND);
+
+ local_irq_disable();
sysdev_suspend(PMSG_SUSPEND);

local_irq_enable();
@@ -1209,9 +1211,12 @@ static int suspend(int vetoable)
if (err != APM_SUCCESS)
apm_error("suspend", err);
err = (err == APM_SUCCESS) ? 0 : -EIO;
+
sysdev_resume();
- device_power_up(PMSG_RESUME);
local_irq_enable();
+
+ device_power_up(PMSG_RESUME);
+
device_resume(PMSG_RESUME);
queue_event(APM_NORMAL_RESUME, NULL);
spin_lock(&user_list_lock);
@@ -1228,8 +1233,9 @@ static void standby(void)
{
int err;

- local_irq_disable();
device_power_down(PMSG_SUSPEND);
+
+ local_irq_disable();
sysdev_suspend(PMSG_SUSPEND);
local_irq_enable();

@@ -1239,8 +1245,9 @@ static void standby(void)

local_irq_disable();
sysdev_resume();
- device_power_up(PMSG_RESUME);
local_irq_enable();
+
+ device_power_up(PMSG_RESUME);
}

static apm_event_t get_event(void)
Index: linux-2.6/drivers/xen/manage.c
===================================================================
--- linux-2.6.orig/drivers/xen/manage.c
+++ linux-2.6/drivers/xen/manage.c
@@ -39,12 +39,6 @@ static int xen_suspend(void *data)

BUG_ON(!irqs_disabled());

- err = device_power_down(PMSG_SUSPEND);
- if (err) {
- printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n",
- err);
- return err;
- }
err = sysdev_suspend(PMSG_SUSPEND);
if (err) {
printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
@@ -69,7 +63,6 @@ static int xen_suspend(void *data)
xen_mm_unpin_all();

sysdev_resume();
- device_power_up(PMSG_RESUME);

if (!*cancelled) {
xen_irq_resume();
@@ -108,6 +101,12 @@ static void do_suspend(void)
/* XXX use normal device tree? */
xenbus_suspend();

+ err = device_power_down(PMSG_SUSPEND);
+ if (err) {
+ printk(KERN_ERR "device_power_down failed: %d\n", err);
+ goto resume_devices;
+ }
+
err = stop_machine(xen_suspend, &cancelled, &cpumask_of_cpu(0));
if (err) {
printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
@@ -120,6 +119,9 @@ static void do_suspend(void)
} else
xenbus_suspend_cancel();

+ device_power_up(PMSG_RESUME);
+
+resume_devices:
device_resume(PMSG_RESUME);

/* Make sure timer events get retriggered on all CPUs */
Index: linux-2.6/kernel/kexec.c
===================================================================
--- linux-2.6.orig/kernel/kexec.c
+++ linux-2.6/kernel/kexec.c
@@ -1454,7 +1454,6 @@ int kernel_kexec(void)
if (error)
goto Resume_devices;
device_pm_lock();
- local_irq_disable();
/* At this point, device_suspend() has been called,
* but *not* device_power_down(). We *must*
* device_power_down() now. Otherwise, drivers for
@@ -1464,8 +1463,9 @@ int kernel_kexec(void)
*/
error = device_power_down(PMSG_FREEZE);
if (error)
- goto Enable_irqs;
+ goto Unlock_pm;

+ local_irq_disable();
/* Suspend system devices */
error = sysdev_suspend(PMSG_FREEZE);
if (error)
@@ -1484,9 +1484,9 @@ int kernel_kexec(void)
if (kexec_image->preserve_context) {
sysdev_resume();
Power_up_devices:
- device_power_up(PMSG_RESTORE);
- Enable_irqs:
local_irq_enable();
+ device_power_up(PMSG_RESTORE);
+ Unlock_pm:
device_pm_unlock();
enable_nonboot_cpus();
Resume_devices:
Index: linux-2.6/include/linux/irq.h
===================================================================
--- linux-2.6.orig/include/linux/irq.h
+++ linux-2.6/include/linux/irq.h
@@ -65,6 +65,7 @@ typedef void (*irq_flow_handler_t)(unsig
#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
#define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/
+#define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */

#ifdef CONFIG_IRQ_PER_CPU
# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
Index: linux-2.6/kernel/irq/pm.c
===================================================================
--- /dev/null
+++ linux-2.6/kernel/irq/pm.c
@@ -0,0 +1,79 @@
+/*
+ * linux/kernel/irq/pm.c
+ *
+ * Copyright (C) 2009 Rafael J. Wysocki <rjw@xxxxxxx>, Novell Inc.
+ *
+ * This file contains power management functions related to interrupts.
+ */
+
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+
+#include "internals.h"
+
+/**
+ * suspend_device_irqs - disable all currently enabled interrupt lines
+ *
+ * During system-wide suspend or hibernation device interrupts need to be
+ * disabled at the chip level and this function is provided for this purpose.
+ * It disables all interrupt lines that are enabled at the moment and sets the
+ * IRQ_SUSPENDED flag for them.
+ */
+void suspend_device_irqs(void)
+{
+ struct irq_desc *desc;
+ int irq;
+
+ for_each_irq_desc(irq, desc) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ __disable_irq(desc, irq, true);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+
+ for_each_irq_desc(irq, desc)
+ if (desc->status & IRQ_SUSPENDED)
+ synchronize_irq(irq);
+}
+EXPORT_SYMBOL_GPL(suspend_device_irqs);
+
+/**
+ * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs()
+ *
+ * Enable all interrupt lines previously disabled by suspend_device_irqs() that
+ * have the IRQ_SUSPENDED flag set.
+ */
+void resume_device_irqs(void)
+{
+ struct irq_desc *desc;
+ int irq;
+
+ for_each_irq_desc(irq, desc) {
+ unsigned long flags;
+
+ if (!(desc->status & IRQ_SUSPENDED))
+ continue;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ __enable_irq(desc, irq, true);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+}
+EXPORT_SYMBOL_GPL(resume_device_irqs);
+
+/**
+ * check_wakeup_irqs - check if any wake-up interrupts are pending
+ */
+int check_wakeup_irqs(void)
+{
+ struct irq_desc *desc;
+ int irq;
+
+ for_each_irq_desc(irq, desc)
+ if ((desc->status & IRQ_WAKEUP) && (desc->status & IRQ_PENDING))
+ return -EBUSY;
+
+ return 0;
+}
Index: linux-2.6/kernel/irq/Makefile
===================================================================
--- linux-2.6.orig/kernel/irq/Makefile
+++ linux-2.6/kernel/irq/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_GENERIC_IRQ_PROBE) += autop
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o
+obj-$(CONFIG_PM_SLEEP) += pm.o
Index: linux-2.6/kernel/irq/manage.c
===================================================================
--- linux-2.6.orig/kernel/irq/manage.c
+++ linux-2.6/kernel/irq/manage.c
@@ -162,6 +162,20 @@ static inline int do_irq_select_affinity
}
#endif

+void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
+{
+ if (suspend) {
+ if (desc->action && (desc->action->flags & IRQF_TIMER))
+ return;
+ desc->status |= IRQ_SUSPENDED;
+ }
+
+ if (!desc->depth++) {
+ desc->status |= IRQ_DISABLED;
+ desc->chip->disable(irq);
+ }
+}
+
/**
* disable_irq_nosync - disable an irq without waiting
* @irq: Interrupt to disable
@@ -182,10 +196,7 @@ void disable_irq_nosync(unsigned int irq
return;

spin_lock_irqsave(&desc->lock, flags);
- if (!desc->depth++) {
- desc->status |= IRQ_DISABLED;
- desc->chip->disable(irq);
- }
+ __disable_irq(desc, irq, false);
spin_unlock_irqrestore(&desc->lock, flags);
}
EXPORT_SYMBOL(disable_irq_nosync);
@@ -215,15 +226,21 @@ void disable_irq(unsigned int irq)
}
EXPORT_SYMBOL(disable_irq);

-static void __enable_irq(struct irq_desc *desc, unsigned int irq)
+void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
{
+ if (resume)
+ desc->status &= ~IRQ_SUSPENDED;
+
switch (desc->depth) {
case 0:
+ err_out:
WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq);
break;
case 1: {
unsigned int status = desc->status & ~IRQ_DISABLED;

+ if (desc->status & IRQ_SUSPENDED)
+ goto err_out;
/* Prevent probing on this irq: */
desc->status = status | IRQ_NOPROBE;
check_irq_resend(desc, irq);
@@ -253,7 +270,7 @@ void enable_irq(unsigned int irq)
return;

spin_lock_irqsave(&desc->lock, flags);
- __enable_irq(desc, irq);
+ __enable_irq(desc, irq, false);
spin_unlock_irqrestore(&desc->lock, flags);
}
EXPORT_SYMBOL(enable_irq);
@@ -511,7 +528,7 @@ __setup_irq(unsigned int irq, struct irq
*/
if (shared && (desc->status & IRQ_SPURIOUS_DISABLED)) {
desc->status &= ~IRQ_SPURIOUS_DISABLED;
- __enable_irq(desc, irq);
+ __enable_irq(desc, irq, false);
}

spin_unlock_irqrestore(&desc->lock, flags);
Index: linux-2.6/drivers/base/power/main.c
===================================================================
--- linux-2.6.orig/drivers/base/power/main.c
+++ linux-2.6/drivers/base/power/main.c
@@ -23,6 +23,7 @@
#include <linux/pm.h>
#include <linux/resume-trace.h>
#include <linux/rwsem.h>
+#include <linux/interrupt.h>

#include "../base.h"
#include "power.h"
@@ -305,7 +306,8 @@ static int resume_device_noirq(struct de
* Execute the appropriate "noirq resume" callback for all devices marked
* as DPM_OFF_IRQ.
*
- * Must be called with interrupts disabled and only one CPU running.
+ * Must be called under dpm_list_mtx. Device drivers should not receive
+ * interrupts while it's being executed.
*/
static void dpm_power_up(pm_message_t state)
{
@@ -326,14 +328,13 @@ static void dpm_power_up(pm_message_t st
* device_power_up - Turn on all devices that need special attention.
* @state: PM transition of the system being carried out.
*
- * Power on system devices, then devices that required we shut them down
- * with interrupts disabled.
- *
- * Must be called with interrupts disabled.
+ * Call the "early" resume handlers and enable device drivers to receive
+ * interrupts.
*/
void device_power_up(pm_message_t state)
{
dpm_power_up(state);
+ resume_device_irqs();
}
EXPORT_SYMBOL_GPL(device_power_up);

@@ -558,16 +559,17 @@ static int suspend_device_noirq(struct d
* device_power_down - Shut down special devices.
* @state: PM transition of the system being carried out.
*
- * Power down devices that require interrupts to be disabled.
- * Then power down system devices.
+ * Prevent device drivers from receiving interrupts and call the "late"
+ * suspend handlers.
*
- * Must be called with interrupts disabled and only one CPU running.
+ * Must be called under dpm_list_mtx.
*/
int device_power_down(pm_message_t state)
{
struct device *dev;
int error = 0;

+ suspend_device_irqs();
list_for_each_entry_reverse(dev, &dpm_list, power.entry) {
error = suspend_device_noirq(dev, state);
if (error) {
@@ -577,7 +579,7 @@ int device_power_down(pm_message_t state
dev->power.status = DPM_OFF_IRQ;
}
if (error)
- dpm_power_up(resume_event(state));
+ device_power_up(resume_event(state));
return error;
}
EXPORT_SYMBOL_GPL(device_power_down);
Index: linux-2.6/drivers/base/sys.c
===================================================================
--- linux-2.6.orig/drivers/base/sys.c
+++ linux-2.6/drivers/base/sys.c
@@ -22,6 +22,7 @@
#include <linux/pm.h>
#include <linux/device.h>
#include <linux/mutex.h>
+#include <linux/interrupt.h>

#include "base.h"

@@ -369,6 +370,13 @@ int sysdev_suspend(pm_message_t state)
struct sysdev_driver *drv, *err_drv;
int ret;

+ pr_debug("Checking wake-up interrupts\n");
+
+ /* Return error code if there are any wake-up interrupts pending */
+ ret = check_wakeup_irqs();
+ if (ret)
+ return ret;
+
pr_debug("Suspending System Devices\n");

list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) {
Index: linux-2.6/kernel/irq/internals.h
===================================================================
--- linux-2.6.orig/kernel/irq/internals.h
+++ linux-2.6/kernel/irq/internals.h
@@ -12,6 +12,8 @@ extern void compat_irq_chip_set_default_

extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
unsigned long flags);
+extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
+extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);

extern struct lock_class_key irq_desc_lock_class;
extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/