Re: Suspend-resume failure on Intel Eagle Lake Core2Duo

From: Tomi Sarvela
Date: Fri Jul 28 2017 - 08:31:12 EST


On 28/07/17 00:08, Thomas Gleixner wrote:
On Thu, 27 Jul 2017, Thomas Gleixner wrote:
On Thu, 27 Jul 2017, Thomas Gleixner wrote:
On Thu, 27 Jul 2017, Tomi Sarvela wrote:
On 27/07/17 10:42, Thomas Gleixner wrote:
On Thu, 27 Jul 2017, Tomi Sarvela wrote:
On 26/07/17 17:26, Thomas Gleixner wrote:
So reverting that commit does not help. Does it help on your machine?

Yes. Reverting it does not cause the machine to lock up on resume.

I haven't tested whether the machine locks up later on, but at least it
survives a couple of s/r cycles.

Can you please try to add 'nohpet' to the kernel command line?

Option nohpet didn't change anything, still hangs on s/r.

Ok. Was a shot in the dark. I tried on a similar machine, but that one
resumes fine (except that the AHCI controller plays silly buggers, but
nothing interrupt related). I might have access to another core2duo machine
tomorrow.

I'll send you a debug patch shortly, but can you please first check when
the wreckage happens by testing the states in

/sys/power/pm_test

freezer
devices
platform
processors
core

Actually for suspend to ram we only have

freezer, devices, platform

I assume it's platform because that is where the actual interrupt
suspend/resume happens.

If that survives, then it's the low level architecture s/r code which
fiddles with the interrupt controllers and leaves them in a state which is
not known to the core code.

Debug patch below. It should make the machine resume again. Emphasis on
"should". Please provide the output of /sys/kernel/debug/tracing/trace
after resume.

The patch didn't apply cleanly: can you tell me the exact commit or tag it was created against? I tried to hand-wrangle the changes in, but then I got compilation errors:

CC ipc/compat.o
+0x0): multiple definition of `irq_suspend_resume'
kernel/irq/irqdesc.o:(.bss+0x0): first defined here
kernel/irq/manage.o:(.bss+0x8): multiple definition of `irq_suspend_resume'
kernel/irq/irqdesc.o:/home/testrunner/drm-tip/kernel/irq/irqdesc.c:270: first defined here
kernel/irq/spurious.o:(.bss+0x0): multiple definition of `irq_suspend_resume'

Also, the usage of /sys/power/pm_test was not intuitive to me. Can you explain which combinations you want me to test?

Best regards,

Tomi


Thanks,

tglx

8<-----------

--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -304,7 +304,10 @@ void irq_shutdown(struct irq_desc *desc)
void irq_enable(struct irq_desc *desc)
{
- if (!irqd_irq_disabled(&desc->irq_data)) {
+ if (irq_suspend_resume)
+ irq_trace_state("preenable", desc);
+
+ if (!irqd_irq_disabled(&desc->irq_data) && !irq_suspend_resume) {
unmask_irq(desc);
} else {
irq_state_clr_disabled(desc);
@@ -315,10 +318,16 @@ void irq_enable(struct irq_desc *desc)
unmask_irq(desc);
}
}
+
+ if (irq_suspend_resume)
+ irq_trace_state("postenable", desc);
}
static void __irq_disable(struct irq_desc *desc, bool mask)
{
+ if (irq_suspend_resume)
+ irq_trace_state("predisable", desc);
+
if (irqd_irq_disabled(&desc->irq_data)) {
if (mask)
mask_irq(desc);
@@ -331,6 +340,9 @@ static void __irq_disable(struct irq_des
mask_irq(desc);
}
}
+
+ if (irq_suspend_resume)
+ irq_trace_state("postdisable", desc);
}
/**
@@ -390,6 +402,9 @@ static inline void mask_ack_irq(struct i
void mask_irq(struct irq_desc *desc)
{
+ if (irq_suspend_resume)
+ irq_trace_state("premask", desc);
+
if (irqd_irq_masked(&desc->irq_data))
return;
@@ -397,17 +412,26 @@ void mask_irq(struct irq_desc *desc)
desc->irq_data.chip->irq_mask(&desc->irq_data);
irq_state_set_masked(desc);
}
+
+ if (irq_suspend_resume)
+ irq_trace_state("postmask", desc);
}
void unmask_irq(struct irq_desc *desc)
{
- if (!irqd_irq_masked(&desc->irq_data))
+ if (irq_suspend_resume)
+ irq_trace_state("preunmask", desc);
+
+ if (!irqd_irq_masked(&desc->irq_data) && !irq_suspend_resume)
return;
if (desc->irq_data.chip->irq_unmask) {
desc->irq_data.chip->irq_unmask(&desc->irq_data);
irq_state_clr_masked(desc);
}
+
+ if (irq_suspend_resume)
+ irq_trace_state("postunmask", desc);
}
void unmask_threaded_irq(struct irq_desc *desc)
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -459,3 +459,11 @@ static inline void irq_remove_debugfs_en
{
}
#endif /* CONFIG_GENERIC_IRQ_DEBUGFS */
+
+bool irq_suspend_resume;
+
+static inline void irq_trace_state(const char *what, struct irq_desc *desc)
+{
+ trace_printk("%s %d state %08x\n", what, irq_desc_get_irq(desc),
+ irqd_get(&desc->irq_data));
+}
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -14,6 +14,8 @@
#include "internals.h"
+bool irq_suspend_resume;
+
bool irq_pm_check_wakeup(struct irq_desc *desc)
{
if (irqd_is_wakeup_armed(&desc->irq_data)) {
@@ -120,6 +122,7 @@ void suspend_device_irqs(void)
struct irq_desc *desc;
int irq;
+ irq_suspend_resume = true;
for_each_irq_desc(irq, desc) {
unsigned long flags;
bool sync;
@@ -127,7 +130,9 @@ void suspend_device_irqs(void)
if (irq_settings_is_nested_thread(desc))
continue;
raw_spin_lock_irqsave(&desc->lock, flags);
+ irq_trace_state("presuspend", desc);
sync = suspend_device_irq(desc);
+ irq_trace_state("postsuspend", desc);
raw_spin_unlock_irqrestore(&desc->lock, flags);
if (sync)
@@ -172,9 +177,14 @@ static void resume_irqs(bool want_early)
continue;
raw_spin_lock_irqsave(&desc->lock, flags);
+ irq_trace_state("preresume", desc);
resume_irq(desc);
+ irq_trace_state("postresume", desc);
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
+
+ if (!want_early)
+ irq_suspend_resume = false;
}
/**

--
Intel Finland Oy - BIC 0357606-4 - Westendinkatu 7, 02160 Espoo