Re: Suspend-resume failure on Intel Eagle Lake Core2Duo

From: Thomas Gleixner
Date: Thu Jul 27 2017 - 17:08:22 EST


On Thu, 27 Jul 2017, Thomas Gleixner wrote:
> On Thu, 27 Jul 2017, Thomas Gleixner wrote:
> > On Thu, 27 Jul 2017, Tomi Sarvela wrote:
> >
> > > On 27/07/17 10:42, Thomas Gleixner wrote:
> > > > On Thu, 27 Jul 2017, Tomi Sarvela wrote:
> > > > > On 26/07/17 17:26, Thomas Gleixner wrote:
> > > > > > So reverting that commit does not help. Does it help on your machine?
> > > > >
> > > > > Yes. Reverting it does not cause the machine to lock up on resume.
> > > > >
> > > > > > I haven't tested if the machine locks up later on, but at least it
> > > > > > survives couple of s/r cycles.
> > > >
> > > > Can you please try to add 'nohpet' to the kernel command line?
> > >
> > > Option nohpet didn't change anything, still hangs on s/r.
> >
> > Ok. Was a shot in the dark. I tried on a similar machine, but that one
> > resumes fine (except that the AHCI controller plays silly buggers, but
> > nothing interrupt related). I might have access to another core2duo machine
> > tomorrow.
> >
> > I'll send you a debug patch shortly, but can you please first check when
> > the wreckage happens by testing the states in
> >
> > /sys/power/pm_test
> >
> > freezer
> > devices
> > platform
> > processors
> > core
>
> Actually for suspend to ram we only have
>
> freezer, devices, platform
>
> I assume it's platform because that is where the actual interrupt
> suspend/resume happens.
>
> If that survives, then it's the low level architecture s/r code which
> fiddles with the interrupt controllers and leaves them in a state which is
> not known to the core code.

Debug patch below. It should make the machine resume again. Emphasis on
"should". Please provide the output of /sys/kernel/debug/tracing/trace
after resume.

Thanks,

tglx

8<-----------

--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -304,7 +304,10 @@ void irq_shutdown(struct irq_desc *desc)

void irq_enable(struct irq_desc *desc)
{
- if (!irqd_irq_disabled(&desc->irq_data)) {
+ if (irq_suspend_resume)
+ irq_trace_state("preenable", desc);
+
+ if (!irqd_irq_disabled(&desc->irq_data) && !irq_suspend_resume) {
unmask_irq(desc);
} else {
irq_state_clr_disabled(desc);
@@ -315,10 +318,16 @@ void irq_enable(struct irq_desc *desc)
unmask_irq(desc);
}
}
+
+ if (irq_suspend_resume)
+ irq_trace_state("postenable", desc);
}

static void __irq_disable(struct irq_desc *desc, bool mask)
{
+ if (irq_suspend_resume)
+ irq_trace_state("predisable", desc);
+
if (irqd_irq_disabled(&desc->irq_data)) {
if (mask)
mask_irq(desc);
@@ -331,6 +340,9 @@ static void __irq_disable(struct irq_des
mask_irq(desc);
}
}
+
+ if (irq_suspend_resume)
+ irq_trace_state("postdisable", desc);
}

/**
@@ -390,6 +402,9 @@ static inline void mask_ack_irq(struct i

void mask_irq(struct irq_desc *desc)
{
+ if (irq_suspend_resume)
+ irq_trace_state("premask", desc);
+
if (irqd_irq_masked(&desc->irq_data))
return;

@@ -397,17 +412,26 @@ void mask_irq(struct irq_desc *desc)
desc->irq_data.chip->irq_mask(&desc->irq_data);
irq_state_set_masked(desc);
}
+
+ if (irq_suspend_resume)
+ irq_trace_state("postmask", desc);
}

void unmask_irq(struct irq_desc *desc)
{
- if (!irqd_irq_masked(&desc->irq_data))
+ if (irq_suspend_resume)
+ irq_trace_state("preunmask", desc);
+
+ if (!irqd_irq_masked(&desc->irq_data) && !irq_suspend_resume)
return;

if (desc->irq_data.chip->irq_unmask) {
desc->irq_data.chip->irq_unmask(&desc->irq_data);
irq_state_clr_masked(desc);
}
+
+ if (irq_suspend_resume)
+ irq_trace_state("postunmask", desc);
}

void unmask_threaded_irq(struct irq_desc *desc)
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -459,3 +459,11 @@ static inline void irq_remove_debugfs_en
{
}
#endif /* CONFIG_GENERIC_IRQ_DEBUGFS */
+
+extern bool irq_suspend_resume;	/* declaration only; defined once in kernel/irq/pm.c */
+
+static inline void irq_trace_state(const char *what, struct irq_desc *desc)
+{
+ trace_printk("%s %d state %08x\n", what, irq_desc_get_irq(desc),
+ irqd_get(&desc->irq_data));
+}
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -14,6 +14,8 @@

#include "internals.h"

+bool irq_suspend_resume;
+
bool irq_pm_check_wakeup(struct irq_desc *desc)
{
if (irqd_is_wakeup_armed(&desc->irq_data)) {
@@ -120,6 +122,7 @@ void suspend_device_irqs(void)
struct irq_desc *desc;
int irq;

+ irq_suspend_resume = true;
for_each_irq_desc(irq, desc) {
unsigned long flags;
bool sync;
@@ -127,7 +130,9 @@ void suspend_device_irqs(void)
if (irq_settings_is_nested_thread(desc))
continue;
raw_spin_lock_irqsave(&desc->lock, flags);
+ irq_trace_state("presuspend", desc);
sync = suspend_device_irq(desc);
+ irq_trace_state("postsuspend", desc);
raw_spin_unlock_irqrestore(&desc->lock, flags);

if (sync)
@@ -172,9 +177,14 @@ static void resume_irqs(bool want_early)
continue;

raw_spin_lock_irqsave(&desc->lock, flags);
+ irq_trace_state("preresume", desc);
resume_irq(desc);
+ irq_trace_state("postresume", desc);
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
+
+ if (!want_early)
+ irq_suspend_resume = false;
}

/**