[patch] high-res timers: PIT broadcasting fix

From: Ingo Molnar
Date: Sat Dec 09 2006 - 06:42:09 EST


Subject: [patch] high-res timers: PIT broadcasting fix
From: Thomas Gleixner <tglx@xxxxxxxxxxxxx>

On systems that enter C3 and have to turn off the APIC, we
fall back to the PIT as the clock events source, which
emulates a local events source. Dynticks exposed a bug in
the broadcast/local-events emulation code: if the PIT
IRQ arrived before the next high-res timer of an idle
CPU was due, then the PIT was not reprogrammed for
follow-up IRQs.

(also, clean things up a bit by splitting out the broadcast
reprogramming logic into clockevents_reprogram_broadcast())

this bug can explain certain rare boot-time hangs on C3-capable
laptops that run with HIGH_RES_TIMERS and NO_HZ enabled.

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxx>
---
kernel/time/clockevents.c | 60 ++++++++++++++++++++++++++++++++--------------
1 file changed, 42 insertions(+), 18 deletions(-)

Index: linux/kernel/time/clockevents.c
===================================================================
--- linux.orig/kernel/time/clockevents.c
+++ linux/kernel/time/clockevents.c
@@ -529,6 +529,32 @@ static cpumask_t local_event_broadcast;
static void (*broadcast_function)(cpumask_t *mask);
static void (*global_event_handler)(struct pt_regs *regs);

+/*
+ * Reprogram the broadcast device to the earliest expires_next among the
+ * CPUs set in local_event_broadcast; skipped if none is below KTIME_MAX.
+ * Called with events_lock held and interrupts disabled.
+ */
+static void clockevents_reprogram_broadcast(void)
+{
+ struct clock_event_device *glblevt = global_eventdevice.event;
+ struct local_events *dev;
+ ktime_t expires = { .tv64 = KTIME_MAX };
+ int64_t delta;
+ int cpu;
+
+ for (cpu = first_cpu(local_event_broadcast); cpu != NR_CPUS;
+ cpu = next_cpu(cpu, local_event_broadcast)) {
+ dev = &per_cpu(local_eventdevices, cpu);
+ if (dev->expires_next.tv64 < expires.tv64)
+ expires = dev->expires_next;
+ }
+
+ if (expires.tv64 != KTIME_MAX) {
+ delta = ktime_to_ns(ktime_sub(expires, ktime_get()));
+ do_clockevents_set_next_event(glblevt, delta);
+ }
+}
+
/**
* clockevents_set_broadcast - switch next event device from/to broadcast mode
*
@@ -538,10 +564,7 @@ static void (*global_event_handler)(stru
void clockevents_set_broadcast(struct clock_event_device *evt, int broadcast)
{
struct local_events *devices = &__get_cpu_var(local_eventdevices);
- struct clock_event_device *glblevt = global_eventdevice.event;
int cpu = smp_processor_id();
- ktime_t expires = { .tv64 = KTIME_MAX };
- int64_t delta;
unsigned long flags;

if (devices->nextevt != evt)
@@ -558,19 +581,7 @@ void clockevents_set_broadcast(struct cl
if (devices->expires_next.tv64 != KTIME_MAX)
clockevents_set_next_event(devices->expires_next, 1);
}
-
- /* Reprogram the broadcast device */
- for (cpu = first_cpu(local_event_broadcast); cpu != NR_CPUS;
- cpu = next_cpu(cpu, local_event_broadcast)) {
- devices = &per_cpu(local_eventdevices, cpu);
- if (devices->expires_next.tv64 < expires.tv64)
- expires = devices->expires_next;
- }
-
- if (expires.tv64 != KTIME_MAX) {
- delta = ktime_to_ns(ktime_sub(expires, ktime_get()));
- do_clockevents_set_next_event(glblevt, delta);
- }
+ clockevents_reprogram_broadcast();

spin_unlock_irqrestore(&events_lock, flags);
}
@@ -637,9 +648,22 @@ static void handle_nextevt_broadcast(str
cpu_set(cpu, mask);
}
}
+ if (!cpus_empty(mask)) {
+ /*
+ * Wakeup the cpus which have an expired event. The
+ * global event is reprogrammed in the return from
+ * idle code.
+ */
+ broadcast_function(&mask);
+ } else {
+ /*
+ * The global event did not expire any CPU local
+ * events. This happens in dyntick mode, as the
+ * maximum PIT delta is quite small.
+ */
+ clockevents_reprogram_broadcast();
+ }
spin_unlock(&events_lock);
- /* Wakeup the cpus which have an expired event */
- broadcast_function(&mask);
}

/*
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/