[PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

From: Sergei Shtylyov
Date: Thu May 17 2007 - 13:41:12 EST


Add PowerPC decrementer clock event driver.

Every effort has been made to support the different implementations of the
decrementer: the classic one (with 970 series variation), 40x and Book E
specific ones.

I had to make CONFIG_GENERIC_CLOCKEVENTS option selectable for the
compatibility reasons -- this option is not compatible with the PPC64
deterministic time accounting.

Thanks to Daniel Walker and Thomas Gleixner for the suggestions they made...

Signed-off-by: Sergei Shtylyov <sshtylyov@xxxxxxxxxxxxx>

---
This patch has been reworked against the 2.6.21 clockevents framework.
It has only been tested on the Book E 32-bit CPU this time, so re-testing on
"classic" PowerPC CPUs is needed (used to work as of 2.6.18-rt7)...

CONFIG_PPC_MULTIPLATFORM was the best option I was able to come up with
to cover machines built on 970 series CPUs...

arch/powerpc/Kconfig | 12 +++-
arch/powerpc/kernel/time.c | 124 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 134 insertions(+), 2 deletions(-)

Index: linux-2.6/arch/powerpc/Kconfig
===================================================================
--- linux-2.6.orig/arch/powerpc/Kconfig
+++ linux-2.6/arch/powerpc/Kconfig
@@ -317,7 +317,7 @@ config PPC_STD_MMU_32

config VIRT_CPU_ACCOUNTING
bool "Deterministic task and CPU time accounting"
- depends on PPC64
+ depends on PPC64 && !GENERIC_CLOCKEVENTS
default y
help
Select this option to enable more accurate task and CPU time
@@ -760,6 +760,16 @@ config HIGHMEM
depends on PPC32

source kernel/Kconfig.hz
+
+config GENERIC_CLOCKEVENTS
+ bool "Clock event devices support"
+ default n
+ help
+ Enable support for the clock event devices necessary for the
+ high-resolution timers and the tickless system support.
+ NOTE: This is not compatible with the deterministic time accounting
+ option on PPC64.
+
source kernel/Kconfig.preempt

config RWSEM_GENERIC_SPINLOCK
Index: linux-2.6/arch/powerpc/kernel/time.c
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/time.c
+++ linux-2.6/arch/powerpc/kernel/time.c
@@ -52,6 +52,7 @@
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/irq.h>
+#include <linux/clockchips.h>

#include <asm/io.h>
#include <asm/processor.h>
@@ -128,6 +129,83 @@ unsigned long ppc_tb_freq;
static u64 tb_last_jiffy __cacheline_aligned_in_smp;
static DEFINE_PER_CPU(u64, last_jiffy);

+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+#define DECREMENTER_MAX 0xffffffff
+#else
+#define DECREMENTER_MAX 0x7fffffff /* setting MSB triggers an interrupt */
+#endif
+
+static int decrementer_set_next_event(unsigned long evt,
+ struct clock_event_device *dev)
+{
+#if defined(CONFIG_40x)
+ mtspr(SPRN_PIT, evt); /* 40x has a hidden PIT auto-reload register */
+#elif defined(CONFIG_BOOKE)
+ mtspr(SPRN_DECAR, evt); /* Book E has separate auto-reload register */
+ set_dec(evt);
+#else
+ set_dec(evt - 1); /* Classic decrementer interrupts at -1 */
+#endif
+ return 0;
+}
+
+static void decrementer_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *dev)
+{
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+ u32 tcr = mfspr(SPRN_TCR);
+
+ tcr |= TCR_DIE;
+ switch (mode) {
+ case CLOCK_EVT_MODE_PERIODIC:
+ tcr |= TCR_ARE;
+ break;
+ case CLOCK_EVT_MODE_ONESHOT:
+ tcr &= ~TCR_ARE;
+ break;
+ case CLOCK_EVT_MODE_UNUSED:
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ tcr &= ~TCR_DIE;
+ break;
+ }
+ mtspr(SPRN_TCR, tcr);
+#endif
+ if (mode == CLOCK_EVT_MODE_PERIODIC)
+ decrementer_set_next_event(tb_ticks_per_jiffy, dev);
+}
+
+static struct clock_event_device decrementer_clockevent = {
+ .name = "decrementer",
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+ .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
+#else
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+#endif
+ .shift = 32,
+ .rating = 200,
+ .irq = -1,
+ .set_next_event = decrementer_set_next_event,
+ .set_mode = decrementer_set_mode,
+};
+
+static DEFINE_PER_CPU(struct clock_event_device, decrementers);
+
+static void register_decrementer(void)
+{
+ int cpu = smp_processor_id();
+ struct clock_event_device *decrementer = &per_cpu(decrementers, cpu);
+
+ memcpy(decrementer, &decrementer_clockevent, sizeof(*decrementer));
+
+ decrementer->cpumask = cpumask_of_cpu(cpu);
+
+ clockevents_register_device(decrementer);
+}
+
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
* Factors for converting from cputime_t (timebase ticks) to
@@ -313,6 +391,9 @@ void snapshot_timebase(void)
{
__get_cpu_var(last_jiffy) = get_tb();
snapshot_purr();
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+ register_decrementer();
+#endif
}

void __delay(unsigned long loops)
@@ -470,7 +551,31 @@ void timer_interrupt(struct pt_regs * re
old_regs = set_irq_regs(regs);
irq_enter();

+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+#ifdef CONFIG_PPC_MULTIPLATFORM
+ /*
+ * We must write a positive value to the decrementer to clear
+ * the interrupt on the IBM 970 CPU series. In periodic mode,
+ * this happens when the decrementer gets reloaded later, but
+ * in one-shot mode, we have to do it here since an event handler
+ * may skip loading the new value...
+ */
+ if (per_cpu(decrementers, cpu).mode != CLOCK_EVT_MODE_PERIODIC)
+ set_dec(DECREMENTER_MAX);
+#endif
+ /*
+ * We can't disable the decrementer, so in the period between
+ * CPU being marked offline and calling stop-self, it's taking
+ * timer interrupts...
+ */
+ if (!cpu_is_offline(cpu)) {
+ struct clock_event_device *dev = &per_cpu(decrementers, cpu);
+
+ dev->event_handler(dev);
+ }
+#else
profile_tick(CPU_PROFILING);
+#endif
calculate_steal_time();

#ifdef CONFIG_PPC_ISERIES
@@ -486,6 +591,7 @@ void timer_interrupt(struct pt_regs * re
if (__USE_RTC() && per_cpu(last_jiffy, cpu) >= 1000000000)
per_cpu(last_jiffy, cpu) -= 1000000000;

+#ifndef CONFIG_GENERIC_CLOCKEVENTS
/*
* We cannot disable the decrementer, so in the period
* between this cpu's being marked offline in cpu_online_map
@@ -495,6 +601,7 @@ void timer_interrupt(struct pt_regs * re
*/
if (!cpu_is_offline(cpu))
account_process_time(regs);
+#endif

/*
* No need to check whether cpu is offline here; boot_cpuid
@@ -507,15 +614,19 @@ void timer_interrupt(struct pt_regs * re
tb_next_jiffy = tb_last_jiffy + tb_ticks_per_jiffy;
if (per_cpu(last_jiffy, cpu) >= tb_next_jiffy) {
tb_last_jiffy = tb_next_jiffy;
+#ifndef CONFIG_GENERIC_CLOCKEVENTS
do_timer(1);
+#endif
/*timer_recalc_offset(tb_last_jiffy);*/
timer_check_rtc();
}
write_sequnlock(&xtime_lock);
}
-
+
+#ifndef CONFIG_GENERIC_CLOCKEVENTS
next_dec = tb_ticks_per_jiffy - ticks;
set_dec(next_dec);
+#endif

#ifdef CONFIG_PPC_ISERIES
if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending())
@@ -770,8 +881,19 @@ void __init time_init(void)
-xtime.tv_sec, -xtime.tv_nsec);
write_sequnlock_irqrestore(&xtime_lock, flags);

+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+ decrementer_clockevent.mult = div_sc(ppc_tb_freq, NSEC_PER_SEC,
+ decrementer_clockevent.shift);
+ decrementer_clockevent.max_delta_ns =
+ clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
+ decrementer_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xf, &decrementer_clockevent);
+
+ register_decrementer();
+#else
/* Not exact, but the timer interrupt takes care of this */
set_dec(tb_ticks_per_jiffy);
+#endif
}

#define FEBRUARY 2

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/