[PATCH][2.6][3/5]Support for HPET based timer

From: Pallipadi, Venkatesh
Date: Tue Aug 19 2003 - 20:36:02 EST


[Resend - The original mail hasn't yet appeared on lkml, even 5 hours
after posting]

3/5 - hpet3.patch - All changes required to support timer services
(gettimeofday) with HPET.

diff -purN linux-2.6.0-test1/arch/i386/kernel/timers/timer.c linux-2.6.0-test1-hpet/arch/i386/kernel/timers/timer.c
--- linux-2.6.0-test1/arch/i386/kernel/timers/timer.c 2003-07-13 20:38:41.000000000 -0700
+++ linux-2.6.0-test1-hpet/arch/i386/kernel/timers/timer.c 2003-08-18 14:09:26.000000000 -0700
@@ -3,11 +3,22 @@
#include <linux/string.h>
#include <asm/timer.h>

+#ifdef CONFIG_HPET_TIMER
+/*
+ * HPET memory read is slower than tsc reads, but is more dependable as it
+ * always runs at constant frequency and reduces complexity due to
+ * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use
+ * timer_pit when HPET is active. So, we default to timer_tsc.
+ */
+#endif
/* list of timers, ordered by preference, NULL terminated */
static struct timer_opts* timers[] = {
#ifdef CONFIG_X86_CYCLONE_TIMER
&timer_cyclone,
#endif
+#ifdef CONFIG_HPET_TIMER
+ &timer_hpet,
+#endif
&timer_tsc,
&timer_pit,
NULL,
diff -purN linux-2.6.0-test1/arch/i386/kernel/timers/timer_hpet.c linux-2.6.0-test1-hpet/arch/i386/kernel/timers/timer_hpet.c
--- linux-2.6.0-test1/arch/i386/kernel/timers/timer_hpet.c 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.0-test1-hpet/arch/i386/kernel/timers/timer_hpet.c 2003-08-18 17:20:36.000000000 -0700
@@ -0,0 +1,243 @@
+/*
+ * This code largely moved from arch/i386/kernel/time.c.
+ * See comments there for proper credits.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/timex.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+
+#include <asm/timer.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+
+#include "io_ports.h"
+#include "mach_timer.h"
+#include <asm/hpet.h>
+
+extern volatile unsigned long jiffies;
+
+static unsigned long hpet_usec_quotient; /* convert hpet clks to usec */
+static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */
+static unsigned long hpet_last; /* hpet counter value at last tick*/
+static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
+static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
+static unsigned long long monotonic_base;
+static rwlock_t monotonic_lock = RW_LOCK_UNLOCKED;
+
+/* convert from cycles(64bits) => nanoseconds (64bits)
+ * basic equation:
+ * ns = cycles / (freq / ns_per_sec)
+ * ns = cycles * (ns_per_sec / freq)
+ * ns = cycles * (10^9 / (cpu_mhz * 10^6))
+ * ns = cycles * (10^3 / cpu_mhz)
+ *
+ * Then we use scaling math (suggested by george@xxxxxxxxxx) to get:
+ * ns = cycles * (10^3 * SC / cpu_mhz) / SC
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * And since SC is a constant power of two, we can convert the div
+ * into a shift.
+ * -johnstul@xxxxxxxxxx "math is hard, lets go shopping!"
+ */
+static unsigned long cyc2ns_scale;
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
+{
+ cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
+}
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+ return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+}
+
+static unsigned long long monotonic_clock_hpet(void)
+{
+ unsigned long long last_offset, this_offset, base;
+
+ /* atomically read monotonic base & last_offset */
+ read_lock_irq(&monotonic_lock);
+ last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+ base = monotonic_base;
+ read_unlock_irq(&monotonic_lock);
+
+ /* Read the Time Stamp Counter */
+ rdtscll(this_offset);
+
+ /* return the value in ns */
+ return base + cycles_2_ns(this_offset - last_offset);
+}
+
+static unsigned long get_offset_hpet(void)
+{
+ register unsigned long eax, edx;
+
+ eax = hpet_readl(HPET_COUNTER);
+ eax -= hpet_last; /* hpet delta */
+
+ /*
+ * Time offset = (hpet delta) * ( usecs per HPET clock )
+ * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
+ * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
+ *
+ * Where,
+ * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
+ *
+ * Using a mull instead of a divl saves some cycles in critical path.
+ */
+ ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax);
+
+ /* our adjusted time offset in microseconds */
+ return edx;
+}
+
+static void mark_offset_hpet(void)
+{
+ unsigned long long this_offset, last_offset;
+ unsigned long offset;
+
+ write_lock(&monotonic_lock);
+ last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+ rdtsc(last_tsc_low, last_tsc_high);
+
+ offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
+ int lost_ticks = (offset - hpet_last) / hpet_tick;
+ jiffies += lost_ticks;
+ }
+ hpet_last = offset;
+
+ /* update the monotonic base value */
+ this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+ monotonic_base += cycles_2_ns(this_offset - last_offset);
+ write_unlock(&monotonic_lock);
+}
+
+void delay_hpet(unsigned long loops)
+{
+ unsigned long hpet_start, hpet_end;
+ unsigned long eax;
+
+ /* loops is the number of cpu cycles. Convert it to hpet clocks */
+ ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops);
+
+ hpet_start = hpet_readl(HPET_COUNTER);
+ do {
+ rep_nop();
+ hpet_end = hpet_readl(HPET_COUNTER);
+ } while ((hpet_end - hpet_start) < (loops));
+}
+
+/* ------ Calibrate the TSC -------
+ * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq.
+ * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet().
+ * calibrate_tsc() calibrates the processor TSC by comparing
+ * it to the HPET timer of known frequency.
+ * Too much 64-bit arithmetic here to do this cleanly in C
+ */
+#define CALIBRATE_CNT_HPET (5 * hpet_tick)
+#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
+
+static unsigned long __init calibrate_tsc(void)
+{
+ unsigned long tsc_startlow, tsc_starthigh;
+ unsigned long tsc_endlow, tsc_endhigh;
+ unsigned long hpet_start, hpet_end;
+ unsigned long result, remain;
+
+ hpet_start = hpet_readl(HPET_COUNTER);
+ rdtsc(tsc_startlow, tsc_starthigh);
+ do {
+ hpet_end = hpet_readl(HPET_COUNTER);
+ } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET);
+ rdtsc(tsc_endlow, tsc_endhigh);
+
+ /* 64-bit subtract - gcc just messes up with long longs */
+ __asm__("subl %2,%0\n\t"
+ "sbbl %3,%1"
+ :"=a" (tsc_endlow), "=d" (tsc_endhigh)
+ :"g" (tsc_startlow), "g" (tsc_starthigh),
+ "0" (tsc_endlow), "1" (tsc_endhigh));
+
+ /* Error: ECPUTOOFAST */
+ if (tsc_endhigh)
+ goto bad_calibration;
+
+ /* Error: ECPUTOOSLOW */
+ if (tsc_endlow <= CALIBRATE_TIME_HPET)
+ goto bad_calibration;
+
+ ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET);
+ if (remain > (tsc_endlow >> 1))
+ result++; /* rounding the result */
+
+ ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0,
+ CALIBRATE_CNT_HPET);
+ if (remain > (tsc_endlow >> 1))
+ tsc_hpet_quotient++; /* rounding the result */
+
+ return result;
+bad_calibration:
+ /*
+ * the CPU was so fast/slow that the quotient wouldn't fit in
+ * 32 bits..
+ */
+ return 0;
+}
+
+static int __init init_hpet(char* override)
+{
+
+ /* check clock override */
+ if (override[0] && strncmp(override,"hpet",4))
+ return -ENODEV;
+
+ if (!is_hpet_enabled())
+ return -ENODEV;
+
+ printk("Using HPET for gettimeofday\n");
+ if (cpu_has_tsc) {
+ unsigned long tsc_quotient = calibrate_tsc();
+ if (tsc_quotient) {
+ /* report CPU clock rate in Hz.
+ * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
+ * clock/second. Our precision is about 100 ppm.
+ */
+ { unsigned long eax=0, edx=1000;
+ ASM_DIV64_REG(cpu_khz, edx, tsc_quotient,
+ eax, edx);
+ printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
+ }
+ set_cyc2ns_scale(cpu_khz/1000);
+ }
+ }
+ {
+ /*
+ * Math to calculate hpet to usec multiplier
+ * Look for the comments at get_offset_hpet()
+ */
+ unsigned long result, remain;
+ ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC);
+ if (remain > (hpet_tick >> 1))
+ result++; /* rounding the result */
+
+ hpet_usec_quotient = result;
+ }
+ return 0;
+}
+
+/************************************************************/
+
+/* tsc timer_opts struct */
+struct timer_opts timer_hpet = {
+ .init = init_hpet,
+ .mark_offset = mark_offset_hpet,
+ .get_offset = get_offset_hpet,
+ .monotonic_clock = monotonic_clock_hpet,
+ .delay = delay_hpet,
+};
+
diff -purN linux-2.6.0-test1/arch/i386/kernel/timers/timer_tsc.c linux-2.6.0-test1-hpet/arch/i386/kernel/timers/timer_tsc.c
--- linux-2.6.0-test1/arch/i386/kernel/timers/timer_tsc.c 2003-07-13 20:35:57.000000000 -0700
+++ linux-2.6.0-test1-hpet/arch/i386/kernel/timers/timer_tsc.c 2003-08-18 17:25:50.000000000 -0700
@@ -19,9 +19,18 @@
#include "io_ports.h"
#include "mach_timer.h"

+#include <asm/hpet.h>
+
+#ifdef CONFIG_HPET_TIMER
+static unsigned long hpet_usec_quotient;
+static unsigned long hpet_last;
+struct timer_opts timer_tsc;
+#endif
+
int tsc_disable __initdata = 0;

extern spinlock_t i8253_lock;
+extern volatile unsigned long jiffies;

static int use_tsc;
/* Number of usecs that the last interrupt was delayed */
@@ -232,7 +241,7 @@ static void delay_tsc(unsigned long loop

#define CALIBRATE_TIME (5 * 1000020/HZ)

-unsigned long __init calibrate_tsc(void)
+static unsigned long __init calibrate_tsc(void)
{
mach_prepare_counter();

@@ -282,6 +291,107 @@ bad_ctc:
return 0;
}

+#ifdef CONFIG_HPET_TIMER
+static void mark_offset_tsc_hpet(void)
+{
+ unsigned long long this_offset, last_offset;
+ unsigned long offset, temp, hpet_current;
+
+ write_lock(&monotonic_lock);
+ last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+ /*
+ * It is important that these two operations happen almost at
+ * the same time. We do the RDTSC stuff first, since it's
+ * faster. To avoid any inconsistencies, we need interrupts
+ * disabled locally.
+ */
+ /*
+ * Interrupts are just disabled locally since the timer irq
+ * has the SA_INTERRUPT flag set. -arca
+ */
+ /* read Pentium cycle counter */
+
+ hpet_current = hpet_readl(HPET_COUNTER);
+ rdtsc(last_tsc_low, last_tsc_high);
+
+ /* lost tick compensation */
+ offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
+ int lost_ticks = (offset - hpet_last) / hpet_tick;
+ jiffies += lost_ticks;
+ }
+ hpet_last = hpet_current;
+
+ /* update the monotonic base value */
+ this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+ monotonic_base += cycles_2_ns(this_offset - last_offset);
+ write_unlock(&monotonic_lock);
+
+ /* calculate delay_at_last_interrupt */
+ /*
+ * Time offset = (hpet delta) * ( usecs per HPET clock )
+ * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
+ * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
+ * Where,
+ * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
+ */
+ delay_at_last_interrupt = hpet_current - offset;
+ ASM_MUL64_REG(temp, delay_at_last_interrupt,
+ hpet_usec_quotient, delay_at_last_interrupt);
+}
+
+/* ------ Calibrate the TSC based on HPET timer -------
+ * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
+ * calibrate_tsc() calibrates the processor TSC by comparing
+ * it to the HPET timer of known frequency.
+ * Too much 64-bit arithmetic here to do this cleanly in C
+ */
+
+#define CALIBRATE_CNT_HPET (5 * hpet_tick)
+#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
+
+unsigned long __init calibrate_tsc_hpet(void)
+{
+ unsigned long tsc_startlow, tsc_starthigh;
+ unsigned long tsc_endlow, tsc_endhigh;
+ unsigned long hpet_start, hpet_end;
+ unsigned long result, remain;
+
+ hpet_start = hpet_readl(HPET_COUNTER);
+ rdtsc(tsc_startlow, tsc_starthigh);
+ do {
+ hpet_end = hpet_readl(HPET_COUNTER);
+ } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET);
+ rdtsc(tsc_endlow, tsc_endhigh);
+
+ /* 64-bit subtract - gcc just messes up with long longs */
+ __asm__("subl %2,%0\n\t"
+ "sbbl %3,%1"
+ :"=a" (tsc_endlow), "=d" (tsc_endhigh)
+ :"g" (tsc_startlow), "g" (tsc_starthigh),
+ "0" (tsc_endlow), "1" (tsc_endhigh));
+
+ /* Error: ECPUTOOFAST */
+ if (tsc_endhigh)
+ goto bad_calibration;
+
+ /* Error: ECPUTOOSLOW */
+ if (tsc_endlow <= CALIBRATE_TIME_HPET)
+ goto bad_calibration;
+
+ ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET);
+ if (remain > (tsc_endlow >> 1))
+ result++; /* rounding the result */
+
+ return result;
+bad_calibration:
+ /*
+ * the CPU was so fast/slow that the quotient wouldn't fit in
+ * 32 bits..
+ */
+ return 0;
+}
+#endif

#ifdef CONFIG_CPU_FREQ
static unsigned int ref_freq = 0;
@@ -333,8 +443,16 @@ static int __init init_tsc(char* overrid
{

/* check clock override */
- if (override[0] && strncmp(override,"tsc",3))
+ if (override[0] && strncmp(override,"tsc",3)) {
+#ifdef CONFIG_HPET_TIMER
+ if (is_hpet_enabled()) {
+ printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
+ } else
+#endif
+ {
return -ENODEV;
+ }
+ }

/*
* If we have APM enabled or the CPU clock speed is variable
@@ -368,7 +486,29 @@ static int __init init_tsc(char* overrid
count2 = LATCH; /* initialize counter for mark_offset_tsc() */

if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc();
+ unsigned long tsc_quotient;
+#ifdef CONFIG_HPET_TIMER
+ if (is_hpet_enabled()){
+ unsigned long result, remain;
+ printk("Using TSC for gettimeofday\n");
+ tsc_quotient = calibrate_tsc_hpet();
+ timer_tsc.mark_offset = &mark_offset_tsc_hpet;
+ /*
+ * Math to calculate hpet to usec multiplier
+ * Look for the comments at get_offset_tsc_hpet()
+ */
+ ASM_DIV64_REG(result, remain, hpet_tick,
+ 0, KERNEL_TICK_USEC);
+ if (remain > (hpet_tick >> 1))
+ result++; /* rounding the result */
+
+ hpet_usec_quotient = result;
+ } else
+#endif
+ {
+ tsc_quotient = calibrate_tsc();
+ }
+
if (tsc_quotient) {
fast_gettimeoffset_quotient = tsc_quotient;
use_tsc = 1;
diff -purN linux-2.6.0-test1/include/asm-i386/timer.h linux-2.6.0-test1-hpet/include/asm-i386/timer.h
--- linux-2.6.0-test1/include/asm-i386/timer.h 2003-07-13 20:32:28.000000000 -0700
+++ linux-2.6.0-test1-hpet/include/asm-i386/timer.h 2003-08-18 13:52:52.000000000 -0700
@@ -38,4 +38,8 @@ extern struct timer_opts timer_tsc;
extern struct timer_opts timer_cyclone;
#endif

+#ifdef CONFIG_HPET_TIMER
+extern struct timer_opts timer_hpet;
+#endif
+
#endif


Attachment: hpet3.ZIP
Description: hpet3.ZIP