Re: [patch] recover losed timer interrupt using the TSC [Re: [patch]

Andrea Arcangeli (andrea@e-mind.com)
Mon, 15 Mar 1999 01:35:05 +0100 (CET)


On Sun, 14 Mar 1999, Ingo Molnar wrote:

>some of my old code in time.c was doing something like that, and you'll
>notice that my patch changes it back. I found it to be a wrong assumption

Yes.

>to 'hand-schedule' IO (especially _p IO) with whatever other instructions,
>the CPU will not do anything better. I thought there might be some

I was hoping for some kind of pipelining, since the code in the middle in
my case was only moving a memory address to a register or to another
memory address. But as you said, the I/O latency is so high that it would
probably mask any kind of clever optimization, so I agree that it's
better to make the code cleaner.

I still think my patch is superior. With your patch, when you get the
printk you will know that you should reset the clock and that jiffies has
lost some ticks. With my patch you are _allowed_ to do anything you want
with irqs disabled for quite a long time (not more than a tsc_low_ wrap).
You'll get back a KERN_NOTICE that will tell you how many ticks you lost.
Since we can do that with minimal overhead, why not be robust?

So I have now merged the parts of your patch that I agree with and
rediffed a new global patch called timer-2.2.3-C:

Index: arch/i386/kernel/time.c
===================================================================
RCS file: /var/cvs/linux/arch/i386/kernel/time.c,v
retrieving revision 1.1.1.4
diff -u -r1.1.1.4 time.c
--- time.c 1999/03/09 01:28:37 1.1.1.4
+++ linux/arch/i386/kernel/time.c 1999/03/15 00:30:18
@@ -28,6 +28,11 @@
* 1998-12-24 Copyright (C) 1998 Andrea Arcangeli
* Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
* serialize accesses to xtime/lost_ticks).
+ * 1999-03-14 Andrea Arcangeli
+ * Developed recover_lost_timer(): using the TSC information we won't
+ * ever miss a timer irq anymore.
+ * Improved calibrate_tsc() doing replication in time of the algorithm
+ * to get a safer tsc2usec-quotient value.
*/

#include <linux/errno.h>
@@ -76,6 +81,7 @@
static unsigned long fast_gettimeoffset_quotient=0;

extern rwlock_t xtime_lock;
+extern volatile unsigned long lost_ticks;

static inline unsigned long do_fast_gettimeoffset(void)
{
@@ -156,18 +162,10 @@
unsigned long jiffies_t;

/* timer count may underflow right here */
- outb_p(0x00, 0x43); /* latch the count ASAP */
+ count = get_8254_timer_count();

- count = inb_p(0x40); /* read the latched count */
-
- /*
- * We do this guaranteed double memory access instead of a _p
- * postfix in the previous port access. Wheee, hackady hack
- */
jiffies_t = jiffies;

- count |= inb_p(0x40) << 8;
-
/*
* avoiding timer inconsistencies (they are rare, but they happen)...
* there are two kinds of problems that must be avoided here:
@@ -236,7 +234,6 @@
*/
void do_gettimeofday(struct timeval *tv)
{
- extern volatile unsigned long lost_ticks;
unsigned long flags;
unsigned long usec, sec;

@@ -412,14 +409,48 @@
static int use_tsc = 0;

/*
+ * Using a bit better the TSC information now we are also able to recover
+ * from lost timer interrupts. -arca
+ */
+static inline void recover_lost_timer(unsigned long delta_cycles,
+ int delay_usec, struct pt_regs *regs)
+{
+ /*
+ * The algorithm I invented to know if we losed an irq in the meantime
+ * works this way:
+ *
+ * - convert delta from cycles to usec
+ * - remove from the delta_usec the latency of the irqs
+ * - convert from usec to timer ticks
+ *
+ * -arca
+ */
+
+ register unsigned long delta_usec;
+
+ __asm__("mull %2"
+ :"=a" (delta_cycles), "=d" (delta_usec)
+ :"g" (fast_gettimeoffset_quotient), "0" (delta_cycles));
+ delta_usec -= delay_usec;
+ delta_usec = (delta_usec + 500000/HZ) / (1000000/HZ);
+
+ if ((long) delta_usec <= 1)
+ return;
+
+ delta_usec -= 1;
+ printk(KERN_NOTICE "recover_lost_timer: lost %lu ticks from %08lx\n",
+ delta_usec, regs->eip);
+ lost_ticks += delta_usec;
+ jiffies += delta_usec;
+}
+
+/*
* This is the same as the above, except we _also_ save the current
* Time Stamp Counter value at the time of the timer interrupt, so that
* we later on can estimate the time of day more exactly.
*/
static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
- int count;
-
/*
* Here we are in the timer irq handler. We just have irqs locally
* disabled but we don't know if the timer_bh is running on the other
@@ -443,16 +474,21 @@
* has the SA_INTERRUPT flag set. -arca
*/

- /* read Pentium cycle counter */
- __asm__("rdtsc" : "=a" (last_tsc_low) : : "edx");
-
- outb_p(0x00, 0x43); /* latch the count ASAP */
+ unsigned long old_cycles = last_tsc_low;
+ int old_delay, count;

- count = inb_p(0x40); /* read the latched count */
- count |= inb(0x40) << 8;
+ /* read Pentium cycle counter */
+ __asm__ __volatile__("rdtsc" : "=a" (last_tsc_low) : : "edx");

+ count = get_8254_timer_count();
count = ((LATCH-1) - count) * TICK_SIZE;
+
+ old_delay = delay_at_last_interrupt;
delay_at_last_interrupt = (count + LATCH/2) / LATCH;
+
+ recover_lost_timer(last_tsc_low - old_cycles,
+ delay_at_last_interrupt - old_delay,
+ regs);
}

do_timer_interrupt(irq, NULL, regs);
@@ -543,43 +579,60 @@
* device.
*/

+/*
+ * To get a more safe quotient value (that it will be used forever) we
+ * try many times and we'll stop if we'll get the same value for two times
+ * consecutively. -arca
+ */
+
#define CALIBRATE_LATCH (5 * LATCH)
-#define CALIBRATE_TIME (5 * 1000020/HZ)
+/*
+ * The timer chip will decrease the latch for CALIBRATE_LATCH times. The
+ * frequency between every latch change is CLOCK_TICK_RATE. -arca
+ */

-__initfunc(static unsigned long calibrate_tsc(void))
+static unsigned long __init calibrate_tsc(void)
{
- /* Set the Gate high, disable speaker */
- outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+ int i;
+ unsigned long last_quotient = -999992 /* be quiet compiler */;
+ unsigned long calibrate_latch = CALIBRATE_LATCH, calibrate_time;

/*
- * Now let's take care of CTC channel 2
- *
- * Set the Gate high, program CTC channel 2 for mode 0,
- * (interrupt on terminal count mode), binary count,
- * load 5 * LATCH count, (LSB and MSB) to begin countdown.
+ * This asm does `CALIBRATE_LATCH * 1000000 / CLOCK_TICK_RATE', but
+ * using 64bit arithmetic to avoid overflowing. -arca
*/
- outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */
- outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */
- outb(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */
+ __asm__("mull %1\n\t"
+ "divl %4"
+ : "=a" (calibrate_time)
+ : "r" (calibrate_latch), "0" (1000000), "d" (0),
+ "r" (CLOCK_TICK_RATE)
+ : "edx");

+ for (i=0; i<20; i++)
{
unsigned long startlow, starthigh;
unsigned long endlow, endhigh;
- unsigned long count;

+ /* Set the Gate high, disable speaker */
+ outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+ /*
+ * Now let's take care of CTC channel 2
+ *
+ * Set the Gate high, program CTC channel 2 for mode 0,
+ * (interrupt on terminal count mode), binary count,
+ * load 5 * LATCH count, (LSB and MSB) to begin countdown.
+ */
+ outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */
+ outb(calibrate_latch & 0xff, 0x42); /* LSB of count */
+ outb(calibrate_latch >> 8, 0x42); /* MSB of count */
+
__asm__ __volatile__("rdtsc":"=a" (startlow),"=d" (starthigh));
- count = 0;
- do {
- count++;
- } while ((inb(0x61) & 0x20) == 0);
+ while ((inb(0x61) & 0x20) == 0);
__asm__ __volatile__("rdtsc":"=a" (endlow),"=d" (endhigh));

last_tsc_low = endlow;

- /* Error: ECTCNEVERSET */
- if (count <= 1)
- goto bad_ctc;
-
/* 64-bit subtract - gcc just messes up with long longs */
__asm__("subl %2,%0\n\t"
"sbbl %3,%1"
@@ -592,15 +645,26 @@
goto bad_ctc;

/* Error: ECPUTOOSLOW */
- if (endlow <= CALIBRATE_TIME)
+ if (endlow <= calibrate_time)
goto bad_ctc;

__asm__("divl %2"
:"=a" (endlow), "=d" (endhigh)
- :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
+ :"r" (endlow), "0" (0), "1" (calibrate_time));
+
+ if (i && last_quotient == endlow)
+ {
+ printk("calibrate_tsc: consistent quotient %lu "
+ "found after %d tries\n", endlow, i);
+ return endlow;
+ }

- return endlow;
+ last_quotient = endlow;
}
+
+ printk("calibrate_tsc: inconsistent quotient using the last %lu!\n",
+ last_quotient);
+ return last_quotient;

/*
* The CTC wasn't reliable: we got a hit on the very first read,
Index: arch/i386/kernel/smp.c
===================================================================
RCS file: /var/cvs/linux/arch/i386/kernel/smp.c,v
retrieving revision 1.1.1.5
diff -u -r1.1.1.5 smp.c
--- smp.c 1999/02/20 15:38:03 1.1.1.5
+++ linux/arch/i386/kernel/smp.c 1999/03/14 23:25:28
@@ -847,10 +848,9 @@
/*
* Set up our APIC timer.
*/
+ sti();
setup_APIC_clock();

- __sti();
-
#ifdef CONFIG_MTRR
/* Must be done before calibration delay is computed */
mtrr_init_secondary_cpu ();
@@ -1831,22 +1855,6 @@
*/

/*
- * The timer chip is already set up at HZ interrupts per second here,
- * but we do not accept timer interrupts yet. We only allow the BP
- * to calibrate.
- */
-static unsigned int __init get_8254_timer_count(void)
-{
- unsigned int count;
-
- outb_p(0x00, 0x43);
- count = inb_p(0x40);
- count |= inb_p(0x40) << 8;
-
- return count;
-}
-
-/*
* This function sets up the local APIC timer, with a timeout of
* 'clocks' APIC bus clock. During calibration we actually call
* this function twice, once with a bogus timeout value, second
@@ -1887,25 +1895,16 @@
apic_write(APIC_TMICT, clocks/APIC_DIVISOR);
}

+/*
+ * This is more exact than it looks like. Yep we have the timer IRQ's
+ * latency added, _but_ we have it always and it's a constant, thus
+ * it doesnt matter.
+ */
void __init wait_8254_wraparound(void)
{
- unsigned int curr_count, prev_count=~0;
- int delta;
-
- curr_count = get_8254_timer_count();
-
- do {
- prev_count = curr_count;
- curr_count = get_8254_timer_count();
- delta = curr_count-prev_count;
-
- /*
- * This limit for delta seems arbitrary, but it isn't, it's
- * slightly above the level of error a buggy Mercury/Neptune
- * chipset timer can cause.
- */
+ unsigned long prev_jiffies = jiffies;

- } while (delta<300);
+ while (jiffies == prev_jiffies) mb();
}

/*
@@ -1937,12 +1936,12 @@
*/
setup_APIC_timer(1000000000);

+ sti();
/*
* The timer chip counts down to zero. Let's wait
* for a wraparound to start exact measurement:
* (the current tick might have been already half done)
*/
-
wait_8254_wraparound ();

/*
@@ -1994,13 +1993,8 @@

void __init setup_APIC_clock(void)
{
- unsigned long flags;
-
static volatile int calibration_lock;

- __save_flags(flags);
- __cli();
-
SMP_PRINTK(("setup_APIC_clock() called.\n"));

/*
@@ -2028,16 +2022,7 @@
/*
* Now set up the timer for real.
*/
-
setup_APIC_timer (calibration_result);
-
- /*
- * We ACK the APIC, just in case there is something pending.
- */
-
- ack_APIC_irq ();
-
- __restore_flags(flags);
}

/*
Index: arch/i386/kernel/irq.h
===================================================================
RCS file: /var/cvs/linux/arch/i386/kernel/irq.h,v
retrieving revision 1.1.1.3
diff -u -r1.1.1.3 irq.h
--- irq.h 1999/02/20 15:38:01 1.1.1.3
+++ linux/arch/i386/kernel/irq.h 1999/03/14 23:25:28
@@ -249,6 +252,20 @@
eip = prof_len-1;
atomic_inc((atomic_t *)&prof_buffer[eip]);
}
+}
+
+/*
+ * Get the timer chip's counter.
+ */
+extern inline unsigned int get_8254_timer_count(void)
+{
+ unsigned int count;
+
+ outb_p(0x00, 0x43); /* latch the counter */
+ count = inb_p(0x40); /* get counter LSB */
+ count |= inb_p(0x40) << 8; /* get counter MSB */
+
+ return count;
}

#endif
Index: kernel/sched.c
===================================================================
RCS file: /var/cvs/linux/kernel/sched.c,v
retrieving revision 1.1.1.7
retrieving revision 1.1.2.19
diff -u -r1.1.1.7 -r1.1.2.19
--- sched.c 1999/02/23 16:48:13 1.1.1.7
+++ linux/kernel/sched.c 1999/03/14 17:58:07 1.1.2.19
@@ -1294,15 +1299,13 @@
static void update_wall_time(unsigned long ticks)
{
do {
- ticks--;
update_wall_time_one_tick();
- } while (ticks);
-
- if (xtime.tv_usec >= 1000000) {
- xtime.tv_usec -= 1000000;
- xtime.tv_sec++;
- second_overflow();
- }
+ while (xtime.tv_usec >= 1000000) {
+ xtime.tv_usec -= 1000000;
+ xtime.tv_sec++;
+ second_overflow();
+ }
+ } while (--ticks);
}

static inline void do_process_times(struct task_struct *p,

Comments?

Andrea Arcangeli

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/