Re: [PATCH 3/4] new timeofday x86-64 arch specific changes (v. B1)

From: George Anzinger
Date: Thu Jun 09 2005 - 19:54:51 EST


Parag Warudkar wrote:
On Sunday 05 June 2005 13:05, Dominik Brodowski wrote:

IIRC (from the comment above) several chipsets suffer from this
inconsistency, namely the widely used PIIX4(E) and ICH(4 only? or also
other ICH-ones?). Therefore, we'd need at least some sort of boot-time
check to decide which method to use... and based on the method, we can
adjust the priority maybe?


To begin with, will the simple proof-of-concept patch like below work? (It tests the chipset read in the same do{}while loop - if the loop executes only once, it considers the chipset good - in which case it executes the faster read_pmtmr_fast function.) Or does it need wider testing under different circumstances to conclude that chipset is good?

I tested the patch under Virtual PC which emulates a PIIX4 chipset. Test passes there, meaning the do {}while loop executes only once.

As I understand the problem it is that the counter is, sometimes, read while it is is rippling the carry up to the higher bits. This will only happen if the read is done just as the hardware is bumping the count and even then only if there is a carry. Exactly where in the carry sequency this happens is likely just a guess.

In the HRT patch I take care of this by testing the read against a prior read (up to 1/HZ away). If the number is bigger and to too much bigger, I just do one read. Here is the full routine:

quick_get_cpuctr(void)
{
static unsigned long last_read = 0;
static int qgc_max = 0;
int i;

unsigned long rd_delta, rd_ans, rd = inl(acpi_pm_tmr_address);

/*
* This will be REALLY big if ever we move backward in time...
*/
rd_delta = (rd - last_read) & SIZE_MASK;
last_read = rd;

rd_ans = (rd - last_update) & SIZE_MASK;

if (likely((rd_ans < (arch_cycles_per_jiffy << 1)) &&
(rd_delta < (arch_cycles_per_jiffy << 1))))
return rd_ans;

for (i = 0; i < 10; i++) {
rd = inl(acpi_pm_tmr_address);
rd_delta = (rd - last_read) & SIZE_MASK;
last_read = rd;
if (unlikely(i > qgc_max))
qgc_max = i;
/*
* On my test machine (800MHZ dual PIII) this is always
* seven. Seems long, but we will give it some slack...
* We note that rd_delta (and all the vars) unsigned so
* a backward movement will show as a really big number.
*/
if (likely(rd_delta < 20))
return (rd - last_update) & SIZE_MASK;
}
return (rd - last_update) & SIZE_MASK;
}
George
--

Parag


------------------------------------------------------------------------

--- linux-2.6-orig/arch/i386/kernel/timers/timer_pm.c 2005-03-02 02:37:48.000000000 -0500
+++ linux-2.6.12-rc5/arch/i386/kernel/timers/timer_pm.c 2005-06-05 23:01:15.000000000 -0400
@@ -35,6 +35,10 @@
* in arch/i386/acpi/boot.c */
u32 pmtmr_ioport = 0;
+struct pmtmr_rd_func {
+ u32 (*read_pmtmr) (void);
+}pmtmr_rd_func;
+
/* value of the Power timer at last timer interrupt */
static u32 offset_tick;
@@ -45,6 +49,11 @@
#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
+static inline u32 read_pmtmr_fast(void)
+{
+ return inl(pmtmr_ioport);
+}
+
/*helper function to safely read acpi pm timesource*/
static inline u32 read_pmtmr(void)
{
@@ -76,14 +85,14 @@
unsigned long count, delta;
mach_prepare_counter();
- value1 = read_pmtmr();
+ value1 = pmtmr_rd_func.read_pmtmr();
mach_countup(&count);
- value2 = read_pmtmr();
+ value2 = pmtmr_rd_func.read_pmtmr();
delta = (value2 - value1) & ACPI_PM_MASK;
/* Check that the PMTMR delta is within 5% of what we expect */
- if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 ||
- delta > (PMTMR_EXPECTED_RATE * 21) / 20) {
+ if (delta < (PMTMR_EXPECTED_RATE * 18) / 20 ||
+ delta > (PMTMR_EXPECTED_RATE * 22) / 20) {
printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE);
return -1;
}
@@ -95,9 +104,16 @@
static int init_pmtmr(char* override)
{
u32 value1, value2;
- unsigned int i;
+ u32 v1=0,v2=0,v3=0;
+ unsigned int i, loop_cnt=0;
- if (override[0] && strncmp(override,"pmtmr",5))
+ /* Use slower but probably more correct read function by
+ * default. This is overriden with a fast one if it is
+ * suitable to do so below.
+ */
+ pmtmr_rd_func.read_pmtmr = read_pmtmr;
+
+ if (override[0] && strncmp(override,"pmtmr",5))
return -ENODEV;
if (!pmtmr_ioport)
@@ -106,9 +122,32 @@
/* we use the TSC for delay_pmtmr, so make sure it exists */
if (!cpu_has_tsc)
return -ENODEV;
+ /* It has been reported that because of various broken
+ * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time
+ * source is not latched, so you must read it multiple
+ * times to insure a safe value is read.
+ */
+ do {
+ v1 = inl(pmtmr_ioport);
+ v2 = inl(pmtmr_ioport);
+ v3 = inl(pmtmr_ioport);
+ loop_cnt++;
+ } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
+ || (v3 > v1 && v3 < v2));
+
+ if(loop_cnt == 1) { + /*We have a good chipset*/
+ printk(KERN_INFO "PM Timer: Chipset passes port read test\n");
+ pmtmr_rd_func.read_pmtmr = read_pmtmr_fast;
+ }
+
+ else {
+ printk(KERN_INFO "PM Timer: Chipset fails port read test:");
+ printk(KERN_INFO "Working around it.");
+ }
/* "verify" this timing source */
- value1 = read_pmtmr();
+ value1 = pmtmr_rd_func.read_pmtmr();
for (i = 0; i < 10000; i++) {
value2 = read_pmtmr();
if (value2 == value1)
@@ -156,7 +195,7 @@
write_seqlock(&monotonic_lock);
- offset_tick = read_pmtmr();
+ offset_tick = pmtmr_rd_func.read_pmtmr();
/* calculate tick interval */
delta = (offset_tick - last_offset) & ACPI_PM_MASK;
@@ -202,7 +241,7 @@
} while (read_seqretry(&monotonic_lock, seq));
/* Read the pmtmr */
- this_offset = read_pmtmr();
+ this_offset = pmtmr_rd_func.read_pmtmr();
/* convert to nanoseconds */
ret = (this_offset - last_offset) & ACPI_PM_MASK;
@@ -232,7 +271,7 @@
u32 now, offset, delta = 0;
offset = offset_tick;
- now = read_pmtmr();
+ now = pmtmr_rd_func.read_pmtmr();
delta = (now - offset)&ACPI_PM_MASK;
return (unsigned long) offset_delay + cyc2us(delta);

--
George Anzinger george@xxxxxxxxxx
HRT (High-res-timers): http://sourceforge.net/projects/high-res-timers/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/