[patch] SMP fixes #3, for pre-2.2.2-1

MOLNAR Ingo (mingo@chiara.csoma.elte.hu)
Wed, 3 Feb 1999 23:05:56 +0100 (CET)


some boards still had the 'spurious APIC interrupt, ayiee, should never
happen.' problem, this one is now nailed for all boards tested. (thanks to
Ramon Huerta, Andre M. Hedrick and Alex Buell for testing it out)

plus i've included James Robertson's 'clear the IOAPIC on reboot' patch,
this should solve some of the reboot problems. (please let me know if
there are still reboot problems unique to SMP)

the attached patch also includes a fixed version of the 'timestamp
synchronization code'. (those who do not want this code should simply
leave out the smp.c and time.c part of the patch)

-- mingo

--- linux/arch/i386/kernel/smp.c.orig Wed Feb 3 08:17:33 1999
+++ linux/arch/i386/kernel/smp.c Wed Feb 3 08:17:55 1999
@@ -784,6 +784,179 @@
return memory_start;
}

+#ifdef CONFIG_X86_TSC
+/*
+ * TSC synchronization.
+ *
+ * We first check whether all CPUs have their TSCs synchronized,
+ * then we print a warning if not, and always resync.
+ */
+/* Shared state for the rendezvous between synchronize_tsc_bp() and synchronize_tsc_ap(): */
+static atomic_t tsc_start_flag = ATOMIC_INIT(0);	/* set to 1 by the BP; the APs spin until it is non-zero */
+static atomic_t tsc_count_start = ATOMIC_INIT(0);	/* CPUs that have arrived at the start of a pass */
+static atomic_t tsc_count_stop = ATOMIC_INIT(0);	/* CPUs that have finished a pass */
+static unsigned long long tsc_values[NR_CPUS] = { 0, };	/* latest READ_TSC sample, indexed by smp_processor_id() */
+/* Number of rendezvous passes; the TSC is cleared only on the final one. */
+#define NR_LOOPS 5
+/* Defined in time.c: 2^32 * (1 / (clocks per usec)). */
+extern unsigned long fast_gettimeoffset_quotient;
+
+/*
+ * accurate 64-bit division, done as bitwise long division so that no
+ * 64-bit '/' or '%' is ever evaluated. Not terribly optimized but we
+ * need it at boot time only anyway.
+ *
+ * result == a / b (truncated), exact for every b != 0, including
+ * divisors above 2^32-1. b == 0 panics. Each pass shifts the next bit
+ * of 'a' into 'rem' and emits one quotient bit; 'rem' < 'b' in between.
+ */
+
+unsigned long long div64 (unsigned long long a, unsigned long long b)
+{
+	unsigned long long quot = 0, rem = 0;
+	unsigned int carry;
+	int bit;
+
+	if (!b)
+		panic("huh?\n");
+
+	for (bit = 0; bit < 64; bit++) {
+		/* rem can reach 2^63 when b > 2^63: keep the bit shifted out */
+		carry = (unsigned int)(rem >> 63);
+		rem = (rem << 1) | (a >> 63);
+		a <<= 1;
+		quot <<= 1;
+		if (carry || rem >= b) {
+			rem -= b;	/* wraps back into range when carry is set */
+			quot |= 1;
+		}
+	}
+	return quot;
+}
+
+/* Boot-CPU half of the TSC rendezvous; runs in lock-step with synchronize_tsc_ap(). */
+static void __init synchronize_tsc_bp (void)
+{
+ int i;
+ unsigned long long t0;
+ unsigned long long sum, avg;
+ long long delta;
+ unsigned long one_usec;	/* TSC clocks per microsecond */
+ int buggy = 0;	/* set once the first skewed CPU has been reported */
+
+ printk("checking TSC synchronization across CPUs: ");
+ /* quotient is 2^32/(clocks per usec), so 2^32/quotient gives clocks per usec; written as (2^30/q)*4, presumably because 1<<32 does not fit a 32-bit int */
+ one_usec = ((1<<30)/fast_gettimeoffset_quotient)*(1<<2);
+ /* Release the APs that spin on tsc_start_flag in synchronize_tsc_ap(): */
+ atomic_set(&tsc_start_flag, 1);
+ wmb();
+
+ /*
+ * We loop a few times to get a primed instruction cache,
+ * then the last pass is more or less synchronized and
+ * the BP and APs set their cycle counters to zero all at
+ * once. This reduces the chance of having random offsets
+ * between the processors, and guarantees that the maximum
+ * delay between the cycle counters is never bigger than
+ * the latency of information-passing (cachelines) between
+ * two CPUs.
+ */
+ for (i = 0; i < NR_LOOPS; i++) {
+ /*
+ * wait until every AP has checked in; they keep spinning on '== smp_num_cpus'
+ */
+ while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
+ atomic_set(&tsc_count_stop, 0);
+ wmb();
+ /*
+ * our increment makes the count == smp_num_cpus, which lets the APs save their current TSC:
+ */
+ atomic_inc(&tsc_count_start);
+
+ READ_TSC(tsc_values[smp_processor_id()]);
+ /*
+ * We clear the TSC in the last loop:
+ */
+ if (i == NR_LOOPS-1)
+ CLEAR_TSC;
+
+ /*
+ * Wait for all APs to leave the synchronization point:
+ */
+ while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
+ atomic_set(&tsc_count_start, 0);	/* re-arm the start counter for the next pass */
+ wmb();
+ atomic_inc(&tsc_count_stop);	/* count reaches smp_num_cpus: the APs leave this pass */
+ }
+ /* Average the sampled TSC values over the CPUs set in cpu_online_map: */
+ sum = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!(cpu_online_map & (1 << i)))
+ continue;
+
+ t0 = tsc_values[i];
+ sum += t0;
+ }
+ avg = div64(sum, smp_num_cpus);
+ /* Second scan: compare each online CPU's sample against that average. */
+ sum = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!(cpu_online_map & (1 << i)))
+ continue;
+
+ delta = tsc_values[i] - avg;
+ if (delta < 0)
+ delta = -delta;
+ /*
+ * We report bigger than 2 microseconds clock differences.
+ */
+ if (delta > 2*one_usec) {
+ long realdelta;
+ if (!buggy) {
+ buggy = 1;
+ printk("\n");
+ }
+ realdelta = div64(delta, one_usec);	/* absolute skew converted to usecs */
+ if (tsc_values[i] < avg)
+ realdelta = -realdelta;	/* negative: this CPU's sample was below the average */
+
+ printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
+ i, realdelta);
+ }
+
+ sum += delta;	/* NOTE(review): this running total is never read afterwards */
+ }
+ if (!buggy)
+ printk("passed.\n");
+}
+/* AP half of the TSC rendezvous; pairs pass-for-pass with synchronize_tsc_bp(). */
+static void __init synchronize_tsc_ap (void)
+{
+ int i;
+
+ /*
+ * smp_num_cpus is not necessarily known at the time
+ * this gets called, so we first wait for the BP to
+ * finish SMP initialization:
+ */
+ while (!atomic_read(&tsc_start_flag)) mb();
+
+ for (i = 0; i < NR_LOOPS; i++) {
+ atomic_inc(&tsc_count_start);	/* check in for this pass */
+ while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();	/* the BP's own increment completes the count */
+
+ READ_TSC(tsc_values[smp_processor_id()]);
+ if (i == NR_LOOPS-1)
+ CLEAR_TSC;	/* only the final pass zeroes the counter */
+
+ atomic_inc(&tsc_count_stop);	/* report this pass as done */
+ while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();	/* leave only after the BP has incremented too */
+ }
+}
+#undef NR_LOOPS
+
+#endif
+
extern void calibrate_delay(void);

void __init smp_callin(void)
@@ -861,6 +1034,13 @@
* Allow the master to continue.
*/
set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]);
+
+#ifdef CONFIG_X86_TSC
+ /*
+ * Synchronize the TSC with the BP
+ */
+ synchronize_tsc_ap ();
+#endif
}

int cpucount = 0;
@@ -1382,8 +1562,15 @@
#endif

smp_done:
-}

+#ifdef CONFIG_X86_TSC
+ /*
+ * Synchronize the TSC with the AP
+ */
+ if (cpucount)
+ synchronize_tsc_bp();
+#endif
+}

/*
* the following functions deal with sending IPIs between CPUs.
--- linux/arch/i386/kernel/io_apic.c.orig Wed Feb 3 08:17:37 1999
+++ linux/arch/i386/kernel/io_apic.c Wed Feb 3 08:51:00 1999
@@ -202,7 +202,7 @@
DO_ACTION( mask, 0, |= 0x00010000, io_apic_sync()) /* mask = 1 */
DO_ACTION( unmask, 0, &= 0xfffeffff, ) /* mask = 0 */

-static void __init clear_IO_APIC_pin(unsigned int pin)
+static void clear_IO_APIC_pin(unsigned int pin)
{
struct IO_APIC_route_entry entry;

@@ -215,6 +215,13 @@
io_apic_write(0x11 + 2 * pin, *(((int *)&entry) + 1));
}

+static void clear_IO_APIC (void)
+{
+	int idx = 0;	/* runs clear_IO_APIC_pin() on every pin below nr_ioapic_registers, ascending */
+
+	while (idx < nr_ioapic_registers)
+		clear_IO_APIC_pin(idx++);
+}

/*
* support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
@@ -625,7 +632,7 @@
/*
* Set up a certain pin as ExtINT delivered interrupt
*/
-void __init setup_ExtINT_pin(unsigned int pin)
+void __init setup_ExtINT_pin(unsigned int pin, int irq)
{
struct IO_APIC_route_entry entry;

@@ -635,17 +642,16 @@
memset(&entry,0,sizeof(entry));

entry.delivery_mode = dest_ExtINT;
- entry.dest_mode = 1; /* logical delivery */
+ entry.dest_mode = 0; /* physical delivery */
entry.mask = 0; /* unmask IRQ now */
/*
- * Careful with this one. We do not use 'true' logical
- * delivery, as we set local APICs to LDR == 0. But
- * 0xff logical destination is special (broadcast).
- * Any other combination will cause problems.
+ * We use physical delivery to get the timer IRQ
+ * to the boot CPU. 'boot_cpu_id' is the physical
+ * APIC ID of the boot CPU.
*/
- entry.dest.logical.logical_dest = 0xff;
+ entry.dest.physical.physical_dest = boot_cpu_id;

- entry.vector = 0; /* it's ignored */
+ entry.vector = assign_irq_vector(irq);

entry.polarity = 0;
entry.trigger = 0;
@@ -760,7 +766,7 @@

static void __init init_sym_mode(void)
{
- int i, pin;
+ int i;

for (i = 0; i < PIN_MAP_SIZE; i++) {
irq_2_pin[i].pin = -1;
@@ -790,8 +796,7 @@
/*
* Do not trust the IO-APIC being empty at bootup
*/
- for (pin = 0; pin < nr_ioapic_registers; pin++)
- clear_IO_APIC_pin(pin);
+ clear_IO_APIC();
}

/*
@@ -799,6 +804,15 @@
*/
void init_pic_mode(void)
{
+ /*
+ * Clear the IO-APIC before rebooting:
+ */
+ clear_IO_APIC();
+
+ /*
+ * Put it back into PIC mode (has an effect only on
+ * certain boards)
+ */
printk("disabling symmetric IO mode... ");
outb_p(0x70, 0x22);
outb_p(0x00, 0x23);
@@ -1184,7 +1198,7 @@

if (pin2 != -1) {
printk(".. (found pin %d) ...", pin2);
- setup_ExtINT_pin(pin2);
+ setup_ExtINT_pin(pin2, 0);
make_8259A_irq(0);
}

--- linux/arch/i386/kernel/time.c.orig Wed Feb 3 08:17:51 1999
+++ linux/arch/i386/kernel/time.c Wed Feb 3 08:18:04 1999
@@ -86,7 +86,7 @@
* Equal to 2^32 * (1 / (clocks per usec) ).
* Initialized in time_init.
*/
-static unsigned long fast_gettimeoffset_quotient=0;
+unsigned long fast_gettimeoffset_quotient=0;

extern rwlock_t xtime_lock;

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/