Re: New pentium bug workaround - please test..

Mikael Pettersson (Mikael.Pettersson@sophia.inria.fr)
Wed, 19 Nov 1997 16:46:13 +0100 (MET)


Hans Lermen wrote:

> On Tue, 18 Nov 1997, Linus Torvalds wrote:
>
> > Now, that implies that we can actually _keep_ the IDT mapped, and instead
> > of marking it not present we can mark it read-only.
>
> Ah, this one is _much_ better then the Intel solution.
> Congratulation Linus !
>
> > Anyway, this approach still needs to be debugged, but I'm told by intel
> > that it should indeed work, and I have a patch for people to try out.
> > Right now the patch is only for 2.1.x (relative to 2.1.65, in fact), but
> ^^^^^^^^^^^^^^
> Nope, just I made a 2.0.32 backport, look at the appended patch ;-)
> ^^^^^^
> Works fine here, also with SMP. No f00f crashes atall.
>
> For 2.0.x, however, the kernel needs to write to the idt, hence the below
> patch has 2 alias mapped addresses for the idt:
>
> - one writeprotected for LIDT
> - one writeable for the kernel.

But AFAIK, only arch/i386/kernel/irq.c writes to the idt entries
(in 2.0.x that is), and it only updates entries >= 0x20.
So instead of write-protecting the entire idt page and setting
up an alias page, we should be able to keep the 2.0.32 scheme,
but *write protect* instead of *unmap* the lower page.
I think this is a simpler solution than the alias-page trick.

/Mikael
mikpe@{sophia.inria.fr,ida.liu.se}

diff --recursive -u linux-2.0.32/arch/i386/kernel/traps.c linux-2.0.32-f00f/arch/i386/kernel/traps.c
--- linux-2.0.32/arch/i386/kernel/traps.c Fri Nov 14 17:12:25 1997
+++ linux-2.0.32-f00f/arch/i386/kernel/traps.c Wed Nov 19 15:59:40 1997
@@ -366,19 +366,18 @@
idt = new_idt;

/*
- * Unmap lower page:
+ * Write protect lower page:
*/
twopage = VMALLOC_VMADDR(twopage);
pgd = pgd_offset(current->mm, twopage);
pmd = pmd_offset(pgd, twopage);
pte = pte_offset(pmd, twopage);

- pte_clear(pte);
+ *pte = pte_wrprotect(*pte);
flush_tlb_all();

printk(" ... done\n");
}
-

void trap_init(void)
{
diff --recursive -u linux-2.0.32/arch/i386/mm/fault.c linux-2.0.32-f00f/arch/i386/mm/fault.c
--- linux-2.0.32/arch/i386/mm/fault.c Mon Nov 17 02:12:14 1997
+++ linux-2.0.32-f00f/arch/i386/mm/fault.c Wed Nov 19 15:51:57 1997
@@ -21,128 +21,10 @@

extern void die_if_kernel(const char *,struct pt_regs *,long);

-asmlinkage void do_divide_error (struct pt_regs *, unsigned long);
-asmlinkage void do_debug (struct pt_regs *, unsigned long);
-asmlinkage void do_nmi (struct pt_regs *, unsigned long);
-asmlinkage void do_int3 (struct pt_regs *, unsigned long);
-asmlinkage void do_overflow (struct pt_regs *, unsigned long);
-asmlinkage void do_bounds (struct pt_regs *, unsigned long);
asmlinkage void do_invalid_op (struct pt_regs *, unsigned long);
-asmlinkage void do_general_protection (struct pt_regs *, unsigned long);

extern int pentium_f00f_bug;

-static int handle_intx_eip_adjust(struct pt_regs *regs)
-{
- unsigned char *addr, *csp = 0;
- int wrap = 0;
- int count = 8; /* only check for reasonable number of bytes
- * else we do it the save 'simple way' */
- unsigned long _eip;
-#define XX_WRAP(x) (wrap ? *((unsigned short *)&x) : x)
-
- /* We rely on being able to access the memory pointed to by cs:eip
- * and the bytes behind it up to the faulting instruction,
- * because we just got an exception for this instruction and
- * hence the memory should just be successfully accessed.
- * In case of crossing a page boundary or when accessing kernel space
- * we just do the simple fix (increase eip by one).
- * This assumption also obsoletes checking of segment limit.
- * ( should be veryfied, however, if this assumption is true )
- */
-
- if (regs->cs == KERNEL_CS) {
- /* not what we expect */
- regs->eip++;
- return 0;
- }
-
- if (regs->eflags & VM_MASK) {
- /* we have real mode type selector */
- wrap = 1;
- csp = (unsigned char *)((unsigned long)regs->cs << 4);
- }
- else if (regs->cs & 4) {
- /* we have a LDT selector */
- struct desc_struct *p, *ldt = current->ldt;
- if (!ldt)
- ldt = (struct desc_struct*) &default_ldt;
- p = ldt + (regs->cs >> 3);
- csp = (unsigned char *)((p->a >> 16) | ((p->b & 0xff) << 16) | (p->b & 0xFF000000));
- if (!(p->b & 0x400000))
- wrap = 1; /* 16-bit segment */
- }
-
- _eip = regs->eip;
- addr = csp+XX_WRAP(_eip);
- while (count-- > 0) {
- if ((unsigned long)addr >= TASK_SIZE) {
- /* accessing kernel space, do the simple case */
- regs->eip++;
- return 0;
- }
- switch (get_user(addr)) {
-
- case 0xCC: /* single byte INT3 */
- XX_WRAP(_eip)++;
- regs->eip = _eip;
- return 0;
-
- case 0xCD: /* two byte INT 3 */
- XX_WRAP(_eip)++;
- /* fall through */
- case 0xCE: /* INTO, single byte */
- XX_WRAP(_eip)++;
- if ( (regs->eflags & VM_MASK)
- && ((regs->eflags & IOPL_MASK) != IOPL_MASK)) {
- /* not allowed, do GP0 fault */
- do_general_protection(regs, 0);
- return -1;
- }
- regs->eip = _eip;
- return 0;
-
- /* the prefixes from the Intel patch */
- case 0xF2 ... 0xF3:
- case 0x2E:
- case 0x36:
- case 0x3E:
- case 0x26:
- case 0x64 ... 0x67:
- break; /* just skipping them */
-
- default:
- /* not what we handle here,
- * just doing the simple fix
- */
- regs->eip++;
- return 0;
- }
-
- if ( !(++XX_WRAP(_eip)) ) {
- /* we wrapped around */
- regs->eip++;
- return 0;
- }
-
- addr = csp+XX_WRAP(_eip);
- if ( !((unsigned long)addr & ~(PAGE_SIZE -1)) ) {
- /* we would cross page boundary, not good,
- * doing the simple fix
- */
- regs->eip++;
- return 0;
- }
- }
-
- /* if we come here something weird happened,
- * just doing the simple fix
- */
- regs->eip++;
- return 0;
-}
-
-
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
@@ -248,23 +130,11 @@

nr = (address - TASK_SIZE - (unsigned long) idt) >> 3;

- if (nr < 7) {
- static void (*handler[])(struct pt_regs *, unsigned long) = {
- do_divide_error, /* 0 - divide overflow */
- do_debug, /* 1 - debug trap */
- do_nmi, /* 2 - NMI */
- do_int3, /* 3 - int 3 */
- do_overflow, /* 4 - overflow */
- do_bounds, /* 5 - bound range */
- do_invalid_op }; /* 6 - invalid opcode */
- if ((nr == 3) || (nr == 4))
- if (handle_intx_eip_adjust(regs))
- return;
- handler[nr](regs, error_code);
+ if (nr == 6) {
+ do_invalid_op(regs, 0);
return;
}
}
-

/*
* Oops. The kernel tried to access some bad page. We'll have to