Note that this is a very partial fix - "int 3" can be generated not only
with the single-byte "int3" instruction, it can be generated with a
two-byte "int $3" instruction too, and there can be a large number of
prefix bytes so that essentially we can't know whether the instruction is
1 or 15 bytes long.
Admittedly, debugging tends to always use the single-byte instruction, and
it may be that this simple fix is thus acceptable.
However, there are other cases too, and many of these are actually handled
in a patch made by Intel. I'm appending the Intel-made patch to plain
2.0.31, which is actually reversed (so you have to use "patch -R -p0" to
apply it. It would be good if somebody would look into trying to integrate
as much as possible of the Intel patch.
Note that the intel patch too is not very good: it doesn't really take
"special" segments into account, which can happen with Wine. That's why
I'd suggest doing it all in C code rather than assembly as the Intel patch
does it - by the time we notice that it's supposed to be a debug trap
we're no longer very timing-critical so there isn't much point in trying
to optimize that path.
Note that intel got a lot of bad publicity here, but they actually did
mostly do the right thing. They had this stupid NDA thing for a few days,
but that's probably some generic Intel policy. So don't beat up on them
too much. They had a bug, but they also had engineers working hard on
making a Linux patch available. It's different from the one we came up
with ourselves, but its more sophisticated in some ways (and more broken
in other ways - you can confuse the Intel patch badly by loading code
segments into the LDT and executing debug instructions from strange places
in non-zero-base segments).
I really hope that there are x86 wizards out there that would be
interested in cleaning up the current 2.0.32-4 patch and the 2.1.64 stuff
to be more complete. I tried to make 2.1.64 a bit easier to work with wrt
this patch.
Note that the way to avoid taking any hit at all for page faults on
non-Pentium processors is to have a _different_ page fault handler in the
original IDT (one that doesn't care about this bug), and then have a
"pentium_bug_page_fault_handler" for the case when we have copied the IDT
to vmalloc'ed memory.
Linus
----- Intel reversed patch to 2.0.x -----
diff -r -u f00f_fix/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S
--- f00f_fix/arch/i386/kernel/entry.S Fri Nov 14 08:10:00 1997
+++ linux/arch/i386/kernel/entry.S Fri Nov 14 09:14:22 1997
@@ -389,7 +389,6 @@
decl %eax # eax = -1
pushl %ecx
pushl %ebx
-error_code1:
cld
xorl %ebx,%ebx # zero ebx
xchgl %eax, ORIG_EAX(%esp) # orig_eax (get the error code. )
@@ -502,169 +501,7 @@
ENTRY(page_fault)
pushl $ SYMBOL_NAME(do_page_fault)
- push %fs
- push %es
- push %ds
- pushl %eax
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %edx
- pushl %ecx
- pushl %ebx
- movl $(KERNEL_DS),%edx
- mov %dx,%ds
- movl %cr2,%eax
- movl %eax,%ecx
- movl SYMBOL_NAME(idt),%ebx
- orl $0xc0000000,%ebx
- andl $0xfffff000,%ebx
- andl $0xfffff000,%eax
- cmpl %eax,%ebx
- jz trap_perrata
-go_error:
- xorl %eax,%eax
- decl %eax # eax = -1
- jmp error_code1
-
- # This is the pentium locked cmpxchg bug
-trap_perrata:
- #
- # check for user access to IDT
- #
- movb 44(%esp),%al
- andb $4,%al
- jnz go_error # user access, do page fault
-
- movl SYMBOL_NAME(idt),%eax
- orl $0xc0000000,%eax
- subl %eax,%ecx
- shrl $3,%ecx # ivt vector number
- shll $2,%ecx # offset into vector table
- movl %ecx,%ebx # save it for later
- movl ivt_handlers(%ebx),%eax
- movl %eax,40(%esp)
- #
- # skip all prefix bytes preceeding the faulting instruction
- #
- movl $(USER_DS),%edx
- mov %dx,%fs
- movl 48(%esp),%eax
- call skip_prefix
- #
- # check for s/w int opcode, if called from user and interrupt
- # is not 3, 4 or 5 then dispatch the fault to the GP fault handler
- #
- cmpb $0xcd,%fs:(%eax)
- je 2f # int instr. with immediate data
- cmpb $0xce,%fs:(%eax)
- jne 4f # not INT n or INTO
-2:
- #
- # check for user/V86 mode access
- #
- testl $0x20000,56(%esp) # check VM flag
- jz prot_mode
- cmpb $0xcc,%fs:(%eax) # is it and INT3 instr?
- je 5f
- movl 56(%esp),%ecx
- andl $0x3000,%ecx # check IOPL
- cmpl $0x3000,%ecx
- je 7f
- jmp gp_fault
-
-prot_mode:
- cmpw $(KERNEL_CS),52(%esp) # check if fault happend in kernel mode
- je 3f
- cmpb $3*4,%bl # check if it was INT3
- je 7f
- cmpb $4*4,%bl # check if it was INTO
- je 7f
- cmpb $5*4,%bl # check if it was BOUNDS
- je 7f
-gp_fault:
- movl $0,44(%esp)
- movl $SYMBOL_NAME(do_general_protection),40(%esp)
- jmp go_error
-
- #
- # Increment EIP as appropriate
- #
-3:
- cmpb $0xce,%fs:(%eax) # check if it was INTO
- je 5f
-7:
- incl %eax
-5:
- incl %eax
- movl %eax,48(%esp) # update EIP
- jmp go_error
-
- #
- # Increment EIP and update EFLAGS as needed
- #
-4:
- cmpb $3*4,%bl # check if DEBUG fault
- je 5f
- cmpb $4*4,%bl # check if INTO fault
- je 5f
- cmpb $1*4,%bl # check if TRACE trap
- je 6f
- cmpb $2*4,%bl # check if NMI
- jne go_error
- cmpb $0xf4,%fs:(%eax) # check for HALT
- jne go_error
-5:
- incl %eax
- movl %eax,48(%esp) # update EIP
-6:
- andl $~0x10000,56(%esp) # clear RF flag
- jmp go_error
-
-#
-# Examine the code space and increment the EIP
-# past any prefix bytes that might be present
-#
-skip_prefix:
- xorl %ecx, %ecx
- movb %fs:(%eax), %cl
- cmpb $0xF2, %cl
- jne 1f
- incl %eax
- jmp skip_prefix
-1:
- cmpb $0xF3, %cl
- jne 2f
- incl %eax
- jmp skip_prefix
-2:
- cmpb $0x2E, %cl
- jne 3f
- incl %eax
- jmp skip_prefix
-3:
- cmpb $0x36, %cl
- jne 4f
- incl %eax
- jmp skip_prefix
-4:
- cmpb $0x3E, %cl
- jne 5f
- incl %eax
- jmp skip_prefix
-5:
- cmpb $0x26, %cl
- jne 6f
- incl %eax
- jmp skip_prefix
-6:
- andb $0xFC, %cl /* prefix 64, 65, 66, 67 */
- cmpb $0x64, %cl
- jne 7f
- incl %eax
- jmp skip_prefix
-7:
- ret /* no more prefix bytes */
+ jmp error_code
ENTRY(spurious_interrupt_bug)
pushl $0
@@ -840,12 +677,3 @@
.long 0,0
.long SYMBOL_NAME(sys_vm86)
.space (NR_syscalls-166)*4
-
- ivt_handlers:
- .long SYMBOL_NAME(do_divide_error)
- .long SYMBOL_NAME(do_debug)
- .long SYMBOL_NAME(do_nmi)
- .long SYMBOL_NAME(do_int3)
- .long SYMBOL_NAME(do_overflow)
- .long SYMBOL_NAME(do_bounds)
- .long SYMBOL_NAME(do_invalid_op)
diff -r -u f00f_fix/arch/i386/kernel/head.S linux/arch/i386/kernel/head.S
--- f00f_fix/arch/i386/kernel/head.S Fri Nov 14 08:10:01 1997
+++ linux/arch/i386/kernel/head.S Fri Nov 14 09:14:08 1997
@@ -260,7 +260,7 @@
movw %dx,%ax /* selector = 0x0010 = cs */
movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
- lea SYMBOL_NAME(idt_table),%edi
+ lea SYMBOL_NAME(idt),%edi
mov $256,%ecx
rp_sidt:
movl %eax,(%edi)
@@ -376,12 +376,9 @@
.word 0
idt_descr:
.word 256*8-1 # idt contains 256 entries
- .long 0xc0000000+SYMBOL_NAME(idt_table)
+ .long 0xc0000000+SYMBOL_NAME(idt)
ENTRY(idt)
- .long SYMBOL_NAME(idt_table)
-
-ENTRY(idt_table)
.fill 256,8,0 # idt is uninitialized
ALIGN
diff -r -u f00f_fix/arch/i386/mm/init.c linux/arch/i386/mm/init.c
--- f00f_fix/arch/i386/mm/init.c Fri Nov 14 08:10:34 1997
+++ linux/arch/i386/mm/init.c Fri Nov 14 09:14:41 1997
@@ -281,7 +281,6 @@
if (wp_works_ok < 0)
wp_works_ok = 0;
}
- fix_illop();
return;
}
@@ -304,49 +303,5 @@
}
val->totalram <<= PAGE_SHIFT;
val->sharedram <<= PAGE_SHIFT;
- return;
-}
-
-#include <linux/malloc.h>
-
- void
-fix_illop(void)
-{
- extern desc_table *idt, idt_table[];
- extern void bcopy(void *, void *, int);
- short idt_descr[3];
- int *ip;
- pgd_t *pgd;
- pmd_t *pmd;
- pte_t *pte;
-
- idt = (desc_table *)vmalloc(2 * 4096);
- idt = (desc_table *)(PAGE_ALIGN((int)idt + 1) - (7*8));
- printk("New IDT - %x\n", (int)idt);
- bcopy(idt_table, idt, 256*8);
- pgd = pgd_offset(current->mm, 0xc0000000 | (int)idt);
- if (pgd_none(*pgd)) {
- printk("set_idt0:no pgd\n");
- return;
- }
- if (pgd_bad(*pgd)) {
- printk("set_idt0:bad pgd %x\n", pgd_val(*pgd));
- return;
- }
- pmd = pmd_offset(pgd, 0xc0000000 | (int)idt);
- if (pmd_none(*pmd)) {
- printk("set_idt0:no pmd\n");
- return;
- }
- if (pmd_bad(*pmd)) {
- printk("set_idt0:bad pmd %x\n", pmd_val(*pmd));
- return;
- }
- pte = pte_offset(pmd, 0xc0000000 | (int)idt);
- pte->pte &= ~_PAGE_PRESENT;
- ip = (int *)(&idt_descr[1]);
- *ip = 0xc0000000 | (int)idt;
- idt_descr[0] = (256*8) - 1;
- __asm__ __volatile__("\tlidt %0": "=m" (idt_descr));
return;
}
diff -r -u f00f_fix/include/linux/head.h linux/include/linux/head.h
--- f00f_fix/include/linux/head.h Fri Nov 14 08:11:37 1997
+++ linux/include/linux/head.h Fri Nov 14 09:15:01 1997
@@ -3,10 +3,9 @@
typedef struct desc_struct {
unsigned long a,b;
-} desc_table;
+} desc_table[256];
-extern desc_table gdt[256];
-extern desc_table *idt;
+extern desc_table idt,gdt;
#define GDT_NUL 0
#define GDT_CODE 1