NexGen kernel/config patch

Nathan Myers (ncm@cantrip.org)
Fri, 05 Apr 1996 18:37:24 -0800


This is a patch against kernel 1.3.83. It is intended to
improve the performance of NexGen Nx586-based systems.

Notes:
1. I assume that the code in head.S is supposed to do its
best to identify the CPU it's running on, regardless of what
CPU the kernel was compiled for, and to do so without trapping.

2. The performance improvement comes from eliminating the
"invlpg" instruction, which NexGen claims is unnecessary
on their chip. (And it traps if you try to execute it.)
[It might be better to configure tlb_flush_page at runtime,
as is done on the SPARC port, than to set it according to
the compile mode.]

3. With CONFIG_MNEXGEN we still compile with -m386 and -DCPU=386.

4. Most of this stuff is just for config; the only real change in
code executed after boot is at the very end of the patch.

[Am I doing the right thing with this patch, to get it folded into
the mainline sources? If not, what should I do differently?]

Nathan Myers
ncm@cantrip.org

-----------------------------
diff -u -r lin1/Documentation/Configure.help lin2/Documentation/Configure.help
--- lin1/Documentation/Configure.help Fri Apr 5 16:43:03 1996
+++ lin2/Documentation/Configure.help Fri Apr 5 16:41:18 1996
@@ -586,13 +586,13 @@
This is the processor type of your CPU. It is used for optimizing
purposes. In order to compile a kernel that can run on all CPU types
(albeit not optimally fast), you can specify "386" here. If you
- specify "486" or "Pentium" or "PPro", then the kernel will run on
- 486 and Pentium (=586) and Pentium Pro (=686) CPUs. In rare cases,
- it can make sense to specify "Pentium" even if running a 486: the
- kernel will be smaller but slower. On the other hand, if you use a
- compiler before gcc 2.7 (say "gcc -v" to find out), then you have to
- say "386" or "486" here even if running on a Pentium or PPro
- machine. If you don't know what to do, say "386".
+ specify "486", "Pentium", "PPro", or "Nx586" then the kernel will
+ run on 486, Pentium (=586), Pentium Pro (=686), and NexGen CPUs
+ (respectively). In rare cases, it can make sense to specify "Pentium"
+ even if running a 486: the kernel will be smaller but slower. On the
+ other hand, if you use a compiler before gcc 2.7 (say "gcc -v" to
+ find out), then you have to say "386" or "486" here even if running
+ on a Pentium or PPro machine. If you don't know what to do, say "386".

Compile the kernel into the ELF object format
CONFIG_ELF_KERNEL
Only in lin2/Documentation: Configure.help.orig
diff -u -r lin1/arch/i386/Makefile lin2/arch/i386/Makefile
--- lin1/arch/i386/Makefile Thu Apr 4 19:17:05 1996
+++ lin2/arch/i386/Makefile Thu Apr 4 19:26:36 1996
@@ -76,6 +76,10 @@
CFLAGS := $(CFLAGS) -m486 -malign-loops=2 -malign-jumps=2 -malign-functions=2 -DCPU=686
endif

+ifdef CONFIG_MNEXGEN
+CFLAGS := $(CFLAGS) -m386 -DCPU=386
+endif
+
ifdef SMP
CFLAGS := $(CFLAGS) -D__SMP__
endif
diff -u -r lin1/arch/i386/config.in lin2/arch/i386/config.in
--- lin1/arch/i386/config.in Thu Apr 4 19:16:23 1996
+++ lin2/arch/i386/config.in Thu Apr 4 20:28:08 1996
@@ -33,7 +33,8 @@
"386 CONFIG_M386 \
486 CONFIG_M486 \
Pentium CONFIG_M586 \
- PPro CONFIG_M686" Pentium
+ PPro CONFIG_M686 \
+ Nx586 CONFIG_MNEXGEN" Pentium
endmenu

source drivers/block/Config.in
diff -u -r lin1/arch/i386/defconfig lin2/arch/i386/defconfig
--- lin1/arch/i386/defconfig Fri Apr 5 16:44:17 1996
+++ lin2/arch/i386/defconfig Fri Apr 5 16:51:00 1996
@@ -25,6 +25,7 @@
# CONFIG_M486 is not set
CONFIG_M586=y
# CONFIG_M686 is not set
+# CONFIG_MNEXGEN is not set

#
# Floppy, IDE, and other block devices
diff -u -r lin1/arch/i386/kernel/head.S lin2/arch/i386/kernel/head.S
--- lin1/arch/i386/kernel/head.S Thu Apr 4 19:21:02 1996
+++ lin2/arch/i386/kernel/head.S Fri Apr 5 15:32:39 1996
@@ -103,11 +103,9 @@
checkCPUtype:
#endif

-/* check if it is 486 or 386. */
+/* check if it is 386, 486, or better. */
/*
- * XXX - this does a lot of unnecessary setup. Alignment checks don't
- * apply at our cpl of 0 and the stack ought to be aligned already, and
- * we don't need to preserve eflags.
+ * XXX - this does unnecessary cleanup: we don't need to preserve eflags.
*/
movl $3, SYMBOL_NAME(x86)
pushfl # push EFLAGS
@@ -120,8 +118,14 @@
popl %eax # put it in eax
xorl %ecx,%eax # change in flags
andl $0x40000,%eax # check if AC bit changed
- je is386
- movl $4,SYMBOL_NAME(x86)
+ jne not386
+ movw $0x5555,%ax # a non-zero value
+ xorw %dx,%dx # set ZF=1
+ movw $2,%bx # (clobber eax, ebx, edx.)
+ divw %bx # i386 div clears ZF, Nx586 leaves it set
+ jnz is386
+ jmp isnew
+not386: incl SYMBOL_NAME(x86)
movl %ecx,%eax
xorl $0x200000,%eax # check ID flag
pushl %eax
diff -u -r lin1/include/asm-i386/bugs.h lin2/include/asm-i386/bugs.h
--- lin1/include/asm-i386/bugs.h Thu Apr 4 19:14:53 1996
+++ lin2/include/asm-i386/bugs.h Thu Apr 4 19:42:37 1996
@@ -112,7 +112,13 @@

static void check_tlb(void)
{
-#ifndef CONFIG_M386
+#ifdef CONFIG_MNEXGEN
+ if (memcmp(x86_vendor_id,"NexGenDriven",12) != 0) {
+ printk("This kernel was compiled for Nx586, and it\n"
+ " depends on Nx586 MMU enhancements. Giving up.\n");
+ for (;;) ;
+ }
+#elif !defined(CONFIG_M386)
/*
* The 386 chips don't support TLB finegrained invalidation.
* They will fault when they hit a invlpg instruction.
diff -u -r lin1/include/asm-i386/byteorder.h lin2/include/asm-i386/byteorder.h
--- lin1/include/asm-i386/byteorder.h Thu Apr 4 19:21:40 1996
+++ lin2/include/asm-i386/byteorder.h Thu Apr 4 19:15:07 1996
@@ -32,7 +32,7 @@
extern __inline__ unsigned long int
__ntohl(unsigned long int x)
{
-#if defined(__KERNEL__) && !defined(CONFIG_M386)
+#if defined(__KERNEL__) && !defined(CONFIG_M386) && !defined(CONFIG_MNEXGEN)
__asm__("bswap %0" : "=r" (x) : "0" (x));
#else
__asm__("xchgb %b0,%h0\n\t" /* swap lower bytes */
diff -u -r lin1/include/asm-i386/pgtable.h lin2/include/asm-i386/pgtable.h
--- lin1/include/asm-i386/pgtable.h Fri Apr 5 16:45:38 1996
+++ lin2/include/asm-i386/pgtable.h Fri Apr 5 16:47:16 1996
@@ -44,6 +44,8 @@

#ifdef CONFIG_M386
#define __flush_tlb_one(addr) flush_tlb()
+#elif defined(CONFIG_MNEXGEN) /* XXX - || defined(CONFIG_M686) ? */
+#define __flush_tlb_one(addr) /* nothing, not needed on Nx586 */
#else
#define __flush_tlb_one(addr) \
__asm__ __volatile__("invlpg %0": :"m" (*(char *) addr))