Re: Possible workaround for buggy E801 call in 2.2.x

david parsons (orc@pell.portland.or.us)
23 Dec 1999 22:03:25 -0800


In article <linux.kernel.001501bf4bd3$4fca3da0$13f938ca@home-computer>,
Prashant TR <prashant_tr@yahoo.com> wrote:
>> One of the FIC slot-1 motherboards of recent vintage has exactly
>> that (the VB-601; I can't say the vintage of the bios, because
>> all the VB-601's I administer are parked about 4 miles north of
>> me right now.) That's what provoked the e820 implementation in
>> the first place -- e801 would tell me I had half a gig, 88 would
>> tell me 64mb, and e820 returns a memory map containing the correct
>> 128mb of system ram.
>
>If the E820 returns the memory addresses and if they happen to be
>in increasing order without an overlap,

If it returns memory addresses at all, it's working; personally,
I don't care if any type >4 memory segments overlap anything else,
because we only care about type 1(ram) and 3(nvs reclaim, though
there are reports that you can't reclaim that memory.)

Looking for increasing order is a convenience that I used in an early
(and rejected by a certain L. Torvalds) version of the patch. It's
incorrect; I have a FIC <something>-11 motherboard that returns
type 1 in ascending order, but intermingles those segments with type
2-4 segments in DESCENDING order.

>If we were talking of having any options disabled by default, then I
>guess it should be the E801 call (considering the frequency of errors
>of E820 and E801).

I suspect that in cases where there's no e820 (thus meaning don't
restrict e820 past the point where the spec says) that 801 will
work, due to Microsoft.

If anyone is interested in actually trying the memory detect side
of the patch Nathan and I have been merging together, here it is
-- I'd like to find machines that continue to fail with this version of
the patch over the published one.

____
david parsons \bi/ works for me (tm). Bummer binfmt_aout is in ``I hate
\/ you!!' mode.

--- ./include/asm-i386/e820.h.orig Thu Nov 18 19:25:28 1999
+++ ./include/asm-i386/e820.h Sun Dec 19 23:54:48 1999
@@ -5,16 +5,17 @@
* In a nutshell, arch/i386/boot/setup.S populates a scratch table
* in the empty_zero_block that contains a list of usable address/size
* duples. In arch/i386/kernel/setup.c, this information is
- * transferred into the e820map, and in arch/i386/mm/init.c, that
- * new information is used to mark pages reserved or not.
+ * transferred into the bios[], then converted into a list of valid
+ * memory regions in region[], and that new information is used in
+ * arch/i386/mm/init.c to mark pages available or not.
*
*/
#ifndef __E820_HEADER
#define __E820_HEADER

-#define E820MAP 0x2d0 /* our map */
-#define E820MAX 32 /* number of entries in E820MAP */
-#define E820NR 0x1e8 /* # entries in E820MAP */
+#define E820MAP 0x2d0 /* our map */
+#define E820MAX 32 /* number of entries in E820MAP */
+#define E820NR 0x1e8 /* # entries in E820MAP */

#define E820_RAM 1
#define E820_RESERVED 2
@@ -23,18 +24,35 @@

#define HIGH_MEMORY (1024*1024)

+/* HACK: These macros map between page numbers and physical addresses.
+ * They used to be in arch/i386/kernel/setup.c, but have been moved here
+ * so that they can also be used in arch/i386/mm/init.c.
+ */
+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
+
#ifndef __ASSEMBLY__

-struct e820map {
- int nr_map;
- struct {
- long long addr; /* start of memory segment */
- long long size; /* size of memory segment */
- long type; /* type of memory segment */
- } map[E820MAX];
-};
+struct e820 {
+ __u64 addr; /* addr,size,type are returned by e820 bios call */
+ __u64 size;
+ __u32 type;
+} ;
+
+struct physical_region {
+ __u64 start;
+ __u64 end;
+} ;
+
+struct physical_memory {
+ int nr_bios; /* bios[] holds memory region */
+ int nr_region; /* region[] holds valid mem */
+ struct e820 bios[E820MAX]; /* returned by the bios */
+ struct physical_region region[E820MAX]; /* that we want to allocate */
+} ;

-extern struct e820map e820;
+extern struct physical_memory physical_memory;
#endif/*!__ASSEMBLY__*/

#endif/*__E820_HEADER*/
--- ./arch/i386/boot/setup.S.orig Fri Dec 3 20:37:07 1999
+++ ./arch/i386/boot/setup.S Mon Dec 20 00:13:51 1999
@@ -27,6 +27,9 @@
* Video handling moved to video.S by Martin Mares, March 1996
* <mj@k332.feld.cvut.cz>
*
+ * Extended memory detection made more paranoid by orc@pell.chi.il.us (david
+ * parsons) and Nathan Zook (nathan.zook@amd.com), December 1999.
+ *
* Extended memory detection scheme retwiddled by orc@pell.chi.il.us (david
* parsons) to avoid loadlin confusion, July 1997
*
@@ -255,91 +258,167 @@
loader_panic_mess: .string "Wrong loader, giving up..."

loader_ok:
-# Get memory size (extended mem, kB)
+/* MEMORY DETECTION CODE */

xorl %eax, %eax
movl %eax, (0x1e0)
-#ifndef STANDARD_MEMORY_BIOS_CALL
movb %al, (E820NR)
-# Try three different memory detection schemes. First, try
-# e820h, which lets us assemble a memory map, then try e801h,
-# which returns a 32-bit memory size, and finally 88h, which
-# returns 0-64m
-
-# method E820H:
-# the memory map from hell. e820h returns memory classified into
-# a whole bunch of different types, and allows memory holes and
-# everything. We scan through this memory map and build a list
-# of the first 32 memory areas, which we return at [E820MAP].
-#
+
+/*
+ * Try three different memory detection schemes. First, try
+ * e820h, which lets us assemble a memory map, then try e801h,
+ * which returns a 32-bit memory size, and finally 88h, which
+ * returns 0-64m
+ */
+
+#if CONFIG_MEM_E820
+
+/*
+ * method E820H:
+ * the memory map from hell. e820h returns memory classified into
+ * a whole bunch of different types, and allows memory holes and
+ * everything. We scan through this memory map and build a list
+ * of the first E820MAX memory areas, which we return at [E820MAP].
+ *
+ * sanity checking: There are two levels of sanity checking here,
+ * depending on how much you trust the bios. The permissive level
+ * merely drops out of the memory detection loop when carry is set
+ * (signifying either no such function or finished) or %ebx == 0
+ * (end of memory map). The e820 map is abandoned if %ecx != 20 (we
+ * want 20 bytes, so if we don't get 20 bytes something is wrong)
+ * or %eax != 'SMAP' (the call is supposed to copy %edx over to %eax.)
+ *
+ * The paranoid level of sanity checking also saves ds over the call,
+ * in case this is some malicious bios that eats registers that it's
+ * not supposed to, and it abandons the e820 map if es:di change over
+ * the course of the call (the memory buffer is passed in via es:di,
+ * and should not be randomly shuffled around)
+ */

meme820:
- movl $0x534d4150, %edx # ascii `SMAP'
- xorl %ebx, %ebx # continuation counter
- movw $E820MAP, %di # point into the whitelist
- # so we can have the bios
- # directly write into it.
+ xorl %ebx, %ebx # continuation counter

+ pushw %ds # es:di points at the whitelist
+ popw %es # so that the call will write
+ movw $E820MAP, %di # directly into it.
jmpe820:
- movl $0x0000e820, %eax # e820, upper word zeroed
- movl $20, %ecx # size of the e820rec
- pushw %ds # data record.
- popw %es
- int $0x15 # make the call
- jc bail820 # fall to e801 if it fails
-
- cmpl $0x534d4150, %eax # check the return is `SMAP'
- jne bail820 # fall to e801 if it fails
-
-# cmpl $1, 16(%di) # is this usable memory?
-# jne again820
-
- # If this is usable memory, we save it by simply advancing %di by
- # sizeof(e820rec).
- #
-good820:
- movb (E820NR), %al # up to 32 entries
- cmpb $E820MAX, %al
- jnl bail820
+ movl $0x534d4150, %edx # ascii `SMAP'
+ movl $0x0000e820, %eax # e820, upper word zeroed
+ movl $20, %ecx # 20 bytes is what we want.
+
+#ifdef CONFIG_E820_PARANOID
+ pushw %ds
+ pushw %di # save %di for later examination
+#endif
+ int $0x15 # make the call
+#ifdef CONFIG_E820_PARANOID
+ popw %dx # retrieve %di into %dx
+ popw %ds
+#endif
+ jc fin820 # fall to e801 if it fails
+
+/*
+ * do some sanity checking:
+ *
+ * check to see if the SMAP moved over to %eax, like
+ * G-d himself intended.
+ */
+ cmpl $0x534d4150, %eax # did the bios move for you?
+ je chksiz820
+
+abort820:
+ xor %ax, %ax
+ movb %al, (E820NR)
+ jmp fin820
+
+chksiz820:
+/*
+ * check that %ecx == 20.
+ */
+ cmpl $20, %ecx
+ jne abort820
+

- incb (E820NR)
+#if CONFIG_E820_PARANOID
+/*
+ * Then check that es:di is still the same.
+ * do this AFTER the SMAP test, because we need %eax as a scratch reg.
+ */
movw %di, %ax
+ cmp %ax, %dx
+ jne abort820
+
+/*
+ * If es:di have mysteriously moved, we can immediately write off
+ * e820 as fatally flawed
+ */
+ movw %es, %ax # how about %es?
+ movw %ds, %dx # it should be == %ds
+ cmpw %ax, %dx
+ jne abort820 # or it's the apocolypse
+#endif
+
+/*
+ * If this is usable memory, we save it by simply advancing %di by
+ * sizeof(e820rec).
+ */
+ movb (E820NR), %al # If the table gets too large
+ cmpb $E820MAX, %al # we must flee
+ jnl fin820
+
+ incb (E820NR) # up the # of records
+ movw %di, %ax # advance to next record
addw $20, %ax
movw %ax, %di
again820:
- cmpl $0, %ebx # check to see if
- jne jmpe820 # %ebx is set to EOF
-bail820:
-
+ cmp $0, %ebx # check to see if
+ jne jmpe820 # %ebx is set to EOF
+fin820:
+#endif

-# method E801H:
-# memory size is in 1k chunksizes, to avoid confusing loadlin.
-# we store the 0xe801 memory size in a completely different place,
-# because it will most likely be longer than 16 bits.
-# (use 1e0 because that's what Larry Augustine uses in his
-# alternative new memory detection scheme, and it's sensible
-# to write everything into the same place.)
+#if CONFIG_MEM_E801
+/*
+ * method E801H:
+ * memory size is in 1k chunksizes, to avoid confusing loadlin.
+ * we store the 0xe801 memory size in a completely different place,
+ * because it will most likely be longer than 16 bits.
+ * (use 1e0 because that's what Larry Augustine uses in his
+ * alternative new memory detection scheme, and it's sensible
+ * to write everything into the same place.)
+ *
+ * This scheme does not write to a memory map, but returns an
+ * accumulator containing all memory from 1-16mb + all memory from
+ * 16->up mb. If a memory hole exists, all memory above that hole
+ * is ignored.
+ */

meme801:
movw $0xe801, %ax
int $0x15
- jc mem88
+ jc fine801

+ andl $0xffff, %ecx # clear sign extend
+ movl %ecx, (0x1e0) # store 1kb between 1-16mb
+ cmpl $0x3C00, %ecx # if 1-16mb region isn't 15mb
+ jne fine801 # there's a memory hole and
+ # memory above the hole can't
+ # be used.
andl $0xffff, %edx # clear sign extend
shll $6, %edx # and go from 64k to 1k chunks
- movl %edx, (0x1e0) # store extended memory size
- andl $0xffff, %ecx # clear sign extend
- addl %ecx, (0x1e0) # and add lower memory into
- # total size.
-
-# Ye Olde Traditional Methode. Returns the memory size (up to 16mb or
-# 64mb, depending on the bios) in ax.
-mem88:
-
+ addl %edx, (0x1e0) # add high memory size.
+fine801:
+
#endif
+
+/*
+ * Ye Olde Traditional Methode. Returns the memory size (up to 16mb or
+ * 64mb, depending on the bios) in ax.
+ */
movb $0x88, %ah
int $0x15
movw %ax, (2)
+
+/* END OF MEMORY DETECTION CODE */

# Set the keyboard repeat rate to the max
movw $0x0305, %ax
--- ./arch/i386/mm/init.c.orig Mon Dec 13 15:39:23 1999
+++ ./arch/i386/mm/init.c Sun Dec 19 18:07:14 1999
@@ -523,21 +523,11 @@
{
int i;

- for (i = 0; i < e820.nr_map; i++) {
- unsigned long addr, end;
-
- if (e820.map[i].type != E820_RAM) /* not usable memory */
- continue;
- /*
- * !!!FIXME!!! Some BIOSen report areas as RAM that
- * are not. Notably the 640->1Mb area. We need a sanity
- * check here.
- */
- addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
- end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
- if ((pagenr >= addr) && (pagenr < end))
+ for (i = 0; i < physical_memory.nr_region; i++)
+ if (pagenr >= PFN_UP(physical_memory.region[i].start)
+ && pagenr < PFN_DOWN(physical_memory.region[i].end) )
return 1;
- }
+
return 0;
}

--- ./arch/i386/kernel/setup.c.orig Tue Dec 7 23:01:40 1999
+++ ./arch/i386/kernel/setup.c Sun Dec 19 23:53:28 1999
@@ -102,7 +102,7 @@
unsigned char table[0];
};

-struct e820map e820 = { 0 };
+struct physical_memory physical_memory = { 0,0 };

unsigned char aux_device_present;

@@ -376,24 +376,43 @@
ret <<= 20;
(*retptr)++;
}
+ else if (**retptr == 'G' || **retptr == 'g') {
+ ret <<= 30;
+ (*retptr)++;
+ }
return ret;
} /* memparse */


-void __init add_memory_region(unsigned long start,
- unsigned long size, int type)
+void __init add_memory_region(__u64 start, __u64 size, __u32 type)
{
- int x = e820.nr_map;
+ int x = physical_memory.nr_region;

if (x == E820MAX) {
- printk("Ooops! Too many entries in the memory map!\n");
- return;
+ printk("Ooops! Too many entries in the memory map!\n");
+ return;
}

- e820.map[x].addr = start;
- e820.map[x].size = size;
- e820.map[x].type = type;
- e820.nr_map++;
+#if CONFIG_E820_PARANOID
+ /* HACK: don't allocate anything above 4gb? */
+ if (start+size > 0x100000000ULL)
+ return;
+#endif
+
+#if CONFIG_E820_PERMISSIVE == 0
+ if (start >= 0xA0000 && start < 0x10000)
+ return;
+#endif
+
+ /* don't bother to allocate non-ram regions or regions that
+ * are less than a page long
+ */
+ if ( type != E820_RAM || PFN_UP(start) >= PFN_DOWN(start+size) )
+ return;
+
+ physical_memory.region[x].start = start;
+ physical_memory.region[x].end = start+size;
+ physical_memory.nr_region++;
} /* add_memory_region */


@@ -406,9 +425,8 @@
void __init setup_memory_region(void)
{
#define E820_DEBUG 1
-#ifdef E820_DEBUG
+
int i;
-#endif

/*
* If we're lucky and live on a modern system, the setup code
@@ -418,22 +436,25 @@
* We check to see that the memory map contains at least 2 elements
* before we'll use it, because the detection code in setup.S may
* not be perfect and most every PC known to man has two memory
- * regions: one from 0 to 640k, and one from 1mb up. (The IBM
- * thinkpad 560x, for example, does not cooperate with the memory
- * detection code.)
+ * regions: one from 0 to 640k, and one from 1mb up.
*/
- if (E820_MAP_NR > 1) {
+ if ( (E820_MAP_NR > 1) && (E820_MAP_NR < E820MAX) ) {
/* got a memory map; copy it into a safe place.
*/
- e820.nr_map = E820_MAP_NR;
- if (e820.nr_map > E820MAX)
- e820.nr_map = E820MAX;
- memcpy(e820.map, E820_MAP, e820.nr_map * sizeof e820.map[0]);
+ physical_memory.nr_bios = E820_MAP_NR;
+ memcpy(&(physical_memory.bios), E820_MAP,
+ E820_MAP_NR * sizeof physical_memory.bios[0]);
+ for (i=0; i < E820_MAP_NR; i++) {
+ add_memory_region(physical_memory.bios[i].addr,
+ physical_memory.bios[i].size,
+ physical_memory.bios[i].type);
#ifdef E820_DEBUG
- for (i=0; i < e820.nr_map; i++) {
- printk("e820: %08x @ %08x ", (int)e820.map[i].size,
- (int)e820.map[i].addr);
- switch (e820.map[i].type) {
+ printk("SMAP: %010Lx - %010Lx ",
+ physical_memory.bios[i].addr,
+ physical_memory.bios[i].addr
+ + physical_memory.bios[i].size);
+
+ switch (physical_memory.bios[i].type) {
case E820_RAM: printk("(usable)\n");
break;
case E820_RESERVED:
@@ -445,11 +466,13 @@
case E820_NVS:
printk("(ACPI NVS)\n");
break;
- default: printk("type %lu\n", e820.map[i].type);
+ default:
+ printk("(type %lu)\n",
+ (unsigned long)physical_memory.bios[i].type);
break;
}
- }
#endif
+ }
}
else {
/* otherwise fake a memory map; one section from 0k->640k,
@@ -501,7 +524,7 @@
* and reinitialize it with the
* standard low-memory region.
*/
- e820.nr_map = 0;
+ physical_memory.nr_region = 0;
usermem = 1;
add_memory_region(0, LOWMEMSIZE(), E820_RAM);
}
@@ -569,10 +592,6 @@

parse_mem_cmdline(cmdline_p);

-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
-
/*
* 128MB for vmalloc and initrd
*/
@@ -591,12 +610,9 @@
* Find the highest page frame number we have available
*/
max_pfn = 0;
- for (i = 0; i < e820.nr_map; i++) {
+ for (i = 0; i < physical_memory.nr_region; i++) {
unsigned long curr_pfn;
- /* RAM? */
- if (e820.map[i].type != E820_RAM)
- continue;
- curr_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+ curr_pfn = PFN_DOWN(physical_memory.region[i].end);
if (curr_pfn > max_pfn)
max_pfn = curr_pfn;
}
@@ -642,23 +658,19 @@
/*
* Register fully available low RAM pages with the bootmem allocator.
*/
- for (i = 0; i < e820.nr_map; i++) {
+ for (i = 0; i < physical_memory.nr_region; i++) {
unsigned long curr_pfn, last_pfn, size;
- /*
- * Reserve usable low memory
- */
- if (e820.map[i].type != E820_RAM)
- continue;
+
/*
* We are rounding up the start address of usable memory:
*/
- curr_pfn = PFN_UP(e820.map[i].addr);
+ curr_pfn = PFN_UP(physical_memory.region[i].start);
if (curr_pfn >= max_low_pfn)
continue;
/*
* ... and at the end of the usable range downwards:
*/
- last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+ last_pfn = PFN_DOWN(physical_memory.region[i].end);

if (last_pfn > max_low_pfn)
last_pfn = max_low_pfn;
@@ -728,30 +740,41 @@
* and also for regions reported as reserved by the e820.
*/
probe_roms();
- for (i = 0; i < e820.nr_map; i++) {
+ for (i = 0; i < physical_memory.nr_region; i++) {
struct resource *res;
- if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
- continue;
res = alloc_bootmem_low(sizeof(struct resource));
- switch (e820.map[i].type) {
- case E820_RAM: res->name = "System RAM"; break;
- case E820_ACPI: res->name = "ACPI Tables"; break;
- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
- default: res->name = "reserved";
- }
- res->start = e820.map[i].addr;
- res->end = res->start + e820.map[i].size - 1;
+ res->start = physical_memory.region[i].start;
+ res->end = physical_memory.region[i].end-1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->name = "System RAM";
request_resource(&iomem_resource, res);
- if (e820.map[i].type == E820_RAM) {
+ if (physical_memory.bios[i].type == E820_RAM) {
/*
- * We dont't know which RAM region contains kernel data,
- * so we try it repeatedly and let the resource manager
- * test it.
+ * We dont't know which RAM region contains the
+ * kernel data, so we try it repeatedly and let
+ * the resource manager test it.
*/
request_resource(res, &code_resource);
request_resource(res, &data_resource);
}
+ }
+ for (i = 0; i < physical_memory.nr_bios; i++) {
+ struct resource *res;
+ if (physical_memory.bios[i].addr + physical_memory.bios[i].size > 0x100000000ULL)
+ continue;
+ if (physical_memory.bios[i].type == E820_RAM)
+ continue;
+
+ res = alloc_bootmem_low(sizeof(struct resource));
+ switch (physical_memory.bios[i].type) {
+ case E820_ACPI: res->name = "ACPI Tables"; break;
+ case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
+ default: res->name = "reserved";
+ }
+ res->start = physical_memory.bios[i].addr;
+ res->end = res->start + physical_memory.bios[i].size - 1;
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ request_resource(&iomem_resource, res);
}
request_resource(&iomem_resource, &vram_resource);

--- ./arch/i386/config.in.orig Mon Dec 20 00:10:57 1999
+++ ./arch/i386/config.in Mon Dec 20 00:17:11 1999
@@ -54,6 +54,12 @@
define_bool CONFIG_X86_PAE y
fi

+bool 'Use bios call e820 to detect memory' CONFIG_MEM_E820
+if [ "$CONFIG_MEM_E820" = "y" ]; then
+ bool 'Be extra cautious with bios call e820' CONFIG_MEM_PARANOID
+fi
+bool 'Use bios call e801 to detect memory' CONFIG_MEM_E801
+
bool 'Math emulation' CONFIG_MATH_EMULATION
bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
bool 'Symmetric multi-processing support' CONFIG_SMP
--- ./arch/i386/defconfig.orig Mon Dec 20 00:19:37 1999
+++ ./arch/i386/defconfig Mon Dec 20 00:20:00 1999
@@ -25,6 +25,9 @@
CONFIG_X86_TSC=y
CONFIG_X86_GOOD_APIC=y
CONFIG_NOHIGHMEM=y
+CONFIG_MEM_E820=y
+CONFIG_MEM_PARANOID=y
+CONFIG_MEM_E801=y
# CONFIG_HIGHMEM4G is not set
# CONFIG_HIGHMEM64G is not set
# CONFIG_MATH_EMULATION is not set

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/