This is the patch I posted for 2.3.33, updated for 2.3.35pre5 (one
reject in arch/i386/kernel/setup.c, fixed, and some chasing down of the
CONFIG_ items -- all tested on two machines locally; the 2.3.33 patch
was tested on a small farm of machines ranging from 386sx's to K7s -- I
don't have any Pentium II+'s around, but I do have a Celeron.) This
one might actually get around some of the reported cases where e820
lies and tells you that, no, really, system memory is usable, honest!
____
david parsons \bi/ Only 3 days to crash your machines before the
\/ Christian odometer rolls over!
--- arch/i386/boot/setup.S.orig Fri Dec 3 20:37:07 1999
+++ arch/i386/boot/setup.S Tue Dec 28 05:26:49 1999
@@ -27,6 +27,9 @@
* Video handling moved to video.S by Martin Mares, March 1996
* <mj@k332.feld.cvut.cz>
*
+ * Extended memory detection made more paranoid by orc@pell.chi.il.us (david
+ * parsons) and Nathan Zook (nathan.zook@amd.com), December 1999.
+ *
* Extended memory detection scheme retwiddled by orc@pell.chi.il.us (david
* parsons) to avoid loadlin confusion, July 1997
*
@@ -255,91 +258,167 @@
loader_panic_mess: .string "Wrong loader, giving up..."
loader_ok:
-# Get memory size (extended mem, kB)
+/* MEMORY DETECTION CODE */
xorl %eax, %eax
movl %eax, (0x1e0)
-#ifndef STANDARD_MEMORY_BIOS_CALL
movb %al, (E820NR)
-# Try three different memory detection schemes. First, try
-# e820h, which lets us assemble a memory map, then try e801h,
-# which returns a 32-bit memory size, and finally 88h, which
-# returns 0-64m
-
-# method E820H:
-# the memory map from hell. e820h returns memory classified into
-# a whole bunch of different types, and allows memory holes and
-# everything. We scan through this memory map and build a list
-# of the first 32 memory areas, which we return at [E820MAP].
-#
+
+/*
+ * Try three different memory detection schemes. First, try
+ * e820h, which lets us assemble a memory map, then try e801h,
+ * which returns a 32-bit memory size, and finally 88h, which
+ * returns 0-64m
+ */
+
+#if CONFIG_MEM_E820
+
+/*
+ * method E820H:
+ * the memory map from hell. e820h returns memory classified into
+ * a whole bunch of different types, and allows memory holes and
+ * everything. We scan through this memory map and build a list
+ * of the first E820MAX memory areas, which we return at [E820MAP].
+ *
+ * sanity checking: There are two levels of sanity checking here,
+ * depending on how much you trust the bios. The permissive level
+ * merely drops out of the memory detection loop when carry is set
+ * (signifying either no such function or finished) or %ebx == 0
+ * (end of memory map). The e820 map is abandoned if %ecx != 20 (we
+ * want 20 bytes, so if we don't get 20 bytes something is wrong)
+ * or %eax != 'SMAP' (the call is supposed to copy %edx over to %eax.)
+ *
+ * The paranoid level of sanity checking also saves ds over the call,
+ * in case this is some malicious bios that eats registers that it's
+ * not supposed to, and it abandons the e820 map if es:di change over
+ * the course of the call (the memory buffer is passed in via es:di,
+ * and should not be randomly shuffled around)
+ */
meme820:
- movl $0x534d4150, %edx # ascii `SMAP'
- xorl %ebx, %ebx # continuation counter
- movw $E820MAP, %di # point into the whitelist
- # so we can have the bios
- # directly write into it.
+ xorl %ebx, %ebx # continuation counter
+ pushw %ds # es:di points at the whitelist
+ popw %es # so that the call will write
+ movw $E820MAP, %di # directly into it.
jmpe820:
- movl $0x0000e820, %eax # e820, upper word zeroed
- movl $20, %ecx # size of the e820rec
- pushw %ds # data record.
- popw %es
- int $0x15 # make the call
- jc bail820 # fall to e801 if it fails
-
- cmpl $0x534d4150, %eax # check the return is `SMAP'
- jne bail820 # fall to e801 if it fails
-
-# cmpl $1, 16(%di) # is this usable memory?
-# jne again820
-
- # If this is usable memory, we save it by simply advancing %di by
- # sizeof(e820rec).
- #
-good820:
- movb (E820NR), %al # up to 32 entries
- cmpb $E820MAX, %al
- jnl bail820
+ movl $0x534d4150, %edx # ascii `SMAP'
+ movl $0x0000e820, %eax # e820, upper word zeroed
+ movl $20, %ecx # 20 bytes is what we want.
+
+#ifdef CONFIG_E820_PARANOID
+ pushw %ds
+ pushw %di # save %di for later examination
+#endif
+ int $0x15 # make the call
+#ifdef CONFIG_E820_PARANOID
+ popw %dx # retrieve %di into %dx
+ popw %ds
+#endif
+ jc fin820 # fall to e801 if it fails
+
+/*
+ * do some sanity checking:
+ *
+ * check to see if the SMAP moved over to %eax, like
+ * G-d himself intended.
+ */
+ cmpl $0x534d4150, %eax # did the bios move for you?
+ je chksiz820
+
+abort820:
+ xor %ax, %ax
+ movb %al, (E820NR)
+ jmp fin820
+
+chksiz820:
+/*
+ * check that %ecx == 20.
+ */
+ cmpl $20, %ecx
+ jne abort820
+
- incb (E820NR)
+#ifdef CONFIG_E820_PARANOID
+/*
+ * Then check that %di is unchanged (%dx holds the copy saved before the call).
+ * do this AFTER the SMAP test, because we need %eax as a scratch reg.
+ */
movw %di, %ax
+ cmp %ax, %dx
+ jne abort820
+
+/*
+ * If es:di have mysteriously moved, we can immediately write off
+ * e820 as fatally flawed
+ */
+ movw %es, %ax # how about %es?
+ movw %ds, %dx # it should be == %ds
+ cmpw %ax, %dx
+ jne abort820 # or it's the apocalypse
+#endif
+
+/*
+ * If this is usable memory, we save it by simply advancing %di by
+ * sizeof(e820rec).
+ */
+ movb (E820NR), %al # If the table gets too large
+ cmpb $E820MAX, %al # we must flee
+ jnl fin820
+
+ incb (E820NR) # up the # of records
+ movw %di, %ax # advance to next record
addw $20, %ax
movw %ax, %di
again820:
- cmpl $0, %ebx # check to see if
- jne jmpe820 # %ebx is set to EOF
-bail820:
-
+ cmp $0, %ebx # check to see if
+ jne jmpe820 # %ebx is set to EOF
+fin820:
+#endif
-# method E801H:
-# memory size is in 1k chunksizes, to avoid confusing loadlin.
-# we store the 0xe801 memory size in a completely different place,
-# because it will most likely be longer than 16 bits.
-# (use 1e0 because that's what Larry Augustine uses in his
-# alternative new memory detection scheme, and it's sensible
-# to write everything into the same place.)
+#if CONFIG_MEM_E801
+/*
+ * method E801H:
+ * memory size is in 1k chunksizes, to avoid confusing loadlin.
+ * we store the 0xe801 memory size in a completely different place,
+ * because it will most likely be longer than 16 bits.
+ * (use 1e0 because that's what Larry Augustine uses in his
+ * alternative new memory detection scheme, and it's sensible
+ * to write everything into the same place.)
+ *
+ * This scheme does not write to a memory map, but returns an
+ * accumulator containing all memory from 1-16mb + all memory from
+ * 16->up mb. If a memory hole exists, all memory above that hole
+ * is ignored.
+ */
meme801:
movw $0xe801, %ax
int $0x15
- jc mem88
+ jc fine801
+ andl $0xffff, %ecx # clear sign extend
+ movl %ecx, (0x1e0) # store 1kb between 1-16mb
+ cmpl $0x3C00, %ecx # if 1-16mb region isn't 15mb
+ jne fine801 # there's a memory hole and
+ # memory above the hole can't
+ # be used.
andl $0xffff, %edx # clear sign extend
shll $6, %edx # and go from 64k to 1k chunks
- movl %edx, (0x1e0) # store extended memory size
- andl $0xffff, %ecx # clear sign extend
- addl %ecx, (0x1e0) # and add lower memory into
- # total size.
-
-# Ye Olde Traditional Methode. Returns the memory size (up to 16mb or
-# 64mb, depending on the bios) in ax.
-mem88:
-
+ addl %edx, (0x1e0) # add high memory size.
+fine801:
+
#endif
+
+/*
+ * Ye Olde Traditional Methode. Returns the memory size (up to 16mb or
+ * 64mb, depending on the bios) in ax.
+ */
movb $0x88, %ah
int $0x15
movw %ax, (2)
+
+/* END OF MEMORY DETECTION CODE */
# Set the keyboard repeat rate to the max
movw $0x0305, %ax
--- arch/i386/mm/init.c.orig Mon Dec 13 15:39:23 1999
+++ arch/i386/mm/init.c Tue Dec 28 05:26:49 1999
@@ -523,21 +523,11 @@
{
int i;
- for (i = 0; i < e820.nr_map; i++) {
- unsigned long addr, end;
-
- if (e820.map[i].type != E820_RAM) /* not usable memory */
- continue;
- /*
- * !!!FIXME!!! Some BIOSen report areas as RAM that
- * are not. Notably the 640->1Mb area. We need a sanity
- * check here.
- */
- addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
- end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
- if ((pagenr >= addr) && (pagenr < end))
+ for (i = 0; i < physical_memory.nr_region; i++)
+ if (pagenr >= PFN_UP(physical_memory.region[i].start)
+ && pagenr < PFN_DOWN(physical_memory.region[i].end) )
return 1;
- }
+
return 0;
}
--- arch/i386/kernel/setup.c.orig Tue Dec 28 12:24:27 1999
+++ arch/i386/kernel/setup.c Tue Dec 28 05:26:49 1999
@@ -102,7 +102,7 @@
unsigned char table[0];
};
-struct e820map e820 = { 0 };
+struct physical_memory physical_memory = { 0,0 };
unsigned char aux_device_present;
@@ -376,24 +376,44 @@
ret <<= 20;
(*retptr)++;
}
+ else if (**retptr == 'G' || **retptr == 'g') {
+ ret <<= 30;
+ (*retptr)++;
+ }
return ret;
} /* memparse */
-void __init add_memory_region(unsigned long start,
- unsigned long size, int type)
+void __init add_memory_region(__u64 start, __u64 size, __u32 type)
{
- int x = e820.nr_map;
+ int x = physical_memory.nr_region;
if (x == E820MAX) {
- printk("Ooops! Too many entries in the memory map!\n");
- return;
+ printk("Ooops! Too many entries in the memory map!\n");
+ return;
}
- e820.map[x].addr = start;
- e820.map[x].size = size;
- e820.map[x].type = type;
- e820.nr_map++;
+#if ! CONFIG_MEM_PERMISSIVE
+ /* HACK: don't allocate anything above 4gb? */
+ if (start+size > 0x100000000ULL)
+ return;
+
+ /* PC high memory (640k->1mb) is not usable, no matter how nicely
+ * the caller asks.  Only the start address is tested.
+ */
+ if (start >= 0xA0000 && start < 0x100000)
+ return;
+#endif
+
+ /* don't bother to use non-ram regions or regions that
+ * are less than a page long
+ */
+ if ( type != E820_RAM || PFN_UP(start) >= PFN_DOWN(start+size) )
+ return;
+
+ physical_memory.region[x].start = start;
+ physical_memory.region[x].end = start+size;
+ physical_memory.nr_region++;
} /* add_memory_region */
@@ -406,9 +426,8 @@
void __init setup_memory_region(void)
{
#define E820_DEBUG 1
-#ifdef E820_DEBUG
+
int i;
-#endif
/*
* If we're lucky and live on a modern system, the setup code
@@ -418,22 +437,25 @@
* We check to see that the memory map contains at least 2 elements
* before we'll use it, because the detection code in setup.S may
* not be perfect and most every PC known to man has two memory
- * regions: one from 0 to 640k, and one from 1mb up. (The IBM
- * thinkpad 560x, for example, does not cooperate with the memory
- * detection code.)
+ * regions: one from 0 to 640k, and one from 1mb up.
*/
- if (E820_MAP_NR > 1) {
+ if ( (E820_MAP_NR > 1) && (E820_MAP_NR <= E820MAX) ) {
/* got a memory map; copy it into a safe place.
*/
- e820.nr_map = E820_MAP_NR;
- if (e820.nr_map > E820MAX)
- e820.nr_map = E820MAX;
- memcpy(e820.map, E820_MAP, e820.nr_map * sizeof e820.map[0]);
+ physical_memory.nr_bios = E820_MAP_NR;
+ memcpy(&(physical_memory.bios), E820_MAP,
+ E820_MAP_NR * sizeof physical_memory.bios[0]);
+ for (i=0; i < E820_MAP_NR; i++) {
+ add_memory_region(physical_memory.bios[i].addr,
+ physical_memory.bios[i].size,
+ physical_memory.bios[i].type);
#ifdef E820_DEBUG
- for (i=0; i < e820.nr_map; i++) {
- printk("e820: %08x @ %08x ", (int)e820.map[i].size,
- (int)e820.map[i].addr);
- switch (e820.map[i].type) {
+ printk("SMAP: %010Lx - %010Lx ",
+ physical_memory.bios[i].addr,
+ physical_memory.bios[i].addr
+ + physical_memory.bios[i].size);
+
+ switch (physical_memory.bios[i].type) {
case E820_RAM: printk("(usable)\n");
break;
case E820_RESERVED:
@@ -445,11 +467,13 @@
case E820_NVS:
printk("(ACPI NVS)\n");
break;
- default: printk("type %lu\n", e820.map[i].type);
+ default:
+ printk("(type %lu)\n",
+ (unsigned long)physical_memory.bios[i].type);
break;
}
- }
#endif
+ }
}
else {
/* otherwise fake a memory map; one section from 0k->640k,
@@ -501,7 +525,7 @@
* and reinitialize it with the
* standard low-memory region.
*/
- e820.nr_map = 0;
+ physical_memory.nr_region = 0;
usermem = 1;
add_memory_region(0, LOWMEMSIZE(), E820_RAM);
}
@@ -569,10 +593,6 @@
parse_mem_cmdline(cmdline_p);
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
-
/*
* 128MB for vmalloc and initrd
*/
@@ -588,21 +608,16 @@
start_pfn = PFN_UP(__pa(&_end));
/*
- * Find the highest page frame number we have available
- */
- max_pfn = 0;
- for (i = 0; i < e820.nr_map; i++) {
- unsigned long start, end;
- /* RAM? */
- if (e820.map[i].type != E820_RAM)
- continue;
- start = PFN_UP(e820.map[i].addr);
- end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
- if (start >= end)
- continue;
- if (end > max_pfn)
- max_pfn = end;
- }
+ * Find the highest page frame number we have available
+ */
+ max_pfn = 0;
+ for (i = 0; i < physical_memory.nr_region; i++) {
+ unsigned long start, end;
+ start = PFN_UP(physical_memory.region[i].start);
+ end = PFN_DOWN(physical_memory.region[i].end);
+ if ( (start < end) && (end > max_pfn) )
+ max_pfn = end;
+ }
/*
* Determine low and high memory ranges:
@@ -645,23 +660,19 @@
/*
* Register fully available low RAM pages with the bootmem allocator.
*/
- for (i = 0; i < e820.nr_map; i++) {
+ for (i = 0; i < physical_memory.nr_region; i++) {
unsigned long curr_pfn, last_pfn, size;
- /*
- * Reserve usable low memory
- */
- if (e820.map[i].type != E820_RAM)
- continue;
+
/*
* We are rounding up the start address of usable memory:
*/
- curr_pfn = PFN_UP(e820.map[i].addr);
+ curr_pfn = PFN_UP(physical_memory.region[i].start);
if (curr_pfn >= max_low_pfn)
continue;
/*
* ... and at the end of the usable range downwards:
*/
- last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+ last_pfn = PFN_DOWN(physical_memory.region[i].end);
if (last_pfn > max_low_pfn)
last_pfn = max_low_pfn;
@@ -731,30 +742,39 @@
* and also for regions reported as reserved by the e820.
*/
probe_roms();
- for (i = 0; i < e820.nr_map; i++) {
+ for (i = 0; i < physical_memory.nr_region; i++) {
struct resource *res;
- if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
- continue;
res = alloc_bootmem_low(sizeof(struct resource));
- switch (e820.map[i].type) {
- case E820_RAM: res->name = "System RAM"; break;
- case E820_ACPI: res->name = "ACPI Tables"; break;
- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
- default: res->name = "reserved";
- }
- res->start = e820.map[i].addr;
- res->end = res->start + e820.map[i].size - 1;
+ res->start = physical_memory.region[i].start;
+ res->end = physical_memory.region[i].end-1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->name = "System RAM";
request_resource(&iomem_resource, res);
- if (e820.map[i].type == E820_RAM) {
- /*
- * We dont't know which RAM region contains kernel data,
- * so we try it repeatedly and let the resource manager
- * test it.
- */
- request_resource(res, &code_resource);
- request_resource(res, &data_resource);
- }
+ /*
+ * Everything in region[] is usable RAM.  We don't know
+ * which region contains the kernel code and data, so we
+ * try each one and let the resource manager test it.
+ */
+ request_resource(res, &code_resource);
+ request_resource(res, &data_resource);
+ }
+ for (i = 0; i < physical_memory.nr_bios; i++) {
+ struct resource *res;
+ if (physical_memory.bios[i].addr + physical_memory.bios[i].size > 0x100000000ULL)
+ continue;
+ if (physical_memory.bios[i].type == E820_RAM)
+ continue;
+
+ res = alloc_bootmem_low(sizeof(struct resource));
+ switch (physical_memory.bios[i].type) {
+ case E820_ACPI: res->name = "ACPI Tables"; break;
+ case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
+ default: res->name = "reserved";
+ }
+ res->start = physical_memory.bios[i].addr;
+ res->end = res->start + physical_memory.bios[i].size - 1;
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ request_resource(&iomem_resource, res);
}
request_resource(&iomem_resource, &vram_resource);
--- arch/i386/config.in.orig Tue Dec 28 12:24:26 1999
+++ arch/i386/config.in Tue Dec 28 05:26:49 1999
@@ -54,6 +54,13 @@
define_bool CONFIG_X86_PAE y
fi
+bool 'Use bios call e820 to detect memory' CONFIG_MEM_E820
+if [ "$CONFIG_MEM_E820" = "y" ]; then
+ bool 'Be extra cautious with bios call e820' CONFIG_E820_PARANOID
+ bool 'Allow dubious memory regions (DANGEROUS)' CONFIG_MEM_PERMISSIVE
+fi
+bool 'Use bios call e801 to detect memory' CONFIG_MEM_E801
+
bool 'Math emulation' CONFIG_MATH_EMULATION
bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
bool 'Symmetric multi-processing support' CONFIG_SMP
--- arch/i386/defconfig.orig Tue Dec 28 12:24:26 1999
+++ arch/i386/defconfig Tue Dec 28 05:26:49 1999
@@ -25,6 +25,10 @@
CONFIG_X86_TSC=y
CONFIG_X86_GOOD_APIC=y
CONFIG_NOHIGHMEM=y
+CONFIG_MEM_E820=y
+CONFIG_E820_PARANOID=y
+# CONFIG_MEM_PERMISSIVE is not set
+CONFIG_MEM_E801=y
# CONFIG_HIGHMEM4G is not set
# CONFIG_HIGHMEM64G is not set
# CONFIG_MATH_EMULATION is not set
--- include/asm-i386/e820.h.orig Thu Nov 18 19:25:28 1999
+++ include/asm-i386/e820.h Tue Dec 28 13:41:37 1999
@@ -5,16 +5,17 @@
* In a nutshell, arch/i386/boot/setup.S populates a scratch table
* in the empty_zero_block that contains a list of usable address/size
* duples. In arch/i386/kernel/setup.c, this information is
- * transferred into the e820map, and in arch/i386/mm/init.c, that
- * new information is used to mark pages reserved or not.
+ * transferred into the bios[], then converted into a list of valid
+ * memory regions in region[], and that new information is used in
+ * arch/i386/mm/init.c to mark pages available or not.
*
*/
#ifndef __E820_HEADER
#define __E820_HEADER
-#define E820MAP 0x2d0 /* our map */
-#define E820MAX 32 /* number of entries in E820MAP */
-#define E820NR 0x1e8 /* # entries in E820MAP */
+#define E820MAP 0x2d0 /* our map */
+#define E820MAX 32 /* number of entries in E820MAP */
+#define E820NR 0x1e8 /* # entries in E820MAP */
#define E820_RAM 1
#define E820_RESERVED 2
@@ -23,18 +24,35 @@
#define HIGH_MEMORY (1024*1024)
+/* HACK: These macros map between page numbers and physical addresses.
+ * They used to be in arch/i386/kernel/setup.c, but have been moved here
+ * so that they can also be used in arch/i386/mm/init.c.
+ */
+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
+
#ifndef __ASSEMBLY__
-struct e820map {
- int nr_map;
- struct {
- long long addr; /* start of memory segment */
- long long size; /* size of memory segment */
- long type; /* type of memory segment */
- } map[E820MAX];
-};
+struct e820 {
+ __u64 addr; /* one record exactly as the e820 bios call returns it: */
+ __u64 size; /* start address, length in bytes, */
+ __u32 type; /* and region type (E820_RAM, E820_RESERVED, ...) */
+} ;
+
+struct physical_region {
+ __u64 start; /* first byte of the region */
+ __u64 end; /* start + size, i.e. one past the last byte */
+} ;
+
+struct physical_memory {
+ int nr_bios; /* number of entries used in bios[] */
+ int nr_region; /* number of entries used in region[] */
+ struct e820 bios[E820MAX]; /* raw map as returned by the bios */
+ struct physical_region region[E820MAX]; /* usable RAM we want to allocate */
+} ;
-extern struct e820map e820;
+extern struct physical_memory physical_memory;
#endif/*!__ASSEMBLY__*/
#endif/*__E820_HEADER*/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/