Re: [Bug 11388] New: 2.6.27-rc3 warns about MTRR range; only 3 of 16gb of memory is usable

From: Yinghai Lu
Date: Wed Aug 20 2008 - 21:49:41 EST


On Wed, Aug 20, 2008 at 6:20 PM, Yinghai Lu <yhlu.kernel@xxxxxxxxx> wrote:
> On Wed, Aug 20, 2008 at 6:04 PM, Andrew Morton
> <akpm@xxxxxxxxxxxxxxxxxxxx> wrote:
>>
>> (switched to email. Please respond via emailed reply-to-all, not via the
>> bugzilla web interface).
>>
>> On Wed, 20 Aug 2008 17:38:59 -0700 (PDT)
>> bugme-daemon@xxxxxxxxxxxxxxxxxxx wrote:
>>
>>> http://bugzilla.kernel.org/show_bug.cgi?id=11388
>>>
>>> Summary: 2.6.27-rc3 warns about MTRR range; only 3 of 16gb of
>>> memory is usable
>>> Product: Memory Management
>>> Version: 2.5
>>> KernelVersion: 2.6.27-rc3
>>> Platform: All
>>> OS/Version: Linux
>>> Tree: Mainline
>>> Status: NEW
>>> Severity: normal
>>> Priority: P1
>>> Component: MTTR
>>> AssignedTo: akpm@xxxxxxxx
>>> ReportedBy: j_kernel@xxxxxxxxxxx
>>>
>>>
>>> Latest working kernel version: 2.4.24.2 (possibly later)
>>> Earliest failing kernel version: 2.6.27-rc3-21328-ga7f5aaf (from netdev-2.6)
>>> Distribution: Gentoo
>>> Hardware Environment: 2x Intel X5482
>>> Software Environment:
>>> Problem Description:
>>>
>>> [ 0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing
>>> 13056MB of RAM.
>>> [ 0.000000] ------------[ cut here ]------------
>>> [ 0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1561
>>> mtrr_trim_uncached_memory+0x508/0x550()
>>> [ 0.000000] Modules linked in:
>>> [ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.27-rc3-21328-ga7f5aaf #8
>>> [ 0.000000]
>>> [ 0.000000] Call Trace:
>>> [ 0.000000] [<ffffffff80234c3e>] warn_on_slowpath+0x51/0x77
>>> [ 0.000000] [<ffffffff8023570a>] printk+0x4e/0x56
>>> [ 0.000000] [<ffffffff803add02>] sort+0xfa/0x18c
>>> [ 0.000000] [<ffffffff808283d3>] cmp_range+0x0/0x6
>>> [ 0.000000] [<ffffffff80828a47>] mtrr_trim_uncached_memory+0x508/0x550
>>> [ 0.000000] [<ffffffff802178e1>] post_set+0x20/0x3d
>>> [ 0.000000] [<ffffffff80824f99>] setup_arch+0x39d/0x6be
>>> [ 0.000000] [<ffffffff8081e962>] start_kernel+0x74/0x341
>>> [ 0.000000] [<ffffffff8081e394>] x86_64_start_kernel+0xe3/0xe7
>>> [ 0.000000]
>>> [ 0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
>>>
>>>
>>> Steps to reproduce:
>>>
>>> This warning isn't present under 2.6.24.2 and the full range of physical memory
>>> is usable.
>>
>> Looks like a post-2.6.26 regression caused by
>> 12031a624af7816ec7660b82be648aa3703b4ebe.
> reg00: base=0xd0000000 (3328MB), size=196864MB: uncachable, count=1
> reg01: base=0xe0000000 (3584MB), size=197120MB: uncachable, count=1
> reg02: base=0x00000000 ( 0MB), size=212992MB: write-back, count=1
> reg03: base=0x400000000 (16384MB), size=197120MB: write-back, count=1
> reg04: base=0x420000000 (16896MB), size=196864MB: write-back, count=1
>
> the size mtrr looks crazy.

Please apply the attached patch and boot with show_msr=1 to dump the MSRs
(including the MTRRs).

YH
[PATCH] x86_64: printout msr

Command line: show_msr=1 dumps the MSRs for the BSP only; show_msr=32 dumps them for all 32 CPUs.

Signed-off-by: Yinghai Lu <yhlu.kernel@xxxxxxxxx>

---
arch/x86/kernel/cpu/common_64.c | 46 ++++++++++++++++++++++++++++++++++++++++
include/asm-x86/msr.h | 23 ++++++++++++++++++++
2 files changed, 69 insertions(+)

Index: linux-2.6/arch/x86/kernel/cpu/common_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/common_64.c
+++ linux-2.6/arch/x86/kernel/cpu/common_64.c
@@ -430,6 +430,49 @@ static __init int setup_noclflush(char *
}
__setup("noclflush", setup_noclflush);

+struct msr_range {
+ unsigned min;
+ unsigned max;
+};
+
+static struct msr_range msr_range_array[] __cpuinitdata = {
+ { 0x00000000, 0x00000418},
+ { 0xc0000000, 0xc000040b},
+ { 0xc0010000, 0xc0010142},
+ { 0xc0011000, 0xc001103b},
+};
+
+static void __cpuinit print_cpu_msr(void)
+{
+ unsigned index;
+ u64 val;
+ int i;
+ unsigned index_min, index_max;
+
+ for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
+ index_min = msr_range_array[i].min;
+ index_max = msr_range_array[i].max;
+ for (index = index_min; index < index_max; index++) {
+ if (rdmsrl_amd_safe(index, &val))
+ continue;
+ printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
+ }
+ }
+}
+
+static int show_msr __cpuinitdata;
+static __init int setup_show_msr(char *arg)
+{
+ int num;
+
+ get_option(&arg, &num);
+
+ if (num > 0)
+ show_msr = num;
+ return 1;
+}
+__setup("show_msr=", setup_show_msr);
+
void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
{
if (c->x86_model_id[0])
@@ -439,6 +482,9 @@ void __cpuinit print_cpu_info(struct cpu
printk(KERN_CONT " stepping %02x\n", c->x86_mask);
else
printk(KERN_CONT "\n");
+
+ if (c->cpu_index < show_msr)
+ print_cpu_msr();
}

static __init int setup_disablecpuid(char *arg)
Index: linux-2.6/include/asm-x86/msr.h
===================================================================
--- linux-2.6.orig/include/asm-x86/msr.h
+++ linux-2.6/include/asm-x86/msr.h
@@ -63,6 +63,22 @@ static inline unsigned long long native_
return EAX_EDX_VAL(val, low, high);
}

+static inline unsigned long long native_read_msr_amd_safe(unsigned int msr,
+ int *err)
+{
+ DECLARE_ARGS(val, low, high);
+
+ asm volatile("2: rdmsr ; xor %0,%0\n"
+ "1:\n\t"
+ ".section .fixup,\"ax\"\n\t"
+ "3: mov %3,%0 ; jmp 1b\n\t"
+ ".previous\n\t"
+ _ASM_EXTABLE(2b, 3b)
+ : "=r" (*err), EAX_EDX_RET(val, low, high)
+ : "c" (msr), "D" (0x9c5a203a), "i" (-EFAULT));
+ return EAX_EDX_VAL(val, low, high);
+}
+
static inline void native_write_msr(unsigned int msr,
unsigned low, unsigned high)
{
@@ -158,6 +174,13 @@ static inline int rdmsrl_safe(unsigned m
*p = native_read_msr_safe(msr, &err);
return err;
}
+static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
+{
+ int err;
+
+ *p = native_read_msr_amd_safe(msr, &err);
+ return err;
+}

#define rdtscl(low) \
((low) = (u32)native_read_tsc())