[RFC] x86 mtrr handling

From: Jan Beulich
Date: Fri Nov 12 2004 - 10:15:44 EST


Apparently I am not the first one to observe and be puzzled by log
messages pointing to inconsistent MTRR setup of the video frame buffer;
a patch to address this at the consumer end of the MTRR code (in vesafb)
was posted by Kurt Garloff a while ago, but apparently hasn't been
applied yet.

Regardless of whether that patch would be applied, and regardless of
considering it correct, I believe that it only hides a shortcoming in
the MTRR code itself, in that that code is not symmetric with respect to
the ordering of attempts to set up two (or more) regions where one
contains the other. In its current form, it only permits setting up
sub-regions of pre-existing ones. The patch below makes this
symmetric.

While working on that I noticed a few more inconsistencies in that
code, namely
- use of 'unsigned int' for sizes in many, but not all places (the
patch converts these to 'unsigned long' everywhere, which
specifically might be necessary for x86-64 once a processor supporting
more than 44 physical address bits becomes available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE; however, the newly computed value was never
actually stored in the respective MSR
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and, going even further, as per Intel's
documentation write-through and write-back for either region are also
compatible with the respective opposite in the other

Thanks, Jan

diff -apru linux-2.6.9/arch/i386/kernel/cpu/mtrr/amd.c
linux-2.6.9-mtrr/arch/i386/kernel/cpu/mtrr/amd.c
--- linux-2.6.9/arch/i386/kernel/cpu/mtrr/amd.c 2004-10-18
23:54:29.000000000 +0200
+++ linux-2.6.9-mtrr/arch/i386/kernel/cpu/mtrr/amd.c 2004-11-12
15:41:38.213655912 +0100
@@ -7,7 +7,7 @@

static void
amd_get_mtrr(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type)
+ unsigned long *size, mtrr_type * type)
{
unsigned long low, high;

diff -apru linux-2.6.9/arch/i386/kernel/cpu/mtrr/centaur.c
linux-2.6.9-mtrr/arch/i386/kernel/cpu/mtrr/centaur.c
--- linux-2.6.9/arch/i386/kernel/cpu/mtrr/centaur.c 2004-10-18
23:53:45.000000000 +0200
+++
linux-2.6.9-mtrr/arch/i386/kernel/cpu/mtrr/centaur.c 2004-11-12
15:41:38.229653480 +0100
@@ -17,7 +17,7 @@ static u8 centaur_mcr_type; /* 0 for win
*/

static int
-centaur_get_free_region(unsigned long base, unsigned long size)
+centaur_get_free_region(unsigned long base, unsigned long size, int
replace_reg)
/* [SUMMARY] Get a free MTRR.
<base> The starting (base) address of the region.
<size> The size (in bytes) of the region.
@@ -26,10 +26,11 @@ centaur_get_free_region(unsigned long ba
{
int i, max;
mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
+ unsigned long lbase, lsize;

max = num_var_ranges;
+ if (replace_reg >= 0 && replace_reg < max)
+ return replace_reg;
for (i = 0; i < max; ++i) {
if (centaur_mcr_reserved & (1 << i))
continue;
@@ -49,7 +50,7 @@ mtrr_centaur_report_mcr(int mcr, u32 lo,

static void
centaur_get_mcr(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type)
+ unsigned long *size, mtrr_type * type)
{
*base = centaur_mcr[reg].high >> PAGE_SHIFT;
*size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
diff -apru linux-2.6.9/arch/i386/kernel/cpu/mtrr/cyrix.c
linux-2.6.9-mtrr/arch/i386/kernel/cpu/mtrr/cyrix.c
--- linux-2.6.9/arch/i386/kernel/cpu/mtrr/cyrix.c 2004-10-18
23:54:31.000000000 +0200
+++ linux-2.6.9-mtrr/arch/i386/kernel/cpu/mtrr/cyrix.c 2004-11-12
15:41:38.232653024 +0100
@@ -9,7 +9,7 @@ int arr3_protected;

static void
cyrix_get_arr(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type)
+ unsigned long *size, mtrr_type * type)
{
unsigned long flags;
unsigned char arr, ccr3, rcr, shift;
@@ -77,7 +77,7 @@ cyrix_get_arr(unsigned int reg, unsigned
}

static int
-cyrix_get_free_region(unsigned long base, unsigned long size)
+cyrix_get_free_region(unsigned long base, unsigned long size, int
replace_reg)
/* [SUMMARY] Get a free ARR.
<base> The starting (base) address of the region.
<size> The size (in bytes) of the region.
@@ -86,9 +86,24 @@ cyrix_get_free_region(unsigned long base
{
int i;
mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
+ unsigned long lbase, lsize;

+ switch (replace_reg) {
+ case 7:
+ if (size < 0x40)
+ break;
+ case 6:
+ case 5:
+ case 4:
+ return replace_reg;
+ case 3:
+ if (arr3_protected)
+ break;
+ case 2:
+ case 1:
+ case 0:
+ return replace_reg;
+ }
/* If we are to set up a region >32M then look at ARR7
immediately */
if (size > 0x2000) {
cyrix_get_arr(7, &lbase, &lsize, &ltype);
diff -apru linux-2.6.9/arch/i386/kernel/cpu/mtrr/generic.c
linux-2.6.9-mtrr/arch/i386/kernel/cpu/mtrr/generic.c
--- linux-2.6.9/arch/i386/kernel/cpu/mtrr/generic.c 2004-10-18
23:54:32.000000000 +0200
+++
linux-2.6.9-mtrr/arch/i386/kernel/cpu/mtrr/generic.c 2004-11-12
15:41:38.235652568 +0100
@@ -94,7 +94,7 @@ void __init mtrr_state_warn(void)
}


-int generic_get_free_region(unsigned long base, unsigned long size)
+int generic_get_free_region(unsigned long base, unsigned long size,
int replace_reg)
/* [SUMMARY] Get a free MTRR.
<base> The starting (base) address of the region.
<size> The size (in bytes) of the region.
@@ -103,10 +103,11 @@ int generic_get_free_region(unsigned lon
{
int i, max;
mtrr_type ltype;
- unsigned long lbase;
- unsigned lsize;
+ unsigned long lbase, lsize;

max = num_var_ranges;
+ if (replace_reg >= 0 && replace_reg < max)
+ return replace_reg;
for (i = 0; i < max; ++i) {
mtrr_if->get(i, &lbase, &lsize, &ltype);
if (lsize == 0)
@@ -116,7 +117,7 @@ int generic_get_free_region(unsigned lon
}

void generic_get_mtrr(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type)
+ unsigned long *size, mtrr_type * type)
{
unsigned int mask_lo, mask_hi, base_lo, base_hi;

@@ -199,7 +200,9 @@ static int set_mtrr_var_ranges(unsigned
return changed;
}

-static unsigned long set_mtrr_state(u32 deftype_lo, u32 deftype_hi)
+static u32 deftype_lo, deftype_hi;
+
+static unsigned long set_mtrr_state(void)
/* [SUMMARY] Set the MTRR state for this CPU.
<state> The MTRR state information to read.
<ctxt> Some relevant CPU context.
@@ -221,7 +224,7 @@ static unsigned long set_mtrr_state(u32
so to set it we fiddle with the saved value */
if ((deftype_lo & 0xff) != mtrr_state.def_type
|| ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
- deftype_lo |= (mtrr_state.def_type | mtrr_state.enabled
<< 10);
+ deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type
| (mtrr_state.enabled << 10);
change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
}

@@ -230,7 +233,6 @@ static unsigned long set_mtrr_state(u32


static unsigned long cr4 = 0;
-static u32 deftype_lo, deftype_hi;
static spinlock_t set_atomicity_lock = SPIN_LOCK_UNLOCKED;

static void prepare_set(void)
@@ -261,7 +263,7 @@ static void prepare_set(void)
rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);

/* Disable MTRRs, and set the default type to uncached */
- wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi);
+ wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi);
}

static void post_set(void)
@@ -289,7 +291,7 @@ static void generic_set_all(void)
prepare_set();

/* Actually set the state */
- mask = set_mtrr_state(deftype_lo,deftype_hi);
+ mask = set_mtrr_state();

post_set();

@@ -351,7 +353,7 @@ int generic_validate_add_page(unsigned l
}
}

- if (base + size < 0x100) {
+ if (base < 0x100) {
printk(KERN_WARNING "mtrr: cannot set region below 1 MiB
(0x%lx000,0x%lx000)\n",
base, size);
return -EINVAL;
diff -apru linux-2.6.9/arch/i386/kernel/cpu/mtrr/if.c
linux-2.6.9-mtrr/arch/i386/kernel/cpu/mtrr/if.c
--- linux-2.6.9/arch/i386/kernel/cpu/mtrr/if.c 2004-10-18
23:53:43.000000000 +0200
+++ linux-2.6.9-mtrr/arch/i386/kernel/cpu/mtrr/if.c 2004-11-12
15:41:38.239651960 +0100
@@ -151,6 +151,7 @@ mtrr_ioctl(struct inode *inode, struct f
{
int err;
mtrr_type type;
+ unsigned long size;
struct mtrr_sentry sentry;
struct mtrr_gentry gentry;
void __user *arg = (void __user *) __arg;
@@ -201,15 +202,15 @@ mtrr_ioctl(struct inode *inode, struct f
return -EFAULT;
if (gentry.regnum >= num_var_ranges)
return -EINVAL;
- mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size,
&type);
+ mtrr_if->get(gentry.regnum, &gentry.base, &size,
&type);

/* Hide entries that go above 4GB */
- if (gentry.base + gentry.size > 0x100000
- || gentry.size == 0x100000)
+ if (gentry.base + size - 1 >= (1UL << (8 *
sizeof(gentry.size) - PAGE_SHIFT))
+ || size >= (1UL << (8 * sizeof(gentry.size) -
PAGE_SHIFT)))
gentry.base = gentry.size = gentry.type = 0;
else {
gentry.base <<= PAGE_SHIFT;
- gentry.size <<= PAGE_SHIFT;
+ gentry.size = size << PAGE_SHIFT;
gentry.type = type;
}

@@ -259,8 +260,14 @@ mtrr_ioctl(struct inode *inode, struct f
return -EFAULT;
if (gentry.regnum >= num_var_ranges)
return -EINVAL;
- mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size,
&type);
- gentry.type = type;
+ mtrr_if->get(gentry.regnum, &gentry.base, &size,
&type);
+ /* Hide entries that would overflow */
+ if (size != (__typeof__(gentry.size))size)
+ gentry.base = gentry.size = gentry.type = 0;
+ else {
+ gentry.size = size;
+ gentry.type = type;
+ }

if (copy_to_user(arg, &gentry, sizeof gentry))
return -EFAULT;
@@ -319,8 +326,7 @@ static int mtrr_seq_show(struct seq_file
char factor;
int i, max, len;
mtrr_type type;
- unsigned long base;
- unsigned int size;
+ unsigned long base, size;

len = 0;
max = num_var_ranges;
@@ -339,7 +345,7 @@ static int mtrr_seq_show(struct seq_file
}
/* RED-PEN: base can be > 32bit */
len += seq_printf(seq,
- "reg%02i: base=0x%05lx000 (%4liMB),
size=%4i%cB: %s, count=%d\n",
+ "reg%02i: base=0x%05lx000 (%4luMB),
size=%4lu%cB: %s, count=%d\n",
i, base, base >> (20 - PAGE_SHIFT), size,
factor,
mtrr_attrib_to_str(type), usage_table[i]);
}
diff -apru linux-2.6.9/arch/i386/kernel/cpu/mtrr/main.c
linux-2.6.9-mtrr/arch/i386/kernel/cpu/mtrr/main.c
--- linux-2.6.9/arch/i386/kernel/cpu/mtrr/main.c 2004-10-18
23:53:08.000000000 +0200
+++ linux-2.6.9-mtrr/arch/i386/kernel/cpu/mtrr/main.c 2004-11-12
15:41:38.250650288 +0100
@@ -170,6 +170,13 @@ static void ipi_handler(void *info)

#endif

+static inline int types_compatible(mtrr_type type1, mtrr_type type2)
{
+ return type1 == MTRR_TYPE_UNCACHABLE ||
+ type2 == MTRR_TYPE_UNCACHABLE ||
+ (type1 == MTRR_TYPE_WRTHROUGH && type2 ==
MTRR_TYPE_WRBACK) ||
+ (type1 == MTRR_TYPE_WRBACK && type2 ==
MTRR_TYPE_WRTHROUGH);
+}
+
/**
* set_mtrr - update mtrrs on all processors
* @reg: mtrr in question
@@ -305,11 +312,9 @@ static void set_mtrr(unsigned int reg, u
int mtrr_add_page(unsigned long base, unsigned long size,
unsigned int type, char increment)
{
- int i;
+ int i, replace, error;
mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
- int error;
+ unsigned long lbase, lsize;

if (!mtrr_if)
return -ENXIO;
@@ -329,32 +334,45 @@ int mtrr_add_page(unsigned long base, un
return -ENOSYS;
}

+ if (!size) {
+ printk(KERN_WARNING "mtrr: zero sized request\n");
+ return -EINVAL;
+ }
+
if (base & size_or_mask || size & size_or_mask) {
printk(KERN_WARNING "mtrr: base or size exceeds the MTRR
width\n");
return -EINVAL;
}

error = -EINVAL;
+ replace = -1;

/* Search for existing MTRR */
down(&main_lock);
for (i = 0; i < num_var_ranges; ++i) {
mtrr_if->get(i, &lbase, &lsize, &ltype);
- if (base >= lbase + lsize)
- continue;
- if ((base < lbase) && (base + size <= lbase))
+ if (!lsize || base > lbase + lsize - 1 || base + size -
1 < lbase)
continue;
/* At this point we know there is some kind of
overlap/enclosure */
- if ((base < lbase) || (base + size > lbase + lsize)) {
+ if (base < lbase || base + size - 1 > lbase + lsize - 1)
{
+ if (base <= lbase && base + size - 1 >= lbase +
lsize - 1) {
+ /* New region encloses an existing
region */
+ if (type == ltype) {
+ replace = replace == -1 ? i :
-2;
+ continue;
+ }
+ else if (types_compatible(type, ltype))
+ continue;
+ }
printk(KERN_WARNING
"mtrr: 0x%lx000,0x%lx000 overlaps
existing"
- " 0x%lx000,0x%x000\n", base, size,
lbase,
+ " 0x%lx000,0x%lx000\n", base, size,
lbase,
lsize);
goto out;
}
/* New region is enclosed by an existing region */
if (ltype != type) {
- if (type == MTRR_TYPE_UNCACHABLE)
+ if (types_compatible(type, ltype))
continue;
printk (KERN_WARNING "mtrr: type mismatch for
%lx000,%lx000 old: %s new: %s\n",
base, size, mtrr_attrib_to_str(ltype),
@@ -367,10 +385,18 @@ int mtrr_add_page(unsigned long base, un
goto out;
}
/* Search for an empty MTRR */
- i = mtrr_if->get_free_region(base, size);
+ i = mtrr_if->get_free_region(base, size, replace);
if (i >= 0) {
set_mtrr(i, base, size, type);
- usage_table[i] = 1;
+ if (likely(replace < 0))
+ usage_table[i] = 1;
+ else {
+ usage_table[i] = usage_table[replace] +
!!increment;
+ if (unlikely(replace != i)) {
+ set_mtrr(replace, 0, 0, 0);
+ usage_table[replace] = 0;
+ }
+ }
} else
printk(KERN_INFO "mtrr: no more MTRRs available\n");
error = i;
@@ -447,8 +473,7 @@ int mtrr_del_page(int reg, unsigned long
{
int i, max;
mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
+ unsigned long lbase, lsize;
int error = -EINVAL;

if (!mtrr_if)
@@ -559,7 +584,7 @@ static void __init init_other_cpus(void)
struct mtrr_value {
mtrr_type ltype;
unsigned long lbase;
- unsigned int lsize;
+ unsigned long lsize;
};

static struct mtrr_value * mtrr_state;
diff -apru linux-2.6.9/arch/i386/kernel/cpu/mtrr/mtrr.h
linux-2.6.9-mtrr/arch/i386/kernel/cpu/mtrr/mtrr.h
--- linux-2.6.9/arch/i386/kernel/cpu/mtrr/mtrr.h 2004-10-18
23:55:36.000000000 +0200
+++ linux-2.6.9-mtrr/arch/i386/kernel/cpu/mtrr/mtrr.h 2004-11-12
15:45:12.198125328 +0100
@@ -43,15 +43,16 @@ struct mtrr_ops {
void (*set_all)(void);

void (*get)(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type);
- int (*get_free_region) (unsigned long base, unsigned long
size);
-
+ unsigned long *size, mtrr_type * type);
+ int (*get_free_region)(unsigned long base, unsigned long
size,
+ int replace_reg);
int (*validate_add_page)(unsigned long base, unsigned long
size,
unsigned int type);
int (*have_wrcomb)(void);
};

-extern int generic_get_free_region(unsigned long base, unsigned long
size);
+extern int generic_get_free_region(unsigned long base, unsigned long
size,
+ int replace_reg);
extern int generic_validate_add_page(unsigned long base, unsigned long
size,
unsigned int type);


Attachment: linux-2.6.9-x86-mtrr.patch
Description: Binary data