[RFC PATCH 02/12] PAT 64b: Basic PAT implementation

From: venkatesh . pallipadi
Date: Thu Dec 13 2007 - 19:04:38 EST


Originally based on a patch from Eric Biederman, but heavily changed.

Forward port of pat-base.patch to the x86 tree, with a bug fix.
The code was using 'PCD|PWT' (i.e. PAT entry 3) for WC mappings, so the WC
type is now programmed into the matching PAT fields PA3/PA7.

TBD: KEXEC and other CPU offline paths may need pat_shutdown()?

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>
Signed-off-by: Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
---
Index: linux-2.6/arch/x86/kernel/setup64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup64.c 2007-12-11 03:30:46.000000000 -0800
+++ linux-2.6/arch/x86/kernel/setup64.c 2007-12-11 03:42:08.000000000 -0800
@@ -291,9 +291,11 @@

fpu_init();

+ pat_init();
raw_local_save_flags(kernel_eflags);
}

void cpu_shutdown(void)
{
+ pat_shutdown();
}
Index: linux-2.6/arch/x86/mm/Makefile_64
===================================================================
--- linux-2.6.orig/arch/x86/mm/Makefile_64 2007-12-11 03:30:34.000000000 -0800
+++ linux-2.6/arch/x86/mm/Makefile_64 2007-12-11 03:42:08.000000000 -0800
@@ -2,7 +2,7 @@
# Makefile for the linux x86_64-specific parts of the memory manager.
#

-obj-y := init_64.o fault_64.o ioremap_64.o extable_64.o pageattr_64.o mmap_64.o
+obj-y := init_64.o fault_64.o ioremap_64.o extable_64.o pageattr_64.o mmap_64.o pat.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_NUMA) += numa_64.o
obj-$(CONFIG_K8_NUMA) += k8topology_64.o
Index: linux-2.6/arch/x86/mm/pat.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/arch/x86/mm/pat.c 2007-12-11 04:12:47.000000000 -0800
@@ -0,0 +1,57 @@
+/* Handle caching attributes in page tables (PAT) */
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/gfp.h>
+#include <asm/msr.h>
+#include <asm/tlbflush.h>
+#include <asm/processor.h>
+
+static u64 boot_pat_state; /* PAT MSR contents read in pat_init(), restored by pat_shutdown() */
+
+enum { /* memory type codes; PAT() shifts one into each byte of the MSR value */
+ PAT_UC = 0, /* Uncached */
+ PAT_WC = 1, /* Write Combining */
+ PAT_WT = 4, /* Write Through */
+ PAT_WP = 5, /* Write Protected */
+ PAT_WB = 6, /* Write Back (default) */
+ PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */
+};
+
+#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8)) /* type PAT_<y> placed in byte <x> of the MSR value */
+
+void __cpuinit pat_init(void)
+{
+ /* Make PWT+PCD select Write-Combining (PAT entries 3 and 7); no-op without PAT */
+ if (cpu_has_pat) {
+ u64 pat;
+ /* PTE cache-control bit encoding used in Linux (selects a PAT entry):
+ PAT
+ |PCD
+ ||PWT
+ |||
+ 000 WB default
+ 010 UC_MINUS _PAGE_PCD
+ 011 WC _PAGE_WC
+ PAT bit unused, so entries 4-7 are programmed the same as 0-3 */
+ pat = PAT(0,WB) | PAT(1,WT) | PAT(2,UC_MINUS) | PAT(3,WC) |
+ PAT(4,WB) | PAT(5,WT) | PAT(6,UC_MINUS) | PAT(7,WC);
+ rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); /* saved for pat_shutdown() */
+ wrmsrl(MSR_IA32_CR_PAT, pat);
+ __flush_tlb_all();
+ asm volatile("wbinvd");
+ }
+}
+
+#undef PAT
+
+void pat_shutdown(void)
+{
+ /* Restore the PAT MSR value that pat_init() saved in boot_pat_state */
+ if (cpu_has_pat) {
+ wrmsrl(MSR_IA32_CR_PAT, boot_pat_state);
+ __flush_tlb_all();
+ asm volatile("wbinvd");
+ }
+}
+
Index: linux-2.6/arch/x86/pci/i386.c
===================================================================
--- linux-2.6.orig/arch/x86/pci/i386.c 2007-12-11 03:30:34.000000000 -0800
+++ linux-2.6/arch/x86/pci/i386.c 2007-12-11 03:42:08.000000000 -0800
@@ -300,8 +300,6 @@
int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
enum pci_mmap_state mmap_state, int write_combine)
{
- unsigned long prot;
-
/* I/O space cannot be accessed via normal processor loads and
* stores on this platform.
*/
@@ -311,14 +309,11 @@
/* Leave vm_pgoff as-is, the PCI space address is the physical
* address on this platform.
*/
- prot = pgprot_val(vma->vm_page_prot);
- if (boot_cpu_data.x86 > 3)
- prot |= _PAGE_PCD | _PAGE_PWT;
- vma->vm_page_prot = __pgprot(prot);
+ if (write_combine)
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ else
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

- /* Write-combine setting is ignored, it is changed via the mtrr
- * interfaces on this platform.
- */
if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
vma->vm_end - vma->vm_start,
vma->vm_page_prot))
Index: linux-2.6/include/asm-x86/cpufeature_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/cpufeature_32.h 2007-12-11 03:30:34.000000000 -0800
+++ linux-2.6/include/asm-x86/cpufeature_32.h 2007-12-11 03:42:08.000000000 -0800
@@ -166,6 +166,8 @@
#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH)
#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS)

+#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT)
+
#endif /* __ASM_I386_CPUFEATURE_H */

/*
Index: linux-2.6/include/asm-x86/msr-index.h
===================================================================
--- linux-2.6.orig/include/asm-x86/msr-index.h 2007-12-11 03:30:34.000000000 -0800
+++ linux-2.6/include/asm-x86/msr-index.h 2007-12-11 03:42:08.000000000 -0800
@@ -63,6 +63,7 @@
#define MSR_IA32_LASTINTFROMIP 0x000001dd
#define MSR_IA32_LASTINTTOIP 0x000001de

+#define MSR_IA32_CR_PAT 0x00000277
#define MSR_IA32_MC0_CTL 0x00000400
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
Index: linux-2.6/include/asm-x86/pgtable_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/pgtable_64.h 2007-12-11 03:30:34.000000000 -0800
+++ linux-2.6/include/asm-x86/pgtable_64.h 2007-12-11 03:42:08.000000000 -0800
@@ -164,6 +164,12 @@
#define _PAGE_FILE 0x040 /* nonlinear file mapping, saved PTE; unset:swap */
#define _PAGE_GLOBAL 0x100 /* Global TLB entry */

+/* We redefine PWT|PCD to be write combining. PAT bit is not used */
+
+#define _PAGE_WC (_PAGE_PWT|_PAGE_PCD)
+
+#define _PAGE_CACHE_MASK (_PAGE_PWT|_PAGE_PCD)
+
#define _PAGE_PROTNONE 0x080 /* If not present */
#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX)

@@ -203,6 +209,7 @@
#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
+#define PAGE_KERNEL_WC MAKE_GLOBAL(__PAGE_KERNEL_WC)
#define PAGE_KERNEL_VSYSCALL32 __pgprot(__PAGE_KERNEL_VSYSCALL)
#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
@@ -299,8 +306,24 @@

/*
* Macro to mark a page protection value as "uncacheable".
+ * Accesses through an uncached translation bypass the cache
+ * and do not allow consecutive writes to be combined.
*/
-#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT))
+#define pgprot_noncached(prot) \
+ __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_MASK) | _PAGE_PCD)
+
+/*
+ * Macro to mark a page protection value as "write-combining".
+ * Accesses through a write-combining translation bypass the
+ * caches, but do allow consecutive writes to be combined into
+ * single (but larger) write transactions.
+ * This is mostly useful for IO accesses; for memory it is often slower.
+ * It also implies uncached.
+ */
+#define pgprot_writecombine(prot) \
+ __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_MASK) | _PAGE_WC)
+
+#define pgprot_nonstd(prot) (pgprot_val(prot) & _PAGE_CACHE_MASK)

static inline int pmd_large(pmd_t pte) {
return (pmd_val(pte) & __LARGE_PTE) == __LARGE_PTE;
@@ -414,6 +437,7 @@
#define pgtable_cache_init() do { } while (0)
#define check_pgt_cache() do { } while (0)

+/* AGP users use MTRRs for now. Need to add an ioctl to agpgart for WC */
#define PAGE_AGP PAGE_KERNEL_NOCACHE
#define HAVE_PAGE_AGP 1

Index: linux-2.6/include/asm-x86/processor_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/processor_64.h 2007-12-11 03:30:46.000000000 -0800
+++ linux-2.6/include/asm-x86/processor_64.h 2007-12-11 03:42:08.000000000 -0800
@@ -105,6 +105,8 @@
extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern unsigned short num_cache_leaves;
+extern void pat_init(void);
+extern void pat_shutdown(void);

/*
* Save the cr4 feature set we're using (ie

--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/