[tip:x86/mm] x86: Make page cache mode a real type

From: tip-bot for Juergen Gross
Date: Sun Nov 16 2014 - 05:54:53 EST


Commit-ID: 281d4078bec366d60990add9d91a952953bd0d72
Gitweb: http://git.kernel.org/tip/281d4078bec366d60990add9d91a952953bd0d72
Author: Juergen Gross <jgross@xxxxxxxx>
AuthorDate: Mon, 3 Nov 2014 14:01:47 +0100
Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitDate: Sun, 16 Nov 2014 11:04:24 +0100

x86: Make page cache mode a real type

At the moment there are a lot of places that handle setting or getting
the page cache mode by treating the pgprot bits equal to the cache mode.
This is only true because there are a lot of assumptions about the setup
of the PAT MSR. Otherwise the cache type needs to get translated into
pgprot bits and vice versa.

This patch tries to prepare for that by introducing a separate type
for the cache mode and adding functions to translate between those and
pgprot values.

To avoid too much performance penalty the translation between cache mode
and pgprot values is done via tables which contain the relevant
information. Write-back cache mode is hard-wired to be 0, all other
modes are configurable via those tables. For large pages there are
translation functions as the PAT bit is located at different positions
in the ptes of 4k and large pages.

Based-on-patch-by: Stefan Bader <stefan.bader@xxxxxxxxxxxxx>
Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
Reviewed-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: stefan.bader@xxxxxxxxxxxxx
Cc: xen-devel@xxxxxxxxxxxxxxxxxxx
Cc: konrad.wilk@xxxxxxxxxx
Cc: ville.syrjala@xxxxxxxxxxxxxxx
Cc: david.vrabel@xxxxxxxxxx
Cc: jbeulich@xxxxxxxx
Cc: toshi.kani@xxxxxx
Cc: plagnioj@xxxxxxxxxxxx
Cc: tomi.valkeinen@xxxxxx
Cc: bhelgaas@xxxxxxxxxx
Link: http://lkml.kernel.org/r/1415019724-4317-2-git-send-email-jgross@xxxxxxxx
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
arch/x86/include/asm/pgtable_types.h | 73 +++++++++++++++++++++++++++++++++++-
arch/x86/mm/init.c | 29 ++++++++++++++
2 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0778964..5124642 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -128,12 +128,34 @@
_PAGE_SOFT_DIRTY | _PAGE_NUMA)
#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_NUMA)

-#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
#define _PAGE_CACHE_WB (0)
#define _PAGE_CACHE_WC (_PAGE_PWT)
#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD)
#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT)

+/*
+ * The cache modes defined here are used to translate between pure SW usage
+ * and the HW defined cache mode bits and/or PAT entries.
+ *
+ * The resulting bits for PWT, PCD and PAT should be chosen in a way
+ * to have the WB mode at index 0 (all bits clear). This is the default
+ * right now and likely would break too much if changed.
+ */
+#ifndef __ASSEMBLY__
+enum page_cache_mode {
+ _PAGE_CACHE_MODE_WB = 0,
+ _PAGE_CACHE_MODE_WC = 1,
+ _PAGE_CACHE_MODE_UC_MINUS = 2,
+ _PAGE_CACHE_MODE_UC = 3,
+ _PAGE_CACHE_MODE_WT = 4,
+ _PAGE_CACHE_MODE_WP = 5,
+ _PAGE_CACHE_MODE_NUM = 8
+};
+#endif
+
+#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)
+#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC))
+
#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_NX)
@@ -341,6 +363,55 @@ static inline pmdval_t pmdnuma_flags(pmd_t pmd)
#define pgprot_val(x) ((x).pgprot)
#define __pgprot(x) ((pgprot_t) { (x) } )

+extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM];
+extern uint8_t __pte2cachemode_tbl[8];
+
+#define __pte2cm_idx(cb) \
+ ((((cb) >> (_PAGE_BIT_PAT - 2)) & 4) | \
+ (((cb) >> (_PAGE_BIT_PCD - 1)) & 2) | \
+ (((cb) >> _PAGE_BIT_PWT) & 1))
+
+static inline unsigned long cachemode2protval(enum page_cache_mode pcm)
+{
+ if (likely(pcm == 0))
+ return 0;
+ return __cachemode2pte_tbl[pcm];
+}
+static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm)
+{
+ return __pgprot(cachemode2protval(pcm));
+}
+static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
+{
+ unsigned long masked;
+
+ masked = pgprot_val(pgprot) & _PAGE_CACHE_MASK;
+ if (likely(masked == 0))
+ return 0;
+ return __pte2cachemode_tbl[__pte2cm_idx(masked)];
+}
+static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot)
+{
+ pgprot_t new;
+ unsigned long val;
+
+ val = pgprot_val(pgprot);
+ pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
+ ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
+ return new;
+}
+static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot)
+{
+ pgprot_t new;
+ unsigned long val;
+
+ val = pgprot_val(pgprot);
+ pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
+ ((val & _PAGE_PAT_LARGE) >>
+ (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
+ return new;
+}
+

typedef struct page *pgtable_t;

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 66dba36..a9776ba 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -27,6 +27,35 @@

#include "mm_internal.h"

+/*
+ * Tables translating between page_cache_type_t and pte encoding.
+ * Minimal supported modes are defined statically, modified if more supported
+ * cache modes are available.
+ * Index into __cachemode2pte_tbl is the cachemode.
+ * Index into __pte2cachemode_tbl are the caching attribute bits of the pte
+ * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
+ */
+uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
+ [_PAGE_CACHE_MODE_WB] = 0,
+ [_PAGE_CACHE_MODE_WC] = _PAGE_PWT,
+ [_PAGE_CACHE_MODE_UC_MINUS] = _PAGE_PCD,
+ [_PAGE_CACHE_MODE_UC] = _PAGE_PCD | _PAGE_PWT,
+ [_PAGE_CACHE_MODE_WT] = _PAGE_PCD,
+ [_PAGE_CACHE_MODE_WP] = _PAGE_PCD,
+};
+EXPORT_SYMBOL_GPL(__cachemode2pte_tbl);
+uint8_t __pte2cachemode_tbl[8] = {
+ [__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB,
+ [__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC,
+ [__pte2cm_idx(_PAGE_PCD)] = _PAGE_CACHE_MODE_UC_MINUS,
+ [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD)] = _PAGE_CACHE_MODE_UC,
+ [__pte2cm_idx(_PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
+ [__pte2cm_idx(_PAGE_PWT | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
+ [__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
+ [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
+};
+EXPORT_SYMBOL_GPL(__pte2cachemode_tbl);
+
static unsigned long __initdata pgt_buf_start;
static unsigned long __initdata pgt_buf_end;
static unsigned long __initdata pgt_buf_top;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/