[RFC][PATCH v2 06/21] x86/pti: Provide C variants of PTI switch CR3 macros

From: Alexandre Chartre
Date: Mon Nov 16 2020 - 09:49:25 EST


Page Table Isolation (PTI) uses assembly macros to switch the CR3
register between the kernel and user page-tables. Add C functions which
implement the same features. For now, these C functions are not
used, but they will eventually replace the assembly macros.

Signed-off-by: Alexandre Chartre <alexandre.chartre@xxxxxxxxxx>
---
arch/x86/include/asm/entry-common.h | 127 ++++++++++++++++++++++++++++
1 file changed, 127 insertions(+)

diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 6fe54b2813c1..46682b1433a4 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -7,6 +7,7 @@
#include <asm/nospec-branch.h>
#include <asm/io_bitmap.h>
#include <asm/fpu/api.h>
+#include <asm/tlbflush.h>

/* Check that the stack and regs on entry from user mode are sane. */
static __always_inline void arch_check_user_regs(struct pt_regs *regs)
@@ -81,4 +82,130 @@ static __always_inline void arch_exit_to_user_mode(void)
}
#define arch_exit_to_user_mode arch_exit_to_user_mode

+#ifndef MODULE
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+/*
+ * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
+ * halves:
+ *
+ * The masks are built from 1UL so that they have the same type as the CR3
+ * values they are applied to (unsigned long), and so that complementing
+ * them does not depend on sign extension of a negative int.
+ */
+#define PTI_USER_PGTABLE_BIT		PAGE_SHIFT
+#define PTI_USER_PGTABLE_MASK		(1UL << PTI_USER_PGTABLE_BIT)
+#define PTI_USER_PCID_BIT		X86_CR3_PTI_PCID_USER_BIT
+#define PTI_USER_PCID_MASK		(1UL << PTI_USER_PCID_BIT)
+#define PTI_USER_PGTABLE_AND_PCID_MASK	\
+	(PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
+
+/*
+ * Write @cr3 to the CR3 register. On CPUs with X86_FEATURE_PCID,
+ * X86_CR3_PCID_NOFLUSH is unconditionally OR-ed into the value first.
+ */
+static __always_inline void write_kernel_cr3(unsigned long cr3)
+{
+	unsigned long noflush = 0;
+
+	if (static_cpu_has(X86_FEATURE_PCID))
+		noflush = X86_CR3_PCID_NOFLUSH;
+
+	native_write_cr3(cr3 | noflush);
+}
+
+/*
+ * Write @cr3 to the CR3 register, honouring any flush that is pending for
+ * the ASID encoded in @cr3.
+ *
+ * On CPUs with X86_FEATURE_PCID, the low bits of @cr3 (mask 0x7ff) select
+ * a bit in the per-CPU cpu_tlbstate.user_pcid_flush_mask:
+ *  - bit set:   a flush is needed; the bit is cleared and @cr3 is written
+ *               as is;
+ *  - bit clear: X86_CR3_PCID_NOFLUSH is OR-ed into @cr3 before the write.
+ *
+ * Without X86_FEATURE_PCID, @cr3 is written unchanged.
+ */
+static __always_inline void write_user_cr3(unsigned long cr3)
+{
+	if (static_cpu_has(X86_FEATURE_PCID)) {
+		unsigned long pcid = cr3 & 0x7ff;
+		unsigned short pending;
+
+		pending = this_cpu_read(cpu_tlbstate.user_pcid_flush_mask);
+		if (!(pending & (1 << pcid))) {
+			/* Nothing pending for this ASID */
+			cr3 |= X86_CR3_PCID_NOFLUSH;
+		} else {
+			/* Consume the pending flush request */
+			this_cpu_and(cpu_tlbstate.user_pcid_flush_mask,
+				     ~(1 << pcid));
+		}
+	}
+
+	native_write_cr3(cr3);
+}
+
+/*
+ * Derive the kernel CR3 value from @cr3 and load it with
+ * write_kernel_cr3().
+ */
+static __always_inline void switch_to_kernel_cr3(unsigned long cr3)
+{
+	/*
+	 * Dropping both the user PCID bit and the "PAGE_TABLE_ISOLATION
+	 * bit" makes CR3 point at the kernel pagetables.
+	 */
+	unsigned long kernel_cr3 = cr3 & ~PTI_USER_PGTABLE_AND_PCID_MASK;
+
+	write_kernel_cr3(kernel_cr3);
+}
+
+/*
+ * Derive the user CR3 value from @cr3 and load it with write_user_cr3().
+ */
+static __always_inline void switch_to_user_cr3(unsigned long cr3)
+{
+	/* The user version of the PGD is always selected ... */
+	unsigned long user_bits = PTI_USER_PGTABLE_MASK;
+
+	/* ... and so is the user version of the ASID when PCID is in use */
+	if (static_cpu_has(X86_FEATURE_PCID))
+		user_bits |= PTI_USER_PCID_MASK;
+
+	write_user_cr3(cr3 | user_bits);
+}
+
+/*
+ * Read the current CR3 and, if it has the user page-table bit set, switch
+ * to the kernel CR3.
+ *
+ * Returns the CR3 value as it was read (before any switch), suitable for
+ * passing to restore_cr3(), or 0 when X86_FEATURE_PTI is not enabled.
+ */
+static __always_inline unsigned long save_and_switch_to_kernel_cr3(void)
+{
+	unsigned long saved_cr3 = 0;
+
+	if (static_cpu_has(X86_FEATURE_PTI)) {
+		saved_cr3 = __native_read_cr3();
+
+		/* Nothing to switch if the user bit is not set in CR3 */
+		if (saved_cr3 & PTI_USER_PGTABLE_MASK)
+			switch_to_kernel_cr3(saved_cr3);
+	}
+
+	return saved_cr3;
+}
+
+/*
+ * Load @cr3 back into the CR3 register. Does nothing when X86_FEATURE_PTI
+ * is not enabled.
+ *
+ * @cr3: CR3 value to restore; the user page-table bit selects whether it
+ *       is written through the user or the kernel path.
+ */
+static __always_inline void restore_cr3(unsigned long cr3)
+{
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	if (!(cr3 & PTI_USER_PGTABLE_MASK)) {
+		/*
+		 * Kernel CR3: it is written unconditionally. Skipping the
+		 * write when the value is unchanged would need a CR3 read
+		 * to compare against.
+		 */
+		write_kernel_cr3(cr3);
+		return;
+	}
+
+	switch_to_user_cr3(cr3);
+}
+
+/*
+ * Switch from the current CR3 to its user counterpart. No-op when
+ * X86_FEATURE_PTI is not enabled.
+ */
+static __always_inline void user_pagetable_enter(void)
+{
+	if (static_cpu_has(X86_FEATURE_PTI))
+		switch_to_user_cr3(__native_read_cr3());
+}
+
+/*
+ * Switch from the current CR3 to its kernel counterpart. No-op when
+ * X86_FEATURE_PTI is not enabled.
+ */
+static __always_inline void user_pagetable_exit(void)
+{
+	if (static_cpu_has(X86_FEATURE_PTI))
+		switch_to_kernel_cr3(__native_read_cr3());
+}
+
+
+#else /* CONFIG_PAGE_TABLE_ISOLATION */
+
+/* Without CONFIG_PAGE_TABLE_ISOLATION no CR3 is saved: always report 0. */
+static __always_inline unsigned long save_and_switch_to_kernel_cr3(void)
+{
+	return 0;
+}
+
+/* Without CONFIG_PAGE_TABLE_ISOLATION there is nothing to restore. */
+static __always_inline void restore_cr3(unsigned long cr3)
+{
+}
+
+/* Without CONFIG_PAGE_TABLE_ISOLATION these do nothing. */
+static __always_inline void user_pagetable_enter(void) {}
+static __always_inline void user_pagetable_exit(void) {}
+
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+#endif /* MODULE */
+
#endif
--
2.18.4