[patch V149 23/50] x86/mm/pti: Add mapping helper functions

From: Thomas Gleixner
Date: Sat Dec 16 2017 - 16:44:42 EST


From: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>

Add the pagetable helper functions do manage the separate user space page
tables.

[ tglx: Split out from the big combo kaiser patch. Folded Andys
simplification ]

Signed-off-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: David Laight <David.Laight@xxxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: Eduardo Valentin <eduval@xxxxxxxxxx>
Cc: Greg KH <gregkh@xxxxxxxxxxxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Cc: Juergen Gross <jgross@xxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Will Deacon <will.deacon@xxxxxxx>
Cc: aliguori@xxxxxxxxxx
Cc: daniel.gruss@xxxxxxxxxxxxxx
Cc: hughd@xxxxxxxxxx
Cc: keescook@xxxxxxxxxx
---
arch/x86/include/asm/pgtable_64.h | 123 ++++++++++++++++++++++++++++++++++++++
1 file changed, 123 insertions(+)

--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -131,9 +131,128 @@ static inline pud_t native_pudp_get_and_
#endif
}

+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
+ * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and
+ * the user one is in the last 4k. To switch between them, you
+ * just need to flip the 12th bit in their addresses.
+ */
+#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT
+
+/*
+ * This generates better code than the inline assembly in
+ * __set_bit().
+ */
+static inline void *ptr_set_bit(void *ptr, int bit)
+{
+ unsigned long __ptr = (unsigned long)ptr;
+
+ __ptr |= BIT(bit);
+ return (void *)__ptr;
+}
+static inline void *ptr_clear_bit(void *ptr, int bit)
+{
+ unsigned long __ptr = (unsigned long)ptr;
+
+ __ptr &= ~BIT(bit);
+ return (void *)__ptr;
+}
+
+static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
+{
+ return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
+{
+ return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
+{
+ return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
+{
+ return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+/*
+ * Page table pages are page-aligned. The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ *
+ * Returns true for parts of the PGD that map userspace and
+ * false for the parts that map the kernel.
+ */
+static inline bool pgdp_maps_userspace(void *__ptr)
+{
+ unsigned long ptr = (unsigned long)__ptr;
+
+ return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
+}
+
+/*
+ * Take a PGD location (pgdp) and a pgd value that needs to be set there.
+ * Populates the user and returns the resulting PGD that must be set in
+ * the kernel copy of the page tables.
+ */
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ if (!static_cpu_has_bug(X86_BUG_CPU_SECURE_MODE_PTI))
+ return pgd;
+
+ if (pgdp_maps_userspace(pgdp)) {
+ /*
+ * The user page tables get the full PGD,
+ * accessible from userspace:
+ */
+ kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
+
+ /*
+ * If this is normal user memory, make it NX in the kernel
+ * pagetables so that, if we somehow screw up and return to
+ * usermode with the kernel CR3 loaded, we'll get a page
+ * fault instead of allowing user code to execute with
+ * the wrong CR3.
+ *
+ * As exceptions, we don't set NX if:
+ * - this is EFI or similar, the kernel may execute from it
+ * - we don't have NX support
+ * - we're clearing the PGD (i.e. pgd.pgd == 0).
+ */
+ if ((pgd.pgd & _PAGE_USER) && (__supported_pte_mask & _PAGE_NX))
+ pgd.pgd |= _PAGE_NX;
+ } else {
+ /*
+ * Changes to the high (kernel) portion of the kernelmode
+ * page tables are not automatically propagated to the
+ * usermode tables.
+ *
+ * Users should keep in mind that, unlike the kernelmode
+ * tables, there is no vmalloc_fault equivalent for the
+ * usermode tables. Top-level entries added to init_mm's
+ * usermode pgd after boot will not be automatically
+ * propagated to other mms.
+ */
+ }
+#endif
+
+ /* return the copy of the PGD we want the kernel to use: */
+ return pgd;
+}
+
+
static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
+ p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
+#else
*p4dp = p4d;
+#endif
}

static inline void native_p4d_clear(p4d_t *p4d)
@@ -147,7 +266,11 @@ static inline void native_p4d_clear(p4d_

static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ *pgdp = pti_set_user_pgd(pgdp, pgd);
+#else
*pgdp = pgd;
+#endif
}

static inline void native_pgd_clear(pgd_t *pgd)