[patch V163 02/51] x86/vsyscall/64: Explicitly set _PAGE_USER in the pagetable hierarchy

From: Thomas Gleixner
Date: Mon Dec 18 2017 - 07:12:09 EST


From: Andy Lutomirski <luto@xxxxxxxxxx>

The kernel is very erratic as to which pagetables have _PAGE_USER set. The
vsyscall page gets lucky: it seems that all of the relevant pagetables are
among the apparently arbitrary ones that set _PAGE_USER. Rather than
relying on chance, just explicitly set _PAGE_USER.

This will let us clean up pagetable setup to stop setting _PAGE_USER. The
added code can also be reused by pagetable isolation to manage the
_PAGE_USER bit in the usermode tables.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: David Laight <David.Laight@xxxxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
arch/x86/entry/vsyscall/vsyscall_64.c | 33 ++++++++++++++++++++++++++++++++-
1 file changed, 32 insertions(+), 1 deletion(-)

--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -329,16 +329,47 @@ int in_gate_area_no_mm(unsigned long add
return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
}

+/*
+ * The VSYSCALL page is the only user-accessible page in the kernel address
+ * range. Normally, the kernel page tables can have _PAGE_USER clear, but
+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
+ * are enabled.
+ *
+ * Some day we may create a "minimal" vsyscall mode in which we emulate
+ * vsyscalls but leave the page not present. If so, we skip calling
+ * this.
+ */
+static void __init set_vsyscall_pgtable_user_bits(void)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = pgd_offset_k(VSYSCALL_ADDR);
+ pgd->pgd |= _PAGE_USER;
+ p4d = p4d_offset(pgd, VSYSCALL_ADDR);
+#if CONFIG_PGTABLE_LEVELS >= 5
+ p4d->p4d |= _PAGE_USER;
+#endif
+ pud = pud_offset(p4d, VSYSCALL_ADDR);
+ pud->pud |= _PAGE_USER;
+ pmd = pmd_offset(pud, VSYSCALL_ADDR);
+ pmd->pmd |= _PAGE_USER;
+}
+
void __init map_vsyscall(void)
{
extern char __vsyscall_page;
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);

- if (vsyscall_mode != NONE)
+ if (vsyscall_mode != NONE) {
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
vsyscall_mode == NATIVE
? PAGE_KERNEL_VSYSCALL
: PAGE_KERNEL_VVAR);
+ set_vsyscall_pgtable_user_bits();
+ }

BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
(unsigned long)VSYSCALL_ADDR);