Re: [PATCH 22/43] x86/mm/kaiser: Prepare assembly for entry/exit CR3 switching

From: Borislav Petkov
Date: Sun Nov 26 2017 - 06:51:56 EST


On Fri, Nov 24, 2017 at 06:23:50PM +0100, Ingo Molnar wrote:
> diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
> index 3fd8bc560fae..e1650da01323 100644
> --- a/arch/x86/entry/calling.h
> +++ b/arch/x86/entry/calling.h
> @@ -1,6 +1,7 @@
> /* SPDX-License-Identifier: GPL-2.0 */
> #include <linux/jump_label.h>
> #include <asm/unwind_hints.h>
> +#include <asm/cpufeatures.h>
>
> /*
>
> @@ -187,6 +188,70 @@ For 32-bit we have the following conventions - kernel is built with
> #endif
> .endm
>
> +#ifdef CONFIG_KAISER
> +
> +/* KAISER PGDs are 8k. Flip bit 12 to switch between the two halves: */
> +#define KAISER_SWITCH_MASK (1<<PAGE_SHIFT)

Btw, entry_64.o doesn't build at this point in the series if you
force-enable CONFIG_KAISER by doing

#define CONFIG_KAISER

above it.

arch/x86/entry/entry_64.S: Assembler messages:
arch/x86/entry/entry_64.S:210: Error: invalid operands (*ABS* and *UND* sections) for `<<'
arch/x86/entry/entry_64.S:406: Error: invalid operands (*ABS* and *UND* sections) for `<<'
arch/x86/entry/entry_64.S:743: Error: invalid operands (*ABS* and *UND* sections) for `<<'
arch/x86/entry/entry_64.S:955: Error: invalid operands (*ABS* and *UND* sections) for `<<'
...

due to the missing PAGE_SHIFT definition in asm.

I'm assuming that gets resolved later - not that it breaks bisection,
since you have to force-enable CONFIG_KAISER to even hit it...
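
If not, something like the below in calling.h would probably be enough
(quick, untested sketch - assuming <asm/page_types.h> is the right
place to get PAGE_SHIFT from for asm too):

  #include <asm/page_types.h>		/* PAGE_SHIFT, usable from asm */

  /* KAISER PGDs are 8k. Flip bit 12 to switch between the two halves: */
  #define KAISER_SWITCH_MASK	(1 << PAGE_SHIFT)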

> +.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
> + movq %cr3, %r\scratch_reg
> + movq %r\scratch_reg, \save_reg

What happened to making it uniform so that the macro can be invoked
like this:

SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ...

instead of "splitting" the arg?

IOW, the hunk below builds here, and the asm looks correct:

14bf: 31 db xor %ebx,%ebx
14c1: 0f 20 d8 mov %cr3,%rax
14c4: 49 89 c6 mov %rax,%r14
14c7: 48 a9 00 10 00 00 test $0x1000,%rax
14cd: 74 09 je 14d8 <paranoid_entry+0x78>
14cf: 48 25 ff ef ff ff and $0xffffffffffffefff,%rax
14d5: 0f 22 d8 mov %rax,%cr3

---
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index e1650da01323..d528f7060774 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -188,10 +188,12 @@ For 32-bit we have the following conventions - kernel is built with
#endif
.endm

+#define CONFIG_KAISER
+
#ifdef CONFIG_KAISER

/* KAISER PGDs are 8k. Flip bit 12 to switch between the two halves: */
-#define KAISER_SWITCH_MASK (1<<PAGE_SHIFT)
+#define KAISER_SWITCH_MASK (1<<12)

.macro ADJUST_KERNEL_CR3 reg:req
/* Clear "KAISER bit", point CR3 at kernel pagetables: */
@@ -216,17 +218,17 @@ For 32-bit we have the following conventions - kernel is built with
.endm

.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
- movq %cr3, %r\scratch_reg
- movq %r\scratch_reg, \save_reg
+ movq %cr3, \scratch_reg
+ movq \scratch_reg, \save_reg
/*
* Is the switch bit zero? This means the address is
* up in real KAISER patches in a moment.
*/
- testq $(KAISER_SWITCH_MASK), %r\scratch_reg
+ testq $(KAISER_SWITCH_MASK), \scratch_reg
jz .Ldone_\@

- ADJUST_KERNEL_CR3 %r\scratch_reg
- movq %r\scratch_reg, %cr3
+ ADJUST_KERNEL_CR3 \scratch_reg
+ movq \scratch_reg, %cr3

.Ldone_\@:
.endm
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4ac952080869..5a15d0852b2f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1256,7 +1256,7 @@ ENTRY(paranoid_entry)
xorl %ebx, %ebx

1:
- SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=ax save_reg=%r14
+ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14

ret
END(paranoid_entry)

> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index 34e3110b0876..4ac952080869 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -168,6 +168,9 @@ ENTRY(entry_SYSCALL_64_trampoline)
> /* Stash the user RSP. */
> movq %rsp, RSP_SCRATCH
>
> + /* Note: using %rsp as a scratch reg. */

Haha, yap, it just got freed :)

> + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
> +
> /* Load the top of the task stack into RSP */
> movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
>
> @@ -198,6 +201,13 @@ ENTRY(entry_SYSCALL_64)
>
> swapgs
> movq %rsp, PER_CPU_VAR(rsp_scratch)

<---- newline here.

> + /*
> + * The kernel CR3 is needed to map the process stack, but we
> + * need a scratch register to be able to load CR3. %rsp is
> + * clobberable right now, so use it as a scratch register.
> + * %rsp will be look crazy here for a couple instructions.

s/be // or "will be looking crazy" :-)

> + */
> + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp

Now, this one is questionable: we entered through the trampoline
entry_SYSCALL_64_trampoline, so theoretically we wouldn't need to
switch CR3 here again because, well, we already did.

I.e., entry_SYSCALL_64 is not going to be entered directly anymore -
unless we jump to it when we decide to skip the trampolines in the
kaiser-disabled case. Just pointing it out here so that we don't forget
to deal with this...
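
Just to illustrate what I mean - purely hypothetical sketch, the
kaiser_enabled check is a made-up name and SYSCALL64_entry_trampoline
is whatever trampoline address syscall_init() computes - the boot code
could simply pick the entry point when programming LSTAR:

	/* in syscall_init(), roughly: */
	if (kaiser_enabled)
		wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
	else
		wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);

and in the direct-entry case the CR3 switch in entry_SYSCALL_64
wouldn't really be doing anything useful either, I guess.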

> @@ -1239,7 +1254,11 @@ ENTRY(paranoid_entry)
> js 1f /* negative -> in kernel */
> SWAPGS
> xorl %ebx, %ebx
> -1: ret
> +
> +1:
> + SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=ax save_reg=%r14
> +
> + ret
> END(paranoid_entry)
>
> /*
> @@ -1261,6 +1280,7 @@ ENTRY(paranoid_exit)
> testl %ebx, %ebx /* swapgs needed? */
> jnz .Lparanoid_exit_no_swapgs
> TRACE_IRQS_IRETQ
> + RESTORE_CR3 %r14

RESTORE_CR3 save_reg=%r14

like the other invocation below.

But if the runtime disable gets changed to a boottime one, you don't
need that macro anymore.
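
(With a boottime switch the CR3 macros could also simply be patched out
with alternatives - roughly like below, where X86_FEATURE_KAISER is a
made-up feature bit and ALTERNATIVE comes from asm/alternative-asm.h:)

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
	/* Jump over the CR3 switch unless the feature bit is set: */
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KAISER
	movq	%cr3, \scratch_reg
	ADJUST_KERNEL_CR3 \scratch_reg
	movq	\scratch_reg, %cr3
.Lend_\@:
.endm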

> SWAPGS_UNSAFE_STACK
> jmp .Lparanoid_exit_restore
> .Lparanoid_exit_no_swapgs:

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.