[tip: x86/seves] x86/entry/64: Add entry code for #VC handler

From: tip-bot2 for Joerg Roedel
Date: Thu Sep 10 2020 - 05:32:40 EST


The following commit has been merged into the x86/seves branch of tip:

Commit-ID: a13644f3a53de4e95a7bce6459f834e832ea44c5
Gitweb: https://git.kernel.org/tip/a13644f3a53de4e95a7bce6459f834e832ea44c5
Author: Joerg Roedel <jroedel@xxxxxxx>
AuthorDate: Mon, 07 Sep 2020 15:15:46 +02:00
Committer: Borislav Petkov <bp@xxxxxxx>
CommitterDate: Wed, 09 Sep 2020 11:33:19 +02:00

x86/entry/64: Add entry code for #VC handler

The #VC handler needs special entry code because:

1. It runs on an IST stack

2. It needs to be able to handle nested #VC exceptions

To make this work, the entry code is implemented to pretend it doesn't
use an IST stack. When entered from user-mode or early SYSCALL entry
path it switches to the task stack. If entered from kernel-mode it tries
to switch back to the previous stack in the IRET frame.

The stack found in the IRET frame is validated first, and if it is not
safe to use it for the #VC handler, the code will switch to a
fall-back stack (the #VC2 IST stack). From there, it can cause nested
exceptions again.

Signed-off-by: Joerg Roedel <jroedel@xxxxxxx>
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
Link: https://lkml.kernel.org/r/20200907131613.12703-46-joro@xxxxxxxxxx
---
arch/x86/entry/entry_64.S | 80 ++++++++++++++++++++++++++++++++-
arch/x86/include/asm/idtentry.h | 44 ++++++++++++++++++-
arch/x86/include/asm/proto.h | 1 +-
arch/x86/include/asm/traps.h | 1 +-
arch/x86/kernel/traps.c | 45 ++++++++++++++++++-
5 files changed, 171 insertions(+)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 70dea93..15aa189 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -101,6 +101,8 @@ SYM_CODE_START(entry_SYSCALL_64)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp

+SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
+
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) /* pt_regs->sp */
@@ -446,6 +448,84 @@ _ASM_NOKPROBE(\asmsym)
SYM_CODE_END(\asmsym)
.endm

+#ifdef CONFIG_AMD_MEM_ENCRYPT
+/**
+ * idtentry_vc - Macro to generate entry stub for #VC
+ * @vector: Vector number
+ * @asmsym: ASM symbol for the entry point
+ * @cfunc: C function to be called
+ *
+ * The macro emits code to set up the kernel context for #VC. The #VC handler
+ * runs on an IST stack and needs to be able to cause nested #VC exceptions.
+ *
+ * To make this work the #VC entry code tries its best to pretend it doesn't use
+ * an IST stack by switching to the task stack if coming from user-space (which
+ * includes early SYSCALL entry path) or back to the stack in the IRET frame if
+ * entered from kernel-mode.
+ *
+ * If entered from kernel-mode the return stack is validated first, and if it is
+ * not safe to use (e.g. because it points to the entry stack) the #VC handler
+ * will switch to a fall-back stack (VC2) and call a special handler function.
+ *
+ * The macro is only used for one vector, but it is planned to be extended in
+ * the future for the #HV exception.
+ */
+.macro idtentry_vc vector asmsym cfunc
+SYM_CODE_START(\asmsym)
+ UNWIND_HINT_IRET_REGS
+ ASM_CLAC
+
+ /*
+ * If the entry is from userspace, switch stacks and treat it as
+ * a normal entry.
+ */
+ testb $3, CS-ORIG_RAX(%rsp)
+ jnz .Lfrom_usermode_switch_stack_\@
+
+ /*
+ * paranoid_entry returns SWAPGS flag for paranoid_exit in EBX.
+ * EBX == 0 -> SWAPGS, EBX == 1 -> no SWAPGS
+ */
+ call paranoid_entry
+
+ UNWIND_HINT_REGS
+
+ /*
+ * Switch off the IST stack to make it free for nested exceptions. The
+ * vc_switch_off_ist() function will switch back to the interrupted
+ * stack if it is safe to do so. If not it switches to the VC fall-back
+ * stack.
+ */
+ movq %rsp, %rdi /* pt_regs pointer */
+ call vc_switch_off_ist
+ movq %rax, %rsp /* Switch to new stack */
+
+ UNWIND_HINT_REGS
+
+ /* Update pt_regs */
+ movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/
+ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
+
+ movq %rsp, %rdi /* pt_regs pointer */
+
+ call \cfunc
+
+ /*
+ * No need to switch back to the IST stack. The current stack is either
+ * identical to the stack in the IRET frame or the VC fall-back stack,
+ * so it is definitly mapped even with PTI enabled.
+ */
+ jmp paranoid_exit
+
+ /* Switch to the regular task stack */
+.Lfrom_usermode_switch_stack_\@:
+ idtentry_body safe_stack_\cfunc, has_error_code=1
+
+_ASM_NOKPROBE(\asmsym)
+SYM_CODE_END(\asmsym)
+.endm
+#endif
+
/*
* Double fault entry. Straight paranoid. No checks from which context
* this comes because for the espfix induced #DF this would do the wrong
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index a433661..840faaf 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -309,6 +309,18 @@ static __always_inline void __##func(struct pt_regs *regs)
__visible void noist_##func(struct pt_regs *regs)

/**
+ * DECLARE_IDTENTRY_VC - Declare functions for the VC entry point
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Maps to DECLARE_IDTENTRY_RAW_ERRORCODE, but declares also the
+ * safe_stack C handler.
+ */
+#define DECLARE_IDTENTRY_VC(vector, func) \
+ DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \
+ __visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code)
+
+/**
* DEFINE_IDTENTRY_IST - Emit code for IST entry points
* @func: Function name of the entry point
*
@@ -347,6 +359,35 @@ static __always_inline void __##func(struct pt_regs *regs)
#define DEFINE_IDTENTRY_DF(func) \
DEFINE_IDTENTRY_RAW_ERRORCODE(func)

+/**
+ * DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler
+ which runs on a safe stack.
+ * @func: Function name of the entry point
+ *
+ * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
+ */
+#define DEFINE_IDTENTRY_VC_SAFE_STACK(func) \
+ DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func)
+
+/**
+ * DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler
+ which runs on the VC fall-back stack
+ * @func: Function name of the entry point
+ *
+ * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
+ */
+#define DEFINE_IDTENTRY_VC_IST(func) \
+ DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func)
+
+/**
+ * DEFINE_IDTENTRY_VC - Emit code for VMM communication handler
+ * @func: Function name of the entry point
+ *
+ * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
+ */
+#define DEFINE_IDTENTRY_VC(func) \
+ DEFINE_IDTENTRY_RAW_ERRORCODE(func)
+
#else /* CONFIG_X86_64 */

/**
@@ -433,6 +474,9 @@ __visible noinstr void func(struct pt_regs *regs, \
# define DECLARE_IDTENTRY_XENCB(vector, func) \
DECLARE_IDTENTRY(vector, func)

+# define DECLARE_IDTENTRY_VC(vector, func) \
+ idtentry_vc vector asm_##func func
+
#else
# define DECLARE_IDTENTRY_MCE(vector, func) \
DECLARE_IDTENTRY(vector, func)
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 28996fe..2c35f1c 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -10,6 +10,7 @@ void syscall_init(void);

#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
+void entry_SYSCALL_64_safe_stack(void);
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
#endif

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 6a30835..1b86bb3 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -15,6 +15,7 @@ asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
asmlinkage __visible notrace
struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
void __init trap_init(void);
+asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
#endif

#ifdef CONFIG_X86_F00F_BUG
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 2dc32b0..fc1ed40 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -43,6 +43,7 @@
#include <asm/stacktrace.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
+#include <asm/realmode.h>
#include <asm/text-patching.h>
#include <asm/ftrace.h>
#include <asm/traps.h>
@@ -673,6 +674,50 @@ asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
return regs;
}

+#ifdef CONFIG_AMD_MEM_ENCRYPT
+asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *regs)
+{
+ unsigned long sp, *stack;
+ struct stack_info info;
+ struct pt_regs *regs_ret;
+
+ /*
+ * In the SYSCALL entry path the RSP value comes from user-space - don't
+ * trust it and switch to the current kernel stack
+ */
+ if (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
+ regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack) {
+ sp = this_cpu_read(cpu_current_top_of_stack);
+ goto sync;
+ }
+
+ /*
+ * From here on the RSP value is trusted. Now check whether entry
+ * happened from a safe stack. Not safe are the entry or unknown stacks,
+ * use the fall-back stack instead in this case.
+ */
+ sp = regs->sp;
+ stack = (unsigned long *)sp;
+
+ if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY ||
+ info.type >= STACK_TYPE_EXCEPTION_LAST)
+ sp = __this_cpu_ist_top_va(VC2);
+
+sync:
+ /*
+ * Found a safe stack - switch to it as if the entry didn't happen via
+ * IST stack. The code below only copies pt_regs, the real switch happens
+ * in assembly code.
+ */
+ sp = ALIGN_DOWN(sp, 8) - sizeof(*regs_ret);
+
+ regs_ret = (struct pt_regs *)sp;
+ *regs_ret = *regs;
+
+ return regs_ret;
+}
+#endif
+
struct bad_iret_stack {
void *error_entry_ret;
struct pt_regs regs;