[PATCH 7/8] x86/mce: Recover from poison found while copying from user space

From: Tony Luck
Date: Tue Sep 08 2020 - 13:56:01 EST


From: Youquan Song <youquan.song@xxxxxxxxx>

Existing kernel code can only recover from a machine check on code that
tagged in the exception table with a fault handling recovery path.

New field in the task structure mce_vaddr is initialized to the
user virtual address of the fault. This is so that kill_me_maybe()
can provide that information to the user SIGBUS handler.

Add code to recover from a machine check while copying data from user
space to the kernel. Action for this case is the same as if the user
touched the poison directly; unmap the page and send a SIGBUS to the task.

Signed-off-by: Youquan Song <youquan.song@xxxxxxxxx>
Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
---
arch/x86/kernel/cpu/mce/core.c | 51 ++++++++++++++++++++++++++++++++++
include/linux/sched.h | 1 +
2 files changed, 52 insertions(+)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5512318a07ae..2a3c42329c3f 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -53,6 +53,8 @@
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/reboot.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>

#include "internal.h"

@@ -1197,6 +1199,32 @@ static void kill_me_maybe(struct callback_head *cb)
kill_me_now(cb);
}

+/*
+ * Decode a kernel instruction that faulted while reading from a user
+ * address and return the linear address that was being read.
+ */
+static void __user *get_virtual_address(struct pt_regs *regs)
+{
+ u8 insn_buf[MAX_INSN_SIZE];
+ struct insn insn;
+
+ if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
+ return (void __user *)~0ul;
+
+ kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
+ insn_get_length(&insn);
+ insn_get_modrm(&insn);
+ insn_get_sib(&insn);
+
+ /*
+ * For MOVS[BWLQ] the source address is in %rsi
+ */
+ if (insn.opcode.value == 0xa4 || insn.opcode.value == 0xa5)
+ return (void __user *)regs->si;
+ else
+ return insn_get_addr_ref(&insn, regs);
+}
+
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
@@ -1342,6 +1370,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));

+ current->mce_vaddr = NULL;
current->mce_addr = m.addr;
current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
current->mce_whole_page = whole_page(&m);
@@ -1350,6 +1379,13 @@ noinstr void do_machine_check(struct pt_regs *regs)
current->mce_kill_me.func = kill_me_now;
task_work_add(current, &current->mce_kill_me, true);
} else {
+ /*
+ * Before fixing the exception IP, find the user address
+ * in the MCE_IN_KERNEL_COPYIN case
+ */
+ if (m.kflags & MCE_IN_KERNEL_COPYIN)
+ current->mce_vaddr = get_virtual_address(regs);
+
/*
* Handle an MCE which has happened in kernel space but from
* which the kernel can recover: ex_has_fault_handler() has
@@ -1363,6 +1399,21 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
mce_panic("Failed kernel mode recovery", &m, msg);
}
+
+ /*
+ * MCE on user data while copying to kernel. Action here is
+ * very similar to the user hitting the poison themself.
+ * Poison page will be unmapped and signal sent to process.
+ */
+ if (m.kflags & MCE_IN_KERNEL_COPYIN) {
+ current->mce_addr = m.addr;
+ current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
+ current->mce_whole_page = whole_page(&m);
+ current->mce_kill_me.func = kill_me_maybe;
+ if (kill_it)
+ current->mce_kill_me.func = kill_me_now;
+ task_work_add(current, &current->mce_kill_me, true);
+ }
}
}
EXPORT_SYMBOL_GPL(do_machine_check);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index afe01e232935..fe384c097ce3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1308,6 +1308,7 @@ struct task_struct {
#endif

#ifdef CONFIG_X86_MCE
+ void __user *mce_vaddr;
u64 mce_addr;
__u64 mce_ripv : 1,
mce_whole_page : 1,
--
2.21.1