[PATCH 3/4] mce/copyin: fix to not SIGBUS when copying from user hits poison

From: Tony Luck
Date: Thu Mar 25 2021 - 20:03:26 EST


Andy Lutomirski pointed out that sending SIGBUS to tasks that
hit poison in the kernel copying syscall parameters from user
address space is not the right semantic.

So stop doing that. Add a new kill_me_never() call back that
simply unmaps and offlines the poison page.

current-mce_vaddr is no longer used, so drop this field

---

Needs to be combined with other patches for bisectability
---
arch/x86/kernel/cpu/mce/core.c | 35 ++++++++++++++++--------------
arch/x86/kernel/cpu/mce/severity.c | 2 --
include/linux/sched.h | 1 -
3 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 7962355436da..1570310cadab 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1263,32 +1263,32 @@ static void kill_me_maybe(struct callback_head *cb)
if (!p->mce_ripv)
flags |= MF_MUST_KILL;

- if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
- !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
+ if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
sync_core();
return;
}

- if (p->mce_vaddr != (void __user *)-1l) {
- force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
- } else {
- pr_err("Memory error not recovered");
- kill_me_now(cb);
- }
+ pr_err("Memory error not recovered");
+ kill_me_now(cb);
}

-static void queue_task_work(struct mce *m, int kill_current_task)
+static void kill_me_never(struct callback_head *cb)
+{
+ struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
+
+ pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
+ if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
+ set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
+}
+
+static void queue_task_work(struct mce *m, void (*func)(struct callback_head *))
{
current->mce_addr = m->addr;
current->mce_kflags = m->kflags;
current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
current->mce_whole_page = whole_page(m);
-
- if (kill_current_task)
- current->mce_kill_me.func = kill_me_now;
- else
- current->mce_kill_me.func = kill_me_maybe;
+ current->mce_kill_me.func = func;

task_work_add(current, &current->mce_kill_me, TWA_RESUME);
}
@@ -1426,7 +1426,10 @@ noinstr void do_machine_check(struct pt_regs *regs)
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));

- queue_task_work(&m, kill_current_task);
+ if (kill_current_task)
+ queue_task_work(&m, kill_me_now);
+ else
+ queue_task_work(&m, kill_me_maybe);

} else {
/*
@@ -1444,7 +1447,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
}

if (m.kflags & MCE_IN_KERNEL_COPYIN)
- queue_task_work(&m, kill_current_task);
+ queue_task_work(&m, kill_me_never);
}
out:
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 83df991314c5..47810d12f040 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -251,8 +251,6 @@ static bool is_copy_from_user(struct pt_regs *regs)
if (fault_in_kernel_space(addr))
return false;

- current->mce_vaddr = (void __user *)addr;
-
return true;
}

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ef00bb22164c..2d213b52730c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1358,7 +1358,6 @@ struct task_struct {
#endif

#ifdef CONFIG_X86_MCE
- void __user *mce_vaddr;
__u64 mce_kflags;
u64 mce_addr;
__u64 mce_ripv : 1,
--
2.29.2