Re: [RFC PATCH -next] arm64: ras: add uce kernel recovery support

From: Shuai Xue
Date: Thu Jun 16 2022 - 22:20:21 EST


Hi, Tiangen,

在 2022/3/23 AM11:37, Tong Tiangen 写道:
> uce means uncorrectable memory error.
>
> In the sea fault handling process of the kernel, if it is judged that the
> RAS error is consumed in the kernel, the current processing is kernel panic
> . However, it is not optimal. In some case, the page accessed in kernel is
> a user page (such as copy_from_user/get_user), in this case, kill the user
> process and isolate the user page with hardware error is a better choice.
>
> This feature provides the option. We implement it to add new extable type
> (XXX_UCE_RECOVERY), these new types indicates they can be fixup from
> uncorrectable memory error and it is fixuped in do_sea(). If the exception
> is fixuped correctly, the kernel can avoid panic.
>
> In copy_from_user, record the exception reason to regs->regs[0] at
> ex_handler_fixup_uce_recovery() which is used to check sea fault triggered.
>
> In get_user, the processing of EX_TYPE_UACCESS_ERR_ZERO and
> EX_TYPE_UACCESS_ERR_ZERO_UCE_RECOVERY is same and both return -EFAULT.
>
> Signed-off-by: Tong Tiangen <tongtiangen@xxxxxxxxxx>
> ---
> arch/arm64/Kconfig | 10 ++++++++
> arch/arm64/include/asm/asm-extable.h | 35 ++++++++++++++++++++++++----
> arch/arm64/include/asm/asm-uaccess.h | 16 +++++++++++++
> arch/arm64/include/asm/esr.h | 5 ++++
> arch/arm64/include/asm/extable.h | 2 +-
> arch/arm64/include/asm/uaccess.h | 2 +-
> arch/arm64/kernel/probes/kprobes.c | 2 +-
> arch/arm64/lib/copy_from_user.S | 11 +++++----
> arch/arm64/mm/extable.c | 21 ++++++++++++++++-
> arch/arm64/mm/fault.c | 30 +++++++++++++++++++++++-
> 10 files changed, 119 insertions(+), 15 deletions(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 962c84952c98..39f828c5b931 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1681,6 +1681,16 @@ config ARM64_CNP
> at runtime, and does not affect PEs that do not implement
> this feature.
>
> +config ARM64_UCE_KERNEL_RECOVERY
> + bool "Enable support for uncorrectable memory error(uce) kernel recovery"
> + default y
> + depends on ACPI_APEI_SEA
> + help
> + With ARM v8.2 RAS Extension, SEA are usually triggered when memory
> + error are consumed. In some cases, if the error address is in a
> + user page there is a chance to recover. we can isolate this page
> + and killing process instead of die.
> +
> endmenu
>
> menu "ARMv8.3 architectural features"
> diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h
> index c39f2437e08e..9debab58c2b2 100644
> --- a/arch/arm64/include/asm/asm-extable.h
> +++ b/arch/arm64/include/asm/asm-extable.h
> @@ -2,11 +2,19 @@
> #ifndef __ASM_ASM_EXTABLE_H
> #define __ASM_ASM_EXTABLE_H
>
> -#define EX_TYPE_NONE 0
> -#define EX_TYPE_FIXUP 1
> -#define EX_TYPE_BPF 2
> -#define EX_TYPE_UACCESS_ERR_ZERO 3
> -#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
> +#define EX_TYPE_NONE 0
> +#define EX_TYPE_FIXUP 1
> +#define EX_TYPE_BPF 2
> +#define EX_TYPE_UACCESS_ERR_ZERO 3
> +#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
> +
> +/* _UCE_RECOVERY indicates that can fixup from unrecoverable memory errors */
> +#define EX_TYPE_FIXUP_UCE_RECOVERY 5
> +#define EX_TYPE_UACCESS_ERR_ZERO_UCE_RECOVERY 6
> +
> +#define IS_EX_TYPE_UCE_RECOVERY(type) \
> + (type == EX_TYPE_FIXUP_UCE_RECOVERY || \
> + type == EX_TYPE_UACCESS_ERR_ZERO_UCE_RECOVERY)
>
> #ifdef __ASSEMBLY__
>
> @@ -27,6 +35,14 @@
> __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0)
> .endm
>
> +/*
> + * Create an exception table entry for `insn`, which will branch to `fixup`
> + * when an unhandled fault(include sea fault) is taken.
> + */
> + .macro _asm_extable_uce_recovery, insn, fixup
> + __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP_UCE_RECOVERY, 0)
> + .endm
> +
> /*
> * Create an exception table entry for `insn` if `fixup` is provided. Otherwise
> * do nothing.
> @@ -64,6 +80,15 @@
> #define EX_DATA_REG(reg, gpr) \
> "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
>
> +#define _ASM_EXTABLE_UACCESS_ERR_ZERO_UCE_RECOVERY(insn, fixup, err, zero) \
> + __DEFINE_ASM_GPR_NUMS \
> + __ASM_EXTABLE_RAW(#insn, #fixup, \
> + __stringify(EX_TYPE_UACCESS_ERR_ZERO_UCE_RECOVERY), \
> + "(" \
> + EX_DATA_REG(ERR, err) " | " \
> + EX_DATA_REG(ZERO, zero) \
> + ")")
> +
> #define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \
> __DEFINE_ASM_GPR_NUMS \
> __ASM_EXTABLE_RAW(#insn, #fixup, \
> diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
> index 0557af834e03..36692ec1fdbb 100644
> --- a/arch/arm64/include/asm/asm-uaccess.h
> +++ b/arch/arm64/include/asm/asm-uaccess.h
> @@ -92,4 +92,20 @@ alternative_else_nop_endif
>
> _asm_extable 8888b,\l;
> .endm
> +
> + .macro user_ldp_uce_recovery l, reg1, reg2, addr, post_inc
> +8888: ldtr \reg1, [\addr];
> +8889: ldtr \reg2, [\addr, #8];
> + add \addr, \addr, \post_inc;
> +
> + _asm_extable_uce_recovery 8888b, \l;
> + _asm_extable_uce_recovery 8889b, \l;
> + .endm
> +
> + .macro user_ldst_uce_recovery l, inst, reg, addr, post_inc
> +8888: \inst \reg, [\addr];
> + add \addr, \addr, \post_inc;
> +
> + _asm_extable_uce_recovery 8888b, \l;
> + .endm
> #endif
> diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
> index d52a0b269ee8..11fcfc002654 100644
> --- a/arch/arm64/include/asm/esr.h
> +++ b/arch/arm64/include/asm/esr.h
> @@ -330,6 +330,11 @@
> #ifndef __ASSEMBLY__
> #include <asm/types.h>
>
> +static inline bool esr_is_sea(u32 esr)
> +{
> + return (esr & ESR_ELx_FSC) == ESR_ELx_FSC_EXTABT;
> +}
> +
> static inline bool esr_is_data_abort(u32 esr)
> {
> const u32 ec = ESR_ELx_EC(esr);
> diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
> index 72b0e71cc3de..f7835b0f473b 100644
> --- a/arch/arm64/include/asm/extable.h
> +++ b/arch/arm64/include/asm/extable.h
> @@ -45,5 +45,5 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
> }
> #endif /* !CONFIG_BPF_JIT */
>
> -bool fixup_exception(struct pt_regs *regs);
> +bool fixup_exception(struct pt_regs *regs, unsigned int esr);
> #endif
> diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
> index e8dce0cc5eaa..a229f86e6542 100644
> --- a/arch/arm64/include/asm/uaccess.h
> +++ b/arch/arm64/include/asm/uaccess.h
> @@ -236,7 +236,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
> asm volatile( \
> "1: " load " " reg "1, [%2]\n" \
> "2:\n" \
> - _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \
> + _ASM_EXTABLE_UACCESS_ERR_ZERO_UCE_RECOVERY(1b, 2b, %w0, %w1) \
> : "+r" (err), "=&r" (x) \
> : "r" (addr))
>
> diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
> index d9dfa82c1f18..16a069e8eec3 100644
> --- a/arch/arm64/kernel/probes/kprobes.c
> +++ b/arch/arm64/kernel/probes/kprobes.c
> @@ -285,7 +285,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
> * In case the user-specified fault handler returned
> * zero, try to fix up.
> */
> - if (fixup_exception(regs))
> + if (fixup_exception(regs, fsr))
> return 1;
> }
> return 0;
> diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
> index 34e317907524..f16104c63dde 100644
> --- a/arch/arm64/lib/copy_from_user.S
> +++ b/arch/arm64/lib/copy_from_user.S
> @@ -21,7 +21,7 @@
> */
>
> .macro ldrb1 reg, ptr, val
> - user_ldst 9998f, ldtrb, \reg, \ptr, \val
> + user_ldst_uce_recovery 9998f, ldtrb, \reg, \ptr, \val
> .endm
>
> .macro strb1 reg, ptr, val
> @@ -29,7 +29,7 @@
> .endm
>
> .macro ldrh1 reg, ptr, val
> - user_ldst 9997f, ldtrh, \reg, \ptr, \val
> + user_ldst_uce_recovery 9997f, ldtrh, \reg, \ptr, \val
> .endm
>
> .macro strh1 reg, ptr, val
> @@ -37,7 +37,7 @@
> .endm
>
> .macro ldr1 reg, ptr, val
> - user_ldst 9997f, ldtr, \reg, \ptr, \val
> + user_ldst_uce_recovery 9997f, ldtr, \reg, \ptr, \val
> .endm
>
> .macro str1 reg, ptr, val
> @@ -45,7 +45,7 @@
> .endm
>
> .macro ldp1 reg1, reg2, ptr, val
> - user_ldp 9997f, \reg1, \reg2, \ptr, \val
> + user_ldp_uce_recovery 9997f, \reg1, \reg2, \ptr, \val
> .endm
>
> .macro stp1 reg1, reg2, ptr, val
> @@ -62,7 +62,8 @@ SYM_FUNC_START(__arch_copy_from_user)
> ret
>
> // Exception fixups
> -9997: cmp dst, dstin
> +9997: cbz x0, 9998f
> + cmp dst, dstin
> b.ne 9998f
> // Before being absolutely sure we couldn't copy anything, try harder
> USER(9998f, ldtrb tmp1w, [srcin])
> diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
> index 489455309695..b6b76e839b4b 100644
> --- a/arch/arm64/mm/extable.c
> +++ b/arch/arm64/mm/extable.c
> @@ -9,6 +9,7 @@
>
> #include <asm/asm-extable.h>
> #include <asm/ptrace.h>
> +#include <asm/esr.h>
>
> static inline unsigned long
> get_ex_fixup(const struct exception_table_entry *ex)
> @@ -23,6 +24,18 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex,
> return true;
> }
>
> +static bool ex_handler_fixup_uce_recovery(const struct exception_table_entry *ex,
> + struct pt_regs *regs, unsigned int esr)
> +{
> + if (esr_is_sea(esr))
> + regs->regs[0] = 0;
> + else
> + regs->regs[0] = 1;
> +
> + regs->pc = get_ex_fixup(ex);
> + return true;
> +}
> +
> static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
> struct pt_regs *regs)
> {
> @@ -63,7 +76,7 @@ ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
> return true;
> }
>
> -bool fixup_exception(struct pt_regs *regs)
> +bool fixup_exception(struct pt_regs *regs, unsigned int esr)
> {
> const struct exception_table_entry *ex;
>
> @@ -71,12 +84,18 @@ bool fixup_exception(struct pt_regs *regs)
> if (!ex)
> return false;
>
> + if (esr_is_sea(esr) && !IS_EX_TYPE_UCE_RECOVERY(ex->type))
> + return false;
> +
> switch (ex->type) {
> case EX_TYPE_FIXUP:
> return ex_handler_fixup(ex, regs);
> + case EX_TYPE_FIXUP_UCE_RECOVERY:
> + return ex_handler_fixup_uce_recovery(ex, regs, esr);
> case EX_TYPE_BPF:
> return ex_handler_bpf(ex, regs);
> case EX_TYPE_UACCESS_ERR_ZERO:
> + case EX_TYPE_UACCESS_ERR_ZERO_UCE_RECOVERY:
> return ex_handler_uaccess_err_zero(ex, regs);
> case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
> return ex_handler_load_unaligned_zeropad(ex, regs);
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 77341b160aca..47c447554d2a 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -361,7 +361,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
> * Are we prepared to handle this kernel fault?
> * We are almost certainly not prepared to handle instruction faults.
> */
> - if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
> + if (!is_el1_instruction_abort(esr) && fixup_exception(regs, esr))
> return;
>
> if (WARN_RATELIMIT(is_spurious_el1_translation_fault(addr, esr, regs),
> @@ -695,6 +695,30 @@ static int do_bad(unsigned long far, unsigned int esr, struct pt_regs *regs)
> return 1; /* "fault" */
> }
>
> +static bool arm64_process_kernel_sea(unsigned long addr, unsigned int esr,
> + struct pt_regs *regs, int sig, int code)
> +{
> + if (!IS_ENABLED(CONFIG_ARM64_UCE_KERNEL_RECOVERY))
> + return false;
> +
> + if (user_mode(regs) || !current->mm)
> + return false;
> +
> + if (apei_claim_sea(regs) < 0)
> + return false;
> +
> + current->thread.fault_address = 0;
> + current->thread.fault_code = esr;
> +
> + if (!fixup_exception(regs, esr))
> + return false;
> +
> + arm64_force_sig_fault(sig, code, addr,
> + "Uncorrected hardware memory error in kernel-access\n");
> +
> + return true;
> +}

Based on X86 RAS implementation[1], I don't think we shoud use arm64_force_sig_fault to
kill process in this case. Instead, we could return -EFAULT in fixup_exception and left
the rest work in memory_failure.


> Sending a SIGBUS for a copy from user is not the correct semantic.
> System calls should return -EFAULT (or a short count for write(2)).

[1]https://lore.kernel.org/all/20210818002942.1607544-3-tony.luck@xxxxxxxxx/T/#m6707a73fc8316690818b6e7eea0329b5925e1f9d


Best Regards,
Shuai