Re: [PATCH v9 6/7] powerpc/mce: Handle UE event for memcpy_mcsafe
From: Mahesh Jagannath Salgaonkar
Date: Wed Aug 14 2019 - 04:12:07 EST
On 8/12/19 2:52 PM, Santosh Sivaraj wrote:
> If we take a UE on one of the instructions with a fixup entry, set nip
> to continue execution at the fixup entry. Do not process the event any
> further, and do not print it.
>
> Co-developed-by: Reza Arbab <arbab@xxxxxxxxxxxxx>
> Signed-off-by: Reza Arbab <arbab@xxxxxxxxxxxxx>
> Cc: Mahesh Salgaonkar <mahesh@xxxxxxxxxxxxx>
> Signed-off-by: Santosh Sivaraj <santosh@xxxxxxxxxx>
Looks good to me.
Reviewed-by: Mahesh Salgaonkar <mahesh@xxxxxxxxxxxxxxxxxx>
Thanks,
-Mahesh.
> ---
> arch/powerpc/include/asm/mce.h | 4 +++-
> arch/powerpc/kernel/mce.c | 16 ++++++++++++++++
> arch/powerpc/kernel/mce_power.c | 15 +++++++++++++--
> 3 files changed, 32 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index f3a6036b6bc0..e1931c8c2743 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -122,7 +122,8 @@ struct machine_check_event {
> enum MCE_UeErrorType ue_error_type:8;
> u8 effective_address_provided;
> u8 physical_address_provided;
> - u8 reserved_1[5];
> + u8 ignore_event;
> + u8 reserved_1[4];
> u64 effective_address;
> u64 physical_address;
> u8 reserved_2[8];
> @@ -193,6 +194,7 @@ struct mce_error_info {
> enum MCE_Initiator initiator:8;
> enum MCE_ErrorClass error_class:8;
> bool sync_error;
> + bool ignore_event;
> };
>
> #define MAX_MC_EVT 100
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index a3b122a685a5..ec4b3e1087be 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
> if (phys_addr != ULONG_MAX) {
> mce->u.ue_error.physical_address_provided = true;
> mce->u.ue_error.physical_address = phys_addr;
> + mce->u.ue_error.ignore_event = mce_err->ignore_event;
> machine_check_ue_event(mce);
> }
> }
> @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct *work)
> /*
> * This should probably queued elsewhere, but
> * oh! well
> + *
> + * Don't report this machine check because the caller has
> + * asked us to ignore the event; it has a fixup handler which
> + * will do the appropriate error handling and reporting.
> */
> if (evt->error_type == MCE_ERROR_TYPE_UE) {
> + if (evt->u.ue_error.ignore_event) {
> + __this_cpu_dec(mce_ue_count);
> + continue;
> + }
> +
> if (evt->u.ue_error.physical_address_provided) {
> unsigned long pfn;
>
> @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work)
> while (__this_cpu_read(mce_queue_count) > 0) {
> index = __this_cpu_read(mce_queue_count) - 1;
> evt = this_cpu_ptr(&mce_event_queue[index]);
> +
> + if (evt->error_type == MCE_ERROR_TYPE_UE &&
> + evt->u.ue_error.ignore_event) {
> + __this_cpu_dec(mce_queue_count);
> + continue;
> + }
> machine_check_print_event_info(evt, false, false);
> __this_cpu_dec(mce_queue_count);
> }
> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index e74816f045f8..1dd87f6f5186 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -11,6 +11,7 @@
>
> #include <linux/types.h>
> #include <linux/ptrace.h>
> +#include <linux/extable.h>
> #include <asm/mmu.h>
> #include <asm/mce.h>
> #include <asm/machdep.h>
> @@ -18,6 +19,7 @@
> #include <asm/pte-walk.h>
> #include <asm/sstep.h>
> #include <asm/exception-64s.h>
> +#include <asm/extable.h>
>
> /*
> * Convert an address related to an mm to a physical address.
> @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs,
> return 0;
> }
>
> -static long mce_handle_ue_error(struct pt_regs *regs)
> +static long mce_handle_ue_error(struct pt_regs *regs,
> + struct mce_error_info *mce_err)
> {
> long handled = 0;
> + const struct exception_table_entry *entry;
> +
> + entry = search_kernel_exception_table(regs->nip);
> + if (entry) {
> + mce_err->ignore_event = true;
> + regs->nip = extable_fixup(entry);
> + return 1;
> + }
>
> /*
> * On specific SCOM read via MMIO we may get a machine check
> @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs *regs,
> &phys_addr);
>
> if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
> - handled = mce_handle_ue_error(regs);
> + handled = mce_handle_ue_error(regs, &mce_err);
>
> save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
>
>