[PATCH RFC 4/4] trace, ras: add ARM RAS extension trace event

From: Tyler Baicar OS
Date: Tue Jul 02 2019 - 12:52:07 EST


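Add a trace event, arm_ras_ext_event, for hardware errors reported by the
ARMv8.2 RAS extension registers. The event records a type, an id and the
ERR_FR, ERR_CTLR, ERR_STATUS, ERR_ADDR, ERR_MISC0 and ERR_MISC1 values, and
is emitted from arch_arm_ras_report_error() and aest_proc().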

Signed-off-by: Tyler Baicar <baicar@xxxxxxxxxxxxxxxxxxxxxx>
---
arch/arm64/kernel/ras.c | 3 +++
drivers/acpi/arm64/aest.c | 4 ++++
include/ras/ras_event.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 53 insertions(+)

diff --git a/arch/arm64/kernel/ras.c b/arch/arm64/kernel/ras.c
index ca47efa..4e34d63 100644
--- a/arch/arm64/kernel/ras.c
+++ b/arch/arm64/kernel/ras.c
@@ -5,6 +5,7 @@
#include <linux/smp.h>

#include <asm/ras.h>
+#include <ras/ras_event.h>

void arch_arm_ras_report_error(void)
{
@@ -50,6 +51,8 @@ void arch_arm_ras_report_error(void)
regs.err_misc1);
}

+ trace_arm_ras_ext_event(0, cpu_num, &regs);
+
/*
* In the future, we will treat UER conditions as potentially
* recoverable.
diff --git a/drivers/acpi/arm64/aest.c b/drivers/acpi/arm64/aest.c
index fd4f3b5..21ec583 100644
--- a/drivers/acpi/arm64/aest.c
+++ b/drivers/acpi/arm64/aest.c
@@ -13,6 +13,7 @@
#include <linux/ratelimit.h>

#include <asm/ras.h>
+#include <ras/ras_event.h>

#undef pr_fmt
#define pr_fmt(fmt) "ACPI AEST: " fmt
@@ -102,6 +103,9 @@ static void aest_proc(struct aest_node_data *data)

aest_print(data, regs, i);

+ trace_arm_ras_ext_event(data->node_type, data->data.proc.id,
+ &regs);
+
if (regs.err_status & ERR_STATUS_UE)
fatal = true;

diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 36c5c5e..8b76cb1 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -339,6 +339,52 @@
);

/*
+ * ARM RAS Extension Events Report
+ *
+ * This event is generated when an error reported by the ARM RAS extension
+ * hardware is detected. It records the caller-supplied type and id and the
+ * ERR_FR/CTLR/STATUS/ADDR/MISC0/MISC1 values read through @regs.
+ */
+#ifdef CONFIG_ARM64
+#include <asm/ras.h>
+TRACE_EVENT(arm_ras_ext_event,
+
+ TP_PROTO(u8 type, u32 id, struct ras_ext_regs *regs),
+
+ TP_ARGS(type, id, regs),
+
+ TP_STRUCT__entry(
+ __field(u8, type)
+ __field(u32, id)
+ __field(u64, err_fr)
+ __field(u64, err_ctlr)
+ __field(u64, err_status)
+ __field(u64, err_addr)
+ __field(u64, err_misc0)
+ __field(u64, err_misc1)
+ ),
+
+ TP_fast_assign(
+ __entry->type = type;
+ __entry->id = id;
+ __entry->err_fr = regs->err_fr;
+ __entry->err_ctlr = regs->err_ctlr;
+ __entry->err_status = regs->err_status;
+ __entry->err_addr = regs->err_addr;
+ __entry->err_misc0 = regs->err_misc0;
+ __entry->err_misc1 = regs->err_misc1;
+ ),
+
+ TP_printk("type: %d; id: %u; ERR_FR: %llx; ERR_CTLR: %llx; "
+ "ERR_STATUS: %llx; ERR_ADDR: %llx; ERR_MISC0: %llx; "
+ "ERR_MISC1: %llx",
+ __entry->type, __entry->id, __entry->err_fr,
+ __entry->err_ctlr, __entry->err_status, __entry->err_addr,
+ __entry->err_misc0, __entry->err_misc1)
+);
+#endif
+
+/*
* memory-failure recovery action result event
*
* unsigned long pfn - Page Frame Number of the corrupted page
--
1.8.3.1