Hi,The ARM Architecture Reference Manual Supplement RAS document (https://developer.arm.com/documentation/ddi0587/latest) states in section 3.9 the following:
I haven't looked at this in great detail, but I spotted a few issues
from an initial scan.
On Wed, Nov 24, 2021 at 12:07:07PM -0500, Tyler Baicar wrote:
Add support for parsing the ARM Error Source Table and basic handling ofCan we actually assume that? What does the specification mandate?
errors reported through both memory mapped and system register interfaces.
Assume system register interfaces are only registered with private
peripheral interrupts (PPIs); otherwise there is no guarantee the
core handling the error is the core which took the error and has the
syndrome info in its system registers.
Yes, I have tested this on Ampere Altra hardware. I've tested both the PPI and SPI interrupt handling as well as system register and memory mapped interface options.Add logging for all detected errors and trigger a kernel panic if there isHas this been tested on any hardware or software platform?
any uncorrected error present.
Yes, this should be 16. I'll fix that in the next version.
[...]
+#define ERRDEVARCH_REV_SHIFT 0x16IIUC This should be 16, not 0x16 (i.e. 22).
Will do.+#define ERRDEVARCH_REV_MASK 0xfThese last four might be better an an array.
+
+#define RAS_REV_v1_1 0x1
+
+struct ras_ext_regs {
+ u64 err_fr;
+ u64 err_ctlr;
+ u64 err_status;
+ u64 err_addr;
+ u64 err_misc0;
+ u64 err_misc1;
+ u64 err_misc2;
+ u64 err_misc3;
+};
[...]That's a good point. I'll update this in the next version.
+static bool ras_extn_v1p1(void)I suspect it'd be better to pass this value around directly as
+{
+ unsigned long fld, reg = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+ fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64PFR0_RAS_SHIFT);
+
+ return fld >= ID_AA64PFR0_RAS_V1P1;
+}
`version`, rather than dropping this into a `misc23_present` temporary
variable, as that would be a little clearer, and future-proof if/when
more registers get added.
[...]Yes, will update.
+void arch_arm_ras_report_error(u64 implemented, bool clear_misc)Why get_cpu() here? Do you just need smp_processor_id()?
+{
+ struct ras_ext_regs regs = {0};
+ unsigned int i, cpu_num;
+ bool misc23_present;
+ bool fatal = false;
+ u64 num_records;
+
+ if (!this_cpu_has_cap(ARM64_HAS_RAS_EXTN))
+ return;
+
+ cpu_num = get_cpu();
The commit message explained that this would be PE-local (e.g. in a PPI
handler), and we've already checked this_cpu_has_cap() which assumes
we're not preemptible.
So I don't see why we should use get_cpu() here -- any time it would
have a difference implies something has already gone wrong.
+ num_records = read_sysreg_s(SYS_ERRIDR_EL1) & ERRIDR_NUM_MASK;Why in hex? We normally print 'cpu%d' or 'CPU%d', since this is a
+
+ for (i = 0; i < num_records; i++) {
+ if (!(implemented & BIT(i)))
+ continue;
+
+ write_sysreg_s(i, SYS_ERRSELR_EL1);
+ isb();
+ regs.err_status = read_sysreg_s(SYS_ERXSTATUS_EL1);
+
+ if (!(regs.err_status & ERR_STATUS_V))
+ continue;
+
+ pr_err("error from processor 0x%x\n", cpu_num);
logical ID anyway.
Will do.+As above, I reckon it's better to have this as 'version' or
+ if (regs.err_status & ERR_STATUS_AV)
+ regs.err_addr = read_sysreg_s(SYS_ERXADDR_EL1);
+
+ misc23_present = ras_extn_v1p1();
'ras_version', and have the checks below be:
if (version >= ID_AA64PFR0_RAS_V1P1) {
// poke SYS_ERXMISC2_EL1
// poke SYS_ERXMISC3_EL1
}
I like this proposal and will adopt it in the next version. The clearing is conditional based on an option in the ACPI table. The misc registers report mostly IMPDEF information which may or may not need to be cleared after reporting depending on the hardware IP.+Any reason not to clear when we read, above? e.g.
+ if (regs.err_status & ERR_STATUS_MV) {
+ regs.err_misc0 = read_sysreg_s(SYS_ERXMISC0_EL1);
+ regs.err_misc1 = read_sysreg_s(SYS_ERXMISC1_EL1);
+
+ if (misc23_present) {
+ regs.err_misc2 = read_sysreg_s(SYS_ERXMISC2_EL1);
+ regs.err_misc3 = read_sysreg_s(SYS_ERXMISC3_EL1);
+ }
+ }
+
+ arch_arm_ras_print_error(®s, i, misc23_present);
+
+ /*
+ * In the future, we will treat UER conditions as potentially
+ * recoverable.
+ */
+ if (regs.err_status & ERR_STATUS_UE)
+ fatal = true;
+
+ regs.err_status = arch_arm_ras_get_status_clear_value(regs.err_status);
+ write_sysreg_s(regs.err_status, SYS_ERXSTATUS_EL1);
+
+ if (clear_misc) {
+ write_sysreg_s(0x0, SYS_ERXMISC0_EL1);
+ write_sysreg_s(0x0, SYS_ERXMISC1_EL1);
+
+ if (misc23_present) {
+ write_sysreg_s(0x0, SYS_ERXMISC2_EL1);
+ write_sysreg_s(0x0, SYS_ERXMISC3_EL1);
+ }
+ }
#define READ_CLEAR_MISC(nr, clear) \
({ \
unsigned long __val = read_sysreg_s(SYS_ERXMISC##nr##_EL1); \
if (clear); \
write_sysreg_s(0, SYS_ERXMISC##nr##_EL1); \
__val; \
})
if (regs.err_status & ERR_STATUS_MV) {
regs.err_misc0 = READ_CLEAR_MISC(0, clear_misc);
regs.err_misc1 = READ_CLEAR_MISC(1, clear_misc);
if (version >= ID_AA64PFR0_RAS_V1P1) {
regs.err_misc2 = READ_CLEAR_MISC(2, clear_misc);
regs.err_misc3 = READ_CLEAR_MISC(3, clear_misc);
}
}
... why does the clearing need to be conditional?
I'll separate this into it's own patch. It's not just a bug in KVM, these system registers are not defined at all today in the arm64 sysreg.h file (adding those is part of this patch as well).+This should be a preparatory patch; this is preumably a latent bug in
+ isb();
+ }
+
+ if (fatal)
+ panic("ARM RAS: uncorrectable error encountered");
+
+ put_cpu();
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index e3ec1a44f94d..dc15e9896db4 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1573,6 +1573,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ERXADDR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERXMISC2_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERXMISC3_EL1), trap_raz_wi },
KVM.
[...]
+static struct aest_node_data __percpu **ppi_data;As above, do we have any guarantee these are actually PPIs?
+static int ppi_irqs[AEST_MAX_PPI];
+static u8 num_ppi;
+static u8 ppi_idx;
Yes, I can do that. When I tested this the IP I used did not have a DEVARCH register which followed the spec, otherwise I would have caught this.+static bool aest_mmio_ras_misc23_present(u64 base_addr)Is the shift the wrong way around?
+{
+ u32 val;
+
+ val = readl((void *) (base_addr + ERRDEVARCH_OFFSET));
+ val <<= ERRDEVARCH_REV_SHIFT;
+ val &= ERRDEVARCH_REV_MASK;
+
+ return val >= RAS_REV_v1_1;
+}
Above we have:
#define ERRDEVARCH_REV_SHIFT 0x16
#define ERRDEVARCH_REV_MASK 0xf
#define RAS_REV_v1_1 0x1
.. so this is:
val <<= 0x16;
val &= 0xf; // val[0x15:0] == 0, so this is 0
return val >= 0x1; // false
It'd be nicer to use FIELD_GET() here.
As above, I also think it would be better to retrun the value of the
field, and check that explciitly, for future proofing.
Good point. We should panic at least before clearing the error. I think the panic should happen immediately after the aest_print() call, do you agree? We should still print the error before panicking (APEI/GHES prints the full error prior to panicking as well).
[...]
+static void aest_proc(struct aest_node_data *data)Why don't we call panic() as soon as we realise an error is fatal?
+{
+ struct ras_ext_regs *regs_p, regs = {0};
+ bool misc23_present;
+ bool fatal = false;
+ u64 errgsr = 0;
+ int i;
+
+ /*
+ * Currently SR based handling is done through the architected
+ * discovery exposed through SRs. That may change in the future
+ * if there is supplemental information in the AEST that is
+ * needed.
+ */
+ if (data->interface.type == ACPI_AEST_NODE_SYSTEM_REGISTER) {
+ arch_arm_ras_report_error(data->interface.implemented,
+ data->interface.flags & AEST_INTERFACE_CLEAR_MISC);
+ return;
+ }
+
+ regs_p = data->interface.regs;
+ errgsr = readq((void *) (((u64) regs_p) + ERRGSR_OFFSET));
+
+ for (i = data->interface.start; i < data->interface.end; i++) {
+ if (!(data->interface.implemented & BIT(i)))
+ continue;
+
+ if (!(data->interface.status_reporting & BIT(i)) && !(errgsr & BIT(i)))
+ continue;
+
+ regs.err_status = readq(®s_p[i].err_status);
+ if (!(regs.err_status & ERR_STATUS_V))
+ continue;
+
+ if (regs.err_status & ERR_STATUS_AV)
+ regs.err_addr = readq(®s_p[i].err_addr);
+
+ regs.err_fr = readq(®s_p[i].err_fr);
+ regs.err_ctlr = readq(®s_p[i].err_ctlr);
+
+ if (regs.err_status & ERR_STATUS_MV) {
+ misc23_present = aest_mmio_ras_misc23_present((u64) regs_p);
+ regs.err_misc0 = readq(®s_p[i].err_misc0);
+ regs.err_misc1 = readq(®s_p[i].err_misc1);
+
+ if (misc23_present) {
+ regs.err_misc2 = readq(®s_p[i].err_misc2);
+ regs.err_misc3 = readq(®s_p[i].err_misc3);
+ }
+ }
+
+ aest_print(data, regs, i, misc23_present);
+
+ if (regs.err_status & ERR_STATUS_UE)
+ fatal = true;
+
+ regs.err_status = arch_arm_ras_get_status_clear_value(regs.err_status);
+ writeq(regs.err_status, ®s_p[i].err_status);
+
+ if (data->interface.flags & AEST_INTERFACE_CLEAR_MISC) {
+ writeq(0x0, ®s_p[i].err_misc0);
+ writeq(0x0, ®s_p[i].err_misc1);
+
+ if (misc23_present) {
+ writeq(0x0, ®s_p[i].err_misc2);
+ writeq(0x0, ®s_p[i].err_misc3);
+ }
+ }
+ }
+
+ if (fatal)
+ panic("AEST: uncorrectable error encountered");