Re: [PATCH] riscv: add system error interrupt handler support
From: Rui Qi
Date: Fri Feb 27 2026 - 02:56:58 EST
On 2/26/26 5:22 PM, Conor Dooley wrote:
> On Thu, Feb 26, 2026 at 04:27:35PM +0800, Rui Qi wrote:
>> Add a system error interrupt handler for RISC-V that panics
>> the system when hardware errors are detected. The implementation includes:
>>
>> - Add IRQ_SYS_ERROR (23) interrupt definition to CSR header
>> - Implement sys_error.c module with panic handler
>> - Register per-CPU interrupt handler for system error interrupts
>> - Add module to kernel build system
>>
>> When a system error interrupt occurs, the handler immediately panics
>> the system with a descriptive message to ensure the error is properly
>> captured and the system is halted safely.
>>
>> Signed-off-by: Rui Qi <qirui.001@xxxxxxxxxxxxx>
>> ---
>> arch/riscv/include/asm/csr.h | 4 +-
>> arch/riscv/kernel/Makefile | 1 +
>> arch/riscv/kernel/sys_error.c | 80 +++++++++++++++++++++++++++++++++++
>> include/linux/cpuhotplug.h | 1 +
>> 4 files changed, 85 insertions(+), 1 deletion(-)
>> create mode 100644 arch/riscv/kernel/sys_error.c
>>
>> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
>> index 31b8988f4488..1f43c25b07ed 100644
>> --- a/arch/riscv/include/asm/csr.h
>> +++ b/arch/riscv/include/asm/csr.h
>> @@ -99,7 +99,8 @@
>> #define IRQ_M_EXT 11
>> #define IRQ_S_GEXT 12
>> #define IRQ_PMU_OVF 13
>> -#define IRQ_LOCAL_MAX (IRQ_PMU_OVF + 1)
>> +#define IRQ_SYS_ERROR 23
>
> Hmmm, two problems I think with this. 23 is one of the interrupts that
> has been reserved for use with AIA. I don't think they use it right now,
> but in the future it might see use there.
>
> The first problem is kind of moot though, because reserving 16-23 for
> AIA is a retcon, and previously these interrupts were available for custom
> use on any platform (as you have done here), so while it might be a
> system error on your platform, it could be something completely innocuous
> on mine!
>
> With that in mind, does having this in arch code make sense at all?
> Can this just be a normal driver, that'll only probe on your specific
> platform?
>
> Cheers,
> Conor.
>
Thanks for the comment.
I checked the latest RISC-V Interrupt Spec (2025-03-12). In that
version, interrupts 16–23 are defined as architectural local interrupts,
and interrupt 23 is tentatively proposed for a “Bus or system error”
type condition. That suggests this interrupt number is no longer just a
free, platform-defined slot — it now carries architectural intent and a
potential standardized meaning.
Given this context, my current implementation treats interrupt 23 as an
architectural local interrupt that matches the spec’s intent for a
system-level error signal, rather than as an arbitrary, custom platform
interrupt. This seems reasonable as long as it aligns with the
architectural semantics for local interrupts.
That said, I take your point about placing this handling in
arch/riscv, and I’d like to understand your preference: do you think
this should be entirely moved into platform-specific code, or would a
conditional, spec-aware arch implementation (e.g., gated on the presence
of the relevant AIA/local interrupt support) be acceptable? Please let
me know what approach you’d suggest.
>> +#define IRQ_LOCAL_MAX (IRQ_SYS_ERROR + 1)
>> #define IRQ_LOCAL_MASK GENMASK((IRQ_LOCAL_MAX - 1), 0)
>>
>> /* Exception causes */
>> @@ -535,6 +536,7 @@
>> # define RV_IRQ_TIMER IRQ_S_TIMER
>> # define RV_IRQ_EXT IRQ_S_EXT
>> # define RV_IRQ_PMU IRQ_PMU_OVF
>> +# define RV_IRQ_SYS_ERROR IRQ_SYS_ERROR
>> # define SIP_LCOFIP (_AC(0x1, UL) << IRQ_PMU_OVF)
>>
>> #endif /* !CONFIG_RISCV_M_MODE */
>> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
>> index cabb99cadfb6..3aaf16c75d6e 100644
>> --- a/arch/riscv/kernel/Makefile
>> +++ b/arch/riscv/kernel/Makefile
>> @@ -72,6 +72,7 @@ obj-y += vendor_extensions.o
>> obj-y += vendor_extensions/
>> obj-y += probes/
>> obj-y += tests/
>> +obj-y += sys_error.o
>> obj-$(CONFIG_MMU) += vdso.o vdso/
>> obj-$(CONFIG_RISCV_USER_CFI) += vdso_cfi/
>>
>> diff --git a/arch/riscv/kernel/sys_error.c b/arch/riscv/kernel/sys_error.c
>> new file mode 100644
>> index 000000000000..5b88ff4a0e84
>> --- /dev/null
>> +++ b/arch/riscv/kernel/sys_error.c
>> @@ -0,0 +1,80 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Copyright (c) 2026 Bytedance, Inc.
>> + */
>> +#define pr_fmt(fmt) "riscv-sys-error: " fmt
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/irq.h>
>> +#include <linux/irqdomain.h>
>> +#include <linux/interrupt.h>
>> +#include <linux/percpu.h>
>> +#include <linux/module.h>
>> +#include <asm/irq.h>
>> +#include <linux/cpuhotplug.h>
>> +#include <asm/csr.h>
>> +
>> +static unsigned int riscv_sys_error_irq;
>> +static DEFINE_PER_CPU_READ_MOSTLY(int, sys_error_dummy_dev);
>> +
>> +static irqreturn_t sys_error_irq_handler(int irq, void *dev)
>> +{
>> + panic("RISC-V System Error Interrupt - System Error Detected");
>> + return IRQ_HANDLED;
>> +}
>> +
>> +static int riscv_serror_starting_cpu(unsigned int cpu)
>> +{
>> + csr_set(CSR_IE, BIT(RV_IRQ_SYS_ERROR));
>> + enable_percpu_irq(riscv_sys_error_irq, irq_get_trigger_type(riscv_sys_error_irq));
>> + return 0;
>> +}
>> +
>> +static int riscv_serror_dying_cpu(unsigned int cpu)
>> +{
>> + csr_clear(CSR_IE, BIT(RV_IRQ_SYS_ERROR));
>> + disable_percpu_irq(riscv_sys_error_irq);
>> + return 0;
>> +}
>> +
>> +static int __init sys_error_init(void)
>> +{
>> + int ret;
>> + struct irq_domain *domain = NULL;
>> +
>> + domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(),
>> + DOMAIN_BUS_ANY);
>> + if (!domain) {
>> + pr_err("Failed to find INTC IRQ root domain\n");
>> + return -ENODEV;
>> + }
>> +
>> + riscv_sys_error_irq = irq_create_mapping(domain, RV_IRQ_SYS_ERROR);
>> + if (!riscv_sys_error_irq) {
>> + pr_err("Failed to map PMU interrupt for node\n");
>> + return -ENODEV;
>> + }
>> +
>> + ret = request_percpu_irq(riscv_sys_error_irq, sys_error_irq_handler,
>> + "riscv-syserror", &sys_error_dummy_dev);
>> + if (ret) {
>> + pr_err("registering percpu irq failed [%d]\n", ret);
>> + return ret;
>> + }
>> +
>> + ret = cpuhp_setup_state(CPUHP_AP_RISCV_SERROR_STARTING,
>> + "riscv/sys_error:starting",
>> + riscv_serror_starting_cpu, riscv_serror_dying_cpu);
>> + if (ret) {
>> + pr_err("cpuhp setup state failed [%d]\n", ret);
>> + goto fail_free_irq;
>> + }
>> +
>> + return 0;
>> +
>> +fail_free_irq:
>> + free_percpu_irq(riscv_sys_error_irq, &sys_error_dummy_dev);
>> + return ret;
>> +}
>> +
>> +arch_initcall(sys_error_init)
>> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
>> index 62cd7b35a29c..f6d0c05f72df 100644
>> --- a/include/linux/cpuhotplug.h
>> +++ b/include/linux/cpuhotplug.h
>> @@ -174,6 +174,7 @@ enum cpuhp_state {
>> CPUHP_AP_REALTEK_TIMER_STARTING,
>> CPUHP_AP_RISCV_TIMER_STARTING,
>> CPUHP_AP_CLINT_TIMER_STARTING,
>> + CPUHP_AP_RISCV_SERROR_STARTING,
>> CPUHP_AP_CSKY_TIMER_STARTING,
>> CPUHP_AP_TI_GP_TIMER_STARTING,
>> CPUHP_AP_HYPERV_TIMER_STARTING,
>> --
>> 2.20.1