Re: [PATCH] riscv: add system error interrupt handler support
From: Conor Dooley
Date: Thu Feb 26 2026 - 04:23:02 EST
On Thu, Feb 26, 2026 at 04:27:35PM +0800, Rui Qi wrote:
> Add a system error interrupt handler for RISC-V that panics
> the system when hardware errors are detected. The implementation includes:
>
> - Add IRQ_SYS_ERROR (23) interrupt definition to CSR header
> - Implement sys_error.c module with panic handler
> - Register per-CPU interrupt handler for system error interrupts
> - Add module to kernel build system
>
> When a system error interrupt occurs, the handler immediately panics
> the system with a descriptive message to ensure the error is properly
> captured and the system is halted safely.
>
> Signed-off-by: Rui Qi <qirui.001@xxxxxxxxxxxxx>
> ---
> arch/riscv/include/asm/csr.h | 4 +-
> arch/riscv/kernel/Makefile | 1 +
> arch/riscv/kernel/sys_error.c | 80 +++++++++++++++++++++++++++++++++++
> include/linux/cpuhotplug.h | 1 +
> 4 files changed, 85 insertions(+), 1 deletion(-)
> create mode 100644 arch/riscv/kernel/sys_error.c
>
> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> index 31b8988f4488..1f43c25b07ed 100644
> --- a/arch/riscv/include/asm/csr.h
> +++ b/arch/riscv/include/asm/csr.h
> @@ -99,7 +99,8 @@
> #define IRQ_M_EXT 11
> #define IRQ_S_GEXT 12
> #define IRQ_PMU_OVF 13
> -#define IRQ_LOCAL_MAX (IRQ_PMU_OVF + 1)
> +#define IRQ_SYS_ERROR 23
Hmmm, two problems I think with this. 23 is one of the interrupts that
has been reserved for use with AIA. I don't think they use it right now,
but in the future it might see use there.
The first problem is kind of moot though, because reserving 16-23 for
AIA is a retcon, and previously these interrupts were available for custom
use on any platform (as you have done here), so while it might be a
system error on your platform, it could be something completely innocuous
on mine!
With that in mind, does having this in arch code make sense at all?
Can this just be a normal driver, that'll only probe on your specific
platform?
Cheers,
Conor.
> +#define IRQ_LOCAL_MAX (IRQ_SYS_ERROR + 1)
> #define IRQ_LOCAL_MASK GENMASK((IRQ_LOCAL_MAX - 1), 0)
>
> /* Exception causes */
> @@ -535,6 +536,7 @@
> # define RV_IRQ_TIMER IRQ_S_TIMER
> # define RV_IRQ_EXT IRQ_S_EXT
> # define RV_IRQ_PMU IRQ_PMU_OVF
> +# define RV_IRQ_SYS_ERROR IRQ_SYS_ERROR
> # define SIP_LCOFIP (_AC(0x1, UL) << IRQ_PMU_OVF)
>
> #endif /* !CONFIG_RISCV_M_MODE */
> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> index cabb99cadfb6..3aaf16c75d6e 100644
> --- a/arch/riscv/kernel/Makefile
> +++ b/arch/riscv/kernel/Makefile
> @@ -72,6 +72,7 @@ obj-y += vendor_extensions.o
> obj-y += vendor_extensions/
> obj-y += probes/
> obj-y += tests/
> +obj-y += sys_error.o
> obj-$(CONFIG_MMU) += vdso.o vdso/
> obj-$(CONFIG_RISCV_USER_CFI) += vdso_cfi/
>
> diff --git a/arch/riscv/kernel/sys_error.c b/arch/riscv/kernel/sys_error.c
> new file mode 100644
> index 000000000000..5b88ff4a0e84
> --- /dev/null
> +++ b/arch/riscv/kernel/sys_error.c
> @@ -0,0 +1,80 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2026 Bytedance, Inc.
> + */
> +#define pr_fmt(fmt) "riscv-sys-error: " fmt
> +
> +#include <linux/kernel.h>
> +#include <linux/irq.h>
> +#include <linux/irqdomain.h>
> +#include <linux/interrupt.h>
> +#include <linux/percpu.h>
> +#include <linux/module.h>
> +#include <asm/irq.h>
> +#include <linux/cpuhotplug.h>
> +#include <asm/csr.h>
> +
> +static unsigned int riscv_sys_error_irq;
> +static DEFINE_PER_CPU_READ_MOSTLY(int, sys_error_dummy_dev);
> +
> +static irqreturn_t sys_error_irq_handler(int irq, void *dev)
> +{
> + panic("RISC-V System Error Interrupt - System Error Detected");
> + return IRQ_HANDLED;
> +}
> +
> +static int riscv_serror_starting_cpu(unsigned int cpu)
> +{
> + csr_set(CSR_IE, BIT(RV_IRQ_SYS_ERROR));
> + enable_percpu_irq(riscv_sys_error_irq, irq_get_trigger_type(riscv_sys_error_irq));
> + return 0;
> +}
> +
> +static int riscv_serror_dying_cpu(unsigned int cpu)
> +{
> + csr_clear(CSR_IE, BIT(RV_IRQ_SYS_ERROR));
> + disable_percpu_irq(riscv_sys_error_irq);
> + return 0;
> +}
> +
> +static int __init sys_error_init(void)
> +{
> + int ret;
> + struct irq_domain *domain = NULL;
> +
> + domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(),
> + DOMAIN_BUS_ANY);
> + if (!domain) {
> + pr_err("Failed to find INTC IRQ root domain\n");
> + return -ENODEV;
> + }
> +
> + riscv_sys_error_irq = irq_create_mapping(domain, RV_IRQ_SYS_ERROR);
> + if (!riscv_sys_error_irq) {
> + pr_err("Failed to map PMU interrupt for node\n");
> + return -ENODEV;
> + }
> +
> + ret = request_percpu_irq(riscv_sys_error_irq, sys_error_irq_handler,
> + "riscv-syserror", &sys_error_dummy_dev);
> + if (ret) {
> + pr_err("registering percpu irq failed [%d]\n", ret);
> + return ret;
> + }
> +
> + ret = cpuhp_setup_state(CPUHP_AP_RISCV_SERROR_STARTING,
> + "riscv/sys_error:starting",
> + riscv_serror_starting_cpu, riscv_serror_dying_cpu);
> + if (ret) {
> + pr_err("cpuhp setup state failed [%d]\n", ret);
> + goto fail_free_irq;
> + }
> +
> + return 0;
> +
> +fail_free_irq:
> + free_percpu_irq(riscv_sys_error_irq, &sys_error_dummy_dev);
> + return ret;
> +}
> +
> +arch_initcall(sys_error_init)
> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
> index 62cd7b35a29c..f6d0c05f72df 100644
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -174,6 +174,7 @@ enum cpuhp_state {
> CPUHP_AP_REALTEK_TIMER_STARTING,
> CPUHP_AP_RISCV_TIMER_STARTING,
> CPUHP_AP_CLINT_TIMER_STARTING,
> + CPUHP_AP_RISCV_SERROR_STARTING,
> CPUHP_AP_CSKY_TIMER_STARTING,
> CPUHP_AP_TI_GP_TIMER_STARTING,
> CPUHP_AP_HYPERV_TIMER_STARTING,
> --
> 2.20.1
Attachment:
signature.asc
Description: PGP signature