[PATCH v4 2/2] remoteproc: xlnx: add crash detection mechanism
From: Tanmay Shah
Date: Tue Mar 03 2026 - 18:37:28 EST
The remote processor reports the crash reason via the resource table
and notifies the host via mailbox notification. The host checks this
crash reason on every mailbox notification from the remote and reports
it to the rproc core framework. The rproc core framework then starts
the recovery process.
Signed-off-by: Tanmay Shah <tanmay.shah@xxxxxxx>
---
Changes in v4:
- Optimize crash resource memory by changing field types from u32 to u8
- Introduce version field in the crash resource
- Check crash related condition before rproc state related condition
- Move crash reporting to the bottom half of the ISR instead of the
actual IRQ handler
- Introduce a 16-byte array in the crash report to store the crash
reason in string format
- Remove redundant type cast
Changes in v3:
- %s/kick/mailbox notification/
- %s/core framework/rproc core framework/
- fold simple function within zynqmp_r5_handle_rsc().
- remove spurious change
- reset crash state after reporting the crash
- document set and reset of ATTACH_ON_RECOVERY flag
- set recovery_disabled flag to false
- check condition rproc->crash_reason != NULL
Changes in v2:
- clear attach recovery boot flag during detach and stop ops
drivers/remoteproc/xlnx_r5_remoteproc.c | 71 ++++++++++++++++++++++++-
1 file changed, 70 insertions(+), 1 deletion(-)
diff --git a/drivers/remoteproc/xlnx_r5_remoteproc.c b/drivers/remoteproc/xlnx_r5_remoteproc.c
index 5a468d959f1e..9c7cf9f37294 100644
--- a/drivers/remoteproc/xlnx_r5_remoteproc.c
+++ b/drivers/remoteproc/xlnx_r5_remoteproc.c
@@ -108,6 +108,10 @@ struct rsc_tbl_data {
const uintptr_t rsc_tbl;
} __packed;
+enum xlnx_rproc_fw_rsc {
+ XLNX_RPROC_FW_CRASH_REASON = RSC_VENDOR_START, /* vendor resource type that zynqmp_r5_handle_rsc() registers as the crash report */
+};
+
/*
* Hardcoded TCM bank values. This will stay in driver to maintain backward
* compatibility with device-tree that does not have TCM information.
@@ -127,9 +131,25 @@ static const struct mem_bank_data zynqmp_tcm_banks_lockstep[] = {
{0xffe30000UL, 0x30000, 0x10000UL, PD_R5_1_BTCM, "btcm1"},
};
+/**
+ * struct xlnx_rproc_crash_report - resource to know crash status and reason
+ *
+ * @version: version of this resource
+ * @crash_state: non-zero when the rproc is notifying a crash, time to recover
+ * @crash_reason: number to describe reason of crash
+ * @crash_reason_str: short string description of crash reason
+ *
+ * zynqmp_r5_handle_rsc() points the core's crash_report at this resource and
+ * handle_event_notified() inspects it when mailbox work runs. The driver
+ * writes @crash_state and @crash_reason back to zero and empties
+ * @crash_reason_str both when the resource is registered and after a crash
+ * has been reported. At most 15 characters of @crash_reason_str are printed.
+ * The structure is packed, so the members occupy consecutive bytes.
+ *
+ * NOTE(review): no code in this patch reads @version - confirm whether it
+ * should be validated before the other members are trusted.
+ */
+struct xlnx_rproc_crash_report {
+ u8 version;
+ u8 crash_state;
+ u8 crash_reason;
+ char crash_reason_str[16];
+} __packed;
+
/**
* struct zynqmp_r5_core - remoteproc core's internal data
*
+ * @crash_report: rproc crash state and reason
* @rsc_tbl_va: resource table virtual address
* @sram: Array of sram memories assigned to this core
* @num_sram: number of sram for this core
@@ -143,6 +163,7 @@ static const struct mem_bank_data zynqmp_tcm_banks_lockstep[] = {
* @ipi: pointer to mailbox information
*/
struct zynqmp_r5_core {
+ struct xlnx_rproc_crash_report *crash_report;
void __iomem *rsc_tbl_va;
struct zynqmp_sram_bank *sram;
int num_sram;
@@ -200,11 +221,27 @@ static int event_notified_idr_cb(int id, void *ptr, void *data)
*/
static void handle_event_notified(struct work_struct *work)
{
+ struct zynqmp_r5_core *r5_core;
struct mbox_info *ipi;
struct rproc *rproc;
ipi = container_of(work, struct mbox_info, mbox_work);
rproc = ipi->r5_core->rproc;
+ r5_core = ipi->r5_core;
+
+ /* report crash only if expected */
+ if (r5_core->crash_report && r5_core->crash_report->crash_state) {
+ if (rproc->state == RPROC_ATTACHED || rproc->state == RPROC_RUNNING) {
+ dev_warn(&rproc->dev, "crash reason id: %d %.15s\n",
+ r5_core->crash_report->crash_reason,
+ r5_core->crash_report->crash_reason_str);
+ rproc_report_crash(rproc, RPROC_FATAL_ERROR);
+ r5_core->crash_report->crash_state = false;
+ r5_core->crash_report->crash_reason = 0;
+ r5_core->crash_report->crash_reason_str[0] = '\0';
+ return;
+ }
+ }
/*
* We only use IPI for interrupt. The RPU firmware side may or may
@@ -438,6 +475,13 @@ static int zynqmp_r5_rproc_stop(struct rproc *rproc)
if (ret)
dev_err(r5_core->dev, "core force power down failed\n");
+ /*
+ * Clear attach on recovery flag during stop operation. The next state
+ * of the remote processor is expected to be "Running". In that state the
+ * boot recovery method must take precedence over attach on recovery.
+ */
+ test_and_clear_bit(RPROC_FEAT_ATTACH_ON_RECOVERY, rproc->features);
+
return ret;
}
@@ -859,6 +903,9 @@ static int zynqmp_r5_get_rsc_table_va(struct zynqmp_r5_core *r5_core)
static int zynqmp_r5_attach(struct rproc *rproc)
{
+ /* Enable attach on recovery method. Clear it during rproc stop. */
+ rproc_set_feature(rproc, RPROC_FEAT_ATTACH_ON_RECOVERY);
+
dev_dbg(&rproc->dev, "rproc %d attached\n", rproc->index);
return 0;
@@ -873,9 +920,30 @@ static int zynqmp_r5_detach(struct rproc *rproc)
*/
zynqmp_r5_rproc_kick(rproc, 0);
+ clear_bit(RPROC_FEAT_ATTACH_ON_RECOVERY, rproc->features);
+
return 0;
}
+/**
+ * zynqmp_r5_handle_rsc() - handle vendor specific resource table entries
+ * @rproc: remote processor whose resource table is being parsed
+ * @rsc_type: type of the resource entry
+ * @rsc: entry pointer supplied by the caller; unused, the entry is addressed
+ *       through the driver's own resource table mapping (rsc_tbl_va) instead
+ * @offset: offset of the entry, applied to rsc_tbl_va
+ * @avail: bytes available for this entry, as reported by the caller
+ *
+ * Only XLNX_RPROC_FW_CRASH_REASON is recognised. For that type the crash
+ * report is recorded in the core's private data and its crash state, reason
+ * and reason string are reset so that stale firmware contents are never
+ * reported as a new crash.
+ *
+ * NOTE(review): the version field of the crash report is not validated here;
+ * confirm whether unknown versions should be rejected.
+ *
+ * Return: RSC_HANDLED when the crash report was registered, RSC_IGNORED for
+ * any other resource type or when no resource table mapping exists, -EINVAL
+ * when the crash report does not fit into @avail.
+ */
+static int zynqmp_r5_handle_rsc(struct rproc *rproc, u32 rsc_type, void *rsc,
+				int offset, int avail)
+{
+	struct zynqmp_r5_core *r5_core = rproc->priv;
+	struct xlnx_rproc_crash_report *report;
+
+	if (rsc_type != XLNX_RPROC_FW_CRASH_REASON)
+		return RSC_IGNORED;
+
+	/* A truncated entry would make the writes below run past the table. */
+	if (avail < 0 || (size_t)avail < sizeof(*report)) {
+		dev_err(&rproc->dev, "crash report rsc is truncated\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Without a resource table mapping there is nothing valid to point
+	 * at. Leave crash detection disabled rather than dereference
+	 * NULL + offset.
+	 */
+	if (!r5_core->rsc_tbl_va) {
+		dev_warn(&rproc->dev,
+			 "no resource table mapping, crash report ignored\n");
+		r5_core->crash_report = NULL;
+		return RSC_IGNORED;
+	}
+
+	report = r5_core->rsc_tbl_va + offset;
+
+	/* reset all values */
+	report->crash_state = false;
+	report->crash_reason = 0;
+	report->crash_reason_str[0] = '\0';
+
+	/* Publish the pointer only once its contents are in a known state. */
+	r5_core->crash_report = report;
+
+	return RSC_HANDLED;
+}
+
static const struct rproc_ops zynqmp_r5_rproc_ops = {
.prepare = zynqmp_r5_rproc_prepare,
.unprepare = zynqmp_r5_rproc_unprepare,
@@ -890,6 +958,7 @@ static const struct rproc_ops zynqmp_r5_rproc_ops = {
.get_loaded_rsc_table = zynqmp_r5_get_loaded_rsc_table,
.attach = zynqmp_r5_attach,
.detach = zynqmp_r5_detach,
+ .handle_rsc = zynqmp_r5_handle_rsc,
};
/**
@@ -923,7 +992,7 @@ static struct zynqmp_r5_core *zynqmp_r5_add_rproc_core(struct device *cdev)
rproc_coredump_set_elf_info(r5_rproc, ELFCLASS32, EM_ARM);
- r5_rproc->recovery_disabled = true;
+ r5_rproc->recovery_disabled = false;
r5_rproc->has_iommu = false;
r5_rproc->auto_boot = false;
r5_core = r5_rproc->priv;
--
2.34.1