[PATCH RESEND v2 12/18] EDAC/synopsys: Read data syndrome on errors

From: Serge Semin
Date: Sat Sep 10 2022 - 15:51:33 EST


In case of corrected and uncorrected errors, the DW uMCTL2 DDR controller
preserves the ECC syndrome of the erroneous data pattern in the ECCCSYN2
and ECCUSYN2 CSRs [1]. Since the MCI core permits supplying the ECC
syndrome to the error reporting method, let's read it from the CSRs and
pass the value to the core in order to provide more details about the
error that occurred.

[1] DesignWare® Cores Enhanced Universal DDR Memory Controller (uMCTL2)
Databook, Version 3.91a, October 2020, p.826, p.837

Signed-off-by: Serge Semin <Sergey.Semin@xxxxxxxxxxxxxxxxxxxx>
---
drivers/edac/synopsys_edac.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index 00417f368893..805ab7879000 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -302,6 +302,7 @@ struct snps_ddrc_info {
* @bankgrp: Bank group number.
* @bitpos: Bit position.
* @data: Data causing the error.
+ * @syndrome: Erroneous data syndrome.
*/
struct snps_ecc_error_info {
u32 row;
@@ -310,6 +311,7 @@ struct snps_ecc_error_info {
u32 bankgrp;
u32 bitpos;
u64 data;
+ u32 syndrome;
};

/**
@@ -421,9 +423,7 @@ static int snps_get_error_info(struct snps_edac_priv *priv)
if (priv->info.dq_width == SNPS_DQ_64)
p->ceinfo.data |= (u64)readl(base + ECC_CSYND1_OFST) << 32;

- edac_dbg(2, "ECCCSYN0: 0x%08X ECCCSYN1: 0x%08X ECCCSYN2: 0x%08X\n",
- readl(base + ECC_CSYND0_OFST), readl(base + ECC_CSYND1_OFST),
- readl(base + ECC_CSYND2_OFST));
+ p->ceinfo.syndrome = readl(base + ECC_CSYND2_OFST);

ue_err:
if (!p->ue_cnt)
@@ -441,6 +441,8 @@ static int snps_get_error_info(struct snps_edac_priv *priv)
if (priv->info.dq_width == SNPS_DQ_64)
p->ueinfo.data |= (u64)readl(base + ECC_UESYND1_OFST) << 32;

+ p->ueinfo.syndrome = readl(base + ECC_UESYND2_OFST);
+
out:
spin_lock_irqsave(&priv->lock, flags);

@@ -475,7 +477,7 @@ static void snps_handle_error(struct mem_ctl_info *mci, struct snps_ecc_status *
pinf->bitpos, pinf->data);

edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
- p->ce_cnt, 0, 0, 0, 0, 0, -1,
+ p->ce_cnt, 0, 0, pinf->syndrome, 0, 0, -1,
priv->message, "");
}

@@ -488,7 +490,7 @@ static void snps_handle_error(struct mem_ctl_info *mci, struct snps_ecc_status *
pinf->data);

edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
- p->ue_cnt, 0, 0, 0, 0, 0, -1,
+ p->ue_cnt, 0, 0, pinf->syndrome, 0, 0, -1,
priv->message, "");
}

--
2.37.2