Re: [PATCH v2 4/4] EDAC: synopsys: Add useful debug and output information for 64bit systems

From: Robert Richter
Date: Wed Apr 01 2020 - 06:57:58 EST


On 01.04.20 15:39:09, Sherry Sun wrote:
> Now the synopsys_edac driver only support to output the 32-bit error
> data, but for 64 bit systems, such as i.MX8MP, 64 bit error data is
> needed. At the same time, when CE/UE happens, syndrome data is also
> useful to showed to user. So here add data_high and syndrome data for
> 64-bit systems.
>
> And in order to distinguish 64-bit systems and other systems, here
> adjust the position of the zynqmp_get_dtype(), so we can called
> this function to distinguish it. To ensure that functions of the same
> function are in the same position, here adjust the position of the
> zynq_get_dtype() too.
>
> Signed-off-by: Sherry Sun <sherry.sun@xxxxxxx>
> ---
> drivers/edac/synopsys_edac.c | 182 ++++++++++++++++++++---------------
> 1 file changed, 102 insertions(+), 80 deletions(-)
>
> diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
> index bf4202a24683..e8c3631ddff4 100644
> --- a/drivers/edac/synopsys_edac.c
> +++ b/drivers/edac/synopsys_edac.c
> @@ -278,18 +278,22 @@
> * @col: Column number.
> * @bank: Bank number.
> * @bitpos: Bit position.
> - * @data: Data causing the error.
> + * @data_low: Low bit data causing the error.
> + * @data_high: High bit data causing the error(used for 64 bit systems).
> * @bankgrpnr: Bank group number.
> * @blknr: Block number.
> + * @syndrome: Syndrome of the error.
> */
> struct ecc_error_info {
> u32 row;
> u32 col;
> u32 bank;
> u32 bitpos;
> - u32 data;
> + u32 data_low;
> + u32 data_high;

There are 16, 32 and 64 bit widths. You could handle them all the same
in a u64.

If I am not wrong, the width is fixed for the whole mci. So you could
create various .get_error_info() functions depending on the data
width without run time width checks.

> u32 bankgrpnr;
> u32 blknr;
> + u32 syndrome;
> };
>
> /**

> @@ -399,7 +467,7 @@ static int zynq_get_error_info(struct synps_edac_priv *priv)
> p->ueinfo.row = (regval & ADDR_ROW_MASK) >> ADDR_ROW_SHIFT;
> p->ueinfo.col = regval & ADDR_COL_MASK;
> p->ueinfo.bank = (regval & ADDR_BANK_MASK) >> ADDR_BANK_SHIFT;
> - p->ueinfo.data = readl(base + UE_DATA_31_0_OFST);
> + p->ueinfo.data_low = readl(base + UE_DATA_31_0_OFST);
> clearval |= ECC_CTRL_CLR_UE_ERR;
>
> out:
> @@ -443,10 +511,14 @@ static int zynqmp_get_error_info(struct synps_edac_priv *priv)
> p->ceinfo.bankgrpnr = (regval & ECC_CEADDR1_BNKGRP_MASK) >>
> ECC_CEADDR1_BNKGRP_SHIFT;
> p->ceinfo.blknr = (regval & ECC_CEADDR1_BLKNR_MASK);
> - p->ceinfo.data = readl(base + ECC_CSYND0_OFST);
> - edac_dbg(2, "ECCCSYN0: 0x%08X ECCCSYN1: 0x%08X ECCCSYN2: 0x%08X\n",
> - readl(base + ECC_CSYND0_OFST), readl(base + ECC_CSYND1_OFST),
> - readl(base + ECC_CSYND2_OFST));
> + p->ceinfo.data_low = readl(base + ECC_CSYND0_OFST);
> + if (zynqmp_get_dtype(base) == DEV_X8) {
> + p->ceinfo.data_high = readl(base + ECC_CSYND1_OFST);
> + p->ceinfo.syndrome = readl(base + ECC_CSYND2_OFST);
> + edac_dbg(2, "CE data_low: 0x%08X data_high: 0x%08X syndrome: 0x%08X\n",
> + p->ceinfo.data_low, p->ceinfo.data_high,
> + p->ceinfo.syndrome);

You are losing edac_dbg() here for the != DEV_X8 cases.

> + }
> ue_err:
> if (!p->ue_cnt)
> goto out;
> @@ -459,7 +531,14 @@ static int zynqmp_get_error_info(struct synps_edac_priv *priv)
> p->ueinfo.bank = (regval & ECC_CEADDR1_BNKNR_MASK) >>
> ECC_CEADDR1_BNKNR_SHIFT;
> p->ueinfo.blknr = (regval & ECC_CEADDR1_BLKNR_MASK);
> - p->ueinfo.data = readl(base + ECC_UESYND0_OFST);
> + p->ueinfo.data_low = readl(base + ECC_UESYND0_OFST);
> + if (zynqmp_get_dtype(base) == DEV_X8) {
> + p->ueinfo.data_high = readl(base + ECC_UESYND1_OFST);
> + p->ueinfo.syndrome = readl(base + ECC_UESYND2_OFST);
> + edac_dbg(2, "UE data_low: 0x%08X data_high: 0x%08X syndrome: 0x%08X\n",
> + p->ueinfo.data_low, p->ueinfo.data_high,
> + p->ueinfo.syndrome);

Similar here, no edac_dbg() for != DEV_X8.

> + }
> out:
> clearval = ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT;
> clearval |= ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT;
> @@ -480,20 +559,27 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
> {
> struct synps_edac_priv *priv = mci->pvt_info;
> struct ecc_error_info *pinf;
> + int n;
>
> if (p->ce_cnt) {
> pinf = &p->ceinfo;
> if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) {
> - snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
> - "DDR ECC error type:%s Row %d Bank %d BankGroup Number %d Block Number %d Bit Position: %d Data: 0x%08x",
> - "CE", pinf->row, pinf->bank,
> - pinf->bankgrpnr, pinf->blknr,
> - pinf->bitpos, pinf->data);
> + n = snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
> + "DDR ECC error type:%s Row %d Bank %d BankGroup Number %d Block Number %d Bit Position: %d Data: 0x%08x",
> + "CE", pinf->row, pinf->bank,
> + pinf->bankgrpnr, pinf->blknr,
> + pinf->bitpos, pinf->data_low);
> +
> + if (zynqmp_get_dtype(priv->baseaddr) == DEV_X8)

This is zynqmp specific, right? But you call it in the generic
function handle_error().

-Robert

> + snprintf(priv->message + n,
> + SYNPS_EDAC_MSG_SIZE - n,
> + " Data_high: 0x%08x Syndrome: 0x%08x",
> + pinf->data_high, pinf->syndrome);
> } else {
> snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
> "DDR ECC error type:%s Row %d Bank %d Col %d Bit Position: %d Data: 0x%08x",
> "CE", pinf->row, pinf->bank, pinf->col,
> - pinf->bitpos, pinf->data);
> + pinf->bitpos, pinf->data_low);
> }
>
> edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,