[PATCH v9 4/6] edac: synopsys: Add EDAC ECC support for ZynqMP DDRC

From: Manish Narani
Date: Mon Oct 15 2018 - 01:30:33 EST


Add EDAC ECC support for ZynqMP DDRC IP. The IP supports interrupts for
corrected and uncorrected errors. Add an interrupt handler for them.

Signed-off-by: Manish Narani <manish.narani@xxxxxxxxxx>
---
drivers/edac/Kconfig | 2 +-
drivers/edac/synopsys_edac.c | 324 ++++++++++++++++++++++++++++++++++++++++---
2 files changed, 308 insertions(+), 18 deletions(-)

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 57304b2..7c40eb2 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -441,7 +441,7 @@ config EDAC_ALTERA_SDMMC

config EDAC_SYNOPSYS
tristate "Synopsys DDR Memory Controller"
- depends on ARCH_ZYNQ
+ depends on ARCH_ZYNQ || ARCH_ZYNQMP
help
Support for error detection and correction on the Synopsys DDR
memory controller.
diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index d1999e0..603c4bd 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -22,6 +22,7 @@
#include <linux/edac.h>
#include <linux/module.h>
#include <linux/platform_device.h>
+#include <linux/interrupt.h>
#include <linux/of.h>
#include <linux/of_device.h>

@@ -272,6 +273,8 @@
* @bank: Bank number.
* @bitpos: Bit position.
* @data: Data causing the error.
+ * @bankgrpnr: Bank group number.
+ * @blknr: Block number.
*/
struct ecc_error_info {
u32 row;
@@ -279,6 +282,8 @@ struct ecc_error_info {
u32 bank;
u32 bitpos;
u32 data;
+ u32 bankgrpnr;
+ u32 blknr;
};

/**
@@ -385,6 +390,66 @@ static int zynq_get_error_info(struct synps_edac_priv *priv)
}

/**
+ * zynqmp_get_error_info - Get the current ECC error info.
+ * @priv: DDR memory controller private instance data.
+ *
+ * Return: one if ECC_STAT reads zero (no error to report), otherwise zero.
+ */
+static int zynqmp_get_error_info(struct synps_edac_priv *priv)
+{
+ struct synps_ecc_status *p;
+ u32 regval, clearval = 0;
+ void __iomem *base;
+
+ base = priv->baseaddr;
+ p = &priv->stat;
+
+ regval = readl(base + ECC_STAT_OFST);
+ if (!regval)
+ return 1; /* ECC_STAT is zero: no CE or UE count to decode */
+
+ p->ce_cnt = (regval & ECC_STAT_CECNT_MASK) >> ECC_STAT_CECNT_SHIFT;
+ p->ue_cnt = (regval & ECC_STAT_UECNT_MASK) >> ECC_STAT_UECNT_SHIFT;
+ p->ceinfo.bitpos = (regval & ECC_STAT_BITNUM_MASK);
+
+ regval = readl(base + ECC_CEADDR0_OFST);
+ if (!p->ce_cnt)
+ goto ue_err; /* no correctable error: skip the CE address/syndrome decode */
+
+ p->ceinfo.row = (regval & ECC_CEADDR0_RW_MASK);
+ regval = readl(base + ECC_CEADDR1_OFST);
+ p->ceinfo.bank = (regval & ECC_CEADDR1_BNKNR_MASK) >>
+ ECC_CEADDR1_BNKNR_SHIFT;
+ p->ceinfo.bankgrpnr = (regval & ECC_CEADDR1_BNKGRP_MASK) >>
+ ECC_CEADDR1_BNKGRP_SHIFT;
+ p->ceinfo.blknr = (regval & ECC_CEADDR1_BLKNR_MASK);
+ p->ceinfo.data = readl(base + ECC_CSYND0_OFST); /* only syndrome word 0 is stored; 1 and 2 are debug-printed below */
+ edac_dbg(2, "ECCCSYN0: 0x%08X ECCCSYN1: 0x%08X ECCCSYN2: 0x%08X\n",
+ readl(base + ECC_CSYND0_OFST), readl(base + ECC_CSYND1_OFST),
+ readl(base + ECC_CSYND2_OFST));
+ue_err:
+ regval = readl(base + ECC_UEADDR0_OFST);
+ if (!p->ue_cnt)
+ goto out; /* no uncorrectable error: go straight to the clear sequence */
+
+ p->ueinfo.row = (regval & ECC_CEADDR0_RW_MASK); /* UE registers are decoded with the CE field masks; assumes same layout - TODO confirm */
+ regval = readl(base + ECC_UEADDR1_OFST);
+ p->ueinfo.bankgrpnr = (regval & ECC_CEADDR1_BNKGRP_MASK) >>
+ ECC_CEADDR1_BNKGRP_SHIFT;
+ p->ueinfo.bank = (regval & ECC_CEADDR1_BNKNR_MASK) >>
+ ECC_CEADDR1_BNKNR_SHIFT;
+ p->ueinfo.blknr = (regval & ECC_CEADDR1_BLKNR_MASK);
+ p->ueinfo.data = readl(base + ECC_UESYND0_OFST);
+out:
+ clearval = ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT;
+ clearval |= ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT;
+ writel(clearval, base + ECC_CLR_OFST); /* write all four CE/UE clear bits ... */
+ writel(0x0, base + ECC_CLR_OFST); /* ... then zero; presumably releases them - TODO confirm */
+
+ return 0;
+}
+
+/**
* handle_error - Handle Correctable and Uncorrectable errors.
* @mci: EDAC memory controller instance.
* @p: Synopsys ECC status structure.
@@ -398,9 +463,25 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)

if (p->ce_cnt) {
pinf = &p->ceinfo;
- snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
- "DDR ECC error type :%s Row %d Bank %d Col %d ",
- "CE", pinf->row, pinf->bank, pinf->col);
+ if (!priv->p_data->quirks) {
+ snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
+ "DDR ECC error type:%s Row %d Bank %d Col %d ",
+ "CE", pinf->row, pinf->bank, pinf->col);
+ snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
+ "Bit Position: %d Data: 0x%08x\n",
+ pinf->bitpos, pinf->data);
+ } else {
+ snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
+ "DDR ECC error type:%s Row %d Bank %d Col %d ",
+ "CE", pinf->row, pinf->bank, pinf->col);
+ snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
+ "BankGroup Number %d Block Number %d ",
+ pinf->bankgrpnr, pinf->blknr);
+ snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
+ "Bit Position: %d Data: 0x%08x\n",
+ pinf->bitpos, pinf->data);
+ }
+
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
p->ce_cnt, 0, 0, 0, 0, 0, -1,
priv->message, "");
@@ -408,9 +489,19 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)

if (p->ue_cnt) {
pinf = &p->ueinfo;
- snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
- "DDR ECC error type :%s Row %d Bank %d Col %d ",
- "UE", pinf->row, pinf->bank, pinf->col);
+ if (!priv->p_data->quirks) {
+ snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
+ "DDR ECC error type :%s Row %d Bank %d Col %d ",
+ "UE", pinf->row, pinf->bank, pinf->col);
+ } else {
+ snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
+ "DDR ECC error type :%s Row %d Bank %d Col %d ",
+ "UE", pinf->row, pinf->bank, pinf->col);
+ snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
+ "BankGroup Number %d Block Number %d",
+ pinf->bankgrpnr, pinf->blknr);
+ }
+
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
p->ue_cnt, 0, 0, 0, 0, 0, -1,
priv->message, "");
@@ -420,6 +511,42 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
}

/**
+ * intr_handler - Interrupt Handler for ECC interrupts.
+ * @irq: IRQ number.
+ * @dev_id: Device ID; used here as the struct mem_ctl_info pointer.
+ *
+ * Return: IRQ_NONE if no CE/UE interrupt is pending or no error info is found, IRQ_HANDLED otherwise.
+ */
+static irqreturn_t intr_handler(int irq, void *dev_id)
+{
+ const struct synps_platform_data *p_data;
+ struct mem_ctl_info *mci = dev_id;
+ struct synps_edac_priv *priv;
+ int status, regval;
+
+ priv = mci->pvt_info;
+ p_data = priv->p_data;
+
+ regval = readl(priv->baseaddr + DDR_QOS_IRQ_STAT_OFST);
+ regval &= (DDR_QOSCE_MASK | DDR_QOSUE_MASK); /* keep only the CE/UE status bits */
+ if (!(regval & ECC_CE_UE_INTR_MASK))
+ return IRQ_NONE;
+
+ status = p_data->get_error_info(priv);
+ if (status)
+ return IRQ_NONE; /* NOTE(review): status bits are not written back on this path - confirm intended */
+
+ priv->ce_cnt += priv->stat.ce_cnt;
+ priv->ue_cnt += priv->stat.ue_cnt;
+ handle_error(mci, &priv->stat);
+
+ edac_dbg(3, "Total error count CE %d UE %d\n",
+ priv->ce_cnt, priv->ue_cnt);
+ writel(regval, priv->baseaddr + DDR_QOS_IRQ_STAT_OFST); /* write the observed CE/UE bits back; presumably acknowledges them - TODO confirm */
+ return IRQ_HANDLED;
+}
+
+/**
* check_errors - Check controller for ECC errors.
* @mci: EDAC memory controller instance.
*
@@ -427,10 +554,13 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
*/
static void check_errors(struct mem_ctl_info *mci)
{
- struct synps_edac_priv *priv = mci->pvt_info;
- const struct synps_platform_data *p_data = priv->p_data;
+ const struct synps_platform_data *p_data;
+ struct synps_edac_priv *priv;
int status;

+ priv = mci->pvt_info;
+ p_data = priv->p_data;
+
status = p_data->get_error_info(priv);
if (status)
return;
@@ -475,6 +605,39 @@ static enum dev_type zynq_get_dtype(const void __iomem *base)
}

/**
+ * zynqmp_get_dtype - Return the controller memory width.
+ * @base: DDR memory controller base address.
+ *
+ * Get the EDAC device type width appropriate for the current controller
+ * configuration.
+ *
+ * Return: DEV_X2, DEV_X4 or DEV_X8 for a recognised bus width, else DEV_UNKNOWN.
+ */
+static enum dev_type zynqmp_get_dtype(const void __iomem *base)
+{
+ enum dev_type dt;
+ u32 width;
+
+ width = readl(base + CTRL_OFST);
+ width = (width & ECC_CTRL_BUSWIDTH_MASK) >> ECC_CTRL_BUSWIDTH_SHIFT; /* isolate the bus-width field of the CTRL register */
+ switch (width) {
+ case DDRCTL_EWDTH_16:
+ dt = DEV_X2;
+ break;
+ case DDRCTL_EWDTH_32:
+ dt = DEV_X4;
+ break;
+ case DDRCTL_EWDTH_64:
+ dt = DEV_X8;
+ break;
+ default:
+ dt = DEV_UNKNOWN;
+ }
+
+ return dt;
+}
+
+/**
* zynq_get_ecc_state - Return the controller ECC enable/disable status.
* @base: DDR memory controller base address.
*
@@ -484,19 +647,43 @@ static enum dev_type zynq_get_dtype(const void __iomem *base)
*/
static bool zynq_get_ecc_state(void __iomem *base)
{
- bool state = false;
enum dev_type dt;
u32 ecctype;

dt = zynq_get_dtype(base);
if (dt == DEV_UNKNOWN)
- return state;
+ return false;

ecctype = readl(base + SCRUB_OFST) & SCRUB_MODE_MASK;
if ((ecctype == SCRUB_MODE_SECDED) && (dt == DEV_X2))
- state = true;
+ return true;

- return state;
+ return false;
+}
+
+/**
+ * zynqmp_get_ecc_state - Return the controller ECC enable/disable status.
+ * @base: DDR memory controller base address.
+ *
+ * Get the ECC enable/disable status for the controller.
+ *
+ * Return: true (enabled) when ECC_CFG0 selects SCRUB_MODE_SECDED on a known device width, else false (disabled).
+ */
+static bool zynqmp_get_ecc_state(void __iomem *base)
+{
+ enum dev_type dt;
+ u32 ecctype;
+
+ dt = zynqmp_get_dtype(base);
+ if (dt == DEV_UNKNOWN)
+ return false; /* bus width was not recognised by zynqmp_get_dtype() */
+
+ ecctype = readl(base + ECC_CFG0_OFST) & SCRUB_MODE_MASK; /* ECC_CFG0 is decoded with the SCRUB_MODE_* mask */
+ if ((ecctype == SCRUB_MODE_SECDED) &&
+ ((dt == DEV_X2) || (dt == DEV_X4) || (dt == DEV_X8)))
+ return true;
+
+ return false;
}

/**
@@ -538,6 +725,34 @@ static enum mem_type zynq_get_mtype(const void __iomem *base)
}

/**
+ * zynqmp_get_mtype - Returns controller memory type.
+ * @base: DDR memory controller base address.
+ *
+ * Get the EDAC memory type appropriate for the current controller
+ * configuration.
+ *
+ * Return: MEM_DDR3, MEM_RDDR2 or MEM_DDR4 per the CTRL register type bits, else MEM_EMPTY.
+ */
+static enum mem_type zynqmp_get_mtype(const void __iomem *base)
+{
+ enum mem_type mt;
+ u32 memtype;
+
+ memtype = readl(base + CTRL_OFST);
+
+ if ((memtype & MEM_TYPE_DDR3) || (memtype & MEM_TYPE_LPDDR3))
+ mt = MEM_DDR3;
+ else if (memtype & MEM_TYPE_DDR2)
+ mt = MEM_RDDR2; /* NOTE(review): this is the registered-DDR2 type; confirm MEM_DDR2 was not intended */
+ else if ((memtype & MEM_TYPE_LPDDR4) || (memtype & MEM_TYPE_DDR4))
+ mt = MEM_DDR4;
+ else
+ mt = MEM_EMPTY;
+
+ return mt;
+}
+
+/**
* init_csrows - Initialize the csrow data.
* @mci: EDAC memory controller instance.
*
@@ -598,13 +813,57 @@ static void mc_init(struct mem_ctl_info *mci, struct platform_device *pdev)
mci->dev_name = SYNPS_EDAC_MOD_STRING;
mci->mod_name = SYNPS_EDAC_MOD_VER;

- edac_op_state = EDAC_OPSTATE_POLL;
- mci->edac_check = check_errors;
+ if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) {
+ edac_op_state = EDAC_OPSTATE_INT;
+ } else {
+ edac_op_state = EDAC_OPSTATE_POLL;
+ mci->edac_check = check_errors;
+ }
+
mci->ctl_page_to_phys = NULL;

init_csrows(mci);
}

+static void enable_intr(struct synps_edac_priv *priv)
+{
+ /* Enable UE/CE interrupts: write both masks to the DDR_QOS_IRQ_EN register */
+ writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
+ priv->baseaddr + DDR_QOS_IRQ_EN_OFST);
+}
+
+static void disable_intr(struct synps_edac_priv *priv)
+{
+ /* Disable UE/CE interrupts: write both masks to the DDR_QOS_IRQ_DB register */
+ writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
+ priv->baseaddr + DDR_QOS_IRQ_DB_OFST);
+}
+
+static int setup_irq(struct mem_ctl_info *mci,
+ struct platform_device *pdev)
+{
+ struct synps_edac_priv *priv = mci->pvt_info;
+ int ret, irq;
+
+ irq = platform_get_irq(pdev, 0); /* IRQ resource at index 0 of the platform device */
+ if (irq < 0) {
+ edac_printk(KERN_ERR, EDAC_MC,
+ "No IRQ %d in DT\n", irq);
+ return irq; /* negative value from platform_get_irq() is passed to the caller */
+ }
+
+ ret = devm_request_irq(&pdev->dev, irq, intr_handler,
+ 0, dev_name(&pdev->dev), mci); /* mci is handed to intr_handler() as dev_id */
+ if (ret < 0) {
+ edac_printk(KERN_ERR, EDAC_MC, "Failed to request IRQ\n");
+ return ret;
+ }
+
+ enable_intr(priv); /* CE/UE interrupts are enabled only after the handler is installed */
+
+ return 0;
+}
+
static const struct synps_platform_data zynq_edac_def = {
.get_error_info = zynq_get_error_info,
.get_mtype = zynq_get_mtype,
@@ -613,9 +872,26 @@ static const struct synps_platform_data zynq_edac_def = {
.quirks = 0,
};

+static const struct synps_platform_data zynqmp_edac_def = {
+ .get_error_info = zynqmp_get_error_info,
+ .get_mtype = zynqmp_get_mtype,
+ .get_dtype = zynqmp_get_dtype,
+ .get_ecc_state = zynqmp_get_ecc_state,
+ .quirks = DDR_ECC_INTR_SUPPORT, /* tested by mc_init()/mc_probe() to pick interrupt mode over polling */
+};
+
static const struct of_device_id synps_edac_match[] = {
- { .compatible = "xlnx,zynq-ddrc-a05", .data = (void *)&zynq_edac_def },
- { /* end of table */ }
+ {
+ .compatible = "xlnx,zynq-ddrc-a05",
+ .data = (void *)&zynq_edac_def
+ },
+ {
+ .compatible = "xlnx,zynqmp-ddrc-2.40a",
+ .data = (void *)&zynqmp_edac_def
+ },
+ {
+ /* end of table */
+ }
};

MODULE_DEVICE_TABLE(of, synps_edac_match);
@@ -674,6 +950,12 @@ static int mc_probe(struct platform_device *pdev)

mc_init(mci, pdev);

+ if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) {
+ rc = setup_irq(mci, pdev);
+ if (rc)
+ goto free_edac_mc;
+ }
+
rc = edac_mc_add_mc(mci);
if (rc) {
edac_printk(KERN_ERR, EDAC_MC,
@@ -685,7 +967,9 @@ static int mc_probe(struct platform_device *pdev)
* Start capturing the correctable and uncorrectable errors. A write of
* 0 starts the counters.
*/
- writel(0x0, baseaddr + ECC_CTRL_OFST);
+ if (!(priv->p_data->quirks & DDR_ECC_INTR_SUPPORT))
+ writel(0x0, baseaddr + ECC_CTRL_OFST);
+
return rc;

free_edac_mc:
@@ -703,6 +987,12 @@ static int mc_probe(struct platform_device *pdev)
static int mc_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+ struct synps_edac_priv *priv;
+
+ priv = mci->pvt_info;
+
+ if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT)
+ disable_intr(priv);

edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
--
2.1.1