Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
From: Borislav Petkov
Date: Thu Nov 19 2015 - 13:34:41 EST
On Tue, Oct 27, 2015 at 03:38:12PM -0500, dinguyen@xxxxxxxxxxxxxxxxxxxxx wrote:
> From: Thor Thayer <tthayer@xxxxxxxxxxxxxxxxxxxxx>
>
> Adding L2 Cache and On-Chip RAM EDAC support for the
> Altera SoCs using the EDAC device model. The SDRAM
> controller is using the Memory Controller model.
>
> Each type of ECC is individually configurable.
>
> The SDRAM ECC is a separate Kconfig option because:
> 1) the SDRAM preparation can take almost 2 seconds on boot and some
> customers need a faster boot time.
> 2) the SDRAM has an ECC initialization dependency on the preloader
> which is outside the kernel. It is desirable to be able to turn the
> SDRAM on & off separately.
>
> Signed-off-by: Thor Thayer <tthayer@xxxxxxxxxxxxxxxxxxxxx>
> Signed-off-by: Dinh Nguyen <dinguyen@xxxxxxxxxxxxxxxxxxxxx>
> ---
> v7: s/of_get_named_gen_pool/of_gen_pool_get
> Remove #ifdef for EDAC_DEBUG
> Use -ENODEV instead of EPROBE_DEFER
>
> v6: Convert to nested EDAC in device tree. Force L2 cache
> on for L2Cache ECC & remove L2 cache syscon for checking
> enable bit. Update year in header.
>
> v5: No Change
>
> v4: Change mask defines to use BIT().
> Fix comment style to agree with kernel coding style.
> Better printk description for read != write in trigger.
> Remove SysFS debugging message.
> Better dci->mod_name
> Move gen_pool pointer assignment to end of function.
> Invert logic to reduce indent in ocram dependency check.
> Change from dev_err() to edac_printk()
> Replace magic numbers with defines & comments.
> Improve error injection test.
> Change Makefile intermediary name to altr (from alt)
>
> v3: Move OCRAM and L2 cache EDAC functions into altera_edac.c
> instead of separate files.
>
> v2: Fix L2 dependency comments.
> ---
> drivers/edac/Kconfig | 16 ++
> drivers/edac/Makefile | 5 +-
> drivers/edac/altera_edac.c | 488 ++++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 507 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index ef25000..b80b4ad 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -376,6 +376,22 @@ config EDAC_ALTERA_MC
> preloader must initialize the SDRAM before loading
> the kernel.
>
> +config EDAC_ALTERA_L2C
> + bool "Altera L2 Cache EDAC"
> + depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA
> + select CACHE_L2X0
> + help
> + Support for error detection and correction on the
> + Altera L2 cache Memory for Altera SoCs. This option
> + requires L2 cache so it will force that selection.
> +
> +config EDAC_ALTERA_OCRAM
> + bool "Altera On-Chip RAM EDAC"
> + depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA && SRAM && GENERIC_ALLOCATOR
> + help
> + Support for error detection and correction on the
> + Altera On-Chip RAM Memory for Altera SoCs.
> +
> config EDAC_SYNOPSYS
> tristate "Synopsys DDR Memory Controller"
> depends on EDAC_MM_EDAC && ARCH_ZYNQ
> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
> index dbf53e0..8f1c6fc 100644
> --- a/drivers/edac/Makefile
> +++ b/drivers/edac/Makefile
> @@ -67,6 +67,9 @@ obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o
> obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o
> obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o
>
> -obj-$(CONFIG_EDAC_ALTERA_MC) += altera_edac.o
> +altr_edac-y := altera_edac.o
> +obj-$(CONFIG_EDAC_ALTERA_MC) += altr_edac.o
> +obj-$(CONFIG_EDAC_ALTERA_L2C) += altr_edac.o
> +obj-$(CONFIG_EDAC_ALTERA_OCRAM) += altr_edac.o
What are those Makefile changes supposed to accomplish?
> obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o
> obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o
> diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
> index 9296409..154ac8c 100644
> --- a/drivers/edac/altera_edac.c
> +++ b/drivers/edac/altera_edac.c
> @@ -17,8 +17,10 @@
> * Adapted from the highbank_mc_edac driver.
> */
>
> +#include <asm/cacheflush.h>
> #include <linux/ctype.h>
> #include <linux/edac.h>
> +#include <linux/genalloc.h>
> #include <linux/interrupt.h>
> #include <linux/kernel.h>
> #include <linux/mfd/syscon.h>
> @@ -34,6 +36,7 @@
>
> #define EDAC_MOD_STR "altera_edac"
> #define EDAC_VERSION "1"
> +#define EDAC_DEVICE "ALTR_MEM"
Let's simply call it "Altera" - it is more human-friendly.
> static const struct altr_sdram_prv_data c5_data = {
> .ecc_ctrl_offset = CV_CTLCFG_OFST,
> @@ -75,6 +78,33 @@ static const struct altr_sdram_prv_data a10_data = {
> .ue_set_mask = A10_DIAGINT_TDERRA_MASK,
> };
>
> +/************************** EDAC Device Defines **************************/
> +
> +/* OCRAM ECC Management Group Defines */
> +#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04
> +#define ALTR_OCR_ECC_EN_MASK BIT(0)
> +#define ALTR_OCR_ECC_INJS_MASK BIT(1)
> +#define ALTR_OCR_ECC_INJD_MASK BIT(2)
> +#define ALTR_OCR_ECC_SERR_MASK BIT(3)
> +#define ALTR_OCR_ECC_DERR_MASK BIT(4)
> +
> +/* L2 ECC Management Group Defines */
> +#define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00
> +#define ALTR_L2_ECC_EN_MASK BIT(0)
> +#define ALTR_L2_ECC_INJS_MASK BIT(1)
> +#define ALTR_L2_ECC_INJD_MASK BIT(2)
These are single-bit masks, so you don't need the "_MASK" suffix in their
names - simply remove it.
> +#define ALTR_UE_TRIGGER_CHAR 'U' /* Trigger for UE */
> +#define ALTR_TRIGGER_READ_WRD_CNT 32 /* Line size x 4 */
> +#define ALTR_TRIG_OCRAM_BYTE_SIZE 128 /* Line size x 4 */
> +#define ALTR_TRIG_L2C_BYTE_SIZE 4096 /* Full Page */
> +
> +/*********************** EDAC Memory Controller Functions ****************/
> +
> +/* The SDRAM controller uses the EDAC Memory Controller framework. */
> +
> +#ifdef CONFIG_EDAC_ALTERA_MC
> +
> static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id)
> {
> struct mem_ctl_info *mci = dev_id;
> @@ -504,6 +534,462 @@ static struct platform_driver altr_sdram_edac_driver = {
>
> module_platform_driver(altr_sdram_edac_driver);
>
> +#endif /* #ifdef CONFIG_EDAC_ALTERA_MC */
#endif /* CONFIG_EDAC_ALTERA_MC */
is the usual syntax for those.
> +/************************* EDAC Parent Probe *************************/
> +
> +static const struct of_device_id altr_edac_device_of_match[];
> +
> +static const struct of_device_id altr_edac_of_match[] = {
> + { .compatible = "altr,edac" },
> + {},
> +};
> +MODULE_DEVICE_TABLE(of, altr_edac_of_match);
> +
> +static int altr_edac_probe(struct platform_device *pdev)
> +{
> + of_platform_populate(pdev->dev.of_node, altr_edac_device_of_match,
> + NULL, &pdev->dev);
> + return 0;
> +}
> +
> +static struct platform_driver altr_edac_driver = {
> + .probe = altr_edac_probe,
> + .driver = {
> + .name = "altr_edac",
> + .of_match_table = altr_edac_of_match,
> + },
> +};
> +module_platform_driver(altr_edac_driver);
WARNING: DT compatible string "altr,edac" appears un-documented -- check ./Documentation/devicetree/bindings/
#176: FILE: drivers/edac/altera_edac.c:543:
+ { .compatible = "altr,edac" },
WARNING: DT compatible string "altr,l2-edac" appears un-documented -- check ./Documentation/devicetree/bindings/
#326: FILE: drivers/edac/altera_edac.c:693:
+ { .compatible = "altr,l2-edac", .data = (void *)&l2ecc_data },
WARNING: DT compatible string "altr,ocram-edac" appears un-documented -- check ./Documentation/devicetree/bindings/
#329: FILE: drivers/edac/altera_edac.c:696:
+ { .compatible = "altr,ocram-edac", .data = (void *)&ocramecc_data },
> +
> +/************************* EDAC Device Functions *************************/
> +
> +/*
> + * EDAC Device Functions (shared between various IPs).
> + * The discrete memories use the EDAC Device framework. The probe
> + * and error handling functions are very similar between memories
> + * so they are shared. The memory allocation and free for EDAC trigger
s/and free/and freeing/
> + * testing are different for each memory.
> + */
> +
> +const struct edac_device_prv_data ocramecc_data;
> +const struct edac_device_prv_data l2ecc_data;
> +
> +struct edac_device_prv_data {
> + int (*setup)(struct platform_device *pdev, void __iomem *base);
> + int ce_clear_mask;
> + int ue_clear_mask;
> + struct edac_dev_sysfs_attribute *eccmgr_sysfs_attr;
> + void * (*alloc_mem)(size_t size, void **other);
> + void (*free_mem)(void *p, size_t size, void *other);
> + int ecc_enable_mask;
> + int ce_set_mask;
> + int ue_set_mask;
> + int trig_alloc_sz;
> +};
> +
> +struct altr_edac_device_dev {
> + void __iomem *base;
> + int sb_irq;
> + int db_irq;
> + const struct edac_device_prv_data *data;
> + char *edac_dev_name;
> +};
> +
> +static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
> +{
> + struct edac_device_ctl_info *dci = dev_id;
> + struct altr_edac_device_dev *drvdata = dci->pvt_info;
> + const struct edac_device_prv_data *priv = drvdata->data;
> +
> + if (irq == drvdata->sb_irq) {
> + if (priv->ce_clear_mask)
> + writel(priv->ce_clear_mask, drvdata->base);
> + edac_device_handle_ce(dci, 0, 0, drvdata->edac_dev_name);
> + }
> + if (irq == drvdata->db_irq) {
> + if (priv->ue_clear_mask)
> + writel(priv->ue_clear_mask, drvdata->base);
> + edac_device_handle_ue(dci, 0, 0, drvdata->edac_dev_name);
> + panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
> + }
> +
> + return IRQ_HANDLED;
> +}
> +
> +ssize_t altr_edac_device_trig(struct edac_device_ctl_info *edac_dci,
> + const char *buffer, size_t count)
Is that an error injection function? If so, it should be behind
CONFIG_EDAC_DEBUG or an Altera-specific Kconfig item which people can
explicitly enable - you don't want people to be able to inject errors on
production systems.
Also, those injection facilities should be in debugfs and not sysfs -
look at xgene_edac for an example.
I'll stop here - that's enough TODO for now :-)
Thanks.
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/