On Wed, Jul 20, 2022 at 11:30:14AM +0530, Appana Durga Kedareswara rao wrote:
To let the user know the exact status of the TMR Subsystem (Manager)
From: Appana Durga Kedareswara rao <appana.durga.rao@xxxxxxxxxx>
The Triple Modular Redundancy (TMR) subsystem contains three microblaze
cores. The subsystem is fault-tolerant and continues to operate nominally
after encountering an error. Together with the capability to detect and
recover from errors, the implementation ensures the reliability of the
entire subsystem. The TMR Manager is responsible for performing recovery
of the subsystem. When a fault is detected via a break signal, the
microblaze software break handler is invoked, which calls the TMR manager
driver API to update the error count and status. Add support for the fault
detection feature via a sysfs interface.
Usage:
To know the hardware status:
cat /sys/devices/platform/amba_pl/44a10000.tmr_manager/status
To know the break handler count(Error count):
cat /sys/devices/platform/amba_pl/44a10000.tmr_manager/errcnt
Signed-off-by: Appana Durga Kedareswara rao <appana.durga.kedareswara.rao@xxxxxxx>
Signed-off-by: Appana Durga Kedareswara rao <appana.durga.rao@xxxxxxxxxx>
---
Changes for v2:
--> Added Examples for sysfs entries
--> Removed unneeded struct dev from the driver private structure
--> Fixed style issues (Used resource_size_t instead of uintptr_t)
--> Updated driver to use sysfs_emit() API instead of sprintf() API
--> Added error checks wherever applicable.
--> Fixed sysfs registration.
.../testing/sysfs-driver-xilinx-tmr-manager | 27 ++
MAINTAINERS | 7 +
drivers/misc/Kconfig | 10 +
drivers/misc/Makefile | 1 +
drivers/misc/xilinx_tmr_manager.c | 253 ++++++++++++++++++
5 files changed, 298 insertions(+)
create mode 100644 Documentation/ABI/testing/sysfs-driver-xilinx-tmr-manager
create mode 100644 drivers/misc/xilinx_tmr_manager.c
diff --git a/Documentation/ABI/testing/sysfs-driver-xilinx-tmr-manager b/Documentation/ABI/testing/sysfs-driver-xilinx-tmr-manager
new file mode 100644
index 000000000000..fc5fe7e22b09
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-xilinx-tmr-manager
@@ -0,0 +1,27 @@
+What: /sys/devices/platform/amba_pl/<dev>/status
+Date: June 2022
+Contact: appana.durga.rao@xxxxxxxxxx
+Description: This control file provides the status of the tmr manager
+ useful for getting the status of fault.
+ This file cannot be written.
+ Example:
+ # cat /sys/devices/platform/amba_pl/44a10000.tmr_manager/status
+ Lockstep mismatch between processor 1 and 2
+ Lockstep mismatch between processor 2 and 3
Why a whole long string?
And this should only be 1 line, not multiple lines. If it's multiple
lines, this is NOT ok for a sysfs file.
+
+What: /sys/devices/platform/amba_pl/<dev>/errcnt
+Date: June 2022
+Contact: appana.durga.rao@xxxxxxxxxx
+Description: This control file provides the fault detection count.
+ This file cannot be written.
+ Example:
+ # cat /sys/devices/platform/amba_pl/44a10000.tmr_manager/errcnt
+ 1
+
+What: /sys/devices/platform/amba_pl/<dev>/dis_block_break
+Date: June 2022
+Contact: appana.durga.rao@xxxxxxxxxx
+Description: This control file enables the break signal.
+ This file is write only.
+ Example:
+ # echo 1 > /sys/devices/platform/amba_pl/44a10000.tmr_manager/dis_block_break
diff --git a/MAINTAINERS b/MAINTAINERS
index 651616ed8ae2..732fd9ae7d9f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13080,6 +13080,13 @@ W: http://www.monstr.eu/fdt/
T: git git://git.monstr.eu/linux-2.6-microblaze.git
F: arch/microblaze/
+MICROBLAZE TMR MANAGER
+M: Appana Durga Kedareswara rao <appana.durga.kedareswara.rao@xxxxxxx>
+S: Supported
+F: Documentation/ABI/testing/sysfs-driver-xilinx-tmr-manager
+F: Documentation/devicetree/bindings/misc/xlnx,tmr-manager.yaml
+F: drivers/misc/xilinx_tmr_manager.c
+
MICROCHIP AT91 DMA DRIVERS
M: Ludovic Desroches <ludovic.desroches@xxxxxxxxxxxxx>
M: Tudor Ambarus <tudor.ambarus@xxxxxxxxxxxxx>
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 41d2bb0ae23a..555ae2e33b91 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -483,6 +483,16 @@ config OPEN_DICE
If unsure, say N.
+config TMR_MANAGER
+ bool "Select TMR Manager"
+ depends on MICROBLAZE && MB_MANAGER
+ help
+ This option enables the driver developed for TMR Manager. The Triple
+ Modular Redundancy(TMR) manager provides support for fault detection
+ via sysfs interface.
+
+ Say N here unless you know what you are doing.
Not a module?
+
source "drivers/misc/c2port/Kconfig"
source "drivers/misc/eeprom/Kconfig"
source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 70e800e9127f..28b9803f909b 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -60,3 +60,4 @@ obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o
obj-$(CONFIG_HISI_HIKEY_USB) += hisi_hikey_usb.o
obj-$(CONFIG_HI6421V600_IRQ) += hi6421v600-irq.o
obj-$(CONFIG_OPEN_DICE) += open-dice.o
+obj-$(CONFIG_TMR_MANAGER) += xilinx_tmr_manager.o
diff --git a/drivers/misc/xilinx_tmr_manager.c b/drivers/misc/xilinx_tmr_manager.c
new file mode 100644
index 000000000000..dbeca18c409f
--- /dev/null
+++ b/drivers/misc/xilinx_tmr_manager.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx TMR Subsystem.
+ *
+ * Copyright (C) 2022 Xilinx, Inc.
+ *
+ * Description:
+ * This driver is developed for the TMR Manager. The Triple Modular
+ * Redundancy (TMR) Manager is responsible for handling the TMR subsystem
+ * state, including fault detection and error recovery. The core is
+ * triplicated in each of the sub-blocks in the TMR subsystem, and provides
+ * majority voting of its internal state, which provides soft error
+ * detection, correction and recovery. The error detection feature is
+ * provided through sysfs entries which allow the user to observe the TMR
+ * microblaze status.
+ */
+
+#include <asm/xilinx_mb_manager.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+
+/* TMR Manager Register offsets */
+#define XTMR_MANAGER_CR_OFFSET 0x0
+#define XTMR_MANAGER_FFR_OFFSET 0x4
+#define XTMR_MANAGER_CMR0_OFFSET 0x8
+#define XTMR_MANAGER_CMR1_OFFSET 0xC
+#define XTMR_MANAGER_BDIR_OFFSET 0x10
+#define XTMR_MANAGER_SEMIMR_OFFSET 0x1C
+
+/* Register Bitmasks/shifts */
+#define XTMR_MANAGER_CR_MAGIC1_MASK GENMASK(7, 0)
+#define XTMR_MANAGER_CR_MAGIC2_MASK GENMASK(15, 8)
+#define XTMR_MANAGER_CR_RIR_MASK BIT(16)
+#define XTMR_MANAGER_FFR_LM12_MASK BIT(0)
+#define XTMR_MANAGER_FFR_LM13_MASK BIT(1)
+#define XTMR_MANAGER_FFR_LM23_MASK BIT(2)
+
+#define XTMR_MANAGER_CR_MAGIC2_SHIFT 4
+#define XTMR_MANAGER_CR_RIR_SHIFT 16
+#define XTMR_MANAGER_CR_BB_SHIFT 18
+
+#define XTMR_MANAGER_MAGIC1_MAX_VAL 255
+
+/**
+ * struct xtmr_manager_dev - Driver data for TMR Manager
+ * @regs: base address of the device's I/O memory (register space)
+ * @cr_val: control register value
+ * @magic1: Magic 1 hardware configuration value
+ * @err_cnt: error statistics count
+ * @phys_baseaddr: Physical base address
+ */
+struct xtmr_manager_dev {
+ void __iomem *regs;
+ u32 cr_val;
+ u32 magic1;
+ u32 err_cnt;
+ resource_size_t phys_baseaddr;
+};
+
+/* IO accessors */
+static inline void xtmr_manager_write(struct xtmr_manager_dev *xtmr_manager,
+ u32 addr, u32 value)
+{
+ iowrite32(value, xtmr_manager->regs + addr);
+}
+
+static inline u32 xtmr_manager_read(struct xtmr_manager_dev *xtmr_manager,
+ u32 addr)
+{
+ return ioread32(xtmr_manager->regs + addr);
+}
+
+static void xmb_manager_reset_handler(struct xtmr_manager_dev *xtmr_manager)
+{
+ /* Clear the FFR Register contents as a part of recovery process. */
+ xtmr_manager_write(xtmr_manager, XTMR_MANAGER_FFR_OFFSET, 0);
+}
+
+static void xmb_manager_update_errcnt(struct xtmr_manager_dev *xtmr_manager)
+{
+ xtmr_manager->err_cnt++;
+}
+
+static ssize_t errcnt_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct xtmr_manager_dev *xtmr_manager = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%x\n", xtmr_manager->err_cnt);
+}
+static DEVICE_ATTR_RO(errcnt);
+
+static ssize_t status_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct xtmr_manager_dev *xtmr_manager = dev_get_drvdata(dev);
+ size_t ffr;
+ int len = 0;
+
+ ffr = xtmr_manager_read(xtmr_manager, XTMR_MANAGER_FFR_OFFSET);
+ if ((ffr & XTMR_MANAGER_FFR_LM12_MASK) == XTMR_MANAGER_FFR_LM12_MASK) {
+ len += sysfs_emit_at(buf, len, "Lockstep mismatch between ");
+ len += sysfs_emit_at(buf, len, "processor 1 and 2\n");
You can write a full string all at once, no need to call this twice.
+ }
+
+ if ((ffr & XTMR_MANAGER_FFR_LM13_MASK) == XTMR_MANAGER_FFR_LM13_MASK) {
+ len += sysfs_emit_at(buf, len, "Lockstep mismatch between ");
+ len += sysfs_emit_at(buf, len, "processor 1 and 3\n");
+ }
+
+ if ((ffr & XTMR_MANAGER_FFR_LM23_MASK) == XTMR_MANAGER_FFR_LM23_MASK) {
+ len += sysfs_emit_at(buf, len, "Lockstep mismatch between ");
+ len += sysfs_emit_at(buf, len, "processor 2 and 3\n");
+ }
As said above, multiple lines is not ok, you need to fix up this api.
Perhaps 3 files, one for each type of mismatch and a simple 0/1 value
returned in them?
+
+ return len;
+}
+static DEVICE_ATTR_RO(status);
+
+static ssize_t dis_block_break_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct xtmr_manager_dev *xtmr_manager = dev_get_drvdata(dev);
+ int ret;
+ long value;
+
+ ret = kstrtoul(buf, 16, &value);
+ if (ret)
+ return ret;
+
+ if (value > 1)
+ return -EINVAL;
Why is 1 magic?
And we have a sysfs function to read a 0/1/Y/N/y/n value, please use
that.
thanks,
greg k-h