Re: [PATCH v4 1/1] soc: fujitsu: Add A64FX diagnostic interrupt driver

From: Jiri Slaby
Date: Wed May 11 2022 - 02:48:03 EST


On 11. 05. 22, 8:21, Hitomi Hasegawa wrote:
Enable diagnostic interrupts for the Fujitsu A64FX.

Register the NMI/IRQ corresponding to the A64FX's device definition
dedicated to diagnostic interrupts, so that when this interrupt is
sent using the BMC, it causes a panic. This can be used to obtain
a kernel dump.

Signed-off-by: Hitomi Hasegawa <hasegawa-hitomi@xxxxxxxxxxx>

Hi,

I'm not sure why you cc linux-serial, but anyway, comments below :).

--- /dev/null
+++ b/drivers/soc/fujitsu/a64fx-diag.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * A64FX diag driver.
+ * Copyright (c) 2022 Fujitsu Ltd.
+ */
+
+#include <linux/acpi.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#define A64FX_DIAG_IRQ 1
+#define BMC_DIAG_INTERRUPT_STATUS_OFFSET (0x0044)
+#define BMC_DIAG_INTERRUPT_ENABLE_OFFSET (0x0040)
+#define BMC_DIAG_INTERRUPT_MASK BIT(31)
+
+struct a64fx_diag_priv {
+ int irq;
+ void __iomem *mmsc_reg_base;
+ bool has_nmi;

There are unnecessary holes in the struct. If you reorder it, you drop some alignment. Like: pointer, int, bool.

+};
+
+static irqreturn_t a64fx_diag_handler_nmi(int irq, void *dev_id)
+{
+ nmi_panic(NULL, "a64fx_diag: interrupt received\n");
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t a64fx_diag_handler_irq(int irq, void *dev_id)
+{
+ panic("a64fx_diag: interrupt received\n");
+
+ return IRQ_HANDLED;
+}
+
+static void a64fx_diag_interrupt_clear(struct a64fx_diag_priv *priv)
+{
+ u32 mmsc;
+ void __iomem *diag_status_reg_addr;

I'm not sure what soc/ maintainers prefer, but inverted xmas tree would look/read better.

+
+ diag_status_reg_addr = priv->mmsc_reg_base + BMC_DIAG_INTERRUPT_STATUS_OFFSET;
+ mmsc = readl(diag_status_reg_addr);
+ if (mmsc & BMC_DIAG_INTERRUPT_MASK)
+ writel(BMC_DIAG_INTERRUPT_MASK, diag_status_reg_addr);
+}
+
+static void a64fx_diag_interrupt_enable(struct a64fx_diag_priv *priv)
+{
+ u32 mmsc;
+ void __iomem *diag_enable_reg_addr;
+
+ diag_enable_reg_addr = priv->mmsc_reg_base + BMC_DIAG_INTERRUPT_ENABLE_OFFSET;
+ mmsc = readl(diag_enable_reg_addr);
+ if (!(mmsc & BMC_DIAG_INTERRUPT_MASK)) {
+ mmsc |= BMC_DIAG_INTERRUPT_MASK;
+ writel(mmsc, diag_enable_reg_addr);
+ }
+}
+
+static void a64fx_diag_interrupt_disable(struct a64fx_diag_priv *priv)
+{
+ u32 mmsc;
+ void __iomem *diag_enable_reg_addr;
+
+ diag_enable_reg_addr = priv->mmsc_reg_base + BMC_DIAG_INTERRUPT_ENABLE_OFFSET;
+ mmsc = readl(diag_enable_reg_addr);
+ if (mmsc & BMC_DIAG_INTERRUPT_MASK) {
+ mmsc &= ~BMC_DIAG_INTERRUPT_MASK;
+ writel(mmsc, diag_enable_reg_addr);
+ }
+}
+
+static int a64fx_diag_probe(struct platform_device *pdev)
+{
+ int ret;
+ unsigned long irq_flags;
+ struct device *dev = &pdev->dev;
+ struct a64fx_diag_priv *priv;
+
+ priv = devm_kzalloc(dev, sizeof(struct a64fx_diag_priv), GFP_KERNEL);

Don't we prefer sizeof(*priv)?

+ if (priv == NULL)
+ return -ENOMEM;
+
+ priv->mmsc_reg_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(priv->mmsc_reg_base))
+ return PTR_ERR(priv->mmsc_reg_base);
+
+ priv->irq = platform_get_irq(pdev, A64FX_DIAG_IRQ);
+ if (priv->irq < 0)
+ return priv->irq;
+
+ platform_set_drvdata(pdev, priv);
+
+ a64fx_diag_interrupt_clear(priv);
+ a64fx_diag_interrupt_enable(priv);
+
+ irq_flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_AUTOEN |
+ IRQF_NO_THREAD;
+ ret = request_nmi(priv->irq, &a64fx_diag_handler_nmi, irq_flags,
+ "a64fx_diag_nmi", NULL);
+ if (ret) {
+ ret = request_irq(priv->irq, &a64fx_diag_handler_irq,
+ irq_flags, "a64fx_diag_irq", NULL);
+ if (ret) {
+ dev_err(dev, "cannot register IRQ %d\n", ret);

No a64fx_diag_interrupt_disable()?

+ return ret;
+ }
+ enable_irq(priv->irq);

Hmm...

+ priv->has_nmi = false;

No need to set zeroed priv member to zero.

+ } else {
+ enable_nmi(priv->irq);

Provided the above, I don't immediatelly see, what's the purpose of IRQF_NO_AUTOEN then?

+ priv->has_nmi = true;
+ }
+
+ return 0;
+}
+
+static int __exit a64fx_diag_remove(struct platform_device *pdev)

Is __exit appropriate here at all -- I doubt that.

+{
+ struct a64fx_diag_priv *priv = platform_get_drvdata(pdev);
+
+ a64fx_diag_interrupt_disable(priv);
+ a64fx_diag_interrupt_clear(priv);
+
+ if (priv->has_nmi)
+ free_nmi(priv->irq, NULL);
+ else
+ free_irq(priv->irq, NULL);
+
+ return 0;
+}
+
+static const struct acpi_device_id a64fx_diag_acpi_match[] = {
+ { "FUJI2007", 0 },
+ { },
+};
+MODULE_DEVICE_TABLE(acpi, a64fx_diag_acpi_match);
+
+
+static struct platform_driver a64fx_diag_driver = {
+ .driver = {
+ .name = "a64fx_diag_driver",
+ .acpi_match_table = ACPI_PTR(a64fx_diag_acpi_match),
+ },
+ .probe = a64fx_diag_probe,
+ .remove = a64fx_diag_remove,
+};
+
+module_platform_driver(a64fx_diag_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Hitomi Hasegawa <hasegawa-hitomi@xxxxxxxxxxx>");
+MODULE_DESCRIPTION("A64FX diag driver");


--
js
suse labs