Re: [v2,1/4] watchdog: at91sam9_wdt: better watchdog support

From: boris brezillon
Date: Wed Oct 02 2013 - 15:27:28 EST


Hello Guenter,

Thanks for reviewing this patch.

On 02/10/2013 18:12, Guenter Roeck wrote:
On Fri, Jun 21, 2013 at 03:21:28PM -0000, Boris BREZILLON wrote:
The at91sam9 watchdog timer can only be configured once, and the current
implementation tries to configure it in a static way:
- 2 seconds timeout
- wdt restart every 500ms

If the timer has already been configured with different values, it returns an
error and do not create any watchdog device.

This is not critical if the watchdog is disabled, but if it has been enabled with
different timeout values it will lead to a SoC reset.

This patch series tries to address this issue by adapting the heartbeat value
according the WDT timer config:
- it first tries to configure the timer as requested.
- if it fails it fallbacks to the current config, adapting its heartbeat timer
to the needs

This patch series also move to a dynamically allocated at91wdt device instead
of the static instance.

It adds a new at91 wdt type: software. This new type make use of the at91 wdt
interrupt to trigger a software reboot.

Finally it adds several properties to the device tree bindings.

Signed-off-by: Boris BREZILLON <b.brezillon@xxxxxxxxxxx>

---
drivers/watchdog/at91sam9_wdt.c | 319 +++++++++++++++++++++++++++++----------
1 file changed, 236 insertions(+), 83 deletions(-)

diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c
index be37dde..fb47ec5 100644
--- a/drivers/watchdog/at91sam9_wdt.c
+++ b/drivers/watchdog/at91sam9_wdt.c
@@ -19,11 +19,13 @@
#include <linux/errno.h>
#include <linux/init.h>
+#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/platform_device.h>
+#include <linux/reboot.h>
#include <linux/types.h>
#include <linux/watchdog.h>
#include <linux/jiffies.h>
@@ -31,23 +33,32 @@
#include <linux/bitops.h>
#include <linux/uaccess.h>
#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
#include "at91sam9_wdt.h"
#define DRV_NAME "AT91SAM9 Watchdog"
-#define wdt_read(field) \
- __raw_readl(at91wdt_private.base + field)
-#define wdt_write(field, val) \
- __raw_writel((val), at91wdt_private.base + field)
+#define wdt_read(wdt, field) \
+ __raw_readl((wdt)->base + field)
+#define wdt_write(wtd, field, val) \
+ __raw_writel((val), (wdt)->base + field)
/* AT91SAM9 watchdog runs a 12bit counter @ 256Hz,
* use this to convert a watchdog
* value from/to milliseconds.
*/
+#define hz_to_ticks(h) (((h << 8) / HZ) - 1)
Unused.

I'll drop it.


+#define ticks_to_hz(t) (((t + 1) * HZ) >> 8)
#define ms_to_ticks(t) (((t << 8) / 1000) - 1)
#define ticks_to_ms(t) (((t + 1) * 1000) >> 8)
You should put macro parameters in () to avoid undesired side effects.

(h), (t), (field)

Sure, some of them are old. No reason not to fix it, though.

I'll fix it (I'll fix old macros in a separate patch)


+#define WDT_MR_RESET 0x3FFF2FFF
+
+#define WDT_WDD_MAX 0xFFF
+#define WDT_WDV_MAX 0xFFF
What are the units ?
ticks (slow clk cycles).
I'll rename these macros:
#define WDT_XXX_MAX_TICKS 0xFFF

+
/* Hardware timeout in seconds */
#define WDT_HW_TIMEOUT 2
@@ -66,23 +77,41 @@ module_param(nowayout, bool, 0);
MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
"(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-static struct watchdog_device at91_wdt_dev;
-static void at91_ping(unsigned long data);
-
-static struct {
+#define to_wdt(wdd) container_of(wdd, struct at91wdt, wdd)
+struct at91wdt {
+ struct watchdog_device wdd;
void __iomem *base;
unsigned long next_heartbeat; /* the next_heartbeat for the timer */
struct timer_list timer; /* The timer that pings the watchdog */
-} at91wdt_private;
+ u32 mr;
+ u32 mr_mask;
+ unsigned long heartbeat; /* WDT heartbeat in jiffies */
+ bool nowayout;
+ unsigned int irq;
+};
/* ......................................................................... */
+static irqreturn_t wdt_interrupt(int irq, void *dev_id)
+{
+ struct at91wdt *wdt = (struct at91wdt *)dev_id;
+
+ if (wdt_read(wdt, AT91_WDT_SR)) {
+ /* Reboot */
That is quite obvious, isn't it ?
I'll drop the comment line.

+ pr_crit("at91sam9 WDT software reset\n");
+ emergency_restart();
+ pr_crit("Reboot didn't ?????\n");
+ }
+
+ return IRQ_HANDLED;
+}
+
/*
* Reload the watchdog timer. (ie, pat the watchdog)
*/
-static inline void at91_wdt_reset(void)
+static inline void at91_wdt_reset(struct at91wdt *wdt)
{
- wdt_write(AT91_WDT_CR, AT91_WDT_KEY | AT91_WDT_WDRSTT);
+ wdt_write(wdt, AT91_WDT_CR, AT91_WDT_KEY | AT91_WDT_WDRSTT);
}
/*
@@ -90,26 +119,20 @@ static inline void at91_wdt_reset(void)
*/
static void at91_ping(unsigned long data)
{
- if (time_before(jiffies, at91wdt_private.next_heartbeat) ||
- (!watchdog_active(&at91_wdt_dev))) {
- at91_wdt_reset();
- mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT);
+ struct at91wdt *wdt = (struct at91wdt *)data;
+ if (time_before(jiffies, wdt->next_heartbeat) ||
+ (!watchdog_active(&wdt->wdd))) {

Unnecessary () around !watchdog_active()

[ Sure, old code. Not a reason not to clean it up ]
Okay, I'll remove all unnecessary "()" (even the old ones).

+ at91_wdt_reset(wdt);
+ mod_timer(&wdt->timer, jiffies + wdt->heartbeat);
} else
pr_crit("I will reset your machine !\n");
}
static int at91_wdt_ping(struct watchdog_device *wdd)
{
+ struct at91wdt *wdt = to_wdt(wdd);
/* calculate when the next userspace timeout will be */
- at91wdt_private.next_heartbeat = jiffies + wdd->timeout * HZ;
- return 0;
-}
-
-static int at91_wdt_start(struct watchdog_device *wdd)
-{
- /* calculate the next userspace timeout and modify the timer */
- at91_wdt_ping(wdd);
- mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT);
+ wdt->next_heartbeat = jiffies + wdd->timeout * HZ;
return 0;
}
@@ -125,36 +148,82 @@ static int at91_wdt_set_timeout(struct watchdog_device *wdd, unsigned int new_ti
return 0;
}
-/*
- * Set the watchdog time interval in 1/256Hz (write-once)
- * Counter is 12 bit.
- */
-static int at91_wdt_settimeout(unsigned int timeout)
+static int at91_wdt_init(struct platform_device *pdev, struct at91wdt *wdt)
{
- unsigned int reg;
- unsigned int mr;
-
- /* Check if disabled */
- mr = wdt_read(AT91_WDT_MR);
- if (mr & AT91_WDT_WDDIS) {
- pr_err("sorry, watchdog is disabled\n");
- return -EIO;
+ u32 tmp;
+ u32 delta;
+ u32 value;
+ int err;
+ u32 mask = wdt->mr_mask;
+
+ tmp = wdt_read(wdt, AT91_WDT_MR);
+ if ((tmp & mask) != (wdt->mr & mask)) {
+ if (tmp == WDT_MR_RESET) {
+ wdt_write(wdt, AT91_WDT_MR, wdt->mr);
+ tmp = wdt_read(wdt, AT91_WDT_MR);
+ }
+ }
+
+ if (tmp & AT91_WDT_WDDIS) {
+ if (wdt->mr & AT91_WDT_WDDIS)
+ return 0;
+ dev_err(wdt->wdd.parent,
+ ": sorry, watchdog is disabled\n");
No need to be sorry all the time. The ':' is unnecessary.

Sure, I'll remove the ":" and rework old and new messages (remove the "sorry" word).

+ return -EINVAL;
+ }
+
+ value = tmp & AT91_WDT_WDV;
+ delta = (tmp & AT91_WDT_WDD) >> 16;
+
+ if (delta < value) {
+ if ((value / (value - delta)) < 4) {
Unnecessary ( ) around left side of equation.

+ dev_err(wdt->wdd.parent,
+ ": sorry, max-heartbeat should be at least 4 times min-heartbeat (max-heartbeat = %d x min-heartbeat)\n",
+ (value / (value - delta)));
Unnecessary ( ) around the equation.

+ return -EINVAL;
+ }
+ }
+
+ wdt->heartbeat = ticks_to_hz(value / 4);
+
+ if (!wdt->heartbeat) {
+ dev_err(wdt->wdd.parent,
+ ": sorry, linux timer (%i Hz) cannot handle watchdog timeout (%i ms)\n",
+ HZ, ticks_to_ms(value));
+ return -EINVAL;
Isn't that a bit rude ? Why not set it to the minimum ?
We're using a normal timer (not an hrtimer), and IIRC normal timers' precision depends on the
configured HZ option. As a result the minimum heartbeat value may trigger the timer function
after the hardware watchdog counter expires.

An example where this problem might occur:

HZ = 10 <=> normal timer precision = 100 ms
min hearbeat = 180 ms
max hearbeat = 200 ms

The div by 4 operation is here to take a safety margin by refreshing the hardware counter at least
4 times more often than needed.

I'll think of something more flexible...
If you have some ideas, please feel free to share it.

+ }
+
+ if ((tmp & AT91_WDT_WDFIEN) && wdt->irq) {
+ err = request_irq(wdt->irq, wdt_interrupt,
+ IRQF_SHARED | IRQF_IRQPOLL,
+ pdev->name, wdt);
+ if (err)
+ return err;
}
- /*
- * All counting occurs at SLOW_CLOCK / 128 = 256 Hz
- *
- * Since WDV is a 12-bit counter, the maximum period is
- * 4096 / 256 = 16 seconds.
- */
- reg = AT91_WDT_WDRSTEN /* causes watchdog reset */
- /* | AT91_WDT_WDRPROC causes processor reset only */
- | AT91_WDT_WDDBGHLT /* disabled in debug mode */
- | AT91_WDT_WDD /* restart at any time */
- | (timeout & AT91_WDT_WDV); /* timer value */
- wdt_write(AT91_WDT_MR, reg);
+ if ((tmp & wdt->mr_mask) != (wdt->mr & wdt->mr_mask))
+ dev_warn(wdt->wdd.parent,
+ ": watchdog already configured differently (mr = %x expecting %x)\n",
+ tmp & wdt->mr_mask, wdt->mr & wdt->mr_mask);
+
+ setup_timer(&wdt->timer, at91_ping, (unsigned long)wdt);
+ mod_timer(&wdt->timer, jiffies + wdt->heartbeat);
+
+ /* Try to set timeout from device tree first */
+ if (watchdog_init_timeout(&wdt->wdd, 0, &pdev->dev))
+ watchdog_init_timeout(&wdt->wdd, heartbeat, &pdev->dev);
+ watchdog_set_nowayout(&wdt->wdd, wdt->nowayout);
+ err = watchdog_register_device(&wdt->wdd);
+ if (err)
+ goto out_stop_timer;
+
+ wdt->next_heartbeat = jiffies + wdt->wdd.timeout * HZ;
return 0;
+
+out_stop_timer:
+ del_timer(&wdt->timer);
+ return err;
}
/* ......................................................................... */
@@ -167,63 +236,147 @@ static const struct watchdog_info at91_wdt_info = {
static const struct watchdog_ops at91_wdt_ops = {
.owner = THIS_MODULE,
- .start = at91_wdt_start,
+ .start = at91_wdt_ping,
.stop = at91_wdt_stop,
.ping = at91_wdt_ping,
.set_timeout = at91_wdt_set_timeout,
};
-static struct watchdog_device at91_wdt_dev = {
- .info = &at91_wdt_info,
- .ops = &at91_wdt_ops,
- .timeout = WDT_HEARTBEAT,
- .min_timeout = 1,
- .max_timeout = 0xFFFF,
-};
+#if defined(CONFIG_OF)
+static int of_at91wdt_init(struct device_node *np, struct at91wdt *wdt)
+{
+ u32 min = 0;
+ u32 max = 0xFFF;
Are those default values ? If so might use defines (there are actually defines
for 0xfff already).
I'll replace the 0xfff value by the XX_MAX macro.
Should I create a new XX_MIN macro ?
+ const char *tmp;
+
+ wdt->base = of_iomap(np, 0);
I am a bit at loss why you need to use use this function here.

I think it would be more beneficial to use devm_ioremap() or even better
devm_ioremap_resource() in both of and non-dt cases; then you don't have
to call iounmap().

Sure. This was one of my first driver using the devm and of_iomap functions, and now I realize
I've been misusing them.

I'll fix this too.

+ if (!wdt->base) {
+ dev_err(wdt->wdd.parent, "failed to map registers, aborting.\n");
+ return -EINVAL;
+ }
+
+ /* Get the interrupts property */
+ wdt->irq = irq_of_parse_and_map(np, 0);
+ if (!wdt->irq)
+ dev_warn(wdt->wdd.parent, "failed to get IRQ from DT.\n");
+
+ if (!of_property_read_u32_index(np, "atmel,max-heartbeat-sec", 0,
+ &max)) {
+ max *= 256;
+ if (max > WDT_WDV_MAX)
+ max = WDT_WDV_MAX;
+
+ if (!of_property_read_u32_index(np, "atmel,min-heartbeat-sec",
+ 0, &min)) {
+ min *= 256;
+ if (4 * min > max)
+ min = max / 4;
+ }
+ }
I am a bit confused about those parameters. How do they match min_timeout and
max_timeout ?

AT91 wdt block does not provide a reconfigurable timeout.
To bypass this limitation the driver periodically reset the hw watchdog (based on the previous config,
the device tree config or the static config) using a timer.

The ping callback (used by watchdog core) only compute the next_heartbeat (expressed in system ticks)
based on the current jiffies value (number of ticks elapsed since system startup) and the configured
timeout value (configured using the set_timeout callback).

To summarize:
- min/max heartbeat timeout represent the hw timeout properties (those directly linked to WDD WDV
fields).
- min/max timeout (in watchdog_device struct) represent the software timeout properties (those
exposed to the user space API).

I could rework the driver to directly use the hw properties and remove the set_timeout functionality,
but this would break the user space API.

+
+ wdt->mr_mask = 0x3FFFFFFF;
+ wdt->mr = 0;
+ if (!of_property_read_string(np, "atmel,watchdog-type", &tmp) &&
+ !strcmp(tmp, "software")) {
+ wdt->mr |= AT91_WDT_WDFIEN;
+ wdt->mr_mask &= ~AT91_WDT_WDRPROC;
+ } else
+ wdt->mr |= AT91_WDT_WDRSTEN;
+
+ if (!of_property_read_string(np, "atmel,reset-type", &tmp) &&
+ !strcmp(tmp, "proc"))
+ wdt->mr |= AT91_WDT_WDRPROC;
+
+ if (of_property_read_bool(np, "atmel,disable")) {
+ wdt->mr |= AT91_WDT_WDDIS;
+ wdt->mr_mask &= AT91_WDT_WDDIS;
+ }
+
+ if (of_property_read_bool(np, "atmel,idle-halt"))
+ wdt->mr |= AT91_WDT_WDIDLEHLT;
+
+ if (of_property_read_bool(np, "atmel,dbg-halt"))
+ wdt->mr |= AT91_WDT_WDDBGHLT;
+
+ wdt->mr |= max | ((max - min) << 16);
+
+ return 0;
+}
+#else
+static inline int of_at91wdt_init(struct device_node *np, struct at91wdt *wdt)
+{
+ return 0;
+}
+#endif
static int __init at91wdt_probe(struct platform_device *pdev)
{
struct resource *r;
- int res;
-
- r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!r)
- return -ENODEV;
- at91wdt_private.base = ioremap(r->start, resource_size(r));
- if (!at91wdt_private.base) {
- dev_err(&pdev->dev, "failed to map registers, aborting.\n");
- return -ENOMEM;
- }
+ int err;
+ struct at91wdt *wdt;
- at91_wdt_dev.parent = &pdev->dev;
- watchdog_init_timeout(&at91_wdt_dev, heartbeat, &pdev->dev);
- watchdog_set_nowayout(&at91_wdt_dev, nowayout);
+ wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
+ if (!wdt)
+ return -ENOMEM;
- /* Set watchdog */
- res = at91_wdt_settimeout(ms_to_ticks(WDT_HW_TIMEOUT * 1000));
- if (res)
- return res;
+ wdt->mr = (WDT_HW_TIMEOUT * 256) | AT91_WDT_WDRSTEN | AT91_WDT_WDD |
+ AT91_WDT_WDDBGHLT | AT91_WDT_WDIDLEHLT;
+ wdt->mr_mask = 0x3FFFFFFF;
+ wdt->nowayout = nowayout;
+ wdt->wdd.parent = &pdev->dev;
+ wdt->wdd.info = &at91_wdt_info;
+ wdt->wdd.ops = &at91_wdt_ops;
+ wdt->wdd.timeout = WDT_HEARTBEAT;
+ wdt->wdd.min_timeout = 1;
+ wdt->wdd.max_timeout = 0xFFFF;
+
+ if (pdev->dev.of_node) {
+ err = of_at91wdt_init(pdev->dev.of_node, wdt);
+ if (err)
+ goto out_free_watchdog;
+ } else {
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!r) {
+ err = -ENODEV;
+ goto out_free_watchdog;
+ }
+
+ wdt->base = ioremap(r->start, resource_size(r));
+ if (!wdt->base) {
+ dev_err(&pdev->dev, "failed to map registers, aborting.\n");
+ err = -ENOMEM;
+ goto out_free_watchdog;
+ }
+ }
- res = watchdog_register_device(&at91_wdt_dev);
- if (res)
- return res;
+ err = at91_wdt_init(pdev, wdt);
+ if (err)
+ goto out_iounmap;
- at91wdt_private.next_heartbeat = jiffies + at91_wdt_dev.timeout * HZ;
- setup_timer(&at91wdt_private.timer, at91_ping, 0);
- mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT);
+ platform_set_drvdata(pdev, wdt);
pr_info("enabled (heartbeat=%d sec, nowayout=%d)\n",
- at91_wdt_dev.timeout, nowayout);
+ wdt->wdd.timeout, wdt->nowayout);
return 0;
+
+out_iounmap:
+ iounmap(wdt->base);
+out_free_watchdog:
+ devm_kfree(&pdev->dev, wdt);
The whole point of using devm_kzalloc() is that you don't have to call kfree()
or devm_kfree(). So this call (and with it the label) is unnecessary.

+ return err;
}
static int __exit at91wdt_remove(struct platform_device *pdev)
{
- watchdog_unregister_device(&at91_wdt_dev);
+ struct at91wdt *wdt = platform_get_drvdata(pdev);
+ watchdog_unregister_device(&wdt->wdd);
pr_warn("I quit now, hardware will probably reboot!\n");
- del_timer(&at91wdt_private.timer);
+ del_timer(&wdt->timer);
+
+ iounmap(wdt->base);
+ devm_kfree(&pdev->dev, wdt);
The call to devm_kfree() is unnecessary.

return 0;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/