RE: [PATCH 3/3] watchdog: ds1374_wdt: Introduce Dallas/Maxim DS1374 Watchdog driver
From: Johnson CH Chen (陳昭勳)
Date: Wed Jun 24 2020 - 08:09:24 EST
Hi,
> On 6/22/20 11:28 PM, Johnson CH Chen (陳昭勳) wrote:
> > Hi,
> >
> > Thanks for your good detailed suggestions!
> >
>
> Other feedback suggests to convert the driver to use the watchdog core in the
> rtc code. I would suggest to follow that suggestion.
>
Understand the following suggestions for watchdog timer setting and I will improve them with watchdog core in rtc code later.
Best regards,
Johnson
> >>> + * It would be more efficient to use i2c msgs/i2c_transfer directly
> >>> +but, as
> >>> + * recommened in .../Documentation/i2c/writing-clients section
> >>> + * "Sending and receiving", using SMBus level communication is
> preferred.
> >>> + */
> >>> +
> >>> +#include <linux/kernel.h>
> >>> +#include <linux/device.h>
> >>> +#include <linux/module.h>
> >>> +#include <linux/ioctl.h>
> >>> +#include <linux/reboot.h>
> >>> +#include <linux/watchdog.h>
> >>> +#include <linux/workqueue.h>
> >>> +#include <linux/platform_device.h>
> >>> +#include <linux/i2c.h>
> >>> +#include <linux/uaccess.h>
> >>
> >> Alphabetic order please.
> >>
> >>> +
> >>> +#define DEVNAME "ds1374_wdt"
> >>> +
> >>> +#define DS1374_REG_WDALM0 0x04 /* Watchdog/Alarm */
> >>> +#define DS1374_REG_WDALM1 0x05
> >>> +#define DS1374_REG_WDALM2 0x06
> >>> +#define DS1374_REG_CR 0x07 /* Control */
> >>> +#define DS1374_REG_CR_AIE 0x01 /* Alarm Int. Enable */
> >>> +#define DS1374_REG_CR_WDSTR 0x08 /* 1=INT, 0=RST */
> >>> +#define DS1374_REG_CR_WDALM 0x20 /* 1=Watchdog, 0=Alarm */
> >>> +#define DS1374_REG_CR_WACE 0x40 /* WD/Alarm counter enable */
> >>> +
> >>> +/* Default margin */
> >>> +#define WD_TIMO 131762
> >>> +#define TIMER_MARGIN_MIN 4096 /* 1s */
> >>> +#define TIMER_MARGIN_MAX (60*60*24*4096) /* one day */
> >>> +
> >>> +static int wdt_margin = WD_TIMO;
> >>
> >> Sjould not be pre-initialized. Also, 131762 isn't 32 seconds, it is
> >> 131,762 seconds.
> >>
> >
> > 131762 is 32 seconds actually because watchdog counter increases per
> > 1/4096 seconds for DS1374. If DS1374_REG_CR_WDALM is set to 0 (alarm),
> > alarm counter will increase per 1 second.
> >
>
> The watchdog core keeps timeouts in seconds. For the watchdog core,
> 131762 is 131,762 seconds. Your code assumes that the watchdog core
> does not care about / need to scale, which is wrong. Besides,
> MODULE_PARM_DESC below clearly states "Watchdog timeout _in seconds_"
> (emphasis added).
>
> >>> +module_param(wdt_margin, int, 0);
> >>> +MODULE_PARM_DESC(wdt_margin, "Watchdog timeout in seconds
> (default
> >>> +32s)");
> >>> +
> >>
> >> As a new driver, it would be better to just use the customary "timeout".
> >>
> >>> +static bool nowayout = WATCHDOG_NOWAYOUT;
> module_param(nowayout,
> >>> +bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped
> >> once
> >>> +started default ="
> >>> + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
> >>> +
> >>> +struct ds1374_wdt {
> >>> + struct i2c_client *client;
> >>> + struct watchdog_device *wdt;
> >>> + struct work_struct work;
> >>
> >> Not used.
> >>
> >>> +
> >>> + /* The mutex protects alarm operations, and prevents a race
> >>> + * between the enable_irq() in the workqueue and the free_irq()
> >>> + * in the remove function.
> >>> + */
> >>
> >> There is no workqueue here, and the remove function is not needed.
> >>
> >>> + struct mutex mutex;
> >>> +};
> >>> +
> >>> +static const struct watchdog_info ds1374_wdt_info = {
> >>> + .identity = DEVNAME,
> >>> + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
> >>> + WDIOF_MAGICCLOSE,
> >>> +};
> >>> +
> >>> +static struct watchdog_device ds1374_wdd;
> >>> +
> >> Move declaration please.
> >>
> >>> +static int ds1374_wdt_settimeout(struct watchdog_device *wdt,
> >>> + unsigned int timeout)
> >>> +{
> >>
> >> How is this synchronized against accesses by the RTC driver ?
> >>
> > By original design in rtc-ds1374.c, it seems no synchronized problem
> between
> > rtc and watchdog, but I think we can add mutex lock to deal with it.
> >
> The mutex is used there where needed.
>
> >>> + int ret = -ENOIOCTLCMD;
> >>
> >> Not an appropriate error code here.
> >>
> >>> + u8 buf[4];
> >>> + int cr, i;
> >>> + struct ds1374_wdt *drv_data = watchdog_get_drvdata(wdt);
> >>> +
> >>> + ret = cr = i2c_smbus_read_byte_data(drv_data->client,
> DS1374_REG_CR);
> >>> + if (ret < 0)
> >>> + goto out;
> >>
> >> "goto out;" is unnecessary. Just return the error. Same everywhere else
> below.
> >>
> >>> +
> >>> + /* Disable any existing watchdog/alarm before setting the new one
> */
> >>> + cr &= ~DS1374_REG_CR_WACE;
> >>> +
> >>> + ret = i2c_smbus_write_byte_data(drv_data->client, DS1374_REG_CR,
> cr);
> >>> + if (ret < 0)
> >>> + goto out;
> >>> +
> >>> + /* Set new watchdog time */
> >>> + for (i = 0; i < 3; i++) {
> >>> + buf[i] = timeout & 0xff;
> >>> + timeout >>= 8;
> >>> + }
> >>
> >> The value passed to this function from the watchdog core is the timeout in
> >> seconds. I don't see a conversion to internal values here.
> >>
> >
> > For original design in rtc-ds1374.c, ds1374_wdt_settimeout () will call
> > ds1374_write_rtc() to write hardware register. To separate watchdog and rtc
> > functions, expand code from ds1374_write_rtc() here, and final buf[] values
> > will be written into hardware register for DS1374.
> >
>
> ds1374_wdt_settimeout() is an API function, It gets timeouts from the
> watchdog
> core in seconds. Those timeouts have to be converted to chip internal values
> in this function.
>
> >>> +
> >>> + ret = i2c_smbus_write_i2c_block_data(drv_data->client,
> >>> + DS1374_REG_WDALM0, 3, buf);
> >>> + if (ret) {
> >>> + pr_info("couldn't set new watchdog time\n");
> >>> + goto out;
> >>> + }
> >>
> >>> +
> >>> + /* Enable watchdog timer */
> >>> + cr |= DS1374_REG_CR_WACE | DS1374_REG_CR_WDALM;
> >>> + cr &= ~DS1374_REG_CR_WDSTR;/* for RST PIN */
> >>> + cr &= ~DS1374_REG_CR_AIE;
> >>> +
> >>> + ret = i2c_smbus_write_byte_data(drv_data->client, DS1374_REG_CR,
> cr);
> >>> + if (ret < 0)
> >>> + goto out;
> >>> +
> >>> + return 0;
> >>
> >> Pointless. Just return ret.
> >>
> >> Also, this function needs to store the new timeout in struct
> watchdog_device.
> >>
> >>> +out:
> >>> + return ret;
> >>> +}
> >>> +
> >>> +
> >>> +/*
> >>> + * Read WD/Alarm counter to reload the watchdog timer. (ie, pat the
> >>> +watchdog) */ static int ds1374_wdt_ping(struct watchdog_device *wdt)
> >>> +{
> >>> + struct ds1374_wdt *drv_data = watchdog_get_drvdata(wdt);
> >>> + int ret;
> >>> + u8 buf[4];
> >>> +
> >>> + ret = i2c_smbus_read_i2c_block_data(drv_data->client,
> >>> + DS1374_REG_WDALM0, 3, buf);
> >>> +
> >>> + if (ret < 0 || ret < 3)
> >>> + pr_info("WD TICK FAIL!!!!!!!!!! %i\n", ret);
> >>> +
> >>
> >> This is not an info message, this is an error. Besides, it is just noise.
> >> Just return the error and drop the message.
> >>
> >>> + return ret;
> >>> +}
> >>> +
> >>> +static int ds1374_wdt_start(struct watchdog_device *wdt) {
> >>> + int ret;
> >>> +
> >>> + ret = ds1374_wdt_settimeout(wdt, wdt_margin);
> >>
> >> This is wrong. The timeout may have been updated by userspace.
> >> It is inappropriate to change it back to the default here.
> >>
> > Thanks, I will keep in mind.
> >
> >>> + if (ret)
> >>> + return ret;
> >>> +
> >>> + ds1374_wdt_ping(wdt);
> >>
> >> Please do not ignore errors.
> >>
> >>> +
> >>> + return 0;
> >>> +}
> >>> +
> >>> +static int ds1374_wdt_stop(struct watchdog_device *wdt) {
> >>> + struct ds1374_wdt *drv_data = watchdog_get_drvdata(wdt);
> >>> + int cr;
> >>> +
> >>> + if (nowayout)
> >>> + return 0;
> >>
> >> Not needed.
> >>
> > Thanks!, it has been implemented in watchdog_stop().
> >
> >>> +
> >>> + cr = i2c_smbus_read_byte_data(drv_data->client, DS1374_REG_CR);
> >>> + /* Disable watchdog timer */
> >>> + cr &= ~DS1374_REG_CR_WACE;
> >>> +
> >>> + return i2c_smbus_write_byte_data(drv_data->client,
> DS1374_REG_CR,
> >>> +cr); }
> >>> +
> >>> +/*
> >>> + * Handle commands from user-space.
> >>> + */
> >>> +static long ds1374_wdt_ioctl(struct watchdog_device *wdt, unsigned int
> >> cmd,
> >>> + unsigned long arg)
> >>
> >> The whole point of using the watchdog subsystem is to not need a local ioctl
> >> function - and most definitely not one that duplicates watchdog core
> >> functionality.
> >>
> >>> +{
> >>> + int new_margin, options;
> >>> +
> >>> + switch (cmd) {
> >>> + case WDIOC_GETSUPPORT:
> >>> + return copy_to_user((struct watchdog_info __user *)arg,
> >>> + &ds1374_wdt_info, sizeof(ds1374_wdt_info)) ? -EFAULT : 0;
> >>> +
> >>> + case WDIOC_GETSTATUS:
> >>> + case WDIOC_GETBOOTSTATUS:
> >>> + return put_user(0, (int __user *)arg);
> >>> + case WDIOC_KEEPALIVE:
> >>> + ds1374_wdt_ping(wdt);
> >>> + return 0;
> >>> + case WDIOC_SETTIMEOUT:
> >>> + if (get_user(new_margin, (int __user *)arg))
> >>> + return -EFAULT;
> >>> +
> >>> + /* the hardware's tick rate is 4096 Hz, so
> >>> + * the counter value needs to be scaled accordingly
> >>> + */
> >>> + new_margin <<= 12;
> >>> + if (new_margin < 1 || new_margin > 16777216)
> >>> + return -EINVAL;
> >>> +
> >>> + wdt_margin = new_margin;
> >>> + ds1374_wdt_settimeout(wdt, new_margin);
> >>
> >> Is the idea here to bypass the watchdog subsystem's notion of keeping the
> >> timeout in seconds ? If so, that would be wrong and unacceptable.
> >>
> > It seems take care about 4096Hz for original design in rtc-ds1374.c. I think
> we
> > can just use ioctl() which watchdog core provides and input margin time
> > appropriately.
> >
> >
> >>> + ds1374_wdt_ping(wdt);
> >>> + fallthrough;
> >>> + case WDIOC_GETTIMEOUT:
> >>> + /* when returning ... inverse is true */
> >>> + return put_user((wdt_margin >> 12), (int __user *)arg);
> >>> + case WDIOC_SETOPTIONS:
> >>> + if (copy_from_user(&options, (int __user *)arg, sizeof(int)))
> >>> + return -EFAULT;
> >>> +
> >>> + if (options & WDIOS_DISABLECARD) {
> >>> + pr_info("disable watchdog\n");
> >>> + ds1374_wdt_stop(wdt);
> >>> + return 0;
> >>> + }
> >>> +
> >>> + if (options & WDIOS_ENABLECARD) {
> >>> + pr_info("enable watchdog\n");
> >>> + ds1374_wdt_settimeout(wdt, wdt_margin);
> >>> + ds1374_wdt_ping(wdt);
> >>> + return 0;
> >>> + }
> >>> + return -EINVAL;
> >>> + }
> >>> + return -ENOTTY;
> >>> +}
> >>> +
> >>> +static int ds1374_wdt_notify_sys(struct notifier_block *this,
> >>> + unsigned long code, void *unused)
> >>> +{
> >>> + if (code == SYS_DOWN || code == SYS_HALT)
> >>> + /* Disable Watchdog */
> >>> + ds1374_wdt_stop(&ds1374_wdd);
> >>> + return NOTIFY_DONE;
> >>> +}
> >>> +
> >> Not needed - see below.
> >>
> >>> +static struct notifier_block ds1374_wdt_notifier = {
> >>> + .notifier_call = ds1374_wdt_notify_sys, };
> >>> +
> >>> +static int ds1374_wdt_probe(struct platform_device *pdev) {
> >>> + struct ds1374_wdt *drv_data;
> >>> + struct i2c_client *client = to_i2c_client(pdev->dev.parent);
> >>> + int ret;
> >>> +
> >>> + drv_data = devm_kzalloc(&pdev->dev, sizeof(struct ds1374_wdt),
> >>> + GFP_KERNEL);
> >>> + if (!drv_data)
> >>> + return -ENOMEM;
> >>> +
> >>> + drv_data->wdt = &ds1374_wdd;
> >>> + drv_data->client = client;
> >>> + mutex_init(&drv_data->mutex);
> >>> +
> >>> + watchdog_init_timeout(drv_data->wdt, wdt_margin, &pdev->dev);
> >>> + watchdog_set_nowayout(drv_data->wdt, nowayout);
> >>> +
> >>> + watchdog_set_drvdata(drv_data->wdt, drv_data);
> >>> + platform_set_drvdata(pdev, drv_data);
> >>> +
> >>> + ret = watchdog_register_device(drv_data->wdt);
> >>
> >> Use devm_watchdog_register_device()
> >>
> >>> + if (ret) {
> >>> + dev_err(&pdev->dev, "failed to register Watchdog device\n");
> >>> + return ret;
> >>> + }
> >>> +
> >>> + ret = register_reboot_notifier(&ds1374_wdt_notifier);
> >>
> >> Call watchdog_stop_on_reboot() before calling watchdog_register_device()
> >> instead.
> >>
> >>> + if (ret) {
> >>> + watchdog_unregister_device(drv_data->wdt);
> >>> + return ret;
> >>> + }
> >>> +
> >>> + ds1374_wdt_settimeout(drv_data->wdt, wdt_margin);
> >>
> >> Unnecessary here; called from start function.
> >>
> >>> + dev_info(&pdev->dev, "Dallas/Maxim DS1374 watchdog device
> >>> +enabled\n");
> >>> +
> >>> + return 0;
> >>> +}
> >>> +
> >>> +static int ds1374_wdt_remove(struct platform_device *pdev) {
> >>> + struct ds1374_wdt *drv_data = platform_get_drvdata(pdev);
> >>> +
> >>> + dev_warn(&pdev->dev, "Unregister DS1374 watchdog device\n");
> >>> + watchdog_unregister_device(drv_data->wdt);
> >>> + unregister_reboot_notifier(&ds1374_wdt_notifier);
> >>> +
> >>
> >> Call watchdog_stop_on_unregister() before calling
> >> watchdog_register_device().
> >>
> >>> + return 0;
> >>> +}
> >>> +
> >>> +static void ds1374_wdt_shutdown(struct platform_device *pdev) {
> >>> + struct ds1374_wdt *drv_data = platform_get_drvdata(pdev);
> >>> +
> >>> + mutex_lock(&drv_data->mutex);
> >>> + ds1374_wdt_stop(drv_data->wdt);
> >>> + mutex_unlock(&drv_data->mutex);
> >>
> >> Unnecessary and pointless.
> >>
> >>> +}
> >>> +
> >>> +static const struct watchdog_ops ds1374_wdt_fops = {
> >>> + .owner = THIS_MODULE,
> >>> + .start = ds1374_wdt_start,
> >>> + .stop = ds1374_wdt_stop,
> >>> + .ping = ds1374_wdt_ping,
> >>> + .set_timeout = ds1374_wdt_settimeout,
> >>> + .ioctl = ds1374_wdt_ioctl,
> >>> +};
> >>> +
> >>> +static struct watchdog_device ds1374_wdd = {
> >>> + .info = &ds1374_wdt_info,
> >>> + .ops = &ds1374_wdt_fops,
> >>> + .min_timeout = TIMER_MARGIN_MIN,
> >>> + .max_timeout = TIMER_MARGIN_MAX,
> >>
> >> .timeout should be set here.
> >>
> >> Also, there can (at least in theory) be more than one ds1374 in the system.
> The
> >> code does not support this case. ds1374_wdd should be moved into struct
> >> ds1374_wdt.
> >>
> > Thanks for good suggestion.
> >>> +};
> >>> +
> >>> +static struct platform_driver ds1374_wdt = {
> >>> + .driver = {
> >>> + .owner = THIS_MODULE,
> >>> + .name = DEVNAME,
> >>> + },
> >>> + .probe = ds1374_wdt_probe,
> >>> + .remove = ds1374_wdt_remove,
> >>> + .shutdown = ds1374_wdt_shutdown,
> >>> +};
> >>> +
> >>> +module_platform_driver(ds1374_wdt);
> >>> +
> >>> +MODULE_AUTHOR("Johnson Chen <johnsonch.chen@xxxxxxxx>");
> >>> +MODULE_DESCRIPTION("Dallas/Maxim DS1374 Watchdog Driver");
> >>> +MODULE_LICENSE("GPL"); MODULE_ALIAS("Platform:ds1374_wdt");
> >>>
> >
> > Best regards,
> > Johnson
> >