Re: [RFC PATCH] watchdog: Adding softwatchdog

From: Guenter Roeck
Date: Sat Apr 24 2021 - 10:41:51 EST


On 4/24/21 3:25 AM, Peter Enderborg wrote:
> This is not a rebooting watchdog. It's function is to take other
> actions than a hard reboot. On many complex system there is some
> kind of manager that monitor and take action on slow systems.
> Android has it's lowmemorykiller (lmkd), desktops has earlyoom.
> This watchdog can be used to help monitor to preform some basic
> action to keep the monitor running.
>
> It can also be used standalone. This add a policy that is
> killing the process with highest oom_score_adj and using
> oom functions to it quickly. I think it is a good usecase
> for the patch. Memory siuations can be problematic for
> software that monitor system, but other prolicys can
> should also be possible. Like picking tasks from a memcg, or
> specific UID's or what ever is low priority.
> ---

NACK. Besides this not following the new watchdog API, the task
of a watchdog is to reset the system on failure. Its task is most
definitely not to re-implement the oom killer in any way, shape,
or form.

Guenter


> drivers/watchdog/Makefile | 2 +
> drivers/watchdog/softwatchdog.c | 231 ++++++++++++++++++++++++++++++++
> include/uapi/linux/watchdog.h | 6 +
> mm/oom_kill.c | 4 +-
> 4 files changed, 241 insertions(+), 2 deletions(-)
> create mode 100644 drivers/watchdog/softwatchdog.c
>
> diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
> index f3a6540e725e..bff8f61fe504 100644
> --- a/drivers/watchdog/Makefile
> +++ b/drivers/watchdog/Makefile
> @@ -221,3 +221,5 @@ obj-$(CONFIG_MENZ069_WATCHDOG) += menz69_wdt.o
> obj-$(CONFIG_RAVE_SP_WATCHDOG) += rave-sp-wdt.o
> obj-$(CONFIG_STPMIC1_WATCHDOG) += stpmic1_wdt.o
> obj-$(CONFIG_SL28CPLD_WATCHDOG) += sl28cpld_wdt.o
> +
> +obj-y += softwatchdog.o
> diff --git a/drivers/watchdog/softwatchdog.c b/drivers/watchdog/softwatchdog.c
> new file mode 100644
> index 000000000000..dd7153092da8
> --- /dev/null
> +++ b/drivers/watchdog/softwatchdog.c
> @@ -0,0 +1,231 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Soft watchdog
> + * This is a RFC of a watchdog does do not reboot the system.
> + * Instead it do a pre defined action
> + *
> + * Author: peter.enderborg@xxxxxxxx
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
> +#include <linux/slab.h>
> +#include <linux/oom.h>
> +#include <linux/module.h>
> +#include <linux/moduleparam.h>
> +#include <linux/types.h>
> +#include <linux/timer.h>
> +#include <linux/miscdevice.h>
> +#include <linux/watchdog.h>
> +#include <linux/notifier.h>
> +#include <linux/init.h>
> +#include <linux/fs.h>
> +
> +void wake_oom_reaper(struct task_struct *tsk); /* need to public ... */
> +void __oom_kill_process(struct task_struct *victim, const char *message); /* hmm */
> +
> +static struct timer_list swd_timer;
> +static int heartbeat = 500;
> +static unsigned long swd_is_open;
> +static char expect_close;
> +static bool nowayout = WATCHDOG_NOWAYOUT;
> +
> +/* a example policy doing a process kill by calling
> + * functions within oom-killer.
> + */
> +static int policy_fast_kill_oom_score_adj(void)
> +{
> + struct task_struct *tsk;
> + struct task_struct *selected = NULL;
> + int highest = 0;
> +
> + rcu_read_lock();
> + for_each_process(tsk) {
> + struct task_struct *candidate;
> +
> + if (tsk->flags & PF_KTHREAD)
> + continue;
> +
> + /* Ignore task if coredump in progress */
> + if (tsk->mm && tsk->mm->core_state)
> + continue;
> + candidate = find_lock_task_mm(tsk);
> + if (!candidate)
> + continue;
> +
> + if (highest < candidate->signal->oom_score_adj) {
> + /* for test dont kill level 0 */
> + highest = candidate->signal->oom_score_adj;
> + selected = candidate;
> + pr_info("new selected %d %d", selected->pid,
> + selected->signal->oom_score_adj);
> + }
> + task_unlock(candidate);
> + }
> + if (selected)
> + get_task_struct(selected);
> +
> + rcu_read_unlock();
> + if (selected) {
> + int pid = selected->pid;
> +
> + pr_info("swd killing: %d", selected->pid);
> + __oom_kill_process(selected, "swd");
> + return pid;
> + }
> + return 0;
> +}
> +
> +static void swd_ping(void)
> +{
> + mod_timer(&swd_timer, jiffies + heartbeat);
> +}
> +
> +static long swd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> +{
> + void __user *argp = (void __user *)arg;
> + int __user *p = argp;
> + int new_heartbeat;
> + int status;
> +
> + struct watchdog_info ident = {
> + .options = WDIOF_SETTIMEOUT|
> + WDIOF_MAGICCLOSE|
> + WDIOF_OOMKILL |
> + WDIOF_AUTOKILL,
> + .identity = "swd",
> + };
> +
> + switch (cmd) {
> + case WDIOC_GETSUPPORT:
> + return copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0;
> + case WDIOC_GETSTATUS:
> + status = 0;
> + return put_user(status, p);
> + case WDIOC_GETBOOTSTATUS:
> + return put_user(0, p);
> + case WDIOC_KEEPALIVE:
> + swd_ping();
> + return 0;
> + case WDIOC_SETTIMEOUT:
> + if (get_user(new_heartbeat, p))
> + return -EFAULT;
> + heartbeat = new_heartbeat;
> + fallthrough;
> + case WDIOC_GETTIMEOUT:
> + return put_user(heartbeat, p);
> + default:
> + return -ENOTTY;
> + }
> + return -ENOTTY;
> +}
> +
> +static void swd_timer_func(struct timer_list *unused)
> +{
> + int res;
> +
> + pr_info("swd timer %d\n", heartbeat);
> + res = policy_fast_kill_oom_score_adj();
> + if (res)
> + pr_info("killed %d\n", res);
> +
> + mod_timer(&swd_timer, jiffies + heartbeat);
> +}
> +
> +static int swd_start(void)
> +{
> + add_timer(&swd_timer);
> + return 0;
> +}
> +
> +static int swd_stop(void)
> +{
> + del_timer(&swd_timer);
> + return 0;
> +}
> +
> +static int swd_open(struct inode *inode, struct file *file)
> +{
> + if (test_and_set_bit(0, &swd_is_open))
> + return -EBUSY;
> + swd_start();
> + return stream_open(inode, file);
> +}
> +
> +static int swd_release(struct inode *inode, struct file *file)
> +{
> + if (expect_close != 42) {
> + swd_stop();
> + clear_bit(0, &swd_is_open);
> + } else {
> + pr_crit("SWD device closed unexpectedly. SWD will not stop!\n");
> + swd_ping();
> + }
> + expect_close = 0;
> + return 0;
> +}
> +static ssize_t swd_write(struct file *file, const char __user *buf,
> + size_t count, loff_t *ppos)
> +{
> + if (count) {
> + if (!nowayout) {
> + size_t i;
> +
> + expect_close = 0;
> + for (i = 0; i != count; i++) {
> + char c;
> +
> + if (get_user(c, buf + i))
> + return -EFAULT;
> + if (c == 'V')
> + expect_close = 42;
> + }
> + }
> + swd_ping();
> + }
> + return count;
> +}
> +
> +static const struct file_operations swd_fops = {
> + .owner = THIS_MODULE,
> + .llseek = no_llseek,
> + .write = swd_write,
> + .unlocked_ioctl = swd_ioctl,
> + .compat_ioctl = compat_ptr_ioctl,
> + .open = swd_open,
> + .release = swd_release,
> +};
> +
> +static struct miscdevice swd_miscdev = {
> + .minor = WATCHDOG_MINOR,
> + .name = "soft-watchdog",
> + .fops = &swd_fops,
> +};
> +
> +int __init swd_register(void)
> +{
> + int ret;
> +
> + pr_info("swd installed with timer");
> + ret = misc_register(&swd_miscdev);
> + timer_setup(&swd_timer, swd_timer_func, 0);
> + return 0;
> +}
> +
> +static int __init swd_init(void)
> +{
> + return 0;
> +}
> +
> +static void __exit swd_unload(void)
> +{
> + return;
> +}
> +
> +subsys_initcall(swd_register);
> +
> +module_init(swd_init);
> +module_exit(swd_unload);
> +
> +MODULE_AUTHOR("Peter Enderborg");
> +MODULE_DESCRIPTION("Memory software watchdog");
> +MODULE_LICENSE("GPL");
> diff --git a/include/uapi/linux/watchdog.h b/include/uapi/linux/watchdog.h
> index b15cde5c9054..780987482e2d 100644
> --- a/include/uapi/linux/watchdog.h
> +++ b/include/uapi/linux/watchdog.h
> @@ -48,6 +48,12 @@ struct watchdog_info {
> #define WDIOF_PRETIMEOUT 0x0200 /* Pretimeout (in seconds), get/set */
> #define WDIOF_ALARMONLY 0x0400 /* Watchdog triggers a management or
> other external alarm not a reboot */
> +#define WDIOF_OOMKILL 0x0800 /* Watchdog trigger process kill as
> + * oom kill
> + */
> +#define WDIOF_AUTOKILL 0x1000 /* Watchdog listen to oom notifiy
> + * and kills with its policy
> + */
> #define WDIOF_KEEPALIVEPING 0x8000 /* Keep alive ping reply */
>
> #define WDIOS_DISABLECARD 0x0001 /* Turn off the watchdog timer */
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index fa1cf18bac97..a5f7299af9a3 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -658,7 +658,7 @@ static int oom_reaper(void *unused)
> return 0;
> }
>
> -static void wake_oom_reaper(struct task_struct *tsk)
> +void wake_oom_reaper(struct task_struct *tsk)
> {
> /* mm is already queued? */
> if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
> @@ -856,7 +856,7 @@ static bool task_will_free_mem(struct task_struct *task)
> return ret;
> }
>
> -static void __oom_kill_process(struct task_struct *victim, const char *message)
> +void __oom_kill_process(struct task_struct *victim, const char *message)
> {
> struct task_struct *p;
> struct mm_struct *mm;
>