Re: [RFC PATCH] watchdog: Adding softwatchdog
From: Peter.Enderborg
Date: Sat Apr 24 2021 - 11:29:04 EST
On 4/24/21 4:41 PM, Guenter Roeck wrote:
> On 4/24/21 3:25 AM, Peter Enderborg wrote:
>> This is not a rebooting watchdog. It's function is to take other
>> actions than a hard reboot. On many complex system there is some
>> kind of manager that monitor and take action on slow systems.
>> Android has it's lowmemorykiller (lmkd), desktops has earlyoom.
>> This watchdog can be used to help monitor to preform some basic
>> action to keep the monitor running.
>>
>> It can also be used standalone. This add a policy that is
>> killing the process with highest oom_score_adj and using
>> oom functions to it quickly. I think it is a good usecase
>> for the patch. Memory siuations can be problematic for
>> software that monitor system, but other prolicys can
>> should also be possible. Like picking tasks from a memcg, or
>> specific UID's or what ever is low priority.
>> ---
> NACK. Besides this not following the new watchdog API, the task
> of a watchdog is to reset the system on failure. Its task is most
> definitely not to re-implement the oom killer in any way, shape,
> or form.
>
> Guenter
Do you have better idea where the re-invented wheel might
fit better if it not for watchdog API?
>
>> drivers/watchdog/Makefile | 2 +
>> drivers/watchdog/softwatchdog.c | 231 ++++++++++++++++++++++++++++++++
>> include/uapi/linux/watchdog.h | 6 +
>> mm/oom_kill.c | 4 +-
>> 4 files changed, 241 insertions(+), 2 deletions(-)
>> create mode 100644 drivers/watchdog/softwatchdog.c
>>
>> diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
>> index f3a6540e725e..bff8f61fe504 100644
>> --- a/drivers/watchdog/Makefile
>> +++ b/drivers/watchdog/Makefile
>> @@ -221,3 +221,5 @@ obj-$(CONFIG_MENZ069_WATCHDOG) += menz69_wdt.o
>> obj-$(CONFIG_RAVE_SP_WATCHDOG) += rave-sp-wdt.o
>> obj-$(CONFIG_STPMIC1_WATCHDOG) += stpmic1_wdt.o
>> obj-$(CONFIG_SL28CPLD_WATCHDOG) += sl28cpld_wdt.o
>> +
>> +obj-y += softwatchdog.o
>> diff --git a/drivers/watchdog/softwatchdog.c b/drivers/watchdog/softwatchdog.c
>> new file mode 100644
>> index 000000000000..dd7153092da8
>> --- /dev/null
>> +++ b/drivers/watchdog/softwatchdog.c
>> @@ -0,0 +1,231 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/* Soft watchdog
>> + * This is a RFC of a watchdog does do not reboot the system.
>> + * Instead it do a pre defined action
>> + *
>> + * Author: peter.enderborg@xxxxxxxx
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/mm.h>
>> +#include <linux/slab.h>
>> +#include <linux/oom.h>
>> +#include <linux/module.h>
>> +#include <linux/moduleparam.h>
>> +#include <linux/types.h>
>> +#include <linux/timer.h>
>> +#include <linux/miscdevice.h>
>> +#include <linux/watchdog.h>
>> +#include <linux/notifier.h>
>> +#include <linux/init.h>
>> +#include <linux/fs.h>
>> +
>> +void wake_oom_reaper(struct task_struct *tsk); /* need to public ... */
>> +void __oom_kill_process(struct task_struct *victim, const char *message); /* hmm */
>> +
>> +static struct timer_list swd_timer;
>> +static int heartbeat = 500;
>> +static unsigned long swd_is_open;
>> +static char expect_close;
>> +static bool nowayout = WATCHDOG_NOWAYOUT;
>> +
>> +/* a example policy doing a process kill by calling
>> + * functions within oom-killer.
>> + */
>> +static int policy_fast_kill_oom_score_adj(void)
>> +{
>> + struct task_struct *tsk;
>> + struct task_struct *selected = NULL;
>> + int highest = 0;
>> +
>> + rcu_read_lock();
>> + for_each_process(tsk) {
>> + struct task_struct *candidate;
>> +
>> + if (tsk->flags & PF_KTHREAD)
>> + continue;
>> +
>> + /* Ignore task if coredump in progress */
>> + if (tsk->mm && tsk->mm->core_state)
>> + continue;
>> + candidate = find_lock_task_mm(tsk);
>> + if (!candidate)
>> + continue;
>> +
>> + if (highest < candidate->signal->oom_score_adj) {
>> + /* for test dont kill level 0 */
>> + highest = candidate->signal->oom_score_adj;
>> + selected = candidate;
>> + pr_info("new selected %d %d", selected->pid,
>> + selected->signal->oom_score_adj);
>> + }
>> + task_unlock(candidate);
>> + }
>> + if (selected)
>> + get_task_struct(selected);
>> +
>> + rcu_read_unlock();
>> + if (selected) {
>> + int pid = selected->pid;
>> +
>> + pr_info("swd killing: %d", selected->pid);
>> + __oom_kill_process(selected, "swd");
>> + return pid;
>> + }
>> + return 0;
>> +}
>> +
>> +static void swd_ping(void)
>> +{
>> + mod_timer(&swd_timer, jiffies + heartbeat);
>> +}
>> +
>> +static long swd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>> +{
>> + void __user *argp = (void __user *)arg;
>> + int __user *p = argp;
>> + int new_heartbeat;
>> + int status;
>> +
>> + struct watchdog_info ident = {
>> + .options = WDIOF_SETTIMEOUT|
>> + WDIOF_MAGICCLOSE|
>> + WDIOF_OOMKILL |
>> + WDIOF_AUTOKILL,
>> + .identity = "swd",
>> + };
>> +
>> + switch (cmd) {
>> + case WDIOC_GETSUPPORT:
>> + return copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0;
>> + case WDIOC_GETSTATUS:
>> + status = 0;
>> + return put_user(status, p);
>> + case WDIOC_GETBOOTSTATUS:
>> + return put_user(0, p);
>> + case WDIOC_KEEPALIVE:
>> + swd_ping();
>> + return 0;
>> + case WDIOC_SETTIMEOUT:
>> + if (get_user(new_heartbeat, p))
>> + return -EFAULT;
>> + heartbeat = new_heartbeat;
>> + fallthrough;
>> + case WDIOC_GETTIMEOUT:
>> + return put_user(heartbeat, p);
>> + default:
>> + return -ENOTTY;
>> + }
>> + return -ENOTTY;
>> +}
>> +
>> +static void swd_timer_func(struct timer_list *unused)
>> +{
>> + int res;
>> +
>> + pr_info("swd timer %d\n", heartbeat);
>> + res = policy_fast_kill_oom_score_adj();
>> + if (res)
>> + pr_info("killed %d\n", res);
>> +
>> + mod_timer(&swd_timer, jiffies + heartbeat);
>> +}
>> +
>> +static int swd_start(void)
>> +{
>> + add_timer(&swd_timer);
>> + return 0;
>> +}
>> +
>> +static int swd_stop(void)
>> +{
>> + del_timer(&swd_timer);
>> + return 0;
>> +}
>> +
>> +static int swd_open(struct inode *inode, struct file *file)
>> +{
>> + if (test_and_set_bit(0, &swd_is_open))
>> + return -EBUSY;
>> + swd_start();
>> + return stream_open(inode, file);
>> +}
>> +
>> +static int swd_release(struct inode *inode, struct file *file)
>> +{
>> + if (expect_close != 42) {
>> + swd_stop();
>> + clear_bit(0, &swd_is_open);
>> + } else {
>> + pr_crit("SWD device closed unexpectedly. SWD will not stop!\n");
>> + swd_ping();
>> + }
>> + expect_close = 0;
>> + return 0;
>> +}
>> +static ssize_t swd_write(struct file *file, const char __user *buf,
>> + size_t count, loff_t *ppos)
>> +{
>> + if (count) {
>> + if (!nowayout) {
>> + size_t i;
>> +
>> + expect_close = 0;
>> + for (i = 0; i != count; i++) {
>> + char c;
>> +
>> + if (get_user(c, buf + i))
>> + return -EFAULT;
>> + if (c == 'V')
>> + expect_close = 42;
>> + }
>> + }
>> + swd_ping();
>> + }
>> + return count;
>> +}
>> +
>> +static const struct file_operations swd_fops = {
>> + .owner = THIS_MODULE,
>> + .llseek = no_llseek,
>> + .write = swd_write,
>> + .unlocked_ioctl = swd_ioctl,
>> + .compat_ioctl = compat_ptr_ioctl,
>> + .open = swd_open,
>> + .release = swd_release,
>> +};
>> +
>> +static struct miscdevice swd_miscdev = {
>> + .minor = WATCHDOG_MINOR,
>> + .name = "soft-watchdog",
>> + .fops = &swd_fops,
>> +};
>> +
>> +int __init swd_register(void)
>> +{
>> + int ret;
>> +
>> + pr_info("swd installed with timer");
>> + ret = misc_register(&swd_miscdev);
>> + timer_setup(&swd_timer, swd_timer_func, 0);
>> + return 0;
>> +}
>> +
>> +static int __init swd_init(void)
>> +{
>> + return 0;
>> +}
>> +
>> +static void __exit swd_unload(void)
>> +{
>> + return;
>> +}
>> +
>> +subsys_initcall(swd_register);
>> +
>> +module_init(swd_init);
>> +module_exit(swd_unload);
>> +
>> +MODULE_AUTHOR("Peter Enderborg");
>> +MODULE_DESCRIPTION("Memory software watchdog");
>> +MODULE_LICENSE("GPL");
>> diff --git a/include/uapi/linux/watchdog.h b/include/uapi/linux/watchdog.h
>> index b15cde5c9054..780987482e2d 100644
>> --- a/include/uapi/linux/watchdog.h
>> +++ b/include/uapi/linux/watchdog.h
>> @@ -48,6 +48,12 @@ struct watchdog_info {
>> #define WDIOF_PRETIMEOUT 0x0200 /* Pretimeout (in seconds), get/set */
>> #define WDIOF_ALARMONLY 0x0400 /* Watchdog triggers a management or
>> other external alarm not a reboot */
>> +#define WDIOF_OOMKILL 0x0800 /* Watchdog trigger process kill as
>> + * oom kill
>> + */
>> +#define WDIOF_AUTOKILL 0x1000 /* Watchdog listen to oom notifiy
>> + * and kills with its policy
>> + */
>> #define WDIOF_KEEPALIVEPING 0x8000 /* Keep alive ping reply */
>>
>> #define WDIOS_DISABLECARD 0x0001 /* Turn off the watchdog timer */
>> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
>> index fa1cf18bac97..a5f7299af9a3 100644
>> --- a/mm/oom_kill.c
>> +++ b/mm/oom_kill.c
>> @@ -658,7 +658,7 @@ static int oom_reaper(void *unused)
>> return 0;
>> }
>>
>> -static void wake_oom_reaper(struct task_struct *tsk)
>> +void wake_oom_reaper(struct task_struct *tsk)
>> {
>> /* mm is already queued? */
>> if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
>> @@ -856,7 +856,7 @@ static bool task_will_free_mem(struct task_struct *task)
>> return ret;
>> }
>>
>> -static void __oom_kill_process(struct task_struct *victim, const char *message)
>> +void __oom_kill_process(struct task_struct *victim, const char *message)
>> {
>> struct task_struct *p;
>> struct mm_struct *mm;
>>