Re: [PATCH] RFC: hung task: Check specific tasks for long uninterruptible sleep state

From: Imran Khan
Date: Tue Aug 22 2017 - 02:03:51 EST


On 8/22/2017 7:24 AM, Zhang, Shile (NSB - CN/Hangzhou) wrote:

> Hi, Imran,
>
Hi Shile,
Thanks for getting back on this.
> I think a "unmonitored list" is better than "monitor list", because we want khungtaskd can find out the "unexpected" hung task, but not few in a list.
> Then, for the fg tasks, which can put it in the "unmonitored list", for the bg tasks, I think we can tweak the timeout to control the duration.
>
Here fg and bg tasks were just an example and may not be applicable for all environments (non-android). But main intention here is to have
some mechanism that would allow to monitor only specific tasks. As we have a sysctl option to enable/disable this mechanism we can at any time
fall back to default mechanism of monitoring all tasks.

Alternatively this can be done on task priority basis also, where we have a minimum priority and only tasks with priorities higher than
this min priority would be monitored and if we keep this min priority to the least priority , all tasks would be monitored like the
default case. Please let me know your feedback regarding this approach.

Thanks and Regards,
Imran
> Thanks!
>
> BR, Shile
>
> -----Original Message-----
> From: Imran Khan [mailto:kimran@xxxxxxxxxxxxxx]
> Sent: Monday, August 21, 2017 6:26 PM
> To: mingo@xxxxxxxxxx
> Cc: imrank140517@xxxxxxxxx; Imran Khan <kimran@xxxxxxxxxxxxxx>; Luis R. Rodriguez <mcgrof@xxxxxxxxxx>; Kees Cook <keescook@xxxxxxxxxxxx>; Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>; Zhang, Shile (NSB - CN/Hangzhou) <shile.zhang@xxxxxxxxxxxxxxx>; Matt Fleming <matt@xxxxxxxxxxxxxxxxxxx>; Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>; Vegard Nossum <vegard.nossum@xxxxxxxxxx>; Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>; John Siddle <jsiddle@xxxxxxxxxx>; open list <linux-kernel@xxxxxxxxxxxxxxx>; open list:PROC SYSCTL <linux-fsdevel@xxxxxxxxxxxxxxx>
> Subject: [PATCH] RFC: hung task: Check specific tasks for long uninterruptible sleep state
>
> khungtask by default monitors either all tasks or no tasks at all
> for long unterruptible sleeps.
> For Android like environments this arrangement is not optimal because
> on one hand it may be permissible to have some background(bg) task
> in uninterruptible sleep state for long duration while on the other
> hand it may not be permissible to have some foreground(fg) task like
> surfaceflinger in uninterruptible sleep state for long duration.
> So it would be good to have some arrangement so that few specified tasks
> can be monitored by khungtaskd, on a need basis.
> This change introduces a sysctl option, /proc/sys/kernel/
> hung_task_check_selected, to enable monitoring of selected tasks using
> khungtask daemon. If this sysctl option is enabled then only the tasks
> specified in /proc/hung_task_monitor_list are monitored otherwise all
> tasks are monitored, just like the default case.
>
> Signed-off-by: Imran Khan <kimran@xxxxxxxxxxxxxx>
> ---
> include/linux/sched/sysctl.h | 1 +
> kernel/hung_task.c | 121 ++++++++++++++++++++++++++++++++++++++++++-
> kernel/sysctl.c | 8 +++
> 3 files changed, 128 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
> index 0f5ecd4..05892f1 100644
> --- a/include/linux/sched/sysctl.h
> +++ b/include/linux/sched/sysctl.h
> @@ -10,6 +10,7 @@
> extern unsigned int sysctl_hung_task_panic;
> extern unsigned long sysctl_hung_task_timeout_secs;
> extern int sysctl_hung_task_warnings;
> +extern int sysctl_hung_task_check_selected;
> extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
> void __user *buffer,
> size_t *lenp, loff_t *ppos);
> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index 751593e..49f13fb 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -16,12 +16,28 @@
> #include <linux/export.h>
> #include <linux/sysctl.h>
> #include <linux/utsname.h>
> +#include <linux/uaccess.h>
> +#include <linux/slab.h>
> +#include <linux/proc_fs.h>
> #include <linux/sched/signal.h>
> #include <linux/sched/debug.h>
>
> #include <trace/events/sched.h>
>
> /*
> + * Hung task that needs monitoring
> + */
> +struct hung_task {
> + struct list_head list;
> + char comm[TASK_COMM_LEN];
> +};
> +
> +static struct hung_task *monitor_list;
> +int sysctl_hung_task_check_selected;
> +
> +
> +
> +/*
> * The number of tasks checked:
> */
> int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
> @@ -76,6 +92,92 @@ static int __init hung_task_panic_setup(char *str)
> .notifier_call = hung_task_panic,
> };
>
> +static void hung_task_monitor_setup(void)
> +{
> + monitor_list = kmalloc(sizeof(*monitor_list), GFP_KERNEL);
> + if (monitor_list) {
> + INIT_LIST_HEAD(&monitor_list->list);
> + memset(monitor_list->comm, 0, TASK_COMM_LEN);
> + }
> +}
> +
> +
> +static int hung_task_info_show(struct seq_file *m, void *v)
> +{
> + struct hung_task *ht;
> +
> + ht = list_entry(v, struct hung_task, list);
> + seq_puts(m, ht->comm);
> +
> + return 0;
> +}
> +
> +static void *hung_task_info_start(struct seq_file *m, loff_t *pos)
> +{
> + return seq_list_start_head(&monitor_list->list, *pos);
> +}
> +
> +static void *hung_task_info_next(struct seq_file *m, void *v, loff_t *pos)
> +{
> + return seq_list_next(v, &monitor_list->list, pos);
> +}
> +
> +static void hung_task_info_stop(struct seq_file *m, void *v)
> +{
> +}
> +
> +const struct seq_operations hung_task_info_op = {
> + .start = hung_task_info_start,
> + .next = hung_task_info_next,
> + .stop = hung_task_info_stop,
> + .show = hung_task_info_show
> +};
> +
> +static int hung_task_info_open(struct inode *inode, struct file *file)
> +{
> + return seq_open(file, &hung_task_info_op);
> +}
> +
> +static ssize_t
> +hung_task_info_write(struct file *file, const char __user *buf, size_t count,
> + loff_t *offs)
> +{
> + struct task_struct *g, *t;
> + struct hung_task *ht = kmalloc(sizeof(*ht), GFP_KERNEL);
> +
> + if (!ht)
> + return -ENOMEM;
> +
> + if (copy_from_user(ht->comm, buf, count))
> + return -EFAULT;
> + ht->comm[count] = '\0';
> +
> + for_each_process_thread(g, t) {
> + if (!strncmp(t->comm, ht->comm, strlen(t->comm))) {
> + list_add_tail(&ht->list, &monitor_list->list);
> + return count;
> + }
> + }
> +
> + pr_err("Non-existing task: %s can't be monitored\n", ht->comm);
> + return count;
> +}
> +
> +static const struct file_operations hung_task_info_operations = {
> + .open = hung_task_info_open,
> + .read = seq_read,
> + .write = hung_task_info_write,
> + .llseek = seq_lseek,
> + .release = seq_release,
> +};
> +
> +static int __init proc_hung_task_info_init(void)
> +{
> + proc_create("hung_task_monitor_list", 0644, NULL,
> + &hung_task_info_operations);
> + return 0;
> +}
> +
> static void check_hung_task(struct task_struct *t, unsigned long timeout)
> {
> unsigned long switch_count = t->nvcsw + t->nivcsw;
> @@ -167,6 +269,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
> int max_count = sysctl_hung_task_check_count;
> int batch_count = HUNG_TASK_BATCHING;
> struct task_struct *g, *t;
> + struct hung_task *ht, *tmp;
>
> /*
> * If the system crashed already then all bets are off,
> @@ -186,9 +289,21 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
> goto unlock;
> }
> /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
> - if (t->state == TASK_UNINTERRUPTIBLE)
> - check_hung_task(t, timeout);
> + if (t->state == TASK_UNINTERRUPTIBLE) {
> + if (sysctl_hung_task_check_selected) {
> + list_for_each_entry_safe(ht, tmp,
> + &monitor_list->list,
> + list)
> + if (!strncmp(ht->comm, t->comm,
> + strlen(t->comm)))
> + /* Task belongs to the selected group */
> + check_hung_task(t, timeout);
> + } else {
> + check_hung_task(t, timeout);
> + }
> + }
> }
> +
> unlock:
> rcu_read_unlock();
> if (hung_task_show_lock)
> @@ -259,6 +374,8 @@ static int watchdog(void *dummy)
> static int __init hung_task_init(void)
> {
> atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
> + hung_task_monitor_setup();
> + proc_hung_task_info_init();
> watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
>
> return 0;
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 6648fbb..ab45774 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1096,6 +1096,14 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write,
> .proc_handler = proc_dointvec_minmax,
> .extra1 = &neg_one,
> },
> + {
> + .procname = "hung_task_check_selected",
> + .data = &sysctl_hung_task_check_selected,
> + .maxlen = sizeof(int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec_minmax,
> + .extra1 = &neg_one,
> + },
> #endif
> #ifdef CONFIG_RT_MUTEXES
> {
>


--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a\nmember of the Code Aurora Forum, hosted by The Linux Foundation