[PATCH] kernel/hung_task: Report top CPU consumers

From: zhouchuangao
Date: Fri May 14 2021 - 08:57:10 EST


1. When a task has not been scheduled for more than 2 minutes and the
hung task watchdog is about to panic, report the top 3 (NUM_CONSUMERS)
CPU consumers (illustrative output below).

2. The per-task CPU utilization is computed over a one-second sampling
window taken just before the panic.

3. Add a new member, last_cpu_time, to task_struct to record each
task's CPU time at the start of the sampling window.
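
With NUM_CONSUMERS at its default of 3, the report printed to the
kernel log is expected to look roughly like this (task names, PIDs and
tick counts below are purely illustrative):

  hung task report top 3 CPU consumers:
  TOP COMM PID [TASK_CPU_TIME/ALL_CPU_TIME]
  Top0 kworker/u8:2 1234 [57/812]
  Top1 stress-ng 2041 [33/812]
  Top2 kswapd0 68 [12/812]

Both bracketed values are converted to USER_HZ clock ticks via
nsec_to_clock_t(); ALL_CPU_TIME is the time accumulated by all CPUs
(all cpustat categories, including idle) over the sampling window.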

Signed-off-by: zhouchuangao <zhouchuangao@xxxxxxxx>
---
include/linux/sched.h | 1 +
kernel/hung_task.c | 161 +++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 160 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8d5264b..103f98f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -973,6 +973,7 @@ struct task_struct {
unsigned long last_switch_count;
unsigned long last_switch_time;
unsigned long killed_time;
+ u64 last_cpu_time;
#endif
/* Filesystem information: */
struct fs_struct *fs;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index bb2e3e1..fb5f944 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -10,6 +10,7 @@
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
+#include <linux/tick.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
@@ -21,7 +22,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/sched/sysctl.h>
-
+#include <linux/sched/cputime.h>
#include <trace/events/sched.h>

/*
@@ -55,6 +56,16 @@ static bool hung_task_show_lock;
static bool hung_task_call_panic;
static bool hung_task_show_all_bt;

+static u64 last_cpu_usage;
+static u64 interval_cpu_usage;
+
+#define NUM_CONSUMERS 3
+struct cpu_consumer {
+ char comm[TASK_COMM_LEN];
+ pid_t pid;
+ u64 cpu_used;
+};
+
static struct task_struct *watchdog_task;

#ifdef CONFIG_SMP
@@ -72,6 +83,145 @@ unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
unsigned int __read_mostly sysctl_hung_task_panic =
CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;

+#ifdef arch_idle_time
+static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
+{
+ u64 idle;
+
+ idle = kcs->cpustat[CPUTIME_IDLE];
+ if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
+ idle += arch_idle_time(cpu);
+ return idle;
+}
+
+static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
+{
+ u64 iowait;
+
+ iowait = kcs->cpustat[CPUTIME_IOWAIT];
+ if (cpu_online(cpu) && nr_iowait_cpu(cpu))
+ iowait += arch_idle_time(cpu);
+ return iowait;
+}
+#else
+static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
+{
+ u64 idle, idle_usecs = -1ULL;
+
+ if (cpu_online(cpu))
+ idle_usecs = get_cpu_idle_time_us(cpu, NULL);
+
+ if (idle_usecs == -1ULL)
+ /* !NO_HZ or cpu offline so we can rely on cpustat.idle */
+ idle = kcs->cpustat[CPUTIME_IDLE];
+ else
+ idle = idle_usecs * NSEC_PER_USEC;
+
+ return idle;
+}
+
+static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
+{
+ u64 iowait, iowait_usecs = -1ULL;
+
+ if (cpu_online(cpu))
+ iowait_usecs = get_cpu_iowait_time_us(cpu, NULL);
+
+ if (iowait_usecs == -1ULL)
+ /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
+ iowait = kcs->cpustat[CPUTIME_IOWAIT];
+ else
+ iowait = iowait_usecs * NSEC_PER_USEC;
+
+ return iowait;
+}
+#endif
+
+static void show_top_cpu_consumers(bool compute)
+{
+ int i, j;
+ struct task_struct *g, *t;
+ struct signal_struct *sig;
+ u64 cutime, cstime, utime, stime;
+ u64 task_cpu_time, interval_time;
+ struct cpu_consumer tcc[NUM_CONSUMERS];
+
+ memset(tcc, 0, sizeof(struct cpu_consumer) * NUM_CONSUMERS);
+
+ for_each_process_thread(g, t) {
+ sig = t->signal;
+ cutime = sig->cutime;
+ cstime = sig->cstime;
+ task_cputime_adjusted(t, &utime, &stime);
+ task_cpu_time = cutime + cstime + utime + stime;
+
+ if (compute) {
+ interval_time = task_cpu_time - t->last_cpu_time;
+ for (i = 0; i < NUM_CONSUMERS; i++) {
+ if (interval_time > tcc[i].cpu_used) {
+ for (j = NUM_CONSUMERS - 1; j > i; j--) {
+ strcpy(tcc[j].comm, tcc[j-1].comm);
+ tcc[j].pid = tcc[j-1].pid;
+ tcc[j].cpu_used = tcc[j-1].cpu_used;
+ }
+ strcpy(tcc[i].comm, t->comm);
+ tcc[i].pid = t->pid;
+ tcc[i].cpu_used = interval_time;
+ break;
+ }
+ }
+ } else
+ t->last_cpu_time = task_cpu_time;
+ }
+
+ if (compute) {
+ pr_info("hung task report top %d CPU consumers:\n", NUM_CONSUMERS);
+ pr_info("TOP COMM PID [TASK_CPU_TIME/ALL_CPU_TIME]\n");
+ for (i = 0; i < NUM_CONSUMERS; i++)
+ pr_info("Top%d %s %d [%lld/%lld]\n", i,
+ tcc[i].comm,
+ tcc[i].pid,
+ nsec_to_clock_t(tcc[i].cpu_used),
+ nsec_to_clock_t(interval_cpu_usage));
+ }
+}
+
+static void all_cpu_usage(bool compute)
+{
+ int i;
+ u64 user, nice, system, idle, iowait, irq, softirq, steal;
+ u64 guest, guest_nice;
+ u64 current_cpu_usage = 0;
+
+ user = nice = system = idle = iowait = irq = 0;
+ softirq = steal = guest = guest_nice = 0;
+
+ for_each_possible_cpu(i) {
+ struct kernel_cpustat kcpustat;
+ u64 *cpustat = kcpustat.cpustat;
+
+ kcpustat_cpu_fetch(&kcpustat, i);
+
+ user += cpustat[CPUTIME_USER];
+ nice += cpustat[CPUTIME_NICE];
+ system += cpustat[CPUTIME_SYSTEM];
+ idle += get_idle_time(&kcpustat, i);
+ iowait += get_iowait_time(&kcpustat, i);
+ irq += cpustat[CPUTIME_IRQ];
+ softirq += cpustat[CPUTIME_SOFTIRQ];
+ steal += cpustat[CPUTIME_STEAL];
+ guest += cpustat[CPUTIME_GUEST];
+ guest_nice += cpustat[CPUTIME_GUEST_NICE];
+ }
+ current_cpu_usage = user + nice + system + idle + iowait +
+ irq + softirq + steal + guest + guest_nice;
+
+ if (compute)
+ interval_cpu_usage = current_cpu_usage - last_cpu_usage;
+ else
+ last_cpu_usage = current_cpu_usage;
+}
+
static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
@@ -253,8 +403,15 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
trigger_all_cpu_backtrace();
}

- if (hung_task_call_panic)
+ if (hung_task_call_panic) {
+ all_cpu_usage(false);
+ show_top_cpu_consumers(false);
+ msleep(1000);
+ all_cpu_usage(true);
+ show_top_cpu_consumers(true);
+
panic("hung_task: blocked tasks");
+ }
}

static long hung_timeout_jiffies(unsigned long last_checked,
--
2.7.4