Re: [PATCH 1/2] tracing: Add latency histograms

From: Carsten Emde
Date: Thu Jul 28 2016 - 16:02:47 EST


Binoy,

> [..]
> This patch provides a recording mechanism to store data of potential
> sources of system latencies. [..]
Thanks a lot! I would be more than happy and certainly will help and test as much
as I can to get this functionality upstream.

Some time ago, I provided a patch to add a context switch time histogram.
Unfortunately, this patch did not make it into the RT patch by some reason.
However, the context switch time is needed to completely elucidate any latency
that is seen from user space. The below patch only applies to the original
version of the histogram patch - not to your version. I am appending it just
for your information. I would be more than happy to convert the patch so it
applies on top of yours. However, I would not do it, if there were no chance
that it ever gets considered. Please let me know.

Thanks,
-Carsten.


--

From: Carsten Emde <C.Emde@xxxxxxxxx>
Date: Fri, 3 Oct 2014 22:14:55 +0100
Subject: Add trace latency histogram to monitor context switch time

The existing two on-line methods to record system latency (timer and
wakeup latency) do not completely reflect the user-space wakeup time,
since the duration of the context switch is not taken into account. This
patch adds two new histograms - one that records the duration of the
context switch and another one that records the sum of the timer delay,
the wakeup latency and the newly available duration of the context
switch. The latter histogram probably is best suitable to determine the
worst-case total preemption latency of a given system.

Signed-off-by: Carsten Emde <C.Emde@xxxxxxxxx>

---
include/linux/sched.h | 8
kernel/trace/Kconfig | 32 +++
kernel/trace/latency_hist.c | 427 ++++++++++++++++++++++++++++++++++++++------
3 files changed, 411 insertions(+), 56 deletions(-)

Index: linux-4.4.9-rt17/include/linux/sched.h
===================================================================
--- linux-4.4.9-rt17.orig/include/linux/sched.h
+++ linux-4.4.9-rt17/include/linux/sched.h
@@ -1828,7 +1828,15 @@ struct task_struct {
#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
long timer_offset;
#endif
+#endif /* CONFIG_WAKEUP_LATENCY_HIST */
+#ifdef CONFIG_SWITCHTIME_HIST
+ u64 switchtime_timestamp_hist;
+ struct task_struct *switchtime_prev;
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+ unsigned long wakeup_latency;
#endif
+#endif /* CONFIG_SWITCHTIME_HIST */
#endif /* CONFIG_TRACING */
#ifdef CONFIG_MEMCG
struct mem_cgroup *memcg_in_oom;
Index: linux-4.4.9-rt17/kernel/trace/Kconfig
===================================================================
--- linux-4.4.9-rt17.orig/kernel/trace/Kconfig
+++ linux-4.4.9-rt17/kernel/trace/Kconfig
@@ -325,6 +325,38 @@ config MISSED_TIMER_OFFSETS_HIST

/sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup

+config SWITCHTIME_HIST
+ depends on HIGH_RES_TIMERS
+ depends on HAVE_SYSCALL_TRACEPOINTS
+ depends on WAKEUP_LATENCY_HIST
+ select GENERIC_TRACER
+ select FTRACE_SYSCALLS
+ bool "Context Switch Time Histogram"
+ help
+ Generate a histogram of the context switch duration per CPU. The
+ histograms are disabled by default. To enable them, write a non-zero
+ number to
+
+ /sys/kernel/debug/tracing/latency_hist/enable/switchtime
+
+ The histogram data will be located in the debug file system at
+
+ /sys/kernel/debug/tracing/latency_hist/switchtime
+
+ If both Scheduling Latency Histogram, Missed Timer Offsets and the
+ newly provided Context Switch Time Histogram are selected, additional
+ histogram data will be collected from the sum of the wakeup latency,
+ the timer latency, if available, and the switch time per CPU. These
+ histograms are available in the
+
+ /sys/kernel/debug/tracing/latency_hist/timerwakeupswitch
+
+ directory. They reflect the apparent interrupt and scheduling latency
+ and are best suitable to determine the worst-case latency of a given
+ system. To enable these histograms, write a non-zero number to
+
+ /sys/kernel/debug/tracing/latency_hist/enable/timerwakeupswitch
+
config ENABLE_DEFAULT_TRACERS
bool "Trace process context switches and events"
depends on !GENERIC_TRACER
Index: linux-4.4.9-rt17/kernel/trace/latency_hist.c
===================================================================
--- linux-4.4.9-rt17.orig/kernel/trace/latency_hist.c
+++ linux-4.4.9-rt17/kernel/trace/latency_hist.c
@@ -23,11 +23,15 @@
#include <linux/sched/rt.h>
#include <linux/slab.h>
#include <linux/atomic.h>
+#include <trace/syscall.h>
#include <asm/div64.h>
+#include <asm/syscall.h>

#include "trace.h"
#include <trace/events/sched.h>

+extern struct tracepoint __tracepoint_sys_exit;
+
#define NSECS_PER_USECS 1000L

#define CREATE_TRACE_POINTS
@@ -41,6 +45,8 @@ enum {
WAKEUP_LATENCY_SHAREDPRIO,
MISSED_TIMER_OFFSETS,
TIMERANDWAKEUP_LATENCY,
+ SWITCHTIME,
+ TIMERWAKEUPSWITCH_LATENCY,
MAX_LATENCY_TYPE,
};

@@ -104,8 +110,10 @@ struct maxlatproc_data {
int prio;
int current_prio;
long latency;
- long timeroffset;
+ unsigned long timer_offset;
+ long wakeup_latency;
cycle_t timestamp;
+ int nr;
};
#endif

@@ -144,6 +152,19 @@ static DEFINE_PER_CPU(struct maxlatproc_
static unsigned long missed_timer_offsets_pid;
#endif

+#ifdef CONFIG_SWITCHTIME_HIST
+static DEFINE_PER_CPU(struct hist_data, switchtime);
+static char *switchtime_dir = "switchtime";
+static notrace void probe_syscall_exit(void *v, struct pt_regs *regs, long ret);
+static struct enable_data switchtime_enabled_data = {
+ .latency_type = SWITCHTIME,
+ .enabled = 0,
+};
+static DEFINE_PER_CPU(struct maxlatproc_data, switchtime_maxlatproc);
+static DEFINE_PER_CPU(struct task_struct *, switchtime_task);
+static unsigned long switchtime_pid;
+#endif
+
#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
@@ -155,13 +176,26 @@ static struct enable_data timerandwakeup
static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
#endif

+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) && \
+ defined(CONFIG_SWITCHTIME_HIST)
+static DEFINE_PER_CPU(struct hist_data, timerwakeupswitch_latency_hist);
+static char *timerwakeupswitch_latency_hist_dir = "timerwakeupswitch";
+static struct enable_data timerwakeupswitch_enabled_data = {
+ .latency_type = TIMERWAKEUPSWITCH_LATENCY,
+ .enabled = 0,
+};
+static DEFINE_PER_CPU(struct maxlatproc_data, timerwakeupswitch_maxlatproc);
+#endif
+
void notrace latency_hist(int latency_type, int cpu, long latency,
- long timeroffset, cycle_t stop,
- struct task_struct *p)
+ unsigned long timer_offset, long wakeup_latency,
+ cycle_t stop, struct task_struct *p, int nr)
{
struct hist_data *my_hist;
#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) || \
+ defined(CONFIG_SWITCHTIME_HIST)
struct maxlatproc_data *mp = NULL;
#endif

@@ -201,6 +235,12 @@ void notrace latency_hist(int latency_ty
mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
break;
#endif
+#ifdef CONFIG_SWITCHTIME_HIST
+ case SWITCHTIME:
+ my_hist = &per_cpu(switchtime, cpu);
+ mp = &per_cpu(switchtime_maxlatproc, cpu);
+ break;
+#endif
#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
case TIMERANDWAKEUP_LATENCY:
@@ -208,6 +248,14 @@ void notrace latency_hist(int latency_ty
mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
break;
#endif
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) && \
+ defined(CONFIG_SWITCHTIME_HIST)
+ case TIMERWAKEUPSWITCH_LATENCY:
+ my_hist = &per_cpu(timerwakeupswitch_latency_hist, cpu);
+ mp = &per_cpu(timerwakeupswitch_maxlatproc, cpu);
+ break;
+#endif

default:
return;
@@ -229,21 +277,38 @@ void notrace latency_hist(int latency_ty
if (unlikely(latency > my_hist->max_lat ||
my_hist->min_lat == LONG_MAX)) {
#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) || \
+ defined(CONFIG_SWITCHTIME_HIST)
if (latency_type == WAKEUP_LATENCY ||
latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
latency_type == MISSED_TIMER_OFFSETS ||
- latency_type == TIMERANDWAKEUP_LATENCY) {
- strncpy(mp->comm, p->comm, sizeof(mp->comm));
- strncpy(mp->current_comm, current->comm,
- sizeof(mp->current_comm));
- mp->pid = task_pid_nr(p);
- mp->current_pid = task_pid_nr(current);
- mp->prio = p->prio;
- mp->current_prio = current->prio;
+ latency_type == TIMERANDWAKEUP_LATENCY ||
+ latency_type == SWITCHTIME ||
+ latency_type == TIMERWAKEUPSWITCH_LATENCY) {
+ if (latency_type == SWITCHTIME ||
+ latency_type == TIMERWAKEUPSWITCH_LATENCY) {
+ strncpy(mp->current_comm, p->comm,
+ sizeof(mp->current_comm));
+ strncpy(mp->comm, current->comm,
+ sizeof(mp->comm));
+ mp->current_pid = task_pid_nr(p);
+ mp->pid = task_pid_nr(current);
+ mp->current_prio = p->prio;
+ mp->prio = current->prio;
+ } else {
+ strncpy(mp->comm, p->comm, sizeof(mp->comm));
+ strncpy(mp->current_comm, current->comm,
+ sizeof(mp->current_comm));
+ mp->pid = task_pid_nr(p);
+ mp->current_pid = task_pid_nr(current);
+ mp->prio = p->prio;
+ mp->current_prio = current->prio;
+ }
mp->latency = latency;
- mp->timeroffset = timeroffset;
+ mp->timer_offset = timer_offset;
+ mp->wakeup_latency = wakeup_latency;
mp->timestamp = stop;
+ mp->nr = nr;
}
#endif
my_hist->max_lat = latency;
@@ -366,8 +431,9 @@ static void clear_maxlatprocdata(struct
{
mp->comm[0] = mp->current_comm[0] = '\0';
mp->prio = mp->current_prio = mp->pid = mp->current_pid =
- mp->latency = mp->timeroffset = -1;
+ mp->latency = mp->timer_offset = mp->wakeup_latency = -1;
mp->timestamp = 0;
+ mp->nr = 0;
}
#endif

@@ -426,6 +492,12 @@ latency_hist_reset(struct file *file, co
mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
break;
#endif
+#ifdef CONFIG_SWITCHTIME_HIST
+ case SWITCHTIME:
+ hist = &per_cpu(switchtime, cpu);
+ mp = &per_cpu(switchtime_maxlatproc, cpu);
+ break;
+#endif
#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
case MISSED_TIMER_OFFSETS:
hist = &per_cpu(missed_timer_offsets, cpu);
@@ -439,15 +511,26 @@ latency_hist_reset(struct file *file, co
mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
break;
#endif
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) && \
+ defined(CONFIG_SWITCHTIME_HIST)
+ case TIMERWAKEUPSWITCH_LATENCY:
+ hist = &per_cpu(timerwakeupswitch_latency_hist, cpu);
+ mp = &per_cpu(timerwakeupswitch_maxlatproc, cpu);
+ break;
+#endif
}

hist_reset(hist);
#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) || \
+ defined(CONFIG_SWITCHTIME_HIST)
if (latency_type == WAKEUP_LATENCY ||
latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
latency_type == MISSED_TIMER_OFFSETS ||
- latency_type == TIMERANDWAKEUP_LATENCY)
+ latency_type == TIMERANDWAKEUP_LATENCY ||
+ latency_type == SWITCHTIME ||
+ latency_type == TIMERWAKEUPSWITCH_LATENCY)
clear_maxlatprocdata(mp);
#endif
}
@@ -456,7 +539,8 @@ latency_hist_reset(struct file *file, co
}

#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) || \
+ defined(CONFIG_SWITCHTIME_HIST)
static ssize_t
show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
{
@@ -499,7 +583,7 @@ show_maxlatproc(struct file *file, char
{
int r;
struct maxlatproc_data *mp = file->private_data;
- int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
+ int strmaxlen = (TASK_COMM_LEN * 2) + 32 + (8 * 8);
unsigned long long t;
unsigned long usecs, secs;
char *buf;
@@ -518,10 +602,18 @@ show_maxlatproc(struct file *file, char
usecs = do_div(t, USEC_PER_SEC);
secs = (unsigned long) t;
r = snprintf(buf, strmaxlen,
- "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
- MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
- mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
+ "%d %d %ld (%ld,%ld) %s <- %d %d %s %lu.%06lu", mp->pid,
+ MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timer_offset,
+ mp->wakeup_latency, mp->comm, mp->current_pid,
+ MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
secs, usecs);
+#ifdef CONFIG_SWITCHTIME_HIST
+ if (mp->nr >= 0 && mp->nr < NR_syscalls)
+ r += snprintf(buf + r, strmaxlen - r, " %pf",
+ sys_call_table[mp->nr]);
+#endif
+ r += snprintf(buf + r, strmaxlen - r, "\n");
+
r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
kfree(buf);
return r;
@@ -569,9 +661,7 @@ do_enable(struct file *file, const char
ret = register_trace_preemptirqsoff_hist(
probe_preemptirqsoff_hist, NULL);
if (ret) {
- pr_info("wakeup trace: Couldn't assign "
- "probe_preemptirqsoff_hist "
- "to trace_preemptirqsoff_hist\n");
+ pr_info("Couldn't register preemptirqsoff_hist probe\n");
return ret;
}
break;
@@ -581,17 +671,13 @@ do_enable(struct file *file, const char
ret = register_trace_sched_wakeup(
probe_wakeup_latency_hist_start, NULL);
if (ret) {
- pr_info("wakeup trace: Couldn't assign "
- "probe_wakeup_latency_hist_start "
- "to trace_sched_wakeup\n");
+ pr_info("Couldn't register sched_wakeup probe\n");
return ret;
}
ret = register_trace_sched_wakeup_new(
probe_wakeup_latency_hist_start, NULL);
if (ret) {
- pr_info("wakeup trace: Couldn't assign "
- "probe_wakeup_latency_hist_start "
- "to trace_sched_wakeup_new\n");
+ pr_info("Couldn't register sched_wakeup_new probe\n");
unregister_trace_sched_wakeup(
probe_wakeup_latency_hist_start, NULL);
return ret;
@@ -599,9 +685,7 @@ do_enable(struct file *file, const char
ret = register_trace_sched_switch(
probe_wakeup_latency_hist_stop, NULL);
if (ret) {
- pr_info("wakeup trace: Couldn't assign "
- "probe_wakeup_latency_hist_stop "
- "to trace_sched_switch\n");
+ pr_info("Couldn't register sched_switch probe\n");
unregister_trace_sched_wakeup(
probe_wakeup_latency_hist_start, NULL);
unregister_trace_sched_wakeup_new(
@@ -611,9 +695,7 @@ do_enable(struct file *file, const char
ret = register_trace_sched_migrate_task(
probe_sched_migrate_task, NULL);
if (ret) {
- pr_info("wakeup trace: Couldn't assign "
- "probe_sched_migrate_task "
- "to trace_sched_migrate_task\n");
+ pr_info("Couldn't register sched_migrate_task probe\n");
unregister_trace_sched_wakeup(
probe_wakeup_latency_hist_start, NULL);
unregister_trace_sched_wakeup_new(
@@ -629,9 +711,19 @@ do_enable(struct file *file, const char
ret = register_trace_hrtimer_interrupt(
probe_hrtimer_interrupt, NULL);
if (ret) {
- pr_info("wakeup trace: Couldn't assign "
- "probe_hrtimer_interrupt "
- "to trace_hrtimer_interrupt\n");
+ pr_info("Couldn't register hrtimer IRQ probe\n");
+ return ret;
+ }
+ break;
+#endif
+#ifdef CONFIG_SWITCHTIME_HIST
+ case SWITCHTIME:
+ if (!wakeup_latency_enabled_data.enabled)
+ return -EINVAL;
+ ret = tracepoint_probe_register(&__tracepoint_sys_exit,
+ probe_syscall_exit, NULL);
+ if (ret) {
+ pr_info("Couldn't register sys_exit probe\n");
return ret;
}
break;
@@ -644,6 +736,16 @@ do_enable(struct file *file, const char
return -EINVAL;
break;
#endif
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) && \
+ defined(CONFIG_SWITCHTIME_HIST)
+ case TIMERWAKEUPSWITCH_LATENCY:
+ if (!wakeup_latency_enabled_data.enabled ||
+ !missed_timer_offsets_enabled_data.enabled ||
+ !switchtime_enabled_data.enabled)
+ return -EINVAL;
+ break;
+#endif
default:
break;
}
@@ -695,6 +797,12 @@ do_enable(struct file *file, const char
#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
timerandwakeup_enabled_data.enabled = 0;
#endif
+#ifdef CONFIG_SWITCHTIME_HIST
+ switchtime_enabled_data.enabled = 0;
+#endif
+#ifdef CONFIG_TIMERWAKEUPSWITCH_LATENCY_HIST
+ timerwakeupswitch_enabled_data.enabled = 0;
+#endif
break;
#endif
#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
@@ -704,6 +812,25 @@ do_enable(struct file *file, const char
#ifdef CONFIG_WAKEUP_LATENCY_HIST
timerandwakeup_enabled_data.enabled = 0;
#endif
+#ifdef CONFIG_TIMERWAKEUPSWITCH_LATENCY_HIST
+ timerwakeupswitch_enabled_data.enabled = 0;
+#endif
+ break;
+#endif
+#ifdef CONFIG_SWITCHTIME_HIST
+ case SWITCHTIME:
+ {
+ int cpu;
+
+ tracepoint_probe_unregister(&__tracepoint_sys_exit,
+ probe_syscall_exit, NULL);
+
+ for_each_online_cpu(cpu)
+ per_cpu(switchtime_task, cpu) = NULL;
+ }
+#ifdef CONFIG_TIMERWAKEUPSWITCH_LATENCY_HIST
+ timerwakeupswitch_enabled_data.enabled = 0;
+#endif
break;
#endif
default:
@@ -797,7 +924,7 @@ static notrace void probe_preemptirqsoff
NSECS_PER_USECS;

latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
- stop, NULL);
+ 0, stop, NULL, -1);
}
per_cpu(hist_irqsoff_counting, cpu) = 0;
}
@@ -815,7 +942,7 @@ static notrace void probe_preemptirqsoff
NSECS_PER_USECS;

latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
- 0, stop, NULL);
+ 0, 0, stop, NULL, -1);
}
per_cpu(hist_preemptoff_counting, cpu) = 0;
}
@@ -834,7 +961,7 @@ static notrace void probe_preemptirqsoff
NSECS_PER_USECS;

latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
- latency, 0, stop, NULL);
+ latency, 0, 0, stop, NULL, -1);
}
per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
}
@@ -845,6 +972,9 @@ static notrace void probe_preemptirqsoff

#ifdef CONFIG_WAKEUP_LATENCY_HIST
static DEFINE_RAW_SPINLOCK(wakeup_lock);
+#ifdef CONFIG_SWITCHTIME_HIST
+static DEFINE_RAW_SPINLOCK(switchtime_lock);
+#endif
static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
int cpu)
{
@@ -855,7 +985,6 @@ static notrace void probe_sched_migrate_
struct task_struct *cpu_wakeup_task;

raw_spin_lock_irqsave(&wakeup_lock, flags);
-
cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
if (task == cpu_wakeup_task) {
put_task_struct(cpu_wakeup_task);
@@ -863,8 +992,24 @@ static notrace void probe_sched_migrate_
cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
get_task_struct(cpu_wakeup_task);
}
-
raw_spin_unlock_irqrestore(&wakeup_lock, flags);
+
+#ifdef CONFIG_SWITCHTIME_HIST
+ {
+ struct task_struct *cpu_switchtime_task;
+
+ raw_spin_lock_irqsave(&switchtime_lock, flags);
+ cpu_switchtime_task = per_cpu(switchtime_task, old_cpu);
+ if (task == cpu_switchtime_task) {
+ put_task_struct(cpu_switchtime_task);
+ per_cpu(switchtime_task, old_cpu) = NULL;
+ cpu_switchtime_task =
+ per_cpu(switchtime_task, cpu) = task;
+ get_task_struct(cpu_switchtime_task);
+ }
+ raw_spin_unlock_irqrestore(&switchtime_lock, flags);
+ }
+#endif
}
}

@@ -876,8 +1021,23 @@ static notrace void probe_wakeup_latency
int cpu = task_cpu(p);
struct task_struct *cpu_wakeup_task;

- raw_spin_lock_irqsave(&wakeup_lock, flags);
+#ifdef CONFIG_SWITCHTIME_HIST
+ struct task_struct *cpu_switchtime_task;
+
+ if (!switchtime_pid) {
+ raw_spin_lock_irqsave(&switchtime_lock, flags);
+ cpu_switchtime_task = per_cpu(switchtime_task, cpu);
+ if (cpu_switchtime_task &&
+ p->prio < cpu_switchtime_task->prio) {
+ cpu_switchtime_task->switchtime_timestamp_hist = 0;
+ put_task_struct(cpu_switchtime_task);
+ per_cpu(switchtime_task, cpu) = NULL;
+ }
+ raw_spin_unlock_irqrestore(&switchtime_lock, flags);
+ }
+#endif

+ raw_spin_lock_irqsave(&wakeup_lock, flags);
cpu_wakeup_task = per_cpu(wakeup_task, cpu);

if (wakeup_pid) {
@@ -914,6 +1074,7 @@ static notrace void probe_wakeup_latency
long latency;
cycle_t stop;
struct task_struct *cpu_wakeup_task;
+ int hit = 0;

raw_spin_lock_irqsave(&wakeup_lock, flags);

@@ -949,28 +1110,52 @@ static notrace void probe_wakeup_latency
NSECS_PER_USECS;

if (per_cpu(wakeup_sharedprio, cpu)) {
- latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
- next);
+ latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, 0,
+ stop, next, -1);
per_cpu(wakeup_sharedprio, cpu) = 0;
} else {
- latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
+ latency_hist(WAKEUP_LATENCY, cpu, latency, 0, 0, stop, next,
+ -1);
#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
if (timerandwakeup_enabled_data.enabled) {
latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
- next->timer_offset + latency, next->timer_offset,
- stop, next);
+ next->timer_offset + latency, next->timer_offset, 0,
+ stop, next, -1);
}
+ hit = 1;
#endif
}

out_reset:
-#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
- next->timer_offset = 0;
-#endif
put_task_struct(cpu_wakeup_task);
per_cpu(wakeup_task, cpu) = NULL;
out:
raw_spin_unlock_irqrestore(&wakeup_lock, flags);
+
+#ifdef CONFIG_SWITCHTIME_HIST
+ if (hit && switchtime_enabled_data.enabled &&
+ (switchtime_pid == 0 || task_pid_nr(next) == switchtime_pid)) {
+ unsigned long flags;
+ struct task_struct *cpu_switchtime_task;
+
+ raw_spin_lock_irqsave(&switchtime_lock, flags);
+
+ cpu_switchtime_task = per_cpu(switchtime_task, cpu) =
+ next;
+ get_task_struct(cpu_switchtime_task);
+
+ raw_spin_unlock_irqrestore(&switchtime_lock, flags);
+
+ next->switchtime_timestamp_hist = stop;
+ next->wakeup_latency = latency;
+ next->switchtime_prev = current;
+ }
+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
+ else
+ next->timer_offset = 0;
+#endif
+
+#endif
}
#endif

@@ -992,10 +1177,28 @@ static notrace void probe_hrtimer_interr
return;
}

+#ifdef CONFIG_SWITCHTIME_HIST
+ if (!switchtime_pid) {
+ unsigned long flags;
+ struct task_struct *cpu_switchtime_task;
+
+ raw_spin_lock_irqsave(&switchtime_lock, flags);
+ cpu_switchtime_task = per_cpu(switchtime_task, cpu);
+ if (cpu_switchtime_task &&
+ task->prio < cpu_switchtime_task->prio) {
+ cpu_switchtime_task->switchtime_timestamp_hist =
+ 0;
+ put_task_struct(cpu_switchtime_task);
+ per_cpu(switchtime_task, cpu) = NULL;
+ }
+ raw_spin_unlock_irqrestore(&switchtime_lock, flags);
+ }
+#endif
+
now = ftrace_now(cpu);
latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
- latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
- task);
+ latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, 0, 0, now,
+ task, -1);
#ifdef CONFIG_WAKEUP_LATENCY_HIST
task->timer_offset = latency;
#endif
@@ -1003,6 +1206,66 @@ static notrace void probe_hrtimer_interr
}
#endif

+#ifdef CONFIG_SWITCHTIME_HIST
+static notrace void probe_syscall_exit(void *v, struct pt_regs *regs, long ret)
+{
+ unsigned long flags;
+ int cpu = task_cpu(current);
+ struct task_struct *cpu_switchtime_task;
+
+ raw_spin_lock_irqsave(&switchtime_lock, flags);
+
+ cpu_switchtime_task = per_cpu(switchtime_task, cpu);
+ if (cpu_switchtime_task == NULL)
+ goto out;
+
+ if (current == cpu_switchtime_task) {
+ unsigned long latency;
+ cycle_t stop;
+ int syscall_nr;
+
+ if (switchtime_pid) {
+ if (likely(switchtime_pid !=
+ task_pid_nr(current)))
+ goto out;
+ }
+
+ /*
+ * The task we are waiting for exited from sytem call.
+ * Calculate latency since start of context switch and store it
+ * in histogram.
+ */
+ stop = ftrace_now(raw_smp_processor_id());
+
+ latency = ((long) (stop - current->switchtime_timestamp_hist)) /
+ NSECS_PER_USECS;
+
+ syscall_nr = syscall_get_nr(current, regs);
+
+ current->switchtime_timestamp_hist = 0;
+
+ latency_hist(SWITCHTIME, cpu, latency, 0, 0, stop,
+ current->switchtime_prev, syscall_nr);
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+ if (timerwakeupswitch_enabled_data.enabled) {
+ latency_hist(TIMERWAKEUPSWITCH_LATENCY, cpu,
+ current->timer_offset + current->wakeup_latency +
+ latency, current->timer_offset,
+ current->wakeup_latency, stop,
+ current->switchtime_prev, syscall_nr);
+ }
+ current->timer_offset = current->wakeup_latency = 0;
+#endif
+ }
+ put_task_struct(cpu_switchtime_task);
+ per_cpu(switchtime_task, cpu) = NULL;
+
+out:
+ raw_spin_unlock_irqrestore(&switchtime_lock, flags);
+}
+#endif
+
static __init int latency_hist_init(void)
{
struct dentry *latency_hist_root = NULL;
@@ -1147,6 +1410,32 @@ static __init int latency_hist_init(void
&enable_fops);
#endif

+#ifdef CONFIG_SWITCHTIME_HIST
+ dentry = debugfs_create_dir(switchtime_dir,
+ latency_hist_root);
+ for_each_possible_cpu(i) {
+ sprintf(name, cpufmt, i);
+ entry = debugfs_create_file(name, 0444, dentry,
+ &per_cpu(switchtime, i), &latency_hist_fops);
+ my_hist = &per_cpu(switchtime, i);
+ atomic_set(&my_hist->hist_mode, 1);
+ my_hist->min_lat = LONG_MAX;
+
+ sprintf(name, cpufmt_maxlatproc, i);
+ mp = &per_cpu(switchtime_maxlatproc, i);
+ entry = debugfs_create_file(name, 0444, dentry, mp,
+ &maxlatproc_fops);
+ clear_maxlatprocdata(mp);
+ }
+ entry = debugfs_create_file("pid", 0644, dentry,
+ (void *)&switchtime_pid, &pid_fops);
+ entry = debugfs_create_file("reset", 0644, dentry,
+ (void *)SWITCHTIME, &latency_hist_reset_fops);
+ entry = debugfs_create_file("switchtime", 0644,
+ enable_root, (void *)&switchtime_enabled_data,
+ &enable_fops);
+#endif
+
#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
@@ -1172,6 +1461,32 @@ static __init int latency_hist_init(void
enable_root, (void *)&timerandwakeup_enabled_data,
&enable_fops);
#endif
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) && \
+ defined(CONFIG_SWITCHTIME_HIST)
+ dentry = debugfs_create_dir(timerwakeupswitch_latency_hist_dir,
+ latency_hist_root);
+ for_each_possible_cpu(i) {
+ sprintf(name, cpufmt, i);
+ entry = debugfs_create_file(name, 0444, dentry,
+ &per_cpu(timerwakeupswitch_latency_hist, i),
+ &latency_hist_fops);
+ my_hist = &per_cpu(timerwakeupswitch_latency_hist, i);
+ atomic_set(&my_hist->hist_mode, 1);
+ my_hist->min_lat = LONG_MAX;
+
+ sprintf(name, cpufmt_maxlatproc, i);
+ mp = &per_cpu(timerwakeupswitch_maxlatproc, i);
+ entry = debugfs_create_file(name, 0444, dentry, mp,
+ &maxlatproc_fops);
+ clear_maxlatprocdata(mp);
+ }
+ entry = debugfs_create_file("reset", 0644, dentry,
+ (void *)TIMERWAKEUPSWITCH_LATENCY, &latency_hist_reset_fops);
+ entry = debugfs_create_file("timerwakeupswitch", 0644,
+ enable_root, (void *)&timerwakeupswitch_enabled_data,
+ &enable_fops);
+#endif
return 0;
}