Re: [RFC/PATCH] tracing: show total NMI time in hwlat tracer

From: Steven Rostedt
Date: Wed Dec 14 2016 - 11:17:36 EST



[ added the forgotten LKML ;-) ]

On Wed, 14 Dec 2016 09:53:01 -0500
Steven Rostedt <rostedt@xxxxxxxxxxx> wrote:

> On Wed, 14 Dec 2016 16:13:51 +0900
> Namhyung Kim <namhyung@xxxxxxxxxx> wrote:
>
> > Hi Steve,
> >
> > While looking at the code I found the below. I'm not sure the current
> > code is correct but it seems that nmi_total_ts would have *total* time
> > of NMI handler execution IMHO. Or maybe keeping longest time is enough.
> >
> > Currently nmi_total_ts has the duration of the last occurrence of NMI.
> > But I think it should keep all of NMI handling time.
> >
>
> Yeah, that looks like it should be accumulative.

Actually, I take that back. We don't care what the accumulative NMI
time of the entire sample width is. But we do care what the
accumulative NMI was of each individual loop, as that's what we are
timing.

I'm wondering if we keep a total accumulative count and then take a
snapshot of it in the loop, and record the difference in each iteration.

I added a patch below on top of your patch.

>
> -- Steve
>
> > Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
> > ---
> > kernel/trace/trace_hwlat.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
> > index 775569ec50d0..0b1ead11658e 100644
> > --- a/kernel/trace/trace_hwlat.c
> > +++ b/kernel/trace/trace_hwlat.c
> > @@ -151,7 +151,7 @@ void trace_hwlat_callback(bool enter)
> > if (enter)
> > nmi_ts_start = time_get();
> > else
> > - nmi_total_ts = time_get() - nmi_ts_start;
> > + nmi_total_ts += time_get() - nmi_ts_start;
> > }
> >
> > if (enter)
>


---
kernel/trace/trace_entries.h | 1 +
kernel/trace/trace_hwlat.c | 25 +++++++++++++++++++++++--
kernel/trace/trace_output.c | 5 ++++-
3 files changed, 28 insertions(+), 3 deletions(-)

Index: linux-trace.git/kernel/trace/trace_entries.h
===================================================================
--- linux-trace.git.orig/kernel/trace/trace_entries.h 2016-11-30 13:08:00.701199978 -0500
+++ linux-trace.git/kernel/trace/trace_entries.h 2016-12-14 10:11:55.461704990 -0500
@@ -346,6 +346,7 @@ FTRACE_ENTRY(hwlat, hwlat_entry,
__field( u64, duration )
__field( u64, outer_duration )
__field( u64, nmi_total_ts )
+ __field( u64, nmi_max_ts )
__field_struct( struct timespec, timestamp )
__field_desc( long, timestamp, tv_sec )
__field_desc( long, timestamp, tv_nsec )
Index: linux-trace.git/kernel/trace/trace_hwlat.c
===================================================================
--- linux-trace.git.orig/kernel/trace/trace_hwlat.c 2016-12-14 10:09:49.937878304 -0500
+++ linux-trace.git/kernel/trace/trace_hwlat.c 2016-12-14 11:11:52.940771118 -0500
@@ -82,6 +82,7 @@ struct hwlat_sample {
u64 duration; /* delta */
u64 outer_duration; /* delta (outer loop) */
u64 nmi_total_ts; /* Total time spent in NMIs */
+ u64 nmi_max_ts; /* Max time spent in NMIs */
struct timespec timestamp; /* wall time */
int nmi_count; /* # NMIs during this sample */
};
@@ -124,6 +125,7 @@ static void trace_hwlat_sample(struct hw
entry->outer_duration = sample->outer_duration;
entry->timestamp = sample->timestamp;
entry->nmi_total_ts = sample->nmi_total_ts;
+ entry->nmi_max_ts = sample->nmi_max_ts;
entry->nmi_count = sample->nmi_count;

if (!call_filter_check_discard(call, entry, buffer, event))
@@ -168,11 +170,12 @@ void trace_hwlat_callback(bool enter)
static int get_sample(void)
{
struct trace_array *tr = hwlat_trace;
- time_type start, t1, t2, last_t2;
- s64 diff, total, last_total = 0;
+ time_type start, t1, t2, last_t2, nmi_t1, nmi_t2;
+ s64 diff, total, last_total = 0, last_nmi = 0;
u64 sample = 0;
u64 thresh = tracing_thresh;
u64 outer_sample = 0;
+ u64 nmi_sample = 0;
int ret = -1;

do_div(thresh, NSEC_PER_USEC); /* modifies interval value */
@@ -190,9 +193,15 @@ static int get_sample(void)

do {

+ nmi_t1 = READ_ONCE(nmi_total_ts);
+ barrier();
+
t1 = time_get(); /* we'll look for a discontinuity */
t2 = time_get();

+ barrier();
+ nmi_t2 = READ_ONCE(nmi_total_ts);
+
if (time_u64(last_t2)) {
/* Check the delta from outer loop (t2 to next t1) */
diff = time_to_us(time_sub(t1, last_t2));
@@ -203,8 +212,13 @@ static int get_sample(void)
}
if (diff > outer_sample)
outer_sample = diff;
+
+ diff = nmi_t1 - last_nmi;
+ if (diff > nmi_sample)
+ nmi_sample = diff;
}
last_t2 = t2;
+ last_nmi = nmi_t2;

total = time_to_us(time_sub(t2, start)); /* sample width */

@@ -227,6 +241,10 @@ static int get_sample(void)
if (diff > sample)
sample = diff; /* only want highest value */

+ diff = nmi_t2 - nmi_t1;
+ if (diff > nmi_sample)
+ nmi_sample = diff;
+
} while (total <= hwlat_data.sample_width);

barrier(); /* finish the above in the view for NMIs */
@@ -244,6 +262,8 @@ static int get_sample(void)
/* We read in microseconds */
if (nmi_total_ts)
do_div(nmi_total_ts, NSEC_PER_USEC);
+ if (nmi_sample)
+ do_div(nmi_sample, NSEC_PER_USEC);

hwlat_data.count++;
s.seqnum = hwlat_data.count;
@@ -251,6 +271,7 @@ static int get_sample(void)
s.outer_duration = outer_sample;
s.timestamp = CURRENT_TIME;
s.nmi_total_ts = nmi_total_ts;
+ s.nmi_max_ts = nmi_sample;
s.nmi_count = nmi_count;
trace_hwlat_sample(&s);

Index: linux-trace.git/kernel/trace/trace_output.c
===================================================================
--- linux-trace.git.orig/kernel/trace/trace_output.c 2016-11-30 13:08:00.726199562 -0500
+++ linux-trace.git/kernel/trace/trace_output.c 2016-12-14 10:33:28.581929129 -0500
@@ -1121,9 +1121,12 @@ trace_hwlat_print(struct trace_iterator
* The generic sched_clock() is not NMI safe, thus
* we only record the count and not the time.
*/
- if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK))
+ if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
trace_seq_printf(s, " nmi-total:%llu",
field->nmi_total_ts);
+ trace_seq_printf(s, " nmi-max:%llu",
+ field->nmi_max_ts);
+ }
trace_seq_printf(s, " nmi-count:%u",
field->nmi_count);
}