[PATCH 5/9] perf trace: Handle unpaired raw_syscalls:sys_exit event

From: Arnaldo Carvalho de Melo
Date: Fri Mar 31 2017 - 22:13:06 EST


From: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>

Which may happen when we start a tracing session and a thread is waiting
for something like "poll" to return, in which case we better print "?"
both for the syscall entry timestamp and for the duration.

E.g.:

Tracing existing mutt session:

# perf trace -p `pidof mutt`
? ( ? ): mutt/17135 ... [continued]: poll()) = 1
0.027 ( 0.013 ms): mutt/17135 read(buf: 0x7ffcb3c42cef, count: 1) = 1
0.047 ( 0.008 ms): mutt/17135 poll(ufds: 0x7ffcb3c42c50, nfds: 1, timeout_msecs: 1000) = 1
0.059 ( 0.008 ms): mutt/17135 read(buf: 0x7ffcb3c42cef, count: 1) = 1
<SNIP>

Before it would print a large number because we'd do:

ttrace->entry_time - trace->base_time

And entry_time would be 0, while base_time would be the timestamp for
the first event 'perf trace' reads, oops.

Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
Cc: David Ahern <dsahern@xxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Luis Claudio GonÃalves <lclaudio@xxxxxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Wang Nan <wangnan0@xxxxxxxxxx>
Link: http://lkml.kernel.org/n/tip-wbcb93ofva2qdjd5ltn5eeqq@xxxxxxxxxxxxxx
Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
---
tools/perf/builtin-trace.c | 43 ++++++++++++++++++++++++++++++++++---------
1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index c88f9f215e6f..7379792a6504 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -821,12 +821,21 @@ struct syscall {
void **arg_parm;
};

-static size_t fprintf_duration(unsigned long t, FILE *fp)
+/*
+ * We need to have this 'calculated' boolean because in some cases we really
+ * don't know what is the duration of a syscall, for instance, when we start
+ * a session and some threads are waiting for a syscall to finish, say 'poll',
+ * in which case all we can do is to print "( ? ) for duration and for the
+ * start timestamp.
+ */
+static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
{
double duration = (double)t / NSEC_PER_MSEC;
size_t printed = fprintf(fp, "(");

- if (duration >= 1.0)
+ if (!calculated)
+ printed += fprintf(fp, " ? ");
+ else if (duration >= 1.0)
printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
else if (duration >= 0.01)
printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
@@ -1028,13 +1037,27 @@ static bool trace__filter_duration(struct trace *trace, double t)
return t < (trace->duration_filter * NSEC_PER_MSEC);
}

-static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
+static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
{
double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;

return fprintf(fp, "%10.3f ", ts);
}

+/*
+ * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
+ * using ttrace->entry_time for a thread that receives a sys_exit without
+ * first having received a sys_enter ("poll" issued before tracing session
+ * starts, lost sys_enter exit due to ring buffer overflow).
+ */
+static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
+{
+ if (tstamp > 0)
+ return __trace__fprintf_tstamp(trace, tstamp, fp);
+
+ return fprintf(fp, " ? ");
+}
+
static bool done = false;
static bool interrupted = false;

@@ -1045,10 +1068,10 @@ static void sig_handler(int sig)
}

static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
- u64 duration, u64 tstamp, FILE *fp)
+ u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
{
size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
- printed += fprintf_duration(duration, fp);
+ printed += fprintf_duration(duration, duration_calculated, fp);

if (trace->multiple_threads) {
if (trace->show_comm)
@@ -1450,7 +1473,7 @@ static int trace__printf_interrupted_entry(struct trace *trace, struct perf_samp

duration = sample->time - ttrace->entry_time;

- printed = trace__fprintf_entry_head(trace, trace->current, duration, ttrace->entry_time, trace->output);
+ printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
ttrace->entry_pending = false;

@@ -1497,7 +1520,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,

if (sc->is_exit) {
if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
- trace__fprintf_entry_head(trace, thread, 1, ttrace->entry_time, trace->output);
+ trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
}
} else {
@@ -1545,6 +1568,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
{
long ret;
u64 duration = 0;
+ bool duration_calculated = false;
struct thread *thread;
int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
struct syscall *sc = trace__syscall_info(trace, evsel, id);
@@ -1573,6 +1597,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
duration = sample->time - ttrace->entry_time;
if (trace__filter_duration(trace, duration))
goto out;
+ duration_calculated = true;
} else if (trace->duration_filter)
goto out;

@@ -1588,7 +1613,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
if (trace->summary_only)
goto out;

- trace__fprintf_entry_head(trace, thread, duration, ttrace->entry_time, trace->output);
+ trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);

if (ttrace->entry_pending) {
fprintf(trace->output, "%-70s", ttrace->entry_str);
@@ -1855,7 +1880,7 @@ static int trace__pgfault(struct trace *trace,
thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
sample->ip, &al);

- trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
+ trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);

fprintf(trace->output, "%sfault [",
evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
--
2.9.3