[PATCH v3 2/2] perf: add page fault duration measures in perf trace
From: Alexis Berlemont
Date: Thu Nov 17 2016 - 19:26:48 EST
Thanks to the tracepoints exceptions:page_fault_kernel_exit and
exceptions:page_fault_user_exit, the subcommand perf-trace can now
print the duration of page-fault exception handling just like it is
done for syscalls.
Here is the result; the page fault durations are indicated in
parenthesis (like syscalls):
Here is an example; the durations are indicated in parenthesis just
like with syscalls:
0.053 ( 0.053 ms): ls/152 minfault [__clear_user+0x25] => 0x694e7e (?k)
0.114 ( 0.027 ms): ls/152 minfault [__clear_user+0x25] => 0x7fb912eaafc0 (?k)
0.201 ( 0.030 ms): ls/152 minfault [_start+0x0] => /lib/ld-2.23.so@0xc70 (x.)
...
Signed-off-by: Alexis Berlemont <alexis.berlemont@xxxxxxxxx>
---
tools/perf/Documentation/perf-trace.txt | 4 +-
tools/perf/builtin-trace.c | 231 ++++++++++++++++++++++++++++----
2 files changed, 208 insertions(+), 27 deletions(-)
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 781b019..53c103c 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -117,7 +117,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
-F=[all|min|maj]::
--pf=[all|min|maj]::
Trace pagefaults. Optionally, you can specify whether you want minor,
- major or all pagefaults. Default value is maj.
+ major or all pagefaults. Default value is maj. Durations of
+ page-fault handling will be printed if possible (need for some
+ architecture-dependent tracepoints).
--syscalls::
Trace system calls. This options is enabled by default.
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 5f45166..f0957a8 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -861,6 +861,8 @@ struct thread_trace {
} paths;
struct intlist *syscall_stats;
+ u64 pgfault_entry_time;
+ char *pgfault_entry_str;
};
static struct thread_trace *thread_trace__new(void)
@@ -1797,21 +1799,56 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
return 0;
}
-static void print_location(FILE *f, struct perf_sample *sample,
- struct addr_location *al,
- bool print_dso, bool print_sym)
+static int trace__pgfault_enter(struct trace *trace,
+ struct perf_evsel *evsel __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample)
{
+ struct thread *thread;
+ struct thread_trace *ttrace;
+ int err = -1;
+
+ thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+ if (!thread)
+ goto out;
+
+ ttrace = thread__trace(thread, trace->output);
+ if (ttrace == NULL)
+ goto out_put;
+
+ ttrace->pgfault_entry_time = sample->time;
+
+out:
+ err = 0;
+out_put:
+ thread__put(thread);
+ return err;
+}
+
+static size_t scnprintf_location(char *bf, size_t size,
+ struct perf_sample *sample,
+ struct addr_location *al,
+ bool print_dso, bool print_sym)
+{
+ size_t printed = 0;
if ((verbose || print_dso) && al->map)
- fprintf(f, "%s@", al->map->dso->long_name);
+ printed += scnprintf(bf + printed,
+ size - printed, "%s@", al->map->dso->long_name);
if ((verbose || print_sym) && al->sym)
- fprintf(f, "%s+0x%" PRIx64, al->sym->name,
- al->addr - al->sym->start);
+ printed += scnprintf(bf + printed,
+ size - printed,
+ "%s+0x%" PRIx64, al->sym->name,
+ al->addr - al->sym->start);
else if (al->map)
- fprintf(f, "0x%" PRIx64, al->addr);
+ printed += scnprintf(bf + printed,
+ size - printed, "0x%" PRIx64, al->addr);
else
- fprintf(f, "0x%" PRIx64, sample->addr);
+ printed += scnprintf(bf + printed,
+ size - printed, "0x%" PRIx64, sample->addr);
+
+ return printed;
}
static int trace__pgfault(struct trace *trace,
@@ -1823,13 +1860,22 @@ static int trace__pgfault(struct trace *trace,
struct addr_location al;
char map_type = 'd';
struct thread_trace *ttrace;
+ size_t printed = 0;
int err = -1;
int callchain_ret = 0;
thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+ if (!thread)
+ goto out;
- if (sample->callchain) {
- callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+ ttrace = thread__trace(thread, trace->output);
+ if (ttrace == NULL)
+ goto out_put;
+
+ if (!ttrace->pgfault_entry_time && sample->callchain) {
+ callchain_ret =
+ trace__resolve_callchain(trace, evsel,
+ sample, &callchain_cursor);
if (callchain_ret == 0) {
if (callchain_cursor.nr < trace->min_stack)
goto out_put;
@@ -1837,10 +1883,6 @@ static int trace__pgfault(struct trace *trace,
}
}
- ttrace = thread__trace(thread, trace->output);
- if (ttrace == NULL)
- goto out_put;
-
if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
ttrace->pfmaj++;
else
@@ -1849,18 +1891,27 @@ static int trace__pgfault(struct trace *trace,
if (trace->summary_only)
goto out;
+ if (ttrace->pgfault_entry_str == NULL) {
+ ttrace->pgfault_entry_str = malloc(trace__entry_str_size);
+ if (!ttrace->pgfault_entry_str)
+ goto out_put;
+ ttrace->pgfault_entry_str[0] = '\0';
+ }
+
thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
sample->ip, &al);
- trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
-
- fprintf(trace->output, "%sfault [",
- evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
- "maj" : "min");
+ printed += scnprintf(ttrace->pgfault_entry_str + printed,
+ trace__entry_str_size - printed, "%sfault [",
+ evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
+ "maj" : "min");
- print_location(trace->output, sample, &al, false, true);
+ printed += scnprintf_location(ttrace->pgfault_entry_str + printed,
+ trace__entry_str_size - printed,
+ sample, &al, false, true);
- fprintf(trace->output, "] => ");
+ printed += scnprintf(ttrace->pgfault_entry_str + printed,
+ trace__entry_str_size - printed, "] => ");
thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
sample->addr, &al);
@@ -1875,14 +1926,97 @@ static int trace__pgfault(struct trace *trace,
map_type = '?';
}
- print_location(trace->output, sample, &al, true, false);
+ printed += scnprintf_location(ttrace->pgfault_entry_str + printed,
+ trace__entry_str_size - printed,
+ sample, &al, true, false);
+
+ printed += scnprintf(ttrace->pgfault_entry_str + printed,
+ trace__entry_str_size - printed,
+ " (%c%c)", map_type, al.level);
+
+ if (!ttrace->pgfault_entry_time) {
+ trace__fprintf_entry_head(trace, thread,
+ 0, sample->time, trace->output);
+ fprintf(trace->output, "%-70s\n", ttrace->pgfault_entry_str);
+
+ if (callchain_ret > 0)
+ trace__fprintf_callchain(trace, sample);
+ else if (callchain_ret < 0)
+ pr_err("Problem processing %s callchain, skipping...\n",
+ perf_evsel__name(evsel));
+ }
+
+out:
+ err = 0;
+out_put:
+ thread__put(thread);
+ return err;
+}
+
+static int trace__pgfault_exit(struct trace *trace, struct perf_evsel *evsel,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample)
+{
+ struct thread *thread;
+ struct thread_trace *ttrace;
+ u64 duration = 0;
+ int err = -1, callchain_ret = 0;
+
+ thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+
+ ttrace = thread__priv(thread);
+ if (!ttrace)
+ goto out_put;
+
+ if (!ttrace->pgfault_entry_time)
+ goto out;
+
+ /*
+ * The check below is necessary for a specific case: it is
+ * possible to enable only major (or minor) page faults
+ * software events but it is impossible to filter page-fault
+ * related tracepoints according to the major / minor
+ * characteristic.
+ */
+
+ if (!ttrace->pgfault_entry_str ||
+ strlen(ttrace->pgfault_entry_str) == 0)
+ goto out;
+
+ if (sample->callchain) {
+ callchain_ret =
+ trace__resolve_callchain(trace, evsel,
+ sample, &callchain_cursor);
+ if (callchain_ret == 0) {
+ if (callchain_cursor.nr < trace->min_stack)
+ goto out;
+ callchain_ret = 1;
+ }
+ }
+
+ if (ttrace->pgfault_entry_time) {
+ duration = sample->time - ttrace->pgfault_entry_time;
+ if (trace__filter_duration(trace, duration))
+ goto out;
+ }
+
+ trace__fprintf_entry_head(trace, thread,
+ duration, sample->time, trace->output);
- fprintf(trace->output, " (%c%c)\n", map_type, al.level);
+ fprintf(trace->output, "%-70s\n", ttrace->pgfault_entry_str);
+
+ /*
+ * Once the string is printed; clear it just in case the next
+ * software events is filtered (because of major / minor.
+ */
+ ttrace->pgfault_entry_str[0] = '\0';
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+ pr_err("Problem processing %s callchain, skipping...\n",
+ perf_evsel__name(evsel));
+
out:
err = 0;
out_put:
@@ -2062,6 +2196,43 @@ static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
return evsel;
}
+static int trace__add_pgfault_newtp(struct trace *trace)
+{
+ int err = -1;
+ struct perf_evlist *evlist = trace->evlist;
+
+ /*
+ * The tracepoints exceptions::page_fault_* are not available
+ * on all architecture.; so, we consider it is not an error if
+ * the 1st one's initialization is KO...
+ */
+ err = perf_evlist__add_newtp(evlist, "exceptions",
+ "page_fault_kernel_exit", trace__pgfault_exit);
+ if (err < 0)
+ return 0;
+
+ /*
+ * ...however, if the 2nd, 3rd or 4th init is KO; there is
+ * definitely something wrong not related with the absence of
+ * tracepoints
+ */
+ err = perf_evlist__add_newtp(evlist, "exceptions",
+ "page_fault_user_exit", trace__pgfault_exit);
+ if (err < 0)
+ return -1;
+
+ err = perf_evlist__add_newtp(evlist, "exceptions",
+ "page_fault_kernel_entry",
+ trace__pgfault_enter);
+ if (err < 0)
+ return -1;
+
+ err = perf_evlist__add_newtp(evlist, "exceptions",
+ "page_fault_user_entry", trace__pgfault_enter);
+
+ return err;
+}
+
static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
{
const u32 type = event->header.type;
@@ -2193,9 +2364,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
perf_evlist__add(evlist, pgfault_min);
}
+ if (trace->trace_pgfaults && trace__add_pgfault_newtp(trace))
+ goto out_error_pgfault_tp;
+
if (trace->sched &&
- perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
- trace__sched_stat_runtime))
+ perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
+ trace__sched_stat_runtime))
goto out_error_sched_stat_runtime;
err = perf_evlist__create_maps(evlist, &trace->opts.target);
@@ -2399,6 +2573,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
goto out_error;
+out_error_pgfault_tp:
+ tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf),
+ "exceptions", "page_fault_(kernel|user)");
+ goto out_error;
+
out_error_mmap:
perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
goto out_error;
--
2.10.2