[PATCH 05/10] perf tools: Add dynamic sort key for tracepoint events

From: Namhyung Kim
Date: Tue Dec 15 2015 - 10:38:53 EST


The existing sort keys are less useful for tracepoint events in that
they are always sampled at a same location.

For example, report on sched:sched_switch event looks like following

# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule

In fact, tracepoints have meaningful information in their fields but
there's no way to use in the perf report currently. The dynamic sort
keys are to overcome this problem.

The sched:sched_switch events have following fields:

# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;

field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;

print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio

With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field on the same data file.

$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233

Multiple dynamic sort keys are also supported:

$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi

Note that pid 0 exists for each cpu so have comm of 'swapper/N'.

Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
---
tools/perf/util/sort.c | 223 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 223 insertions(+)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 0c038a27fe5c..f2bac1c149a9 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1531,6 +1531,226 @@ static int __sort_dimension__add_hpp_output(struct sort_dimension *sd)
return 0;
}

+struct hpp_dynamic_entry {
+ struct perf_hpp_fmt hpp;
+ struct perf_evsel *evsel;
+ struct format_field *field;
+ unsigned dynamic_len;
+};
+
+static int hde_width(struct hpp_dynamic_entry *hde)
+{
+ if (!hde->hpp.len) {
+ int len = hde->dynamic_len;
+ int namelen = strlen(hde->field->name);
+ int fieldlen = hde->field->size;
+
+ if (namelen > len)
+ len = namelen;
+
+ if (!(hde->field->flags & FIELD_IS_STRING)) {
+ /* length for print hex numbers */
+ fieldlen = hde->field->size * 2 + 2;
+ }
+ if (fieldlen > len)
+ len = fieldlen;
+
+ hde->hpp.len = len;
+ }
+ return hde->hpp.len;
+}
+
+static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct perf_evsel *evsel __maybe_unused)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ return scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, hde->field->name);
+}
+
+static int __sort__hde_width(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct perf_evsel *evsel __maybe_unused)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ return len;
+}
+
+static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+ struct trace_seq seq;
+ int ret;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ if (hists_to_evsel(he->hists) != hde->evsel)
+ return scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, "N/A");
+
+ trace_seq_init(&seq);
+ print_event_field(&seq, he->raw_data, hde->field);
+ ret = scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, seq.buffer);
+ trace_seq_destroy(&seq);
+ return ret;
+}
+
+static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct hpp_dynamic_entry *hde;
+ struct format_field *field;
+ unsigned offset, size;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (hists_to_evsel(a->hists) != hde->evsel)
+ return 0;
+
+ field = hde->field;
+ if (field->flags & FIELD_IS_DYNAMIC) {
+ unsigned long long dyn;
+
+ pevent_read_number_field(field, a->raw_data, &dyn);
+ offset = dyn & 0xffff;
+ size = (dyn >> 16) & 0xffff;
+
+ /* record max width for output */
+ if (size > hde->dynamic_len)
+ hde->dynamic_len = size;
+ } else {
+ offset = field->offset;
+ size = field->size;
+ }
+
+ return memcmp(a->raw_data + offset, b->raw_data + offset, size);
+}
+
+static struct hpp_dynamic_entry *
+__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = malloc(sizeof(*hde));
+ if (hde == NULL) {
+ pr_debug("Memory allocation failed\n");
+ return NULL;
+ }
+
+ hde->evsel = evsel;
+ hde->field = field;
+ hde->dynamic_len = 0;
+
+ hde->hpp.name = field->name;
+ hde->hpp.header = __sort__hde_header;
+ hde->hpp.width = __sort__hde_width;
+ hde->hpp.entry = __sort__hde_entry;
+ hde->hpp.color = NULL;
+
+ hde->hpp.cmp = __sort__hde_cmp;
+ hde->hpp.collapse = __sort__hde_cmp;
+ hde->hpp.sort = __sort__hde_cmp;
+
+ INIT_LIST_HEAD(&hde->hpp.list);
+ INIT_LIST_HEAD(&hde->hpp.sort_list);
+ hde->hpp.elide = false;
+ hde->hpp.len = 0;
+ hde->hpp.user_len = 0;
+
+ return hde;
+}
+
+static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
+{
+ char *str, *event_name, *field_name;
+ struct perf_evsel *evsel, *pos;
+ struct event_format *format;
+ struct format_field *field;
+ struct hpp_dynamic_entry *hde;
+ int ret = 0;
+
+ if (evlist == NULL)
+ return -ENOENT;
+
+ str = strdup(tok);
+ if (str == NULL)
+ return -ENOMEM;
+
+ event_name = str;
+ field_name = strchr(str, '.');
+ if (field_name == NULL) {
+ ret = -EINVAL;
+ goto out;
+ }
+ *field_name++ = '\0';
+
+ evsel = NULL;
+ evlist__for_each(evlist, pos) {
+ if (!strcmp(pos->name, event_name)) {
+ evsel = pos;
+ break;
+ }
+ }
+
+ if (evsel == NULL) {
+ pr_debug("Cannot find event: %s\n", event_name);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+ pr_debug("%s is not a tracepoint event\n", event_name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ format = pevent_find_event(evsel->tp_format->pevent,
+ evsel->attr.config);
+ if (format == NULL) {
+ pr_debug("Cannot find event format for %s (id: %u)\n",
+ event_name, (unsigned) evsel->attr.config);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ field = pevent_find_any_field(format, field_name);
+ if (field == NULL) {
+ pr_debug("Cannot find event field for %s.%s\n",
+ event_name, field_name);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ hde = __alloc_dynamic_entry(evsel, field);
+ if (hde == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ perf_hpp__register_sort_field(&hde->hpp);
+
+out:
+ free(str);
+ return ret;
+}
+
static int __sort_dimension__add(struct sort_dimension *sd)
{
if (sd->taken)
@@ -1667,6 +1887,9 @@ static int sort_dimension__add(const char *tok,
return 0;
}

+ if (!add_dynamic_entry(evlist, tok))
+ return 0;
+
return -ESRCH;
}

--
2.6.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/