[RFC PATCH 5/5] perf trace: Print content of bpf-output event

From: Wang Nan
Date: Thu Feb 25 2016 - 03:52:41 EST


With this patch the contend of BPF output event is printed by
'perf trace'. For example:

# ./perf trace -a --ev bpf-output/no-inherit,name=evt/ \
--ev ./test_bpf_trace.c/map:channel.event=evt/ \
usleep 100000
...
1.787 ( 0.004 ms): usleep/3832 nanosleep(rqtp: 0x7ffc78b18980 ) ...
1.787 ( ): evt:Raise a BPF event!..)
1.788 ( ): perf_bpf_probe:func_begin:(ffffffff810e97d0))
...
101.866 (87.038 ms): gmain/1654 poll(ufds: 0x7f57a80008c0, nfds: 2, timeout_msecs: 1000 ) ...
101.866 ( ): evt:Raise a BPF event!..)
101.867 ( ): perf_bpf_probe:func_end:(ffffffff810e97d0 <- ffffffff81796173))
101.869 (100.087 ms): usleep/3832 ... [continued]: nanosleep()) = 0
...

(There is an extra ')' at the end of several lines. However, it is
another problem, unrelated to this commit.)

Where test_bpf_trace.c is:
/************************ BEGIN **************************/
#include <uapi/linux/bpf.h>
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
#define SEC(NAME) __attribute__((section(NAME), used))
static u64 (*ktime_get_ns)(void) =
(void *)BPF_FUNC_ktime_get_ns;
static int (*trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)BPF_FUNC_trace_printk;
static int (*get_smp_processor_id)(void) =
(void *)BPF_FUNC_get_smp_processor_id;
static int (*perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) =
(void *)BPF_FUNC_perf_event_output;

struct bpf_map_def SEC("maps") channel = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(u32),
.max_entries = __NR_CPUS__,
};

static inline int __attribute__((always_inline))
func(void *ctx, int type)
{
char output_str[] = "Raise a BPF event!";
char err_str[] = "BAD %d\n";
int err;

err = perf_event_output(ctx, &channel, get_smp_processor_id(),
&output_str, sizeof(output_str));
if (err)
trace_printk(err_str, sizeof(err_str), err);
return 1;
}
SEC("func_begin=sys_nanosleep")
int func_begin(void *ctx) {return func(ctx, 1);}
SEC("func_end=sys_nanosleep%return")
int func_end(void *ctx) { return func(ctx, 2);}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
/************************* END ***************************/

Signed-off-by: Wang Nan <wangnan0@xxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Li Zefan <lizefan@xxxxxxxxxx>
Cc: pi3orama@xxxxxxx
---
tools/perf/builtin-trace.c | 48 +++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index b7129d6..88d5ee7 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2178,6 +2178,37 @@ out_dump:
return 0;
}

+static void bpf_output__printer(enum binary_printer_ops op,
+ unsigned int val, void *extra)
+{
+ FILE *output = extra;
+ unsigned char ch = (unsigned char)val;
+
+ switch (op) {
+ case BINARY_PRINT_CHAR_DATA:
+ fprintf(output, "%c", isprint(ch) ? ch : '.');
+ break;
+ case BINARY_PRINT_DATA_BEGIN:
+ case BINARY_PRINT_LINE_BEGIN:
+ case BINARY_PRINT_ADDR:
+ case BINARY_PRINT_NUM_DATA:
+ case BINARY_PRINT_NUM_PAD:
+ case BINARY_PRINT_SEP:
+ case BINARY_PRINT_CHAR_PAD:
+ case BINARY_PRINT_LINE_END:
+ case BINARY_PRINT_DATA_END:
+ default:
+ break;
+ }
+}
+
+static void bpf_output__fprintf(struct trace *trace,
+ struct perf_sample *sample)
+{
+ print_binary(sample->raw_data, sample->raw_size, 8,
+ bpf_output__printer, trace->output);
+}
+
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
@@ -2190,7 +2221,9 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,

fprintf(trace->output, "%s:", evsel->name);

- if (evsel->tp_format) {
+ if (perf_evsel__is_bpf_output(evsel)) {
+ bpf_output__fprintf(trace, sample);
+ } else if (evsel->tp_format) {
event_format__fprintf(evsel->tp_format, sample->cpu,
sample->raw_data, sample->raw_size,
trace->output);
@@ -2526,11 +2559,15 @@ out_enomem:
goto out;
}

-static int validate_evlist(struct perf_evlist *evlist)
+static int validate_evlist(struct perf_evlist *evlist, bool *has_bpf_output)
{
struct perf_evsel *evsel;

evlist__for_each(evlist, evsel) {
+ if (perf_evsel__is_bpf_output(evsel)) {
+ *has_bpf_output = true;
+ continue;
+ }
if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
return -EINVAL;
}
@@ -3118,6 +3155,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
const char * const trace_subcommands[] = { "record", NULL };
int err;
char bf[BUFSIZ];
+ bool has_bpf_output = false;

signal(SIGSEGV, sighandler_dump_stack);
signal(SIGFPE, sighandler_dump_stack);
@@ -3135,12 +3173,12 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);

- if (validate_evlist(trace.evlist)) {
- pr_err("Only support tracepoint events!\n");
+ if (validate_evlist(trace.evlist, &has_bpf_output)) {
+ pr_err("Only support tracepoint and bpf-output events!\n");
return -EINVAL;
}

- if (trace.trace_pgfaults) {
+ if (trace.trace_pgfaults || has_bpf_output) {
trace.opts.sample_address = true;
trace.opts.sample_time = true;
}
--
1.8.3.4