[PATCH v4 2/7] perf/sdt: Directly record SDT events with 'perf record'

From: Ravi Bangoria
Date: Mon Mar 06 2017 - 09:48:51 EST


From: Hemant Kumar <hemant@xxxxxxxxxxxxxxxxxx>

Add basic support for directly recording SDT events which are present
in the probe cache. Without this patch, recording an SDT event takes two
steps: create the probe point with 'perf probe', then record it with
'perf record'. With this patch, 'perf record' alone is enough.

For example:

$ sudo ./perf list sdt
sdt_libpthread:mutex_entry [SDT event]
sdt_libc:setjmp [SDT event]

$ sudo ./perf record -a -e sdt_libc:setjmp

$ sudo ./perf script
bash 793 [002] 260.382957: sdt_libc:setjmp: (7ff85b6596a1)
reset 1296 [000] 260.511983: sdt_libc:setjmp: (7f26862e06a1)

Recording on SDT events with same provider and marker names is also
supported:

$ readelf -n /usr/lib64/libpthread-2.24.so | grep -A2 Provider
Provider: libpthread
Name: mutex_entry
Location: 0x0000000000009ddb, Base: 0x00000000000139cc, ...
--
Provider: libpthread
Name: mutex_entry
Location: 0x000000000000bcbb, Base: 0x00000000000139cc, ...

$ sudo ./perf record -a -e sdt_libpthread:mutex_entry
Warning : Recording on 2 occurences of sdt_libpthread:mutex_entry

$ sudo ./perf evlist
sdt_libpthread:mutex_entry_1
sdt_libpthread:mutex_entry

After invoking 'perf record', behind the scenes, it checks whether the
event specified is an SDT event using the string 'sdt_' or flag '%'.
After that, it does a lookup of the probe cache to find out the SDT
event. If it's not present, it throws an error. Otherwise, it goes on
and writes the event into the uprobe_events file and starts recording.
It also maintains a list of the event names that were written to
uprobe_events file. At the end of the record session, it removes the
events from the uprobe_events file using the maintained name list.

As mentioned, it always looks for the SDT event in the probe cache and
ignores existing entries in uprobe_events. Hence, it creates new probe
points for the event even if probe points for it already exist.

$ sudo ./perf probe sdt_libpthread:mutex_entry
Added new events:
sdt_libpthread:mutex_entry (on %mutex_entry in /usr/lib64/libpthread-2.24.so)
sdt_libpthread:mutex_entry_1 (on %mutex_entry in /usr/lib64/libpthread-2.24.so)

$ sudo ./perf record -a -e sdt_libpthread:mutex_entry
Warning : Recording on 2 occurences of sdt_libpthread:mutex_entry

$ sudo ./perf evlist
sdt_libpthread:mutex_entry_3
sdt_libpthread:mutex_entry_2

As it does not look at the uprobe_events file, it can't record events
whose probe points were created under a different name. For example:

$ sudo ./perf record -a -e sdt_libpthread:mutex_entry_1
Error: sdt_libpthread:mutex_entry_1 not found in the cache
invalid or unsupported event: 'sdt_libpthread:mutex_entry_1'

Signed-off-by: Hemant Kumar <hemant@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxxxxxxxxxxxxx>
---
tools/perf/builtin-record.c | 24 ++++++++
tools/perf/perf.h | 1 +
tools/perf/util/parse-events.c | 56 +++++++++++++++++-
tools/perf/util/parse-events.h | 2 +
tools/perf/util/probe-event.c | 35 ++++++++++-
tools/perf/util/probe-event.h | 4 ++
tools/perf/util/probe-file.c | 131 +++++++++++++++++++++++++++++++++++++++++
tools/perf/util/probe-file.h | 8 +++
8 files changed, 257 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index bc84a37..e87b19b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -39,6 +39,7 @@
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "asm/bug.h"
+#include "util/probe-file.h"

#include <unistd.h>
#include <sched.h>
@@ -73,6 +74,7 @@ struct record {
bool timestamp_filename;
struct switch_output switch_output;
unsigned long long samples;
+ struct list_head sdt_event_list;
};

static volatile int auxtrace_record__snapshot_started;
@@ -1503,6 +1505,26 @@ static struct record record = {
},
};

+void sdt_event_list__add(struct list_head *sdt_event_list)
+{
+ if (list_empty(sdt_event_list))
+ return;
+ list_splice(sdt_event_list, &record.sdt_event_list);
+}
+
+bool is_cmd_record(void)
+{
+ return (record.evlist != NULL);
+}
+
+static void
+sdt_event_list__remove(struct list_head *sdt_event_list __maybe_unused)
+{
+#ifdef HAVE_LIBELF_SUPPORT
+ return remove_sdt_event_list(sdt_event_list);
+#endif
+}
+
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
"\n\t\t\t\tDefault: fp";

@@ -1671,6 +1693,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
if (rec->evlist == NULL)
return -ENOMEM;

+ INIT_LIST_HEAD(&rec->sdt_event_list);
err = perf_config(perf_record_config, rec);
if (err)
return err;
@@ -1841,6 +1864,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
perf_evlist__delete(rec->evlist);
symbol__exit();
auxtrace_record__free(rec->itr);
+ sdt_event_list__remove(&rec->sdt_event_list);
return err;
}

diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 1c27d94..9d8e5fe 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -76,4 +76,5 @@ struct record_opts {
struct option;
extern const char * const *record_usage;
extern struct option *record_options;
+bool is_cmd_record(void);
#endif
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 54355d3..1fcc9d13 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1727,12 +1727,66 @@ static void parse_events_print_error(struct parse_events_error *err,

#undef MAX_WIDTH

+/* SDT event needs LIBELF support for creating a probe point */
+#ifdef HAVE_LIBELF_SUPPORT
+static int parse_sdt_event(struct perf_evlist *evlist, const char *str,
+ struct parse_events_error *err)
+{
+ char *ptr = NULL;
+ int ret;
+ struct list_head *sdt_evlist;
+ struct sdt_event_list *sdt_event;
+
+ if (str[0] == '%')
+ str++;
+
+ ptr = strdup(str);
+ if (ptr == NULL)
+ return -ENOMEM;
+
+ sdt_evlist = zalloc(sizeof(*sdt_evlist));
+ if (!sdt_evlist) {
+ free(ptr);
+ pr_debug("Error in sdt_evlist memory allocation\n");
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(sdt_evlist);
+
+ /*
+ * If there is an error in this call, no need to free
+ * up sdt_evlist, it's already freed up in the previous
+ * call. Free up 'ptr' though.
+ */
+ ret = add_sdt_event(ptr, sdt_evlist);
+ if (!ret) {
+ list_for_each_entry(sdt_event, sdt_evlist, list) {
+ ret = parse_events(evlist, sdt_event->name, err);
+ if (ret < 0)
+ goto ret;
+ }
+ /* Add it to the record struct */
+ sdt_event_list__add(sdt_evlist);
+ }
+
+ret:
+ free(ptr);
+ return ret;
+}
+#endif /* HAVE_LIBELF_SUPPORT */
+
int parse_events_option(const struct option *opt, const char *str,
int unset __maybe_unused)
{
struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
struct parse_events_error err = { .idx = 0, };
- int ret = parse_events(evlist, str, &err);
+ int ret = 0;
+
+#ifdef HAVE_LIBELF_SUPPORT
+ if (is_sdt_event((char *)str) && is_cmd_record())
+ ret = parse_sdt_event(evlist, str, &err);
+ else
+#endif
+ ret = parse_events(evlist, str, &err);

if (ret)
parse_events_print_error(&err, str);
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index c6172cd..0887269 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -208,4 +208,6 @@ static inline bool is_sdt_event(char *str)
(!strncmp(str, "sdt_", 4) &&
!!strchr(str, ':') && !strchr(str, '=')));
}
+
+void sdt_event_list__add(struct list_head *sdt_event_list);
#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 2b1409f..b879076 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1293,7 +1293,7 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
return err;
}

-static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
+int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
{
char *ptr;

@@ -3125,8 +3125,8 @@ static int find_cached_events(struct perf_probe_event *pev,
}

/* Try to find probe_trace_event from all probe caches */
-static int find_cached_events_all(struct perf_probe_event *pev,
- struct probe_trace_event **tevs)
+int find_cached_events_all(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
{
struct probe_trace_event *tmp_tevs = NULL;
struct strlist *bidlist;
@@ -3476,3 +3476,32 @@ int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
tvar->name = NULL;
return 0;
}
+
+/*
+ * Record session for SDT events has ended. Delete the SDT events
+ * from uprobe_events file that were created initially.
+ */
+void remove_sdt_event_list(struct list_head *sdt_events)
+{
+ struct sdt_event_list *sdt_event;
+ struct strfilter *filter = NULL;
+ const char *err = NULL;
+
+ if (list_empty(sdt_events))
+ return;
+
+ list_for_each_entry(sdt_event, sdt_events, list) {
+ if (!filter) {
+ filter = strfilter__new(sdt_event->name, &err);
+ if (!filter)
+ goto free_list;
+ } else {
+ strfilter__or(filter, sdt_event->name, &err);
+ }
+ }
+
+ del_perf_probe_events(filter);
+
+free_list:
+ free_sdt_list(sdt_events);
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 5d4e940..91e277e 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -182,4 +182,8 @@ struct map *get_target_map(const char *target, bool user);
void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
int ntevs);

+int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev);
+
+int find_cached_events_all(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs);
#endif /*_PROBE_EVENT_H */
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 1a62dac..c1cf67f 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -27,8 +27,10 @@
#include "probe-event.h"
#include "probe-file.h"
#include "session.h"
+#include "probe-finder.h"

#define MAX_CMDLEN 256
+#define MAX_EVENT_LENGTH 512

static void print_open_warning(int err, bool uprobe)
{
@@ -935,3 +937,132 @@ bool probe_type_is_available(enum probe_type type)

return ret;
}
+
+void free_sdt_list(struct list_head *sdt_evlist)
+{
+ struct sdt_event_list *tmp, *ptr;
+
+ if (list_empty(sdt_evlist))
+ return;
+ list_for_each_entry_safe(tmp, ptr, sdt_evlist, list) {
+ list_del(&tmp->list);
+ free(tmp->name);
+ free(tmp);
+ }
+}
+
+static int get_sdt_events_from_cache(struct perf_probe_event *pev)
+{
+ int ret = 0;
+
+ pev->ntevs = find_cached_events_all(pev, &pev->tevs);
+
+ if (pev->ntevs < 0) {
+ pr_err("Error: Cache lookup failed (code: %d)\n", pev->ntevs);
+ ret = pev->ntevs;
+ } else if (!pev->ntevs) {
+ pr_err("Error: %s:%s not found in the cache\n",
+ pev->group, pev->event);
+ ret = -EINVAL;
+ } else if (pev->ntevs > 1) {
+ pr_warning("Warning : Recording on %d occurences of %s:%s\n",
+ pev->ntevs, pev->group, pev->event);
+ }
+
+ return ret;
+}
+
+static int add_event_to_sdt_evlist(struct probe_trace_event *tev,
+ struct list_head *sdt_evlist)
+{
+ struct sdt_event_list *tmp;
+
+ tmp = zalloc(sizeof(*tmp));
+ if (!tmp)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&tmp->list);
+ tmp->name = zalloc(MAX_EVENT_LENGTH * sizeof(char));
+ if (!tmp->name)
+ return -ENOMEM;
+
+ snprintf(tmp->name, strlen(tev->group) + strlen(tev->event) + 2,
+ "%s:%s", tev->group, tev->event);
+ list_add(&tmp->list, sdt_evlist);
+
+ return 0;
+}
+
+static int add_events_to_sdt_evlist(struct perf_probe_event *pev,
+ struct list_head *sdt_evlist)
+{
+ int i, ret;
+
+ for (i = 0; i < pev->ntevs; i++) {
+ ret = add_event_to_sdt_evlist(&pev->tevs[i], sdt_evlist);
+
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * Find the SDT event from the cache and if found add it/them
+ * to the uprobe_events file
+ */
+int add_sdt_event(char *event, struct list_head *sdt_evlist)
+{
+ struct perf_probe_event *pev;
+ int ret;
+
+ pev = zalloc(sizeof(*pev));
+ if (!pev)
+ return -ENOMEM;
+
+ pev->sdt = true;
+ pev->uprobes = true;
+
+ /*
+ * Parse event to find the group name and event name of
+ * the sdt event.
+ */
+ ret = parse_perf_probe_event_name(&event, pev);
+ if (ret) {
+ pr_err("Error in parsing sdt event %s\n", event);
+ free(pev);
+ return ret;
+ }
+
+ probe_conf.max_probes = MAX_PROBES;
+ probe_conf.force_add = 1;
+
+ /* Fetch all matching events from cache. */
+ ret = get_sdt_events_from_cache(pev);
+ if (ret < 0)
+ goto free_pev;
+
+ /*
+ * Create probe point for all events by adding them in
+ * uprobe_events file
+ */
+ ret = apply_perf_probe_events(pev, 1);
+ if (ret) {
+ pr_err("Error in adding SDT event : %s\n", event);
+ goto free_pev;
+ }
+
+ /* Add events to sdt_evlist */
+ ret = add_events_to_sdt_evlist(pev, sdt_evlist);
+ if (ret < 0)
+ goto free_pev;
+
+ ret = 0;
+
+free_pev:
+ if (ret < 0)
+ free_sdt_list(sdt_evlist);
+ cleanup_perf_probe_events(pev, 1);
+ free(pev);
+ return ret;
+}
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
index a17a82e..6d2d3e5 100644
--- a/tools/perf/util/probe-file.h
+++ b/tools/perf/util/probe-file.h
@@ -19,6 +19,11 @@ struct probe_cache {
struct list_head entries;
};

+struct sdt_event_list {
+ char *name; /* group:event */
+ struct list_head list;
+};
+
enum probe_type {
PROBE_TYPE_U = 0,
PROBE_TYPE_S,
@@ -65,6 +70,9 @@ struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache,
const char *group, const char *event);
int probe_cache__show_all_caches(struct strfilter *filter);
bool probe_type_is_available(enum probe_type type);
+int add_sdt_event(char *event, struct list_head *sdt_event_list);
+void remove_sdt_event_list(struct list_head *sdt_event_list);
+void free_sdt_list(struct list_head *sdt_events);
#else /* ! HAVE_LIBELF_SUPPORT */
static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused)
{
--
2.9.3