[PATCH v2] perf/sdt: Directly record SDT events
From: Hemant Kumar
Date: Tue May 03 2016 - 17:15:13 EST
This patch adds support for directly recording SDT events which are
present in the probe cache. This patch is based on current SDT
enablement patchset (v5) by Masami :
https://lkml.org/lkml/2016/4/27/828
and it implements two points in the TODO list mentioned in the
cover note :
"- (perf record) Support SDT event recording directly"
"- (perf record) Try to unregister SDT events after record."
Without this patch, recording an SDT event takes two steps: first
"perf probe" to create the probe, then "perf record" on that probe.
With this patch, SDT events can be recorded directly using "perf record".
For example :
# perf list sdt // List the SDT events
...
sdt_mysql:update__row__done [SDT event]
sdt_mysql:update__row__start [SDT event]
sdt_mysql:update__start [SDT event]
sdt_python:function__entry [SDT event]
sdt_python:function__return [SDT event]
sdt_test:marker1 [SDT event]
sdt_test:marker2 [SDT event]
...
# perf record -e %sdt_test:marker1 -e %sdt_test:marker2 -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 2.087 MB perf.data (22 samples) ]
# perf script
test_sdt 29230 [002] 405550.548017: sdt_test:marker1: (400534)
test_sdt 29230 [002] 405550.548064: sdt_test:marker2: (40053f)
test_sdt 29231 [002] 405550.962806: sdt_test:marker1: (400534)
test_sdt 29231 [002] 405550.962841: sdt_test:marker2: (40053f)
test_sdt 29232 [001] 405551.379327: sdt_test:marker1: (400534)
...
Recording on SDT events with same provider and marker names is also
supported :
# readelf -n ./test_sdt | grep test
Provider: test
Provider: test
Provider: test
# readelf -n ./test_sdt | grep -A2 test
Provider: test
Name: marker1
Location: 0x000000000040053e, Base: 0x0000000000400607, Semaphore: 0x0000000000000000
--
Provider: test
Name: marker1
Location: 0x0000000000400545, Base: 0x0000000000400607, Semaphore: 0x0000000000000000
--
Provider: test
Name: marker1
Location: 0x0000000000400550, Base: 0x0000000000400607, Semaphore: 0x0000000000000000
# perf record -e %sdt_test:marker1 -a
Warning : Recording on 3 occurences of sdt_test:marker1
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 2.208 MB perf.data (12 samples) ]
# perf script
...
test_sdt 1445 [001] 578689.792146: sdt_test:marker1_1: (400545)
test_sdt 1445 [001] 578689.792168: sdt_test:marker1_2: (400550)
test_sdt 1445 [001] 578689.792170: sdt_test:marker1: (40053e)
test_sdt 1454 [000] 578690.436117: sdt_test:marker1_1: (400545)
...
When "perf record" is invoked, it checks behind the scenes whether a
specified event is an SDT event by looking for the '%' prefix. If so, it
looks the event up in the probe cache and throws an error if it is not
present. Otherwise, it writes the event into the uprobe_events file,
sets up the probe event, trace events, etc. and starts recording. It also
maintains a list of the event names that were written to the
uprobe_events file. After the record session finishes, it removes those
events from the uprobe_events file using the maintained name list.
Signed-off-by: Hemant Kumar <hemant@xxxxxxxxxxxxxxxxxx>
---
Changes since v1:
- Added support for recording on multiple SDT events with same names.
- Added support to show warning if multiple SDT events with same names are present.
- Used del_perf_probe_events() to delete the SDT events after recording is done.
- Moved function remove_sdt_event_list() to util/probe-event.c
- Added a function parse_sdt_event() to parse an SDT event from a string.
- Used find_cached_events_all() to find SDT events.
- Used group:event as a strfilter to delete the SDT events from the uprobe_events file.
(Thanks Masami for the above suggestions!)
tools/perf/builtin-record.c | 21 +++++++++
tools/perf/perf.h | 2 +
tools/perf/util/parse-events.c | 53 +++++++++++++++++++++-
tools/perf/util/parse-events.h | 1 +
tools/perf/util/probe-event.c | 40 ++++++++++++++++-
tools/perf/util/probe-event.h | 4 ++
tools/perf/util/probe-file.c | 99 ++++++++++++++++++++++++++++++++++++++++++
tools/perf/util/probe-file.h | 8 ++++
8 files changed, 226 insertions(+), 2 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 515510e..104eafe 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -34,6 +34,7 @@
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "asm/bug.h"
+#include "util/probe-file.h"
#include <unistd.h>
#include <sched.h>
@@ -56,6 +57,7 @@ struct record {
bool no_buildid_cache_set;
bool buildid_all;
unsigned long long samples;
+ struct list_head sdt_event_list;
};
static int record__write(struct record *rec, void *bf, size_t size)
@@ -1077,6 +1079,23 @@ static struct record record = {
},
};
+/*
+ * Hand the SDT events collected while parsing an event option over to
+ * the record session (record.sdt_event_list), which removes them again
+ * when cmd_record() finishes. An empty list is ignored.
+ */
+void sdt_event_list__add(struct list_head *sdt_event_list)
+{
+	if (!list_empty(sdt_event_list))
+		list_splice(sdt_event_list, &record.sdt_event_list);
+}
+
+/* Report whether the record command has set up its event list. */
+bool is_cmd_record(void)
+{
+	return record.evlist ? true : false;
+}
+
+/*
+ * Remove the SDT events that were set up for this record session.
+ * Thin wrapper around remove_sdt_event_list().
+ */
+static void sdt_event_list__remove(struct list_head *sdt_event_list)
+{
+	/* A void function must not "return" an expression (ISO C 6.8.6.4). */
+	remove_sdt_event_list(sdt_event_list);
+}
+
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
"\n\t\t\t\tDefault: fp";
@@ -1231,6 +1250,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
if (rec->evlist == NULL)
return -ENOMEM;
+ INIT_LIST_HEAD(&rec->sdt_event_list);
perf_config(perf_record_config, rec);
argc = parse_options(argc, argv, record_options, record_usage,
@@ -1330,6 +1350,7 @@ out_symbol_exit:
perf_evlist__delete(rec->evlist);
symbol__exit();
auxtrace_record__free(rec->itr);
+ sdt_event_list__remove(&rec->sdt_event_list);
return err;
}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 5381a01..44cd364 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -80,4 +80,6 @@ struct record_opts {
struct option;
extern const char * const *record_usage;
extern struct option *record_options;
+bool is_cmd_record(void);
+
#endif
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4033dce..dc97660 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1682,13 +1682,64 @@ static void parse_events_print_error(struct parse_events_error *err,
#undef MAX_WIDTH
+/*
+ * Parse one SDT event specification given to 'perf record -e'.
+ *
+ * @str:      event string as passed in, including the leading '%'
+ * @sdt_list: on success, set to a newly allocated list head carrying the
+ *            struct sdt_event_list entries queued by add_sdt_event()
+ *
+ * Returns 0 on success. On failure a non-zero error code is returned,
+ * *sdt_list is left untouched and nothing allocated here stays behind.
+ */
+static int parse_sdt_event(const char *str, struct list_head **sdt_list)
+{
+	struct list_head *sdt_evlist;
+	char *ptr;
+	int ret;
+
+	/* add_sdt_event() takes a non-const string, so work on a copy */
+	ptr = strdup(str);
+	if (ptr == NULL)
+		return -ENOMEM;
+
+	sdt_evlist = zalloc(sizeof(*sdt_evlist));
+	if (!sdt_evlist) {
+		free(ptr);
+		pr_err("Error in sdt_evlist memory allocation\n");
+		return -ENOMEM;
+	}
+	INIT_LIST_HEAD(sdt_evlist);
+
+	ret = add_sdt_event(ptr, sdt_evlist);
+	free(ptr);
+
+	if (ret) {
+		/*
+		 * add_sdt_event() does not free the list head. Release
+		 * it here, along with any entries that may still be
+		 * queued on it, instead of leaking them.
+		 */
+		free_sdt_list(sdt_evlist);
+		free(sdt_evlist);
+		return ret;
+	}
+
+	*sdt_list = sdt_evlist;
+	return 0;
+}
+
int parse_events_option(const struct option *opt, const char *str,
int unset __maybe_unused)
{
struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
struct parse_events_error err = { .idx = 0, };
- int ret = parse_events(evlist, str, &err);
+ int ret = 0;
+ struct list_head *sdt_list = NULL;
+ struct sdt_event_list *event;
+
+ if (*str == '%' && is_cmd_record()) {
+ ret = parse_sdt_event(str, &sdt_list);
+ if (!ret) {
+ list_for_each_entry(event, sdt_list, list) {
+ ret = parse_events(evlist, event->event_info,
+ &err);
+ if (ret < 0)
+ goto error;
+ }
+ /* Add it to the record struct */
+ sdt_event_list__add(sdt_list);
+ }
+ } else {
+ ret = parse_events(evlist, str, &err);
+ }
+error:
if (ret)
parse_events_print_error(&err, str);
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index c08daa9..0c7f643 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -189,4 +189,5 @@ int is_valid_tracepoint(const char *event_string);
int valid_event_mount(const char *eventfs);
char *parse_events_formats_error_string(char *additional_terms);
+void sdt_event_list__add(struct list_head *sdt_event_list);
#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index bb9fc34..7248c8b 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1144,7 +1144,7 @@ err:
return err;
}
-static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
+int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
{
char *ptr;
@@ -2943,6 +2943,12 @@ static int find_cached_events_all(struct perf_probe_event *pev,
return ret;
}
+/*
+ * Exported entry point for looking up SDT events: forwards to
+ * find_cached_events_all() and hands its result back unchanged.
+ */
+int find_sdt_events_from_cache(struct perf_probe_event *pev,
+			       struct probe_trace_event **tevs)
+{
+	int ret = find_cached_events_all(pev, tevs);
+
+	return ret;
+}
+
static int find_probe_trace_events_from_cache(struct perf_probe_event *pev,
struct probe_trace_event **tevs)
{
@@ -3226,3 +3232,35 @@ int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
tvar->name = NULL;
return 0;
}
+
+/*
+ * Record session for SDT events has ended. Delete the SDT events
+ * from uprobe_events file that were created initially, then free
+ * the entries of @sdt_events.
+ *
+ * Every entry's event_info string is OR-ed into a single strfilter
+ * which is handed to del_perf_probe_events(). Deletion is best effort:
+ * an entry that cannot be put into the filter is reported and skipped
+ * so that the remaining events are still removed.
+ */
+void remove_sdt_event_list(struct list_head *sdt_events)
+{
+	struct sdt_event_list *event;
+	struct strfilter *filter = NULL;
+	const char *err = NULL;
+	int ret;
+
+	if (list_empty(sdt_events))
+		return;
+
+	list_for_each_entry(event, sdt_events, list) {
+		if (!filter) {
+			filter = strfilter__new(event->event_info, &err);
+			ret = filter ? 0 : -1;
+		} else {
+			ret = strfilter__or(filter, event->event_info, &err);
+		}
+		if (ret)
+			pr_err("Error in adding %s to the SDT delete filter\n",
+			       event->event_info);
+	}
+
+	if (filter) {
+		ret = del_perf_probe_events(filter);
+		if (ret)
+			pr_err("Error in deleting the SDT list\n");
+		/*
+		 * The filter was allocated here, so release it here.
+		 * NOTE(review): assumes del_perf_probe_events() does not
+		 * take ownership of the filter - confirm.
+		 */
+		strfilter__delete(filter);
+	}
+
+	free_sdt_list(sdt_events);
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 39b5a35..9b61e77 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -173,4 +173,8 @@ int e_snprintf(char *str, size_t size, const char *format, ...)
int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
struct perf_probe_arg *pvar);
+int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev);
+
+int find_sdt_events_from_cache(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs);
#endif /*_PROBE_EVENT_H */
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 896d645..904f927 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -27,9 +27,12 @@
#include "probe-event.h"
#include "probe-file.h"
#include "session.h"
+#include "probe-finder.h"
#define MAX_CMDLEN 256
+#define MAX_EVENT_LENGTH 512
+
static void print_open_warning(int err, bool uprobe)
{
char sbuf[STRERR_BUFSIZE];
@@ -816,3 +819,99 @@ int probe_cache__show_all_caches(struct strfilter *filter)
return 0;
}
+
+/*
+ * Unlink and free every entry queued on @sdt_events together with its
+ * event_info string. The list head itself is not freed.
+ */
+void free_sdt_list(struct list_head *sdt_events)
+{
+	struct sdt_event_list *entry, *next;
+
+	if (!list_empty(sdt_events)) {
+		list_for_each_entry_safe(entry, next, sdt_events, list) {
+			list_del(&entry->list);
+			free(entry->event_info);
+			free(entry);
+		}
+	}
+}
+
+/*
+ * Find the SDT event from the cache and if found add it/them
+ * to the uprobe_events file.
+ *
+ * @event:      SDT event string; its first character (the '%' marker)
+ *              is skipped before parsing
+ * @sdt_events: list that receives one struct sdt_event_list entry,
+ *              named "group:event", per probe trace event that was set up
+ *
+ * Returns 0 on success and a non-zero error code otherwise. On failure
+ * no entry is added to @sdt_events.
+ */
+int add_sdt_event(char *event, struct list_head *sdt_events)
+{
+	struct perf_probe_event *pev;
+	struct sdt_event_list *tmp;
+	LIST_HEAD(new_events);
+	char *str = event + 1;
+	int ret, i, len;
+
+	pev = zalloc(sizeof(*pev));
+	if (!pev)
+		return -ENOMEM;
+
+	pev->sdt = true;
+	pev->uprobes = true;
+
+	/*
+	 * Parse str to find the group name and event name of
+	 * the sdt event.
+	 */
+	ret = parse_perf_probe_event_name(&str, pev);
+	if (ret) {
+		pr_err("Error in parsing sdt event %s\n", str);
+		free(pev);
+		return ret;
+	}
+
+	probe_conf.max_probes = MAX_PROBES;
+	probe_conf.force_add = 1;
+
+	/*
+	 * Find the sdt event from the cache, only cached SDT
+	 * events can be directly recorded.
+	 */
+	ret = find_sdt_events_from_cache(pev, &pev->tevs);
+	if (ret <= 0) {
+		/*
+		 * A negative value is a lookup error, not an event count:
+		 * never feed it to the apply/record steps below.
+		 */
+		pr_err(" %s:%s not found in the cache\n", pev->group,
+		       pev->event);
+		pev->ntevs = 0;
+		if (!ret)
+			ret = -EINVAL;
+		goto free_pev;
+	}
+	pev->ntevs = ret;
+
+	if (pev->ntevs > 1) {
+		pr_warning("Warning : Recording on %d occurences of %s:%s\n",
+			   pev->ntevs, pev->group, pev->event);
+	}
+
+	ret = apply_perf_probe_events(pev, 1);
+	if (ret) {
+		pr_err("Error in adding SDT event : %s\n", event);
+		goto free_pev;
+	}
+
+	/*
+	 * Collect the event names on a private list first, so that a
+	 * failure half way through never leaves "sdt_events" partly filled.
+	 * NOTE(review): on such a failure the probes already written by
+	 * apply_perf_probe_events() are not removed by this function.
+	 */
+	for (i = 0; i < pev->ntevs; i++) {
+		tmp = zalloc(sizeof(*tmp));
+		if (!tmp) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		tmp->event_info = zalloc(MAX_EVENT_LENGTH);
+		if (!tmp->event_info) {
+			free(tmp);
+			ret = -ENOMEM;
+			break;
+		}
+
+		/* Bound the write by the buffer size, not the input length */
+		len = snprintf(tmp->event_info, MAX_EVENT_LENGTH, "%s:%s",
+			       pev->tevs[i].group, pev->tevs[i].event);
+		if (len < 0 || len >= MAX_EVENT_LENGTH) {
+			pr_err("SDT event name %s:%s is too long\n",
+			       pev->tevs[i].group, pev->tevs[i].event);
+			free(tmp->event_info);
+			free(tmp);
+			ret = -EINVAL;
+			break;
+		}
+		list_add(&tmp->list, &new_events);
+	}
+
+	if (ret)
+		free_sdt_list(&new_events);
+	else
+		/* Same final order as adding each entry to the head directly */
+		list_splice(&new_events, sdt_events);
+
+free_pev:
+	cleanup_perf_probe_events(pev, 1);
+	/*
+	 * pev itself comes from the zalloc() above.
+	 * NOTE(review): assumes cleanup_perf_probe_events() only releases
+	 * what hangs off pev and not pev itself - confirm.
+	 */
+	free(pev);
+	return ret;
+}
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
index a02bbbd..39ec335 100644
--- a/tools/perf/util/probe-file.h
+++ b/tools/perf/util/probe-file.h
@@ -32,6 +32,11 @@ struct probe_cache {
struct list_head list;
};
+struct sdt_event_list { /* one SDT event set up for direct recording */
+ char *event_info; /* heap-allocated "group:event" name of the event */
+ struct list_head list; /* link in the list of SDT events */
+};
+
int probe_cache_entry__get_event(struct probe_cache_entry *entry,
struct probe_trace_event **tevs);
#define for_each_probe_cache_entry(entry, pcache) \
@@ -51,4 +56,7 @@ struct probe_cache_entry *probe_cache__find(struct probe_cache *pcache,
struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache,
const char *group, const char *event);
int probe_cache__show_all_caches(struct strfilter *filter);
+int add_sdt_event(char *event, struct list_head *sdt_event_list);
+void remove_sdt_event_list(struct list_head *sdt_event_list);
+void free_sdt_list(struct list_head *sdt_events);
#endif
--
1.9.3