Re: [PATCH 03/31] perf tools: Introduce dummy evsel

From: Namhyung Kim
Date: Wed Sep 02 2015 - 20:26:51 EST


Hi,

On Sat, Aug 29, 2015 at 04:21:37AM +0000, Wang Nan wrote:
> This patch allows linking dummy evsel onto evlist as a placeholder. It
> is for following patch which allows passing BPF object using '--event
> object.o'.
>
> Doesn't link other event selectors, if passing a BPF object file to
> '--event', nothing is linked onto evlist. Instead, events described in
> BPF object file are probed and linked in a delayed manner because we
> want do all probing work together. Therefore, evsel for events in BPF
> object would be linked at the end of evlist. Which causes a small
> problem that, if passing '--filter' setting after object file, the
> filter option won't be correctly applied to those events.
>
> This patch links dummy onto evlist, so following --filter can be
> collected by the dummy evsel. For this reason dummy evsels are set to
> PERF_TYPE_TRACEPOINT.

I understand the need of the dummy event. But we already have dummy
event so it's confusing to have similar event IMHO. So what about
using existing dummy event instead? You can save a link to a bpf
object in the dummy evsel (to check it later) and change to allow
setting filter on dummy events IMHO.

>
> Due to the possibility of existance of dummy evsel,
> perf_evlist__purge_dummy() must be called right after parse_options().
> This patch adds it to record, top, trace and stat builtin commands.
> Further patch moves it down after real BPF events are processed with.

IMHO it'd be better to do this kind of job in a single place -
e.g. perf_evlist__config() ? - so that other commands get benefits
from it easily.

Thanks,
Namhyung


>
> Signed-off-by: Wang Nan <wangnan0@xxxxxxxxxx>
> Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
> Cc: Alexei Starovoitov <ast@xxxxxxxxxxxx>
> Cc: Brendan Gregg <brendan.d.gregg@xxxxxxxxx>
> Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
> Cc: David Ahern <dsahern@xxxxxxxxx>
> Cc: He Kuang <hekuang@xxxxxxxxxx>
> Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
> Cc: Kaixu Xia <xiakaixu@xxxxxxxxxx>
> Cc: Masami Hiramatsu <masami.hiramatsu.pt@xxxxxxxxxxx>
> Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
> Cc: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
> Cc: Zefan Li <lizefan@xxxxxxxxxx>
> Cc: pi3orama@xxxxxxx
> Link: http://lkml.kernel.org/r/1440742821-44548-4-git-send-email-wangnan0@xxxxxxxxxx
> ---
> tools/perf/builtin-record.c | 2 ++
> tools/perf/builtin-stat.c | 1 +
> tools/perf/builtin-top.c | 1 +
> tools/perf/builtin-trace.c | 1 +
> tools/perf/util/evlist.c | 19 +++++++++++++++++++
> tools/perf/util/evlist.h | 1 +
> tools/perf/util/evsel.c | 32 ++++++++++++++++++++++++++++++++
> tools/perf/util/evsel.h | 6 ++++++
> tools/perf/util/parse-events.c | 25 +++++++++++++++++++++----
> 9 files changed, 84 insertions(+), 4 deletions(-)
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index a660022..81829de 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -1112,6 +1112,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
>
> argc = parse_options(argc, argv, record_options, record_usage,
> PARSE_OPT_STOP_AT_NON_OPTION);
> + perf_evlist__purge_dummy(rec->evlist);
> +
> if (!argc && target__none(&rec->opts.target))
> usage_with_options(record_usage, record_options);
>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 7aa039b..99b62f1 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -1208,6 +1208,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
>
> argc = parse_options(argc, argv, options, stat_usage,
> PARSE_OPT_STOP_AT_NON_OPTION);
> + perf_evlist__purge_dummy(evsel_list);
>
> interval = stat_config.interval;
>
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index 8c465c8..246203b 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -1198,6 +1198,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
> perf_config(perf_top_config, &top);
>
> argc = parse_options(argc, argv, options, top_usage, 0);
> + perf_evlist__purge_dummy(top.evlist);
> if (argc)
> usage_with_options(top_usage, options);
>
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 4e3abba..57712b9 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -3099,6 +3099,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
>
> argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
> trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
> + perf_evlist__purge_dummy(trace.evlist);
>
> if (trace.trace_pgfaults) {
> trace.opts.sample_address = true;
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index 8d00039..8a4e64d 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -1696,3 +1696,22 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
>
> tracking_evsel->tracking = true;
> }
> +
> +void perf_evlist__purge_dummy(struct perf_evlist *evlist)
> +{
> + struct perf_evsel *pos, *n;
> +
> + /*
> + * Remove all dummy events.
> + * During linking, we don't touch anything except link
> + * it into evlist. As a result, we don't
> + * need to adjust evlist->nr_entries during removal.
> + */
> +
> + evlist__for_each_safe(evlist, n, pos) {
> + if (perf_evsel__is_dummy(pos)) {
> + list_del_init(&pos->node);
> + perf_evsel__delete(pos);
> + }
> + }
> +}
> diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
> index b39a619..7f15727 100644
> --- a/tools/perf/util/evlist.h
> +++ b/tools/perf/util/evlist.h
> @@ -181,6 +181,7 @@ bool perf_evlist__valid_read_format(struct perf_evlist *evlist);
> void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
> struct list_head *list,
> int nr_entries);
> +void perf_evlist__purge_dummy(struct perf_evlist *evlist);
>
> static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist)
> {
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index bac25f4..01267f4 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -213,6 +213,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
> evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
> perf_evsel__calc_id_pos(evsel);
> evsel->cmdline_group_boundary = false;
> + evsel->is_dummy = false;
> }
>
> struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
> @@ -225,6 +226,37 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
> return evsel;
> }
>
> +struct perf_evsel *perf_evsel__new_dummy(const char *name)
> +{
> + struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
> +
> + if (!evsel)
> + return NULL;
> +
> + /*
> + * Don't need call perf_evsel__init() for dummy evsel.
> + * Keep it simple.
> + */
> + evsel->name = strdup(name);
> + if (!evsel->name)
> + goto out_free;
> +
> + INIT_LIST_HEAD(&evsel->node);
> + INIT_LIST_HEAD(&evsel->config_terms);
> +
> + evsel->cmdline_group_boundary = false;
> + /*
> + * Set dummy evsel as TRACEPOINT event so it can collect filter
> + * options.
> + */
> + evsel->attr.type = PERF_TYPE_TRACEPOINT;
> + evsel->is_dummy = true;
> + return evsel;
> +out_free:
> + free(evsel);
> + return NULL;
> +}
> +
> struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx)
> {
> struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 298e6bb..0b8e47d 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -118,6 +118,7 @@ struct perf_evsel {
> struct perf_evsel *leader;
> char *group_name;
> bool cmdline_group_boundary;
> + bool is_dummy;
> struct list_head config_terms;
> };
>
> @@ -153,6 +154,11 @@ int perf_evsel__object_config(size_t object_size,
> void (*fini)(struct perf_evsel *evsel));
>
> struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx);
> +struct perf_evsel *perf_evsel__new_dummy(const char *name);
> +static inline bool perf_evsel__is_dummy(struct perf_evsel *evsel)
> +{
> + return evsel->is_dummy;
> +}
>
> static inline struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr)
> {
> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> index 14cd7e3..71d91fb 100644
> --- a/tools/perf/util/parse-events.c
> +++ b/tools/perf/util/parse-events.c
> @@ -1141,7 +1141,7 @@ int parse_events(struct perf_evlist *evlist, const char *str,
> perf_pmu__parse_cleanup();
> if (!ret) {
> int entries = data.idx - evlist->nr_entries;
> - struct perf_evsel *last;
> + struct perf_evsel *last = NULL;
>
> if (!list_empty(&data.list)) {
> last = list_entry(data.list.prev,
> @@ -1149,8 +1149,25 @@ int parse_events(struct perf_evlist *evlist, const char *str,
> last->cmdline_group_boundary = true;
> }
>
> - perf_evlist__splice_list_tail(evlist, &data.list, entries);
> - evlist->nr_groups += data.nr_groups;
> + if (last && perf_evsel__is_dummy(last)) {
> + if (!list_is_singular(&data.list)) {
> + parse_events_evlist_error(&data, 0,
> + "Dummy evsel error: not on a singular list");
> + return -1;
> + }
> + /*
> + * We are introducing a dummy event. Don't touch
> + * anything, just link it.
> + *
> + * Don't use perf_evlist__splice_list_tail() since
> + * it alerts evlist->nr_entries, which affect header
> + * of resulting perf.data.
> + */
> + list_splice_tail(&data.list, &evlist->entries);
> + } else {
> + perf_evlist__splice_list_tail(evlist, &data.list, entries);
> + evlist->nr_groups += data.nr_groups;
> + }
>
> return 0;
> }
> @@ -1256,7 +1273,7 @@ foreach_evsel_in_last_glob(struct perf_evlist *evlist,
> struct perf_evsel *last = NULL;
> int err;
>
> - if (evlist->nr_entries > 0)
> + if (!list_empty(&evlist->entries))
> last = perf_evlist__last(evlist);
>
> do {
> --
> 2.1.0
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/