RE: [RFC PATCH v6 1/5] perf stat: Parse and find tpebs events when parsing metrics to prepare for perf record sampling

From: Wang, Weilin
Date: Mon Apr 01 2024 - 17:55:37 EST




> -----Original Message-----
> From: Namhyung Kim <namhyung@xxxxxxxxxx>
> Sent: Monday, April 1, 2024 1:35 PM
> To: Wang, Weilin <weilin.wang@xxxxxxxxx>
> Cc: Ian Rogers <irogers@xxxxxxxxxx>; Arnaldo Carvalho de Melo
> <acme@xxxxxxxxxx>; Peter Zijlstra <peterz@xxxxxxxxxxxxx>; Ingo Molnar
> <mingo@xxxxxxxxxx>; Alexander Shishkin
> <alexander.shishkin@xxxxxxxxxxxxxxx>; Jiri Olsa <jolsa@xxxxxxxxxx>; Hunter,
> Adrian <adrian.hunter@xxxxxxxxx>; Kan Liang <kan.liang@xxxxxxxxxxxxxxx>;
> linux-perf-users@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; Taylor, Perry
> <perry.taylor@xxxxxxxxx>; Alt, Samantha <samantha.alt@xxxxxxxxx>; Biggers,
> Caleb <caleb.biggers@xxxxxxxxx>
> Subject: Re: [RFC PATCH v6 1/5] perf stat: Parse and find tpebs events when
> parsing metrics to prepare for perf record sampling
>
> Hello Weilin,
>
> On Fri, Mar 29, 2024 at 12:12 PM <weilin.wang@xxxxxxxxx> wrote:
> >
> > From: Weilin Wang <weilin.wang@xxxxxxxxx>
> >
> > Metrics that use tpebs values would use the R as retire_latency modifier in
> > formulas. We put all these events into a list and pass the list to perf
> > record to collect their retire latency value.
> >
> > Signed-off-by: Weilin Wang <weilin.wang@xxxxxxxxx>
> > Reviewed-by: Ian Rogers <irogers@xxxxxxxxxx>
> > ---
> > tools/perf/builtin-stat.c | 38 +++++++++++++--
> > tools/perf/util/metricgroup.c | 88 +++++++++++++++++++++++++++++---
> ---
> > tools/perf/util/metricgroup.h | 10 +++-
> > tools/perf/util/stat.h | 2 +
> > 4 files changed, 119 insertions(+), 19 deletions(-)
> >
> > diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> > index 6bba1a89d030..6291e1e24535 100644
> > --- a/tools/perf/builtin-stat.c
> > +++ b/tools/perf/builtin-stat.c
> > @@ -162,6 +162,7 @@ static struct perf_stat_config stat_config = {
> > .ctl_fd = -1,
> > .ctl_fd_ack = -1,
> > .iostat_run = false,
> > + .tpebs_events = LIST_HEAD_INIT(stat_config.tpebs_events),
> > };
> >
> > static bool cpus_map_matched(struct evsel *a, struct evsel *b)
> > @@ -686,6 +687,12 @@ static enum counter_recovery
> stat_handle_error(struct evsel *counter)
> > return COUNTER_FATAL;
> > }
> >
> > +static int __run_perf_record(void)
> > +{
> > + pr_debug("Prepare perf record for retire_latency\n");
> > + return 0;
> > +}
> > +
> > static int __run_perf_stat(int argc, const char **argv, int run_idx)
> > {
> > int interval = stat_config.interval;
> > @@ -703,6 +710,16 @@ static int __run_perf_stat(int argc, const char
> **argv, int run_idx)
> > int err;
> > bool second_pass = false;
> >
> > + /* Prepare perf record for sampling event retire_latency before fork and
> > + * prepare workload */
> > + if (stat_config.tpebs_event_size > 0) {
> > + int ret;
> > +
> > + ret = __run_perf_record();
> > + if (ret)
> > + return ret;
> > + }
> > +
> > if (forks) {
> > if (evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
> workload_exec_failed_signal) < 0) {
> > perror("failed to prepare workload");
> > @@ -2106,7 +2123,9 @@ static int add_default_attributes(void)
> > stat_config.metric_no_threshold,
> > stat_config.user_requested_cpu_list,
> > stat_config.system_wide,
> > - &stat_config.metric_events);
> > + &stat_config.metric_events,
> > + &stat_config.tpebs_events,
> > + &stat_config.tpebs_event_size);
>
> Maybe it'd be better to pass the stat_config, but it can be done later.
>
>
> > }
> >
> > if (smi_cost) {
> > @@ -2139,7 +2158,9 @@ static int add_default_attributes(void)
> > stat_config.metric_no_threshold,
> > stat_config.user_requested_cpu_list,
> > stat_config.system_wide,
> > - &stat_config.metric_events);
> > + &stat_config.metric_events,
> > + &stat_config.tpebs_events,
> > + &stat_config.tpebs_event_size);
> > }
> >
> > if (topdown_run) {
> > @@ -2173,7 +2194,9 @@ static int add_default_attributes(void)
> > /*metric_no_threshold=*/true,
> > stat_config.user_requested_cpu_list,
> > stat_config.system_wide,
> > - &stat_config.metric_events) < 0)
> > + &stat_config.metric_events,
> > + &stat_config.tpebs_events,
> > + &stat_config.tpebs_event_size) < 0)
> > return -1;
> > }
> >
> > @@ -2214,7 +2237,9 @@ static int add_default_attributes(void)
> > /*metric_no_threshold=*/true,
> > stat_config.user_requested_cpu_list,
> > stat_config.system_wide,
> > - &stat_config.metric_events) < 0)
> > + &stat_config.metric_events,
> > + /*&stat_config.tpebs_events=*/NULL,
> > + /*stat_config.tpebs_event_size=*/0) < 0)
> > return -1;
> >
> > evlist__for_each_entry(metric_evlist, metric_evsel) {
> > @@ -2736,6 +2761,7 @@ int cmd_stat(int argc, const char **argv)
> > }
> > }
> >
> > +
> > /*
> > * Metric parsing needs to be delayed as metrics may optimize events
> > * knowing the target is system-wide.
> > @@ -2748,7 +2774,9 @@ int cmd_stat(int argc, const char **argv)
> > stat_config.metric_no_threshold,
> > stat_config.user_requested_cpu_list,
> > stat_config.system_wide,
> > - &stat_config.metric_events);
> > + &stat_config.metric_events,
> > + &stat_config.tpebs_events,
> > + &stat_config.tpebs_event_size);
> >
> > zfree(&metrics);
> > if (ret) {
> > diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> > index 79ef6095ab28..8e007d60af91 100644
> > --- a/tools/perf/util/metricgroup.c
> > +++ b/tools/perf/util/metricgroup.c
> > @@ -277,7 +277,8 @@ static bool contains_metric_id(struct evsel
> **metric_events, int num_events,
> > */
> > static int setup_metric_events(const char *pmu, struct hashmap *ids,
> > struct evlist *metric_evlist,
> > - struct evsel ***out_metric_events)
> > + struct evsel ***out_metric_events,
> > + size_t tpebs_event_size)
> > {
> > struct evsel **metric_events;
> > const char *metric_id;
> > @@ -286,7 +287,7 @@ static int setup_metric_events(const char *pmu,
> struct hashmap *ids,
> > bool all_pmus = !strcmp(pmu, "all") || perf_pmus__num_core_pmus()
> == 1 || !is_pmu_core(pmu);
> >
> > *out_metric_events = NULL;
> > - ids_size = hashmap__size(ids);
> > + ids_size = hashmap__size(ids) - tpebs_event_size;
> >
> > metric_events = calloc(ids_size + 1, sizeof(void *));
> > if (!metric_events)
> > @@ -323,6 +324,7 @@ static int setup_metric_events(const char *pmu,
> struct hashmap *ids,
> > }
> > }
> > if (matched_events < ids_size) {
> > + pr_debug("Error: matched_events = %lu, ids_size = %lu\n",
> matched_events, ids_size);
> > free(metric_events);
> > return -EINVAL;
> > }
> > @@ -668,7 +670,9 @@ static int decode_all_metric_ids(struct evlist
> *perf_evlist, const char *modifie
> > static int metricgroup__build_event_string(struct strbuf *events,
> > const struct expr_parse_ctx *ctx,
> > const char *modifier,
> > - bool group_events)
> > + bool group_events,
> > + struct list_head *tpebs_events __maybe_unused,
> > + size_t *tpebs_event_size)
> > {
> > struct hashmap_entry *cur;
> > size_t bkt;
> > @@ -681,8 +685,56 @@ static int metricgroup__build_event_string(struct
> strbuf *events,
> > hashmap__for_each_entry(ctx->ids, cur, bkt) {
> > const char *sep, *rsep, *id = cur->pkey;
> > enum perf_tool_event ev;
> > + /*
> > + * Parse and search for event name with retire_latency modifier R.
> > + * If found, put event name into the tpebs_events list. This list
> > + * of events will be passed to perf record for sampling to get
> > + * their reitre_latency value.
> > + * Search for ":R" in event name without "@". Search for the
> > + * last "@R" in event name with "@".
>
> Hmm.. it seems you look for an 'R' modifier and then change it to 'p', right?
> Why not use strrchr to check ':' or '@' and if it's followed by 'R'?

Yes, this is looking for the 'R' modifier and add 'p' for sampling. We might want
to explore 'P' or 'ppp' later.

I will try strrchr out and update the code if that makes the code simpler!

>
> Is the 'R' modifier only used in the metric expressions? Also please mention
> why some events have "@" in the name and others don't.
>
>
> > + */
> > + char *p = strstr(id, ":R");
> > + char *p1 = strstr(id, "@R");
> > +
> > + if (p == NULL && p1) {
> > + p = strstr(p1+1, "@R");
> > + if (p == NULL)
> > + p = p1;
> > + p = p+1;
> > + }
> > +
> > + if (p) {
> > + char *name;
> > + char *at;
> > + struct tpebs_event *new_event = malloc(sizeof(struct
> tpebs_event));
> >
> > - pr_debug("found event %s\n", id);
> > + if (!new_event)
> > + return -ENOMEM;
> > +
> > + new_event->tpebs_name = strdup(id);
> > + *p = '\0';
>
> I think 'p' points to the 'id' string ("cur->pkey"). Is it ok to
> change it here?
> I guess you may want to do it on the tpebs_name.

If I understand your question correctly:

The tpebs_name wants to keep the full name with the modifier. But for the
rest places, we don't need to keep this modifier in the name. So we could
change 'p' like this.

Thanks,
Weilin

>
> Thanks,
> Namhyung
>
>
> > + name = malloc(strlen(id) + 2);
> > + if (!name)
> > + return -ENOMEM;
> > +
> > + at = strchr(id, '@');
> > + if (at != NULL) {
> > + *at = '/';
> > + at = strchr(id, '@');
> > + *at = '/';
> > + strcpy(name, id);
> > + strcat(name, "p");
> > + } else {
> > + strcpy(name, id);
> > + strcat(name, ":p");
> > + }
> > + new_event->name = name;
> > + *tpebs_event_size += 1;
> > + pr_debug("retire_latency required, tpebs_event_size=%lu,
> new_event=%s\n",
> > + *tpebs_event_size, new_event->name);
> > + list_add_tail(&new_event->nd, tpebs_events);
> > + continue;
> > + }
> >