RE: [RFC PATCH v10 0/8] TPEBS counting mode support

From: Wang, Weilin
Date: Mon Jun 03 2024 - 13:00:41 EST




> -----Original Message-----
> From: Namhyung Kim <namhyung@xxxxxxxxxx>
> Sent: Sunday, June 2, 2024 2:18 PM
> To: Wang, Weilin <weilin.wang@xxxxxxxxx>
> Cc: Ian Rogers <irogers@xxxxxxxxxx>; Arnaldo Carvalho de Melo
> <acme@xxxxxxxxxx>; Peter Zijlstra <peterz@xxxxxxxxxxxxx>; Ingo Molnar
> <mingo@xxxxxxxxxx>; Alexander Shishkin
> <alexander.shishkin@xxxxxxxxxxxxxxx>; Jiri Olsa <jolsa@xxxxxxxxxx>; Hunter,
> Adrian <adrian.hunter@xxxxxxxxx>; Kan Liang <kan.liang@xxxxxxxxxxxxxxx>;
> linux-perf-users@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; Taylor, Perry
> <perry.taylor@xxxxxxxxx>; Alt, Samantha <samantha.alt@xxxxxxxxx>; Biggers,
> Caleb <caleb.biggers@xxxxxxxxx>
> Subject: Re: [RFC PATCH v10 0/8] TPEBS counting mode support
>
> On Fri, May 31, 2024 at 11:03:25PM +0000, Wang, Weilin wrote:
> >
> > > -----Original Message-----
> > > From: Namhyung Kim <namhyung@xxxxxxxxxx>
> > > As I said, please don't open event1:R (for perf stat) and let tpebs_stop() set
> > > the value using the data from perf record in background.
> >
> > I think this is exactly what I'm trying to achieve, not open event1:R for perf
> stat.
> > The problem is that I'm not sure how to do it properly in the code. Please
> give
> > me some suggestion here.
>
> Ok, I think the problem is in the read code. It requires the number of
> entries and the data size to match with what it calculates from the
> member events. It should not count TPEBS events as we don't want to
> open them.
>
> Something like below might work (on top of your series). Probably
> libperf should be aware of this..
>
Thanks Namhyung! I will integrate this patch and test it out.

Thanks,
Weilin

> Thanks,
> Namhyung
>
> ---8<---
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index ac7a98ff6b19..7913db4a99e0 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1559,6 +1559,60 @@ static void evsel__set_count(struct evsel
> *counter, int cpu_map_idx, int thread,
> perf_counts__set_loaded(counter->counts, cpu_map_idx, thread,
> true);
> }
>
> +static bool evsel__group_has_tpebs(struct evsel *leader)
> +{
> + struct evsel *evsel;
> +
> + for_each_group_evsel(evsel, leader) {
> + if (evsel__is_retire_lat(evsel))
> + return true;
> + }
> + return false;
> +}
> +
> +static u64 evsel__group_read_nr_members(struct evsel *leader)
> +{
> + u64 nr = leader->core.nr_members;
> + struct evsel *evsel;
> +
> + for_each_group_evsel(evsel, leader) {
> + if (evsel__is_retire_lat(evsel))
> + nr--;
> + }
> + return nr;
> +}
> +
> +static u64 evsel__group_read_size(struct evsel *leader)
> +{
> + u64 read_format = leader->core.attr.read_format;
> + int entry = sizeof(u64); /* value */
> + int size = 0;
> + int nr = 1;
> +
> + if (!evsel__group_has_tpebs(leader))
> + return perf_evsel__read_size(&leader->core);
> +
> + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
> + size += sizeof(u64);
> +
> + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
> + size += sizeof(u64);
> +
> + if (read_format & PERF_FORMAT_ID)
> + entry += sizeof(u64);
> +
> + if (read_format & PERF_FORMAT_LOST)
> + entry += sizeof(u64);
> +
> + if (read_format & PERF_FORMAT_GROUP) {
> + nr = evsel__group_read_nr_members(leader);
> + size += sizeof(u64);
> + }
> +
> + size += entry * nr;
> + return size;
> +}
> +
> static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx,
> int thread, u64 *data)
> {
> u64 read_format = leader->core.attr.read_format;
> @@ -1567,7 +1621,7 @@ static int evsel__process_group_data(struct evsel
> *leader, int cpu_map_idx, int
>
> nr = *data++;
>
> - if (nr != (u64) leader->core.nr_members)
> + if (nr != evsel__group_read_nr_members(leader))
> return -EINVAL;
>
> if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
> @@ -1597,7 +1651,7 @@ static int evsel__read_group(struct evsel *leader,
> int cpu_map_idx, int thread)
> {
> struct perf_stat_evsel *ps = leader->stats;
> u64 read_format = leader->core.attr.read_format;
> - int size = perf_evsel__read_size(&leader->core);
> + int size = evsel__group_read_size(leader);
> u64 *data = ps->group_data;
>
> if (!(read_format & PERF_FORMAT_ID))
> @@ -2210,8 +2264,7 @@ static int evsel__open_cpu(struct evsel *evsel,
> struct perf_cpu_map *cpus,
>
> if (evsel__is_retire_lat(evsel)) {
> err = tpebs_start(evsel->evlist, cpus);
> - if (err)
> - return err;
> + return err;
> }
>
> err = __evsel__prepare_open(evsel, cpus, threads);
> diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
> index d099fc8080e1..a3857e88af96 100644
> --- a/tools/perf/util/intel-tpebs.c
> +++ b/tools/perf/util/intel-tpebs.c
> @@ -354,10 +354,11 @@ int tpebs_set_evsel(struct evsel *evsel, int
> cpu_map_idx, int thread)
> */
> if (cpu_map_idx == 0 && thread == 0) {
> /* Lost precision when casting from double to __u64. Any
> improvement? */
> - val = t->val;
> + val = t->val * 1000;
> } else
> val = 0;
>
> + evsel->scale = 1e-3;
> count->val = val;
> return 0;
> }