Re: [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling

From: Namhyung Kim
Date: Thu Jul 25 2024 - 20:06:06 EST


Hi Ian,

On Sat, Jul 20, 2024 at 12:45:52AM -0700, Ian Rogers wrote:
> Andi Kleen reported a regression where `perf script +F metric` would
> crash. With this change the output is:
>
> ```
> $ perf record -a -e '{cycles,instructions}:S' perf bench mem memcpy
>
> 21.229620 GB/sec
>
> 15.751008 GB/sec
>
> 16.009221 GB/sec
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 1.945 MB perf.data (294 samples) ]
> $ perf --no-pager script -F +metric
> perf 1912464 [000] 814503.473101: 6325 cycles: ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
> perf 1912464 [000] 814503.473101: metric: 0.06 insn per cycle
> perf 1912464 [000] 814503.473101: 351 instructions: ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
> perf 1912464 [000] 814503.473101: metric: 0.03 insn per cycle
> ...
> ```
>
> The change fixes perf script to update counts and thereby aggregate
> values which then get consumed by unchanged metric logic in the shadow
> stat output. Note, it would be preferential to switch to json metrics.
>
> Reported-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> Closes: https://lore.kernel.org/linux-perf-users/20240713155443.1665378-1-ak@xxxxxxxxxxxxxxx/
> Fixes: 37cc8ad77cf8 ("perf metric: Directly use counts rather than saved_value")'
> Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
> ---
> The code isn't well tested nor does it support non-leader sampling
> reading of counts based on periods that seemed to present in the
> previous code. Sending out for the sake of discussion. Andi's changes
> added a test and that should certainly be added.

You don't need to handle leader sampling specially since it's already
done by evlist__deliver_sample(). It will call tool->sample() for each
event (with sample->period is set) when leader sampling is enabled.

> ---
> tools/perf/builtin-script.c | 114 +++++++++++++++++++++++++++++-------
> 1 file changed, 93 insertions(+), 21 deletions(-)
>
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index c16224b1fef3..752d6219fb08 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -63,6 +63,7 @@
> #include "util/util.h"
> #include "util/cgroup.h"
> #include "perf.h"
> +#include <internal/threadmap.h>
>
> #include <linux/ctype.h>
> #ifdef HAVE_LIBTRACEEVENT
> @@ -334,16 +335,8 @@ struct evsel_script {
> char *filename;
> FILE *fp;
> u64 samples;
> - /* For metric output */
> - u64 val;
> - int gnum;
> };
>
> -static inline struct evsel_script *evsel_script(struct evsel *evsel)
> -{
> - return (struct evsel_script *)evsel->priv;
> -}
> -
> static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data)
> {
> struct evsel_script *es = zalloc(sizeof(*es));
> @@ -2107,6 +2100,12 @@ static void script_new_line(struct perf_stat_config *config __maybe_unused,
> fputs("\tmetric: ", mctx->fp);
> }
>
> +static struct aggr_cpu_id perf_script__get_cpu(struct perf_stat_config *config __maybe_unused,
> + struct perf_cpu cpu)
> +{
> + return aggr_cpu_id__cpu(cpu, /*data=*/NULL);
> +}
> +
> static void perf_sample__fprint_metric(struct perf_script *script,
> struct thread *thread,
> struct evsel *evsel,
> @@ -2126,23 +2125,96 @@ static void perf_sample__fprint_metric(struct perf_script *script,
> .force_header = false,
> };
> struct evsel *ev2;
> - u64 val;
> + struct perf_cpu sample_cpu = { .cpu = sample->cpu, };
> + int thread_idx, cpu_map_idx;
> + u64 read_format = evsel->core.attr.read_format;
> + int aggr_idx;
>
> + /* Only support leader sampling with a group of read events. */
> + if ((read_format & PERF_FORMAT_GROUP) == 0)
> + return;
> +
> + /* Lazy initialization of stats values. */
> if (!evsel->stats)
> evlist__alloc_stats(&stat_config, script->session->evlist, /*alloc_raw=*/false);
> - if (evsel_script(leader)->gnum++ == 0)
> - perf_stat__reset_shadow_stats();
> - val = sample->period * evsel->scale;
> - evsel_script(evsel)->val = val;
> - if (evsel_script(leader)->gnum == leader->core.nr_members) {
> - for_each_group_member (ev2, leader) {
> - perf_stat__print_shadow_stats(&stat_config, ev2,
> - evsel_script(ev2)->val,
> - sample->cpu,
> - &ctx,
> - NULL);
> + if (!stat_config.aggr_map) {
> + int nr_aggr;
> +
> + stat_config.aggr_get_id = perf_script__get_cpu;

To support per-thread mode, I think we should check thread_map of the
evlist if it has actual PID. And use the thread map index as an aggr
idx.

Thanks,
Namhyung


> + stat_config.aggr_map =
> + cpu_aggr_map__new(evsel->evlist->core.user_requested_cpus,
> + aggr_cpu_id__cpu,
> + /*data=*/NULL,
> + /*needs_sort=*/false);
> + if (!stat_config.aggr_map) {
> + pr_err("cannot allocate aggr map\n");
> + return;
> + }
> + nr_aggr = stat_config.aggr_map->nrd;
> + if (evlist__alloc_aggr_stats(evsel->evlist, nr_aggr) < 0) {
> + pr_err("cannot allocate aggr counts\n");
> + return;
> }
> - evsel_script(leader)->gnum = 0;
> + }
> +
> + /* Add group counts from sample into appropriate evsel counts by id. */
> + for_each_group_evsel(ev2, leader) {
> + struct perf_thread_map *threads = perf_evsel__threads(&ev2->core);
> + struct perf_cpu_map *cpus = evsel__cpus(ev2);
> + int id_num = 0;
> + bool match = false;
> +
> + perf_cpu_map__for_each_idx(cpu_map_idx, cpus) {
> + for (thread_idx = 0; thread_idx < threads->nr; thread_idx++) {
> + struct sample_read_value *value = sample->read.group.values;
> + u64 id = ev2->core.id[id_num++];
> +
> + sample_read_group__for_each(value, sample->read.group.nr,
> + read_format) {
> + struct perf_counts_values *counts;
> +
> + if (value->id != id)
> + continue;
> +
> + counts = perf_counts(ev2->counts, cpu_map_idx, thread_idx);
> + counts->val += value->value;
> + /*
> + * Ensure the enabled/running time isn't
> + * 0, which implies an error.
> + */
> + counts->ena += sample->read.time_enabled ?: sample->period;
> + counts->run += sample->read.time_running ?: sample->period;
> + match = true;
> + }
> + }
> + }
> + if (match) {
> + /* Update the aggreate count in ev2. */
> + perf_stat_process_counter(&stat_config, ev2);
> + }
> + }
> +
> + /* Find the appropriate indices for dumping of this sample. */
> + thread_idx = perf_thread_map__idx(perf_evsel__threads(&evsel->core),
> + thread__pid(thread));
> + cpu_map_idx = perf_cpu_map__idx(evsel__cpus(evsel), sample_cpu);
> + if (thread_idx == -1 || cpu_map_idx == -1)
> + return;
> +
> + cpu_aggr_map__for_each_idx(aggr_idx, stat_config.aggr_map) {
> + if (stat_config.aggr_map->map[aggr_idx].cpu.cpu == sample_cpu.cpu)
> + break;
> + }
> + /* Iterate all events and the leader of the group, trying to print stats. */
> + for_each_group_evsel(ev2, leader) {
> + struct perf_counts_values *counts =
> + perf_counts(ev2->counts, cpu_map_idx, thread_idx);
> +
> + if (!counts)
> + continue;
> +
> + perf_stat__print_shadow_stats(&stat_config, ev2, counts->val * ev2->scale,
> + aggr_idx, &ctx, NULL);
> }
> }
>
> --
> 2.45.2.1089.g2a221341d9-goog
>