Re: [PATCH 18/20] perf stat: Display event stats using aggr counts
From: Athira Rajeev
Date: Tue Dec 06 2022 - 08:40:26 EST
> On 06-Dec-2022, at 12:30 AM, Namhyung Kim <namhyung@xxxxxxxxxx> wrote:
>
> Hello,
>
> On Mon, Dec 5, 2022 at 2:52 AM Athira Rajeev
> <atrajeev@xxxxxxxxxxxxxxxxxx> wrote:
>>
>>
>>
>>> On 18-Oct-2022, at 7:32 AM, Namhyung Kim <namhyung@xxxxxxxxxx> wrote:
>>>
>>> Now aggr counts are ready for use. Convert the display routines to use
>>> the aggr counts and update the shadow stat with them. It doesn't need
>>> to aggregate counts or collect aliases anymore during the display. Get
>>> rid of now unused struct perf_aggr_thread_value.
>>>
>>> Note that there's a difference in the display order among the aggr mode.
>>> For per-core/die/socket/node aggregation, it shows relevant events in
>>> the same unit together, whereas global/thread/no aggregation it shows
>>> the same events for different units together. So it still uses separate
>>> codes to display them due to the ordering.
>>>
>>> One more thing to note is that it breaks per-core event display for now.
>>> The next patch will fix it to have identical output as of now.
>>>
>>> Acked-by: Ian Rogers <irogers@xxxxxxxxxx>
>>> Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
>>> ---
>>> tools/perf/util/stat-display.c | 421 ++++-----------------------------
>>> tools/perf/util/stat.c | 5 -
>>> tools/perf/util/stat.h | 9 -
>>> 3 files changed, 49 insertions(+), 386 deletions(-)
>>>
>>> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
>>> index 4113aa86772f..bfae2784609c 100644
>>> --- a/tools/perf/util/stat-display.c
>>> +++ b/tools/perf/util/stat-display.c
>>> @@ -442,31 +442,6 @@ static void print_metric_header(struct perf_stat_config *config,
>>> fprintf(os->fh, "%*s ", config->metric_only_len, unit);
>>> }
>>>
>>> -static int first_shadow_map_idx(struct perf_stat_config *config,
>>> - struct evsel *evsel, const struct aggr_cpu_id *id)
>>> -{
>>> - struct perf_cpu_map *cpus = evsel__cpus(evsel);
>>> - struct perf_cpu cpu;
>>> - int idx;
>>> -
>>> - if (config->aggr_mode == AGGR_NONE)
>>> - return perf_cpu_map__idx(cpus, id->cpu);
>>> -
>>> - if (config->aggr_mode == AGGR_THREAD)
>>> - return id->thread_idx;
>>> -
>>> - if (!config->aggr_get_id)
>>> - return 0;
>>> -
>>> - perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
>>> - struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu);
>>> -
>>> - if (aggr_cpu_id__equal(&cpu_id, id))
>>> - return idx;
>>> - }
>>> - return 0;
>>> -}
>>> -
>>> static void abs_printout(struct perf_stat_config *config,
>>> struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg)
>>> {
>>> @@ -537,7 +512,7 @@ static bool is_mixed_hw_group(struct evsel *counter)
>>> static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr,
>>> struct evsel *counter, double uval,
>>> char *prefix, u64 run, u64 ena, double noise,
>>> - struct runtime_stat *st)
>>> + struct runtime_stat *st, int map_idx)
>>> {
>>> struct perf_stat_output_ctx out;
>>> struct outstate os = {
>>> @@ -648,8 +623,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
>>> print_running(config, run, ena);
>>> }
>>>
>>> - perf_stat__print_shadow_stats(config, counter, uval,
>>> - first_shadow_map_idx(config, counter, &id),
>>> + perf_stat__print_shadow_stats(config, counter, uval, map_idx,
>>> &out, &config->metric_events, st);
>>> if (!config->csv_output && !config->metric_only && !config->json_output) {
>>> print_noise(config, counter, noise);
>>> @@ -657,34 +631,6 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
>>> }
>>> }
>>>
>>> -static void aggr_update_shadow(struct perf_stat_config *config,
>>> - struct evlist *evlist)
>>> -{
>>> - int idx, s;
>>> - struct perf_cpu cpu;
>>> - struct aggr_cpu_id s2, id;
>>> - u64 val;
>>> - struct evsel *counter;
>>> - struct perf_cpu_map *cpus;
>>> -
>>> - for (s = 0; s < config->aggr_map->nr; s++) {
>>> - id = config->aggr_map->map[s];
>>> - evlist__for_each_entry(evlist, counter) {
>>> - cpus = evsel__cpus(counter);
>>> - val = 0;
>>> - perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
>>> - s2 = config->aggr_get_id(config, cpu);
>>> - if (!aggr_cpu_id__equal(&s2, &id))
>>> - continue;
>>> - val += perf_counts(counter->counts, idx, 0)->val;
>>> - }
>>> - perf_stat__update_shadow_stats(counter, val,
>>> - first_shadow_map_idx(config, counter, &id),
>>> - &rt_stat);
>>> - }
>>> - }
>>> -}
>>> -
>>> static void uniquify_event_name(struct evsel *counter)
>>> {
>>> char *new_name;
>>> @@ -721,137 +667,51 @@ static void uniquify_event_name(struct evsel *counter)
>>> counter->uniquified_name = true;
>>> }
>>>
>>> -static void collect_all_aliases(struct perf_stat_config *config, struct evsel *counter,
>>> - void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
>>> - bool first),
>>> - void *data)
>>> +static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config)
>>> {
>>> - struct evlist *evlist = counter->evlist;
>>> - struct evsel *alias;
>>> -
>>> - alias = list_prepare_entry(counter, &(evlist->core.entries), core.node);
>>> - list_for_each_entry_continue (alias, &evlist->core.entries, core.node) {
>>> - /* Merge events with the same name, etc. but on different PMUs. */
>>> - if (!strcmp(evsel__name(alias), evsel__name(counter)) &&
>>> - alias->scale == counter->scale &&
>>> - alias->cgrp == counter->cgrp &&
>>> - !strcmp(alias->unit, counter->unit) &&
>>> - evsel__is_clock(alias) == evsel__is_clock(counter) &&
>>> - strcmp(alias->pmu_name, counter->pmu_name)) {
>>> - alias->merged_stat = true;
>>> - cb(config, alias, data, false);
>>> - }
>>> - }
>>> + return evsel__is_hybrid(evsel) && !config->hybrid_merge;
>>> }
>>>
>>> -static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config,
>>> - bool check)
>>> +static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter)
>>> {
>>> - if (evsel__is_hybrid(counter)) {
>>> - if (check)
>>> - return config->hybrid_merge;
>>> - else
>>> - return !config->hybrid_merge;
>>> - }
>>> -
>>> - return false;
>>> -}
>>> -
>>> -static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>>> - void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
>>> - bool first),
>>> - void *data)
>>> -{
>>> - if (counter->merged_stat)
>>> - return false;
>>> - cb(config, counter, data, true);
>>> - if (config->no_merge || hybrid_merge(counter, config, false))
>>> + if (config->no_merge || hybrid_uniquify(counter, config))
>>> uniquify_event_name(counter);
>>> - else if (counter->auto_merge_stats || hybrid_merge(counter, config, true))
>>> - collect_all_aliases(config, counter, cb, data);
>>> - return true;
>>> -}
>>> -
>>> -struct aggr_data {
>>> - u64 ena, run, val;
>>> - struct aggr_cpu_id id;
>>> - int nr;
>>> - int cpu_map_idx;
>>> -};
>>> -
>>> -static void aggr_cb(struct perf_stat_config *config,
>>> - struct evsel *counter, void *data, bool first)
>>> -{
>>> - struct aggr_data *ad = data;
>>> - int idx;
>>> - struct perf_cpu cpu;
>>> - struct perf_cpu_map *cpus;
>>> - struct aggr_cpu_id s2;
>>> -
>>> - cpus = evsel__cpus(counter);
>>> - perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
>>> - struct perf_counts_values *counts;
>>> -
>>> - s2 = config->aggr_get_id(config, cpu);
>>> - if (!aggr_cpu_id__equal(&s2, &ad->id))
>>> - continue;
>>> - if (first)
>>> - ad->nr++;
>>> - counts = perf_counts(counter->counts, idx, 0);
>>> - /*
>>> - * When any result is bad, make them all to give
>>> - * consistent output in interval mode.
>>> - */
>>> - if (counts->ena == 0 || counts->run == 0 ||
>>> - counter->counts->scaled == -1) {
>>> - ad->ena = 0;
>>> - ad->run = 0;
>>> - break;
>>> - }
>>> - ad->val += counts->val;
>>> - ad->ena += counts->ena;
>>> - ad->run += counts->run;
>>> - }
>>> }
>>>
>>> static void print_counter_aggrdata(struct perf_stat_config *config,
>>> struct evsel *counter, int s,
>>> char *prefix, bool metric_only,
>>> - bool *first, struct perf_cpu cpu)
>>> + bool *first)
>>> {
>>> - struct aggr_data ad;
>>> FILE *output = config->output;
>>> u64 ena, run, val;
>>> - int nr;
>>> - struct aggr_cpu_id id;
>>> double uval;
>>> + struct perf_stat_evsel *ps = counter->stats;
>>> + struct perf_stat_aggr *aggr = &ps->aggr[s];
>>> + struct aggr_cpu_id id = config->aggr_map->map[s];
>>> + double avg = aggr->counts.val;
>>>
>>> - ad.id = id = config->aggr_map->map[s];
>>> - ad.val = ad.ena = ad.run = 0;
>>> - ad.nr = 0;
>>> - if (!collect_data(config, counter, aggr_cb, &ad))
>>> + if (counter->supported && aggr->nr == 0)
>>> return;
>>
>> Hi Namhyung,
>>
>> Observed one issue with this change in tmp.perf/core branch of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git
>> Patch link: https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git/commit/?h=tmp.perf/core&id=91f85f98da7ab8c32105f42dd03884c01ec4498f
>>
>> If we have multiple events in a group and if all events in that group can't go in at once, the group will fail to schedule.
>> The perf stat result used to have status of all events in group.
>>
>> Example result from powerpc:
>> # perf stat -e '{r1001e,r401e0,r4c010}' <workload>
>> Performance counter stats for 'workload’:
>>
>> <not counted> r1001e
>> <not counted> r401e0
>> <not supported> r4c010
>>
>> But after this change, it shows only the event which broke the constraint.
>>
>> # ./perf stat -e '{r1001e,r401e0,r4c010}' <workload>
>> Performance counter stats for 'workload’:
>> <not supported> r4c010
>>
>> The new condition added by the patch is:
>>
>> if (counter->supported && aggr->nr == 0)
>> return;
>>
>>
>> The first two events r1001e and r401e0 are good to go in this group, hence
>> counter->supported is true where as aggr->nr is zero since this group is
>> not scheduled. Since counter->supported is false for third event which
>> broke the group constraint, the condition won't return and hence only prints
>> this event in output.
>>
>> Namhyung, is there any scenario because of which this condition was added ?
>> If not I would go ahead and sent a fix patch to remove this check.
>
> I remember it's for AGGR_THREAD and merged (uncore) aliases and hybrid
> events. In system-wide per-thread mode, many of them would get 0 results
> and we'd like to skip them. And now I see that we have evsel->merged_stat
> check already.
>
> Maybe we could just add the aggr_mode check there.
Thanks for the response Namhyung.
I will share my response in patch "[PATCH] perf stat: Update event skip condition”
Thanks
Athira
>
> Thanks,
> Namhyung