Re: [PATCH 1/2] perf stat: Fix segfault when counting armv8_pmu events
From: Jiri Olsa
Date: Wed Sep 23 2020 - 16:19:16 EST
On Wed, Sep 23, 2020 at 11:15:06PM +0900, Namhyung Kim wrote:
> On Wed, Sep 23, 2020 at 11:08 PM Jiri Olsa <jolsa@xxxxxxxxxx> wrote:
> >
> > On Wed, Sep 23, 2020 at 10:49:52PM +0900, Namhyung Kim wrote:
> > > On Wed, Sep 23, 2020 at 2:44 PM Jiri Olsa <jolsa@xxxxxxxxxx> wrote:
> > > >
> > > > On Tue, Sep 22, 2020 at 11:13:45AM +0800, Wei Li wrote:
> > > > > When executing perf stat with armv8_pmu events with a workload, it will
> > > > > report a segfault as result.
> > > >
> > > > please share the perf stat command line you see that segfault for
> > >
> > > It seems the description in the patch 0/2 already has it:
> > >
> > > [root@localhost hulk]# tools/perf/perf stat -e
> > > armv8_pmuv3_0/ll_cache_rd/,armv8_pmuv3_0/ll_cache_miss_rd/ ls >
> > > /dev/null
> > > Segmentation fault
> >
> > yea I found it, but can't reproduce it.. I see the issue from
> > patch 2, but not sure what's the problem so far
>
> I think the problem is that armv8_pmu has a cpumask,
> and the user requested per-task events.
>
> The code tried to open the event with a dummy cpu map
> since it's not a cpu event, but the pmu has cpu map and
> it's passed to evsel. So there's confusion somewhere
> whether it should use evsel->cpus or a dummy map.
you're right, I have the following cpus file in the pmu:
# cat /sys/devices/armv8_pmuv3_0/cpus
0-3
covering all the cpus. Once a pmu has a cpumask/cpus file,
its events are system wide by default in the current code, but we should
not crash ;-)
I tried to cover this case in the patch below, and I probably broke
some other use cases, but perhaps we could allow opening counters
per cpu for a given workload
I'll try to look at this more tomorrow
jirka
---
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 7f8d756d9408..0c7f16a673c2 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -379,12 +379,7 @@ static int read_affinity_counters(struct timespec *rs)
if (affinity__setup(&affinity) < 0)
return -1;
- ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
- if (!target__has_cpu(&target) || target__has_per_thread(&target))
- ncpus = 1;
evlist__for_each_cpu(evsel_list, i, cpu) {
- if (i >= ncpus)
- break;
affinity__set(&affinity, cpu);
evlist__for_each_entry(evsel_list, counter) {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index fd865002cbbd..ef525eb2f619 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1861,6 +1861,16 @@ void evsel__close(struct evsel *evsel)
perf_evsel__free_id(&evsel->core);
}
+int evsel__open_threads_per_cpu(struct evsel *evsel, struct perf_thread_map *threads,
+ struct perf_cpu_map *cpus, int cpu)
+{
+ if (cpu == -1)
+ return evsel__open_cpu(evsel, cpus, threads, 0,
+ cpus ? cpus->nr : 1);
+
+ return evsel__open_cpu(evsel, cpus, threads, cpu, cpu + 1);
+}
+
int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu)
{
if (cpu == -1)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 35e3f6d66085..1d055699bd1f 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -231,6 +231,8 @@ int evsel__enable(struct evsel *evsel);
int evsel__disable(struct evsel *evsel);
int evsel__disable_cpu(struct evsel *evsel, int cpu);
+int evsel__open_threads_per_cpu(struct evsel *evsel, struct perf_thread_map *threads,
+ struct perf_cpu_map *cpus, int cpu);
int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu);
int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads);
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index cdb154381a87..2b17f1315cfb 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -560,6 +560,11 @@ int create_perf_stat_counter(struct evsel *evsel,
attr->enable_on_exec = 1;
}
+ if (evsel->core.own_cpus && evsel->core.threads) {
+ return evsel__open_threads_per_cpu(evsel, evsel->core.threads,
+ evsel__cpus(evsel), cpu);
+ }
+
if (target__has_cpu(target) && !target__has_per_thread(target))
return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);