[PATCH 4/5] perf,tools: open/mmap event according to event's cpu map not evlist's

From: Kan Liang
Date: Tue Mar 03 2015 - 11:04:37 EST


From: Kan Liang <kan.liang@xxxxxxxxx>

The perf tool can open/mmap events per cpu, but the cpu list comes from
the evlist's cpu map, so all events share the same cpu map. However,
some events, such as uncore events, carry their own cpu mask, which
means the global cpu map does not work well when events from different
PMUs are mixed.
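
Concretely, the open path now prefers the per-event map and only falls
back to the evlist-wide one (this mirrors the __perf_evsel__open() hunk
below):

	struct cpu_map *cpumap;

	/* Use the event's own cpu map (e.g. an uncore PMU's cpumask)
	 * if it has one; otherwise fall back to the evlist's map. */
	if (evsel->cpus)
		cpumap = evsel->cpus;
	else
		cpumap = cpus;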

With this patch, the event's own cpu map provides the available cpu
list for perf open; if the event has none, the evlist's map is used as
before. Since each event's cpu list can now differ, the array index
alone no longer identifies the group leader's fd. get_group_fd()
therefore takes the cpu id and translates it into an index in the
leader's cpu map to find the correct fd (see the sketch below).
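
get_group_fd() relies on perf_evsel__get_cpumap_index(), presumably
introduced earlier in this series. Inferring from its call sites in
this patch, a minimal sketch of the assumed semantics:

	/*
	 * Hypothetical sketch: return the index of cpu id 'cpu' within
	 * the event's own cpu map if it has one, else within the
	 * fallback map 'cpus'; return -1 if the cpu is not in the map.
	 */
	static int perf_evsel__get_cpumap_index(struct perf_evsel *evsel,
						int cpu, struct cpu_map *cpus)
	{
		struct cpu_map *map;
		int i;

		map = (evsel && evsel->cpus) ? evsel->cpus : cpus;
		for (i = 0; i < map->nr; i++) {
			if (map->map[i] == cpu)
				return i;
		}

		return -1;
	}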
perf_evlist__mmap() also needs to change, since the leader's fd has to
be mmaped before any member's fd; evlist__for_each therefore becomes
the outermost loop, as sketched below.
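
The new nesting in perf_evlist__mmap_per_cpu() looks roughly like this
(simplified from the hunk below; error handling and the output[]
bookkeeping elided):

	evlist__for_each(evlist, evsel) {
		struct cpu_map *map = evsel->cpus ? evsel->cpus : evlist->cpus;

		for (cpu = 0; cpu < map->nr; cpu++) {
			/* Translate the event's cpu id to the evlist-wide
			 * index so events sharing a cpu share a buffer. */
			int idx = perf_evsel__get_cpumap_index(NULL, map->map[cpu],
							       evlist->cpus);

			for (thread = 0; thread < nr_threads; thread++)
				perf_evlist__mmap_per_evsel(evlist, evsel, idx, mp,
							    cpu, thread, &output[idx]);
		}
	}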

Signed-off-by: Kan Liang <kan.liang@xxxxxxxxx>
---
tools/perf/util/evlist.c | 110 ++++++++++++++++++++++++++++-------------------
tools/perf/util/evsel.c | 30 +++++++++----
2 files changed, 87 insertions(+), 53 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 3c6115c..85c2ae0 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -792,51 +792,48 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
return 0;
}

-static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
- struct mmap_params *mp, int cpu,
- int thread, int *output)
+static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist,
+ struct perf_evsel *evsel,
+ int idx, struct mmap_params *mp,
+ int cpu, int thread, int *output)
{
- struct perf_evsel *evsel;
+ int fd;

- evlist__for_each(evlist, evsel) {
- int fd;
+ if (evsel->system_wide && thread)
+ return 0;

- if (evsel->system_wide && thread)
- continue;
+ fd = FD(evsel, cpu, thread);

- fd = FD(evsel, cpu, thread);
+ if (*output == -1) {
+ *output = fd;
+ if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0)
+ return -1;
+ } else {
+ if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
+ return -1;

- if (*output == -1) {
- *output = fd;
- if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0)
- return -1;
- } else {
- if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
- return -1;
+ perf_evlist__mmap_get(evlist, idx);
+ }

- perf_evlist__mmap_get(evlist, idx);
- }
+ /*
+ * The system_wide flag causes a selected event to be opened
+ * always without a pid. Consequently it will never get a
+ * POLLHUP, but it is used for tracking in combination with
+ * other events, so it should not need to be polled anyway.
+ * Therefore don't add it for polling.
+ */
+ if (!evsel->system_wide &&
+ __perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
+ perf_evlist__mmap_put(evlist, idx);
+ return -1;
+ }

- /*
- * The system_wide flag causes a selected event to be opened
- * always without a pid. Consequently it will never get a
- * POLLHUP, but it is used for tracking in combination with
- * other events, so it should not need to be polled anyway.
- * Therefore don't add it for polling.
- */
- if (!evsel->system_wide &&
- __perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
- perf_evlist__mmap_put(evlist, idx);
+ if (evsel->attr.read_format & PERF_FORMAT_ID) {
+ if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
+ fd) < 0)
return -1;
- }
-
- if (evsel->attr.read_format & PERF_FORMAT_ID) {
- if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
- fd) < 0)
- return -1;
- perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
- thread);
- }
+ perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
+ thread);
}

return 0;
@@ -848,23 +845,43 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
int cpu, thread;
int nr_cpus = cpu_map__nr(evlist->cpus);
int nr_threads = thread_map__nr(evlist->threads);
+ int *output = malloc(nr_cpus * sizeof(int));
+ struct cpu_map *map;
+ int evlist_cpu;
+ struct perf_evsel *evsel;

pr_debug2("perf event ring buffer mmapped per cpu\n");
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- int output = -1;

- for (thread = 0; thread < nr_threads; thread++) {
- if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
- thread, &output))
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ output[cpu] = -1;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->cpus)
+ map = evsel->cpus;
+ else
+ map = evlist->cpus;
+
+ for (cpu = 0; cpu < map->nr; cpu++) {
+ evlist_cpu = perf_evsel__get_cpumap_index(NULL, map->map[cpu], evlist->cpus);
+ if (evlist_cpu < 0)
goto out_unmap;
+
+ for (thread = 0; thread < nr_threads; thread++) {
+ if (perf_evlist__mmap_per_evsel(evlist, evsel, evlist_cpu,
+ mp, cpu, thread,
+ &output[evlist_cpu]))
+ goto out_unmap;
+ }
}
}

+ free(output);
return 0;

out_unmap:
for (cpu = 0; cpu < nr_cpus; cpu++)
__perf_evlist__munmap(evlist, cpu);
+ free(output);
return -1;
}

@@ -873,14 +890,17 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
{
int thread;
int nr_threads = thread_map__nr(evlist->threads);
+ struct perf_evsel *evsel;

pr_debug2("perf event ring buffer mmapped per thread\n");
for (thread = 0; thread < nr_threads; thread++) {
int output = -1;

- if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
- &output))
- goto out_unmap;
+ evlist__for_each(evlist, evsel) {
+ if (perf_evlist__mmap_per_evsel(evlist, evsel, thread,
+ mp, 0, thread, &output))
+ goto out_unmap;
+ }
}

return 0;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index bb4eff2..6077a83 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -981,10 +981,11 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
return 0;
}

-static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
+static int get_group_fd(struct perf_evsel *evsel, int cpu,
+ int thread, struct cpu_map *cpus)
{
struct perf_evsel *leader = evsel->leader;
- int fd;
+ int fd, leader_cpu;

if (perf_evsel__is_group_leader(evsel))
return -1;
@@ -995,9 +996,16 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
*/
BUG_ON(!leader->fd);

- fd = FD(leader, cpu, thread);
+ if (cpu < 0)
+ fd = FD(leader, 0, thread);
+ else {
+ leader_cpu = perf_evsel__get_cpumap_index(leader, cpu, cpus);
+ if (leader_cpu >= 0)
+ fd = FD(leader, leader_cpu, thread);
+ else
+ return -1;
+ }
BUG_ON(fd == -1);
-
return fd;
}

@@ -1068,6 +1076,7 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
int cpu, thread, nthreads;
unsigned long flags = PERF_FLAG_FD_CLOEXEC;
int pid = -1, err;
+ struct cpu_map *cpumap;
enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;

if (evsel->system_wide)
@@ -1084,6 +1093,11 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
pid = evsel->cgrp->fd;
}

+ if (evsel->cpus)
+ cpumap = evsel->cpus;
+ else
+ cpumap = cpus;
+
fallback_missing_features:
if (perf_missing_features.cloexec)
flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
@@ -1098,7 +1112,7 @@ retry_sample_id:
if (verbose >= 2)
perf_event_attr__fprintf(&evsel->attr, stderr);

- for (cpu = 0; cpu < cpus->nr; cpu++) {
+ for (cpu = 0; cpu < cpumap->nr; cpu++) {

for (thread = 0; thread < nthreads; thread++) {
int group_fd;
@@ -1106,14 +1120,14 @@ retry_sample_id:
if (!evsel->cgrp && !evsel->system_wide)
pid = threads->map[thread];

- group_fd = get_group_fd(evsel, cpu, thread);
+ group_fd = get_group_fd(evsel, cpumap->map[cpu], thread, cpus);
retry_open:
pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx\n",
- pid, cpus->map[cpu], group_fd, flags);
+ pid, cpumap->map[cpu], group_fd, flags);

FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
pid,
- cpus->map[cpu],
+ cpumap->map[cpu],
group_fd, flags);
if (FD(evsel, cpu, thread) < 0) {
err = -errno;
--
1.8.3.1
