[PATCH v2 9/9] perf stat: Use affinity for enabling/disabling events

From: Andi Kleen
Date: Sun Oct 20 2019 - 13:52:44 EST


From: Andi Kleen <ak@xxxxxxxxxxxxxxx>

Restructure event enabling/disabling to use affinity, which
minimizes the number of IPIs needed.

Before on a large test case with 94 CPUs:

% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
54.65 1.899986 22 84812 660 ioctl

after:

39.21 0.930451 10 84796 644 ioctl

Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
---
tools/perf/lib/evsel.c | 49 ++++++++++++++++++++--------
tools/perf/lib/include/perf/evsel.h | 2 ++
tools/perf/util/evlist.c | 50 ++++++++++++++++++++++++++---
tools/perf/util/evsel.c | 13 ++++++++
tools/perf/util/evsel.h | 2 ++
5 files changed, 98 insertions(+), 18 deletions(-)

diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c
index ea775dacbd2d..89ddfade0b96 100644
--- a/tools/perf/lib/evsel.c
+++ b/tools/perf/lib/evsel.c
@@ -198,38 +198,61 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
}

static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
- int ioc, void *arg)
+ int ioc, void *arg,
+ int cpu)
{
- int cpu, thread;
+ int thread;

- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
- for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
- int fd = FD(evsel, cpu, thread),
- err = ioctl(fd, ioc, arg);
+ for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
+ int fd = FD(evsel, cpu, thread),
+ err = ioctl(fd, ioc, arg);

- if (err)
- return err;
- }
+ if (err)
+ return err;
}

return 0;
}

+int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
+{
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0, cpu);
+}
+
int perf_evsel__enable(struct perf_evsel *evsel)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0);
+ int i;
+ int err = 0;
+
+ for (i = 0; i < evsel->cpus->nr && !err; i++)
+ err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0, i);
+ return err;
+}
+
+int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
+{
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0, cpu);
}

int perf_evsel__disable(struct perf_evsel *evsel)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0);
+ int i;
+ int err = 0;
+
+ for (i = 0; i < evsel->cpus->nr && !err; i++)
+ err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0, i);
+ return err;
}

int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
{
- return perf_evsel__run_ioctl(evsel,
+ int err = 0, i;
+
+ for (i = 0; i < evsel->cpus->nr && !err; i++)
+ err = perf_evsel__run_ioctl(evsel,
PERF_EVENT_IOC_SET_FILTER,
- (void *)filter);
+ (void *)filter, i);
+ return err;
}

struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
diff --git a/tools/perf/lib/include/perf/evsel.h b/tools/perf/lib/include/perf/evsel.h
index ed10a914cd3f..db31e512a120 100644
--- a/tools/perf/lib/include/perf/evsel.h
+++ b/tools/perf/lib/include/perf/evsel.h
@@ -32,7 +32,9 @@ LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
+LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel);
+LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index bcb8a3670f3f..55f38a71ad30 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -378,26 +378,66 @@ void evlist__cpu_iter_next(struct evsel *ev)
void evlist__disable(struct evlist *evlist)
{
struct evsel *pos;
+ struct affinity affinity;
+ struct perf_cpu_map *cpus;
+ int i;

+ if (affinity__setup(&affinity) < 0)
+ return;
+
+ cpus = evlist__cpu_iter_start(evlist);
+ for (i = 0; i < cpus->nr; i++) {
+ int cpu = cpus->map[i];
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry(evlist, pos) {
+ if (evlist__cpu_iter_skip(pos, cpu))
+ continue;
+ if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ evsel__disable_cpu(pos, pos->cpu_index);
+ evlist__cpu_iter_next(pos);
+ }
+ }
+ affinity__cleanup(&affinity);
evlist__for_each_entry(evlist, pos) {
- if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
continue;
- evsel__disable(pos);
+ pos->disabled = true;
}
-
evlist->enabled = false;
}

void evlist__enable(struct evlist *evlist)
{
struct evsel *pos;
+ struct affinity affinity;
+ struct perf_cpu_map *cpus;
+ int i;
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+
+ cpus = evlist__cpu_iter_start(evlist);
+ for (i = 0; i < cpus->nr; i++) {
+ int cpu = cpus->map[i];
+ affinity__set(&affinity, cpu);

+ evlist__for_each_entry(evlist, pos) {
+ if (evlist__cpu_iter_skip(pos, cpu))
+ continue;
+ if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ evsel__enable_cpu(pos, pos->cpu_index);
+ evlist__cpu_iter_next(pos);
+ }
+ }
+ affinity__cleanup(&affinity);
evlist__for_each_entry(evlist, pos) {
if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
continue;
- evsel__enable(pos);
+ pos->disabled = false;
}
-
evlist->enabled = true;
}

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7106f9a067df..79050a6f4991 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1205,13 +1205,26 @@ int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter)
return perf_evsel__append_filter(evsel, "%s,%s", filter);
}

+/* Caller has to clear disabled after going through all CPUs. */
+int evsel__enable_cpu(struct evsel *evsel, int cpu)
+{
+ int err = perf_evsel__enable_cpu(&evsel->core, cpu);
+ return err;
+}
+
int evsel__enable(struct evsel *evsel)
{
int err = perf_evsel__enable(&evsel->core);

if (!err)
evsel->disabled = false;
+ return err;
+}

+/* Caller has to set disabled after going through all CPUs. */
+int evsel__disable_cpu(struct evsel *evsel, int cpu)
+{
+ int err = perf_evsel__disable_cpu(&evsel->core, cpu);
return err;
}

diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 9fc9f6698aa4..15977bbe7b63 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -222,8 +222,10 @@ int perf_evsel__set_filter(struct evsel *evsel, const char *filter);
int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter);
int perf_evsel__append_addr_filter(struct evsel *evsel,
const char *filter);
+int evsel__enable_cpu(struct evsel *evsel, int cpu);
int evsel__enable(struct evsel *evsel);
int evsel__disable(struct evsel *evsel);
+int evsel__disable_cpu(struct evsel *evsel, int cpu);

int perf_evsel__open_per_cpu(struct evsel *evsel,
struct perf_cpu_map *cpus,
--
2.21.0