[PATCH] perf stat: Ignore error thread when enabling system-wide --per-thread
From: Jin Yao
Date: Tue Jan 16 2018 - 02:48:29 EST
If we execute 'perf stat --per-thread' as a non-root user
(even with kernel.perf_event_paranoid set to -1), it reports the error:
jinyao@skl:~$ perf stat --per-thread
Error:
You may not have permission to collect system-wide stats.
Consider tweaking /proc/sys/kernel/perf_event_paranoid,
which controls use of the performance events system by
unprivileged users (without CAP_SYS_ADMIN).
The current value is 2:
-1: Allow use of (almost) all events by all users
Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
>= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN
Disallow raw tracepoint access by users without CAP_SYS_ADMIN
>= 1: Disallow CPU event access by users without CAP_SYS_ADMIN
>= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN
To make this setting permanent, edit /etc/sysctl.conf too, e.g.:
kernel.perf_event_paranoid = -1
Perhaps the ptrace rules don't allow tracing some processes. In any case,
the global --per-thread mode should ignore such errors and continue
working on the other threads.
This patch records the index of the failing thread in perf_evsel__open()
and removes that thread from the thread map before retrying.
For example (run as a non-root user, without changing kernel.perf_event_paranoid):
jinyao@skl:~$ perf stat --per-thread
^C
Performance counter stats for 'system wide':
vmstat-3458 6.171984 cpu-clock:u (msec) # 0.000 CPUs utilized
perf-3670 0.515599 cpu-clock:u (msec) # 0.000 CPUs utilized
vmstat-3458 1,163,643 cycles:u # 0.189 GHz
perf-3670 40,881 cycles:u # 0.079 GHz
vmstat-3458 1,410,238 instructions:u # 1.21 insn per cycle
perf-3670 3,536 instructions:u # 0.09 insn per cycle
vmstat-3458 288,937 branches:u # 46.814 M/sec
perf-3670 936 branches:u # 1.815 M/sec
vmstat-3458 15,195 branch-misses:u # 5.26% of all branches
perf-3670 76 branch-misses:u # 8.12% of all branches
12.651675247 seconds time elapsed
Signed-off-by: Jin Yao <yao.jin@xxxxxxxxxxxxxxx>
---
tools/perf/builtin-stat.c | 14 +++++++++++++-
tools/perf/util/evsel.c | 3 +++
tools/perf/util/thread_map.c | 1 +
tools/perf/util/thread_map.h | 1 +
4 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 98bf9d3..bcdb47c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -632,7 +632,19 @@ static int __run_perf_stat(int argc, const char **argv)
if (verbose > 0)
ui__warning("%s\n", msg);
goto try_again;
- }
+ } else if (target__has_per_thread(&target) &&
+ evsel_list->threads &&
+ evsel_list->threads->err_thread != -1) {
+ /*
+ * For global --per-thread case, skip current
+ * error thread.
+ */
+ if (!thread_map__remove(evsel_list->threads,
+ evsel_list->threads->err_thread)) {
+ evsel_list->threads->err_thread = -1;
+ goto try_again;
+ }
+ }
perf_evsel__open_strerror(counter, &target,
errno, msg, sizeof(msg));
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index a4d256e..12f8733 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1899,6 +1899,9 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
goto fallback_missing_features;
}
out_close:
+ if (err)
+ threads->err_thread = thread;
+
do {
while (--thread >= 0) {
close(FD(evsel, cpu, thread));
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 3e1038f..870cb0c 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -32,6 +32,7 @@ static void thread_map__reset(struct thread_map *map, int start, int nr)
size_t size = (nr - start) * sizeof(map->map[0]);
memset(&map->map[start], 0, size);
+ map->err_thread = -1;
}
static struct thread_map *thread_map__realloc(struct thread_map *map, int nr)
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 0a806b9..ac6baf1 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -14,6 +14,7 @@ struct thread_map_data {
struct thread_map {
refcount_t refcnt;
int nr;
+ int err_thread;
struct thread_map_data map[];
};
--
2.7.4