Re: [PATCH] perf, tools, script: Skip locking overhead in perf script
From: Arnaldo Carvalho de Melo
Date: Tue Apr 04 2017 - 12:17:51 EST
Em Thu, Mar 23, 2017 at 05:28:07PM -0700, Andi Kleen escreveu:
> From: Andi Kleen <ak@xxxxxxxxxxxxxxx>
>
> I was tired of seeing ~10% locking overhead in perf script while
> doing larger PT decodes. perf script doesn't need any locking
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index f53f449d864d..b404644f559a 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
Huh, should this be builtin-script? Anyway, below would be a more
complete patch, but I couldn't find improvement at least with a 50M
perf.data file with intel_pt data... will try some more later.
- Arnaldo
commit 42ee7ffd3f88047839837d9b35eef23740a86429
Author: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Date: Tue Apr 4 13:15:04 2017 -0300
WIP
Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 46acc8ece41f..e6262d36f0de 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2493,6 +2493,8 @@ int cmd_script(int argc, const char **argv)
NULL
};
+ perf_set_singlethreaded();
+
setup_scripting();
argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 5c0ea11a8f0a..9319291414a5 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -75,6 +75,7 @@ libperf-y += data.o
libperf-y += tsc.o
libperf-y += cloexec.o
libperf-y += call-path.o
+libperf-y += rwsem.o
libperf-y += thread-stack.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index c1870ac365a3..08dd01fd2b44 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -476,7 +476,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
static void maps__init(struct maps *maps)
{
maps->entries = RB_ROOT;
- pthread_rwlock_init(&maps->lock, NULL);
+ init_rwsem(&maps->lock);
}
void map_groups__init(struct map_groups *mg, struct machine *machine)
@@ -505,9 +505,9 @@ static void __maps__purge(struct maps *maps)
static void maps__exit(struct maps *maps)
{
- pthread_rwlock_wrlock(&maps->lock);
+ down_write(&maps->lock);
__maps__purge(maps);
- pthread_rwlock_unlock(&maps->lock);
+ up_write(&maps->lock);
}
void map_groups__exit(struct map_groups *mg)
@@ -574,7 +574,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
struct symbol *sym;
struct rb_node *nd;
- pthread_rwlock_rdlock(&maps->lock);
+ down_read(&maps->lock);
for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
struct map *pos = rb_entry(nd, struct map, rb_node);
@@ -590,7 +590,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
sym = NULL;
out:
- pthread_rwlock_unlock(&maps->lock);
+ up_read(&maps->lock);
return sym;
}
@@ -626,7 +626,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
size_t printed = 0;
struct rb_node *nd;
- pthread_rwlock_rdlock(&maps->lock);
+ down_read(&maps->lock);
for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
struct map *pos = rb_entry(nd, struct map, rb_node);
@@ -638,7 +638,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
}
}
- pthread_rwlock_unlock(&maps->lock);
+ up_read(&maps->lock);
return printed;
}
@@ -670,7 +670,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
struct rb_node *next;
int err = 0;
- pthread_rwlock_wrlock(&maps->lock);
+ down_write(&maps->lock);
root = &maps->entries;
next = rb_first(root);
@@ -738,7 +738,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
err = 0;
out:
- pthread_rwlock_unlock(&maps->lock);
+ up_write(&maps->lock);
return err;
}
@@ -759,7 +759,7 @@ int map_groups__clone(struct thread *thread,
struct map *map;
struct maps *maps = &parent->maps[type];
- pthread_rwlock_rdlock(&maps->lock);
+ down_read(&maps->lock);
for (map = maps__first(maps); map; map = map__next(map)) {
struct map *new = map__clone(map);
@@ -776,7 +776,7 @@ int map_groups__clone(struct thread *thread,
err = 0;
out_unlock:
- pthread_rwlock_unlock(&maps->lock);
+ up_read(&maps->lock);
return err;
}
@@ -803,9 +803,9 @@ static void __maps__insert(struct maps *maps, struct map *map)
void maps__insert(struct maps *maps, struct map *map)
{
- pthread_rwlock_wrlock(&maps->lock);
+ down_write(&maps->lock);
__maps__insert(maps, map);
- pthread_rwlock_unlock(&maps->lock);
+ up_write(&maps->lock);
}
static void __maps__remove(struct maps *maps, struct map *map)
@@ -816,9 +816,9 @@ static void __maps__remove(struct maps *maps, struct map *map)
void maps__remove(struct maps *maps, struct map *map)
{
- pthread_rwlock_wrlock(&maps->lock);
+ down_write(&maps->lock);
__maps__remove(maps, map);
- pthread_rwlock_unlock(&maps->lock);
+ up_write(&maps->lock);
}
struct map *maps__find(struct maps *maps, u64 ip)
@@ -826,7 +826,7 @@ struct map *maps__find(struct maps *maps, u64 ip)
struct rb_node **p, *parent = NULL;
struct map *m;
- pthread_rwlock_rdlock(&maps->lock);
+ down_read(&maps->lock);
p = &maps->entries.rb_node;
while (*p != NULL) {
@@ -842,7 +842,7 @@ struct map *maps__find(struct maps *maps, u64 ip)
m = NULL;
out:
- pthread_rwlock_unlock(&maps->lock);
+ up_read(&maps->lock);
return m;
}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index c8a5a644c0a9..fd76534a7807 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -9,6 +9,7 @@
#include <stdio.h>
#include <stdbool.h>
#include <linux/types.h>
+#include "rwsem.h"
enum map_type {
MAP__FUNCTION = 0,
@@ -61,7 +62,7 @@ struct kmap {
struct maps {
struct rb_root entries;
- pthread_rwlock_t lock;
+ struct rw_semaphore lock;
};
struct map_groups {
diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c
new file mode 100644
index 000000000000..02b0f18b96eb
--- /dev/null
+++ b/tools/perf/util/rwsem.c
@@ -0,0 +1,27 @@
+#include "util.h"
+#include "rwsem.h"
+
+int init_rwsem(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_init(&sem->lock, NULL);
+}
+
+int down_read(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_rdlock(&sem->lock);
+}
+
+int up_read(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
+}
+
+int down_write(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_wrlock(&sem->lock);
+}
+
+int up_write(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
+}
diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h
new file mode 100644
index 000000000000..c06afd42dc11
--- /dev/null
+++ b/tools/perf/util/rwsem.h
@@ -0,0 +1,18 @@
+#ifndef _PERF_RWSEM_H
+#define _PERF_RWSEM_H
+
+#include <pthread.h>
+
+struct rw_semaphore {
+ pthread_rwlock_t lock;
+};
+
+int init_rwsem(struct rw_semaphore *sem);
+
+int down_read(struct rw_semaphore *sem);
+int up_read(struct rw_semaphore *sem);
+
+int down_write(struct rw_semaphore *sem);
+int up_write(struct rw_semaphore *sem);
+
+#endif /* _PERF_RWSEM_H */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 9b4d8ba22fed..d3caa5e5f845 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -210,7 +210,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
struct maps *maps = &mg->maps[type];
struct map *next, *curr;
- pthread_rwlock_wrlock(&maps->lock);
+ down_write(&maps->lock);
curr = maps__first(maps);
if (curr == NULL)
@@ -228,7 +228,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
curr->end = ~0ULL;
out_unlock:
- pthread_rwlock_unlock(&maps->lock);
+ up_write(&maps->lock);
}
struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
@@ -1561,7 +1561,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
struct maps *maps = &mg->maps[type];
struct map *map;
- pthread_rwlock_rdlock(&maps->lock);
+ down_read(&maps->lock);
for (map = maps__first(maps); map; map = map__next(map)) {
if (map->dso && strcmp(map->dso->short_name, name) == 0)
@@ -1571,7 +1571,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
map = NULL;
out_unlock:
- pthread_rwlock_unlock(&maps->lock);
+ up_read(&maps->lock);
return map;
}
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index dcdb87a5d0a1..4da6e776a189 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -259,7 +259,7 @@ static int __thread__prepare_access(struct thread *thread)
struct maps *maps = &thread->mg->maps[i];
struct map *map;
- pthread_rwlock_rdlock(&maps->lock);
+ down_read(&maps->lock);
for (map = maps__first(maps); map; map = map__next(map)) {
err = unwind__prepare_access(thread, map, &initialized);
@@ -267,7 +267,7 @@ static int __thread__prepare_access(struct thread *thread)
break;
}
- pthread_rwlock_unlock(&maps->lock);
+ up_read(&maps->lock);
}
return err;
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index d8b45cea54d0..a016880a55e6 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -20,6 +20,13 @@
#include "callchain.h"
#include "strlist.h"
+bool perf_singlethreaded;
+
+void perf_set_singlethreaded(void)
+{
+ perf_singlethreaded = true;
+}
+
#define CALLCHAIN_PARAM_DEFAULT \
.mode = CHAIN_GRAPH_ABS, \
.min_percent = 0.5, \
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 7cf5752b38fd..e59aa319a2e1 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -380,4 +380,7 @@ struct inline_node {
struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr);
void inline_node__delete(struct inline_node *node);
+extern bool perf_singlethreaded;
+void perf_set_singlethreaded(void);
+
#endif /* GIT_COMPAT_UTIL_H */