[PATCH RFC 01/10] perf tools: hashtable for machine threads
From: kan . liang
Date: Thu Sep 07 2017 - 13:58:32 EST
From: Kan Liang <kan.liang@xxxxxxxxx>
To process any events, it needs to find the thread in the machine first.
The machine maintains a rb tree to store all threads. The rb tree is
protected by a rw lock.
It is not a problem for current perf which serially processing events.
However, it will have scalability performance issue to process events in
parallel, especially on a heave load system which have many threads.
Introduce a hashtable to divide the big rb tree into many samll rb tree
for threads. The index is thread id % hashtable size. It can reduce the
lock contention.
Signed-off-by: Kan Liang <kan.liang@xxxxxxxxx>
---
tools/perf/builtin-trace.c | 19 +++---
tools/perf/util/machine.c | 139 ++++++++++++++++++++++++++++----------------
tools/perf/util/machine.h | 23 ++++++--
tools/perf/util/rb_resort.h | 5 +-
4 files changed, 120 insertions(+), 66 deletions(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 771ddab..af4e4e4 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2730,20 +2730,23 @@ DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_event
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
- DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
size_t printed = trace__fprintf_threads_header(fp);
struct rb_node *nd;
+ int i;
- if (threads == NULL) {
- fprintf(fp, "%s", "Error sorting output by nr_events!\n");
- return 0;
- }
+ for (i = 0; i < MACHINE_TH_TABLE_SIZE; i++) {
+ DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
- resort_rb__for_each_entry(nd, threads)
- printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+ if (threads == NULL) {
+ fprintf(fp, "%s", "Error sorting output by nr_events!\n");
+ return 0;
+ }
- resort_rb__delete(threads);
+ resort_rb__for_each_entry(nd, threads)
+ printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+ resort_rb__delete(threads);
+ }
return printed;
}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index df70936..5e451f9 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -33,6 +33,21 @@ static void dsos__init(struct dsos *dsos)
pthread_rwlock_init(&dsos->lock, NULL);
}
+static void machine__thread_init(struct machine *machine)
+{
+ struct machine_th *machine_th;
+ int i;
+
+ for (i = 0; i < MACHINE_TH_TABLE_SIZE; i++) {
+ machine_th = &machine->threads[i];
+ machine_th->threads = RB_ROOT;
+ pthread_rwlock_init(&machine_th->threads_lock, NULL);
+ machine_th->nr_threads = 0;
+ INIT_LIST_HEAD(&machine_th->dead_threads);
+ machine_th->last_match = NULL;
+ }
+}
+
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
memset(machine, 0, sizeof(*machine));
@@ -40,11 +55,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
RB_CLEAR_NODE(&machine->rb_node);
dsos__init(&machine->dsos);
- machine->threads = RB_ROOT;
- pthread_rwlock_init(&machine->threads_lock, NULL);
- machine->nr_threads = 0;
- INIT_LIST_HEAD(&machine->dead_threads);
- machine->last_match = NULL;
+ machine__thread_init(machine);
machine->vdso_info = NULL;
machine->env = NULL;
@@ -140,28 +151,39 @@ static void dsos__exit(struct dsos *dsos)
void machine__delete_threads(struct machine *machine)
{
+ struct machine_th *machine_th;
struct rb_node *nd;
+ int i;
- pthread_rwlock_wrlock(&machine->threads_lock);
- nd = rb_first(&machine->threads);
- while (nd) {
- struct thread *t = rb_entry(nd, struct thread, rb_node);
+ for (i = 0; i < MACHINE_TH_TABLE_SIZE; i++) {
+ machine_th = &machine->threads[i];
+ pthread_rwlock_wrlock(&machine_th->threads_lock);
+ nd = rb_first(&machine_th->threads);
+ while (nd) {
+ struct thread *t = rb_entry(nd, struct thread, rb_node);
- nd = rb_next(nd);
- __machine__remove_thread(machine, t, false);
+ nd = rb_next(nd);
+ __machine__remove_thread(machine, t, false);
+ }
+ pthread_rwlock_unlock(&machine_th->threads_lock);
}
- pthread_rwlock_unlock(&machine->threads_lock);
}
void machine__exit(struct machine *machine)
{
+ struct machine_th *machine_th;
+ int i;
+
machine__destroy_kernel_maps(machine);
map_groups__exit(&machine->kmaps);
dsos__exit(&machine->dsos);
machine__exit_vdso(machine);
zfree(&machine->root_dir);
zfree(&machine->current_tid);
- pthread_rwlock_destroy(&machine->threads_lock);
+ for (i = 0; i < MACHINE_TH_TABLE_SIZE; i++) {
+ machine_th = &machine->threads[i];
+ pthread_rwlock_destroy(&machine_th->threads_lock);
+ }
}
void machine__delete(struct machine *machine)
@@ -382,7 +404,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
pid_t pid, pid_t tid,
bool create)
{
- struct rb_node **p = &machine->threads.rb_node;
+ struct machine_th *machine_th = machine_thread(machine, tid);
+ struct rb_node **p = &machine_th->threads.rb_node;
struct rb_node *parent = NULL;
struct thread *th;
@@ -391,14 +414,14 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
* so most of the time we dont have to look up
* the full rbtree:
*/
- th = machine->last_match;
+ th = machine_th->last_match;
if (th != NULL) {
if (th->tid == tid) {
machine__update_thread_pid(machine, th, pid);
return thread__get(th);
}
- machine->last_match = NULL;
+ machine_th->last_match = NULL;
}
while (*p != NULL) {
@@ -406,7 +429,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
th = rb_entry(parent, struct thread, rb_node);
if (th->tid == tid) {
- machine->last_match = th;
+ machine_th->last_match = th;
machine__update_thread_pid(machine, th, pid);
return thread__get(th);
}
@@ -423,7 +446,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
th = thread__new(pid, tid);
if (th != NULL) {
rb_link_node(&th->rb_node, parent, p);
- rb_insert_color(&th->rb_node, &machine->threads);
+ rb_insert_color(&th->rb_node, &machine_th->threads);
/*
* We have to initialize map_groups separately
@@ -434,7 +457,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
* leader and that would screwed the rb tree.
*/
if (thread__init_map_groups(th, machine)) {
- rb_erase_init(&th->rb_node, &machine->threads);
+ rb_erase_init(&th->rb_node, &machine_th->threads);
RB_CLEAR_NODE(&th->rb_node);
thread__put(th);
return NULL;
@@ -443,8 +466,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
* It is now in the rbtree, get a ref
*/
thread__get(th);
- machine->last_match = th;
- ++machine->nr_threads;
+ machine_th->last_match = th;
+ ++machine_th->nr_threads;
}
return th;
@@ -458,21 +481,24 @@ struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid
struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
pid_t tid)
{
+ struct machine_th *machine_th = machine_thread(machine, tid);
struct thread *th;
- pthread_rwlock_wrlock(&machine->threads_lock);
+ pthread_rwlock_wrlock(&machine_th->threads_lock);
th = __machine__findnew_thread(machine, pid, tid);
- pthread_rwlock_unlock(&machine->threads_lock);
+ pthread_rwlock_unlock(&machine_th->threads_lock);
return th;
}
struct thread *machine__find_thread(struct machine *machine, pid_t pid,
pid_t tid)
{
+ struct machine_th *machine_th = machine_thread(machine, tid);
struct thread *th;
- pthread_rwlock_rdlock(&machine->threads_lock);
+
+ pthread_rwlock_rdlock(&machine_th->threads_lock);
th = ____machine__findnew_thread(machine, pid, tid, false);
- pthread_rwlock_unlock(&machine->threads_lock);
+ pthread_rwlock_unlock(&machine_th->threads_lock);
return th;
}
@@ -719,21 +745,25 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
size_t machine__fprintf(struct machine *machine, FILE *fp)
{
- size_t ret;
+ struct machine_th *machine_th;
struct rb_node *nd;
+ size_t ret;
+ int i;
- pthread_rwlock_rdlock(&machine->threads_lock);
-
- ret = fprintf(fp, "Threads: %u\n", machine->nr_threads);
+ for (i = 0; i < MACHINE_TH_TABLE_SIZE; i++) {
+ machine_th = &machine->threads[i];
+ pthread_rwlock_rdlock(&machine_th->threads_lock);
- for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
- struct thread *pos = rb_entry(nd, struct thread, rb_node);
+ ret = fprintf(fp, "Threads: %u\n", machine_th->nr_threads);
- ret += thread__fprintf(pos, fp);
- }
+ for (nd = rb_first(&machine_th->threads); nd; nd = rb_next(nd)) {
+ struct thread *pos = rb_entry(nd, struct thread, rb_node);
- pthread_rwlock_unlock(&machine->threads_lock);
+ ret += thread__fprintf(pos, fp);
+ }
+ pthread_rwlock_unlock(&machine_th->threads_lock);
+ }
return ret;
}
@@ -1479,23 +1509,25 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
{
- if (machine->last_match == th)
- machine->last_match = NULL;
+ struct machine_th *machine_th = machine_thread(machine, th->tid);
+
+ if (machine_th->last_match == th)
+ machine_th->last_match = NULL;
BUG_ON(refcount_read(&th->refcnt) == 0);
if (lock)
- pthread_rwlock_wrlock(&machine->threads_lock);
- rb_erase_init(&th->rb_node, &machine->threads);
+ pthread_rwlock_wrlock(&machine_th->threads_lock);
+ rb_erase_init(&th->rb_node, &machine_th->threads);
RB_CLEAR_NODE(&th->rb_node);
- --machine->nr_threads;
+ --machine_th->nr_threads;
/*
* Move it first to the dead_threads list, then drop the reference,
* if this is the last reference, then the thread__delete destructor
* will be called and we will remove it from the dead_threads list.
*/
- list_add_tail(&th->node, &machine->dead_threads);
+ list_add_tail(&th->node, &machine_th->dead_threads);
if (lock)
- pthread_rwlock_unlock(&machine->threads_lock);
+ pthread_rwlock_unlock(&machine_th->threads_lock);
thread__put(th);
}
@@ -2140,21 +2172,26 @@ int machine__for_each_thread(struct machine *machine,
int (*fn)(struct thread *thread, void *p),
void *priv)
{
+ struct machine_th *machine_th;
struct rb_node *nd;
struct thread *thread;
int rc = 0;
+ int i;
- for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
- thread = rb_entry(nd, struct thread, rb_node);
- rc = fn(thread, priv);
- if (rc != 0)
- return rc;
- }
+ for (i = 0; i < MACHINE_TH_TABLE_SIZE; i++) {
+ machine_th = &machine->threads[i];
+ for (nd = rb_first(&machine_th->threads); nd; nd = rb_next(nd)) {
+ thread = rb_entry(nd, struct thread, rb_node);
+ rc = fn(thread, priv);
+ if (rc != 0)
+ return rc;
+ }
- list_for_each_entry(thread, &machine->dead_threads, node) {
- rc = fn(thread, priv);
- if (rc != 0)
- return rc;
+ list_for_each_entry(thread, &machine_th->dead_threads, node) {
+ rc = fn(thread, priv);
+ if (rc != 0)
+ return rc;
+ }
}
return rc;
}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 3cdb134..745a0ca 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -23,6 +23,17 @@ extern const char *ref_reloc_sym_names[];
struct vdso_info;
+#define MACHINE_TH_TABLE_BITS 8
+#define MACHINE_TH_TABLE_SIZE (1 << MACHINE_TH_TABLE_BITS)
+
+struct machine_th {
+ struct rb_root threads;
+ pthread_rwlock_t threads_lock;
+ unsigned int nr_threads;
+ struct list_head dead_threads;
+ struct thread *last_match;
+};
+
struct machine {
struct rb_node rb_node;
pid_t pid;
@@ -30,11 +41,7 @@ struct machine {
bool comm_exec;
bool kptr_restrict_warned;
char *root_dir;
- struct rb_root threads;
- pthread_rwlock_t threads_lock;
- unsigned int nr_threads;
- struct list_head dead_threads;
- struct thread *last_match;
+ struct machine_th threads[MACHINE_TH_TABLE_SIZE];
struct vdso_info *vdso_info;
struct perf_env *env;
struct dsos dsos;
@@ -49,6 +56,12 @@ struct machine {
};
static inline
+struct machine_th *machine_thread(struct machine *machine, pid_t tid)
+{
+ return &machine->threads[tid % MACHINE_TH_TABLE_SIZE];
+}
+
+static inline
struct map *__machine__kernel_map(struct machine *machine, enum map_type type)
{
return machine->vmlinux_maps[type];
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
index 808cc45..bbd78fa 100644
--- a/tools/perf/util/rb_resort.h
+++ b/tools/perf/util/rb_resort.h
@@ -143,7 +143,8 @@ struct __name##_sorted *__name = __name##_sorted__new
__ilist->rblist.nr_entries)
/* For 'struct machine->threads' */
-#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine) \
- DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads)
+#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, tid) \
+ DECLARE_RESORT_RB(__name)(&__machine->threads[tid].threads, \
+ __machine->threads[tid].nr_threads)
#endif /* _PERF_RESORT_RB_H_ */
--
2.5.5