Re: [PATCH RFC V2 01/10] perf tools: hashtable for machine threads

From: Arnaldo Carvalho de Melo
Date: Wed Sep 13 2017 - 09:29:31 EST


On Sun, Sep 10, 2017 at 07:23:14PM -0700, kan.liang@xxxxxxxxx wrote:
> From: Kan Liang <kan.liang@xxxxxxxxx>
>
> To process any event, perf first needs to find the corresponding thread
> in the machine. The machine maintains an rb tree to store all threads,
> protected by an rw lock.
> This is not a problem for current perf, which processes events serially.
> However, it becomes a scalability bottleneck when processing events in
> parallel, especially on a heavily loaded system with many threads.
>
> Introduce a hashtable that divides the big rb tree into many small rb
> trees for threads. The index is thread id % hashtable size. This
> reduces the lock contention.

Ok, I renamed variables of type 'struct threads' from 'thread' to
'threads', to reduce confusion about a variable's type when looking at
it: if you see a variable named 'threads', it's of type 'struct
threads', and if it's named 'thread', it's a 'struct thread'.
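
For quick reference, the per-bucket layout and the tid-to-bucket lookup
used throughout the interdiff below look roughly like this. This is a
sketch only: the field and helper names come from the patch, but the
actual THREADS__TABLE_SIZE value isn't part of this interdiff, so the
256 below is just an assumption for illustration:

#define THREADS__TABLE_SIZE 256	/* assumed; the real value lives in machine.h */

struct threads {
	struct rb_root	  entries;	/* per-bucket rb tree of threads */
	pthread_rwlock_t  lock;		/* protects only this bucket */
	unsigned int	  nr;		/* number of threads in this bucket */
	struct list_head  dead;		/* threads awaiting their final thread__put() */
	struct thread	 *last_match;	/* per-bucket last-lookup cache */
};

static inline struct threads *machine__thread(struct machine *machine, pid_t tid)
{
	/* "thread id % hashtable size", as described in the changelog */
	return &machine->threads[tid % THREADS__TABLE_SIZE];
}

With that, machine__findnew_thread() and friends only take the rwlock of
the bucket that tid maps to, instead of contending on one machine-wide
lock.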

The interdiff from your patch to mine is below; I'm testing it now.

No need to resend anything now; I'll cope with whatever fallout comes up
from this, if any.

- Arnaldo

diff -u b/tools/perf/util/machine.c b/tools/perf/util/machine.c
--- b/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -33,18 +33,17 @@
pthread_rwlock_init(&dsos->lock, NULL);
}

-static void machine__thread_init(struct machine *machine)
+static void machine__threads_init(struct machine *machine)
{
- struct threads *thread;
int i;

for (i = 0; i < THREADS__TABLE_SIZE; i++) {
- thread = &machine->threads[i];
- thread->entries = RB_ROOT;
- pthread_rwlock_init(&thread->lock, NULL);
- thread->nr = 0;
- INIT_LIST_HEAD(&thread->dead);
- thread->last_match = NULL;
+ struct threads *threads = &machine->threads[i];
+ threads->entries = RB_ROOT;
+ pthread_rwlock_init(&threads->lock, NULL);
+ threads->nr = 0;
+ INIT_LIST_HEAD(&threads->dead);
+ threads->last_match = NULL;
}
}

@@ -55,7 +54,7 @@
RB_CLEAR_NODE(&machine->rb_node);
dsos__init(&machine->dsos);

- machine__thread_init(machine);
+ machine__threads_init(machine);

machine->vdso_info = NULL;
machine->env = NULL;
@@ -151,27 +150,25 @@

void machine__delete_threads(struct machine *machine)
{
- struct threads *thread;
struct rb_node *nd;
int i;

for (i = 0; i < THREADS__TABLE_SIZE; i++) {
- thread = &machine->threads[i];
- pthread_rwlock_wrlock(&thread->lock);
- nd = rb_first(&thread->entries);
+ struct threads *threads = &machine->threads[i];
+ pthread_rwlock_wrlock(&threads->lock);
+ nd = rb_first(&threads->entries);
while (nd) {
struct thread *t = rb_entry(nd, struct thread, rb_node);

nd = rb_next(nd);
__machine__remove_thread(machine, t, false);
}
- pthread_rwlock_unlock(&thread->lock);
+ pthread_rwlock_unlock(&threads->lock);
}
}

void machine__exit(struct machine *machine)
{
- struct threads *thread;
int i;

machine__destroy_kernel_maps(machine);
@@ -180,9 +177,10 @@
machine__exit_vdso(machine);
zfree(&machine->root_dir);
zfree(&machine->current_tid);
+
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
- thread = &machine->threads[i];
- pthread_rwlock_destroy(&thread->lock);
+ struct threads *threads = &machine->threads[i];
+ pthread_rwlock_destroy(&threads->lock);
}
}

@@ -404,8 +402,8 @@
pid_t pid, pid_t tid,
bool create)
{
- struct threads *thread = machine__thread(machine, tid);
- struct rb_node **p = &thread->entries.rb_node;
+ struct threads *threads = machine__thread(machine, tid);
+ struct rb_node **p = &threads->entries.rb_node;
struct rb_node *parent = NULL;
struct thread *th;

@@ -414,14 +412,14 @@
* so most of the time we dont have to look up
* the full rbtree:
*/
- th = thread->last_match;
+ th = threads->last_match;
if (th != NULL) {
if (th->tid == tid) {
machine__update_thread_pid(machine, th, pid);
return thread__get(th);
}

- thread->last_match = NULL;
+ threads->last_match = NULL;
}

while (*p != NULL) {
@@ -429,7 +427,7 @@
th = rb_entry(parent, struct thread, rb_node);

if (th->tid == tid) {
- thread->last_match = th;
+ threads->last_match = th;
machine__update_thread_pid(machine, th, pid);
return thread__get(th);
}
@@ -446,7 +444,7 @@
th = thread__new(pid, tid);
if (th != NULL) {
rb_link_node(&th->rb_node, parent, p);
- rb_insert_color(&th->rb_node, &thread->entries);
+ rb_insert_color(&th->rb_node, &threads->entries);

/*
* We have to initialize map_groups separately
@@ -457,7 +455,7 @@
* leader and that would screwed the rb tree.
*/
if (thread__init_map_groups(th, machine)) {
- rb_erase_init(&th->rb_node, &thread->entries);
+ rb_erase_init(&th->rb_node, &threads->entries);
RB_CLEAR_NODE(&th->rb_node);
thread__put(th);
return NULL;
@@ -466,8 +464,8 @@
* It is now in the rbtree, get a ref
*/
thread__get(th);
- thread->last_match = th;
- ++thread->nr;
+ threads->last_match = th;
+ ++threads->nr;
}

return th;
@@ -481,24 +479,24 @@
struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
pid_t tid)
{
- struct threads *thread = machine__thread(machine, tid);
+ struct threads *threads = machine__thread(machine, tid);
struct thread *th;

- pthread_rwlock_wrlock(&thread->lock);
+ pthread_rwlock_wrlock(&threads->lock);
th = __machine__findnew_thread(machine, pid, tid);
- pthread_rwlock_unlock(&thread->lock);
+ pthread_rwlock_unlock(&threads->lock);
return th;
}

struct thread *machine__find_thread(struct machine *machine, pid_t pid,
pid_t tid)
{
- struct threads *thread = machine__thread(machine, tid);
+ struct threads *threads = machine__thread(machine, tid);
struct thread *th;

- pthread_rwlock_rdlock(&thread->lock);
+ pthread_rwlock_rdlock(&threads->lock);
th = ____machine__findnew_thread(machine, pid, tid, false);
- pthread_rwlock_unlock(&thread->lock);
+ pthread_rwlock_unlock(&threads->lock);
return th;
}

@@ -745,24 +743,23 @@

size_t machine__fprintf(struct machine *machine, FILE *fp)
{
- struct threads *thread;
struct rb_node *nd;
size_t ret;
int i;

for (i = 0; i < THREADS__TABLE_SIZE; i++) {
- thread = &machine->threads[i];
- pthread_rwlock_rdlock(&thread->lock);
+ struct threads *threads = &machine->threads[i];
+ pthread_rwlock_rdlock(&threads->lock);

- ret = fprintf(fp, "Threads: %u\n", thread->nr);
+ ret = fprintf(fp, "Threads: %u\n", threads->nr);

- for (nd = rb_first(&thread->entries); nd; nd = rb_next(nd)) {
+ for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
struct thread *pos = rb_entry(nd, struct thread, rb_node);

ret += thread__fprintf(pos, fp);
}

- pthread_rwlock_unlock(&thread->lock);
+ pthread_rwlock_unlock(&threads->lock);
}
return ret;
}
@@ -1509,25 +1506,25 @@

static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
{
- struct threads *thread = machine__thread(machine, th->tid);
+ struct threads *threads = machine__thread(machine, th->tid);

- if (thread->last_match == th)
- thread->last_match = NULL;
+ if (threads->last_match == th)
+ threads->last_match = NULL;

BUG_ON(refcount_read(&th->refcnt) == 0);
if (lock)
- pthread_rwlock_wrlock(&thread->lock);
- rb_erase_init(&th->rb_node, &thread->entries);
+ pthread_rwlock_wrlock(&threads->lock);
+ rb_erase_init(&th->rb_node, &threads->entries);
RB_CLEAR_NODE(&th->rb_node);
- --thread->nr;
+ --threads->nr;
/*
* Move it first to the dead_threads list, then drop the reference,
* if this is the last reference, then the thread__delete destructor
* will be called and we will remove it from the dead_threads list.
*/
- list_add_tail(&th->node, &thread->dead);
+ list_add_tail(&th->node, &threads->dead);
if (lock)
- pthread_rwlock_unlock(&thread->lock);
+ pthread_rwlock_unlock(&threads->lock);
thread__put(th);
}