[PATCH v1 1/2] perf callchain: Fix stitch LBR memory leaks

From: Ian Rogers
Date: Thu Aug 08 2024 - 01:47:01 EST


The callchain_cursor_node has a map_symbol whose maps and map
variables are reference counted. Ensure these values use a _get
routine to increment the refernece counts and use map_symbol__exit to
release the reference counts. Do similar for thread's prev_lbr_cursor,
but save the size of the prev_lbr_cursor array so that it may be
iterated.

Ensure that when stitch_nodes are placed on the free list the
map_symbols are exited. Fix resolve_lbr_callchain_sample by replacing
list_replace_init to list_splice_init, so the whole list is moved and
nodes aren't leaked.

A reproduction of the memory leaks is possible with a leak sanitizer
build in the perf report command of:
```
$ perf record -e cycles --call-graph lbr perf test -w thloop
$ perf report --stitch-lbr
```

Fixes: ff165628d726 ("perf callchain: Stitch LBR call stack")
Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
---
tools/perf/util/machine.c | 17 +++++++++++++++--
tools/perf/util/thread.c | 4 ++++
tools/perf/util/thread.h | 1 +
3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 8477edefc299..706be5e4a076 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2270,8 +2270,12 @@ static void save_lbr_cursor_node(struct thread *thread,
cursor->curr = cursor->first;
else
cursor->curr = cursor->curr->next;
+
+ map_symbol__exit(&lbr_stitch->prev_lbr_cursor[idx].ms);
memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr,
sizeof(struct callchain_cursor_node));
+ lbr_stitch->prev_lbr_cursor[idx].ms.maps = maps__get(cursor->curr->ms.maps);
+ lbr_stitch->prev_lbr_cursor[idx].ms.map = map__get(cursor->curr->ms.map);

lbr_stitch->prev_lbr_cursor[idx].valid = true;
cursor->pos++;
@@ -2482,6 +2486,9 @@ static bool has_stitched_lbr(struct thread *thread,
memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i],
sizeof(struct callchain_cursor_node));

+ stitch_node->cursor.ms.maps = maps__get(lbr_stitch->prev_lbr_cursor[i].ms.maps);
+ stitch_node->cursor.ms.map = map__get(lbr_stitch->prev_lbr_cursor[i].ms.map);
+
if (callee)
list_add(&stitch_node->node, &lbr_stitch->lists);
else
@@ -2505,6 +2512,8 @@ static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr)
if (!thread__lbr_stitch(thread)->prev_lbr_cursor)
goto free_lbr_stitch;

+ thread__lbr_stitch(thread)->prev_lbr_cursor_size = max_lbr + 1;
+
INIT_LIST_HEAD(&thread__lbr_stitch(thread)->lists);
INIT_LIST_HEAD(&thread__lbr_stitch(thread)->free_lists);

@@ -2560,8 +2569,12 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
max_lbr, callee);

if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) {
- list_replace_init(&lbr_stitch->lists,
- &lbr_stitch->free_lists);
+ struct stitch_list *stitch_node;
+
+ list_for_each_entry(stitch_node, &lbr_stitch->lists, node)
+ map_symbol__exit(&stitch_node->cursor.ms);
+
+ list_splice_init(&lbr_stitch->lists, &lbr_stitch->free_lists);
}
memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample));
}
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 87c59aa9fe38..0ffdd52d86d7 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -476,6 +476,7 @@ void thread__free_stitch_list(struct thread *thread)
return;

list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) {
+ map_symbol__exit(&pos->cursor.ms);
list_del_init(&pos->node);
free(pos);
}
@@ -485,6 +486,9 @@ void thread__free_stitch_list(struct thread *thread)
free(pos);
}

+ for (unsigned int i = 0 ; i < lbr_stitch->prev_lbr_cursor_size; i++)
+ map_symbol__exit(&lbr_stitch->prev_lbr_cursor[i].ms);
+
zfree(&lbr_stitch->prev_lbr_cursor);
free(thread__lbr_stitch(thread));
thread__set_lbr_stitch(thread, NULL);
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 8b4a3c69bad1..6cbf6eb2812e 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -26,6 +26,7 @@ struct lbr_stitch {
struct list_head free_lists;
struct perf_sample prev_sample;
struct callchain_cursor_node *prev_lbr_cursor;
+ unsigned int prev_lbr_cursor_size;
};

DECLARE_RC_STRUCT(thread) {
--
2.46.0.rc2.264.g509ed76dc8-goog