Re: [PATCH 3/3] perf cpumap: Add reference count checking
From: Arnaldo Carvalho de Melo
Date: Sat Jan 22 2022 - 15:12:02 EST
Em Fri, Jan 21, 2022 at 08:58:11PM -0800, Ian Rogers escreveu:
> Enabled when REFCNT_CHECKING is defined. The change adds a memory
> allocated pointer that is interposed between the reference counted
> cpu map at a get and freed by a put. The pointer replaces the original
> perf_cpu_map struct, so use of the perf_cpu_map via APIs remains
> unchanged. Any use of the cpu map without the API requires two versions,
> typically handled via an UNWRAP macro.
>
> This change is intended to catch:
> - use after put: using a cpumap after you have put it will cause a
> segv.
> - unbalanced puts: two puts for a get will result in a double free
> that can be captured and reported by tools like address sanitizer,
> including with the associated stack traces of allocation and frees.
> - missing puts: if a put is missing then the get turns into a memory
> leak that can be reported by leak sanitizer, including the stack
> trace at the point the get occurs.
I'll take a look at this after testing/pushing the perf/core queue to Linus.
- Arnaldo
> Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
> ---
> tools/lib/perf/cpumap.c | 120 ++++++++++++++++-------
> tools/lib/perf/include/internal/cpumap.h | 14 ++-
> tools/perf/tests/cpumap.c | 20 ++--
> tools/perf/util/cpumap.c | 42 ++++----
> tools/perf/util/pmu.c | 24 +++--
> 5 files changed, 146 insertions(+), 74 deletions(-)
>
> diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
> index ee66760f1e63..d401d133f84b 100644
> --- a/tools/lib/perf/cpumap.c
> +++ b/tools/lib/perf/cpumap.c
> @@ -10,10 +10,16 @@
> #include <ctype.h>
> #include <limits.h>
>
> -static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
> +#ifndef REFCNT_CHECKING
> +#define UNWRAP_MAP(x) x
> +#else
> +#define UNWRAP_MAP(x) x->orig
> +#endif
> +
> +#ifndef REFCNT_CHECKING
> +struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
> {
> struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
> -
> if (cpus != NULL) {
> cpus->nr = nr_cpus;
> refcount_set(&cpus->refcnt, 1);
> @@ -21,13 +27,31 @@ static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
> }
> return cpus;
> }
> +#else
> +struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
> +{
> + struct perf_cpu_map *wrapped_cpus = NULL;
> + struct original_perf_cpu_map *cpus =
> + malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
> + if (cpus != NULL) {
> + cpus->nr = nr_cpus;
> + refcount_set(&cpus->refcnt, 1);
> + wrapped_cpus = malloc(sizeof(*wrapped_cpus));
> + if (wrapped_cpus != NULL)
> + wrapped_cpus->orig = cpus;
> + else
> + free(cpus);
> + }
> + return wrapped_cpus;
> +}
> +#endif
>
> struct perf_cpu_map *perf_cpu_map__dummy_new(void)
> {
> struct perf_cpu_map *cpus = perf_cpu_map__alloc(1);
>
> if (cpus)
> - cpus->map[0].cpu = -1;
> + UNWRAP_MAP(cpus)->map[0].cpu = -1;
>
> return cpus;
> }
> @@ -35,23 +59,45 @@ struct perf_cpu_map *perf_cpu_map__dummy_new(void)
> static void cpu_map__delete(struct perf_cpu_map *map)
> {
> if (map) {
> - WARN_ONCE(refcount_read(&map->refcnt) != 0,
> + WARN_ONCE(refcount_read(&UNWRAP_MAP(map)->refcnt) != 0,
> "cpu_map refcnt unbalanced\n");
> +#ifdef REFCNT_CHECKING
> + free(map->orig);
> + map->orig = NULL;
> +#endif
> free(map);
> }
> }
>
> struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map)
> {
> - if (map)
> - refcount_inc(&map->refcnt);
> + if (map) {
> +#ifdef REFCNT_CHECKING
> + struct perf_cpu_map *new_wrapper;
> +#endif
> + refcount_inc(&UNWRAP_MAP(map)->refcnt);
> +#ifdef REFCNT_CHECKING
> + new_wrapper = malloc(sizeof(*new_wrapper));
> + new_wrapper->orig = map->orig;
> + map = new_wrapper;
> +#endif
> + }
> return map;
> }
>
> void perf_cpu_map__put(struct perf_cpu_map *map)
> {
> - if (map && refcount_dec_and_test(&map->refcnt))
> - cpu_map__delete(map);
> + if (map) {
> + if (refcount_dec_and_test(&UNWRAP_MAP(map)->refcnt))
> + cpu_map__delete(map);
> + else {
> +#ifdef REFCNT_CHECKING
> + /* Free the wrapper object but the reference counted object remains. */
> + map->orig = NULL;
> + free(map);
> +#endif
> + }
> + }
> }
>
> static struct perf_cpu_map *cpu_map__default_new(void)
> @@ -68,7 +114,7 @@ static struct perf_cpu_map *cpu_map__default_new(void)
> int i;
>
> for (i = 0; i < nr_cpus; ++i)
> - cpus->map[i].cpu = i;
> + UNWRAP_MAP(cpus)->map[i].cpu = i;
> }
>
> return cpus;
> @@ -94,15 +140,16 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu
> int i, j;
>
> if (cpus != NULL) {
> - memcpy(cpus->map, tmp_cpus, payload_size);
> - qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu);
> + memcpy(UNWRAP_MAP(cpus)->map, tmp_cpus, payload_size);
> + qsort(UNWRAP_MAP(cpus)->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu);
> /* Remove dups */
> j = 0;
> for (i = 0; i < nr_cpus; i++) {
> - if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu)
> - cpus->map[j++].cpu = cpus->map[i].cpu;
> + if (i == 0 ||
> + UNWRAP_MAP(cpus)->map[i].cpu != UNWRAP_MAP(cpus)->map[i - 1].cpu)
> + UNWRAP_MAP(cpus)->map[j++].cpu = UNWRAP_MAP(cpus)->map[i].cpu;
> }
> - cpus->nr = j;
> + UNWRAP_MAP(cpus)->nr = j;
> assert(j <= nr_cpus);
> }
> return cpus;
> @@ -263,20 +310,20 @@ struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
> .cpu = -1
> };
>
> - if (cpus && idx < cpus->nr)
> - return cpus->map[idx];
> + if (cpus && idx < UNWRAP_MAP(cpus)->nr)
> + return UNWRAP_MAP(cpus)->map[idx];
>
> return result;
> }
>
> int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
> {
> - return cpus ? cpus->nr : 1;
> + return cpus ? UNWRAP_MAP(cpus)->nr : 1;
> }
>
> bool perf_cpu_map__empty(const struct perf_cpu_map *map)
> {
> - return map ? map->map[0].cpu == -1 : true;
> + return map ? UNWRAP_MAP(map)->map[0].cpu == -1 : true;
> }
>
> int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
> @@ -287,10 +334,10 @@ int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
> return -1;
>
> low = 0;
> - high = cpus->nr;
> + high = UNWRAP_MAP(cpus)->nr;
> while (low < high) {
> int idx = (low + high) / 2;
> - struct perf_cpu cpu_at_idx = cpus->map[idx];
> + struct perf_cpu cpu_at_idx = UNWRAP_MAP(cpus)->map[idx];
>
> if (cpu_at_idx.cpu == cpu.cpu)
> return idx;
> @@ -316,7 +363,7 @@ struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map)
> };
>
> // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
> - return map->nr > 0 ? map->map[map->nr - 1] : result;
> + return UNWRAP_MAP(map)->nr > 0 ? UNWRAP_MAP(map)->map[UNWRAP_MAP(map)->nr - 1] : result;
> }
>
> /*
> @@ -337,37 +384,36 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
>
> if (!orig && !other)
> return NULL;
> - if (!orig) {
> - perf_cpu_map__get(other);
> - return other;
> - }
> + if (!orig)
> + return perf_cpu_map__get(other);
> if (!other)
> return orig;
> - if (orig->nr == other->nr &&
> - !memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu)))
> + if (UNWRAP_MAP(orig)->nr == UNWRAP_MAP(other)->nr &&
> + !memcmp(UNWRAP_MAP(orig)->map, UNWRAP_MAP(other)->map,
> + UNWRAP_MAP(orig)->nr * sizeof(struct perf_cpu)))
> return orig;
>
> - tmp_len = orig->nr + other->nr;
> + tmp_len = UNWRAP_MAP(orig)->nr + UNWRAP_MAP(other)->nr;
> tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
> if (!tmp_cpus)
> return NULL;
>
> /* Standard merge algorithm from wikipedia */
> i = j = k = 0;
> - while (i < orig->nr && j < other->nr) {
> - if (orig->map[i].cpu <= other->map[j].cpu) {
> - if (orig->map[i].cpu == other->map[j].cpu)
> + while (i < UNWRAP_MAP(orig)->nr && j < UNWRAP_MAP(other)->nr) {
> + if (UNWRAP_MAP(orig)->map[i].cpu <= UNWRAP_MAP(other)->map[j].cpu) {
> + if (UNWRAP_MAP(orig)->map[i].cpu == UNWRAP_MAP(other)->map[j].cpu)
> j++;
> - tmp_cpus[k++] = orig->map[i++];
> + tmp_cpus[k++] = UNWRAP_MAP(orig)->map[i++];
> } else
> - tmp_cpus[k++] = other->map[j++];
> + tmp_cpus[k++] = UNWRAP_MAP(other)->map[j++];
> }
>
> - while (i < orig->nr)
> - tmp_cpus[k++] = orig->map[i++];
> + while (i < UNWRAP_MAP(orig)->nr)
> + tmp_cpus[k++] = UNWRAP_MAP(orig)->map[i++];
>
> - while (j < other->nr)
> - tmp_cpus[k++] = other->map[j++];
> + while (j < UNWRAP_MAP(other)->nr)
> + tmp_cpus[k++] = UNWRAP_MAP(other)->map[j++];
> assert(k <= tmp_len);
>
> merged = cpu_map__trim_new(k, tmp_cpus);
> diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h
> index 581f9ffb4237..64ad56d167a0 100644
> --- a/tools/lib/perf/include/internal/cpumap.h
> +++ b/tools/lib/perf/include/internal/cpumap.h
> @@ -16,7 +16,12 @@ struct perf_cpu {
> * gaps if CPU numbers were used. For events associated with a pid, rather than
> * a CPU, a single dummy map with an entry of -1 is used.
> */
> -struct perf_cpu_map {
> +#ifndef REFCNT_CHECKING
> +struct perf_cpu_map
> +#else
> +struct original_perf_cpu_map
> +#endif
> +{
> refcount_t refcnt;
> /** Length of the map array. */
> int nr;
> @@ -24,10 +29,17 @@ struct perf_cpu_map {
> struct perf_cpu map[];
> };
>
> +#ifdef REFCNT_CHECKING
> +struct perf_cpu_map {
> + struct original_perf_cpu_map *orig;
> +};
> +#endif
> +
> #ifndef MAX_NR_CPUS
> #define MAX_NR_CPUS 2048
> #endif
>
> +struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus);
> int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
>
> #endif /* __LIBPERF_INTERNAL_CPUMAP_H */
> diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
> index 84e87e31f119..65d9546aec6e 100644
> --- a/tools/perf/tests/cpumap.c
> +++ b/tools/perf/tests/cpumap.c
> @@ -11,6 +11,12 @@
>
> struct machine;
>
> +#ifndef REFCNT_CHECKING
> +#define UNWRAP_CPUMAP(x) x
> +#else
> +#define UNWRAP_CPUMAP(x) x->orig
> +#endif
> +
> static int process_event_mask(struct perf_tool *tool __maybe_unused,
> union perf_event *event,
> struct perf_sample *sample __maybe_unused,
> @@ -35,10 +41,10 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused,
> }
>
> map = cpu_map__new_data(data);
> - TEST_ASSERT_VAL("wrong nr", map->nr == 20);
> + TEST_ASSERT_VAL("wrong nr", perf_cpu_map__nr(map) == 20);
>
> for (i = 0; i < 20; i++) {
> - TEST_ASSERT_VAL("wrong cpu", map->map[i].cpu == i);
> + TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, i).cpu == i);
> }
>
> perf_cpu_map__put(map);
> @@ -66,10 +72,10 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
> TEST_ASSERT_VAL("wrong cpu", cpus->cpu[1] == 256);
>
> map = cpu_map__new_data(data);
> - TEST_ASSERT_VAL("wrong nr", map->nr == 2);
> - TEST_ASSERT_VAL("wrong cpu", map->map[0].cpu == 1);
> - TEST_ASSERT_VAL("wrong cpu", map->map[1].cpu == 256);
> - TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1);
> + TEST_ASSERT_VAL("wrong nr", perf_cpu_map__nr(map) == 2);
> + TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, 0).cpu == 1);
> + TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, 1).cpu == 256);
> + TEST_ASSERT_VAL("wrong refcnt", refcount_read(&UNWRAP_CPUMAP(map)->refcnt) == 1);
> perf_cpu_map__put(map);
> return 0;
> }
> @@ -130,7 +136,7 @@ static int test__cpu_map_merge(struct test_suite *test __maybe_unused, int subte
> struct perf_cpu_map *c = perf_cpu_map__merge(a, b);
> char buf[100];
>
> - TEST_ASSERT_VAL("failed to merge map: bad nr", c->nr == 5);
> + TEST_ASSERT_VAL("failed to merge map: bad nr", perf_cpu_map__nr(c) == 5);
> cpu_map__snprint(c, buf, sizeof(buf));
> TEST_ASSERT_VAL("failed to merge map: bad result", !strcmp(buf, "1-2,4-5,7"));
> perf_cpu_map__put(b);
> diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
> index 12b2243222b0..e9976ca238fc 100644
> --- a/tools/perf/util/cpumap.c
> +++ b/tools/perf/util/cpumap.c
> @@ -22,6 +22,12 @@ static int max_node_num;
> */
> static int *cpunode_map;
>
> +#ifndef REFCNT_CHECKING
> +#define UNWRAP_MAP(x) x
> +#else
> +#define UNWRAP_MAP(x) x->orig
> +#endif
> +
> static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
> {
> struct perf_cpu_map *map;
> @@ -37,9 +43,9 @@ static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
> * otherwise it would become 65535.
> */
> if (cpus->cpu[i] == (u16) -1)
> - map->map[i].cpu = -1;
> + UNWRAP_MAP(map)->map[i].cpu = -1;
> else
> - map->map[i].cpu = (int) cpus->cpu[i];
> + UNWRAP_MAP(map)->map[i].cpu = (int) cpus->cpu[i];
> }
> }
>
> @@ -58,7 +64,7 @@ static struct perf_cpu_map *cpu_map__from_mask(struct perf_record_record_cpu_map
> int cpu, i = 0;
>
> for_each_set_bit(cpu, mask->mask, nbits)
> - map->map[i++].cpu = cpu;
> + UNWRAP_MAP(map)->map[i++].cpu = cpu;
> }
> return map;
>
> @@ -84,16 +90,13 @@ size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp)
>
> struct perf_cpu_map *perf_cpu_map__empty_new(int nr)
> {
> - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int) * nr);
> + struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr);
>
> if (cpus != NULL) {
> int i;
>
> - cpus->nr = nr;
> for (i = 0; i < nr; i++)
> - cpus->map[i].cpu = -1;
> -
> - refcount_set(&cpus->refcnt, 1);
> + UNWRAP_MAP(cpus)->map[i].cpu = -1;
> }
>
> return cpus;
> @@ -163,7 +166,7 @@ struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
> {
> int idx;
> struct perf_cpu cpu;
> - struct cpu_aggr_map *c = cpu_aggr_map__empty_new(cpus->nr);
> + struct cpu_aggr_map *c = cpu_aggr_map__empty_new(perf_cpu_map__nr(cpus));
>
> if (!c)
> return NULL;
> @@ -187,7 +190,7 @@ struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
> }
> }
> /* Trim. */
> - if (c->nr != cpus->nr) {
> + if (c->nr != perf_cpu_map__nr(cpus)) {
> struct cpu_aggr_map *trimmed_c =
> realloc(c,
> sizeof(struct cpu_aggr_map) + sizeof(struct aggr_cpu_id) * c->nr);
> @@ -494,31 +497,32 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
>
> #define COMMA first ? "" : ","
>
> - for (i = 0; i < map->nr + 1; i++) {
> + for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
> struct perf_cpu cpu = { .cpu = INT_MAX };
> - bool last = i == map->nr;
> + bool last = i == perf_cpu_map__nr(map);
>
> if (!last)
> - cpu = map->map[i];
> + cpu = perf_cpu_map__cpu(map, i);
>
> if (start == -1) {
> start = i;
> if (last) {
> ret += snprintf(buf + ret, size - ret,
> "%s%d", COMMA,
> - map->map[i].cpu);
> + perf_cpu_map__cpu(map, i).cpu);
> }
> - } else if (((i - start) != (cpu.cpu - map->map[start].cpu)) || last) {
> + } else if (((i - start) != (cpu.cpu - perf_cpu_map__cpu(map, start).cpu)) || last) {
> int end = i - 1;
>
> if (start == end) {
> ret += snprintf(buf + ret, size - ret,
> "%s%d", COMMA,
> - map->map[start].cpu);
> + perf_cpu_map__cpu(map, start).cpu);
> } else {
> ret += snprintf(buf + ret, size - ret,
> "%s%d-%d", COMMA,
> - map->map[start].cpu, map->map[end].cpu);
> + perf_cpu_map__cpu(map, start).cpu,
> + perf_cpu_map__cpu(map, end).cpu);
> }
> first = false;
> start = i;
> @@ -545,7 +549,7 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
> int i, cpu;
> char *ptr = buf;
> unsigned char *bitmap;
> - struct perf_cpu last_cpu = perf_cpu_map__cpu(map, map->nr - 1);
> + struct perf_cpu last_cpu = perf_cpu_map__cpu(map, perf_cpu_map__nr(map) - 1);
>
> if (buf == NULL)
> return 0;
> @@ -556,7 +560,7 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
> return 0;
> }
>
> - for (i = 0; i < map->nr; i++) {
> + for (i = 0; i < perf_cpu_map__nr(map); i++) {
> cpu = perf_cpu_map__cpu(map, i).cpu;
> bitmap[cpu / 8] |= 1 << (cpu % 8);
> }
> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> index 8dfbba15aeb8..1649321fe86f 100644
> --- a/tools/perf/util/pmu.c
> +++ b/tools/perf/util/pmu.c
> @@ -1992,13 +1992,20 @@ int perf_pmu__match(char *pattern, char *name, char *tok)
> return 0;
> }
>
> +#ifndef REFCNT_CHECKING
> +#define UNWRAP_CPUMAP(x) x
> +#else
> +#define UNWRAP_CPUMAP(x) x->orig
> +#endif
> +
> int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
> struct perf_cpu_map **mcpus_ptr,
> struct perf_cpu_map **ucpus_ptr)
> {
> struct perf_cpu_map *pmu_cpus = pmu->cpus;
> struct perf_cpu_map *matched_cpus, *unmatched_cpus;
> - int matched_nr = 0, unmatched_nr = 0;
> + struct perf_cpu cpu;
> + int i, matched_nr = 0, unmatched_nr = 0;
>
> matched_cpus = perf_cpu_map__default_new();
> if (!matched_cpus)
> @@ -2010,18 +2017,15 @@ int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
> return -1;
> }
>
> - for (int i = 0; i < cpus->nr; i++) {
> - int cpu;
> -
> - cpu = perf_cpu_map__idx(pmu_cpus, cpus->map[i]);
> - if (cpu == -1)
> - unmatched_cpus->map[unmatched_nr++] = cpus->map[i];
> + perf_cpu_map__for_each_cpu(cpu, i, cpus) {
> + if (!perf_cpu_map__has(pmu_cpus, cpu))
> + UNWRAP_CPUMAP(unmatched_cpus)->map[unmatched_nr++] = cpu;
> else
> - matched_cpus->map[matched_nr++] = cpus->map[i];
> + UNWRAP_CPUMAP(matched_cpus)->map[matched_nr++] = cpu;
> }
>
> - unmatched_cpus->nr = unmatched_nr;
> - matched_cpus->nr = matched_nr;
> + UNWRAP_CPUMAP(unmatched_cpus)->nr = unmatched_nr;
> + UNWRAP_CPUMAP(matched_cpus)->nr = matched_nr;
> *mcpus_ptr = matched_cpus;
> *ucpus_ptr = unmatched_cpus;
> return 0;
> --
> 2.35.0.rc0.227.g00780c9af4-goog
--
- Arnaldo