Re: [PATCH V2 2/5] perf mem: Clean up perf_mem_events__ptr()

From: Leo Yan
Date: Fri Dec 08 2023 - 23:31:52 EST


On Thu, Dec 07, 2023 at 11:23:35AM -0800, kan.liang@xxxxxxxxxxxxxxx wrote:
> From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
>
> The mem_events can be retrieved from the struct perf_pmu now. An ARCH
> specific perf_mem_events__ptr() is not required anymore. Remove all of
> them.
>
> The Intel hybrid has multiple mem-events-supported PMUs. But they share
> the same mem_events. Other ARCHs only support one mem-events-supported
> PMU. In the configuration, it's good enough to only configure the
> mem_events for one PMU. Add perf_mem_events_find_pmu() which returns the
> first mem-events-supported PMU.
>
> In the perf_mem_events__init(), the perf_pmus__scan() is not required
> anymore. It avoids checking the sysfs for every PMU on the system.
>
> Make the perf_mem_events__record_args() more generic. Remove the
> perf_mem_events__print_unsupport_hybrid().
>
> Since pmu is added as a new parameter, rename perf_mem_events__ptr() to
> perf_pmu__mem_events_ptr(). Several other functions also do a similar
> rename.
>
> Reviewed-by: Ian Rogers <irogers@xxxxxxxxxx>
> Tested-by: Ravi Bangoria <ravi.bangoria@xxxxxxx>
> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
> ---
> tools/perf/arch/arm64/util/mem-events.c | 10 +--
> tools/perf/arch/x86/util/mem-events.c | 18 ++---
> tools/perf/builtin-c2c.c | 28 +++++--
> tools/perf/builtin-mem.c | 28 +++++--
> tools/perf/util/mem-events.c | 103 ++++++++++++------------
> tools/perf/util/mem-events.h | 9 ++-
> 6 files changed, 104 insertions(+), 92 deletions(-)
>
> diff --git a/tools/perf/arch/arm64/util/mem-events.c b/tools/perf/arch/arm64/util/mem-events.c
> index aaa4804922b4..2602e8688727 100644
> --- a/tools/perf/arch/arm64/util/mem-events.c
> +++ b/tools/perf/arch/arm64/util/mem-events.c
> @@ -12,17 +12,9 @@ struct perf_mem_event perf_mem_events_arm[PERF_MEM_EVENTS__MAX] = {
>
> static char mem_ev_name[100];
>
> -struct perf_mem_event *perf_mem_events__ptr(int i)
> -{
> - if (i >= PERF_MEM_EVENTS__MAX)
> - return NULL;
> -
> - return &perf_mem_events_arm[i];
> -}
> -
> const char *perf_mem_events__name(int i, const char *pmu_name __maybe_unused)
> {
> - struct perf_mem_event *e = perf_mem_events__ptr(i);
> + struct perf_mem_event *e = &perf_mem_events_arm[i];
>
> if (i >= PERF_MEM_EVENTS__MAX)
> return NULL;

Nitpick: it's good to check if 'i' is a valid value and then access the
array with a valid index.

if (i >= PERF_MEM_EVENTS__MAX)
return NULL;

e = &perf_mem_events_arm[i];

> diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
> index 2b81d229982c..5fb41d50118d 100644
> --- a/tools/perf/arch/x86/util/mem-events.c
> +++ b/tools/perf/arch/x86/util/mem-events.c
> @@ -28,17 +28,6 @@ struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = {
> E("mem-ldst", "ibs_op//", "ibs_op"),
> };
>
> -struct perf_mem_event *perf_mem_events__ptr(int i)
> -{
> - if (i >= PERF_MEM_EVENTS__MAX)
> - return NULL;
> -
> - if (x86__is_amd_cpu())
> - return &perf_mem_events_amd[i];
> -
> - return &perf_mem_events_intel[i];
> -}
> -
> bool is_mem_loads_aux_event(struct evsel *leader)
> {
> struct perf_pmu *pmu = perf_pmus__find("cpu");
> @@ -54,7 +43,12 @@ bool is_mem_loads_aux_event(struct evsel *leader)
>
> const char *perf_mem_events__name(int i, const char *pmu_name)
> {
> - struct perf_mem_event *e = perf_mem_events__ptr(i);
> + struct perf_mem_event *e;

A nitpick as well:

Given perf's mem/c2c, callers will almostly invoke a valid index via the
argument 'i', but I still think here is a best place to return NULL
pointer for an invalid index rather than returning a wild pointer.

Thus I suggest to apply checking for x86 and other archs:

if (i >= PERF_MEM_EVENTS__MAX)
return NULL;

> +
> + if (x86__is_amd_cpu())
> + e = &perf_mem_events_amd[i];
> + else
> + e = &perf_mem_events_intel[i];
>
> if (!e)
> return NULL;

[...]

> int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
> char **rec_tmp, int *tmp_nr)
> {
> const char *mnt = sysfs__mount();
> + struct perf_pmu *pmu = NULL;
> int i = *argv_nr, k = 0;
> struct perf_mem_event *e;
>
> - for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
> - e = perf_mem_events__ptr(j);
> - if (!e->record)
> - continue;
>
> - if (perf_pmus__num_mem_pmus() == 1) {
> - if (!e->supported) {
> - pr_err("failed: event '%s' not supported\n",
> - perf_mem_events__name(j, NULL));
> - return -1;
> - }
> + while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
> + for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
> + e = perf_pmu__mem_events_ptr(pmu, j);
>
> - rec_argv[i++] = "-e";
> - rec_argv[i++] = perf_mem_events__name(j, NULL);
> - } else {
> - struct perf_pmu *pmu = NULL;
> + if (!e->record)
> + continue;
>
> if (!e->supported) {
> - perf_mem_events__print_unsupport_hybrid(e, j);
> + pr_err("failed: event '%s' not supported\n",
> + perf_mem_events__name(j, pmu->name));
> return -1;
> }
>
> - while ((pmu = perf_pmus__scan(pmu)) != NULL) {
> + if (perf_pmus__num_mem_pmus() == 1) {
> + rec_argv[i++] = "-e";
> + rec_argv[i++] = perf_mem_events__name(j, NULL);
> + } else {
> const char *s = perf_mem_events__name(j, pmu->name);
>
> if (!perf_mem_event__supported(mnt, pmu, e))

I think we can improve a bit for this part.

Current implementation uses perf_pmus__num_mem_pmus() to decide if
system has only one memory PMU or multiple PMUs, and multiple PMUs
the tool iterates all memory PMUs to synthesize event options.

In this patch, it has changed to iterate all memory PMUs, no matter the
system has only one memory PMU or multiple PMUs. Thus, I don't see the
point for the condition checking for "perf_pmus__num_mem_pmus() == 1".
We can consolidate into the unified code like:

char *copy;
const char *s = perf_pmu__mem_events_name(j, pmu);

if (!s)
continue;

if (!perf_pmu__mem_events_supported(mnt, pmu, e))
continue;

copy = strdup(s);
if (!copy)
return -1;

rec_argv[i++] = "-e";
rec_argv[i++] = copy;
rec_tmp[k++] = copy;

Thanks,
Leo