Re: [PATCH v2 4/5] perf arm-spe: Support metadata version 2

From: Namhyung Kim
Date: Fri Sep 27 2024 - 02:36:02 EST


On Sat, Sep 14, 2024 at 10:54:57PM +0100, Leo Yan wrote:
> This commit is to support metadata version 2 and at the meantime it is
> backward compatible for version 1's format.
>
> The metadata version 1 doesn't include the ARM_SPE_HEADER_VERSION field.
> As version 1 is fixed with two u64 fields, by checking the metadata
> size, it distinguishes the metadata is version 1 or version 2 (and any
> new versions if later will have). For version 2, it reads out CPU number
> and retrieves the metadata info for every CPU.
>
> Signed-off-by: Leo Yan <leo.yan@xxxxxxx>
> ---
> tools/perf/util/arm-spe.c | 95 +++++++++++++++++++++++++++++++++++++--
> 1 file changed, 92 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 70989b1bae47..17782cb40fb5 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -78,6 +78,10 @@ struct arm_spe {
>
> unsigned long num_events;
> u8 use_ctx_pkt_for_pid;
> +
> + u64 **metadata;
> + u64 metadata_ver;
> + u64 metadata_nr_cpu;
> };
>
> struct arm_spe_queue {
> @@ -1016,6 +1020,73 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused,
> return 0;
> }
>
> +static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int cpu_size)
> +{
> + u64 *metadata;
> +
> + metadata = zalloc(sizeof(*metadata) * cpu_size);

Maybe calloc() is slightly better here, but not a strong opinion.


> + if (!metadata)
> + return NULL;
> +
> + memcpy(metadata, buf, cpu_size);

I'm not sure if it's correct since you allocated cpu_size * 8 and copies
only cpu_size.


> + return metadata;
> +}
> +
> +static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
> +{
> + int i;
> +
> + for (i = 0; i < nr_cpu; i++)
> + zfree(&metadata[i]);
> + free(metadata);
> +}
> +
> +static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
> + u64 *ver, int *nr_cpu)
> +{
> + u64 *ptr = (u64 *)info->priv;
> + u64 metadata_size;
> + u64 **metadata = NULL;
> + int hdr_sz, cpu_sz, i;
> +
> + metadata_size = info->header.size -
> + sizeof(struct perf_record_auxtrace_info);
> +
> + /* Metadata version 1 */
> + if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
> + *ver = 1;
> + *nr_cpu = 0;
> + /* No per CPU metadata */
> + return NULL;
> + }
> +
> + *ver = ptr[ARM_SPE_HEADER_VERSION];
> + hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
> + *nr_cpu = ptr[ARM_SPE_CPUS_NUM];
> +
> + metadata = zalloc(sizeof(*metadata) * (*nr_cpu));

calloc() instead? But probably better defining a struct for metadata.

Thanks,
Namhyung


> + if (!metadata)
> + return NULL;
> +
> + /* Locate the start address of per CPU metadata */
> + ptr += hdr_sz;
> + cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);
> +
> + for (i = 0; i < *nr_cpu; i++) {
> + metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, cpu_sz);
> + if (!metadata[i])
> + goto err_per_cpu_metadata;
> +
> + ptr += cpu_sz / sizeof(u64);
> + }
> +
> + return metadata;
> +
> +err_per_cpu_metadata:
> + arm_spe__free_metadata(metadata, *nr_cpu);
> + return NULL;
> +}
> +
> static void arm_spe_free_queue(void *priv)
> {
> struct arm_spe_queue *speq = priv;
> @@ -1050,6 +1121,7 @@ static void arm_spe_free(struct perf_session *session)
> auxtrace_heap__free(&spe->heap);
> arm_spe_free_events(session);
> session->auxtrace = NULL;
> + arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
> free(spe);
> }
>
> @@ -1267,15 +1339,24 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
> const char *cpuid = perf_env__cpuid(session->evlist->env);
> u64 midr = strtol(cpuid, NULL, 16);
> struct arm_spe *spe;
> - int err;
> + u64 **metadata = NULL;
> + u64 metadata_ver;
> + int nr_cpu, err;
>
> if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
> min_sz)
> return -EINVAL;
>
> + metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
> + &nr_cpu);
> + if (!metadata && metadata_ver != 1) {
> + pr_err("Failed to parse Arm SPE metadata.\n");
> + return -EINVAL;
> + }
> +
> spe = zalloc(sizeof(struct arm_spe));
> if (!spe)
> - return -ENOMEM;
> + goto err_free_metadata;
>
> err = auxtrace_queues__init(&spe->queues);
> if (err)
> @@ -1284,8 +1365,14 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
> spe->session = session;
> spe->machine = &session->machines.host; /* No kvm support */
> spe->auxtrace_type = auxtrace_info->type;
> - spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
> + if (metadata_ver == 1)
> + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
> + else
> + spe->pmu_type = auxtrace_info->priv[ARM_SPE_SHARED_PMU_TYPE];
> spe->midr = midr;
> + spe->metadata = metadata;
> + spe->metadata_ver = metadata_ver;
> + spe->metadata_nr_cpu = nr_cpu;
>
> spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
>
> @@ -1346,5 +1433,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
> session->auxtrace = NULL;
> err_free:
> free(spe);
> +err_free_metadata:
> + arm_spe__free_metadata(metadata, nr_cpu);
> return err;
> }
> --
> 2.34.1
>