Re: [PATCH v5 4/5] perf arm-spe: Use SPE data source for neoverse cores

From: Leo Yan
Date: Wed Apr 20 2022 - 04:31:09 EST


On Fri, Apr 08, 2022 at 07:53:43PM +0000, Ali Saidi wrote:
> When synthesizing data from SPE, augment the type with source information
> for Arm Neoverse cores. The field is IMPLDEF but the Neoverse cores all use
> the same encoding. I can't find encoding information for any other SPE
> implementations to unify their choices with Arm's thus that is left for
> future work.
>
> This change populates the mem_lvl_num for Neoverse cores instead of the
> deprecated mem_lvl namespace.
>
> Signed-off-by: Ali Saidi <alisaidi@xxxxxxxxxx>
> ---
> .../util/arm-spe-decoder/arm-spe-decoder.c | 1 +
> .../util/arm-spe-decoder/arm-spe-decoder.h | 12 ++
> tools/perf/util/arm-spe.c | 127 ++++++++++++++++--
> 3 files changed, 126 insertions(+), 14 deletions(-)
>
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
> index 5e390a1a79ab..091987dd3966 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
> @@ -220,6 +220,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
>
> break;
> case ARM_SPE_DATA_SOURCE:
> + decoder->record.source = payload;
> break;
> case ARM_SPE_BAD:
> break;
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 69b31084d6be..46a61df1145b 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -29,6 +29,17 @@ enum arm_spe_op_type {
> ARM_SPE_ST = 1 << 1,
> };
>
> +enum arm_spe_neoverse_data_source {
> + ARM_SPE_NV_L1D = 0x0,
> + ARM_SPE_NV_L2 = 0x8,
> + ARM_SPE_NV_PEER_CORE = 0x9,
> + ARM_SPE_NV_LOCAL_CLUSTER = 0xa,
> + ARM_SPE_NV_SYS_CACHE = 0xb,
> + ARM_SPE_NV_PEER_CLUSTER = 0xc,
> + ARM_SPE_NV_REMOTE = 0xd,
> + ARM_SPE_NV_DRAM = 0xe,
> +};
> +
> struct arm_spe_record {
> enum arm_spe_sample_type type;
> int err;
> @@ -40,6 +51,7 @@ struct arm_spe_record {
> u64 virt_addr;
> u64 phys_addr;
> u64 context_id;
> + u16 source;
> };
>
> struct arm_spe_insn;
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index d2b64e3f588b..a20285cf98e3 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -34,6 +34,7 @@
> #include "arm-spe-decoder/arm-spe-decoder.h"
> #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
>
> +#include "../../arch/arm64/include/asm/cputype.h"
> #define MAX_TIMESTAMP (~0ULL)
>
> struct arm_spe {
> @@ -45,6 +46,7 @@ struct arm_spe {
> struct perf_session *session;
> struct machine *machine;
> u32 pmu_type;
> + u64 midr;
>
> struct perf_tsc_conversion tc;
>
> @@ -399,33 +401,127 @@ static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
> return false;
> }
>
> -static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
> +static const struct midr_range neoverse_spe[] = {
> + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
> + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
> + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
> + {},
> +};
> +
> +
> +static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
> + union perf_mem_data_src *data_src)
> {
> - union perf_mem_data_src data_src = { 0 };
> + /*
> + * Even though four levels of cache hierarchy are possible, no known
> + * production Neoverse systems currently include more than three levels
> + * so for the time being we assume three exist. If a production system
> + * is built with four the this function would have to be changed to
> + * detect the number of levels for reporting.
> + */
>
> - if (record->op == ARM_SPE_LD)
> - data_src.mem_op = PERF_MEM_OP_LOAD;
> - else
> - data_src.mem_op = PERF_MEM_OP_STORE;
> + /*
> + * We have no data on the hit level or data source for stores in the
> + * Neoverse SPE records.
> + */
> + if (record->op & ARM_SPE_ST) {
> + data_src->mem_lvl = PERF_MEM_LVL_NA;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NA;
> + return;
> + }

For the store operation, I found we need to use more strictly criteria
to check memory operations, otherwise, we might wrongly synthesize
memory sample even for other types of operations.

To fix the issue, I think we need to add below patch; if this is okay
for you, please consider to include it in the next patch set version.

Thanks,
Leo