Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne

From: Ilkka Koskinen
Date: Mon Oct 28 2024 - 22:28:44 EST



Hi Leo,

On Fri, 25 Oct 2024, Leo Yan wrote:
On Thu, Oct 24, 2024 at 11:30:35PM +0000, Ilkka Koskinen wrote:

Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.

Signed-off-by: Ilkka Koskinen <ilkka@xxxxxxxxxxxxxxxxxxxxxx>
---
.../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++
tools/perf/util/arm-spe.c | 61 +++++++++++++++++++
2 files changed, 70 insertions(+)

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 1443c28545a9..e4115b1e92b2 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
ARM_SPE_NV_DRAM = 0xe,
};

+enum arm_spe_ampereone_data_source {
+ ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0,
+ ARM_SPE_AMPEREONE_SLC = 0x3,
+ ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5,
+ ARM_SPE_AMPEREONE_DDR = 0x7,
+ ARM_SPE_AMPEREONE_L1D = 0x8,
+ ARM_SPE_AMPEREONE_L2D = 0x9,
+};
+
struct arm_spe_record {
enum arm_spe_sample_type type;
int err;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 138ffc71b32d..04bd21ad7ea8 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
}

+static const struct midr_range ampereone_source_spe[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {},
+};
+
+static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src,
+ u64 midr)
+{
+ if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
+ arm_spe__synth_data_source_generic(record, data_src);
+ return;
+ }

With James' suggestion, I don't think here need to check the CPU
variant again. All generic data source generating should run in the
arm_spe__synth_data_source() function.

Yep, checking the implementor ID was just wrong and unnecessary.
I fix that.


+
+ if (record->op & ARM_SPE_OP_ST) {
+ data_src->mem_lvl = PERF_MEM_LVL_NA;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NA;
+ return;
+ }
+
+ switch (record->source) {
+ case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_AMPEREONE_SLC:
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
+ break;
+ case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_AMPEREONE_DDR:
+ data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ case ARM_SPE_AMPEREONE_L1D:
+ data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ case ARM_SPE_AMPEREONE_L2D:
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;

We have another way to do this. If convert the SoC specific data source
to common data source values, e.g.

ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE -> ARM_SPE_NV_PEER_CORE
ARM_SPE_AMPEREONE_SLC -> ARM_SPE_NV_SYS_CACHE
ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE -> ARM_SPE_NV_REMOTE
ARM_SPE_AMPEREONE_DDR -> ARM_SPE_NV_DRAM
...

Then we don't need to maintain two functions with almost same setting.

I have no strong opinion for this. A dedicated function for Ampere CPU
might give a bit flexiblity for later tweaking. It is up to you.

Let me think about it and see how it would look like.


Last thing, please work on the the latest perf-tools-next branch:

https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git
branch: perf-tools-next

Recently we have Arm SPE data source refactoring, please rebase on it.

Uh, for some reason I rebased it on top of Will's arm64 tree. Sorry about that.

Cheers, Ilkka


Thanks,
Leo

+ default:
+ break;
+ }
+}
+
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
{
union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
+ bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);

if (record->op & ARM_SPE_OP_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
@@ -529,6 +588,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m

if (is_neoverse)
arm_spe__synth_data_source_neoverse(record, &data_src);
+ else if (is_ampereone)
+ arm_spe__synth_data_source_ampereone(record, &data_src, midr);
else
arm_spe__synth_data_source_generic(record, &data_src);

--
2.47.0