[PATCH] perf/arm-cmn: Check all the DTCs when reading global counters

From: Ilkka Koskinen
Date: Fri Jan 20 2023 - 21:11:56 EST


Some events may be available only on nodes that do not belong to DTC0.
When the driver reads the global counters, it stops as soon as it finds a
DTC that's not being used and, thus, ignores the rest. As the driver
doesn't read the paired global counters, overflowing local counters are
regarded as overflowing global counters (assuming the new local counter
value is smaller than the previous one) and therefore we can see values
around 2^64. Fix the issue by checking all the used DTCs.

The driver still tries to find a counter that's available on all the
DTCs rather than doing per-DTC allocation of global counters. This may
need to change in the future.

Signed-off-by: Ilkka Koskinen <ilkka@xxxxxxxxxxxxxxxxxxxxxx>
---
drivers/perf/arm-cmn.c | 41 ++++++++++++++++++++++++++++++++---------
1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index b80a9b74662b..c516f091a002 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -540,7 +540,7 @@ struct arm_cmn_hw_event {
struct arm_cmn_node *dn;
u64 dtm_idx[4];
unsigned int dtc_idx;
- u8 dtcs_used;
+ unsigned long dtcs_used;
u8 num_dns;
u8 dtm_offset;
bool wide_sel;
@@ -550,6 +550,9 @@ struct arm_cmn_hw_event {
#define for_each_hw_dn(hw, dn, i) \
for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)

+#define for_each_used_dtc(hw, cmn, i) \
+ for_each_set_bit(i, &(hw)->dtcs_used, (cmn)->num_dtcs)
+
static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
{
BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target));
@@ -1272,7 +1275,7 @@ static void arm_cmn_init_counter(struct perf_event *event)
unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx);
u64 count;

- for (i = 0; hw->dtcs_used & (1U << i); i++) {
+ for_each_used_dtc(hw, cmn, i) {
writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt);
cmn->dtc[i].counters[hw->dtc_idx] = event;
}
@@ -1301,7 +1304,7 @@ static void arm_cmn_event_read(struct perf_event *event)
delta = new - prev;

local_irq_save(flags);
- for (i = 0; hw->dtcs_used & (1U << i); i++) {
+ for_each_used_dtc(hw, cmn, i) {
new = arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx);
delta += new << 16;
}
@@ -1614,10 +1617,32 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
}
memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx));

- for (i = 0; hw->dtcs_used & (1U << i); i++)
+ for_each_used_dtc(hw, cmn, i)
cmn->dtc[i].counters[hw->dtc_idx] = NULL;
}

+static int arm_cmn_get_global_counter(struct arm_cmn *cmn,
+ struct arm_cmn_hw_event *hw)
+{
+ int dtc_idx, i;
+ bool available;
+
+ for (dtc_idx = 0; dtc_idx < CMN_DT_NUM_COUNTERS; dtc_idx++) {
+ available = true;
+ for_each_used_dtc(hw, cmn, i) {
+ if (cmn->dtc[i].counters[dtc_idx]) {
+ available = false;
+ break;
+ }
+ }
+
+ if (available)
+ return dtc_idx;
+ }
+
+ return -ENOSPC;
+}
+
static int arm_cmn_event_add(struct perf_event *event, int flags)
{
struct arm_cmn *cmn = to_cmn(event->pmu);
@@ -1642,11 +1667,9 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
return 0;
}

- /* Grab a free global counter first... */
- dtc_idx = 0;
- while (dtc->counters[dtc_idx])
- if (++dtc_idx == CMN_DT_NUM_COUNTERS)
- return -ENOSPC;
+ dtc_idx = arm_cmn_get_global_counter(cmn, hw);
+ if (dtc_idx < 0)
+ return dtc_idx;

hw->dtc_idx = dtc_idx;

--
2.17.1