On Wed, 21 Jun 2023 18:11:41 -0700
Ilkka Koskinen <ilkka@xxxxxxxxxxxxxxxxxxxxxx> wrote:
Ampere SoC PMU follows CoreSight PMU architecture. It uses implementationHi Ilkka,
specific registers to filter events rather than PMEVFILTnR registers.
Signed-off-by: Ilkka Koskinen <ilkka@xxxxxxxxxxxxxxxxxxxxxx>
Drive by review so not super detailed (I was curious) but a few questions/comments inline.
Jonathan
---
.../admin-guide/perf/ampere_cspmu.rst | 29 +++
drivers/perf/arm_cspmu/Makefile | 2 +-
drivers/perf/arm_cspmu/ampere_cspmu.c | 232 ++++++++++++++++++
drivers/perf/arm_cspmu/ampere_cspmu.h | 17 ++
drivers/perf/arm_cspmu/arm_cspmu.c | 7 +
5 files changed, 286 insertions(+), 1 deletion(-)
create mode 100644 Documentation/admin-guide/perf/ampere_cspmu.rst
create mode 100644 drivers/perf/arm_cspmu/ampere_cspmu.c
create mode 100644 drivers/perf/arm_cspmu/ampere_cspmu.h
diff --git a/Documentation/admin-guide/perf/ampere_cspmu.rst b/Documentation/admin-guide/perf/ampere_cspmu.rst
new file mode 100644
index 000000000000..bf86bffeef63
--- /dev/null
+++ b/Documentation/admin-guide/perf/ampere_cspmu.rst
@@ -0,0 +1,29 @@
+
+Example for perf tool use::
+
+ / # perf list ampere
+
+ ampere_mcu_pmu_0/act_sent/ [Kernel PMU event]
+ <...>
+ ampere_mcu_pmu_1/rd_sent/ [Kernel PMU event]
+ <...>
+
+ / # perf stat -a -e ampere_mcu_pmu_0/act_sent,filter_enable=3,bank=5,rank=3,threshold=2/,ampere_mcu_pmu_1/rd_sent/ \
+ sleep 1
Why filter_enable=3?
+static u32 ampere_cspmu_event_filter(const struct perf_event *event)
+{
Whilst lots of other comments on this - perhaps add another one here to
why this is a noop.
+ return 0;
+}
+static int ampere_cspmu_validate_event(struct arm_cspmu *cspmu,
+ struct perf_event *new)
+{
+ struct perf_event *curr, *leader = new->group_leader;
+ unsigned int idx;
+ int ret;
+
+ ret = ampere_cspmu_validate_configs(new, leader);
+ if (ret)
+ return ret;
+
+ /* We compare the global filter settings to existing events */
+ idx = find_first_bit(cspmu->hw_events.used_ctrs,
+ cspmu->cycle_counter_logical_idx);
+
+ /* This is the first event */
Maybe add why that matters to the comment?
+ if (idx == cspmu->cycle_counter_logical_idx)
+ return 0;
+
+ curr = cspmu->hw_events.events[idx];
+
+ return ampere_cspmu_validate_configs(curr, new);
+}
+
+static char *ampere_cspmu_format_name(const struct arm_cspmu *cspmu,
+ const char *name_pattern)
+{
+ struct device *dev = cspmu->dev;
+ static atomic_t pmu_generic_idx = {0};
Why not an ida?
If the pmu drivers ever become easy to unbind then you won't get ID
reusage like this an eventually you will run into overflow problems.
+
+ return devm_kasprintf(dev, GFP_KERNEL, name_pattern,
+ atomic_fetch_inc(&pmu_generic_idx));
+}
+
+int ampere_cspmu_init_ops(struct arm_cspmu *cspmu)
+{
+ struct device *dev = cspmu->dev;
+ struct ampere_cspmu_ctx *ctx;
+ struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+
+ ctx = devm_kzalloc(dev, sizeof(struct ampere_cspmu_ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->event_attr = ampereone_mcu_pmu_event_attrs;
+ ctx->format_attr = ampereone_mcu_format_attrs;
+ ctx->name = ampere_cspmu_format_name(cspmu,
+ "ampere_mcu_pmu_%u");
Long line and need to break avoided if you don't bother trying to align the = signs...
Personally I don't like this style as it causes a lot of churn as drivers
evolve, but meh, it's up to you.
Given the result is confusing if the allocation fails (name not what is expected)
I would also check that allocation and error out if it fails. Obviously it won't
under realistic circumstances, but a bit of paranoia never hurt anyone.
+ cspmu->impl.ctx = ctx;
+
+ impl_ops->event_filter = ampere_cspmu_event_filter;
+ impl_ops->set_ev_filter = ampere_cspmu_set_ev_filter;
+ impl_ops->validate_event = ampere_cspmu_validate_event;
+ impl_ops->get_name = ampere_cspmu_get_name;
+ impl_ops->get_event_attrs = ampere_cspmu_get_event_attrs;
+ impl_ops->get_format_attrs = ampere_cspmu_get_format_attrs;
+
+ return 0;
+}
+
+MODULE_LICENSE("GPL v2");
...
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index 471d6d7ac81a..587515eea0b4 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -29,6 +29,7 @@
#include <linux/perf_event.h>
#include <linux/platform_device.h>
+#include "ampere_cspmu.h"
I'd be tempted to keep the generic header in a separate block then
follow with the vendor ones. Not particularly important though.
#include "arm_cspmu.h"
#include "nvidia_cspmu.h"
@@ -114,6 +115,7 @@
/* JEDEC-assigned JEP106 identification code */
#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B
+#define ARM_CSPMU_IMPL_ID_AMPERE 0xA16
static unsigned long arm_cspmu_cpuhp_state;
@@ -388,6 +390,11 @@ static const struct impl_match impl_match[] = {
.mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
.impl_init_ops = nv_cspmu_init_ops
},
+ {
+ .pmiidr = ARM_CSPMU_IMPL_ID_AMPERE,
+ .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
+ .impl_init_ops = ampere_cspmu_init_ops
+ },
{}
};