[PATCH v11 04/10] powerpc/perf: Add generic IMC pmu group and event functions
From: Anju T Sudhakar
Date: Wed Jun 28 2017 - 15:00:01 EST
Device tree IMC driver code parses the IMC units and their events. It
passes the information to IMC pmu code which is placed in powerpc/perf
as "imc-pmu.c".
Patch adds a set of generic imc pmu related event functions to be
used by each imc pmu unit. Add code to setup format attribute and to
register imc pmus. Add a event_init function for nest_imc events.
Since, the IMC counters' data are periodically fed to a memory location,
the functions to read/update, start/stop, add/del can be generic and can
be used by all IMC PMU units.
Signed-off-by: Anju T Sudhakar <anju@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Hemant Kumar <hemant@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Madhavan Srinivasan <maddy@xxxxxxxxxxxxxxxxxx>
---
arch/powerpc/include/asm/imc-pmu.h | 5 +
arch/powerpc/perf/Makefile | 3 +
arch/powerpc/perf/imc-pmu.c | 279 ++++++++++++++++++++++++++++++
arch/powerpc/platforms/powernv/opal-imc.c | 11 +-
4 files changed, 296 insertions(+), 2 deletions(-)
create mode 100644 arch/powerpc/perf/imc-pmu.c
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 2a0239e..25d0c57 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -63,6 +63,9 @@ struct imc_events {
#define IMC_CPUMASK_ATTR 1
#define IMC_EVENT_ATTR 2
#define IMC_NULL_ATTR 3
+#define IMC_EVENT_OFFSET_MASK 0xffffffffULL
+#define IMC_EVENT_RVALUE_MASK 0x100000000ULL
+#define IMC_NEST_EVENT_MODE 0x1fe00000000ULL
/*
* Device tree parser code detects IMC pmu support and
@@ -101,4 +104,6 @@ enum {
*/
#define IMC_DOMAIN_NEST 1
+extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+extern int init_imc_pmu(struct imc_events *events, int idx, struct imc_pmu *pmu_ptr);
#endif /* PPC_POWERNV_IMC_PMU_DEF_H */
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 4d606b9..b29d918 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -6,6 +6,9 @@ obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o \
isa207-common.o power8-pmu.o power9-pmu.o
+
+obj-$(CONFIG_HV_PERF_IMC_CTRS) += imc-pmu.o
+
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
new file mode 100644
index 0000000..326c9ea
--- /dev/null
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -0,0 +1,279 @@
+/*
+ * Nest Performance Monitor counter support.
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ * (C) 2017 Anju T Sudhakar, IBM Corporation.
+ * (C) 2017 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or later version.
+ */
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <asm/opal.h>
+#include <asm/imc-pmu.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
+#include <linux/string.h>
+
+/* Needed for sanity check */
+struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+
+struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+{
+ return container_of(event->pmu, struct imc_pmu, pmu);
+}
+
+PMU_FORMAT_ATTR(event, "config:0-47");
+PMU_FORMAT_ATTR(offset, "config:0-31");
+PMU_FORMAT_ATTR(rvalue, "config:32");
+PMU_FORMAT_ATTR(mode, "config:33-40");
+static struct attribute *nest_imc_format_attrs[] = {
+ &format_attr_event.attr,
+ &format_attr_offset.attr,
+ &format_attr_rvalue.attr,
+ &format_attr_mode.attr,
+ NULL,
+};
+
+static struct attribute_group imc_format_group = {
+ .name = "format",
+ .attrs = nest_imc_format_attrs,
+};
+
+static int nest_imc_event_init(struct perf_event *event)
+{
+ int chip_id;
+ u32 l_config, config = event->attr.config;
+ struct imc_mem_info *pcni;
+ struct imc_pmu *pmu;
+ bool flag = false;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* Sampling not supported */
+ if (event->hw.sample_period)
+ return -EINVAL;
+
+ /* unsupported modes and filters */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest)
+ return -EINVAL;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ pmu = imc_event_to_pmu(event);
+ /*
+ * Sanity check for config (event offset, mode and rvalue).
+ * mode and rvalue should be zero, if not just return.
+ */
+ if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size) ||
+ ((config & IMC_EVENT_RVALUE_MASK) != 0) ||
+ ((config & IMC_NEST_EVENT_MODE) != 0))
+ return -EINVAL;
+
+ chip_id = topology_physical_package_id(event->cpu);
+ pcni = pmu->mem_info;
+ do {
+ if (pcni->id == chip_id) {
+ flag = true;
+ break;
+ }
+ pcni++;
+ } while (pcni);
+ if (!flag)
+ return -ENODEV;
+ /*
+ * Memory for Nest HW counter data could be in multiple pages.
+ * Hence check and pick the right event base page for chip with
+ * "chip_id" and add "config" to it".
+ */
+ l_config = config & IMC_EVENT_OFFSET_MASK;
+ event->hw.event_base = (u64)pcni->vbase[l_config/PAGE_SIZE] + (config & ~PAGE_MASK);
+ return 0;
+}
+
+static void imc_read_counter(struct perf_event *event)
+{
+ u64 *addr, data;
+
+ /*
+ * In-Memory Collection (IMC) counters are free flowing counters.
+ * So we take a snapshot of the counter value on enable and save it
+ * to calculate the delta at later stage to present the event counter
+ * value.
+ */
+ addr = (u64 *)event->hw.event_base;
+ data = __be64_to_cpu(READ_ONCE(*addr));
+ local64_set(&event->hw.prev_count, data);
+}
+
+static void imc_perf_event_update(struct perf_event *event)
+{
+ u64 counter_prev, counter_new, final_count, *addr;
+
+ addr = (u64 *)event->hw.event_base;
+ counter_prev = local64_read(&event->hw.prev_count);
+ counter_new = __be64_to_cpu(READ_ONCE(*addr));
+ final_count = counter_new - counter_prev;
+
+ /*
+ * Need to update prev_count is that, counter could be
+ * read in a periodic interval from the tool side.
+ */
+ local64_set(&event->hw.prev_count, counter_new);
+ /* Update the delta to the event count */
+ local64_add(final_count, &event->count);
+}
+
+static void imc_event_start(struct perf_event *event, int flags)
+{
+ /*
+ * In Memory Counters are free flowing counters. HW or the microcode
+ * keeps adding to the counter offset in memory. To get event
+ * counter value, we snapshot the value here and we calculate
+ * delta at later point.
+ */
+ imc_read_counter(event);
+}
+
+static void imc_event_stop(struct perf_event *event, int flags)
+{
+ /*
+ * Take a snapshot and calculate the delta and update
+ * the event counter values.
+ */
+ imc_perf_event_update(event);
+}
+
+static int imc_event_add(struct perf_event *event, int flags)
+{
+ if (flags & PERF_EF_START)
+ imc_event_start(event, flags);
+
+ return 0;
+}
+
+/* update_pmu_ops : Populate the appropriate operations for "pmu" */
+static int update_pmu_ops(struct imc_pmu *pmu)
+{
+ if (!pmu)
+ return -EINVAL;
+
+ pmu->pmu.task_ctx_nr = perf_invalid_context;
+ pmu->pmu.event_init = nest_imc_event_init;
+ pmu->pmu.add = imc_event_add;
+ pmu->pmu.del = imc_event_stop;
+ pmu->pmu.start = imc_event_start;
+ pmu->pmu.stop = imc_event_stop;
+ pmu->pmu.read = imc_perf_event_update;
+ pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
+ pmu->pmu.attr_groups = pmu->attr_groups;
+
+ return 0;
+}
+
+/* dev_str_attr : Populate event "name" and string "str" in attribute */
+static struct attribute *dev_str_attr(const char *name, const char *str)
+{
+ struct perf_pmu_events_attr *attr;
+
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ if (!attr)
+ return NULL;
+ sysfs_attr_init(&attr->attr.attr);
+
+ attr->event_str = str;
+ attr->attr.attr.name = name;
+ attr->attr.attr.mode = 0444;
+ attr->attr.show = perf_event_sysfs_show;
+
+ return &attr->attr.attr;
+}
+
+/*
+ * update_events_in_group: Update the "events" information in an attr_group
+ * and assign the attr_group to the pmu "pmu".
+ */
+static int update_events_in_group(struct imc_events *events,
+ int idx, struct imc_pmu *pmu)
+{
+ struct attribute_group *attr_group;
+ struct attribute **attrs;
+ int i;
+
+ /* If there is no events for this pmu, just return zero */
+ if (!events)
+ return 0;
+
+ /* Allocate memory for attribute group */
+ attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
+ if (!attr_group)
+ return -ENOMEM;
+
+ /* Allocate memory for attributes */
+ attrs = kzalloc((sizeof(struct attribute *) * (idx + 1)), GFP_KERNEL);
+ if (!attrs) {
+ kfree(attr_group);
+ return -ENOMEM;
+ }
+
+ attr_group->name = "events";
+ attr_group->attrs = attrs;
+ for (i = 0; i < idx; i++, events++) {
+ attrs[i] = dev_str_attr((char *)events->ev_name,
+ (char *)events->ev_value);
+ }
+
+ /* Save the event attribute */
+ pmu->attr_groups[IMC_EVENT_ATTR] = attr_group;
+ return 0;
+}
+
+/*
+ * init_imc_pmu : Setup and register the IMC pmu device.
+ *
+ * @events: events memory for this pmu.
+ * @idx: number of event entries created.
+ * @pmu_ptr: memory allocated for this pmu.
+ */
+int init_imc_pmu(struct imc_events *events, int idx,
+ struct imc_pmu *pmu_ptr)
+{
+ int ret = -ENODEV;
+
+ ret = update_events_in_group(events, idx, pmu_ptr);
+ if (ret)
+ goto err_free;
+
+ ret = update_pmu_ops(pmu_ptr);
+ if (ret)
+ goto err_free;
+
+ ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1);
+ if (ret)
+ goto err_free;
+
+ pr_info("%s performance monitor hardware support registered\n",
+ pmu_ptr->pmu.name);
+
+ return 0;
+
+err_free:
+ /* Only free the attr_groups which are dynamically allocated */
+ if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]) {
+ if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs)
+ kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
+ kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
+ }
+
+ return ret;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 839c257..a68d66d 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -34,8 +34,6 @@
#include <asm/cputable.h>
#include <asm/imc-pmu.h>
-struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
-
static int imc_event_prop_update(char *name, struct imc_events *events)
{
char *buf;
@@ -452,8 +450,17 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
if (prop)
imc_events_setup(parent, pmu_index, pmu_ptr, prop, &idx);
}
+ /* Function to register IMC pmu */
+ ret = init_imc_pmu(pmu_ptr->events, idx, pmu_ptr);
+ if (ret) {
+ pr_err("IMC PMU %s Register failed\n", pmu_ptr->pmu.name);
+ goto free_events;
+ }
return 0;
+free_events:
+ if (pmu_ptr->events)
+ imc_free_events(pmu_ptr->events, idx);
free_pmu:
if (pmu_ptr)
kfree(pmu_ptr);
--
2.7.4