Measuring DDR bandwidth allows for an accurate measurement
of memory throughput achieved for a given workload. It also
breaks down the traffic measurement on a per DDR channel basis.
Signed-off-by: Rahul Ramasubramanian <rahulr@xxxxxxxxxxxxxx>
Signed-off-by: Agustin Vega-Frias <agustinv@xxxxxxxxxxxxxx>
---
drivers/perf/Kconfig | 10 +
drivers/perf/Makefile | 1 +
drivers/perf/qcom_bandwidth_perf_events.c | 970 ++++++++++++++++++++++++++++++
drivers/perf/qcom_bandwidth_perf_events.h | 156 +++++
4 files changed, 1137 insertions(+)
create mode 100644 drivers/perf/qcom_bandwidth_perf_events.c
create mode 100644 drivers/perf/qcom_bandwidth_perf_events.h
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 08ebaf7..3c9f7e9 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -87,6 +87,16 @@ config QCOM_L3_PMU
Adds the L3 cache PMU into the perf events subsystem for
monitoring L3 cache events.
+config QCOM_BANDWIDTH_PMU
+ bool "Qualcomm Technologies Memory Bandwidth PMU"
+ depends on ARCH_QCOM && ARM64 && ACPI
+ select QCOM_IRQ_COMBINER
+ help
+ Provides support for the memory bandwidth performance monitor unit (PMU)
+ in Qualcomm Technologies processors.
+ Adds the Memory Bandwidth PMU into the perf events subsystem for
+ monitoring memory bandwidth events.
+
config XGENE_PMU
depends on ARCH_XGENE
bool "APM X-Gene SoC PMU"
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index b3902bd..50c20ec 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -7,5 +7,6 @@ obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
obj-$(CONFIG_HISI_PMU) += hisilicon/
obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
+obj-$(CONFIG_QCOM_BANDWIDTH_PMU) += qcom_bandwidth_perf_events.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
diff --git a/drivers/perf/qcom_bandwidth_perf_events.c b/drivers/perf/qcom_bandwidth_perf_events.c
new file mode 100644
index 0000000..bcc5667
--- /dev/null
+++ b/drivers/perf/qcom_bandwidth_perf_events.c
@@ -0,0 +1,970 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * This driver adds support for perf events to monitor the DDR
+ * bandwidth in Qualcomm Technologies chips. Each switch in the
+ * interconnect is connected to the memory controller and contains a
+ * performance monitoring unit (PMU) that the driver exposes
+ * through the perf events framework.
+ *
+ * The PMU Event Counters
+ * - Event counters, which count occurrences of a configured event.
+ *
+ * These resources are exposed as perf counting events, there is no
+ * support for sampling based on events exposed by the driver. Event
+ * counters are always accumulating.
+ * Events associated with event counters are the following:
+ * ddr-read-bytes: The driver scales the raw pmu count to provide the
+ * number of bytes read from a specific memory controller.
+ *
+ * ddr-write-bytes: The driver scales the raw pmu count to provide the
+ * number of bytes written to a specific memory controller.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/acpi.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include "qcom_bandwidth_perf_events.h"
+
+
+
+/*
+ * Structures representing a HW PMU and other associated resources
+ */
+
+/*
+ * Represents an event counter
+ *
+ * This type is used to make these operations polymorphic depending on the
+ * type of hardware resources an event uses. The general idea is to associate
+ * a perf_event with a switch_pmu_counter via the index contained in its
+ * hw_perf_event. To accomplish this, an array of switch_pmu_counters is used
+ * and event counters use the first BANDWIDTH_NUM_EVENT_COUNTERS indexes, so
+ * the event counter index is found by using the index directly.
+ */
+struct switch_pmu_counter {
+	/* perf event bound to this counter, NULL while the counter is free */
+	struct perf_event *event;
+	/* Called to start event monitoring */
+	void (*start)(struct perf_event *event);
+	/* Called to stop event monitoring (optional) */
+	void (*stop)(struct perf_event *event, int flags);
+	/* Called when the counter overflows (optional) */
+	void (*wrap)(struct perf_event *event);
+	/* Called to update the perf_event */
+	void (*update)(struct perf_event *event);
+};
+
+/*
+ * Represents the hardware PMU
+ *
+ * This type inherits from the core perf events struct pmu and adds data
+ * to manage the PMU resources.
+ */
+struct switch_pmu {
+ /* Base perf pmu; to_switch_pmu() recovers this struct from it */
+ struct pmu perf_pmu;
+ /* CPU mask exported for user space tools via sysfs */
+ cpumask_t cpu;
+ /* Node for the hotplug notifier hlist */
+ struct hlist_node node;
+ /* Register base address */
+ void __iomem *regs;
+ /*
+  * Spinlock used to protect indexed accesses to event counters: it
+  * keeps the BANDWIDTH_EC_COUNTER_SEL write and the following
+  * BANDWIDTH_EC_COUNT access together.
+  */
+ raw_spinlock_t ecsel_lock;
+
+ /* Bitmap to track counter use (one bit per hardware counter) */
+ unsigned long used_mask[BITS_TO_LONGS(BANDWIDTH_NUM_TOTAL_COUNTERS)];
+ /* Counter resources, indexed by hw_perf_event.idx */
+ struct switch_pmu_counter counters[BANDWIDTH_NUM_TOTAL_COUNTERS];
+};
+
+/* Index of the first event counter inside switch_pmu.counters[] */
+#define FIRST_EVENT_COUNTER 0
+
+/* Recover the enclosing struct switch_pmu from its embedded struct pmu */
+#define to_switch_pmu(p) (container_of(p, struct switch_pmu, perf_pmu))
+
+/* Dynamic CPU hotplug state id obtained from cpuhp_setup_state_multi() */
+static int cpuhp_state_num;
+
+/*
+ * Decoding of settings from perf_event_attr
+ *
+ * Common bits:
+ *
+ * The config format for perf events associated with event counters is:
+ * - config: bits 0-3:event selector, bits 16-22:source selector
+ * - config1: bits 0-21,24-30:filter config, bits 32-45,48-54:filter enable
+ *
+ */
+
+/*
+ * Generate get_<_name>(event), which returns the _size-bit wide field that
+ * starts at bit _shift of event->attr.<_config>.
+ */
+#define PERF_EVENT_ATTR_EXTRACTOR(_name, _config, _size, _shift) \
+ static inline u32 get_##_name(struct perf_event *event) \
+ { \
+ return (event->attr._config >> _shift) \
+ & GENMASK(_size - 1, 0); \
+ }
+
+/* config bits 0-3: event selector */
+PERF_EVENT_ATTR_EXTRACTOR(ec_event_sel, config, 4, 0);
+/* config bit 32: "lc" flag, set when the event wants a long (64-bit) counter */
+PERF_EVENT_ATTR_EXTRACTOR(ec_event_lc, config, 1, 32);
+/* config bits 16-22: source selector */
+PERF_EVENT_ATTR_EXTRACTOR(ec_source_sel, config, 7, 16);
+
+
+/*
+ * Implementation of global HW PMU operations
+ */
+
+/* Number of hardware counters an event needs: two for a long counter, else one */
+static inline int event_num_counters(struct perf_event *event)
+{
+	if (get_ec_event_lc(event) != 0)
+		return 2;
+
+	return 1;
+}
+
+static
+bool switch_pmu__inuse(struct switch_pmu *pmu)
+{
+	u32 enabled = readl_relaxed(pmu->regs + BANDWIDTH_EC_ENABLE_SET);
+
+	/* This exact enable pattern is taken to mean the IMC owns the PMU */
+	return enabled == 0xF000;
+}
+
+/*
+ * Bring the PMU to a known state.
+ *
+ * When the PMU is not already in use (see switch_pmu__inuse()) the hardware
+ * is unlocked, pulsed through a global reset and globally re-enabled. In all
+ * cases every event counter and every counter interrupt is disabled.
+ */
+static
+void switch_pmu__reset(struct switch_pmu *pmu)
+{
+	u32 all = GENMASK(BANDWIDTH_NUM_EVENT_COUNTERS - 1, 0);
+
+	if (!switch_pmu__inuse(pmu)) {
+		/* Enable access by writing the LAR key */
+		writel_relaxed(BANDWIDTH_LAR_KEY, pmu->regs + BANDWIDTH_LAR);
+
+		/* Disable IRQonMSB */
+		writel_relaxed(0x0, pmu->regs + BANDWIDTH_EC_IRQ_CONTROL);
+
+		/*
+		 * Assert reset to the EC hardware, use writel to ensure the
+		 * preceding relaxed writes have been seen by the device
+		 * before this write.
+		 */
+		writel(SET(GLOBAL_RESET, 1),
+		       pmu->regs + BANDWIDTH_EC_GLOBAL_CONTROL);
+
+		/*
+		 * De-assert reset to the EC hardware, use writel to ensure
+		 * the reset command has been seen by the device.
+		 */
+		writel(SET(GLOBAL_RESET, 0),
+		       pmu->regs + BANDWIDTH_EC_GLOBAL_CONTROL);
+		writel(SET(RETRIEVAL_MODE, 1) | SET(GLOBAL_ENABLE, 1) |
+		       SET(GLOBAL_TRIGOVRD, 1),
+		       pmu->regs + BANDWIDTH_EC_GLOBAL_CONTROL);
+	}
+
+	/* Clear the interrupts and event counters */
+	writel_relaxed(all, pmu->regs + BANDWIDTH_EC_ENABLE_CLEAR);
+	writel_relaxed(all, pmu->regs + BANDWIDTH_EC_INTERRUPT_ENABLE_CLEAR);
+}
+
+/*
+ * Event counter operations
+ */
+
+/* Program the source/event selection register of counter @cntr */
+static inline
+void switch_pmu__ec_set_event(struct switch_pmu *pmu, u8 cntr, u32 event)
+{
+	void __iomem *sel_reg = pmu->regs + qcom_bandwidth_ec_source_sel(cntr);
+
+	writel_relaxed(event, sel_reg);
+}
+
+/* Start counter @cntr by setting its bit in the enable-set register */
+static inline
+void switch_pmu__ec_enable(struct switch_pmu *pmu, u32 cntr)
+{
+	u32 enable_bit = SET(ECENSET(cntr), 1);
+
+	writel_relaxed(enable_bit, pmu->regs + BANDWIDTH_EC_ENABLE_SET);
+}
+
+/*
+ * Stop counter @cntr by setting its bit in the enable-clear register.
+ * The ECENCLEAR field is the one that describes BANDWIDTH_EC_ENABLE_CLEAR.
+ */
+static inline
+void switch_pmu__ec_disable(struct switch_pmu *pmu, u32 cntr)
+{
+	writel_relaxed(SET(ECENCLEAR(cntr), 1),
+		       pmu->regs + BANDWIDTH_EC_ENABLE_CLEAR);
+}
+
+/*
+ * Enable the overflow interrupt of counter @cntr: set its bit in the IRQ
+ * control register (read-modify-write) and then in the interrupt-enable-set
+ * register. The ECINTENSET field is the one that describes
+ * BANDWIDTH_EC_INTERRUPT_ENABLE_SET.
+ */
+static inline
+void switch_pmu__ec_enable_interrupt(struct switch_pmu *pmu, u32 cntr)
+{
+	u32 val = readl_relaxed(pmu->regs + BANDWIDTH_EC_IRQ_CONTROL);
+
+	writel_relaxed(val | BIT(cntr), pmu->regs + BANDWIDTH_EC_IRQ_CONTROL);
+	writel_relaxed(SET(ECINTENSET(cntr), 1),
+		       pmu->regs + BANDWIDTH_EC_INTERRUPT_ENABLE_SET);
+}
+
+/*
+ * Disable the overflow interrupt of counter @cntr: clear its bit in the IRQ
+ * control register, then set its bit in the interrupt-enable-clear register.
+ */
+static inline
+void switch_pmu__ec_disable_interrupt(struct switch_pmu *pmu, u32 cntr)
+{
+	void __iomem *irq_ctl = pmu->regs + BANDWIDTH_EC_IRQ_CONTROL;
+	u32 masked = readl_relaxed(irq_ctl) & ~BIT(cntr);
+
+	writel(masked, irq_ctl);
+	writel(SET(ECINTENCLR(cntr), 1),
+	       pmu->regs + BANDWIDTH_EC_INTERRUPT_ENABLE_CLEAR);
+}
+
+/* Return the raw contents of the overflow status register */
+static inline
+u32 switch_pmu__ec_read_ovsr(struct switch_pmu *pmu)
+{
+	void __iomem *ovsr_reg = pmu->regs + BANDWIDTH_EC_OVF_STATUS;
+
+	return readl_relaxed(ovsr_reg);
+}
+
+/* Write @value to the overflow status register */
+static inline
+void switch_pmu__ec_write_ovsr(struct switch_pmu *pmu, u32 value)
+{
+	void __iomem *ovsr_reg = pmu->regs + BANDWIDTH_EC_OVF_STATUS;
+
+	writel_relaxed(value, ovsr_reg);
+}
+
+/* True when @ovsr has an overflow bit set for at least one event counter */
+static inline
+bool switch_pmu__any_event_counter_overflowed(u32 ovsr)
+{
+	u32 counter_bits = GENMASK(BANDWIDTH_NUM_EVENT_COUNTERS - 1, 0);
+
+	if (ovsr & counter_bits)
+		return true;
+
+	return false;
+}
+
+/* Non-zero when the overflow bit of counter @cntr is set in @ovsr */
+static inline
+int switch_pmu__ec_has_overflowed(u32 ovsr, u8 cntr)
+{
+	u32 ovf = GET(ECOVF(cntr), ovsr);
+
+	return ovf ? 1 : 0;
+}
+
+/*
+ * Write @value into event counter @cntr.
+ *
+ * The counter is selected through BANDWIDTH_EC_COUNTER_SEL and then written
+ * through BANDWIDTH_EC_COUNT; both accesses happen under ecsel_lock. A
+ * counter that was enabled on entry is disabled around the update and
+ * re-enabled afterwards.
+ */
+static inline
+void switch_pmu__ec_set_value(struct switch_pmu *pmu, u8 cntr, u32 value)
+{
+ unsigned long flags;
+ bool reenable = false;
+
+ /*
+ * Quirk: The counter needs to be disabled before updating.
+ */
+ if ((readl_relaxed(pmu->regs + BANDWIDTH_EC_ENABLE_SET) &
+ SET(ECENSET(cntr), 1)) != 0) {
+ switch_pmu__ec_disable(pmu, cntr);
+ reenable = true;
+ }
+
+ /* Select the counter; the lock keeps select and data access paired */
+ raw_spin_lock_irqsave(&pmu->ecsel_lock, flags);
+ writel_relaxed(SET(ECSEL, cntr), pmu->regs + BANDWIDTH_EC_COUNTER_SEL);
+
+ /*
+ * Use writel because the write to BANDWIDTH_EC_COUNTER_SEL needs
+ * to be observed before the write to BANDWIDTH_EC_COUNT.
+ */
+
+ writel(value, pmu->regs + BANDWIDTH_EC_COUNT);
+ raw_spin_unlock_irqrestore(&pmu->ecsel_lock, flags);
+
+ /* Restore the enable state found on entry */
+ if (reenable)
+ switch_pmu__ec_enable(pmu, cntr);
+}
+
+/*
+ * Read the current value of event counter @cntr.
+ *
+ * The counter is selected through BANDWIDTH_EC_COUNTER_SEL and read through
+ * BANDWIDTH_EC_COUNT. The select register is read back afterwards; if it no
+ * longer holds the value written here the read is retried, up to
+ * DDRBW_MAX_RETRIES attempts in total.
+ *
+ * The read-back is done while ecsel_lock is still held so that another user
+ * of this driver selecting a different counter cannot be mistaken for a
+ * failed read.
+ *
+ * Returns the counter value, or DDR_BW_READ_FAIL when no attempt succeeded.
+ */
+static inline
+u32 switch_pmu__ec_get_value(struct switch_pmu *pmu, u8 cntr)
+{
+	const u32 want = SET(ECSEL, cntr);
+	unsigned long flags;
+	unsigned int attempt;
+	u32 result;
+	u32 sel;
+
+	for (attempt = 0; attempt < DDRBW_MAX_RETRIES; attempt++) {
+		raw_spin_lock_irqsave(&pmu->ecsel_lock, flags);
+		writel_relaxed(want, pmu->regs + BANDWIDTH_EC_COUNTER_SEL);
+
+		/*
+		 * The write to BANDWIDTH_EC_COUNTER_SEL needs to be observed
+		 * before the read to BANDWIDTH_EC_COUNT.
+		 */
+		mb();
+
+		result = readl_relaxed(pmu->regs + BANDWIDTH_EC_COUNT);
+		sel = readl_relaxed(pmu->regs + BANDWIDTH_EC_COUNTER_SEL);
+		raw_spin_unlock_irqrestore(&pmu->ecsel_lock, flags);
+
+		/* A successful attempt is returned whichever attempt it was */
+		if (sel == want)
+			return result;
+	}
+
+	/* Exit gracefully to avoid freeze */
+	return DDR_BW_READ_FAIL;
+}
+
+/* True when at least one counter is currently reserved in used_mask */
+static inline
+bool switch_pmu__any_event_counter_active(struct switch_pmu *pmu)
+{
+	int first_used;
+
+	first_used = find_next_bit(pmu->used_mask,
+				   BANDWIDTH_NUM_TOTAL_COUNTERS,
+				   FIRST_EVENT_COUNTER);
+	if (first_used == BANDWIDTH_NUM_TOTAL_COUNTERS)
+		return false;
+
+	return true;
+}
+
+/*
+ * Event counter switch_pmu_counter method implementation.
+ */
+
+/*
+ * Fold the progress of a 32-bit hardware counter into event->count.
+ *
+ * prev_count is swapped for the freshly read hardware value with a cmpxchg
+ * loop; the 32-bit (wrapping) difference between the two is then added to
+ * the event count.
+ */
+static
+void switch_pmu__32bit_event_counter_update(struct perf_event *event)
+{
+	struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u32 ec_idx = hwc->idx - FIRST_EVENT_COUNTER;
+	u32 prev, now;
+
+	for (;;) {
+		prev = (u32)local64_read(&hwc->prev_count);
+		now = switch_pmu__ec_get_value(pmu, ec_idx);
+		if (local64_cmpxchg(&hwc->prev_count, prev, now) == prev)
+			break;
+	}
+
+	local64_add((u32)(now - prev), &event->count);
+}
+
+/*
+ * Fold the progress of a 64-bit (two ganged counters) event into
+ * event->count. The low half lives in counter idx and the high half in
+ * counter idx + 1.
+ */
+static
+void switch_pmu__64bit_event_counter_update(struct perf_event *event)
+{
+ struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+ int idx = event->hw.idx - FIRST_EVENT_COUNTER;
+ u32 hi, lo;
+ u64 prev, now;
+
+ do {
+ prev = local64_read(&event->hw.prev_count);
+ /*
+ * Read hi, lo, then hi again and retry until both hi reads agree,
+ * so that a carry from lo into hi between the reads is not missed.
+ */
+ do {
+ hi = switch_pmu__ec_get_value(pmu, idx + 1);
+ lo = switch_pmu__ec_get_value(pmu, idx);
+ } while (hi != switch_pmu__ec_get_value(pmu, idx + 1));
+ now = ((u64)hi << 32) | lo;
+ /* Retry if prev_count changed while the hardware was being read */
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev);
+
+ local64_add(now - prev, &event->count);
+}
+
+/*
+ * Prepare the hardware counter of @event: clear the software state and
+ * prev_count, zero the counter and program its source/event selection.
+ */
+static
+void switch_pmu__event_counter_program(struct perf_event *event)
+{
+	struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u32 ec_idx = hwc->idx - FIRST_EVENT_COUNTER;
+	u32 source = get_ec_source_sel(event);
+	u32 selector = get_ec_event_sel(event);
+	u32 ev_type = SET(ECSOURCESEL, source) | SET(ECEVENTSEL, selector);
+
+	hwc->state = 0;
+	local64_set(&hwc->prev_count, 0);
+
+	switch_pmu__ec_set_value(pmu, ec_idx, 0);
+	switch_pmu__ec_set_event(pmu, ec_idx, ev_type);
+}
+
+/*
+ * Gang counter @idx with its lower neighbour to form a 64-bit counter.
+ *
+ * @idx is the upper (odd) counter of the pair. Its event selection is
+ * programmed first, then its bit is set in BANDWIDTH_EC_GANG with a
+ * read-modify-write. The register is handled as a full 32-bit value so that
+ * no bits are dropped when it is written back.
+ */
+static
+void enable_64bit_ganging(struct perf_event *event, u32 idx)
+{
+	struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+	u32 gang_regs;
+	/*
+	 * according to errata doc, this needs to be done
+	 * for the odd counter
+	 */
+	u32 ev_type = SET(ECSOURCESEL, 0x0) | SET(ECEVENTSEL, 0xf);
+
+	switch_pmu__ec_set_event(pmu, idx, ev_type);
+
+	/* enable ganging RMW */
+	gang_regs = readl_relaxed(pmu->regs + BANDWIDTH_EC_GANG);
+	gang_regs |= BIT(idx);
+	writel_relaxed(gang_regs, pmu->regs + BANDWIDTH_EC_GANG);
+}
+
+/*
+ * Undo the ganging of counter @idx by clearing its bit in BANDWIDTH_EC_GANG
+ * with a read-modify-write. The register is handled as a full 32-bit value
+ * so that no bits are dropped when it is written back.
+ */
+static
+void disable_64bit_ganging(struct perf_event *event, u32 idx)
+{
+	struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+	u32 gang_regs;
+
+	gang_regs = readl_relaxed(pmu->regs + BANDWIDTH_EC_GANG);
+	gang_regs &= ~BIT(idx);
+	writel_relaxed(gang_regs, pmu->regs + BANDWIDTH_EC_GANG);
+}
+/* start() method for 32-bit events: program, unmask the IRQ, then enable */
+static
+void switch_pmu_event_32bit_counter_start(struct perf_event *event)
+{
+	struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+	u32 ec_idx = event->hw.idx - FIRST_EVENT_COUNTER;
+	struct perf_event *bound = pmu->counters[ec_idx].event;
+
+	switch_pmu__event_counter_program(bound);
+	switch_pmu__ec_enable_interrupt(pmu, ec_idx);
+	switch_pmu__ec_enable(pmu, ec_idx);
+}
+
+/*
+ * start() method for 64-bit events: program the low counter, gang the
+ * upper counter to it and enable both. No overflow interrupt is enabled.
+ */
+static
+void switch_pmu_event_64bit_counter_start(struct perf_event *event)
+{
+	struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+	u32 lo_idx = event->hw.idx - FIRST_EVENT_COUNTER;
+	u32 hi_idx = lo_idx + 1;
+
+	switch_pmu__event_counter_program(pmu->counters[lo_idx].event);
+	enable_64bit_ganging(event, hi_idx);
+	switch_pmu__ec_enable(pmu, lo_idx);
+	switch_pmu__ec_enable(pmu, hi_idx);
+}
+
+/* stop() method for 32-bit events: mask the IRQ, then disable; @flags unused */
+static
+void switch_pmu_event_32bit_counter_stop(struct perf_event *event, int flags)
+{
+	struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u32 ec_idx = hwc->idx - FIRST_EVENT_COUNTER;
+
+	switch_pmu__ec_disable_interrupt(pmu, ec_idx);
+	switch_pmu__ec_disable(pmu, ec_idx);
+}
+
+/*
+ * stop() method for 64-bit events: mask and disable the low counter, then
+ * the upper counter, and finally release the ganging. @flags is unused.
+ */
+static
+void switch_pmu_event_64bit_counter_stop(struct perf_event *event, int flags)
+{
+	struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+	u32 lo_idx = event->hw.idx - FIRST_EVENT_COUNTER;
+	u32 hi_idx = lo_idx + 1;
+
+	switch_pmu__ec_disable_interrupt(pmu, lo_idx);
+	switch_pmu__ec_disable(pmu, lo_idx);
+
+	switch_pmu__ec_disable_interrupt(pmu, hi_idx);
+	switch_pmu__ec_disable(pmu, hi_idx);
+
+	disable_64bit_ganging(event, hi_idx);
+}
+
+/* wrap() method for 32-bit events: an overflow is handled as a plain update */
+static
+void switch_pmu_event_32bit_counter_wrap(struct perf_event *event)
+{
+ switch_pmu__32bit_event_counter_update(event);
+}
+
+/*
+ * Core abstract PMU functions and management of the software counters.
+ */
+
+/* Empty callback, installed as both pmu_enable and pmu_disable in probe */
+static
+void switch_pmu__nop(struct pmu *perf_pmu)
+{
+}
+
+/*
+ * Reserve a naturally aligned run of 2^@sz counters in used_mask.
+ *
+ * Returns the index of the first reserved counter, or -EAGAIN when no such
+ * run is free. @event is not used.
+ */
+static
+int switch_pmu__reserve_event_counter(struct switch_pmu *pmu,
+				      struct perf_event *event, int sz)
+{
+	int first = bitmap_find_free_region(pmu->used_mask,
+					    BANDWIDTH_NUM_TOTAL_COUNTERS, sz);
+
+	return (first < 0) ? -EAGAIN : first;
+}
+
+/*
+ * We must NOT create groups containing events from multiple hardware PMUs,
+ * although mixing different software and hardware PMUs is allowed.
+ */
+static bool switch_pmu__validate_event_group(struct perf_event *event)
+{
+	struct perf_event *leader = event->group_leader;
+	struct perf_event *sibling;
+	int needed;
+
+	/* A leader that is not a software event must belong to this PMU */
+	if (!is_software_event(leader) && leader->pmu != event->pmu)
+		return false;
+
+	/* Counters wanted by the new event plus those of the leader */
+	needed = event_num_counters(event) + event_num_counters(leader);
+
+	for_each_sibling_event(sibling, leader) {
+		if (!is_software_event(sibling)) {
+			if (sibling->pmu != event->pmu)
+				return false;
+			needed += event_num_counters(sibling);
+		}
+	}
+
+	/*
+	 * A group that wants more counters than the hardware provides can
+	 * never be scheduled, so reject it up front.
+	 */
+	return needed <= BANDWIDTH_NUM_TOTAL_COUNTERS;
+}
+static
+int switch_pmu__event_init(struct perf_event *event)
+{
+ struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ /*
+ * We cannot filter accurately so we just don't allow it at all.
+ */
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_hv || event->attr.exclude_idle)
+
+/*
+ * pmu::add callback: reserve hardware counter(s) for @event, bind the
+ * matching method table to them and, when PERF_EF_START is set in @flags,
+ * start counting right away.
+ *
+ * Returns 0 on success or the negative error from the counter reservation.
+ */
+static
+int switch_pmu__event_add(struct perf_event *event, int flags)
+{
+	struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct switch_pmu_counter *slot;
+	int order = get_ec_event_lc(event);
+	int idx;
+
+	/* Try to find a hardware resource for this event */
+	idx = switch_pmu__reserve_event_counter(pmu, event, order);
+	if (idx < 0)
+		return idx;
+
+	hwc->idx = idx;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	slot = &pmu->counters[idx];
+	if (order != 0) {
+		/* Long counter: both slots of the pair share one method table */
+		slot[0] = (struct switch_pmu_counter) {
+			.event = event,
+			.start = switch_pmu_event_64bit_counter_start,
+			.stop = switch_pmu_event_64bit_counter_stop,
+			.update = switch_pmu__64bit_event_counter_update,
+			.wrap = NULL
+		};
+		slot[1] = slot[0];
+	} else {
+		slot[0] = (struct switch_pmu_counter) {
+			.event = event,
+			.start = switch_pmu_event_32bit_counter_start,
+			.stop = switch_pmu_event_32bit_counter_stop,
+			.wrap = switch_pmu_event_32bit_counter_wrap,
+			.update = switch_pmu__32bit_event_counter_update,
+		};
+	}
+
+	if (flags & PERF_EF_START)
+		slot->start(slot->event);
+
+	return 0;
+}
+
+/* pmu::start callback: run the start method bound to the event's counter */
+static
+void switch_pmu__event_start(struct perf_event *event, int flags)
+{
+	struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+	struct switch_pmu_counter *c = &pmu->counters[event->hw.idx];
+
+	c->start(c->event);
+}
+
+/*
+ * pmu::stop callback: stop the counter unless it is already stopped, fold
+ * the final count into the event when PERF_EF_UPDATE is requested, and mark
+ * the event stopped and up to date.
+ */
+static
+void switch_pmu__event_stop(struct perf_event *event, int flags)
+{
+	struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct switch_pmu_counter *c = &pmu->counters[hwc->idx];
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	if (c->stop)
+		c->stop(c->event, flags);
+
+	if (flags & PERF_EF_UPDATE)
+		c->update(c->event);
+
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+/*
+ * pmu::del callback: stop the counter, fold in the final count, unbind the
+ * event and give the counter(s) back to used_mask.
+ */
+static
+void switch_pmu__event_del(struct perf_event *event, int flags)
+{
+	struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct switch_pmu_counter *lo = &pmu->counters[hwc->idx];
+	int order = get_ec_event_lc(event);
+
+	if (lo->stop)
+		lo->stop(lo->event, flags | PERF_EF_UPDATE);
+	lo->update(lo->event);
+	lo->event = NULL;
+
+	bitmap_release_region(pmu->used_mask, hwc->idx, order);
+
+	/* A long counter also occupied the next slot; unbind that one too */
+	if (order)
+		lo[1].event = NULL;
+}
+
+
+/* pmu::read callback: run the update method bound to the event's counter */
+static
+void switch_pmu__event_read(struct perf_event *event)
+{
+	struct switch_pmu *pmu = to_switch_pmu(event->pmu);
+	struct switch_pmu_counter *c = &pmu->counters[event->hw.idx];
+
+	c->update(c->event);
+}
+
+/* pmu::event_idx callback: always reports index 0 */
+static
+int dummy_event_idx(struct perf_event *event)
+{
+ return 0;
+}
+
+/*
+ * Service event counter overflows.
+ *
+ * The overflow status is read and written back, then the wrap method of
+ * every overflowed counter that has an event bound is called. Counters
+ * without a wrap method are skipped: the method is optional and is NULL for
+ * both halves of a 64-bit counter pair.
+ *
+ * Returns true when at least one wrap method was run.
+ */
+static
+bool switch_pmu__ec_handle_irq(struct switch_pmu *pmu)
+{
+	bool handled = false;
+	u32 ovs = switch_pmu__ec_read_ovsr(pmu);
+	int idx;
+
+	switch_pmu__ec_write_ovsr(pmu, ovs);
+
+	if (!switch_pmu__any_event_counter_overflowed(ovs))
+		return handled;
+
+	for (idx = 0; idx < BANDWIDTH_NUM_EVENT_COUNTERS; ++idx) {
+		struct switch_pmu_counter *counter;
+
+		if (!switch_pmu__ec_has_overflowed(ovs, idx))
+			continue;
+		counter = &pmu->counters[idx + FIRST_EVENT_COUNTER];
+		if (!counter->event || !counter->wrap)
+			continue;
+		counter->wrap(counter->event);
+		handled = true;
+	}
+
+	return handled;
+}
+
+
+/*
+ * Interrupt handler; @data is the struct switch_pmu the IRQ belongs to.
+ * Returns IRQ_HANDLED when an overflow was serviced, IRQ_NONE otherwise.
+ */
+static
+irqreturn_t switch_pmu__handle_irq(int irq_num, void *data)
+{
+	struct switch_pmu *pmu = data;
+	irqreturn_t ret = IRQ_NONE;
+
+	if (switch_pmu__any_event_counter_active(pmu) &&
+	    switch_pmu__ec_handle_irq(pmu))
+		ret = IRQ_HANDLED;
+
+	/*
+	 * Run the pending perf events work.
+	 *
+	 * Note: interrupts *must* be disabled for this call. It will not
+	 * work on platforms where the PMU interrupt can be raised as an NMI.
+	 */
+	irq_work_run();
+
+	return ret;
+}
+
+/*
+ * Fixed attribute groups exposed for perf in the format group.
+ *
+ * The config format for perf events associated with event counters is:
+ * - config: bits 0-3:event selector, bits 16-22:source selector
+ * - config1: bits 0-21,24-30:filter config, bits 32-45,48-54:filter enable
+ *
+ */
+
+/* Event counters */
+
+/*
+ * Build an anonymous perf_pmu_events_attr named _name whose sysfs file is
+ * read-only (0444) and shown through perf_event_sysfs_show with _str as the
+ * event string; evaluates to a pointer to its embedded struct attribute.
+ */
+#define DDRBW_ATTR(_name, _str) \
+ (&((struct perf_pmu_events_attr[]){ \
+ {.attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL),\
+ .id = 0, \
+ .event_str = _str } \
+ })[0].attr.attr)
+
+
+/* Bit layout of perf_event_attr.config, exported in the "format" group */
+static struct attribute *qcom_bandwidth_pmu_formats[] = {
+ DDRBW_ATTR(ecsourcesel, "config:16-22"),
+ DDRBW_ATTR(eceventsel, "config:0-3"),
+ /* lc: request a long counter, see get_ec_event_lc() */
+ DDRBW_ATTR(lc, "config:32"),
+ NULL,
+};
+
+static struct attribute_group qcom_bandwidth_pmu_format_group = {
+ .name = "format",
+ .attrs = qcom_bandwidth_pmu_formats,
+};
+
+/* sysfs show routine for "cpumask": print the PMU's cpu mask as a CPU list */
+static ssize_t qcom_bandwidth_pmu_cpumask_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct pmu *perf_pmu = dev_get_drvdata(dev);
+	struct switch_pmu *pmu = to_switch_pmu(perf_pmu);
+
+	return cpumap_print_to_pagebuf(true, buf, &pmu->cpu);
+}
+
+/* Read-only "cpumask" attribute backed by qcom_bandwidth_pmu_cpumask_show() */
+static struct device_attribute qcom_bandwidth_pmu_cpumask_attr =
+ __ATTR(cpumask, 0444, qcom_bandwidth_pmu_cpumask_show, NULL);
+
+static struct attribute *qcom_bandwidth_pmu_cpumask_attrs[] = {
+ &qcom_bandwidth_pmu_cpumask_attr.attr,
+ NULL,
+};
+
+/* Unnamed group: no .name is set for it */
+static struct attribute_group qcom_bandwidth_pmu_cpumask_attr_group = {
+ .attrs = qcom_bandwidth_pmu_cpumask_attrs,
+};
+
+
+/*
+ * Named events exported in the "events" group. Each event comes with a
+ * ".unit" and a ".scale" attribute ("Bytes" and "32" respectively).
+ * NOTE(review): presumably each counted beat is 32 bytes wide - confirm
+ * against the hardware documentation.
+ */
+static struct attribute *qcom_ddrbw_pmu_events[] = {
+ DDRBW_ATTR(ddr-read-beats, "ecsourcesel=0x14, eceventsel=0"),
+ DDRBW_ATTR(ddr-read-beats.unit, "Bytes"),
+ DDRBW_ATTR(ddr-read-beats.scale, "32"),
+ DDRBW_ATTR(ddr-write-beats, "ecsourcesel=0x15, eceventsel=0"),
+ DDRBW_ATTR(ddr-write-beats.unit, "Bytes"),
+ DDRBW_ATTR(ddr-write-beats.scale, "32"),
+ NULL
+};
+
+static struct attribute_group qcom_bandwidth_pmu_events_group = {
+ .name = "events",
+ .attrs = qcom_ddrbw_pmu_events,
+};
+
+/*
+ * Fill and return the NULL-terminated list of attribute groups (format,
+ * cpumask, events). The list lives in function-local static storage.
+ */
+static const struct attribute_group **init_attribute_groups(void)
+{
+	static const struct attribute_group *result[4];
+	int n = 0;
+
+	result[n++] = &qcom_bandwidth_pmu_format_group;
+	result[n++] = &qcom_bandwidth_pmu_cpumask_attr_group;
+	result[n++] = &qcom_bandwidth_pmu_events_group;
+	result[n] = NULL;
+
+	return result;
+}
+
+static const struct attribute_group **attr_groups;
+
+/*
+ * Device probing and initialization.
+ */
+
+/*
+ * CPU hotplug teardown callback.
+ *
+ * When @cpu is the CPU recorded in the PMU's cpumask, pick any other online
+ * CPU, migrate the perf context to it and record it in the cpumask instead.
+ * Nothing is migrated when @cpu was not the recorded CPU or when no other
+ * CPU is online. Always returns 0.
+ */
+static int qcom_bandwidth_pmu_offline_cpu(unsigned int cpu,
+					  struct hlist_node *node)
+{
+	struct switch_pmu *pmu = hlist_entry_safe(node,
+						  struct switch_pmu, node);
+	unsigned int target;
+
+	if (!cpumask_test_and_clear_cpu(cpu, &pmu->cpu))
+		return 0;
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+	perf_pmu_migrate_context(&pmu->perf_pmu, cpu, target);
+	cpumask_set_cpu(target, &pmu->cpu);
+	return 0;
+}
+
+/* ACPI hardware IDs this driver binds to */
+static const struct acpi_device_id qcom_bandwidth_pmu_acpi_match[] = {
+ { "QCOM80C1", },
+ { }
+};
+
+MODULE_DEVICE_TABLE(acpi, qcom_bandwidth_pmu_acpi_match);
+
+/*
+ * Probe one bandwidth PMU instance.
+ *
+ * Names the PMU after the ACPI _UID, maps its registers, resets the
+ * hardware, installs the overflow interrupt handler, hooks the instance into
+ * the CPU hotplug state and registers it with the perf core.
+ *
+ * Returns 0 on success or a negative errno. All resources are device
+ * managed except the hotplug instance, which is removed by hand on failure.
+ *
+ * NOTE(review): the resource mapping, IRQ flags and initial CPU choice are
+ * reconstructed from the helpers and constants in this driver (RES_BW,
+ * IRQ_BW, DDRBW_PMU_NAME_FORMAT, switch_pmu__reset, switch_pmu__handle_irq);
+ * confirm them against the platform's ACPI description.
+ */
+static int qcom_bandwidth_pmu_probe(struct platform_device *pdev)
+{
+	int result, irq, err;
+	struct resource *regs_rc;
+	struct switch_pmu *pmu;
+	unsigned long uid;
+	struct acpi_device *device;
+	char *name;
+
+	/* The ACPI unique id tells the PMU instances apart */
+	device = ACPI_COMPANION(&pdev->dev);
+	if (!device || !acpi_device_uid(device))
+		return -ENODEV;
+
+	err = kstrtoul(acpi_device_uid(device), 10, &uid);
+	if (err)
+		return err;
+
+	name = devm_kzalloc(&pdev->dev, DDRBW_PMU_NAME_LEN, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;
+	snprintf(name, DDRBW_PMU_NAME_LEN, DDRBW_PMU_NAME_FORMAT, (long)uid);
+
+	pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+
+	/* Members not named here (counters, used_mask, ...) start zeroed */
+	*pmu = (struct switch_pmu) {
+		.perf_pmu = {
+			/* Tag this as a SW context to disable multiplexing */
+			.task_ctx_nr = perf_invalid_context,
+
+			.pmu_enable = switch_pmu__nop,
+			.pmu_disable = switch_pmu__nop,
+			.event_init = switch_pmu__event_init,
+			.add = switch_pmu__event_add,
+			.del = switch_pmu__event_del,
+			.start = switch_pmu__event_start,
+			.stop = switch_pmu__event_stop,
+			.read = switch_pmu__event_read,
+
+			.event_idx = dummy_event_idx,
+
+			.attr_groups = attr_groups
+		},
+	};
+
+	raw_spin_lock_init(&pmu->ecsel_lock);
+
+	regs_rc = platform_get_resource(pdev, IORESOURCE_MEM, RES_BW);
+	pmu->regs = devm_ioremap_resource(&pdev->dev, regs_rc);
+	if (IS_ERR(pmu->regs))
+		return PTR_ERR(pmu->regs);
+
+	/* Quiesce the counters before the interrupt handler can run */
+	switch_pmu__reset(pmu);
+
+	irq = platform_get_irq(pdev, IRQ_BW);
+	if (irq < 0)
+		return irq;
+
+	/* The handler calls irq_work_run(), so it must not be threaded */
+	err = devm_request_irq(&pdev->dev, irq, switch_pmu__handle_irq,
+			       IRQF_NO_THREAD, name, pmu);
+	if (err)
+		return err;
+
+	/* Designate the CPU that owns the perf context of this PMU */
+	cpumask_set_cpu(raw_smp_processor_id(), &pmu->cpu);
+
+	platform_set_drvdata(pdev, pmu);
+
+	/* Add this instance to the list used by the offline callback */
+	result = cpuhp_state_add_instance_nocalls(cpuhp_state_num, &pmu->node);
+	if (result)
+		return result;
+
+	result = perf_pmu_register(&pmu->perf_pmu, name, -1);
+	if (result) {
+		cpuhp_state_remove_instance_nocalls(cpuhp_state_num,
+						    &pmu->node);
+		return result;
+	}
+
+	return 0;
+}
+
+/*
+ * Platform remove callback: take the instance off the CPU hotplug state and
+ * unregister it from the perf core. Always returns 0.
+ */
+static int qcom_bandwidth_pmu_remove(struct platform_device *pdev)
+{
+ struct switch_pmu *pmu = platform_get_drvdata(pdev);
+
+ cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &pmu->node);
+ perf_pmu_unregister(&pmu->perf_pmu);
+ return 0;
+}
+
+/* Platform driver glue; devices are matched through the ACPI id table */
+static struct platform_driver qcom_bandwidth_pmu_driver = {
+ .driver = {
+ .name = "qcom-bandwidth-pmu-v1",
+ .owner = THIS_MODULE,
+ .acpi_match_table = ACPI_PTR(qcom_bandwidth_pmu_acpi_match),
+ },
+ .probe = qcom_bandwidth_pmu_probe,
+ .remove = qcom_bandwidth_pmu_remove,
+};
+
+/*
+ * Module init: build the attribute group list, set up the dynamic CPU
+ * hotplug state and register the platform driver.
+ *
+ * Returns 0 on success or a negative errno. The hotplug state is released
+ * again when the driver registration fails.
+ */
+static int __init register_qcom_bandwidth_pmu_driver(void)
+{
+	int ret;
+
+	if (attr_groups == NULL)
+		attr_groups = init_attribute_groups();
+
+	/* Install a hook to update the context CPU in case it goes offline */
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "perf/qcom/msw:online", NULL,
+				      qcom_bandwidth_pmu_offline_cpu);
+	if (ret < 0)
+		return ret;
+	cpuhp_state_num = ret;
+
+	ret = platform_driver_register(&qcom_bandwidth_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(cpuhp_state_num);
+
+	return ret;
+}
+
+/*
+ * Module exit. The driver is unregistered first because its remove callback
+ * still takes each instance off the hotplug state; only then is the state
+ * itself released.
+ */
+static void __exit unregister_qcom_bandwidth_pmu_driver(void)
+{
+	platform_driver_unregister(&qcom_bandwidth_pmu_driver);
+	cpuhp_remove_multi_state(cpuhp_state_num);
+}
+
+module_init(register_qcom_bandwidth_pmu_driver);
+module_exit(unregister_qcom_bandwidth_pmu_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/qcom_bandwidth_perf_events.h b/drivers/perf/qcom_bandwidth_perf_events.h
new file mode 100644
index 0000000..120a10b
--- /dev/null
+++ b/drivers/perf/qcom_bandwidth_perf_events.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _QCOM_BANDWIDTH_PERF_EVENTS_H_
+#define _QCOM_BANDWIDTH_PERF_EVENTS_H_
+
+#include<linux/bitops.h>
+
+/*
+ * General constants
+ */
+
+
+#define BANDWIDTH_NUM_EVENT_COUNTERS 12
+#define BANDWIDTH_NUM_TOTAL_COUNTERS BANDWIDTH_NUM_EVENT_COUNTERS
+#define BANDWIDTH_LAR_KEY 0xC5ACCE55
+
+/*
+ * Register offsets
+ */
+
+/* ID and Coresight registers */
+
+#define BANDWIDTH_LAR 0xFB0
+
+
+/* Event counter registers */
+
+/*
+ * Because of interleaving, some gaps in the map exists
+ * (7th bit cannot be used).
+ * To accommodate this mapping,
+ * we have different offsets for different sets of counters.
+ */
+
+
+/*
+ * Return the register offset of the source/event selection register of
+ * counter @__cntr.
+ *
+ * The counters are split over four banks (0-2, 3-7, 8-13 and 14 upwards),
+ * each with its own base offset; within a bank the offset advances by 24
+ * bytes per counter number.
+ */
+static inline u32 qcom_bandwidth_ec_source_sel(u8 __cntr)
+{
+	u32 base;
+
+	if (__cntr <= 2)
+		base = 0x240;
+	else if (__cntr <= 7)
+		base = 0x2C0;
+	else if (__cntr <= 13)
+		base = 0x340;
+	else
+		base = 0x3C0;
+
+	return base + ((__cntr) & 0xF) * 24;
+}
+
+
+#define BANDWIDTH_EC_GLOBAL_CONTROL 0xA00
+#define BANDWIDTH_EC_ENABLE_SET 0xA10
+#define BANDWIDTH_EC_ENABLE_CLEAR 0xA18
+#define BANDWIDTH_EC_INTERRUPT_ENABLE_SET 0xA20
+#define BANDWIDTH_EC_INTERRUPT_ENABLE_CLEAR 0xA28
+#define BANDWIDTH_EC_TRIGGER_THRESHOLD_LO 0xA30
+#define BANDWIDTH_EC_TRIGGER_THRESHOLD_HI 0xC30
+#define BANDWIDTH_EC_GANG 0xE30
+#define BANDWIDTH_EC_GANG_CONFIG0 0xE38
+#define BANDWIDTH_EC_GANG_CONFIG1 0xE40
+#define BANDWIDTH_EC_GANG_CONFIG2 0xE48
+#define BANDWIDTH_EC_OVF_STATUS 0xF00
+#define BANDWIDTH_EC_COUNTER_SEL 0xF08
+#define BANDWIDTH_EC_COUNT 0xF10
+#define BANDWIDTH_EC_SWINC 0x1320
+#define BANDWIDTH_EC_IRQ_CONTROL 0x1358
+
+/* IRQ/resource position in ACPI */
+#define IRQ_BW 2
+#define RES_BW 4
+#define DDRBW_PMU_NAME_FORMAT "bwddr_0_%ld"
+#define DDRBW_PMU_NAME_LEN 11
+#define DDRBW_MAX_RETRIES 3
+#define DDR_BW_READ_FAIL 0
+/*
+ * Bit field definitions, defined as (<size>, <shift>)
+ * Please note that fields that take up the whole register
+ * are not included here, as those can be set/read directly.
+ */
+
+/* BANDWIDTH_EC_SOURCE_SEL */
+#define ECSOURCESEL (7, 16)
+#define ECEVENTSEL (4, 0)
+
+
+
+/* BANDWIDTH_EC_GLOBAL_CONTROL/MONACO_TC_GLOBAL_CONTROL */
+
+#define GLOBAL_TRIGOVRD (1, 4)
+#define CAPTURE (1, 3)
+#define RETRIEVAL_MODE (1, 2)
+#define GLOBAL_RESET (1, 1)
+#define GLOBAL_ENABLE (1, 0)
+
+/* MONACO_EC_ROLLOVER_CONTROL */
+
+#define ECSATURATEEN(__cntr) (1, ((__cntr) & 0xF))
+
+/* MONACO_EC_ENABLE_SET */
+
+#define ECENSET(__cntr) (1, ((__cntr) & 0xF))
+
+/* MONACO_EC_ENABLE_CLEAR */
+
+#define ECENCLEAR(__cntr) (1, ((__cntr) & 0xF))
+
+/* MONACO_EC_INTERRUPT_ENABLE_SET */
+
+#define ECINTENSET(__cntr) (1, ((__cntr) & 0xF))
+
+/* MONACO_EC_INTERRUPT_ENABLE_CLEAR */
+
+#define ECINTENCLR(__cntr) (1, ((__cntr) & 0xF))
+
+/* MONACO_EC_GANG */
+
+#define ECGANGEN(__pair) (1, (((__pair) & 0x7) * 2 + 1))
+
+/* MONACO_EC_OVF_STATUS */
+
+#define ECOVF(__cntr) (1, ((__cntr) & 0xF))
+
+/* MONACO_EC_COUNTER_SEL */
+
+#define ECSEL (4, 0)
+
+/* MONACO_EC_SWINC */
+
+#define ECSWINC(__cntr) (1, ((__cntr) & 0xF))
+
+
+/* MONACO_LSR */
+
+#define NTT (1, 2)
+#define SLK (1, 1)
+#define SLI (1, 0)
+
+/*
+ * Bit field manipulation macros.
+ * These use the bitfield definitions above to set or get the given field.
+ */
+
+/*
+ * A field is written as a parenthesised pair "(size, shift)", so pasting it
+ * after __SIZE or __SHIFT turns it into that macro's argument list and
+ * selects one member of the pair.
+ */
+#define __SIZE(__sz, __sh) __sz
+#define __SHIFT(__sz, __sh) __sh
+/* Mask __v to __b bits and move it up to bit position __s */
+#define __SETI(__b, __s, __v) ((u32)(((__v) & GENMASK((__b - 1), 0)) << (__s)))
+/* Clear the __b bits starting at bit __s in __v */
+#define __CLRI(__b, __s, __v) ((u32)((__v) & ~(GENMASK((__b - 1) + __s, __s))))
+/* Extract the __b bits starting at bit __s from __v */
+#define __GETI(__b, __s, __v) ((u32)(((__v) >> (__s)) & GENMASK((__b - 1), 0)))
+
+/* Return a value with the given bitfield set to the given value */
+#define SET(__f, __v) __SETI(__SIZE __f, __SHIFT __f, (__v))
+
+/* Return a value with the given bitfield set to zero */
+#define CLR(__f, __v) __CLRI(__SIZE __f, __SHIFT __f, (__v))
+
+/* Retrieve the given bitfield from the given value */
+#define GET(__f, __v) __GETI(__SIZE __f, __SHIFT __f, (__v))