Re: [PATCH V5 21/25] perf: Introduce PERF_TYPE_HARDWARE_PMU and PERF_TYPE_HW_CACHE_PMU

From: Peter Zijlstra
Date: Fri Apr 09 2021 - 05:22:18 EST


On Mon, Apr 05, 2021 at 08:11:03AM -0700, kan.liang@xxxxxxxxxxxxxxx wrote:
> From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
>
> Current Hardware events and Hardware cache events have special perf
> types, PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE. The two types don't
> pass the PMU type in the user interface. For a hybrid system, the perf
> subsystem doesn't know which PMU the events belong to. The first capable
> PMU will always be assigned to the events. The events never get a chance
> to run on the other capable PMUs.
>
> Add a PMU aware version PERF_TYPE_HARDWARE_PMU and
> PERF_TYPE_HW_CACHE_PMU. The PMU type ID is stored at attr.config[40:32].
> Support the new types for X86.

Obviously ARM would need the same, but also, I don't see the need to
introduce new types. AFAICT nothing stops this scheme from working with
the existing types.

Also, the PMU type is 32 bits wide, not 8 bits.

So how about something like this?

---
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3f7f89ea5e51..074c7687d466 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -260,15 +260,16 @@ struct perf_event;
/**
* pmu::capabilities flags
*/
-#define PERF_PMU_CAP_NO_INTERRUPT 0x01
-#define PERF_PMU_CAP_NO_NMI 0x02
-#define PERF_PMU_CAP_AUX_NO_SG 0x04
-#define PERF_PMU_CAP_EXTENDED_REGS 0x08
-#define PERF_PMU_CAP_EXCLUSIVE 0x10
-#define PERF_PMU_CAP_ITRACE 0x20
-#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40
-#define PERF_PMU_CAP_NO_EXCLUDE 0x80
-#define PERF_PMU_CAP_AUX_OUTPUT 0x100
+#define PERF_PMU_CAP_NO_INTERRUPT 0x0001
+#define PERF_PMU_CAP_NO_NMI 0x0002
+#define PERF_PMU_CAP_AUX_NO_SG 0x0004
+#define PERF_PMU_CAP_EXTENDED_REGS 0x0008
+#define PERF_PMU_CAP_EXCLUSIVE 0x0010
+#define PERF_PMU_CAP_ITRACE 0x0020
+#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x0040
+#define PERF_PMU_CAP_NO_EXCLUDE 0x0080
+#define PERF_PMU_CAP_AUX_OUTPUT 0x0100
+#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0200

struct perf_output_handle;

diff --git a/kernel/events/core.c b/kernel/events/core.c
index f07943183041..910a0666ebfe 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11113,14 +11113,21 @@ static struct pmu *perf_init_event(struct perf_event *event)
* are often aliases for PERF_TYPE_RAW.
*/
type = event->attr.type;
- if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE)
- type = PERF_TYPE_RAW;
+ if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) {
+ type = event->attr.config >> 32;
+ if (!type)
+ type = PERF_TYPE_RAW;
+ }

again:
rcu_read_lock();
pmu = idr_find(&pmu_idr, type);
rcu_read_unlock();
if (pmu) {
+ if (event->attr.type != type && type != PERF_TYPE_RAW &&
+ !(pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE))
+ goto fail;
+
ret = perf_try_init_event(pmu, event);
if (ret == -ENOENT && event->attr.type != type) {
type = event->attr.type;
@@ -11143,6 +11150,7 @@ static struct pmu *perf_init_event(struct perf_event *event)
goto unlock;
}
}
+fail:
pmu = ERR_PTR(-ENOENT);
unlock:
srcu_read_unlock(&pmus_srcu, idx);