Re: [PATCH v2 2/3] perf/ibs: Fix interface via core pmu events
From: Peter Zijlstra
Date: Sun Mar 12 2023 - 10:54:43 EST
On Thu, Mar 09, 2023 at 03:41:10PM +0530, Ravi Bangoria wrote:
> diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
> index 8c45b198b62f..81d67b899371 100644
> --- a/arch/x86/events/amd/core.c
> +++ b/arch/x86/events/amd/core.c
> @@ -371,10 +371,15 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc)
> static int amd_pmu_hw_config(struct perf_event *event)
> {
> int ret;
> + u64 dummy;
>
> - /* pass precise event sampling to ibs: */
> - if (event->attr.precise_ip && get_ibs_caps())
> - return -ENOENT;
> + if (event->attr.precise_ip) {
> + /* pass precise event sampling to ibs by returning -ESRCH */
> + if (get_ibs_caps() && !ibs_core_pmu_event(event, &dummy))
> + return -ESRCH;
> + else
> + return -ENOENT;
> + }
>
> if (has_branch_stack(event) && !x86_pmu.lbr_nr)
> return -EOPNOTSUPP;
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index f79fd8b87f75..e990c71ba34a 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -11639,18 +11639,26 @@ static struct pmu *perf_init_event(struct perf_event *event)
> goto again;
> }
>
> + /*
> + * pmu->event_init() should return -ESRCH only when it
> + * wants to forward the event to other pmu.
> + */
> + if (ret == -ESRCH)
> + goto try_all;
> +
> if (ret)
> pmu = ERR_PTR(ret);
>
> goto unlock;
> }
>
> +try_all:
> list_for_each_entry_rcu(pmu, &pmus, entry, lockdep_is_held(&pmus_srcu)) {
> ret = perf_try_init_event(pmu, event);
> if (!ret)
> goto unlock;
>
> - if (ret != -ENOENT) {
> + if (ret != -ENOENT && ret != -ESRCH) {
> pmu = ERR_PTR(ret);
> goto unlock;
> }
Urgh.. So amd_pmu_hw_config() knows what PMU it should be but has no
real way to communicate this, so you make it try all of them again.
Now, we already have a gruesome hack in there, and I'm thikning you
should use that instead of adding yet another one. Note:
if (ret == -ENOENT && event->attr.type != type && !extended_type) {
type = event->attr.type;
goto again;
So if you have amd_pmu_hw_config() do:
event->attr.type = ibs_pmu.type;
return -ENOENT;
it should all just work no?
And now thinking about this, I'm thinking we can clean up the whole
swevent mess too, a little something like the below perhaps... Then it
might just be possible to remove that list_for_each_entry_rcu()
entirely.
Hmm?
---
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f79fd8b87f75..26130d1ca40b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9951,6 +9951,9 @@ static void sw_perf_event_destroy(struct perf_event *event)
swevent_hlist_put();
}
+static struct pmu perf_cpu_clock; /* fwd declaration */
+static struct pmu perf_task_clock;
+
static int perf_swevent_init(struct perf_event *event)
{
u64 event_id = event->attr.config;
@@ -9966,7 +9969,11 @@ static int perf_swevent_init(struct perf_event *event)
switch (event_id) {
case PERF_COUNT_SW_CPU_CLOCK:
+ event->attr.type = perf_cpu_clock.type;
+ return -ENOENT;
+
case PERF_COUNT_SW_TASK_CLOCK:
+ event->attr.type = perf_task_clock.type;
return -ENOENT;
default: