Re: [RFC v2] perf: Rewrite core context handling

From: Ravi Bangoria
Date: Mon Aug 22 2022 - 12:38:08 EST


On 22-Aug-22 9:13 PM, Peter Zijlstra wrote:
> On Mon, Aug 22, 2022 at 05:29:11PM +0200, Peter Zijlstra wrote:
>> On Tue, Aug 02, 2022 at 11:41:42AM +0530, Ravi Bangoria wrote:
>>>
>>>> pulling up the ctx->mutex makes things simpler, but also violates the
>>>> locking order vs exec_update_lock.
>>>>
>>>> Pull that lock up as well...
>>>
>>> I'm not able to apply this patch as is but I get the idea. Few
>>> questions below...
>>
>> I was just about to rebase the 'series' to current, let me do that and
>> get back to you on the specifics.
>
> https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=perf/wip.rewrite

An additional set of changes on top of this tree is required to build and boot,
at least on my AMD machine:

---
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ccd231ea6a4e..94fb65d7b291 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1248,7 +1248,7 @@ static inline void amd_pmu_brs_add(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

- perf_sched_cb_inc(event->ctx->pmu);
+ perf_sched_cb_inc(event->pmu_ctx->pmu);
cpuc->lbr_users++;
/*
* No need to reset BRS because it is reset
@@ -1263,7 +1263,7 @@ static inline void amd_pmu_brs_del(struct perf_event *event)
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);

- perf_sched_cb_dec(event->ctx->pmu);
+ perf_sched_cb_dec(event->pmu_ctx->pmu);
}

void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 31ae032d6783..086e37fa32be 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -843,7 +843,7 @@ static void perf_cgroup_switch(struct task_struct *task)

WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
if (READ_ONCE(cpuctx->cgrp) == cgrp)
- continue;
+ return;

perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_ctx_disable(&cpuctx->ctx);
@@ -881,7 +881,7 @@ static int perf_cgroup_ensure_storage(struct perf_event *event,
heap_size++;

for_each_possible_cpu(cpu) {
- cpuctx = this_cpu_ptr(&cpu_context);
+ cpuctx = per_cpu_ptr(&cpu_context, cpu);
if (heap_size <= cpuctx->heap_size)
continue;

@@ -2315,7 +2315,7 @@ __perf_remove_from_context(struct perf_event *event,
if (!pmu_ctx->nr_events) {
pmu_ctx->rotate_necessary = 0;

- if (ctx->task) {
+ if (ctx->task && ctx->is_active) {
struct perf_cpu_pmu_context *cpc;

cpc = this_cpu_ptr(pmu_ctx->pmu->cpu_pmu_context);
@@ -11972,6 +11972,15 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
goto out;
}

+static void mutex_lock_double(struct mutex *a, struct mutex *b)
+{
+ if (b < a)
+ swap(a, b);
+
+ mutex_lock(a);
+ mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
+}
+
static int
perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
{
---

With this, I can run 'perf test' and perf_event_tests without any errors in
dmesg. I'll run the perf fuzzer overnight and see if it reports any issues.

Thanks,
Ravi