Re: [RFC][PATCH] perf: Rewrite core context handling
From: Alexey Budankov
Date: Wed Oct 17 2018 - 04:59:00 EST
Hi,
On 10.10.2018 13:45, Peter Zijlstra wrote:
<SNIP>
> -static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
> +/*
> + * XXX somewhat completely buggered; this is in cpu_pmu_context, but we need
> + * event_pmu_context for rotations. We also need event_pmu_context specific
> + * scheduling routines. ARGH
> + *
> + * - fixed the cpu_pmu_context vs event_pmu_context thingy
> + * (cpu_pmu_context embeds an event_pmu_context)
> + *
> + * - need nr_events/nr_active in epc to do per epc rotation
> + * (done)
> + *
> + * - need cpu and task pmu ctx together...
> + * (cpc->task_epc)
> + */
> +static bool perf_rotate_context(struct perf_cpu_pmu_context *cpc)
Since it reduces to single cpu context (and single task context) at all times,
ideally, it would probably be coded as simple as this:
perf_rotate_context()
{
cpu = this_cpu_ptr(&cpu_context)
for_every_pmu(pmu, cpu)
for_every_event_ctx(event_ctx, pmu)
rotate(event_ctx, pmu)
}
so rotate(event_ctx, pmu) would operate on common events objects semantics
and memory layout, and PMU specific code handle SW/HW programming differences.
Implementing that implies this data relations:
cpu (struct perf_cpu_context)
|
v
cpu_context ---> cpu_0->cpu_1->cpu_2->cpu_3
| | | |
v v v v
pmu0 (struct pmu) pmu00 pmu01 pmu02 pmu03
| | | |
v v v v
pmu1 pmu10 pmu11 pmu12 pmu13
| | | |
v v v v
pmu2 pmu20 pmu21 *pmu22* pmu23 <- pmu (struct perf_cpu_pmu_context)
event_ctx
|
v
*pmu22* (struct perf_cpu_pmu_context) -> ctx22_0 -> ctx22_1
| |
v v
event00 event01
| |
v v
*event10* event11 <- event
| |
v v
event20 event21
In new schema that would result in one more link on the right:
cpu_context[NR_CPUS]
|
v
task_struct::perf_event_ctxp -> perf_event_context <- perf_cpu_context -----,
^ | ^ ^ |
`---------------------------------' | | |
| `--> perf_event_pmu_context | <- link
| ^ ^ |
| | | |
| ,-----' v |
| | perf_cpu_pmu_context <-'
| | ^
| | |
v v v
perf_event ---> pmu[,cpu_pmu_ctx[NR_CPUS],]
Thanks,
Alexey
> {
> + struct perf_cpu_context *cpuctx = this_cpu_ptr(&cpu_context);
> + struct perf_event_pmu_context *cpu_epc, *task_epc = NULL;
> struct perf_event *cpu_event = NULL, *task_event = NULL;
> bool cpu_rotate = false, task_rotate = false;
> struct perf_event_context *ctx = NULL;
> + struct pmu *pmu;
>
> /*
> * Since we run this from IRQ context, nobody can install new
> * events, thus the event count values are stable.
> */
>
> - if (cpuctx->ctx.nr_events) {
> - if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
> - cpu_rotate = true;
> - }
> + cpu_epc = &cpc->epc;
> + pmu = cpu_epc->pmu;
>
> - ctx = cpuctx->task_ctx;
> - if (ctx && ctx->nr_events) {
> - if (ctx->nr_events != ctx->nr_active)
> + if (cpu_epc->nr_events && cpu_epc->nr_events != cpu_epc->nr_active)
> + cpu_rotate = true;
> +
> + task_epc = cpc->task_epc;
> + if (task_epc) {
> + WARN_ON_ONCE(task_epc->pmu != pmu);
> + if (task_epc->nr_events && task_epc->nr_events != task_epc->nr_active)
> task_rotate = true;
> }
>
> if (!(cpu_rotate || task_rotate))
> return false;
>
> - perf_ctx_lock(cpuctx, cpuctx->task_ctx);
> - perf_pmu_disable(cpuctx->ctx.pmu);
> + perf_ctx_lock(cpuctx, ctx);
> + perf_pmu_disable(pmu);
>
> if (task_rotate)
> - task_event = ctx_first_active(ctx);
> + task_event = ctx_first_active(task_epc);
> +
> if (cpu_rotate)
> - cpu_event = ctx_first_active(&cpuctx->ctx);
> + cpu_event = ctx_first_active(cpu_epc);
>
> /*
> * As per the order given at ctx_resched() first 'pop' task flexible
> * and then, if needed CPU flexible.
> */
> - if (task_event || (ctx && cpu_event))
> - ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
> - if (cpu_event)
> - cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
> + if (task_event || (task_epc && cpu_event)) {
> + update_context_time(ctx);
> + __pmu_ctx_sched_out(task_epc, EVENT_FLEXIBLE);
> + }
> +
> + if (cpu_event) {
> + update_context_time(&cpuctx->ctx);
> + __pmu_ctx_sched_out(cpu_epc, EVENT_FLEXIBLE);
> + rotate_ctx(&cpuctx->ctx, cpu_event);
> + __pmu_ctx_sched_in(&cpuctx->ctx, pmu);
> + }
>
> if (task_event)
> rotate_ctx(ctx, task_event);
> - if (cpu_event)
> - rotate_ctx(&cpuctx->ctx, cpu_event);
>
> - perf_event_sched_in(cpuctx, ctx, current);
> + if (task_event || (task_epc && cpu_event))
> + __pmu_ctx_sched_in(ctx, pmu);
>
> - perf_pmu_enable(cpuctx->ctx.pmu);
> - perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> + perf_pmu_enable(pmu);
> + perf_ctx_unlock(cpuctx, ctx);
>
> return true;
> }