[PATCH V4 3/6] perf: Supply task information to sched_task()

From: kan . liang
Date: Wed May 19 2021 - 11:20:03 EST


From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>

To save/restore LBR call stack data in system-wide mode, the task_struct
information is required.

Extend the parameters of sched_task() to supply task_struct information.

On schedule-in, the LBR call stack data of the new task is restored.
On schedule-out, the LBR call stack data of the old task is saved.
Only the relevant task_struct needs to be passed: next on schedule-in, prev on schedule-out.

Reviewed-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
---

Changes since V3:
- Rebase on top of 5.13-rc2

arch/powerpc/perf/core-book3s.c | 8 ++++++--
arch/x86/events/core.c | 5 +++--
arch/x86/events/intel/core.c | 4 ++--
arch/x86/events/intel/lbr.c | 3 ++-
arch/x86/events/perf_event.h | 5 +++--
include/linux/perf_event.h | 2 +-
kernel/events/core.c | 15 ++++++++-------
7 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 16d4d1b..5c2cc4c 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -131,7 +131,10 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)

static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
-static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
+static void power_pmu_sched_task(struct perf_event_context *ctx,
+ struct task_struct *task, bool sched_in)
+{
+}
static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
@@ -441,7 +444,8 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
/* Called from ctxsw to prevent one process's branch entries to
* mingle with the other process's entries during context switch.
*/
-static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void power_pmu_sched_task(struct perf_event_context *ctx,
+ struct task_struct *task, bool sched_in)
{
if (!ppmu->bhrb_nr)
return;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8f71dd7..8368cc7 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2603,9 +2603,10 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
NULL,
};

-static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void x86_pmu_sched_task(struct perf_event_context *ctx,
+ struct task_struct *task, bool sched_in)
{
- static_call_cond(x86_pmu_sched_task)(ctx, sched_in);
+ static_call_cond(x86_pmu_sched_task)(ctx, task, sched_in);
}

static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index e288922..b7fc7c9 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4467,10 +4467,10 @@ static void intel_pmu_cpu_dead(int cpu)
}

static void intel_pmu_sched_task(struct perf_event_context *ctx,
- bool sched_in)
+ struct task_struct *task, bool sched_in)
{
intel_pmu_pebs_sched_task(ctx, sched_in);
- intel_pmu_lbr_sched_task(ctx, sched_in);
+ intel_pmu_lbr_sched_task(ctx, task, sched_in);
}

static void intel_pmu_swap_task_ctx(struct perf_event_context *prev,
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 4409d2c..7914f40 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -619,7 +619,8 @@ void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
task_context_opt(next_ctx_data)->lbr_callstack_users);
}

-void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx,
+ struct task_struct *task, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
void *task_ctx;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ad87cb3..29f8df7 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -777,7 +777,7 @@ struct x86_pmu {

void (*check_microcode)(void);
void (*sched_task)(struct perf_event_context *ctx,
- bool sched_in);
+ struct task_struct *task, bool sched_in);

/*
* Intel Arch Perfmon v2+
@@ -1310,7 +1310,8 @@ void intel_ds_init(void);
void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
struct perf_event_context *next);

-void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx,
+ struct task_struct *task, bool sched_in);

u64 lbr_from_signext_quirk_wr(u64 val);

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d46b7e1..0085b97 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -428,7 +428,7 @@ struct pmu {
* context-switches callback
*/
void (*sched_task) (struct perf_event_context *ctx,
- bool sched_in);
+ struct task_struct *task, bool sched_in);

/*
* Kmem cache of PMU specific data
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bb1b27e..a83284b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3483,7 +3483,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
perf_pmu_disable(pmu);

if (cpuctx->sched_cb_usage && pmu->sched_task)
- pmu->sched_task(ctx, false);
+ pmu->sched_task(ctx, task, false);

/*
* PMU specific parts of task perf context can require
@@ -3523,7 +3523,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
perf_pmu_disable(pmu);

if (cpuctx->sched_cb_usage && pmu->sched_task)
- pmu->sched_task(ctx, false);
+ pmu->sched_task(ctx, task, false);
task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);

perf_pmu_enable(pmu);
@@ -3562,7 +3562,8 @@ void perf_sched_cb_inc(struct pmu *pmu)
* PEBS requires this to provide PID/TID information. This requires we flush
* all queued PEBS records before we context switch to a new task.
*/
-static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in)
+static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx,
+ struct task_struct *task, bool sched_in)
{
struct pmu *pmu;

@@ -3574,7 +3575,7 @@ static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(pmu);

- pmu->sched_task(cpuctx->task_ctx, sched_in);
+ pmu->sched_task(cpuctx->task_ctx, task, sched_in);

perf_pmu_enable(pmu);
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
@@ -3594,7 +3595,7 @@ static void perf_pmu_sched_task(struct task_struct *prev,
if (cpuctx->task_ctx)
continue;

- __perf_pmu_sched_task(cpuctx, sched_in);
+ __perf_pmu_sched_task(cpuctx, sched_in ? next : prev, sched_in);
}
}

@@ -3860,7 +3861,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
cpuctx = __get_cpu_context(ctx);
if (cpuctx->task_ctx == ctx) {
if (cpuctx->sched_cb_usage)
- __perf_pmu_sched_task(cpuctx, true);
+ __perf_pmu_sched_task(cpuctx, task, true);
return;
}

@@ -3886,7 +3887,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
perf_event_sched_in(cpuctx, ctx, task);

if (cpuctx->sched_cb_usage && pmu->sched_task)
- pmu->sched_task(cpuctx->task_ctx, true);
+ pmu->sched_task(cpuctx->task_ctx, task, true);

perf_pmu_enable(pmu);

--
2.7.4