[RFC 2/3] perf/x86: Control RDPMC access from .enable() hook

From: Rob Herring
Date: Wed Jul 28 2021 - 19:02:47 EST


Rather than controlling RDPMC access behind the scenes from switch_mm(),
move the RDPMC access controls to the PMU .enable() hook. The .enable()
hook is called whenever the perf CPU or task context changes, which is
when RDPMC access may need to change.

This is the first step in moving the RDPMC state tracking out of the mm
context to the perf context.

Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: linux-perf-users@xxxxxxxxxxxxxxx
Signed-off-by: Rob Herring <robh@xxxxxxxxxx>
---
I'm not sure, but I think the set_attr_rdpmc() IPI needs to hold the
perf ctx lock?
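
For reference (not part of this patch), below is a rough user-space
sketch of the self-monitoring RDPMC path that the CR4.PCE toggling
gates, following the lock-free read sequence documented in
perf_event_open(2). The event choice and the minimal error handling are
illustrative assumptions only:

/*
 * Example only: a task reading its own counter with RDPMC via the perf
 * mmap page. This is the path that relies on CR4.PCE being set for the
 * task's CPU while the event is scheduled in.
 */
#include <linux/perf_event.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static inline uint64_t rdpmc(uint32_t counter)
{
	uint32_t lo, hi;

	asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	struct perf_event_attr attr;
	struct perf_event_mmap_page *pc;
	uint64_t count;
	int64_t pmc;
	uint32_t seq, idx;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.exclude_kernel = 1;

	/* Count instructions for the current task on any CPU. */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	/* The first mmap'ed page exports index, offset and cap_user_rdpmc. */
	pc = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);
	if (pc == MAP_FAILED)
		return 1;

	/* Lock-free read loop as documented in perf_event_open(2). */
	do {
		seq = pc->lock;
		asm volatile("" ::: "memory");
		idx = pc->index;
		count = pc->offset;
		if (pc->cap_user_rdpmc && idx) {
			pmc = rdpmc(idx - 1);
			/* Sign-extend the pmc_width-bit raw counter value. */
			pmc <<= 64 - pc->pmc_width;
			pmc >>= 64 - pc->pmc_width;
			count += pmc;
		}
		asm volatile("" ::: "memory");
	} while (pc->lock != seq);

	printf("instructions: %llu\n", (unsigned long long)count);
	return 0;
}

When RDPMC access is disabled (via the sysfs rdpmc attribute that
set_attr_rdpmc() handles, or because the task has no mapped event), the
rdpmc() above faults; keeping that behaviour consistent is what the
.enable() hook now has to take care of.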


 arch/x86/events/core.c             | 75 +++++++++++++++++++-----------
 arch/x86/include/asm/mmu_context.h |  6 ---
 arch/x86/include/asm/perf_event.h  |  1 -
 arch/x86/mm/tlb.c                  | 29 +-----------
 4 files changed, 49 insertions(+), 62 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 03f87fd4c017..5c1703206ef5 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -52,8 +52,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.pmu = &pmu,
};

-DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
-DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
+static DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
+static DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
DEFINE_STATIC_KEY_FALSE(perf_is_hybrid);

/*
@@ -727,11 +727,52 @@ static void x86_pmu_disable(struct pmu *pmu)
static_call(x86_pmu_disable_all)();
}

+static void perf_clear_dirty_counters(struct cpu_hw_events *cpuc)
+{
+ int i;
+
+ /* Don't need to clear the assigned counter. */
+ for (i = 0; i < cpuc->n_events; i++)
+ __clear_bit(cpuc->assign[i], cpuc->dirty);
+
+ if (bitmap_empty(cpuc->dirty, X86_PMC_IDX_MAX))
+ return;
+
+ for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
+ /* Metrics and fake events don't have corresponding HW counters. */
+ if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR))
+ continue;
+ else if (i >= INTEL_PMC_IDX_FIXED)
+ wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
+ else
+ wrmsrl(x86_pmu_event_addr(i), 0);
+ }
+
+ bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX);
+}
+
+static void x86_pmu_set_user_access(struct cpu_hw_events *cpuc)
+{
+ if (static_branch_unlikely(&rdpmc_always_available_key) ||
+ (!static_branch_unlikely(&rdpmc_never_available_key) &&
+ atomic_read(&(this_cpu_read(cpu_tlbstate.loaded_mm)->context.perf_rdpmc_allowed)))) {
+ /*
+ * Clear the existing dirty counters to
+ * prevent the leak for an RDPMC task.
+ */
+ perf_clear_dirty_counters(cpuc);
+ cr4_set_bits_irqsoff(X86_CR4_PCE);
+ } else
+ cr4_clear_bits_irqsoff(X86_CR4_PCE);
+}
+
void x86_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;

+ x86_pmu_set_user_access(cpuc);
+
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw;

@@ -2476,29 +2517,9 @@ static int x86_pmu_event_init(struct perf_event *event)
return err;
}

-void perf_clear_dirty_counters(void)
+static void x86_pmu_set_user_access_ipi(void *unused)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- int i;
-
- /* Don't need to clear the assigned counter. */
- for (i = 0; i < cpuc->n_events; i++)
- __clear_bit(cpuc->assign[i], cpuc->dirty);
-
- if (bitmap_empty(cpuc->dirty, X86_PMC_IDX_MAX))
- return;
-
- for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
- /* Metrics and fake events don't have corresponding HW counters. */
- if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR))
- continue;
- else if (i >= INTEL_PMC_IDX_FIXED)
- wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
- else
- wrmsrl(x86_pmu_event_addr(i), 0);
- }
-
- bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX);
+ x86_pmu_set_user_access(this_cpu_ptr(&cpu_hw_events));
}

static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
@@ -2519,7 +2540,7 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
mmap_assert_write_locked(mm);

if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
- on_each_cpu_mask(mm_cpumask(mm), cr4_update_pce, NULL, 1);
+ on_each_cpu_mask(mm_cpumask(mm), x86_pmu_set_user_access_ipi, NULL, 1);
}

static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
@@ -2528,7 +2549,7 @@ static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *m
return;

if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed))
- on_each_cpu_mask(mm_cpumask(mm), cr4_update_pce, NULL, 1);
+ on_each_cpu_mask(mm_cpumask(mm), x86_pmu_set_user_access_ipi, NULL, 1);
}

static int x86_pmu_event_idx(struct perf_event *event)
@@ -2584,7 +2605,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
else if (x86_pmu.attr_rdpmc == 2)
static_branch_dec(&rdpmc_always_available_key);

- on_each_cpu(cr4_update_pce, NULL, 1);
+ on_each_cpu(x86_pmu_set_user_access_ipi, NULL, 1);
x86_pmu.attr_rdpmc = val;
}

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 27516046117a..1cbb32ac245e 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -22,12 +22,6 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
}
#endif /* !CONFIG_PARAVIRT_XXL */

-#ifdef CONFIG_PERF_EVENTS
-DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key);
-DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);
-void cr4_update_pce(void *ignored);
-#endif
-
#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
* ldt_structs can be allocated, used, and freed, but they are never
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8fc1b5003713..544f41a179fb 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -478,7 +478,6 @@ struct x86_pmu_lbr {

extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
extern void perf_check_microcode(void);
-extern void perf_clear_dirty_counters(void);
extern int x86_perf_rdpmc_index(struct perf_event *event);
#else
static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index cfe6b1e85fa6..060a3de78380 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -400,31 +400,6 @@ static void cond_ibpb(struct task_struct *next)
}
}

-#ifdef CONFIG_PERF_EVENTS
-static inline void cr4_update_pce_mm(struct mm_struct *mm)
-{
- if (static_branch_unlikely(&rdpmc_always_available_key) ||
- (!static_branch_unlikely(&rdpmc_never_available_key) &&
- atomic_read(&mm->context.perf_rdpmc_allowed))) {
- /*
- * Clear the existing dirty counters to
- * prevent the leak for an RDPMC task.
- */
- perf_clear_dirty_counters();
- cr4_set_bits_irqsoff(X86_CR4_PCE);
- } else
- cr4_clear_bits_irqsoff(X86_CR4_PCE);
-}
-
-void cr4_update_pce(void *ignored)
-{
- cr4_update_pce_mm(this_cpu_read(cpu_tlbstate.loaded_mm));
-}
-
-#else
-static inline void cr4_update_pce_mm(struct mm_struct *mm) { }
-#endif
-
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
@@ -581,10 +556,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
this_cpu_write(cpu_tlbstate.loaded_mm, next);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);

- if (next != real_prev) {
- cr4_update_pce_mm(next);
+ if (next != real_prev)
switch_ldt(real_prev, next);
- }
}

/*
--
2.27.0