Re: [PATCH V5] perf/x86: Reset the dirty counter to prevent the leak for an RDPMC task

From: Liang, Kan
Date: Wed Apr 21 2021 - 11:12:19 EST

On 4/21/2021 4:11 AM, Peter Zijlstra wrote:
>> @@ -2327,10 +2367,17 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
>>  static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
>>  {
>> +	unsigned long flags;
>>  	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
>>  		return;
>>
>> +	if (x86_pmu.sched_task && event->hw.target) {
>> +		local_irq_save(flags);
>> +		perf_sched_cb_dec(event->ctx->pmu);
>> +		local_irq_restore(flags);
>> +	}
>> +
>>  	if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed))
>>  		on_each_cpu_mask(mm_cpumask(mm), cr4_update_pce, NULL, 1);
>>  }
>
> I don't understand how this can possibly be correct. Both
> perf_sched_cb_{inc,dec} modify strict per-cpu state, but the mapped
> functions happen on whatever random CPU of the moment whenever the task
> memory map changes.
>
> Suppose we mmap() on CPU0 and then exit on CPU1. Or worse, suppose the
> task does mmap() on CPU0 but then creates threads and runs on CPU1-4
> concurrently before exiting on CPU5.
>
> Could be I'm not seeing it due to having a snot-brain, please explain.

You are right.
I implemented a new test case that does mmap() on CPU 3, then runs and exits on CPU 0. It can still read the counter values of other tasks on CPU 0.
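
For reference, the new test boils down to something like the sketch below. This is an untested illustration, not the actual test code: it assumes rdpmc is globally enabled (the /sys/devices/cpu/rdpmc default), that indices 0-3 are valid GP counters on the machine, and error handling is omitted.

#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sched.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t rdpmc(uint32_t counter)
{
        uint32_t lo, hi;

        /* RDPMC reads the HW counter selected by ECX into EDX:EAX. */
        asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
        return ((uint64_t)hi << 32) | lo;
}

static void pin_to_cpu(int cpu)
{
        cpu_set_t set;

        CPU_ZERO(&set);
        CPU_SET(cpu, &set);
        sched_setaffinity(0, sizeof(set), &set);
}

int main(void)
{
        struct perf_event_attr attr = {
                .type           = PERF_TYPE_HARDWARE,
                .size           = sizeof(attr),
                .config         = PERF_COUNT_HW_INSTRUCTIONS,
                .exclude_kernel = 1,
        };
        int fd, i;

        /* mmap() the event on CPU 3: this is what marks the mm as
         * perf_rdpmc_allowed and sets CR4.PCE. */
        pin_to_cpu(3);
        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);

        /* Migrate to CPU 0 and read the GP counters directly. Without
         * the fix, stale values left by other tasks are visible here. */
        pin_to_cpu(0);
        for (i = 0; i < 4; i++)
                printf("counter %d: %llu\n", i,
                       (unsigned long long)rdpmc(i));

        return 0;
}

With the patch below applied, the loop should read 0 for every dirty counter the task does not own, instead of another task's counts.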

Actually, I don't think we need perf_sched_cb_{inc,dec} and sched_task().

The mm->context.perf_rdpmc_allowed count already tells us whether a task is an RDPMC task.
I think a cleaner way is to add a new check_leakage() method: when perf schedules in an RDPMC task, we invoke the method and clear the dirty counters.
(I use the generic name check_leakage for the method so that other architectures can reuse it if possible.)

The patch is below. Both the new and the old test cases pass. I will do more testing.

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index c7fcc8d..229dd48 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1624,6 +1624,8 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 		goto do_del;
 
+	__set_bit(event->hw.idx, cpuc->dirty);
+
 	/*
 	 * Not a TXN, therefore cleanup properly.
 	 */
@@ -2631,6 +2633,37 @@ static int x86_pmu_check_period(struct perf_event *event, u64 value)
 	return 0;
 }
 
+static void x86_pmu_clear_dirty_counters(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int i;
+
+	/* Don't need to clear the assigned counter. */
+	for (i = 0; i < cpuc->n_events; i++)
+		__clear_bit(cpuc->assign[i], cpuc->dirty);
+
+	if (bitmap_empty(cpuc->dirty, X86_PMC_IDX_MAX))
+		return;
+
+	for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
+		/* Metrics and fake events don't have corresponding HW counters. */
+		if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR))
+			continue;
+		else if (i >= INTEL_PMC_IDX_FIXED)
+			wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
+		else
+			wrmsrl(x86_pmu_event_addr(i), 0);
+	}
+
+	bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX);
+}
+
+static void x86_pmu_check_leakage(void)
+{
+	if (READ_ONCE(x86_pmu.attr_rdpmc))
+		x86_pmu_clear_dirty_counters();
+}
+
 static int x86_pmu_aux_output_match(struct perf_event *event)
 {
 	if (!(pmu.capabilities & PERF_PMU_CAP_AUX_OUTPUT))
@@ -2675,6 +2708,7 @@ static struct pmu pmu = {
 	.sched_task		= x86_pmu_sched_task,
 	.swap_task_ctx		= x86_pmu_swap_task_ctx,
 	.check_period		= x86_pmu_check_period,
+	.check_leakage		= x86_pmu_check_leakage,
 
 	.aux_output_match	= x86_pmu_aux_output_match,
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 27fa85e..d6003e0 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -229,6 +229,7 @@ struct cpu_hw_events {
 	 */
 	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	unsigned long		dirty[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	int			enabled;
 
 	int			n_events; /* the # of events in the below arrays */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a763928..bcf3964 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -514,6 +514,11 @@ struct pmu {
 	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
 	 */
 	int (*check_period)		(struct perf_event *event, u64 value); /* optional */
+
+	/*
+	 * Check and clear dirty counters to prevent potential leakage
+	 */
+	void (*check_leakage)		(void); /* optional */
 };
 
 enum perf_addr_filter_action_t {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 928b166..b496113 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3822,6 +3822,12 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
 	ctx_sched_in(ctx, cpuctx, event_type, task);
 }
 
+static bool has_check_leakage(struct pmu *pmu)
+{
+	return pmu->check_leakage && current->mm &&
+	       atomic_read(&current->mm->context.perf_rdpmc_allowed);
+}
+
 static void perf_event_context_sched_in(struct perf_event_context *ctx,
 					struct task_struct *task)
 {
@@ -3832,6 +3838,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 	if (cpuctx->task_ctx == ctx) {
 		if (cpuctx->sched_cb_usage)
 			__perf_pmu_sched_task(cpuctx, true);
+		if (has_check_leakage(pmu))
+			pmu->check_leakage();
 		return;
 	}

@@ -3858,6 +3866,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 
 	if (cpuctx->sched_cb_usage && pmu->sched_task)
 		pmu->sched_task(cpuctx->task_ctx, true);
+	if (has_check_leakage(pmu))
+		pmu->check_leakage();
 
 	perf_pmu_enable(pmu);

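To spell out how this fixes the new test case: when another task's event is deleted on CPU 0, x86_pmu_del() marks its counter in cpuc->dirty, but the stale value stays in the MSR. When the RDPMC task is later scheduled in on CPU 0, perf_event_context_sched_in() sees that its mm has perf_rdpmc_allowed set and calls check_leakage(), which writes 0 to every dirty counter that is not currently assigned, so a subsequent RDPMC can no longer observe the other task's counts.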

Thanks,
Kan