Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation
From: Stephane Eranian
Date: Wed Jun 06 2012 - 08:08:39 EST
Looks good.
thanks.
On Wed, Jun 6, 2012 at 2:06 PM, Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
> On Wed, 2012-06-06 at 13:57 +0200, Stephane Eranian wrote:
>
>> Are you going to repost the update patch, or shall I?
>
>
>
> ---
> Subject: perf, x86: Fix Intel shared extra MSR allocation
> From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Date: Tue, 05 Jun 2012 15:30:31 +0200
>
> Zheng Yan reported that event group validation can wreck event state
> when Intel extra_reg allocation changes event state.
>
> Validation shouldn't change any persistent state. Cloning events in
> validate_{event,group}() isn't really pretty either, so add a few
> special cases to avoid modifying the event state.
>
> The code is restructured to minimize the special case impact.
>
> Reported-by: Zheng Yan <zheng.z.yan@xxxxxxxxxxxxxxx>
> Acked-by: Stephane Eranian <eranian@xxxxxxxxxx>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
> Link: http://lkml.kernel.org/r/1338903031.28282.175.camel@twins
> ---
>  arch/x86/kernel/cpu/perf_event.c       |    1
>  arch/x86/kernel/cpu/perf_event.h       |    1
>  arch/x86/kernel/cpu/perf_event_intel.c |   92 ++++++++++++++++++++++-----------
>  3 files changed, 66 insertions(+), 28 deletions(-)
>
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fa
>                  if (!cpuc->shared_regs)
>                          goto error;
>          }
> +        cpuc->is_fake = 1;
>          return cpuc;
>  error:
>          free_fake_cpuc(cpuc);
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -117,6 +117,7 @@ struct cpu_hw_events {
>          struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
>
>          unsigned int            group_flag;
> +        int                     is_fake;
>
>          /*
>           * Intel DebugStore bits
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event
>          return NULL;
>  }
>
> -static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
> +static int intel_alt_er(int idx)
>  {
>          if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
> -                return false;
> +                return idx;
>
> -        if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
> -                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
> -                event->hw.config |= 0x01bb;
> -                event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
> -                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
> -        } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
> +        if (idx == EXTRA_REG_RSP_0)
> +                return EXTRA_REG_RSP_1;
> +
> +        if (idx == EXTRA_REG_RSP_1)
> +                return EXTRA_REG_RSP_0;
> +
> +        return idx;
> +}
> +
> +static void intel_fixup_er(struct perf_event *event, int idx)
> +{
> +        event->hw.extra_reg.idx = idx;
> +
> +        if (idx == EXTRA_REG_RSP_0) {
>                  event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>                  event->hw.config |= 0x01b7;
> -                event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
>                  event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
> +        } else if (idx == EXTRA_REG_RSP_1) {
> +                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
> +                event->hw.config |= 0x01bb;
> +                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
>          }
> -
> -        if (event->hw.extra_reg.idx == orig_idx)
> -                return false;
> -
> -        return true;
>  }
>
> Â/*
> @@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struc
>          struct event_constraint *c = &emptyconstraint;
>          struct er_account *era;
>          unsigned long flags;
> -        int orig_idx = reg->idx;
> +        int idx = reg->idx;
>
> -        /* already allocated shared msr */
> -        if (reg->alloc)
> +        /*
> +         * reg->alloc can be set due to existing state, so for fake cpuc we
> +         * need to ignore this, otherwise we might fail to allocate proper fake
> +         * state for this extra reg constraint. Also see the comment below.
> +         */
> +        if (reg->alloc && !cpuc->is_fake)
>                  return NULL; /* call x86_get_event_constraint() */
>
>  again:
> -        era = &cpuc->shared_regs->regs[reg->idx];
> +        era = &cpuc->shared_regs->regs[idx];
>          /*
>           * we use spin_lock_irqsave() to avoid lockdep issues when
>           * passing a fake cpuc
> @@ -1173,6 +1183,29 @@ __intel_shared_reg_get_constraints(struc
>
>          if (!atomic_read(&era->ref) || era->config == reg->config) {
>
> +                /*
> +                 * If its a fake cpuc -- as per validate_{group,event}() we
> +                 * shouldn't touch event state and we can avoid doing so
> +                 * since both will only call get_event_constraints() once
> +                 * on each event, this avoids the need for reg->alloc.
> +                 *
> +                 * Not doing the ER fixup will only result in era->reg being
> +                 * wrong, but since we won't actually try and program hardware
> +                 * this isn't a problem either.
> +                 */
> +                if (!cpuc->is_fake) {
> +                        if (idx != reg->idx)
> +                                intel_fixup_er(event, idx);
> +
> +                        /*
> +                         * x86_schedule_events() can call get_event_constraints()
> +                         * multiple times on events in the case of incremental
> +                         * scheduling(). reg->alloc ensures we only do the ER
> +                         * allocation once.
> +                         */
> +                        reg->alloc = 1;
> +                }
> +
>                  /* lock in msr value */
>                  era->config = reg->config;
>                  era->reg = reg->reg;
> @@ -1180,17 +1213,17 @@ __intel_shared_reg_get_constraints(struc
>                  /* one more user */
>                  atomic_inc(&era->ref);
>
> -                /* no need to reallocate during incremental event scheduling */
> -                reg->alloc = 1;
> -
>                  /*
>                   * need to call x86_get_event_constraint()
>                   * to check if associated event has constraints
>                   */
>                  c = NULL;
> -        } else if (intel_try_alt_er(event, orig_idx)) {
> -                raw_spin_unlock_irqrestore(&era->lock, flags);
> -                goto again;
> +        } else {
> +                idx = intel_alt_er(idx);
> +                if (idx != reg->idx) {
> +                        raw_spin_unlock_irqrestore(&era->lock, flags);
> +                        goto again;
> +                }
>          }
>          raw_spin_unlock_irqrestore(&era->lock, flags);
>
> @@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struc
>          struct er_account *era;
>
>          /*
> -         * only put constraint if extra reg was actually
> -         * allocated. Also takes care of event which do
> -         * not use an extra shared reg
> +         * Only put constraint if extra reg was actually allocated. Also takes
> +         * care of event which do not use an extra shared reg.
> +         *
> +         * Also, if this is a fake cpuc we shouldn't touch any event state
> +         * (reg->alloc) and we don't care about leaving inconsistent cpuc state
> +         * either since it'll be thrown out.
>           */
> -        if (!reg->alloc)
> +        if (!reg->alloc || cpuc->is_fake)
>                  return;
>
>          era = &cpuc->shared_regs->regs[reg->idx];
>
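
For reference, the caller that hands a fake cpuc to this code is validate_group() in arch/x86/kernel/cpu/perf_event.c. The sketch below is a paraphrase for illustration only; the collect_events() usage and the exact error handling are approximated from that era's code rather than quoted from the patch:

/*
 * Rough sketch of validate_group(), the scheduling dry run that passes a
 * fake cpuc down to get_event_constraints().  Paraphrased; details of the
 * real function differ.
 */
static int validate_group(struct perf_event *event)
{
        struct perf_event *leader = event->group_leader;
        struct cpu_hw_events *fake_cpuc;
        int ret = -EINVAL, n;

        /* after this patch, fake_cpuc->is_fake == 1 */
        fake_cpuc = allocate_fake_cpuc();
        if (IS_ERR(fake_cpuc))
                return PTR_ERR(fake_cpuc);

        /* collect the existing group, then the new event, into the fake cpuc */
        n = collect_events(fake_cpuc, leader, true);
        if (n < 0)
                goto out;

        n = collect_events(fake_cpuc, event, false);
        if (n < 0)
                goto out;

        fake_cpuc->n_events = n;

        /*
         * This reaches __intel_shared_reg_get_constraints() for offcore
         * response events; with cpuc->is_fake set, that path skips
         * intel_fixup_er() and reg->alloc, so the real event state is not
         * modified by validation.
         */
        ret = x86_schedule_events(fake_cpuc, n, NULL);

out:
        free_fake_cpuc(fake_cpuc);
        return ret;
}

Since the fake cpuc is thrown away afterwards, leaving era->reg or era->ref "wrong" on this path is harmless, which is what the comments in the patch rely on.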