Re: [PATCH 2/3] perf/x86/intel: fix for MSR_LAST_BRANCH_FROM_x quirk when no TSX
From: Peter Zijlstra
Date: Thu Jun 02 2016 - 04:54:06 EST
On Wed, Jun 01, 2016 at 07:42:02PM -0700, David Carrillo-Cisneros wrote:
> diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
> index 2dca66c..6aa2d8a 100644
> --- a/arch/x86/events/intel/lbr.c
> +++ b/arch/x86/events/intel/lbr.c
> @@ -80,6 +80,7 @@ static enum {
> #define LBR_FROM_FLAG_MISPRED (1ULL << 63)
> #define LBR_FROM_FLAG_IN_TX (1ULL << 62)
> #define LBR_FROM_FLAG_ABORT (1ULL << 61)
> +#define LBR_FROM_SIGNEXT_MSB (1ULL << 60)
>
> /*
> * x86control flow change classification
> @@ -235,6 +236,62 @@ enum {
> LBR_VALID,
> };
>
> +/*
> + * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
> + * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported.
> + * When TSX support is disabled the behavior differs as follows:
> + * - For wrmsr, bits 61:62 are considered part of the sign-extension.
> + * - HW updates to the MSR (no through wrmsr) will clear bits 61:62,
> + * regardless of the sign of bit at position 47, i.e. bit 61:62 are not part
> + * of the sign-extension.
> + *
> + * Therefore, if the conditions:
> + * 1) LBR has TSX format.
> + * 2) CPU has no TSX support enabled.
> + * 3) data in MSR (bits 0:48) is negative.
I take issue with 3; sign extension has nothing to do with being
negative or not.
> + * are all true, then any value passed to wrmsr must be signed-extended to
> + * 63 bits and any value from rdmsr must be converted to 61 bits, ignoring
> + * the TSX flags.
> + */
> +
> +static inline bool lbr_from_signext_quirk_on(void)
This function name is bad; it doesn't return if the quirk is on at all.
It detects if the quirk is relevant and should be turned on, look at the
bloody usage site.
> +{
> + int lbr_format = x86_pmu.intel_cap.lbr_format;
> + bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
> + boot_cpu_has(X86_FEATURE_RTM);
> +
> + return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
> +}
> +
> +DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
> +
> +static inline bool lbr_from_signext_quirk_test(u64 val)
> +{
> + return static_branch_unlikely(&lbr_from_quirk_key) &&
> + (val & LBR_FROM_SIGNEXT_MSB);
> +}
No, that's just wrong.
> +
> +/*
> + * If quirk is needed, do sign extension to 63 bits.
> + */
> +inline u64 lbr_from_signext_quirk_wr(u64 val)
> +{
> + if (lbr_from_signext_quirk_test(val))
> + val |= (LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
That too, that's horrible. You don't set IN_TX and ABORT.
What's wrong with:
if (static_branch_unlikely(&lbr_from_quirk)) {
val <<= 2;
(s64)val >>= 2;
}
That actually sign extends and without branches.
> + return val;
> +}
> +
> +/*
> + * If quirk is needed, ensure sign extension is 61 bits.
You cannot extend shorter. Extend means to make longer/more.
You can truncate sign bits, or just say you need to clear the 2 MSBs.
> + */
> +
> +u64 lbr_from_signext_quirk_rd(u64 val)
> +{
> + if (lbr_from_signext_quirk_test(val))
Again, no point in wasting an extra branch, just clear the bits
unconditional:
if (static_branch_unlikely(&lbr_from_quirk))
> + val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
> + return val;
> +}
> +
> static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
> {
> int i;