Re: [PATCH v3] x86/cpu/amd: Enable the fixed Instructions Retired counter IRPERF

From: Borislav Petkov
Date: Tue Feb 18 2020 - 06:20:42 EST


On Fri, Feb 14, 2020 at 02:18:05PM -0600, Kim Phillips wrote:
> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index f3327cb56edf..8979d6fcc79c 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -404,5 +404,6 @@
> #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
> #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
> #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
> +#define X86_BUG_IRPERF X86_BUG(24) /* CPU is affected by Instructions Retired counter Erratum 1054 */

Do you need this bug flag at all?

If the only reason for its existence is to check it before setting
the MSR bit enabling IRPERF, then you don't need it. Or is there any
particular reason why it should show in /proc/cpuinfo?

IOW, does this work too?

---
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 8821697a7549..12c9684d59ba 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -521,6 +521,8 @@
#define MSR_K7_HWCR 0xc0010015
#define MSR_K7_HWCR_SMMLOCK_BIT 0
#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_HWCR_IRPERF_EN_BIT 30
+#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index ac83a0fef628..1f875fbe1384 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -28,6 +28,7 @@

static const int amd_erratum_383[];
static const int amd_erratum_400[];
+static const int amd_erratum_1054[];
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);

/*
@@ -972,6 +973,15 @@ static void init_amd(struct cpuinfo_x86 *c)
/* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
if (!cpu_has(c, X86_FEATURE_XENPV))
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+
+ /*
+ * Turn on the Instructions Retired free counter on machines not
+ * susceptible to erratum #1054 "Instructions Retired Performance
+ * Counter May Be Inaccurate".
+ */
+ if (cpu_has(c, X86_FEATURE_IRPERF) &&
+ !cpu_has_amd_erratum(c, amd_erratum_1054))
+ msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT);
}

#ifdef CONFIG_X86_32
@@ -1099,6 +1109,10 @@ static const int amd_erratum_400[] =
static const int amd_erratum_383[] =
AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));

+/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
+static const int amd_erratum_1054[] =
+ AMD_OSVW_ERRATUM(0, AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
+

static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette