[PATCH] x86/MCE: Remove MCP_TIMESTAMP

From: Borislav Petkov
Date: Sat Nov 05 2016 - 09:12:49 EST


Whoops,

one more:

---
From: Borislav Petkov <bp@xxxxxxx>
Date: Sat, 5 Nov 2016 12:47:03 +0100
Subject: [PATCH] x86/MCE: Remove MCP_TIMESTAMP

MCP_TIMESTAMP controls whether the current TSC value should be added to
the MCE record. Most machine_check_poll() callers supply it, except
__mcheck_cpu_init_generic(), but this is wrong because we could be
logging an MCE right at the same time and thus log one without the TSC
value.

What is more, machine_check_poll() unconditionally cleared mce.tsc,
which is another bug.

So get rid of all that and simply always log an MCE with a TSC value.
This simplifies the code a bit, too.

Signed-off-by: Borislav Petkov <bp@xxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
---
arch/x86/include/asm/mce.h | 5 ++---
arch/x86/kernel/cpu/mcheck/mce.c | 6 +-----
arch/x86/kernel/cpu/mcheck/mce_intel.c | 6 +++---
3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 748b8da8e627..3b53c260e0be 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -265,9 +265,8 @@ typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);

enum mcp_flags {
- MCP_TIMESTAMP = BIT(0), /* log time stamp */
- MCP_UC = BIT(1), /* log uncorrected errors */
- MCP_DONTLOG = BIT(2), /* only clear, don't log */
+ MCP_UC = BIT(0), /* log uncorrected errors */
+ MCP_DONTLOG = BIT(1), /* only clear, don't log */
};
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 4ca00474804b..82564156d6ab 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -713,7 +713,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m.misc = 0;
m.addr = 0;
m.bank = i;
- m.tsc = 0;

barrier();
m.status = mce_rdmsrl(msr_ops.status(i));
@@ -735,9 +734,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)

mce_read_aux(&m, i);

- if (!(flags & MCP_TIMESTAMP))
- m.tsc = 0;
-
severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);

if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m))
@@ -1394,7 +1390,7 @@ static void mce_timer_fn(unsigned long data)
iv = __this_cpu_read(mce_next_interval);

if (mce_available(this_cpu_ptr(&cpu_info))) {
- machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
+ machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));

if (mce_intel_cmci_poll()) {
iv = mce_adjust_timer(iv);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 1defb8ea882c..3262f0d726bb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -130,7 +130,7 @@ bool mce_intel_cmci_poll(void)
* Reset the counter if we've logged an error in the last poll
* during the storm.
*/
- if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)))
+ if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
else
this_cpu_dec(cmci_backoff_cnt);
@@ -250,7 +250,7 @@ static void intel_threshold_interrupt(void)
if (cmci_storm_detect())
return;

- machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
+ machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
}

/*
@@ -342,7 +342,7 @@ void cmci_recheck(void)
return;

local_irq_save(flags);
- machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
+ machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
local_irq_restore(flags);
}

--
2.10.0

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.