[PATCH V2 9/9] edac, mce_amd_inj: Inject errors on NBC for bank 4 errors
From: Aravind Gopalakrishnan
Date: Tue Jun 02 2015 - 16:27:46 EST
For bank 4 errors, MCE is logged and reported only on
node base cores. Refer D18F3x44[NbMcaToMstCpuEn] field in
Fam10h and later BKDGs.
This patch ensures that we inject the error on the node base core
for bank 4 errors. Otherwise, triggering #MC or apic interrupts on
a non node base core would not have any effect on the system.
(i.e), we would not see any relevant output on kernel logs for
the error we just injected.
Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@xxxxxxx>
---
drivers/edac/mce_amd_inj.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)
diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
index b7e108c..45aac4f 100644
--- a/drivers/edac/mce_amd_inj.c
+++ b/drivers/edac/mce_amd_inj.c
@@ -17,9 +17,12 @@
#include <linux/cpu.h>
#include <linux/string.h>
#include <linux/uaccess.h>
+#include <linux/pci.h>
#include <asm/mce.h>
+#include <asm/amd_nb.h>
#include "mce_amd.h"
+#include "amd64_edac.h"
/*
* Collect all the MCi_XXX settings
@@ -200,6 +203,44 @@ static void trigger_thr_int(void *info)
asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
}
+static u32 amd_get_nbc_for_node(int node_id)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ u32 cores_per_node;
+
+ cores_per_node = c->x86_max_cores / amd_get_nodes_cnt();
+
+ return cores_per_node * node_id;
+}
+
+static void toggle_nb_mca_mst_cpu(u16 nid)
+{
+ struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
+ u32 val;
+ int err;
+
+ if (!F3)
+ return;
+
+ err = pci_read_config_dword(F3, NBCFG, &val);
+ if (err) {
+ pr_err("%s: Error reading F%dx%03x.\n", __func__,
+ PCI_FUNC(F3->devfn),
+ NBCFG);
+ return;
+ }
+
+ if (!(val & BIT(27))) {
+ pr_err("%s: BIOS not setting D18F3x44[NbMcaToMstCpuEn]. Doing that here\n", __func__);
+ val |= BIT(27);
+ err = pci_write_config_dword(F3, NBCFG, val);
+ if (err)
+ pr_err("%s: Error writing F%dx%03x.\n", __func__,
+ PCI_FUNC(F3->devfn),
+ NBCFG);
+ }
+}
+
static void do_inject(void)
{
u64 mcg_status = 0;
@@ -235,6 +276,20 @@ static void do_inject(void)
if (!(i_mce.status & MCI_STATUS_PCC))
mcg_status |= MCG_STATUS_RIPV;
+ /*
+ * For multi node cpus, logging and reporting of bank == 4 errors
+ * happen only on the node base core. Refer D18F3x44[NbMcaToMstCpuEn]
+ * for Fam10h and later BKDGs
+ */
+ if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) {
+ /*
+ * BIOS sets D18F3x44[NbMcaToMstCpuEn] by default.
+ * But make sure of it here just in case..
+ */
+ toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
+ cpu = amd_get_nbc_for_node(amd_get_nb_id(cpu));
+ }
+
toggle_hw_mce_inject(cpu, true);
wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
--
2.4.0
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/