[PATCH 09/14] amd64_edac: carve out decoding of MCi_STATUS ErrorCode

From: Borislav Petkov
Date: Mon Jul 20 2009 - 12:13:56 EST


This is the MCE error code from the MCi_STATUS banks, bits [15:0], which
describe what type of error was encountered: GART TLB, Memory or Bus
related. The semantics of those bits are the same across all MCE banks,
so decode them separately, irrespective of MCE type.

Signed-off-by: Borislav Petkov <borislav.petkov@xxxxxxx>
---
drivers/edac/amd64_edac.c | 56 +++++++++++++++++++++++---------------------
1 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index a691bb8..e4a0c91 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2229,9 +2229,6 @@ static void amd64_decode_bus_error(struct mem_ctl_info *mci,
u32 ec = ERROR_CODE(info->nbsl);
u32 xec = EXT_ERROR_CODE(info->nbsl);

- pr_emerg(" Transaction type: %s(%s), %s, Cache Level: %s, %s\n",
- RRRR(ec), II(ec), TO(ec), LL(ec), PP(ec));
-
/* Bail early out if this was an 'observed' error */
if (((ec >> 9) & 0x3) == K8_NBSL_PP_OBS)
return;
@@ -2260,7 +2257,8 @@ void amd64_decode_nb_mce(struct mem_ctl_info *mci, struct err_regs *regs,
int handle_errors, int ecc)
{
struct amd64_pvt *pvt = mci->pvt_info;
- u32 ec, xec;
+ u32 ec = ERROR_CODE(regs->nbsl);
+ u32 xec = EXT_ERROR_CODE(regs->nbsl);

if (!handle_errors)
return;
@@ -2279,9 +2277,22 @@ void amd64_decode_nb_mce(struct mem_ctl_info *mci, struct err_regs *regs,
pr_cont(", core: %d\n", ilog2((regs->nbsh & 0xf)));
}

- ec = ERROR_CODE(regs->nbsl);
- xec = EXT_ERROR_CODE(regs->nbsl);
+ pr_emerg(" %s.\n", EXT_ERR_DESC(xec));
+
+ if (BUS_ERROR(ec))
+ amd64_decode_bus_error(mci, regs, ecc);

+ /*
+ * Check the UE bit of the NB status high register, if set generate some
+ * logs. If NOT a GART error, then process the event as a NO-INFO event.
+ * If it was a GART error, skip that process.
+ */
+ if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
+ edac_mc_handle_ue_no_info(mci, "UE bit is set");
+}
+
+static inline void amd64_decode_err_code(unsigned int ec)
+{
if (TLB_ERROR(ec)) {
/*
* GART errors are intended to help graphics driver developers
@@ -2298,30 +2309,19 @@ void amd64_decode_nb_mce(struct mem_ctl_info *mci, struct err_regs *regs,
if (!report_gart_errors)
return;

- pr_emerg(" GART TLB error, Transaction: %s, Cache Level %s\n",
- TT(ec), LL(ec));
+ pr_emerg(" Transaction: %s, Cache Level %s\n", TT(ec), LL(ec));
} else if (MEM_ERROR(ec)) {
- pr_emerg(" Memory/Cache error, Transaction: %s, Type: %s,"
- " Cache Level: %s",
+ pr_emerg(" Transaction: %s, Type: %s, Cache Level: %s",
RRRR(ec), TT(ec), LL(ec));
} else if (BUS_ERROR(ec)) {
- pr_emerg(" Bus (Link/DRAM) error\n");
- amd64_decode_bus_error(mci, regs, ecc);
+ pr_emerg(" Transaction type: %s(%s), %s, Cache Level: %s,"
+ " Participating Processor: %s\n",
+ RRRR(ec), II(ec), TO(ec), LL(ec), PP(ec));
+
} else {
/* shouldn't reach here! */
- amd64_mc_printk(mci, KERN_WARNING,
- "%s(): unknown MCE error 0x%x\n", __func__, ec);
+ pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
}
-
- pr_emerg("%s.\n", EXT_ERR_DESC(xec));
-
- /*
- * Check the UE bit of the NB status high register, if set generate some
- * logs. If NOT a GART error, then process the event as a NO-INFO event.
- * If it was a GART error, skip that process.
- */
- if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
- edac_mc_handle_ue_no_info(mci, "UE bit is set");
}

void decode_mce(struct mce *m)
@@ -2329,13 +2329,13 @@ void decode_mce(struct mce *m)
struct err_regs regs;
int ecc;

- pr_emerg("MC%d_STATUS:\n", m->bank);
+ pr_emerg("MC%d_STATUS: ", m->bank);

- pr_emerg(" Error: %sorrected, Report: %s, MiscV: %svalid, "
+ pr_cont("%sorrected error, report: %s, MiscV: %svalid, "
"CPU context corrupt: %s",
((m->status & MCI_STATUS_UC) ? "Unc" : "C"),
((m->status & MCI_STATUS_EN) ? "yes" : "no"),
- ((m->status & MCI_STATUS_MISCV) ? "" : "In"),
+ ((m->status & MCI_STATUS_MISCV) ? "" : "in"),
((m->status & MCI_STATUS_PCC) ? "yes" : "no"));

/* do the two bits[14:13] together */
@@ -2345,6 +2345,8 @@ void decode_mce(struct mce *m)

pr_cont("\n");

+ amd64_decode_err_code(m->status & 0xffff);
+
if (m->bank == 4) {
regs.nbsl = (u32) m->status;
regs.nbsh = (u32)(m->status >> 32);
--
1.6.3.3


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/