[PATCH 5/7] EDAC, mce_amd: Convert to seq_buf

From: Borislav Petkov
Date: Fri Aug 25 2017 - 06:24:59 EST


From: Borislav Petkov <bp@xxxxxxx>

Convert the part which decodes the error description to the sequence
buffer facility and thus save ourselves the many printk() invocations
building the decoded string.

Also, use a genpool for the string buffers to handle concurrent
invocations (and atomic context).

Signed-off-by: Borislav Petkov <bp@xxxxxxx>
---
drivers/edac/mce_amd.c | 215 +++++++++++++++++++++++++++++--------------------
1 file changed, 127 insertions(+), 88 deletions(-)

diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index a11a671c7a38..b7c1f8f7e871 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1,3 +1,5 @@
+#include <linux/genalloc.h>
+#include <linux/seq_buf.h>
#include <linux/module.h>
#include <linux/slab.h>

@@ -34,6 +36,16 @@ void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
}
EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);

+/* 128-byte pool elements (1 << ELEM_ORDER): nice and round - two cachelines. */
+#define ELEM_ORDER 7
+#define ELEM_SIZE (1 << 7)
+#define DEC_POOL_SIZE (2 * PAGE_SIZE)
+
+static char __err_buf[DEC_POOL_SIZE];
+static struct gen_pool *dec_pool;
+
+static struct seq_buf sb;
+
/*
* string representation for the different MCA reported error types, see F3x48
* or MSR0000_0411.
@@ -315,9 +327,9 @@ static bool f12h_mc0_mce(u16 ec, u8 xec)
ret = true;

if (ll == LL_L2)
- pr_cont("during L1 linefill from L2.\n");
+ seq_buf_printf(&sb, "during L1 linefill from L2.\n");
else if (ll == LL_L1)
- pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
+ seq_buf_printf(&sb, "Data/Tag %s error.\n", R4_MSG(ec));
else
ret = false;
}
@@ -327,7 +339,7 @@ static bool f12h_mc0_mce(u16 ec, u8 xec)
static bool f10h_mc0_mce(u16 ec, u8 xec)
{
if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
- pr_cont("during data scrub.\n");
+ seq_buf_printf(&sb, "during data scrub.\n");
return true;
}
return f12h_mc0_mce(ec, xec);
@@ -336,7 +348,7 @@ static bool f10h_mc0_mce(u16 ec, u8 xec)
static bool k8_mc0_mce(u16 ec, u8 xec)
{
if (BUS_ERROR(ec)) {
- pr_cont("during system linefill.\n");
+ seq_buf_printf(&sb, "during system linefill.\n");
return true;
}

@@ -356,14 +368,14 @@ static bool cat_mc0_mce(u16 ec, u8 xec)
switch (r4) {
case R4_DRD:
case R4_DWR:
- pr_cont("Data/Tag parity error due to %s.\n",
- (r4 == R4_DRD ? "load/hw prf" : "store"));
+ seq_buf_printf(&sb, "Data/Tag parity error due to %s.\n",
+ (r4 == R4_DRD ? "load/hw prf" : "store"));
break;
case R4_EVICT:
- pr_cont("Copyback parity error on a tag miss.\n");
+ seq_buf_printf(&sb, "Copyback parity error on a tag miss.\n");
break;
case R4_SNOOP:
- pr_cont("Tag parity error during snoop.\n");
+ seq_buf_printf(&sb, "Tag parity error during snoop.\n");
break;
default:
ret = false;
@@ -373,17 +385,17 @@ static bool cat_mc0_mce(u16 ec, u8 xec)
if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
return false;

- pr_cont("System read data error on a ");
+ seq_buf_printf(&sb, "System read data error on a ");

switch (r4) {
case R4_RD:
- pr_cont("TLB reload.\n");
+ seq_buf_printf(&sb, "TLB reload.\n");
break;
case R4_DWR:
- pr_cont("store.\n");
+ seq_buf_printf(&sb, "store.\n");
break;
case R4_DRD:
- pr_cont("load.\n");
+ seq_buf_printf(&sb, "load.\n");
break;
default:
ret = false;
@@ -403,28 +415,28 @@ static bool f15h_mc0_mce(u16 ec, u8 xec)

switch (xec) {
case 0x0:
- pr_cont("Data Array access error.\n");
+ seq_buf_printf(&sb, "Data Array access error.\n");
break;

case 0x1:
- pr_cont("UC error during a linefill from L2/NB.\n");
+ seq_buf_printf(&sb, "UC error during a linefill from L2/NB.\n");
break;

case 0x2:
case 0x11:
- pr_cont("STQ access error.\n");
+ seq_buf_printf(&sb, "STQ access error.\n");
break;

case 0x3:
- pr_cont("SCB access error.\n");
+ seq_buf_printf(&sb, "SCB access error.\n");
break;

case 0x10:
- pr_cont("Tag error.\n");
+ seq_buf_printf(&sb, "Tag error.\n");
break;

case 0x12:
- pr_cont("LDQ access error.\n");
+ seq_buf_printf(&sb, "LDQ access error.\n");
break;

default:
@@ -433,12 +445,12 @@ static bool f15h_mc0_mce(u16 ec, u8 xec)
} else if (BUS_ERROR(ec)) {

if (!xec)
- pr_cont("System Read Data Error.\n");
+ seq_buf_printf(&sb, "System Read Data Error.\n");
else
- pr_cont(" Internal error condition type %d.\n", xec);
+ seq_buf_printf(&sb, " Internal error condition type %d.\n", xec);
} else if (INT_ERROR(ec)) {
if (xec <= 0x1f)
- pr_cont("Hardware Assert.\n");
+ seq_buf_printf(&sb, "Hardware Assert.\n");
else
ret = false;

@@ -453,13 +465,13 @@ static void decode_mc0_mce(struct mce *m)
u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask);

- pr_emerg(HW_ERR "MC0 Error: ");
+ seq_buf_printf(&sb, HW_ERR "MC0 Error: ");

/* TLB error signatures are the same across families */
if (TLB_ERROR(ec)) {
if (TT(ec) == TT_DATA) {
- pr_cont("%s TLB %s.\n", LL_MSG(ec),
- ((xec == 2) ? "locked miss"
+ seq_buf_printf(&sb, "%s TLB %s.\n", LL_MSG(ec),
+ ((xec == 2) ? "locked miss"
: (xec ? "multimatch" : "parity")));
return;
}
@@ -478,19 +490,19 @@ static bool k8_mc1_mce(u16 ec, u8 xec)
return false;

if (ll == 0x2)
- pr_cont("during a linefill from L2.\n");
+ seq_buf_printf(&sb, "during a linefill from L2.\n");
else if (ll == 0x1) {
switch (R4(ec)) {
case R4_IRD:
- pr_cont("Parity error during data load.\n");
+ seq_buf_printf(&sb, "Parity error during data load.\n");
break;

case R4_EVICT:
- pr_cont("Copyback Parity/Victim error.\n");
+ seq_buf_printf(&sb, "Copyback Parity/Victim error.\n");
break;

case R4_SNOOP:
- pr_cont("Tag Snoop error.\n");
+ seq_buf_printf(&sb, "Tag Snoop error.\n");
break;

default:
@@ -515,13 +527,13 @@ static bool cat_mc1_mce(u16 ec, u8 xec)
return false;

if (r4 == R4_IRD)
- pr_cont("Data/tag array parity error for a tag hit.\n");
+ seq_buf_printf(&sb, "Data/tag array parity error for a tag hit.\n");
else if (r4 == R4_SNOOP)
- pr_cont("Tag error during snoop/victimization.\n");
+ seq_buf_printf(&sb, "Tag error during snoop/victimization.\n");
else if (xec == 0x0)
- pr_cont("Tag parity error from victim castout.\n");
+ seq_buf_printf(&sb, "Tag parity error from victim castout.\n");
else if (xec == 0x2)
- pr_cont("Microcode patch RAM parity error.\n");
+ seq_buf_printf(&sb, "Microcode patch RAM parity error.\n");
else
ret = false;

@@ -537,19 +549,19 @@ static bool f15h_mc1_mce(u16 ec, u8 xec)

switch (xec) {
case 0x0 ... 0xa:
- pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
+ seq_buf_printf(&sb, "%s.\n", f15h_mc1_mce_desc[xec]);
break;

case 0xd:
- pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
+ seq_buf_printf(&sb, "%s.\n", f15h_mc1_mce_desc[xec-2]);
break;

case 0x10:
- pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
+ seq_buf_printf(&sb, "%s.\n", f15h_mc1_mce_desc[xec-4]);
break;

case 0x11 ... 0x15:
- pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
+ seq_buf_printf(&sb, "Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
break;

default:
@@ -563,18 +575,18 @@ static void decode_mc1_mce(struct mce *m)
u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask);

- pr_emerg(HW_ERR "MC1 Error: ");
+ seq_buf_printf(&sb, HW_ERR "MC1 Error: ");

if (TLB_ERROR(ec))
- pr_cont("%s TLB %s.\n", LL_MSG(ec),
- (xec ? "multimatch" : "parity error"));
+ seq_buf_printf(&sb, "%s TLB %s.\n", LL_MSG(ec),
+ (xec ? "multimatch" : "parity error"));
else if (BUS_ERROR(ec)) {
bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));

- pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
+ seq_buf_printf(&sb, "during %s.\n", (k8 ? "system linefill" : "NB data read"));
} else if (INT_ERROR(ec)) {
if (xec <= 0x3f)
- pr_cont("Hardware Assert.\n");
+ seq_buf_printf(&sb, "Hardware Assert.\n");
else
goto wrong_mc1_mce;
} else if (fam_ops->mc1_mce(ec, xec))
@@ -593,27 +605,27 @@ static bool k8_mc2_mce(u16 ec, u8 xec)
bool ret = true;

if (xec == 0x1)
- pr_cont(" in the write data buffers.\n");
+ seq_buf_printf(&sb, " in the write data buffers.\n");
else if (xec == 0x3)
- pr_cont(" in the victim data buffers.\n");
+ seq_buf_printf(&sb, " in the victim data buffers.\n");
else if (xec == 0x2 && MEM_ERROR(ec))
- pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
+ seq_buf_printf(&sb, ": %s error in the L2 cache tags.\n", R4_MSG(ec));
else if (xec == 0x0) {
if (TLB_ERROR(ec))
- pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n",
- TT_MSG(ec));
+ seq_buf_printf(&sb, "%s error in a Page Descriptor Cache or Guest TLB.\n",
+ TT_MSG(ec));
else if (BUS_ERROR(ec))
- pr_cont(": %s/ECC error in data read from NB: %s.\n",
- R4_MSG(ec), PP_MSG(ec));
+ seq_buf_printf(&sb, ": %s/ECC error in data read from NB: %s.\n",
+ R4_MSG(ec), PP_MSG(ec));
else if (MEM_ERROR(ec)) {
u8 r4 = R4(ec);

if (r4 >= 0x7)
- pr_cont(": %s error during data copyback.\n",
- R4_MSG(ec));
+ seq_buf_printf(&sb, ": %s error during data copyback.\n",
+ R4_MSG(ec));
else if (r4 <= 0x1)
- pr_cont(": %s parity/ECC error during data "
- "access from L2.\n", R4_MSG(ec));
+ seq_buf_printf(&sb,
+": %s parity/ECC error during data access from L2.\n", R4_MSG(ec));
else
ret = false;
} else
@@ -630,24 +642,24 @@ static bool f15h_mc2_mce(u16 ec, u8 xec)

if (TLB_ERROR(ec)) {
if (xec == 0x0)
- pr_cont("Data parity TLB read error.\n");
+ seq_buf_printf(&sb, "Data parity TLB read error.\n");
else if (xec == 0x1)
- pr_cont("Poison data provided for TLB fill.\n");
+ seq_buf_printf(&sb, "Poison data provided for TLB fill.\n");
else
ret = false;
} else if (BUS_ERROR(ec)) {
if (xec > 2)
ret = false;

- pr_cont("Error during attempted NB data read.\n");
+ seq_buf_printf(&sb, "Error during attempted NB data read.\n");
} else if (MEM_ERROR(ec)) {
switch (xec) {
case 0x4 ... 0xc:
- pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
+ seq_buf_printf(&sb, "%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
break;

case 0x10 ... 0x14:
- pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
+ seq_buf_printf(&sb, "%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
break;

default:
@@ -655,7 +667,7 @@ static bool f15h_mc2_mce(u16 ec, u8 xec)
}
} else if (INT_ERROR(ec)) {
if (xec <= 0x3f)
- pr_cont("Hardware Assert.\n");
+ seq_buf_printf(&sb, "Hardware Assert.\n");
else
ret = false;
}
@@ -672,29 +684,29 @@ static bool f16h_mc2_mce(u16 ec, u8 xec)

switch (xec) {
case 0x04 ... 0x05:
- pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
+ seq_buf_printf(&sb, "%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
break;

case 0x09 ... 0x0b:
case 0x0d ... 0x0f:
- pr_cont("ECC error in L2 tag (%s).\n",
- ((r4 == R4_GEN) ? "BankReq" :
- ((r4 == R4_SNOOP) ? "Prb" : "Fill")));
+ seq_buf_printf(&sb, "ECC error in L2 tag (%s).\n",
+ ((r4 == R4_GEN) ? "BankReq" :
+ ((r4 == R4_SNOOP) ? "Prb" : "Fill")));
break;

case 0x10 ... 0x19:
case 0x1b:
- pr_cont("ECC error in L2 data array (%s).\n",
- (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" :
- ((r4 == R4_GEN) ? "Attr" :
- ((r4 == R4_EVICT) ? "Vict" : "Fill"))));
+ seq_buf_printf(&sb, "ECC error in L2 data array (%s).\n",
+ (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" :
+ ((r4 == R4_GEN) ? "Attr" :
+ ((r4 == R4_EVICT) ? "Vict" : "Fill"))));
break;

case 0x1c ... 0x1d:
case 0x1f:
- pr_cont("Parity error in L2 attribute bits (%s).\n",
- ((r4 == R4_RD) ? "Hit" :
- ((r4 == R4_GEN) ? "Attr" : "Fill")));
+ seq_buf_printf(&sb, "Parity error in L2 attribute bits (%s).\n",
+ ((r4 == R4_RD) ? "Hit" :
+ ((r4 == R4_GEN) ? "Attr" : "Fill")));
break;

default:
@@ -709,10 +721,10 @@ static void decode_mc2_mce(struct mce *m)
u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask);

- pr_emerg(HW_ERR "MC2 Error: ");
+ seq_buf_printf(&sb, HW_ERR "MC2 Error: ");

if (!fam_ops->mc2_mce(ec, xec))
- pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
+ pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
}

static void decode_mc3_mce(struct mce *m)
@@ -726,7 +738,7 @@ static void decode_mc3_mce(struct mce *m)
return;
}

- pr_emerg(HW_ERR "MC3 Error");
+ seq_buf_printf(&sb, HW_ERR "MC3 Error");

if (xec == 0x0) {
u8 r4 = R4(ec);
@@ -734,7 +746,7 @@ static void decode_mc3_mce(struct mce *m)
if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
goto wrong_mc3_mce;

- pr_cont(" during %s.\n", R4_MSG(ec));
+ seq_buf_printf(&sb, " during %s.\n", R4_MSG(ec));
} else
goto wrong_mc3_mce;

@@ -752,7 +764,7 @@ static void decode_mc4_mce(struct mce *m)
u8 xec = XEC(m->status, 0x1f);
u8 offset = 0;

- pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
+ seq_buf_printf(&sb, HW_ERR "MC4 Error (node %d): ", node_id);

switch (xec) {
case 0x0 ... 0xe:
@@ -763,7 +775,7 @@ static void decode_mc4_mce(struct mce *m)
if (fam == 0x11)
goto wrong_mc4_mce;

- pr_cont("%s.\n", mc4_mce_desc[xec]);
+ seq_buf_printf(&sb, "%s.\n", mc4_mce_desc[xec]);

if (decode_dram_ecc)
decode_dram_ecc(node_id, m);
@@ -773,16 +785,16 @@ static void decode_mc4_mce(struct mce *m)

case 0xf:
if (TLB_ERROR(ec))
- pr_cont("GART Table Walk data error.\n");
+ seq_buf_printf(&sb, "GART Table Walk data error.\n");
else if (BUS_ERROR(ec))
- pr_cont("DMA Exclusion Vector Table Walk error.\n");
+ seq_buf_printf(&sb, "DMA Exclusion Vector Table Walk error.\n");
else
goto wrong_mc4_mce;
return;

case 0x19:
if (fam == 0x15 || fam == 0x16)
- pr_cont("Compute Unit Data Error.\n");
+ seq_buf_printf(&sb, "Compute Unit Data Error.\n");
else
goto wrong_mc4_mce;
return;
@@ -795,7 +807,7 @@ static void decode_mc4_mce(struct mce *m)
goto wrong_mc4_mce;
}

- pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
+ seq_buf_printf(&sb, "%s.\n", mc4_mce_desc[xec - offset]);
return;

wrong_mc4_mce:
@@ -811,20 +823,20 @@ static void decode_mc5_mce(struct mce *m)
if (fam == 0xf || fam == 0x11)
goto wrong_mc5_mce;

- pr_emerg(HW_ERR "MC5 Error: ");
+ seq_buf_printf(&sb, HW_ERR "MC5 Error: ");

if (INT_ERROR(ec)) {
if (xec <= 0x1f) {
- pr_cont("Hardware Assert.\n");
+ seq_buf_printf(&sb, "Hardware Assert.\n");
return;
} else
goto wrong_mc5_mce;
}

if (xec == 0x0 || xec == 0xc)
- pr_cont("%s.\n", mc5_mce_desc[xec]);
+ seq_buf_printf(&sb, "%s.\n", mc5_mce_desc[xec]);
else if (xec <= 0xd)
- pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
+ seq_buf_printf(&sb, "%s parity error.\n", mc5_mce_desc[xec]);
else
goto wrong_mc5_mce;

@@ -838,12 +850,12 @@ static void decode_mc6_mce(struct mce *m)
{
u8 xec = XEC(m->status, xec_mask);

- pr_emerg(HW_ERR "MC6 Error: ");
+ seq_buf_printf(&sb, HW_ERR "MC6 Error: ");

if (xec > 0x5)
goto wrong_mc6_mce;

- pr_cont("%s parity error.\n", mc6_mce_desc[xec]);
+ seq_buf_printf(&sb, "%s parity error.\n", mc6_mce_desc[xec]);
return;

wrong_mc6_mce:
@@ -871,13 +883,13 @@ static void decode_smca_error(struct mce *m)
bank_type = hwid->bank_type;
ip_name = smca_get_long_name(bank_type);

- pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
+ seq_buf_printf(&sb, HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);

/* Only print the decode of valid error codes */
if (xec < smca_mce_descs[bank_type].num_descs &&
(hwid->xec_bitmap & BIT_ULL(xec))) {
- pr_emerg(HW_ERR "%s Error: ", ip_name);
- pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
+ seq_buf_printf(&sb, HW_ERR "%s Error: ", ip_name);
+ seq_buf_printf(&sb, "%s.\n", smca_mce_descs[bank_type].descs[xec]);
}

if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
@@ -944,10 +956,21 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
struct mce *m = (struct mce *)data;
unsigned int fam = x86_family(m->cpuid);
int ecc;
+ char *dec_buf;

if (amd_filter_mce(m))
return NOTIFY_STOP;

+ dec_buf = (void *)gen_pool_alloc(dec_pool, ELEM_SIZE);
+ if (!dec_buf) {
+ pr_warn("Decode buffer full!\n");
+ return NOTIFY_STOP;
+ }
+
+ /* \0 terminated */
+ seq_buf_init(&sb, dec_buf, ELEM_SIZE);
+ seq_buf_clear_buf(&sb);
+
pr_emerg(HW_ERR "%s\n", decode_error_status(m));

pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
@@ -1044,6 +1067,10 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
err_code:
amd_decode_err_code(m->status & 0xffff);

+ pr_emerg("%.*s\n", (int)sb.len, sb.buffer);
+
+ gen_pool_free(dec_pool, (unsigned long)dec_buf, ELEM_SIZE);
+
return NOTIFY_STOP;
}

@@ -1055,6 +1082,7 @@ static struct notifier_block amd_mce_dec_nb = {
static int __init mce_amd_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
+ int ret;

if (c->x86_vendor != X86_VENDOR_AMD)
return -ENODEV;
@@ -1122,6 +1150,16 @@ static int __init mce_amd_init(void)
goto err_out;
}

+ dec_pool = gen_pool_create(ELEM_ORDER, -1);
+ if (!dec_pool)
+ goto err_out;
+
+ ret = gen_pool_add(dec_pool, (unsigned long)__err_buf, DEC_POOL_SIZE, -1);
+ if (ret) {
+ gen_pool_destroy(dec_pool);
+ goto err_out;
+ }
+
pr_info("MCE: In-kernel MCE decoding enabled.\n");

mce_register_decode_chain(&amd_mce_dec_nb);
@@ -1140,6 +1178,7 @@ static void __exit mce_amd_exit(void)
{
mce_unregister_decode_chain(&amd_mce_dec_nb);
kfree(fam_ops);
+ gen_pool_destroy(dec_pool);
}

MODULE_DESCRIPTION("AMD MCE decoder");
--
2.13.0