[PATCH 1/3] EDAC/skx_common: Use driver decoder first

From: Tony Luck
Date: Thu Sep 01 2022 - 15:43:46 EST


From: Qiuxu Zhuo <qiuxu.zhuo@xxxxxxxxx>

The performance of driver decoder[1] is better than the performance
of firmware decoder[2], especially on frequent correctable errors.

So use the driver decoder first, fall back to firmware decoder if
the driver decoder is unavailable. Also rename the function pointer
skx_decode to driver_decode (better name to contrast with adxl_decode).

[1] Decode errors by extracting error information from registers of
memory controllers and/or MCA bank registers.

[2] Decode errors by calling ACPI DSM methods.

Co-developed-by: Youquan Song <youquan.song@xxxxxxxxx>
Signed-off-by: Youquan Song <youquan.song@xxxxxxxxx>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@xxxxxxxxx>
Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
---
drivers/edac/skx_common.h | 1 +
drivers/edac/skx_base.c | 9 +++++++--
drivers/edac/skx_common.c | 16 +++++++++-------
3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 03ac067a80b9..880ecd15ca42 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -136,6 +136,7 @@ struct decoded_addr {
int column;
int bank_address;
int bank_group;
+ bool decoded_by_adxl;
};

struct res_config {
diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
index 1abc020d49ab..7e2762f62eec 100644
--- a/drivers/edac/skx_base.c
+++ b/drivers/edac/skx_base.c
@@ -714,8 +714,13 @@ static int __init skx_init(void)

skx_set_decode(skx_decode, skx_show_retry_rd_err_log);

- if (nvdimm_count && skx_adxl_get() == -ENODEV)
- skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n");
+ if (nvdimm_count && skx_adxl_get() != -ENODEV) {
+ skx_set_decode(NULL, skx_show_retry_rd_err_log);
+ } else {
+ if (nvdimm_count)
+ skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n");
+ skx_set_decode(skx_decode, skx_show_retry_rd_err_log);
+ }

/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 19c17c5198c5..9b10c359849b 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -40,7 +40,7 @@ static char *adxl_msg;
static unsigned long adxl_nm_bitmap;

static char skx_msg[MSG_SIZE];
-static skx_decode_f skx_decode;
+static skx_decode_f driver_decode;
static skx_show_retry_log_f skx_show_retry_rd_err_log;
static u64 skx_tolm, skx_tohm;
static LIST_HEAD(dev_edac_list);
@@ -173,6 +173,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me
break;
}

+ res->decoded_by_adxl = true;
+
return true;
}

@@ -183,7 +185,7 @@ void skx_set_mem_cfg(bool mem_cfg_2lm)

void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
{
- skx_decode = decode;
+ driver_decode = decode;
skx_show_retry_rd_err_log = show_retry_log;
}

@@ -591,7 +593,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
break;
}
}
- if (adxl_component_count) {
+ if (res->decoded_by_adxl) {
len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
@@ -651,11 +653,11 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
memset(&res, 0, sizeof(res));
res.addr = mce->addr;

- if (adxl_component_count) {
- if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))
+ /* Try driver decoder first */
+ if (!(driver_decode && driver_decode(&res))) {
+ /* Then try firmware decoder (ACPI DSM methods) */
+ if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))))
return NOTIFY_DONE;
- } else if (!skx_decode || !skx_decode(&res)) {
- return NOTIFY_DONE;
}

mci = res.dev->imc[res.imc].mci;
--
2.37.1