[PATCH 2/5] EDAC/amd64: Support more than two UMCs

From: Ghannam, Yazen
Date: Tue Feb 19 2019 - 15:26:23 EST


From: Yazen Ghannam <yazen.ghannam@xxxxxxx>

The first few models of Family 17h all had 2 UMCs per Die, so we treated
this as a fixed value. However, future systems may have more UMCs per
Die.

Related to this, we were finding the channel number and base address of
a UMC by matching on fixed, known values. However, a pattern has emerged
so we no longer need to match on hardcoded values.

Set the number of UMCs at init time based on the Family/Model. Also,
update the functions that find the channel number and base address of a
UMC in order to support more than two UMCs.

Signed-off-by: Yazen Ghannam <yazen.ghannam@xxxxxxx>
---
drivers/edac/amd64_edac.c | 42 ++++++++++++++++++---------------------
drivers/edac/amd64_edac.h | 20 +++++++++++++++----
2 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 9947437d9574..507d824fe45a 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -449,6 +449,9 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
#define for_each_chip_select_mask(i, dct, pvt) \
for (i = 0; i < pvt->csels[dct].m_cnt; i++)

+#define for_each_umc(i) \
+ for (i = 0; i < num_umcs; i++)
+
/*
* @input_addr is an InputAddr associated with the node given by mci. Return the
* csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
@@ -722,7 +725,7 @@ static unsigned long determine_edac_cap(struct amd64_pvt *pvt)
if (pvt->umc) {
u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0;

- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT))
continue;

@@ -811,7 +814,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
struct amd64_umc *umc;
u32 i, tmp, umc_base;

- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
umc_base = get_umc_base(i);
umc = &pvt->umc[i];

@@ -1388,7 +1391,7 @@ static int f17_early_channel_count(struct amd64_pvt *pvt)
int i, channels = 0;

/* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */
- for (i = 0; i < NUM_UMCS; i++)
+ for_each_umc(i)
channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT);

amd64_info("MCT channel count: %d\n", channels);
@@ -2473,18 +2476,14 @@ static inline void decode_bus_error(int node_id, struct mce *m)
* To find the UMC channel represented by this bank we need to match on its
* instance_id. The instance_id of a bank is held in the lower 32 bits of its
* IPID.
+ *
+ * Currently, we can derive the channel number by looking at the 6th byte in
+ * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel
+ * number.
*/
-static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
+static int find_umc_channel(struct mce *m)
{
- u32 umc_instance_id[] = {0x50f00, 0x150f00};
- u32 instance_id = m->ipid & GENMASK(31, 0);
- int i, channel = -1;
-
- for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
- if (umc_instance_id[i] == instance_id)
- channel = i;
-
- return channel;
+ return (m->ipid & GENMASK(31, 0)) >> 20;
}

static void decode_umc_error(int node_id, struct mce *m)
@@ -2506,11 +2505,7 @@ static void decode_umc_error(int node_id, struct mce *m)
if (m->status & MCI_STATUS_DEFERRED)
ecc_type = 3;

- err.channel = find_umc_channel(pvt, m);
- if (err.channel < 0) {
- err.err_code = ERR_CHANNEL;
- goto log_error;
- }
+ err.channel = find_umc_channel(m);

if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
err.err_code = ERR_NORM_ADDR;
@@ -2612,7 +2607,7 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
if (pvt->umc) {
u8 i;

- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
/* Check enabled channels only: */
if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) &&
(pvt->umc[i].ecc_ctrl & BIT(7))) {
@@ -2648,7 +2643,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
u32 i, umc_base;

/* Read registers from each UMC */
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {

umc_base = get_umc_base(i);
umc = &pvt->umc[i];
@@ -3061,7 +3056,8 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
if (boot_cpu_data.x86 >= 0x17) {
u8 umc_en_mask = 0, ecc_en_mask = 0;

- for (i = 0; i < NUM_UMCS; i++) {
+ set_num_umcs();
+ for_each_umc(i) {
u32 base = get_umc_base(i);

/* Only check enabled UMCs. */
@@ -3114,7 +3110,7 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
{
u8 i, ecc_en = 1, cpk_en = 1;

- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED);
cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP);
@@ -3273,7 +3269,7 @@ static int init_one_instance(unsigned int nid)
goto err_free;

if (pvt->fam >= 0x17) {
- pvt->umc = kcalloc(NUM_UMCS, sizeof(struct amd64_umc), GFP_KERNEL);
+ pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL);
if (!pvt->umc) {
ret = -ENOMEM;
goto err_free;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index de8dbb0b42b5..435450bf8684 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -274,8 +274,6 @@

#define UMC_SDP_INIT BIT(31)

-#define NUM_UMCS 2
-
enum amd_families {
K8_CPUS = 0,
F10_CPUS,
@@ -399,8 +397,22 @@ struct err_info {

static inline u32 get_umc_base(u8 channel)
{
- /* ch0: 0x50000, ch1: 0x150000 */
- return 0x50000 + (!!channel << 20);
+ /* chY: 0xY50000 */
+ return 0x50000 + (channel << 20);
+}
+
+static u8 num_umcs;
+
+static inline void set_num_umcs(void)
+{
+ u8 model = boot_cpu_data.x86_model;
+
+ if (model >= 0x30 && model <= 0x3f)
+ num_umcs = 8;
+ else
+ num_umcs = 2;
+
+ edac_dbg(1, "Number of UMCs: %x", num_umcs);
}

static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i)
--
2.17.1