[PATCH v6 3/5] EDAC/amd64: Extend family ops functions

From: Naveen Krishna Chatradhi
Date: Thu Oct 28 2021 - 09:02:00 EST


From: Muralidhara M K <muralimk@xxxxxxx>

Create new family operation routines and define them respectively.
This would simplify adding support for future platforms.

Signed-off-by: Muralidhara M K <muralimk@xxxxxxx>
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@xxxxxxx>
Link: https://lkml.kernel.org/r/20211014185400.10451-4-nchatrad@xxxxxxx
---
Changes since v5:
split read_mc_regs for per family ops
Adjusted and Called dump_misc_regs for family ops

Changes since v4:
1. Modified k8_prep_chip_selects for ext_model checks
2. Add read_dct_base_mask to ops
3. Renamed find_umc_channel and addressed minor comments

Changes since v3:
1. Defined new family operation routines

Changs since v2:
1. new patch

drivers/edac/amd64_edac.c | 302 +++++++++++++++++++++++---------------
drivers/edac/amd64_edac.h | 10 +-
2 files changed, 188 insertions(+), 124 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 4fce75013674..1029fe84ba2e 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1204,10 +1204,7 @@ static void __dump_misc_regs(struct amd64_pvt *pvt)
/* Display and decode various NB registers for debug purposes. */
static void dump_misc_regs(struct amd64_pvt *pvt)
{
- if (pvt->umc)
- __dump_misc_regs_df(pvt);
- else
- __dump_misc_regs(pvt);
+ pvt->ops->get_misc_regs(pvt);

edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");

@@ -1217,28 +1214,39 @@ static void dump_misc_regs(struct amd64_pvt *pvt)
/*
* See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60]
*/
-static void prep_chip_selects(struct amd64_pvt *pvt)
+static void k8_prep_chip_selects(struct amd64_pvt *pvt)
{
if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8;
- } else if (pvt->fam == 0x15 && pvt->model == 0x30) {
- pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
- pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
- } else if (pvt->fam >= 0x17) {
- int umc;
-
- for_each_umc(umc) {
- pvt->csels[umc].b_cnt = 4;
- pvt->csels[umc].m_cnt = 2;
- }
-
- } else {
+ } else if (pvt->fam == 0xf && pvt->ext_model >= K8_REV_F) {
pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
}
}

+static void f15m30_prep_chip_selects(struct amd64_pvt *pvt)
+{
+ pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
+ pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
+}
+
+static void default_prep_chip_selects(struct amd64_pvt *pvt)
+{
+ pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
+ pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
+}
+
+static void f17_prep_chip_selects(struct amd64_pvt *pvt)
+{
+ int umc;
+
+ for_each_umc(umc) {
+ pvt->csels[umc].b_cnt = 4;
+ pvt->csels[umc].m_cnt = 2;
+ }
+}
+
static void read_umc_base_mask(struct amd64_pvt *pvt)
{
u32 umc_base_reg, umc_base_reg_sec;
@@ -1297,11 +1305,6 @@ static void read_dct_base_mask(struct amd64_pvt *pvt)
{
int cs;

- prep_chip_selects(pvt);
-
- if (pvt->umc)
- return read_umc_base_mask(pvt);
-
for_each_chip_select(cs, 0, pvt) {
int reg0 = DCSB0 + (cs * 4);
int reg1 = DCSB1 + (cs * 4);
@@ -2512,143 +2515,181 @@ static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
}
}

+/* Prototypes for family specific ops routines */
+static int init_csrows(struct mem_ctl_info *mci);
+static int init_csrows_df(struct mem_ctl_info *mci);
+static void read_mc_regs(struct amd64_pvt *pvt);
+static void __read_mc_regs_df(struct amd64_pvt *pvt);
+static void update_umc_err_info(struct mce *m, struct err_info *err);
+
+static const struct low_ops k8_ops = {
+ .early_channel_count = k8_early_channel_count,
+ .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow,
+ .dbam_to_cs = k8_dbam_to_chip_select,
+ .prep_chip_select = k8_prep_chip_selects,
+ .get_base_mask = read_dct_base_mask,
+ .get_misc_regs = __dump_misc_regs,
+ .get_mc_regs = read_mc_regs,
+ .populate_csrows = init_csrows,
+};
+
+static const struct low_ops f10_ops = {
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f10_dbam_to_chip_select,
+ .prep_chip_select = default_prep_chip_selects,
+ .get_base_mask = read_dct_base_mask,
+ .get_misc_regs = __dump_misc_regs,
+ .get_mc_regs = read_mc_regs,
+ .populate_csrows = init_csrows,
+};
+
+static const struct low_ops f15_ops = {
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f15_dbam_to_chip_select,
+ .prep_chip_select = default_prep_chip_selects,
+ .get_base_mask = read_dct_base_mask,
+ .get_misc_regs = __dump_misc_regs,
+ .get_mc_regs = read_mc_regs,
+ .populate_csrows = init_csrows,
+};
+
+static const struct low_ops f15m30_ops = {
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f16_dbam_to_chip_select,
+ .prep_chip_select = f15m30_prep_chip_selects,
+ .get_base_mask = read_dct_base_mask,
+ .get_misc_regs = __dump_misc_regs,
+ .get_mc_regs = read_mc_regs,
+ .populate_csrows = init_csrows,
+};
+
+static const struct low_ops f15m60_ops = {
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f15_m60h_dbam_to_chip_select,
+ .prep_chip_select = default_prep_chip_selects,
+ .get_base_mask = read_dct_base_mask,
+ .get_misc_regs = __dump_misc_regs,
+ .get_mc_regs = read_mc_regs,
+ .populate_csrows = init_csrows,
+};
+
+static const struct low_ops f16_ops = {
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f16_dbam_to_chip_select,
+ .prep_chip_select = default_prep_chip_selects,
+ .get_base_mask = read_dct_base_mask,
+ .get_misc_regs = __dump_misc_regs,
+ .get_mc_regs = read_mc_regs,
+ .populate_csrows = init_csrows,
+};
+
+static const struct low_ops f17_ops = {
+ .early_channel_count = f17_early_channel_count,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
+ .prep_chip_select = f17_prep_chip_selects,
+ .get_base_mask = read_umc_base_mask,
+ .get_misc_regs = __dump_misc_regs_df,
+ .get_mc_regs = __read_mc_regs_df,
+ .populate_csrows = init_csrows_df,
+ .get_umc_err_info = update_umc_err_info,
+};
+
static struct amd64_family_type family_types[] = {
[K8_CPUS] = {
.ctl_name = "K8",
.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
.f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
.max_mcs = 2,
- .ops = {
- .early_channel_count = k8_early_channel_count,
- .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow,
- .dbam_to_cs = k8_dbam_to_chip_select,
- }
+ .ops = k8_ops,
},
[F10_CPUS] = {
.ctl_name = "F10h",
.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
.f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f10_dbam_to_chip_select,
- }
+ .ops = f10_ops,
},
[F15_CPUS] = {
.ctl_name = "F15h",
.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f15_dbam_to_chip_select,
- }
+ .ops = f15_ops,
},
[F15_M30H_CPUS] = {
.ctl_name = "F15h_M30h",
.f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f16_dbam_to_chip_select,
- }
+ .ops = f15m30_ops,
},
[F15_M60H_CPUS] = {
.ctl_name = "F15h_M60h",
.f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f15_m60h_dbam_to_chip_select,
- }
+ .ops = f15m60_ops,
},
[F16_CPUS] = {
.ctl_name = "F16h",
.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f16_dbam_to_chip_select,
- }
+ .ops = f16_ops,
},
[F16_M30H_CPUS] = {
.ctl_name = "F16h_M30h",
.f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f16_dbam_to_chip_select,
- }
+ .ops = f16_ops,
},
[F17_CPUS] = {
.ctl_name = "F17h",
.f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
+ .ops = f17_ops,
},
[F17_M10H_CPUS] = {
.ctl_name = "F17h_M10h",
.f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
+ .ops = f17_ops,
},
[F17_M30H_CPUS] = {
.ctl_name = "F17h_M30h",
.f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6,
.max_mcs = 8,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
+ .ops = f17_ops,
},
[F17_M60H_CPUS] = {
.ctl_name = "F17h_M60h",
.f0_id = PCI_DEVICE_ID_AMD_17H_M60H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_17H_M60H_DF_F6,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
+ .ops = f17_ops,
},
[F17_M70H_CPUS] = {
.ctl_name = "F17h_M70h",
.f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
+ .ops = f17_ops,
},
[F19_CPUS] = {
.ctl_name = "F19h",
.f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6,
.max_mcs = 8,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
+ .ops = f17_ops,
},
};

@@ -2899,10 +2940,13 @@ static inline void decode_bus_error(int node_id, struct mce *m)
* Currently, we can derive the channel number by looking at the 6th nibble in
* the instance_id. For example, instance_id=0xYXXXXX where Y is the channel
* number.
+ *
+ * csrow can be derived from the lower 3 bits of MCA_SYND value.
*/
-static int find_umc_channel(struct mce *m)
+static void update_umc_err_info(struct mce *m, struct err_info *err)
{
- return (m->ipid & GENMASK(31, 0)) >> 20;
+ err->channel = (m->ipid & GENMASK(31, 0)) >> 20;
+ err->csrow = m->synd & 0x7;
}

static void decode_umc_error(int node_id, struct mce *m)
@@ -2924,8 +2968,6 @@ static void decode_umc_error(int node_id, struct mce *m)
if (m->status & MCI_STATUS_DEFERRED)
ecc_type = 3;

- err.channel = find_umc_channel(m);
-
if (!(m->status & MCI_STATUS_SYNDV)) {
err.err_code = ERR_SYND;
goto log_error;
@@ -2940,7 +2982,7 @@ static void decode_umc_error(int node_id, struct mce *m)
err.err_code = ERR_CHANNEL;
}

- err.csrow = m->synd & 0x7;
+ pvt->ops->get_umc_err_info(m, &err);

if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
err.err_code = ERR_NORM_ADDR;
@@ -3058,6 +3100,27 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
}
}

+static void read_top_mem_registers(struct amd64_pvt *pvt)
+{
+ u64 msr_val;
+
+ /*
+ * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
+ * those are Read-As-Zero.
+ */
+ rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
+ edac_dbg(0, " TOP_MEM: 0x%016llx\n", pvt->top_mem);
+
+ /* Check first whether TOP_MEM2 is enabled: */
+ rdmsrl(MSR_AMD64_SYSCFG, msr_val);
+ if (msr_val & BIT(21)) {
+ rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
+ edac_dbg(0, " TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
+ } else {
+ edac_dbg(0, " TOP_MEM2 disabled\n");
+ }
+}
+
/*
* Retrieve the hardware registers of the memory controller.
*/
@@ -3067,6 +3130,8 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
struct amd64_umc *umc;
u32 i, umc_base;

+ read_top_mem_registers(pvt);
+
/* Read registers from each UMC */
for_each_umc(i) {

@@ -3079,6 +3144,8 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &umc->umc_cap_hi);
}
+
+ amd64_read_pci_cfg(pvt->F0, DF_DHAR, &pvt->dhar);
}

/*
@@ -3088,30 +3155,8 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
static void read_mc_regs(struct amd64_pvt *pvt)
{
unsigned int range;
- u64 msr_val;
-
- /*
- * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
- * those are Read-As-Zero.
- */
- rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
- edac_dbg(0, " TOP_MEM: 0x%016llx\n", pvt->top_mem);
-
- /* Check first whether TOP_MEM2 is enabled: */
- rdmsrl(MSR_AMD64_SYSCFG, msr_val);
- if (msr_val & BIT(21)) {
- rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
- edac_dbg(0, " TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
- } else {
- edac_dbg(0, " TOP_MEM2 disabled\n");
- }

- if (pvt->umc) {
- __read_mc_regs_df(pvt);
- amd64_read_pci_cfg(pvt->F0, DF_DHAR, &pvt->dhar);
-
- goto skip;
- }
+ read_top_mem_registers(pvt);

amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap);

@@ -3152,14 +3197,6 @@ static void read_mc_regs(struct amd64_pvt *pvt)
amd64_read_dct_pci_cfg(pvt, 1, DCLR0, &pvt->dclr1);
amd64_read_dct_pci_cfg(pvt, 1, DCHR0, &pvt->dchr1);
}
-
-skip:
- read_dct_base_mask(pvt);
-
- determine_memory_type(pvt);
- edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
-
- determine_ecc_sym_sz(pvt);
}

/*
@@ -3277,9 +3314,6 @@ static int init_csrows(struct mem_ctl_info *mci)
int nr_pages = 0;
u32 val;

- if (pvt->umc)
- return init_csrows_df(mci);
-
amd64_read_pci_cfg(pvt->F3, NBCFG, &val);

pvt->nbcfg = val;
@@ -3703,6 +3737,21 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
return NULL;
}

+ /* ops required for all the families */
+ if (!pvt->ops->early_channel_count || !pvt->ops->dbam_to_cs ||
+ !pvt->ops->prep_chip_select || !pvt->ops->get_base_mask ||
+ !pvt->ops->get_misc_regs || !pvt->ops->get_mc_regs ||
+ !pvt->ops->populate_csrows) {
+ edac_dbg(1, "Common helper routines not defined.\n");
+ return NULL;
+ }
+
+ /* ops required for families 17h and later */
+ if (pvt->fam >= 0x17 && !pvt->ops->get_umc_err_info) {
+ edac_dbg(1, "Platform specific helper routines not defined.\n");
+ return NULL;
+ }
+
return fam_type;
}

@@ -3735,7 +3784,16 @@ static int hw_info_get(struct amd64_pvt *pvt)
if (ret)
return ret;

- read_mc_regs(pvt);
+ pvt->ops->get_mc_regs(pvt);
+
+ pvt->ops->prep_chip_select(pvt);
+
+ pvt->ops->get_base_mask(pvt);
+
+ determine_memory_type(pvt);
+ edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
+
+ determine_ecc_sym_sz(pvt);

return 0;
}
@@ -3786,7 +3844,7 @@ static int init_one_instance(struct amd64_pvt *pvt)

setup_mci_misc_attrs(mci);

- if (init_csrows(mci))
+ if (pvt->ops->populate_csrows(mci))
mci->edac_cap = EDAC_FLAG_NONE;

ret = -ENODEV;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 85aa820bc165..881ff6322bc9 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -467,11 +467,17 @@ struct ecc_settings {
* functions and per device encoding/decoding logic.
*/
struct low_ops {
- int (*early_channel_count) (struct amd64_pvt *pvt);
+ int (*early_channel_count) (struct amd64_pvt *pvt);
void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr,
struct err_info *);
- int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct,
+ int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct,
unsigned cs_mode, int cs_mask_nr);
+ void (*prep_chip_select) (struct amd64_pvt *pvt);
+ void (*get_base_mask) (struct amd64_pvt *pvt);
+ void (*get_misc_regs) (struct amd64_pvt *pvt);
+ void (*get_mc_regs) (struct amd64_pvt *pvt);
+ int (*populate_csrows) (struct mem_ctl_info *mci);
+ void (*get_umc_err_info) (struct mce *m, struct err_info *err);
};

struct amd64_family_type {
--
2.25.1