[PATCH 3.6.6 3/3] i82975x_edac: fix fatal crash

From: Arvind R
Date: Thu Nov 22 2012 - 15:52:08 EST


Subject: [PATCH 3.6.6 3/3] i82975x_edac: fix fatal crash

This patch fixes the crash caused by the combination of wrong memory
layer info and dimm_label initialisation.
It is also a rewrite of csrow initialisation and error reporting to
handle ALL memory configurations supported by the controller.
Tested on Asus P5WDG2-WS PRO with 7 ECC memory configurations -
1 dimm installed, 2 dimms in asymmetric mode, 2 dimms in symmetric
mode, 3 dimms in asymmetric mode and 4 dimms in symmetric mode.
The initialised values in sysfs were found to be consistent with the
installed memory in all tested cases.

Tested-by: Arvind R. <arvino55@xxxxxxxxx>
Signed-off-by: Arvind R. <arvino55@xxxxxxxxx>
---
drivers/edac/i82975x_edac.c | 150 +++++++++++++++-------------------
1 file changed, 69 insertions(+), 81 deletions(-)
diff -up a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c
--- a/drivers/edac/i82975x_edac.c 2012-11-22 11:56:36.000000000 +0530
+++ b/drivers/edac/i82975x_edac.c 2012-11-22 10:29:51.000000000 +0530
@@ -29,8 +29,19 @@
#define PCI_DEVICE_ID_INTEL_82975_0 0x277c
#endif /* PCI_DEVICE_ID_INTEL_82975_0 */

-#define I82975X_NR_DIMMS 8
-#define I82975X_NR_CSROWS(nr_chans) (I82975X_NR_DIMMS / (nr_chans))
+#define I82975X_NR_ROWS_PER_CHANNEL 4 /* immutable, in controller */
+#define I82975X_NR_CHANS 2 /* immutable, in controller */
+/*
+ * the product of above immutable constants
+ * MUST equal
+ * the product of following 2 constants.
+ *
+ * max. value of either constant is 4.
+ */
+#define I82975X_RANKS_PER_DIMM 2 /* normally impl. on mobos */
+#define I82975X_NR_DIMMS 4 /* normally impl. on mobos */
+
+#define I82975X_GRAIN 7 /* immutable, in controller */

/* Intel 82975X register addresses - device 0 function 0 - DRAM Controller */
#define I82975X_EAP 0x58 /* Dram Error Address Pointer (32b)
@@ -305,32 +316,15 @@ static int i82975x_process_error_info(st
if (info->xeap & 1)
page |= 0x80000000;
page >>= (PAGE_SHIFT - 1);
+ chan = info->eap & 1;
row = edac_mc_find_csrow_by_page(mci, page);
+ offst = info->eap & ((1 << PAGE_SHIFT) - (1 << I82975X_GRAIN));
+ err_type = (info->errsts & I82975X_ERRSTS_UE)
+ ? HW_EVENT_ERR_UNCORRECTED :
+ HW_EVENT_ERR_CORRECTED;

- if (row == -1) {
- i82975x_mc_printk(mci, KERN_ERR, "error processing EAP:\n"
- "\tXEAP=%u\n"
- "\t EAP=0x%08x\n"
- "\tPAGE=0x%08x\n",
- (info->xeap & 1) ? 1 : 0, info->eap, (unsigned int) page);
- return 0;
- }
- chan = (mci->csrows[row]->nr_channels == 1) ? 0 : info->eap & 1;
- offst = info->eap
- & ((1 << PAGE_SHIFT) -
- (1 << mci->csrows[row]->channels[chan]->dimm->grain));
-
- if (info->errsts & 0x0002)
- edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
- page, offst, 0,
- row, -1, -1,
- "i82975x UE", "");
- else
- edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
- page, offst, info->derrsyn,
- row, chan ? chan : 0, -1,
- "i82975x CE", "");
-
+ edac_mc_handle_error(err_type, mci, 1, page, offst, info->derrsyn,
+ row, chan, -1, "i82975x UE", "");
return 1;
}

@@ -343,20 +337,17 @@ static void i82975x_check(struct mem_ctl
i82975x_process_error_info(mci, &info, 1);
}

-static void i82975x_init_csrows(struct mem_ctl_info *mci,
- struct pci_dev *pdev, void __iomem *mch_window)
+static void __devinit i82975x_init_csrows(struct mem_ctl_info *mci,
+ void __iomem *mch_window, bool is_mode_symmetric)
{
- static const char *labels[4] = {
- "DIMM A1", "DIMM A2",
- "DIMM B1", "DIMM B2"
- };
+ static const char *label_prefix = "DIMM";
+ static const char chan_designator[I82975X_NR_CHANS] = {'A', 'B'};
struct csrow_info *csrow;
unsigned long last_cumul_size;
u8 value;
u32 cumul_size, nr_pages;
- int index, chan;
+ int row, chan;
struct dimm_info *dimm;
- enum dev_type dtype;

last_cumul_size = 0;

@@ -369,47 +360,39 @@ static void i82975x_init_csrows(struct m
*
*/

- for (index = 0; index < mci->nr_csrows; index++) {
- csrow = mci->csrows[index];
-
- value = readb(mch_window + I82975X_DRB + index +
- ((index >= 4) ? 0x80 : 0));
- cumul_size = value;
- cumul_size <<= (I82975X_DRB_SHIFT - PAGE_SHIFT);
- /*
- * Adjust cumul_size w.r.t number of channels
- *
- */
- if (csrow->nr_channels > 1)
- cumul_size <<= 1;
- edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
-
- nr_pages = cumul_size - last_cumul_size;
- if (!nr_pages)
- continue;
-
- /*
- * Initialise dram labels
- * index values:
- * [0-7] for single-channel; i.e. csrow->nr_channels = 1
- * [0-3] for dual-channel; i.e. csrow->nr_channels = 2
- */
- for (chan = 0; chan < csrow->nr_channels; chan++) {
- dimm = mci->csrows[index]->channels[chan]->dimm;
-
- dimm->nr_pages = nr_pages / csrow->nr_channels;
- strncpy(csrow->channels[chan]->dimm->label,
- labels[(index >> 1) + (chan * 2)],
- EDAC_MC_LABEL_LEN);
- dimm->grain = 1 << 7; /* always */
+ for (chan = 0; chan < mci->num_cschannel; chan++) {
+ for (row = 0; row < mci->nr_csrows; row++) {
+ value = readb(mch_window + I82975X_DRB
+ + row + (chan ? 0x80 : 0));
+ cumul_size = value;
+ cumul_size <<= (I82975X_DRB_SHIFT - PAGE_SHIFT);
+ edac_dbg(3, "(row: %d ch: %d) cumul_size 0x%x\n",
+ row, chan, cumul_size);
+
+ nr_pages = cumul_size - last_cumul_size;
+ if (!nr_pages)
+ continue;
+ /*
+ * Initialise dram labels
+ */
+ csrow = mci->csrows[row];
+ dimm = csrow->channels[chan]->dimm;
+ dimm->nr_pages = nr_pages;
+ snprintf(dimm->label, EDAC_MC_LABEL_LEN, "%s %c%d",
+ label_prefix,
+ chan_designator[chan],
+ row / I82975X_RANKS_PER_DIMM);
+ dimm->grain = 1 << I82975X_GRAIN; /* always */
dimm->dtype = DEV_X8; /* ECC only with DEV_X8 */
- dimm->mtype = MEM_DDR2; /* only supported */
+ dimm->mtype = MEM_DDR2; /* supports only DDR2 */
dimm->edac_mode = EDAC_SECDED; /* only supported */
- }

- csrow->first_page = last_cumul_size;
- csrow->last_page = cumul_size - 1;
- last_cumul_size = cumul_size;
+ csrow->first_page = last_cumul_size;
+ csrow->last_page = cumul_size - 1;
+ last_cumul_size = cumul_size;
+ }
+ if (is_mode_symmetric)
+ last_cumul_size = 0;
}
}

@@ -421,8 +404,8 @@ static bool __devinit detect_channel_mod
for (chan_mode = true, row = 0;
chan_mode && (row < I82975X_NR_ROWS_PER_CHANNEL);
row++)
- chan_mode &= (readb(mch_window + I82975X_DRB + row) ==
- readb(mch_window + I82975X_DRB + row + 0x80));
+ chan_mode &= (readb(mch_window + I82975X_DRB + row)
+ == readb(mch_window + I82975X_DRB + row + 0x80));
return chan_mode;
}

@@ -538,7 +521,6 @@ static int __devinit i82975x_probe1(stru
u32 mchbar;
u32 drc[2];
struct i82975x_error_info discard;
- int chans;
bool is_symmetric_config;

edac_dbg(0, "\n");
@@ -550,10 +532,13 @@ static int __devinit i82975x_probe1(stru
}
mchbar &= 0xffffc000; /* bits 31:14 used for 16K window */
mch_window = ioremap_nocache(mchbar, 0x1000);
+ if (!mch_window)
+ return -ENODEV;

is_symmetric_config = detect_channel_mode(mch_window);
drc[0] = readl(mch_window + I82975X_DRC_CH0M0);
drc[1] = readl(mch_window + I82975X_DRC_CH1M0);
+
#ifdef CONFIG_EDAC_DEBUG
i82975x_print_dram_config(mch_window, mchbar, drc,
is_symmetric_config);
@@ -567,10 +552,10 @@ static int __devinit i82975x_probe1(stru
}

layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
- layers[0].size = I82975X_NR_DIMMS;
+ layers[0].size = I82975X_NR_ROWS_PER_CHANNEL;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
- layers[1].size = I82975X_NR_CSROWS(chans);
+ layers[1].size = I82975X_NR_CHANS;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (!mci) {
@@ -589,11 +574,14 @@ static int __devinit i82975x_probe1(stru
mci->dev_name = pci_name(pdev);
mci->edac_check = i82975x_check;
mci->ctl_page_to_phys = NULL;
- edac_dbg(3, "init pvt\n");
+ mci->scrub_mode = SCRUB_HW_SRC;
+
+ /* initialise private structure */
pvt = (struct i82975x_pvt *) mci->pvt_info;
pvt->mch_window = mch_window;
- i82975x_init_csrows(mci, pdev, mch_window);
- mci->scrub_mode = SCRUB_HW_SRC;
+
+ edac_dbg(3, "init csrows\n");
+ i82975x_init_csrows(mci, mch_window, is_symmetric_config);
i82975x_get_error_info(mci, &discard); /* clear counters */

/* finalize this instance of memory controller with edac core */
@@ -655,7 +643,7 @@ static void __devexit i82975x_remove_one
static DEFINE_PCI_DEVICE_TABLE(i82975x_pci_tbl) = {
{
PCI_VEND_DEV(INTEL, 82975_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
- I82975X
+ I82975X_chip0
},
{
0,
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/