Re: [BUG] 2.6.25-rc2-git4 - Regression Kernel oops while running kernbench and tbench on powerpc
From: Paul Mackerras
Date: Mon Apr 14 2008 - 06:04:22 EST
Kamalesh Babulal writes:
> The SHA1 ID of the kernel is 0e81a8ae37687845f7cdfa2adce14ea6a5f1dd34 (2.6.25-rc8)
> and the source seems to have the patch 44387e9ff25267c78a99229aca55ed750e9174c7.
>
> The kernel was patched with only the patch you gave me (http://lkml.org/lkml/2008/4/8/42).
Please try again with both that patch and the one below. Once again,
it won't fix the bug, but it will give us more information. When the
oops occurs, the kernel will print a lot of debug information that
should help locate the problem.
Paul.
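For readers following along: the patch below keeps a 64-entry per-CPU log in the paca. Each instrumented SLB update path appends a four-word record (an event type, the timebase, and the ESID/VSID data handed to the SLB instruction), the exception exit path also snapshots SLB entries 0 and 1 into last_slb, and dump_unrecov_slb() replays the whole log when the unrecoverable exception hits. The stand-alone C sketch here only illustrates that ring-buffer scheme; the names slb_log_entry, log_slb_event() and read_timebase() are invented for the example and do not appear in the patch.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * User-space sketch of the SLB update log the patch adds to the paca.
 * Only the record layout and the wrap-around logic mirror the patch.
 */
#define SLBLOG_ENTRIES 64

struct slb_log_entry {
	uint64_t event;	/* 1 = SLB miss, 2 = slbie, 3 = flush/rebolt, 4 = exception exit */
	uint64_t tb;	/* timebase value when the update happened */
	uint64_t esid;	/* ESID (or slbie) data fed to the SLB instruction */
	uint64_t vsid;	/* VSID data, 0 where not applicable */
};

static struct slb_log_entry slblog[SLBLOG_ENTRIES];
static long slblog_ix;

/* Stand-in for mftb(); any monotonically increasing counter works here. */
static uint64_t read_timebase(void)
{
	static uint64_t fake_tb;
	return ++fake_tb;
}

/* Record one SLB update, advancing the index modulo the buffer size. */
static void log_slb_event(uint64_t event, uint64_t esid, uint64_t vsid)
{
	slblog_ix = (slblog_ix + 1) & (SLBLOG_ENTRIES - 1);
	slblog[slblog_ix].event = event;
	slblog[slblog_ix].tb = read_timebase();
	slblog[slblog_ix].esid = esid;
	slblog[slblog_ix].vsid = vsid;
}

/* Walk the whole ring once, the way dump_unrecov_slb() replays it on an oops. */
static void dump_slb_log(void)
{
	long start = slblog_ix;
	long entry = slblog_ix;

	do {
		printf("%ld: %" PRIx64 " %" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n",
		       entry, slblog[entry].event, slblog[entry].tb,
		       slblog[entry].esid, slblog[entry].vsid);
		entry = (entry + 1) % SLBLOG_ENTRIES;
	} while (entry != start);
}

int main(void)
{
	/* A couple of fabricated updates, just to exercise the ring buffer. */
	log_slb_event(2, 0xc000000000000000ULL, 0);
	log_slb_event(1, 0xd000000000000000ULL, 0x0000000012345678ULL);
	dump_slb_log();
	return 0;
}

The & (SLBLOG_ENTRIES - 1) wrap in the sketch corresponds to the clrldi rN,rN,64-6 used in the assembly paths and the & 63 used in slb.c.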
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index e932b43..f16db50 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -144,6 +144,9 @@ int main(void)
DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));
DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
+ DEFINE(PACASLBLOG, offsetof(struct paca_struct, slblog));
+ DEFINE(PACASLBLOGIX, offsetof(struct paca_struct, slblog_ix));
+ DEFINE(PACALASTSLB, offsetof(struct paca_struct, last_slb));
DEFINE(SLBSHADOW_STACKVSID,
offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 148a354..663df17 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -419,6 +419,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
slbmte r7,r0
isync
+ ld r4,PACASLBLOGIX(r13)
+ addi r4,r4,1
+ clrldi r4,r4,64-6
+ std r4,PACASLBLOGIX(r13)
+ sldi r4,r4,5
+ add r4,r4,r13
+ addi r4,r4,PACASLBLOG
+ li r5,4
+ std r5,0(r4)
+ mftb r5
+ std r5,8(r4)
+ std r6,16(r4)
+ std r0,24(r4)
2:
clrrdi r7,r8,THREAD_SHIFT /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
@@ -533,6 +546,17 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
stdcx. r0,0,r1 /* to clear the reservation */
+ li r4,0
+ slbmfee r2,r4
+ std r2,PACALASTSLB(r13)
+ slbmfev r2,r4
+ std r2,PACALASTSLB+8(r13)
+ li r4,1
+ slbmfee r2,r4
+ std r2,PACALASTSLB+16(r13)
+ slbmfev r2,r4
+ std r2,PACALASTSLB+24(r13)
+
/*
* Clear RI before restoring r13. If we are returning to
* userspace and we take an exception after restoring r13,
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 4b5b7ff..c918f33 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1141,6 +1141,40 @@ void SPEFloatingPointException(struct pt_regs *regs)
}
#endif
+static void dump_unrecov_slb(void)
+{
+#ifdef CONFIG_PPC64
+ long entry, rstart;
+ unsigned long esid, vsid;
+
+ printk(KERN_EMERG "SLB contents now:\n");
+ for (entry = 0; entry < 64; ++entry) {
+ asm volatile("slbmfee %0,%1" : "=r" (esid) : "r" (entry));
+ if (esid == 0)
+ /* valid bit is clear along with everything else */
+ continue;
+ asm volatile("slbmfev %0,%1" : "=r" (vsid) : "r" (entry));
+ printk(KERN_EMERG "%ld: %.16lx %.16lx\n", entry, esid, vsid);
+ }
+
+ printk(KERN_EMERG "SLB 0-1 at last exception exit:\n");
+ printk(KERN_EMERG "0: %.16lx %.16lx\n", get_paca()->last_slb[0][0],
+ get_paca()->last_slb[0][1]);
+ printk(KERN_EMERG "1: %.16lx %.16lx\n", get_paca()->last_slb[1][0],
+ get_paca()->last_slb[1][1]);
+ printk(KERN_EMERG "SLB update log:\n");
+ rstart = entry = get_paca()->slblog_ix;
+ do {
+ printk(KERN_EMERG "%ld: %lx %lx %.16lx %.16lx\n", entry,
+ get_paca()->slblog[entry][0],
+ get_paca()->slblog[entry][1],
+ get_paca()->slblog[entry][2],
+ get_paca()->slblog[entry][3]);
+ entry = (entry + 1) % 64;
+ } while (entry != rstart);
+#endif
+}
+
/*
* We enter here if we get an unrecoverable exception, that is, one
* that happened at a point where the RI (recoverable interrupt) bit
@@ -1151,6 +1185,8 @@ void unrecoverable_exception(struct pt_regs *regs)
{
printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n",
regs->trap, regs->nip);
+ if (regs->trap == 0x4100)
+ dump_unrecov_slb();
die("Unrecoverable exception", regs, SIGABRT);
}
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 906daed..235edf7 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -105,6 +105,7 @@ void slb_flush_and_rebolt(void)
* appropriately too. */
unsigned long linear_llp, vmalloc_llp, lflags, vflags;
unsigned long ksp_esid_data, ksp_vsid_data;
+ long logix;
WARN_ON(!irqs_disabled());
@@ -144,6 +145,13 @@ void slb_flush_and_rebolt(void)
"r"(ksp_vsid_data),
"r"(ksp_esid_data)
: "memory");
+ logix = get_paca()->slblog_ix;
+ logix = (logix + 1) & 63;
+ get_paca()->slblog_ix = logix;
+ get_paca()->slblog[logix][0] = 3;
+ get_paca()->slblog[logix][1] = mftb();
+ get_paca()->slblog[logix][2] = ksp_esid_data;
+ get_paca()->slblog[logix][3] = ksp_vsid_data;
}
void slb_vmalloc_update(void)
@@ -192,6 +200,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
unsigned long pc = KSTK_EIP(tsk);
unsigned long stack = KSTK_ESP(tsk);
unsigned long unmapped_base;
+ long logix;
if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) &&
offset <= SLB_CACHE_ENTRIES) {
@@ -204,6 +213,14 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
<< SLBIE_SSIZE_SHIFT;
slbie_data |= SLBIE_C; /* C set for user addresses */
asm volatile("slbie %0" : : "r" (slbie_data));
+
+ logix = get_paca()->slblog_ix;
+ logix = (logix + 1) & 63;
+ get_paca()->slblog_ix = logix;
+ get_paca()->slblog[logix][0] = 2;
+ get_paca()->slblog[logix][1] = mftb();
+ get_paca()->slblog[logix][2] = slbie_data;
+ get_paca()->slblog[logix][3] = 0;
}
asm volatile("isync" : : : "memory");
} else {
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 657f6b3..8c7ce20 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -249,6 +249,20 @@ _GLOBAL(slb_compare_rr_to_size)
*/
slbmte r11,r10
+ ld r3,PACASLBLOGIX(r13)
+ addi r3,r3,1
+ clrldi r3,r3,64-6
+ std r3,PACASLBLOGIX(r13)
+ sldi r3,r3,5
+ add r3,r3,r13
+ addi r3,r3,PACASLBLOG
+ li r9,1
+ std r9,0(r3)
+ mftb r9
+ std r9,8(r3)
+ std r11,16(r3)
+ std r10,24(r3)
+
/* we're done for kernel addresses */
crclr 4*cr0+eq /* set result to "success" */
bgelr cr7
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index a1ab25c..959ef26 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -325,6 +325,8 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err)
if (err->disposition == RTAS_DISP_FULLY_RECOVERED) {
/* Platform corrected itself */
+ printk(KERN_ERR "FWNMI: platform corrected error %.16lx\n",
+ *(unsigned long *)err);
nonfatal = 1;
} else if ((regs->msr & MSR_RI) &&
user_mode(regs) &&
diff --git a/include/asm-powerpc/paca.h b/include/asm-powerpc/paca.h
index 748b35a..6280b82 100644
--- a/include/asm-powerpc/paca.h
+++ b/include/asm-powerpc/paca.h
@@ -115,6 +115,11 @@ struct paca_struct {
u64 system_time; /* accumulated system TB ticks */
u64 startpurr; /* PURR/TB value snapshot */
u64 startspurr; /* SPURR value snapshot */
+
+ /* SLB update log */
+ long slblog_ix;
+ u64 slblog[64][4];
+ u64 last_slb[2][2];
};
extern struct paca_struct paca[];