[PATCH 4.4 14/21] parisc: Fix races in parisc_setup_cache_timing()

From: Greg Kroah-Hartman
Date: Wed Nov 30 2016 - 05:01:05 EST


4.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: John David Anglin <dave.anglin@xxxxxxxx>

commit 741dc7bf1c7c7d93b853bb55efe77baa27e1b0a9 upstream.

Helge reported to me the following startup crash:

[ 0.000000] Linux version 4.8.0-1-parisc64-smp (debian-kernel@xxxxxxxxxxxxxxxx) (gcc version 5.4.1 20161019 (GCC) ) #1 SMP Debian 4.8.7-1 (2016-11-13)
[ 0.000000] The 64-bit Kernel has started...
[ 0.000000] Kernel default page size is 4 KB. Huge pages enabled with 1 MB physical and 2 MB virtual size.
[ 0.000000] Determining PDC firmware type: System Map.
[ 0.000000] model 9000/785/J5000
[ 0.000000] Total Memory: 2048 MB
[ 0.000000] Memory: 2018528K/2097152K available (9272K kernel code, 3053K rwdata, 1319K rodata, 1024K init, 840K bss, 78624K reserved, 0K cma-reserved)
[ 0.000000] virtual kernel memory layout:
[ 0.000000] vmalloc : 0x0000000000008000 - 0x000000003f000000 (1007 MB)
[ 0.000000] memory : 0x0000000040000000 - 0x00000000c0000000 (2048 MB)
[ 0.000000] .init : 0x0000000040100000 - 0x0000000040200000 (1024 kB)
[ 0.000000] .data : 0x0000000040b0e000 - 0x0000000040f533e0 (4372 kB)
[ 0.000000] .text : 0x0000000040200000 - 0x0000000040b0e000 (9272 kB)
[ 0.768910] Brought up 1 CPUs
[ 0.992465] NET: Registered protocol family 16
[ 2.429981] Releasing cpu 1 now, hpa=fffffffffffa2000
[ 2.635751] CPU(s): 2 out of 2 PA8500 (PCX-W) at 440.000000 MHz online
[ 2.726692] Setting cache flush threshold to 1024 kB
[ 2.729932] Not-handled unaligned insn 0x43ffff80
[ 2.798114] Setting TLB flush threshold to 140 kB
[ 2.928039] Unaligned handler failed, ret = -1
[ 3.000419] _______________________________
[ 3.000419] < Your System ate a SPARC! Gah! >
[ 3.000419] -------------------------------
[ 3.000419] \ ^__^
[ 3.000419] (__)\ )\/\
[ 3.000419] U ||----w |
[ 3.000419] || ||
[ 9.340055] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.8.0-1-parisc64-smp #1 Debian 4.8.7-1
[ 9.448082] task: 00000000bfd48060 task.stack: 00000000bfd50000
[ 9.528040]
[ 10.760029] IASQ: 0000000000000000 0000000000000000 IAOQ: 000000004025d154 000000004025d158
[ 10.868052] IIR: 43ffff80 ISR: 0000000000340000 IOR: 000001ff54150960
[ 10.960029] CPU: 1 CR30: 00000000bfd50000 CR31: 0000000011111111
[ 11.052057] ORIG_R28: 000000004021e3b4
[ 11.100045] IAOQ[0]: irq_exit+0x94/0x120
[ 11.152062] IAOQ[1]: irq_exit+0x98/0x120
[ 11.208031] RP(r2): irq_exit+0xb8/0x120
[ 11.256074] Backtrace:
[ 11.288067] [<00000000402cd944>] cpu_startup_entry+0x1e4/0x598
[ 11.368058] [<0000000040109528>] smp_callin+0x2c0/0x2f0
[ 11.436308] [<00000000402b53fc>] update_curr+0x18c/0x2d0
[ 11.508055] [<00000000402b73b8>] dequeue_entity+0x2c0/0x1030
[ 11.584040] [<00000000402b3cc0>] set_next_entity+0x80/0xd30
[ 11.660069] [<00000000402c1594>] pick_next_task_fair+0x614/0x720
[ 11.740085] [<000000004020dd34>] __schedule+0x394/0xa60
[ 11.808054] [<000000004020e488>] schedule+0x88/0x118
[ 11.876039] [<0000000040283d3c>] rescuer_thread+0x4d4/0x5b0
[ 11.948090] [<000000004028fc4c>] kthread+0x1ec/0x248
[ 12.016053] [<0000000040205020>] end_fault_vector+0x20/0xc0
[ 12.092239] [<00000000402050c0>] _switch_to_ret+0x0/0xf40
[ 12.164044]
[ 12.184036] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.8.0-1-parisc64-smp #1 Debian 4.8.7-1
[ 12.244040] Backtrace:
[ 12.244040] [<000000004021c480>] show_stack+0x68/0x80
[ 12.244040] [<00000000406f332c>] dump_stack+0xec/0x168
[ 12.244040] [<000000004021c74c>] die_if_kernel+0x25c/0x430
[ 12.244040] [<000000004022d320>] handle_unaligned+0xb48/0xb50
[ 12.244040]
[ 12.632066] ---[ end trace 9ca05a7215c7bbb2 ]---
[ 12.692036] Kernel panic - not syncing: Attempted to kill the idle task!

We have the insn 0x43ffff80 in IIR but from IAOQ we should have:
4025d150: 0f f3 20 df ldd,s r19(r31),r31
4025d154: 0f 9f 00 9c ldw r31(ret0),ret0
4025d158: bf 80 20 58 cmpb,*<> r0,ret0,4025d18c <irq_exit+0xcc>

Cpu0 has just completed running parisc_setup_cache_timing:

[ 2.429981] Releasing cpu 1 now, hpa=fffffffffffa2000
[ 2.635751] CPU(s): 2 out of 2 PA8500 (PCX-W) at 440.000000 MHz online
[ 2.726692] Setting cache flush threshold to 1024 kB
[ 2.729932] Not-handled unaligned insn 0x43ffff80
[ 2.798114] Setting TLB flush threshold to 140 kB
[ 2.928039] Unaligned handler failed, ret = -1

>From the backtrace, cpu1 is in smp_callin:

void __init smp_callin(void)
{
int slave_id = cpu_now_booting;

smp_cpu_init(slave_id);
preempt_disable();

flush_cache_all_local(); /* start with known state */
flush_tlb_all_local(NULL);

local_irq_enable(); /* Interrupts have been off until now */

cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

So, it has just flushed its caches and the TLB. It would seem either the
flushes in parisc_setup_cache_timing or smp_callin have corrupted kernel
memory.

The attached patch reworks parisc_setup_cache_timing to remove the races
in setting the cache and TLB flush thresholds. It also corrects the
number of bytes flushed in the TLB calculation.

The patch flushes the cache and TLB on cpu0 before starting the
secondary processors so that they are started from a known state.

Tested with a few reboots on c8000.

Signed-off-by: John David Anglin <dave.anglin@xxxxxxxx>
Signed-off-by: Helge Deller <deller@xxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
arch/parisc/kernel/cache.c | 31 ++++++++++++-------------------
arch/parisc/kernel/setup.c | 4 ++++
2 files changed, 16 insertions(+), 19 deletions(-)

--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -351,6 +351,7 @@ void __init parisc_setup_cache_timing(vo
{
unsigned long rangetime, alltime;
unsigned long size, start;
+ unsigned long threshold;

alltime = mfctl(16);
flush_data_cache();
@@ -364,17 +365,12 @@ void __init parisc_setup_cache_timing(vo
printk(KERN_DEBUG "Whole cache flush %lu cycles, flushing %lu bytes %lu cycles\n",
alltime, size, rangetime);

- /* Racy, but if we see an intermediate value, it's ok too... */
- parisc_cache_flush_threshold = size * alltime / rangetime;
-
- parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold);
- if (!parisc_cache_flush_threshold)
- parisc_cache_flush_threshold = FLUSH_THRESHOLD;
-
- if (parisc_cache_flush_threshold > cache_info.dc_size)
- parisc_cache_flush_threshold = cache_info.dc_size;
-
- printk(KERN_INFO "Setting cache flush threshold to %lu kB\n",
+ threshold = L1_CACHE_ALIGN(size * alltime / rangetime);
+ if (threshold > cache_info.dc_size)
+ threshold = cache_info.dc_size;
+ if (threshold)
+ parisc_cache_flush_threshold = threshold;
+ printk(KERN_INFO "Cache flush threshold set to %lu KiB\n",
parisc_cache_flush_threshold/1024);

/* calculate TLB flush threshold */
@@ -383,7 +379,7 @@ void __init parisc_setup_cache_timing(vo
flush_tlb_all();
alltime = mfctl(16) - alltime;

- size = PAGE_SIZE;
+ size = 0;
start = (unsigned long) _text;
rangetime = mfctl(16);
while (start < (unsigned long) _end) {
@@ -396,13 +392,10 @@ void __init parisc_setup_cache_timing(vo
printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n",
alltime, size, rangetime);

- parisc_tlb_flush_threshold = size * alltime / rangetime;
- parisc_tlb_flush_threshold *= num_online_cpus();
- parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold);
- if (!parisc_tlb_flush_threshold)
- parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD;
-
- printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n",
+ threshold = PAGE_ALIGN(num_online_cpus() * size * alltime / rangetime);
+ if (threshold)
+ parisc_tlb_flush_threshold = threshold;
+ printk(KERN_INFO "TLB flush threshold set to %lu KiB\n",
parisc_tlb_flush_threshold/1024);
}

--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -334,6 +334,10 @@ static int __init parisc_init(void)
/* tell PDC we're Linux. Nevermind failure. */
pdc_stable_write(0x40, &osid, sizeof(osid));

+ /* start with known state */
+ flush_cache_all_local();
+ flush_tlb_all_local(NULL);
+
processor_init();
#ifdef CONFIG_SMP
pr_info("CPU(s): %d out of %d %s at %d.%06d MHz online\n",