Re: [patch V2 28/29] x86/irq/64: Remap the IRQ stack with guard pages

From: Andy Lutomirski
Date: Sun Apr 07 2019 - 01:10:01 EST


On Fri, Apr 5, 2019 at 8:11 AM Thomas Gleixner <tglx@xxxxxxxxxxxxx> wrote:
>
> From: Andy Lutomirski <luto@xxxxxxxxxx>
>
> The IRQ stack lives in percpu space, so an IRQ handler that overflows it
> will overwrite other data structures.
>
> Use vmap() to remap the IRQ stack so that it will have the usual guard
> pages that vmap/vmalloc allocations have. With this the kernel will panic
> immediately on an IRQ stack overflow.
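
(For context, the remap being discussed boils down to roughly the sketch
below. This is reconstructed from the quoted changelog, not copied from the
patch; irq_stack_backing_store and the exact helpers come from the rest of
the series, so treat the details as approximate.)

#include <linux/mm.h>		/* pfn_to_page() */
#include <linux/percpu.h>	/* per_cpu_ptr(), per_cpu_ptr_to_phys() */
#include <linux/vmalloc.h>	/* vmap() */

static int map_irq_stack(unsigned int cpu)
{
	char *stack = (char *)per_cpu_ptr(&irq_stack_backing_store, cpu);
	struct page *pages[IRQ_STACK_SIZE / PAGE_SIZE];
	void *va;
	int i;

	/* Find the struct page for each page backing the percpu stack. */
	for (i = 0; i < IRQ_STACK_SIZE / PAGE_SIZE; i++) {
		phys_addr_t pa = per_cpu_ptr_to_phys(stack + (i << PAGE_SHIFT));

		pages[i] = pfn_to_page(pa >> PAGE_SHIFT);
	}

	/*
	 * Remap the stack through vmap(). The vmalloc area keeps a guard
	 * page after the mapping, so running off the end of the stack now
	 * faults instead of silently corrupting neighbouring percpu data.
	 */
	va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, VM_MAP, PAGE_KERNEL);
	if (!va)
		return -ENOMEM;

	per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
	return 0;
}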

The 0day bot noticed that this dies with DEBUG_PAGEALLOC on. This is
because the store_stackinfo() function is utter garbage and this patch
correctly detects just how broken it is. The attached patch "fixes"
it. (It also contains a reliability improvement that should probably
get folded in, but is otherwise unrelated.)

A real fix would remove the generic kstack_end() function entirely
along with __HAVE_ARCH_KSTACK_END and would optionally replace
store_stackinfo() with something useful. Josh, do we have a generic
API to do a little stack walk like this? Otherwise, I don't think it
would be the end of the world to just remove the offending code.
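
For anyone following along, the generic fallback in question looks roughly
like this (include/linux/sched/task_stack.h); note that it simply assumes
every kernel stack ends on a THREAD_SIZE alignment boundary:

#ifndef __HAVE_ARCH_KSTACK_END
static inline int kstack_end(void *addr)
{
	/* Reliable end of stack detection:
	 * Some APM bios versions misalign the stack
	 */
	return !(((unsigned long)addr + sizeof(void *) - 1) &
		 (THREAD_SIZE - sizeof(void *)));
}
#endif

store_stackinfo() walks word by word from a local variable upward until
kstack_end() fires. A vmap()ed IRQ stack is only page-aligned, so the next
THREAD_SIZE boundary can lie past the top of the stack and the walk marches
into the guard page. And store_stackinfo() is only built with
CONFIG_DEBUG_PAGEALLOC, which is why 0day only tripped over it in that
config.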

--Andy
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 801c6f040faa..eb8939d28f96 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1510,6 +1510,12 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
 	&init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
 
+/*
+ * The initial hardirq_stack_ptr value of NULL is invalid. To prevent it
+ * from being used if an IRQ happens too early, we initialize irq_count to 1,
+ * which effectively disables ENTER_IRQ_STACK. The code that maps the IRQ
+ * stack will reset irq_count to -1.
+ */
 DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
-DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
+DEFINE_PER_CPU(unsigned int, irq_count) __visible = 1;
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 48caa3d31662..61c691889362 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -56,6 +56,7 @@ static int map_irq_stack(unsigned int cpu)
 		return -ENOMEM;
 
 	per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
+	pr_err("*** CPU %u: hardirq_stack_ptr = 0x%lx\n", cpu, (unsigned long)(va + IRQ_STACK_SIZE));
 	return 0;
 }
 #else
@@ -74,7 +75,14 @@ static int map_irq_stack(unsigned int cpu)

 int irq_init_percpu_irqstack(unsigned int cpu)
 {
+	int ret;
+
 	if (per_cpu(hardirq_stack_ptr, cpu))
 		return 0;
-	return map_irq_stack(cpu);
+	ret = map_irq_stack(cpu);
+	if (ret)
+		return ret;
+
+	per_cpu(irq_count, cpu) = -1;
+	return 0;
 }
diff --git a/mm/slab.c b/mm/slab.c
index 329bfe67f2ca..198e9948a874 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1481,6 +1481,7 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
 	*addr++ = caller;
 	*addr++ = smp_processor_id();
 	size -= 3 * sizeof(unsigned long);
+	/*
 	{
 		unsigned long *sptr = &caller;
 		unsigned long svalue;
@@ -1496,6 +1497,7 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
 			}
 		}
 
 	}
+	*/
 	*addr++ = 0x87654321;
 }
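
For completeness, the "reliability improvement" is the irq_count change in
the first hunk. On x86_64 the ENTER_IRQ_STACK macro in
arch/x86/entry/entry_64.S increments the percpu irq_count and only switches
to hardirq_stack_ptr when the count becomes zero; in C the gate is roughly
the following (illustration only; switch_to_irq_stack() is a made-up
stand-in for the actual asm stack switch):

	/* irq_count == -1 means "not currently on the IRQ stack". */
	if (this_cpu_inc_return(irq_count) == 0) {
		/*
		 * Outermost interrupt: switch stacks. Booting with
		 * irq_count = 1 means this increment can never produce 0,
		 * so the still-NULL hardirq_stack_ptr is never loaded
		 * before irq_init_percpu_irqstack() resets the count to -1.
		 */
		switch_to_irq_stack();
	}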