Re: "BUG: using smp_processor_id() in preemptible" with KPTI on 4.14.11

From: Peter Zijlstra
Date: Thu Jan 04 2018 - 12:07:23 EST


On Thu, Jan 04, 2018 at 04:37:24PM +0100, Thomas Gleixner wrote:
> > Yes:
> >
> > BUG: using smp_processor_id() in preemptible [00000000] code: ovsdb-server/4498
> > caller is native_flush_tlb_single+0x57/0xc0
> > CPU: 2 PID: 4498 Comm: ovsdb-server Not tainted 4.15.0-rc6-kvm-00423-gea1908c252eb #3
> > Hardware name: MSI MS-7798/B75MA-P45 (MS-7798), BIOS V1.9 09/30/2013
> > Call Trace:
> > dump_stack+0x5c/0x86
> > check_preemption_disabled+0xdd/0xe0
> > native_flush_tlb_single+0x57/0xc0
> > ? __set_pte_vaddr+0x2d/0x40
> > __set_pte_vaddr+0x2d/0x40
> > set_pte_vaddr+0x2f/0x40
> > cea_set_pte+0x30/0x40
> > ds_update_cea.constprop.4+0x4d/0x70
> > reserve_ds_buffers+0x159/0x410
> > ? wp_page_copy+0x370/0x6c0
> > x86_reserve_hardware+0x150/0x160
> > x86_pmu_event_init+0x3e/0x1f0
> > perf_try_init_event+0x69/0x80
> > perf_event_alloc+0x652/0x740
> > SyS_perf_event_open+0x3f6/0xd60
> > do_syscall_64+0x5c/0x190
> > entry_SYSCALL64_slow_path+0x25/0x25
> > RIP: 0033:0x72bff0a3c0b9
> > RSP: 002b:00007ffed11c2f18 EFLAGS: 00000206 ORIG_RAX: 000000000000012a
> > RAX: ffffffffffffffda RBX: 00007ffed11c30f0 RCX: 000072bff0a3c0b9
> > RDX: 00000000ffffffff RSI: 0000000000000000 RDI: 00007ffed11c2f20
> > RBP: 0000000000000000 R08: 0000000000000000 R09: 0000007000000000
> > R10: 00000000ffffffff R11: 0000000000000206 R12: 0000000000000008
> > R13: 0000000000000000 R14: 00007ffed11c30d0 R15: 000060986ecfb600

Fun, so set_pte_vaddr() and the whole cpu_entry_area are supposed to be
per-CPU. But the DS crud does cross-CPU updates of those tables.

So we need some additional fun and games...

How's the below?

---
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8f0aace08b87..8156e47da7ba 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -5,6 +5,7 @@

#include <asm/cpu_entry_area.h>
#include <asm/perf_event.h>
+#include <asm/tlbflush.h>
#include <asm/insn.h>

#include "../perf_event.h"
@@ -283,20 +284,35 @@ static DEFINE_PER_CPU(void *, insn_buffer);

static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
+ unsigned long start = (unsigned long)cea;
phys_addr_t pa;
size_t msz = 0;

pa = virt_to_phys(addr);
+
+ preempt_disable();
for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
cea_set_pte(cea, pa, prot);
+
+ /*
+ * This is a cross-CPU update of the cpu_entry_area, we must shoot down
+ * all TLB entries for it.
+ */
+ flush_tlb_kernel_range(start, start + size);
+ preempt_enable();
}

static void ds_clear_cea(void *cea, size_t size)
{
+ unsigned long start = (unsigned long)cea;
size_t msz = 0;

+ preempt_disable();
for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
cea_set_pte(cea, 0, PAGE_NONE);
+
+ flush_tlb_kernel_range(start, start + size);
+ preempt_enable();
}

static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)