Re: [PATCH v3 01/11] perf: Rework perf_event_exit_event()

From: Marco Elver
Date: Thu Mar 25 2021 - 12:18:41 EST


On Thu, Mar 25, 2021 at 11:17AM +0100, Marco Elver wrote:
> On Wed, Mar 24, 2021 at 12:24PM +0100, Marco Elver wrote:
> > From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> >
> > Make perf_event_exit_event() more robust, such that we can use it from
> > other contexts. Specifically the up and coming remove_on_exec.
> >
> > For this to work we need to address a few issues. Remove_on_exec will
> > not destroy the entire context, so we cannot rely on TASK_TOMBSTONE to
> > disable event_function_call() and we thus have to use
> > perf_remove_from_context().
> >
> > When using perf_remove_from_context(), there's two races to consider.
> > The first is against close(), where we can have concurrent tear-down
> > of the event. The second is against child_list iteration, which should
> > not find a half baked event.
> >
> > To address this, teach perf_remove_from_context() to special case
> > !ctx->is_active and about DETACH_CHILD.
> >
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> > Signed-off-by: Marco Elver <elver@xxxxxxxxxx>
> > ---
> > v3:
> > * New dependency for series:
> > https://lkml.kernel.org/r/YFn/I3aKF+TOjGcl@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> > ---
>
> syzkaller found a crash with stack trace pointing at changes in this
> patch. Can't tell if this is an old issue or introduced in this series.

Yay, I found a reproducer. v5.12-rc4 is good, and sadly with this patch only we
crash. :-/

Here's a stacktrace with just this patch applied:

| BUG: kernel NULL pointer dereference, address: 00000000000007af
| #PF: supervisor read access in kernel mode
| #PF: error_code(0x0000) - not-present page
| PGD 0 P4D 0
| Oops: 0000 [#1] PREEMPT SMP PTI
| CPU: 7 PID: 465 Comm: a.out Not tainted 5.12.0-rc4+ #25
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
| RIP: 0010:task_pid_ptr kernel/pid.c:324 [inline]
| RIP: 0010:__task_pid_nr_ns+0x112/0x240 kernel/pid.c:500
| Code: e8 13 55 07 00 e8 1e a6 0e 00 48 c7 c6 83 1e 0b 81 48 c7 c7 a0 2e d5 82 e8 4b 08 04 00 44 89 e0 5b 5d 41 5c c3 e8 fe a5 0e 00 <48> 8b 85 b0 07 00 00 4a 8d ac e0 98 01 00 00 e9 5a ff ff ff e8 e5
| RSP: 0000:ffffc90001b73a60 EFLAGS: 00010093
| RAX: 0000000000000000 RBX: ffffffff82c69820 RCX: ffffffff810b1eb2
| RDX: ffff888108d143c0 RSI: 0000000000000000 RDI: ffffffff8299ccc6
| RBP: ffffffffffffffff R08: 0000000000000001 R09: 0000000000000000
| R10: ffff888108d14db8 R11: 0000000000000000 R12: 0000000000000001
| R13: ffffffffffffffff R14: ffffffffffffffff R15: ffff888108e05240
| FS: 0000000000000000(0000) GS:ffff88842fdc0000(0000) knlGS:0000000000000000
| CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
| CR2: 00000000000007af CR3: 0000000002c22002 CR4: 0000000000770ee0
| DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
| DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
| PKRU: 55555554
| Call Trace:
| perf_event_pid_type kernel/events/core.c:1412 [inline]
| perf_event_pid kernel/events/core.c:1421 [inline]
| perf_event_read_event+0x78/0x1d0 kernel/events/core.c:7406
| sync_child_event kernel/events/core.c:12404 [inline]
| perf_child_detach kernel/events/core.c:2223 [inline]
| __perf_remove_from_context+0x14d/0x280 kernel/events/core.c:2359
| perf_remove_from_context+0x9f/0xf0 kernel/events/core.c:2395
| perf_event_exit_event kernel/events/core.c:12442 [inline]
| perf_event_exit_task_context kernel/events/core.c:12523 [inline]
| perf_event_exit_task+0x276/0x4c0 kernel/events/core.c:12556
| do_exit+0x4cd/0xed0 kernel/exit.c:834
| do_group_exit+0x4d/0xf0 kernel/exit.c:922
| get_signal+0x1d2/0xf30 kernel/signal.c:2777
| arch_do_signal_or_restart+0xf7/0x750 arch/x86/kernel/signal.c:789
| handle_signal_work kernel/entry/common.c:147 [inline]
| exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
| exit_to_user_mode_prepare+0x113/0x190 kernel/entry/common.c:208
| irqentry_exit_to_user_mode+0x6/0x30 kernel/entry/common.c:314
| asm_exc_general_protection+0x1e/0x30 arch/x86/include/asm/idtentry.h:571

Attached is a C reproducer of the syzkaller program that crashes us.

Thanks,
-- Marco
// autogenerated by syzkaller (https://github.com/google/syzkaller)
/*
Generated from this syzkaller program:

clone(0x88004400, 0x0, 0x0, 0x0, 0x0)
perf_event_open(&(0x7f00000003c0)={0x4, 0x70, 0x40, 0x1, 0x3, 0x1, 0x0, 0x6, 0x10001, 0x0, 0x0, 0x1, 0x0, 0x1, 0x1, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x80000001, 0x2, @perf_bp={&(0x7f0000000380), 0xd}, 0x1000, 0x6, 0x0, 0x4, 0x1, 0x4, 0x8}, 0x0, 0xffffffffffffffff, 0xffffffffffffffff, 0x1)
clone(0x8000, &(0x7f0000000200)="3017248985480229c715f01f2776139977f49770d8181077dce816423a929ed5e59bf26ca77f2ba311b783dda29870d621ff2394424d9c799be5fa29f1ee42102645b56fd9727401d2fe52073c20023d4623dd48522d13dff56af96e4d73f53d62f3de841a58436c591733b58072f04a49bd5cf0473e3f568b604959c06365a82e0e1350550271c25298", &(0x7f0000000100), &(0x7f0000000140), &(0x7f00000002c0)="8c0e32ae8f2716cdf998f341eb4ff0b404c7dca07d9e895c109603d3552c42f07c0190860e4c880d03ba867e8d5d738172839bdbe974d38580e5bc8a91713bee4b859c1a4500f61f197d3610ef2f515474d0b302af29f64053899418054cdf0afe2e75f313f92daf84b3f77cdb10d9d002c44bf43d0cb532cce29b249aab4d6e8218e2528c95453d255e31715422b9d3014c35603fa361ec70136322a7366868f53b78b7c369496dc39cf8ea248b7345e378")
*/

#define _GNU_SOURCE

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

#define BITMASK(bf_off, bf_len) (((1ull << (bf_len)) - 1) << (bf_off))
#define STORE_BY_BITMASK(type, htobe, addr, val, bf_off, bf_len) \
*(type*)(addr) = \
htobe((htobe(*(type*)(addr)) & ~BITMASK((bf_off), (bf_len))) | \
(((type)(val) << (bf_off)) & BITMASK((bf_off), (bf_len))))

int main(void)
{
syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
syscall(__NR_clone, 0x88004400ul, 0ul, 0ul, 0ul, 0ul);
*(uint32_t*)0x200003c0 = 4;
*(uint32_t*)0x200003c4 = 0x70;
*(uint8_t*)0x200003c8 = 0x40;
*(uint8_t*)0x200003c9 = 1;
*(uint8_t*)0x200003ca = 3;
*(uint8_t*)0x200003cb = 1;
*(uint32_t*)0x200003cc = 0;
*(uint64_t*)0x200003d0 = 6;
*(uint64_t*)0x200003d8 = 0x10001;
*(uint64_t*)0x200003e0 = 0;
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 0, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 1, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 2, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 3, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 4, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 5, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 6, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 7, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 8, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 9, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 10, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 11, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 12, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 13, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 14, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 15, 2);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 17, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 18, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 19, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 20, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 21, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 22, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 23, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 24, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 25, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 26, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 27, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 28, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 29, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 30, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 31, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 32, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 33, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 1, 34, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 35, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 36, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 37, 1);
STORE_BY_BITMASK(uint64_t, , 0x200003e8, 0, 38, 26);
*(uint32_t*)0x200003f0 = 0x80000001;
*(uint32_t*)0x200003f4 = 2;
*(uint64_t*)0x200003f8 = 0x20000380;
*(uint64_t*)0x20000400 = 0xd;
*(uint64_t*)0x20000408 = 0x1000;
*(uint64_t*)0x20000410 = 6;
*(uint32_t*)0x20000418 = 0;
*(uint32_t*)0x2000041c = 4;
*(uint64_t*)0x20000420 = 1;
*(uint32_t*)0x20000428 = 4;
*(uint16_t*)0x2000042c = 8;
*(uint16_t*)0x2000042e = 0;
syscall(__NR_perf_event_open, 0x200003c0ul, 0, -1ul, -1, 1ul);
memcpy(
(void*)0x20000200,
"\x30\x17\x24\x89\x85\x48\x02\x29\xc7\x15\xf0\x1f\x27\x76\x13\x99\x77\xf4"
"\x97\x70\xd8\x18\x10\x77\xdc\xe8\x16\x42\x3a\x92\x9e\xd5\xe5\x9b\xf2\x6c"
"\xa7\x7f\x2b\xa3\x11\xb7\x83\xdd\xa2\x98\x70\xd6\x21\xff\x23\x94\x42\x4d"
"\x9c\x79\x9b\xe5\xfa\x29\xf1\xee\x42\x10\x26\x45\xb5\x6f\xd9\x72\x74\x01"
"\xd2\xfe\x52\x07\x3c\x20\x02\x3d\x46\x23\xdd\x48\x52\x2d\x13\xdf\xf5\x6a"
"\xf9\x6e\x4d\x73\xf5\x3d\x62\xf3\xde\x84\x1a\x58\x43\x6c\x59\x17\x33\xb5"
"\x80\x72\xf0\x4a\x49\xbd\x5c\xf0\x47\x3e\x3f\x56\x8b\x60\x49\x59\xc0\x63"
"\x65\xa8\x2e\x0e\x13\x50\x55\x02\x71\xc2\x52\x98",
138);
memcpy(
(void*)0x200002c0,
"\x8c\x0e\x32\xae\x8f\x27\x16\xcd\xf9\x98\xf3\x41\xeb\x4f\xf0\xb4\x04\xc7"
"\xdc\xa0\x7d\x9e\x89\x5c\x10\x96\x03\xd3\x55\x2c\x42\xf0\x7c\x01\x90\x86"
"\x0e\x4c\x88\x0d\x03\xba\x86\x7e\x8d\x5d\x73\x81\x72\x83\x9b\xdb\xe9\x74"
"\xd3\x85\x80\xe5\xbc\x8a\x91\x71\x3b\xee\x4b\x85\x9c\x1a\x45\x00\xf6\x1f"
"\x19\x7d\x36\x10\xef\x2f\x51\x54\x74\xd0\xb3\x02\xaf\x29\xf6\x40\x53\x89"
"\x94\x18\x05\x4c\xdf\x0a\xfe\x2e\x75\xf3\x13\xf9\x2d\xaf\x84\xb3\xf7\x7c"
"\xdb\x10\xd9\xd0\x02\xc4\x4b\xf4\x3d\x0c\xb5\x32\xcc\xe2\x9b\x24\x9a\xab"
"\x4d\x6e\x82\x18\xe2\x52\x8c\x95\x45\x3d\x25\x5e\x31\x71\x54\x22\xb9\xd3"
"\x01\x4c\x35\x60\x3f\xa3\x61\xec\x70\x13\x63\x22\xa7\x36\x68\x68\xf5\x3b"
"\x78\xb7\xc3\x69\x49\x6d\xc3\x9c\xf8\xea\x24\x8b\x73\x45\xe3\x78",
178);
syscall(__NR_clone, 0x8000ul, 0x20000200ul, 0x20000100ul, 0x20000140ul,
0x200002c0ul);
return 0;
}