pmu::read() called erroneously in v4.13-rc{3,4}

From: Mark Rutland
Date: Thu Aug 10 2017 - 13:37:07 EST


Hi,

While running Vince's perf fuzzer on arm64 v4.13-rc3, I found we call
pmu::read() for an event whose event::cpu != smp_processor_id(), and
event::oncpu == -1, violating the usual pmu::read() requirements.

I do not see this on v4.12, and a bisect points to commit:

ba5213ae6b88fb17 ("perf/core: Correct event creation with PERF_FORMAT_GROUP")

I minimised the of into the test below, which demonstrates the issue on
arm64, and on x86 with a sanity check in x86_perf_event_update(), also
below. Example splat at the end of the email.

In the absence of the sanity check, the issue results in a (silent)
erroneous read of a PMC MSR.

Thanks,
Mark.

---->8----
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <linux/perf_event.h>

#include <asm/unistd.h>

pid_t create_child(void)
{
pid_t p;
p = fork();

/* we are the child */
if (p == 0)
do { } while (1);
if (p == -1)
exit(errno);

return p;
}

int perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
int cpu, int group_fd, unsigned long flags)
{
int ret;
ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
group_fd, flags);
return ret;
}

int event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
int group_fd, int flags)
{
int fd;

fd = perf_event_open(attr, pid, cpu, group_fd, flags);
if (fd == -1) {
fprintf(stderr, "%s(%ld) failed with %d\n",
__func__, (long)pid, errno);
exit(-1);
}

return fd;
}

struct perf_event_attr e1_attr = {
.size = sizeof(e1_attr),
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_INSTRUCTIONS,
.read_format = PERF_FORMAT_GROUP,
.exclude_kernel = 1,
.inherit = 1,
};

struct perf_event_attr e2_attr = {
.size = sizeof(e2_attr),
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_INSTRUCTIONS,
.read_format = PERF_FORMAT_GROUP,
.exclude_kernel = 1,
.inherit = 1,
.inherit_stat = 1,
};

int main(int argc, char *argv)
{
pid_t child;
int e1, e2;

e1 = event_open(&e1_attr, 0, 0, -1, PERF_FLAG_FD_OUTPUT);
e2 = event_open(&e2_attr, 0, 1, e1, 0);

mmap(NULL, 32887, PROT_READ | PROT_WRITE, MAP_SHARED, e2, 0);

child = create_child();
kill(child, SIGKILL);
waitpid(child, NULL, 0);

return 0;
}
---->8----

---->8----
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8e3db8f6..c5c9238 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -72,6 +72,8 @@ u64 x86_perf_event_update(struct perf_event *event)
int idx = hwc->idx;
u64 delta;

+ WARN_ON(event->oncpu != smp_processor_id());
+
if (idx == INTEL_PMC_IDX_FIXED_BTS)
return 0;

---->8----

[ 230.909172] ------------[ cut here ]------------
[ 230.913796] WARNING: CPU: 1 PID: 3637 at arch/x86/events/core.c:75 x86_perf_event_update+0xbd/0xd0
[ 230.922739] Modules linked in:
[ 230.925798] CPU: 1 PID: 3637 Comm: wrong-counter Tainted: G W 4.13.0-rc4+ #230
[ 230.934132] Hardware name: LENOVO 7484A3G/LENOVO, BIOS 5CKT54AUS 09/07/2009
[ 230.941080] task: ffff9ad5b0232640 task.stack: ffffb2f001aac000
[ 230.946989] RIP: 0010:x86_perf_event_update+0xbd/0xd0
[ 230.952038] RSP: 0018:ffffb2f001aaf978 EFLAGS: 00010282
[ 230.957257] RAX: 0000000000000020 RBX: ffffb2f001aafa48 RCX: 0000000000000000
[ 230.964378] RDX: 0000000000000001 RSI: ffff9ad5a9d33400 RDI: ffff9ad5a9d32800
[ 230.971498] RBP: ffffb2f001aaf9a0 R08: 0000000000000000 R09: 0000000000000028
[ 230.978622] R10: ffff9ad5b0232640 R11: ffff9ad5a9cd4840 R12: 0000000000000002
[ 230.985745] R13: 0000000000000018 R14: 0000000000000001 R15: ffff9ad5a9d32800
[ 230.992866] FS: 0000000000000000(0000) GS:ffff9ad5bec80000(0000) knlGS:0000000000000000
[ 231.000939] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 231.006680] CR2: 00007f26cba069d0 CR3: 0000000085a0a000 CR4: 00000000000406e0
[ 231.013802] Call Trace:
[ 231.016254] x86_pmu_read+0x9/0x10
[ 231.019656] perf_output_read+0x19f/0x410
[ 231.023665] ? __alloc_pages_nodemask+0xe8/0x1f0
[ 231.028274] perf_event_read_event+0xd2/0x110
[ 231.032628] ? x86_pmu_del+0x3e/0x130
[ 231.036290] ? __intel_pmu_enable_all.isra.12+0x27/0x90
[ 231.041512] ? intel_pmu_enable_all+0xb/0x10
[ 231.045779] ? x86_pmu_enable+0x25e/0x2f0
[ 231.049787] ? group_sched_out+0x8c/0xd0
[ 231.053708] ? perf_pmu_enable+0x22/0x30
[ 231.057629] ? update_group_times+0x13/0x40
[ 231.061812] ? list_del_event+0x66/0xc0
[ 231.065647] perf_event_exit_task+0x2fa/0x380
[ 231.069999] do_exit+0x2b0/0xb50
[ 231.070033] CE: hpet2 increased min_delta_ns to 20115 nsec
[ 231.078710] do_group_exit+0x3a/0xa0
[ 231.082285] get_signal+0x1c2/0x590
[ 231.085776] do_signal+0x23/0x660
[ 231.089092] ? __do_page_fault+0x23e/0x490
[ 231.093187] ? do_page_fault+0xc/0x10
[ 231.096849] ? page_fault+0x22/0x30
[ 231.100337] exit_to_usermode_loop+0x41/0x7c
[ 231.104603] syscall_return_slowpath+0x55/0x60
[ 231.109044] ret_from_fork+0x15/0x30
[ 231.112617] RIP: 0033:0x7f26cb502156
[ 231.116188] RSP: 002b:00007ffdd98750b0 EFLAGS: 00000246 ORIG_RAX: 0000000000000038
[ 231.123744] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00007f26cb502156
[ 231.130868] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000001200011
[ 231.137991] RBP: 00007ffdd98750d0 R08: 0000000000000000 R09: 00007f26cba06700
[ 231.145113] R10: 00007f26cba069d0 R11: 0000000000000246 R12: 0000000000000000
[ 231.152234] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ 231.159358] Code: 41 5e 41 5f 5d c3 48 89 d3 89 cf 31 d2 48 c1 e3 20 48 09 c3 48 89 de e8 c2 67 36 00 eb b1 5b 41 5c 41 5d 41 5e 41 5f 31 c0 5d c3 <0f> ff e9 72 ff ff ff 66 66 66 2e 0f 1f 84 00 00 00 00 00 55 48
[ 231.178197] ---[ end trace 9666f9b88e30f5e5 ]---
[ 387.883184] ------------[ cut here ]------------