perf/ftrace lockup on 3.12-rc6 with trigger code

From: Vince Weaver
Date: Thu Oct 24 2013 - 13:22:40 EST


Hello

after a month of trying I finally got a small test-case out of my
perf_fuzzer suite that triggers a system lockup with just one syscall.

Attached is the code that triggers it.

I tried tracking through the ftrace code to find out what's going on, but
I quickly get lost. The perf/ftrace code does do questionable things like
copy at 64-bit config value into an "int".

You may recall this bug triggers messages like this before total system
lockup:

[ 47.813436] ------------[ cut here ]------------
[ 47.813436] WARNING: CPU: 0 PID: 2781 at kernel/watchdog.c:245 watchdog_over)
[ 47.813436] Watchdog detected hard LOCKUP on cpu 0
[ 47.813436] Modules linked in: cpufreq_userspace cpufreq_stats cpufreq_powern
[ 47.813436] CPU: 0 PID: 2781 Comm: out Not tainted 3.12.0-rc6+ #79
[ 47.813436] Hardware name: AOpen DE7000/nMCP7ALPx-DE R1.06 Oct.19.2012, BI2
[ 47.813436] 00000000000000f5 ffff88011fc07c48 ffffffff8151a736 ffff88011fc08
[ 47.813436] ffffffff8103cd68 ffffffff817fac46 ffffffff810a0fe5 ffff88011b310
[ 47.813436] ffff88011fc07ef8 ffff88011fc07d58 0000000000000000 ffff88011fc08
[ 47.813436] Call Trace:
[ 47.813436] <NMI> [<ffffffff8151a736>] ? dump_stack+0x41/0x53
[ 47.813436] [<ffffffff8103cd68>] ? warn_slowpath_common+0x81/0x9c
[ 47.813436] [<ffffffff810a0fe5>] ? watchdog_overflow_callback+0x9b/0xa6
[ 47.813436] [<ffffffff8103ce25>] ? warn_slowpath_fmt+0x46/0x48
[ 47.813436] [<ffffffff810a0f53>] ? watchdog_overflow_callback+0x9/0xa6
[ 47.813436] [<ffffffff810a0fe5>] ? watchdog_overflow_callback+0x9b/0xa6
[ 47.813436] [<ffffffff810ca46b>] ? __perf_event_overflow+0x12e/0x1ab
[ 47.813436] [<ffffffff81018047>] ? intel_pmu_save_and_restart+0xe/0x22
[ 47.813436] [<ffffffff81018c95>] ? intel_pmu_handle_irq+0x2b8/0x34d
[ 47.813436] [<ffffffff8152457c>] ? ftrace_call+0x5/0x2f
[ 47.813436] [<ffffffff810c4aa4>] ? irq_work_queue+0x71/0x72
[ 47.813436] [<ffffffff8151f0b3>] ? perf_event_nmi_handler+0x24/0x3d
[ 47.813436] [<ffffffff8151e9f2>] ? nmi_handle+0x5e/0x13a
[ 47.813436] [<ffffffff810a17fa>] ? rcu_nmi_enter+0x9/0x5b
[ 47.813436] [<ffffffff8151eb75>] ? do_nmi+0xa7/0x2bd
[ 47.813436] [<ffffffff8151e177>] ? end_repeat_nmi+0x1e/0x2e
[ 47.813436] [<ffffffff810c4aa4>] ? irq_work_queue+0x71/0x72
[ 47.813436] [<ffffffff810c4aa4>] ? irq_work_queue+0x71/0x72
[ 47.813436] [<ffffffff810c4aa4>] ? irq_work_queue+0x71/0x72
[ 47.813436] <<EOE>> <IRQ> [<ffffffff810ca4d9>] ? __perf_event_overflow+0xb
[ 47.813436] [<ffffffff810a8f18>] ? ftrace_ops_control_func+0x9a/0xc8
[ 47.813436] [<ffffffff810ca654>] ? perf_swevent_overflow+0x3f/0x58
[ 47.813436] [<ffffffff810ca86e>] ? perf_tp_event+0xe9/0x1f6
[ 47.813436] [<ffffffff810a8f18>] ? ftrace_ops_control_func+0x9a/0xc8
[ 47.813436] [<ffffffff8151d65c>] ? _raw_spin_unlock_irqrestore+0x4/0x11
[ 47.813436] [<ffffffff810be8db>] ? perf_ftrace_function_call+0xc7/0xd1
[ 47.813436] [<ffffffff81525d9a>] ? irq_work_interrupt+0x6a/0x70
[ 47.813436] [<ffffffff81040cdb>] ? irq_exit+0x4/0xa1
[ 47.813436] [<ffffffff81525d9a>] ? irq_work_interrupt+0x6a/0x70
[ 47.813436] [<ffffffff810be8db>] ? perf_ftrace_function_call+0xc7/0xd1
[ 47.813436] [<ffffffff81040cdb>] ? irq_exit+0x4/0xa1
[ 47.813436] [<ffffffff810a8f18>] ? ftrace_ops_control_func+0x9a/0xc8
[ 47.813436] [<ffffffff81040cdb>] ? irq_exit+0x4/0xa1
[ 47.813436] [<ffffffff810a8f18>] ? ftrace_ops_control_func+0x9a/0xc8
[ 47.813436] [<ffffffff8151d661>] ? _raw_spin_unlock_irqrestore+0x9/0x11
[ 47.813436] [<ffffffff8152457c>] ? ftrace_call+0x5/0x2f
[ 47.813436] [<ffffffff8111f7a4>] ? kill_fasync+0x74/0x86
[ 47.813436] [<ffffffff810cabc2>] ? perf_event_wakeup+0x8c/0xa1
[ 47.813436] [<ffffffff81005f1f>] ? smp_irq_work_interrupt+0x9/0x21
[ 47.813436] [<ffffffff81005f1f>] ? smp_irq_work_interrupt+0x9/0x21
[ 47.813436] [<ffffffff810c4916>] ? __irq_work_run+0x5f/0x7b
[ 47.813436] [<ffffffff81040ce0>] ? irq_exit+0x9/0xa1
[ 47.813436] [<ffffffff81040ce0>] ? irq_exit+0x9/0xa1
[ 47.813436] [<ffffffff81525d9a>] ? irq_work_interrupt+0x6a/0x70
[ 47.813436] <EOI> [<ffffffff81520b5d>] ? __do_page_fault+0x10b/0x3f5
[ 47.813436] [<ffffffff81520a6b>] ? __do_page_fault+0x19/0x3f5
[ 47.813436] [<ffffffff8152457c>] ? ftrace_call+0x5/0x2f
[ 47.813436] [<ffffffff811281ce>] ? fget_raw_light+0x9/0x8a
[ 47.813436] [<ffffffff81520e50>] ? do_page_fault+0x9/0xf
[ 47.813436] [<ffffffff8151de72>] ? page_fault+0x22/0x30
[ 47.813436] ---[ end trace 9a2f13d704fa7b22 ]--- /* log_to_code output from ./broken_trace/new.broken.7 */
/* by Vince Weaver <vincent.weaver _at_ maine.edu */

#define _GNU_SOURCE 1
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/prctl.h>
#include <poll.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>

int fd[1024];
struct perf_event_attr pe[1024];
char *mmap_result[1024];
#define MAX_READ_SIZE 65536
static long long data[MAX_READ_SIZE];

#define MAX_POLL_FDS 128
struct pollfd pollfds[MAX_POLL_FDS];

long long id;

int forked_pid;

struct sigaction sa;
static int overflows=0;
static int sigios=0;

static void our_handler(int signum, siginfo_t *info, void *uc) {
int fd = info->si_fd;
int ret;

overflows++;
ioctl(fd,PERF_EVENT_IOC_DISABLE,0);
if (sigios) return;
ret=ioctl(fd, PERF_EVENT_IOC_REFRESH,1);
}
int perf_event_open(struct perf_event_attr *hw_event_uptr,
pid_t pid, int cpu, int group_fd, unsigned long flags) {

return syscall(__NR_perf_event_open,hw_event_uptr, pid, cpu,
group_fd, flags);
}

int main(int argc, char **argv) {
/* 1 */

memset(&pe[5],0,sizeof(struct perf_event_attr));
pe[5].type=PERF_TYPE_TRACEPOINT;
pe[5].size=64;
pe[5].config=0x7fffffff00000001;
pe[5].sample_period=0xffffffffff000000;

pe[5].sample_type=PERF_SAMPLE_IP|PERF_SAMPLE_TIME|PERF_SAMPLE_READ|PERF_SAMPLE_ID|PERF_SAMPLE_PERIOD|PERF_SAMPLE_STREAM_ID|PERF_SAMPLE_WEIGHT; /* 4355 */
// pe[5].read_format=PERF_FORMAT_ID; /* 4 */
// pe[5].exclusive=1;
// pe[5].exclude_user=1;
// pe[5].exclude_idle=1;
// pe[5].enable_on_exec=1;
// pe[5].watermark=1;
// pe[5].precise_ip=2; /* request zero skid */
// pe[5].sample_id_all=1;
// pe[5].wakeup_watermark=255;
// pe[5].bp_type=HW_BREAKPOINT_EMPTY;

fd[5]=perf_event_open(&pe[5],0,0,-1,0 /*0*/ );

/* 2 */
memset(&sa, 0, sizeof(struct sigaction));
sa.sa_sigaction = our_handler;
sa.sa_flags = SA_SIGINFO;
if (sigaction( SIGRTMIN+2, &sa, NULL) < 0) {
printf("Error setting up signal handler\n");
}
fcntl(fd[5], F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
fcntl(fd[5], F_SETSIG, SIGRTMIN+2);
fcntl(fd[5], F_SETOWN,getpid());
/* Replayed 2 syscalls */
return 0;
}