Re: [tip:perf/core] perf/core: Check return value of the perf_event_read() IPI

From: Jiri Olsa
Date: Sun Aug 28 2016 - 14:10:15 EST


On Mon, Aug 22, 2016 at 12:38:23PM +0200, Jiri Olsa wrote:
> On Mon, Aug 22, 2016 at 10:29:32AM +0200, Jiri Olsa wrote:
> > On Mon, Aug 22, 2016 at 09:17:37AM +0200, Jiri Olsa wrote:
> > > On Sun, Aug 21, 2016 at 02:10:07PM +0200, Vegard Nossum wrote:
> > >
> > > SNIP
> > >
> > > > [<ffffffff816d1577>] ? __fget+0x47/0x270
> > > > [<ffffffff81676d5b>] vfs_readv+0x8b/0xc0
> > > > [<ffffffff81676e6e>] do_readv+0xde/0x230
> > > > [<ffffffff81676d90>] ? vfs_readv+0xc0/0xc0
> > > > [<ffffffff81002b60>] ? exit_to_usermode_loop+0x190/0x190
> > > > [<ffffffff82001b07>] ? check_preemption_disabled+0x37/0x1e0
> > > > [<ffffffff81677617>] SyS_readv+0x27/0x30
> > > > [<ffffffff816775f0>] ? do_pwritev+0x1a0/0x1a0
> > > > [<ffffffff81005524>] do_syscall_64+0x1c4/0x4e0
> > > > [<ffffffff83c3286a>] entry_SYSCALL64_slow_path+0x25/0x25
> > > >
> > > > I don't think WARN() is the right interface for signalling errors to
> > > > userspace programs?
> > >
> > > any special way to trigger that?
> >
> > nope ;-)
> >
> > perf stat -a -I 10
>
> reading the event can race with the event being scheduled out,
> leaving us with an active state but oncpu == -1
>
> the attached patch fixes the WARN() for me, but I might
> be missing some other cases
>
> jirka

ping, thanks

jirka

>
>
> ---
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 3f07e6cfc1b6..375274b6f3b4 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -1802,8 +1802,9 @@ event_sched_out(struct perf_event *event,
>
> event->tstamp_stopped = tstamp;
> event->pmu->del(event, 0);
> - event->oncpu = -1;
> - event->state = PERF_EVENT_STATE_INACTIVE;
> + WRITE_ONCE(event->state, PERF_EVENT_STATE_INACTIVE);
> + smp_wmb();
> + WRITE_ONCE(event->oncpu, -1);
> if (event->pending_disable) {
> event->pending_disable = 0;
> event->state = PERF_EVENT_STATE_OFF;
> @@ -3424,9 +3425,8 @@ struct perf_read_data {
> int ret;
> };
>
> -static int find_cpu_to_read(struct perf_event *event, int local_cpu)
> +static int find_cpu_to_read(struct perf_event *event, int event_cpu, int local_cpu)
> {
> - int event_cpu = event->oncpu;
> u16 local_pkg, event_pkg;
>
> if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
> @@ -3561,13 +3561,17 @@ u64 perf_event_read_local(struct perf_event *event)
>
> static int perf_event_read(struct perf_event *event, bool group)
> {
> - int ret = 0, cpu_to_read, local_cpu;
> + int ret = 0, cpu_to_read, local_cpu, state;
> +
> + state = READ_ONCE(event->state);
> + smp_rmb();
> + cpu_to_read = event->oncpu;
>
> /*
> * If event is enabled and currently active on a CPU, update the
> * value in the event structure:
> */
> - if (event->state == PERF_EVENT_STATE_ACTIVE) {
> + if (state == PERF_EVENT_STATE_ACTIVE && cpu_to_read != -1) {
> struct perf_read_data data = {
> .event = event,
> .group = group,
> @@ -3575,7 +3579,7 @@ static int perf_event_read(struct perf_event *event, bool group)
> };
>
> local_cpu = get_cpu();
> - cpu_to_read = find_cpu_to_read(event, local_cpu);
> + cpu_to_read = find_cpu_to_read(event, cpu_to_read, local_cpu);
> put_cpu();
>
> ret = smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1);