Re: Q: perf_event && event->owner

From: Oleg Nesterov
Date: Fri Nov 12 2010 - 13:56:12 EST


On 11/12, Peter Zijlstra wrote:
>
> Utter paranoia took over and I'm still not sure it's solid...

Thanks Peter!

I believe this is all correct.
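
FWIW, below is the pairing I checked, written as a toy userspace analogue.
All the names (fake_owner, fake_event, exit_side, release_side) are made up,
this is not perf code: a pthread mutex plays owner->perf_event_mutex, a C11
release store stands in for the smp_wmb(), an acquire load for ACCESS_ONCE()
plus smp_read_barrier_depends(), and the RCU + get_task_struct() lifetime
part of the patch is not modelled at all. A small driver that actually races
the two sides follows at the end of this mail.

/* Toy model of the owner handshake -- not perf code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct fake_owner {
	pthread_mutex_t perf_event_mutex;	/* owner->perf_event_mutex */
	bool on_owner_list;			/* stands in for owner_entry */
};

struct fake_event {
	_Atomic(struct fake_owner *) owner;	/* event->owner */
};

/* perf_event_exit_task() side: unlink under the mutex, then publish NULL */
void exit_side(struct fake_owner *o, struct fake_event *e)
{
	pthread_mutex_lock(&o->perf_event_mutex);
	o->on_owner_list = false;		/* list_del_init() */
	/* order the unlink before the owner clear, like the smp_wmb() */
	atomic_store_explicit(&e->owner, NULL, memory_order_release);
	pthread_mutex_unlock(&o->perf_event_mutex);
}

/* perf_release() side: snapshot owner once, then serialize on its mutex */
void release_side(struct fake_event *e)
{
	struct fake_owner *o =
		atomic_load_explicit(&e->owner, memory_order_acquire);

	if (!o)
		return;			/* unlink already done and visible */

	pthread_mutex_lock(&o->perf_event_mutex);
	/* re-check: exit_side() may have run between the load and the lock */
	if (atomic_load_explicit(&e->owner, memory_order_relaxed))
		o->on_owner_list = false;
	pthread_mutex_unlock(&o->perf_event_mutex);
}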

> Subject: perf: Fix owner-list vs exit
> From: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
> Date: Tue, 09 Nov 2010 19:01:43 +0100
>
> Reported-by: Oleg Nesterov <oleg@xxxxxxxxxx>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
> LKML-Reference: <1289325703.2191.60.camel@laptop>
> ---
> kernel/perf_event.c | 63 ++++++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 51 insertions(+), 12 deletions(-)
>
> Index: linux-2.6/kernel/perf_event.c
> ===================================================================
> --- linux-2.6.orig/kernel/perf_event.c
> +++ linux-2.6/kernel/perf_event.c
> @@ -2234,11 +2234,6 @@ int perf_event_release_kernel(struct per
>  	raw_spin_unlock_irq(&ctx->lock);
>  	mutex_unlock(&ctx->mutex);
>
> -	mutex_lock(&event->owner->perf_event_mutex);
> -	list_del_init(&event->owner_entry);
> -	mutex_unlock(&event->owner->perf_event_mutex);
> -	put_task_struct(event->owner);
> -
>  	free_event(event);
>
>  	return 0;
> @@ -2251,9 +2246,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_ker
>  static int perf_release(struct inode *inode, struct file *file)
>  {
>  	struct perf_event *event = file->private_data;
> +	struct task_struct *owner;
>
>  	file->private_data = NULL;
>
> +	rcu_read_lock();
> +	owner = ACCESS_ONCE(event->owner);
> +	/*
> +	 * Matches the smp_wmb() in perf_event_exit_task(). If we observe
> +	 * !owner it means the list deletion is complete and we can indeed
> +	 * free this event, otherwise we need to serialize on
> +	 * owner->perf_event_mutex.
> +	 */
> +	smp_read_barrier_depends();
> +	if (owner) {
> +		/*
> +		 * Since delayed_put_task_struct() also drops the last
> +		 * task reference we can safely take a new reference
> +		 * while holding the rcu_read_lock().
> +		 */
> +		get_task_struct(owner);
> +	}
> +	rcu_read_unlock();
> +
> +	if (owner) {
> +		mutex_lock(&owner->perf_event_mutex);
> +		/*
> +		 * We have to re-check the event->owner field, if it is cleared
> +		 * we raced with perf_event_exit_task(), acquiring the mutex
> +		 * ensured they're done, and we can proceed with freeing the
> +		 * event.
> +		 */
> +		if (event->owner)
> +			list_del_init(&event->owner_entry);
> +		mutex_unlock(&owner->perf_event_mutex);
> +		put_task_struct(owner);
> +	}
> +
>  	return perf_event_release_kernel(event);
>  }
>
> @@ -5677,7 +5706,7 @@ SYSCALL_DEFINE5(perf_event_open,
>  	mutex_unlock(&ctx->mutex);
>
>  	event->owner = current;
> -	get_task_struct(current);
> +
>  	mutex_lock(&current->perf_event_mutex);
>  	list_add_tail(&event->owner_entry, &current->perf_event_list);
>  	mutex_unlock(&current->perf_event_mutex);
> @@ -5745,12 +5774,6 @@ perf_event_create_kernel_counter(struct
>  	++ctx->generation;
>  	mutex_unlock(&ctx->mutex);
>
> -	event->owner = current;
> -	get_task_struct(current);
> -	mutex_lock(&current->perf_event_mutex);
> -	list_add_tail(&event->owner_entry, &current->perf_event_list);
> -	mutex_unlock(&current->perf_event_mutex);
> -
>  	return event;
>
>  err_free:
> @@ -5901,8 +5924,24 @@ static void perf_event_exit_task_context
>   */
>  void perf_event_exit_task(struct task_struct *child)
>  {
> +	struct perf_event *event, *tmp;
>  	int ctxn;
>
> +	mutex_lock(&child->perf_event_mutex);
> +	list_for_each_entry_safe(event, tmp, &child->perf_event_list,
> +				 owner_entry) {
> +		list_del_init(&event->owner_entry);
> +
> +		/*
> +		 * Ensure the list deletion is visible before we clear
> +		 * the owner, closes a race against perf_release() where
> +		 * we need to serialize on the owner->perf_event_mutex.
> +		 */
> +		smp_wmb();
> +		event->owner = NULL;
> +	}
> +	mutex_unlock(&child->perf_event_mutex);
> +
>  	for_each_task_context_nr(ctxn)
>  		perf_event_exit_task_context(child, ctxn);
>  }
>
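
And the driver below, compiled together with the toy sketch near the top of
this mail (cc -pthread), races the two sides once; whichever interleaving the
scheduler picks, the event must end up off the fake owner list with ->owner
cleared. Again, none of this is perf code, it only illustrates the handshake
in the patch.

/* Appended to the toy translation unit above; races exit_side/release_side. */
#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct race_args {
	struct fake_owner *o;
	struct fake_event *e;
};

static void *run_exit(void *arg)
{
	struct race_args *a = arg;
	exit_side(a->o, a->e);
	return NULL;
}

static void *run_release(void *arg)
{
	struct race_args *a = arg;
	release_side(a->e);
	return NULL;
}

int main(void)
{
	struct fake_owner o;
	struct fake_event e;
	struct race_args a = { &o, &e };
	pthread_t t1, t2;

	pthread_mutex_init(&o.perf_event_mutex, NULL);
	o.on_owner_list = true;
	atomic_init(&e.owner, &o);

	pthread_create(&t1, NULL, run_exit, &a);
	pthread_create(&t2, NULL, run_release, &a);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);

	/* postcondition of the handshake, however the race went */
	assert(!o.on_owner_list);
	assert(atomic_load(&e.owner) == NULL);
	printf("ok\n");
	return 0;
}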
