Re: use-after-free in __perf_install_in_context

From: Dmitry Vyukov
Date: Fri Jan 08 2016 - 03:40:38 EST


On Tue, Jan 5, 2016 at 1:17 PM, Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
> On Thu, Dec 31, 2015 at 06:15:41PM +0100, Dmitry Vyukov wrote:
>> On Thu, Dec 17, 2015 at 3:43 PM, Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
>> > On Thu, Dec 17, 2015 at 03:35:32PM +0100, Dmitry Vyukov wrote:
>> >> In short, I did not see use-after-frees but perf_event_open fuzzing
>> >> started hanging VMs very frequently, so testing is inconclusive.
>> >
>> > Right, I'll get back to staring at that one.
>> >
>> > Running your test case on bare metal got my machine reproducibly stuck
>> > but in a different place than you've reported.
>> >
>> > Takes a while too, so far no luck in explaining/debugging it. All my
>> > added debug code remains silent :/
>> >
>> > I'll go think up more ..
>>
>> Hi Peter,
>>
>> Any progress with this?
>> I can't continue to test the perf subsystem, as it reliably hangs my machines.
>
> Holidays got in the way, I'll continue asap when I've cleared some of
> the easy mail backlog from the past two weeks.


Hi Peter,

I've hit the following WARNING twice during an overnight run. This is on
b06f3a168cdcd80026276898fd1fee443ef25743 (Jan 6) + the following patch
from you. I can't easily reproduce it. Does it tell you anything?
It looks like it all happens in the context of a single perf_event_open
call.


------------[ cut here ]------------
WARNING: CPU: 2 PID: 2523 at kernel/events/core.c:1891
event_sched_in.isra.95+0x84b/0xcd0()
Modules linked in:
CPU: 2 PID: 2523 Comm: syz-executor Not tainted 4.4.0-rc8+ #206
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
00000000ffffffff ffff880042d7f7d8 ffffffff8289a56d 0000000000000000
ffff8800590b9780 ffffffff85db2300 ffff880042d7f818 ffffffff812eb849
ffffffff815ae34b ffffffff85db2300 0000000000000763 ffff88005f35f4e0
Call Trace:
[< inline >] __dump_stack lib/dump_stack.c:15
[<ffffffff8289a56d>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50
[<ffffffff812eb849>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:460
[<ffffffff812eba79>] warn_slowpath_null+0x29/0x30 kernel/panic.c:493
[<ffffffff815ae34b>] event_sched_in.isra.95+0x84b/0xcd0
kernel/events/core.c:1891
[<ffffffff815b4164>] group_sched_in+0x114/0x410 kernel/events/core.c:1963
[< inline >] ctx_pinned_sched_in kernel/events/core.c:2744
[<ffffffff815b5085>] ctx_sched_in+0xc25/0x11b0 kernel/events/core.c:2807
[<ffffffff815b5657>] perf_event_sched_in+0x47/0xa0 kernel/events/core.c:2073
[<ffffffff815b587c>] perf_event_context_sched_in+0x1cc/0x380
kernel/events/core.c:2844
[<ffffffff815bf4f3>] __perf_event_task_sched_in+0x3a3/0x480
kernel/events/core.c:2872
[< inline >] perf_event_task_sched_in include/linux/perf_event.h:931
[<ffffffff8136d40f>] finish_task_switch+0x3cf/0x610 kernel/sched/core.c:2576
[<ffffffff85c7b08d>] __schedule+0x90[... rest of line lost: console output interleaved with fuzzer log "2016/01/07 20:52:38 executing program 2:"]
[<ffffffff85c7cbc2>] preempt_schedule_common+0x42/0x70 kernel/sched/core.c:3250
[<ffffffff85c7cc07>] _cond_resched+0x17/0x20 kernel/sched/core.c:4618
[< inline >] slab_pre_alloc_hook mm/slub.c:1317
[< inline >] slab_alloc_node mm/slub.c:2520
[< inline >] slab_alloc mm/slub.c:2602
[<ffffffff816cba4b>] kmem_cache_alloc_trace+0x24b/0x2f0 mm/slub.c:2619
[< inline >] kmalloc include/linux/slab.h:458
[< inline >] kzalloc include/linux/slab.h:602
[< inline >] aa_alloc_file_context
security/apparmor/include/context.h:46
[<ffffffff826a9c85>] apparmor_file_alloc_security+0x45/0xa0
security/apparmor/lsm.c:402
[<ffffffff825e2a1d>] security_file_alloc+0x6d/0xa0 security/security.c:742
[<ffffffff81716aba>] get_empty_filp+0x13a/0x3f0 fs/file_table.c:128
[<ffffffff81716d90>] alloc_file+0x20/0x380 fs/file_table.c:163
[<ffffffff817f4fdd>] anon_inode_getfile+0x19d/0x340 fs/anon_inodes.c:105
[<ffffffff815c7daa>] SYSC_perf_event_open+0x12da/0x1fa0
kernel/events/core.c:8471
[<ffffffff815cf609>] SyS_perf_event_open+0x39/0x50 kernel/events/core.c:8262
[<ffffffff85c8a2b6>] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185
---[ end trace 326ea08cc5f1b454 ]---


diff --git a/kernel/events/core.c b/kernel/events/core.c
index ef2d6ea..fdf78fb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -881,13 +881,22 @@ static void free_ctx(struct rcu_head *head)
kfree(ctx);
}

+#define for_each_task_context_nr(ctxn) \
+ for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
+
static void put_ctx(struct perf_event_context *ctx)
{
if (atomic_dec_and_test(&ctx->refcount)) {
if (ctx->parent_ctx)
put_ctx(ctx->parent_ctx);
- if (ctx->task)
+ if (ctx->task) {
+ int ctxn;
+
+ for_each_task_context_nr(ctxn)
+
WARN_ON(ctx->task->perf_event_ctxp[ctxn] == ctx);
+
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ef2d6ea..fdf78fb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -881,13 +881,22 @@ static void free_ctx(struct rcu_head *head)
kfree(ctx);
}

+#define for_each_task_context_nr(ctxn) \
+ for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
+
static void put_ctx(struct perf_event_context *ctx)
{
if (atomic_dec_and_test(&ctx->refcount)) {
if (ctx->parent_ctx)
put_ctx(ctx->parent_ctx);
- if (ctx->task)
+ if (ctx->task) {
+ int ctxn;
+
+ for_each_task_context_nr(ctxn)
+
WARN_ON(ctx->task->perf_event_ctxp[ctxn] == ctx);
+
put_task_struct(ctx->task);
+ }
call_rcu(&ctx->rcu_head, free_ctx);
}
}
@@ -2658,9 +2667,6 @@ static void perf_pmu_sched_task(struct task_struct *prev,
static void perf_event_switch(struct task_struct *task,
struct task_struct *next_prev, bool sched_in);

-#define for_each_task_context_nr(ctxn) \
- for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
-
/*
* Called from scheduler to remove the events of the current task,
* with interrupts disabled.
@@ -3154,15 +3160,16 @@ static int event_enable_on_exec(struct
perf_event *event,
* Enable all of a task's events that have been marked enable-on-exec.
* This expects task == current.
*/
-static void perf_event_enable_on_exec(struct perf_event_context *ctx)
+static void perf_event_enable_on_exec(int ctxn)
{
- struct perf_event_context *clone_ctx = NULL;
+ struct perf_event_context *ctx, *clone_ctx = NULL;
struct perf_event *event;
unsigned long flags;
int enabled = 0;
int ret;

local_irq_save(flags);
+ ctx = current->perf_event_ctxp[ctxn];
if (!ctx || !ctx->nr_events)
goto out;

@@ -3205,17 +3212,11 @@ out:

void perf_event_exec(void)
{
- struct perf_event_context *ctx;
int ctxn;

rcu_read_lock();
- for_each_task_context_nr(ctxn) {
- ctx = current->perf_event_ctxp[ctxn];
- if (!ctx)
- continue;
-
- perf_event_enable_on_exec(ctx);
- }
+ for_each_task_context_nr(ctxn)
+ perf_event_enable_on_exec(ctxn);
rcu_read_unlock();
}

@@ -8967,6 +8968,8 @@ again:

mutex_unlock(&ctx->mutex);

+ task->perf_event_ctxp[ctxn] = NULL;
+
put_ctx(ctx);
}
}