[RFC][PATCH 5/5] perf: Grab event_mutex before taking get_online_cpus()

From: Steven Rostedt
Date: Fri May 12 2017 - 13:27:49 EST


From: "Steven Rostedt (VMware)" <rostedt@xxxxxxxxxxx>

The event_mutex is a high-level lock. It should never be taken while
get_online_cpus() is held. Perf is the only user that does so. Move the
taking of event_mutex to outside of get_online_cpus(), making event_mutex
the outer lock everywhere, which fixes the lock ordering.

Signed-off-by: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>
---
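For review, the lock nesting this patch establishes is sketched below.
This is illustrative only and not part of the patch; do_work_under_both()
is a hypothetical placeholder for whatever runs under the two locks:

	/* Illustrative sketch -- not part of the patch */
	mutex_lock(&event_mutex);	/* outer: high-level tracing lock */
	get_online_cpus();		/* inner: CPU hotplug read lock */

	do_work_under_both();		/* hypothetical work under both locks */

	put_online_cpus();		/* release in reverse order */
	mutex_unlock(&event_mutex);
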
 include/linux/trace_events.h    | 2 ++
 kernel/events/core.c            | 9 +++++++++
 kernel/trace/trace.h            | 1 -
 kernel/trace/trace_event_perf.c | 8 ++++----
 4 files changed, 15 insertions(+), 5 deletions(-)
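The trace_event_perf.c hunks below convert perf_trace_init() and
perf_trace_destroy() from taking event_mutex themselves to asserting
that the caller already holds it; with lockdep enabled,
lockdep_assert_held() warns if that contract is violated. A minimal
sketch of the resulting pattern (illustrative only, not part of the
patch; do_event_work() is a hypothetical placeholder):

	void perf_trace_example(struct perf_event *p_event)
	{
		/* caller must hold event_mutex; warn under lockdep if not */
		lockdep_assert_held(&event_mutex);

		do_event_work(p_event);	/* hypothetical work under the lock */
	}
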

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index a556805eff8a..0a30d2b5df51 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -50,6 +50,8 @@ struct trace_event;
int trace_raw_output_prep(struct trace_iterator *iter,
struct trace_event *event);

+extern struct mutex event_mutex;
+
/*
* The trace entry - the most basic unit of tracing. This is what
* is printed in the end as a single line in the trace output, such as:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dba870ccda63..c65c3b5a92f4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4021,12 +4021,14 @@ static void unaccount_event(struct perf_event *event)

static void perf_sched_delayed(struct work_struct *work)
{
+ mutex_lock(&event_mutex);
get_online_cpus();
mutex_lock(&perf_sched_mutex);
if (atomic_dec_and_test(&perf_sched_count))
static_branch_disable_cpuslocked(&perf_sched_events);
mutex_unlock(&perf_sched_mutex);
put_online_cpus();
+ mutex_unlock(&event_mutex);
}

/*
@@ -4231,7 +4233,9 @@ static void put_event(struct perf_event *event)
if (!atomic_long_dec_and_test(&event->refcount))
return;

+ mutex_lock(&event_mutex);
_free_event(event);
+ mutex_unlock(&event_mutex);
}

/*
@@ -8917,6 +8921,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
pmu->hrtimer_interval_ms = timer;

/* update all cpuctx for this PMU */
+ mutex_lock(&event_mutex);
get_online_cpus();
for_each_online_cpu(cpu) {
struct perf_cpu_context *cpuctx;
@@ -8927,6 +8932,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
(remote_function_f)perf_mux_hrtimer_restart, cpuctx);
}
put_online_cpus();
+ mutex_unlock(&event_mutex);
mutex_unlock(&mux_interval_mutex);

return count;
@@ -9879,6 +9885,7 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_task;
}

+ mutex_lock(&event_mutex);
get_online_cpus();

if (task) {
@@ -10160,6 +10167,7 @@ SYSCALL_DEFINE5(perf_event_open,
}

put_online_cpus();
+ mutex_unlock(&event_mutex);

mutex_lock(&current->perf_event_mutex);
list_add_tail(&event->owner_entry, &current->perf_event_list);
@@ -10196,6 +10204,7 @@ SYSCALL_DEFINE5(perf_event_open,
mutex_unlock(&task->signal->cred_guard_mutex);
err_cpus:
put_online_cpus();
+ mutex_unlock(&event_mutex);
err_task:
if (task)
put_task_struct(task);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 291a1bca5748..4df0a8e9d000 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1430,7 +1430,6 @@ static inline void *event_file_data(struct file *filp)
return ACCESS_ONCE(file_inode(filp)->i_private);
}

-extern struct mutex event_mutex;
extern struct list_head ftrace_events;

extern const struct file_operations event_trigger_fops;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 562fa69df5d3..b8c90a024e99 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -213,7 +213,8 @@ int perf_trace_init(struct perf_event *p_event)
u64 event_id = p_event->attr.config;
int ret = -EINVAL;

- mutex_lock(&event_mutex);
+ lockdep_assert_held(&event_mutex);
+
list_for_each_entry(tp_event, &ftrace_events, list) {
if (tp_event->event.type == event_id &&
tp_event->class && tp_event->class->reg &&
@@ -224,17 +225,16 @@ int perf_trace_init(struct perf_event *p_event)
break;
}
}
- mutex_unlock(&event_mutex);

return ret;
}

void perf_trace_destroy(struct perf_event *p_event)
{
- mutex_lock(&event_mutex);
+ lockdep_assert_held(&event_mutex);
+
perf_trace_event_close(p_event);
perf_trace_event_unreg(p_event);
- mutex_unlock(&event_mutex);
}

int perf_trace_add(struct perf_event *p_event, int flags)
--
2.10.2