[RFC PATCH] tracing: Atomically get refcounts of event_call andtrace_array

From: Masami Hiramatsu
Date: Thu Jul 04 2013 - 07:58:53 EST


Currently ftrace_open_generic_file gets an event_file from
inode->i_private, and then locks event_mutex and gets refcount.
However, this can cause a race as below scenario;

CPU0 CPU1
open(kprobe_events)
trace_remove_event_call() open(enable)
lock event_mutex get event_file from inode->i_private
event_remove() wait for unlock event_mutex
...
free event_file
unlock event_mutex
lock event_mutex
add refcount of event_file->call (*)

So, at (*) point, the event_file is already freed and we
may access the corrupted object.
The same thing could happen on trace_array because it is also
directly accessed from event_file.

To avoid this, when opening events/*/*/enable, we must atomically
do; ensure the ftrace_event_file object still exists on a trace_array,
and get refcounts of event_file->call and the trace_array.


CPU0 CPU1
open(kprobe_events)
trace_remove_event_call() open(enable)
lock event_mutex get event_file from inode->i_private
event_remove() wait for unlock event_mutex
...
free event_file
unlock event_mutex
lock event_mutex
search the event_file and failed
unlock event_mutex
return -ENODEV

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@xxxxxxxxxxx>
---
kernel/trace/trace_events.c | 58 +++++++++++++++++++++++++++++++++----------
1 file changed, 45 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1a5547e..db6b107 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -391,15 +391,24 @@ static void __get_system_dir(struct ftrace_subsystem_dir *dir)
__get_system(dir->subsystem);
}

-static int ftrace_event_call_get(struct ftrace_event_call *call)
+static int __ftrace_event_call_get(struct ftrace_event_call *call)
{
int ret = 0;

- mutex_lock(&event_mutex);
if ((call->flags & TRACE_EVENT_FL_REF_MASK) == TRACE_EVENT_FL_REF_MAX - 1)
ret = -EBUSY;
else
call->flags++;
+
+ return ret;
+}
+
+static int ftrace_event_call_get(struct ftrace_event_call *call)
+{
+ int ret = 0;
+
+ mutex_lock(&event_mutex);
+ ret = __ftrace_event_call_get(call);
mutex_unlock(&event_mutex);

return ret;
@@ -413,6 +422,35 @@ static void ftrace_event_call_put(struct ftrace_event_call *call)
mutex_unlock(&event_mutex);
}

+static int ftrace_event_file_get(struct ftrace_event_file *this_file)
+{
+ struct ftrace_event_file *file;
+ struct trace_array *tr;
+ int ret = -ENODEV;
+
+ mutex_lock(&event_mutex);
+ do_for_each_event_file(tr, file) {
+ if (file == this_file) {
+ ret = __ftrace_event_call_get(file->event_call);
+ if (!ret)
+ tr->ref++;
+ goto out_unlock;
+ }
+ } while_for_each_event_file();
+ out_unlock:
+ mutex_unlock(&event_mutex);
+
+ return ret;
+}
+
+static void ftrace_event_file_put(struct ftrace_event_file *file)
+{
+ struct trace_array *tr = file->tr;
+
+ ftrace_event_call_put(file->event_call);
+ trace_array_put(tr);
+}
+
static void __put_system_dir(struct ftrace_subsystem_dir *dir)
{
WARN_ON_ONCE(dir->ref_count == 0);
@@ -438,33 +476,27 @@ static void put_system(struct ftrace_subsystem_dir *dir)
static int tracing_open_generic_file(struct inode *inode, struct file *filp)
{
struct ftrace_event_file *file = inode->i_private;
- struct trace_array *tr = file->tr;
int ret;

- if (trace_array_get(tr) < 0)
- return -ENODEV;
-
- ret = tracing_open_generic(inode, filp);
+ ret = ftrace_event_file_get(file);
if (ret < 0)
- goto fail;
+ return ret;

- ret = ftrace_event_call_get(file->event_call);
+ ret = tracing_open_generic(inode, filp);
if (ret < 0)
goto fail;

return 0;
fail:
- trace_array_put(tr);
+ ftrace_event_file_put(file);
return ret;
}

static int tracing_release_generic_file(struct inode *inode, struct file *filp)
{
struct ftrace_event_file *file = inode->i_private;
- struct trace_array *tr = file->tr;

- ftrace_event_call_put(file->event_call);
- trace_array_put(tr);
+ ftrace_event_file_put(file);

return 0;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/