[RFC PATCH v2 2/3] tracefs: add instances support for uprobe events

From: Hari Bathini
Date: Wed Jul 27 2016 - 17:27:59 EST


If a uprobe event is set on a library function, and if a similar uprobe
event trace is needed for a container, a duplicate is created leaving
the uprobe list with multiple entries of the same function:

$ perf probe --list
probe_libc:malloc (on 0x80490 in /lib64/libc.so.6)
probe_libc:malloc_1 (on __libc_malloc in /lib64/libc.so.6)
$

This can soon get out of hand if multiple containers want to probe the
same function/address in their libraries. This patch tries to resolve this
by adding uprobe event trace files to every new instance. Currently, perf
tool can leverage this by using --debugfs-dir option - something like
(assuming instance dir name is 'tracing'):

$ perf --debugfs-dir=$MOUNT_PNT/instances probe /lib64/libc.so.6 malloc
$
$
$ perf --debugfs-dir=$MOUNT_PNT/instances probe --list
probe_libc:malloc (on __libc_malloc in /lib64/libc.so.6)
$

New uprobe events can be added to the uprobe_events file under the instance
directory and the profile information for these events will be available in
uprobe_profile file in the same instance directory.

Signed-off-by: Hari Bathini <hbathini@xxxxxxxxxxxxxxxxxx>
---
include/linux/trace_events.h | 3 +
kernel/trace/trace.c | 2 +
kernel/trace/trace.h | 12 +++
kernel/trace/trace_events.c | 15 +++-
kernel/trace/trace_kprobe.c | 2 -
kernel/trace/trace_uprobe.c | 158 +++++++++++++++++++++++++++++++-----------
6 files changed, 144 insertions(+), 48 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index be00761..f893223 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -451,7 +451,8 @@ extern int trace_event_raw_init(struct trace_event_call *call);
extern int trace_define_field(struct trace_event_call *call, const char *type,
const char *name, int offset, int size,
int is_signed, int filter_type);
-extern int trace_add_event_call(struct trace_event_call *call);
+extern int trace_add_event_call(struct trace_event_call *call,
+ struct trace_array *tr);
extern int trace_remove_event_call(struct trace_event_call *call);
extern int trace_event_get_offsets(struct trace_event_call *call);

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8a4bd6b..23a8111 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6966,6 +6966,8 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
&tr->max_latency, &tracing_max_lat_fops);
#endif

+ uprobe_create_trace_files(tr, d_tracer);
+
if (ftrace_create_function_files(tr, d_tracer))
WARN(1, "Could not allocate function filter files");

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5167c36..a8360e9 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -245,6 +245,10 @@ struct trace_array {
struct list_head events;
cpumask_var_t tracing_cpumask; /* only trace on set CPUs */
int ref;
+#ifdef CONFIG_UPROBE_EVENT
+ struct mutex uprobe_lock;
+ struct list_head uprobe_list;
+#endif
#ifdef CONFIG_FUNCTION_TRACER
struct ftrace_ops *ops;
/* function tracing enabled */
@@ -819,6 +823,14 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)

extern struct list_head ftrace_pids;

+#ifdef CONFIG_UPROBE_EVENT
+void uprobe_create_trace_files(struct trace_array *tr,
+ struct dentry *parent);
+#else
+static inline void
+uprobe_create_trace_files(struct trace_array *tr, struct dentry *parent) { }
+#endif /* CONFIG_UPROBE_EVENT */
+
#ifdef CONFIG_FUNCTION_TRACER
extern bool ftrace_filter_param __initdata;
static inline int ftrace_trace_task(struct task_struct *task)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3d41558..2e0f986 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2441,15 +2441,20 @@ struct ftrace_module_file_ops;
static void __add_event_to_tracers(struct trace_event_call *call);

/* Add an additional event_call dynamically */
-int trace_add_event_call(struct trace_event_call *call)
+int trace_add_event_call(struct trace_event_call *call, struct trace_array *tr)
{
int ret;
mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);

ret = __register_event(call, NULL);
- if (ret >= 0)
- __add_event_to_tracers(call);
+ if (ret >= 0) {
+ if (tr)
+ /* If a tracer is specified, add event only to it */
+ __trace_add_new_event(call, tr);
+ else
+ __add_event_to_tracers(call);
+ }

mutex_unlock(&event_mutex);
mutex_unlock(&trace_types_lock);
@@ -2609,6 +2614,10 @@ __trace_add_event_dirs(struct trace_array *tr)
int ret;

list_for_each_entry(call, &ftrace_events, list) {
+ /* Don't add dynamic uprobe events to new tracers */
+ if (call->flags & TRACE_EVENT_FL_UPROBE)
+ continue;
+
ret = __trace_add_new_event(call, tr);
if (ret < 0)
pr_warn("Could not create directory for event %s\n",
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5546eec..b82a328 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1296,7 +1296,7 @@ static int register_kprobe_event(struct trace_kprobe *tk)
call->flags = TRACE_EVENT_FL_KPROBE;
call->class->reg = kprobe_register;
call->data = tk;
- ret = trace_add_event_call(call);
+ ret = trace_add_event_call(call, NULL);
if (ret) {
pr_info("Failed to register kprobe event: %s\n",
trace_event_name(call));
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c534854..ea8c4e4 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -64,12 +64,10 @@ struct trace_uprobe {
(offsetof(struct trace_uprobe, tp.args) + \
(sizeof(struct probe_arg) * (n)))

-static int register_uprobe_event(struct trace_uprobe *tu);
+static int register_uprobe_event(struct trace_array *tr,
+ struct trace_uprobe *tu);
static int unregister_uprobe_event(struct trace_uprobe *tu);

-static DEFINE_MUTEX(uprobe_lock);
-static LIST_HEAD(uprobe_list);
-
struct uprobe_dispatch_data {
struct trace_uprobe *tu;
unsigned long bp_addr;
@@ -288,11 +286,12 @@ static void free_trace_uprobe(struct trace_uprobe *tu)
kfree(tu);
}

-static struct trace_uprobe *find_probe_event(const char *event, const char *group)
+static struct trace_uprobe *
+find_probe_event(struct trace_array *tr, const char *event, const char *group)
{
struct trace_uprobe *tu;

- list_for_each_entry(tu, &uprobe_list, list)
+ list_for_each_entry(tu, &tr->uprobe_list, list)
if (strcmp(trace_event_name(&tu->tp.call), event) == 0 &&
strcmp(tu->tp.call.class->system, group) == 0)
return tu;
@@ -315,15 +314,16 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu)
}

/* Register a trace_uprobe and probe_event */
-static int register_trace_uprobe(struct trace_uprobe *tu)
+static int register_trace_uprobe(struct trace_array *tr,
+ struct trace_uprobe *tu)
{
struct trace_uprobe *old_tu;
int ret;

- mutex_lock(&uprobe_lock);
+ mutex_lock(&tr->uprobe_lock);

/* register as an event */
- old_tu = find_probe_event(trace_event_name(&tu->tp.call),
+ old_tu = find_probe_event(tr, trace_event_name(&tu->tp.call),
tu->tp.call.class->system);
if (old_tu) {
/* delete old event */
@@ -332,16 +332,16 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
goto end;
}

- ret = register_uprobe_event(tu);
+ ret = register_uprobe_event(tr, tu);
if (ret) {
pr_warn("Failed to register probe event(%d)\n", ret);
goto end;
}

- list_add_tail(&tu->list, &uprobe_list);
+ list_add_tail(&tu->list, &tr->uprobe_list);

end:
- mutex_unlock(&uprobe_lock);
+ mutex_unlock(&tr->uprobe_lock);

return ret;
}
@@ -352,7 +352,7 @@ end:
*
* - Remove uprobe: -:[GRP/]EVENT
*/
-static int create_trace_uprobe(int argc, char **argv)
+static int create_trace_uprobe(struct trace_array *tr, int argc, char **argv)
{
struct trace_uprobe *tu;
struct inode *inode;
@@ -409,17 +409,17 @@ static int create_trace_uprobe(int argc, char **argv)
pr_info("Delete command needs an event name.\n");
return -EINVAL;
}
- mutex_lock(&uprobe_lock);
- tu = find_probe_event(event, group);
+ mutex_lock(&tr->uprobe_lock);
+ tu = find_probe_event(tr, event, group);

if (!tu) {
- mutex_unlock(&uprobe_lock);
+ mutex_unlock(&tr->uprobe_lock);
pr_info("Event %s/%s doesn't exist.\n", group, event);
return -ENOENT;
}
/* delete an event */
ret = unregister_trace_uprobe(tu);
- mutex_unlock(&uprobe_lock);
+ mutex_unlock(&tr->uprobe_lock);
return ret;
}

@@ -543,7 +543,7 @@ static int create_trace_uprobe(int argc, char **argv)
}
}

- ret = register_trace_uprobe(tu);
+ ret = register_trace_uprobe(tr, tu);
if (ret)
goto error;
return 0;
@@ -560,37 +560,45 @@ fail_address_parse:
return ret;
}

-static int cleanup_all_probes(void)
+static int cleanup_all_probes(struct trace_array *tr)
{
struct trace_uprobe *tu;
int ret = 0;

- mutex_lock(&uprobe_lock);
- while (!list_empty(&uprobe_list)) {
- tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
+ mutex_lock(&tr->uprobe_lock);
+ while (!list_empty(&tr->uprobe_list)) {
+ tu = list_entry(tr->uprobe_list.next,
+ struct trace_uprobe,
+ list);
ret = unregister_trace_uprobe(tu);
if (ret)
break;
}
- mutex_unlock(&uprobe_lock);
+ mutex_unlock(&tr->uprobe_lock);
return ret;
}

/* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
- mutex_lock(&uprobe_lock);
- return seq_list_start(&uprobe_list, *pos);
+ struct trace_array *tr = m->file->f_inode->i_private;
+
+ mutex_lock(&tr->uprobe_lock);
+ return seq_list_start(&tr->uprobe_list, *pos);
}

static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
- return seq_list_next(v, &uprobe_list, pos);
+ struct trace_array *tr = m->file->f_inode->i_private;
+
+ return seq_list_next(v, &tr->uprobe_list, pos);
}

static void probes_seq_stop(struct seq_file *m, void *v)
{
- mutex_unlock(&uprobe_lock);
+ struct trace_array *tr = m->file->f_inode->i_private;
+
+ mutex_unlock(&tr->uprobe_lock);
}

static int probes_seq_show(struct seq_file *m, void *v)
@@ -635,9 +643,10 @@ static const struct seq_operations probes_seq_op = {
static int probes_open(struct inode *inode, struct file *file)
{
int ret;
+ struct trace_array *tr = inode->i_private;

if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
- ret = cleanup_all_probes();
+ ret = cleanup_all_probes(tr);
if (ret)
return ret;
}
@@ -645,10 +654,72 @@ static int probes_open(struct inode *inode, struct file *file)
return seq_open(file, &probes_seq_op);
}

+#define WRITE_BUFSIZE 4096
+
static ssize_t probes_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
- return traceprobe_probes_write(file, buffer, count, ppos, create_trace_uprobe);
+ char *kbuf, *tmp;
+ char **argv;
+ int argc;
+ int ret = 0;
+ size_t done = 0;
+ size_t size;
+ struct trace_array *tr = file->f_inode->i_private;
+
+ kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+
+ while (done < count) {
+ size = count - done;
+
+ if (size >= WRITE_BUFSIZE)
+ size = WRITE_BUFSIZE - 1;
+
+ if (copy_from_user(kbuf, buffer + done, size)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ kbuf[size] = '\0';
+ tmp = strchr(kbuf, '\n');
+
+ if (tmp) {
+ *tmp = '\0';
+ size = tmp - kbuf + 1;
+ } else if (done + size < count) {
+ pr_warn("Line length is too long: Should be less than %d\n",
+ WRITE_BUFSIZE);
+ ret = -EINVAL;
+ goto out;
+ }
+ done += size;
+ /* Remove comments */
+ tmp = strchr(kbuf, '#');
+
+ if (tmp)
+ *tmp = '\0';
+
+ argc = 0;
+ argv = argv_split(GFP_KERNEL, kbuf, &argc);
+ if (!argv) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (argc)
+ ret = create_trace_uprobe(tr, argc, argv);
+
+ argv_free(argv);
+ if (ret)
+ goto out;
+ }
+ ret = done;
+
+out:
+ kfree(kbuf);
+
+ return ret;
}

static const struct file_operations uprobe_events_ops = {
@@ -1290,7 +1361,8 @@ static struct trace_event_functions uprobe_funcs = {
.trace = print_uprobe_event
};

-static int register_uprobe_event(struct trace_uprobe *tu)
+static int register_uprobe_event(struct trace_array *tr,
+ struct trace_uprobe *tu)
{
struct trace_event_call *call = &tu->tp.call;
int ret;
@@ -1312,7 +1384,7 @@ static int register_uprobe_event(struct trace_uprobe *tu)
call->flags = TRACE_EVENT_FL_UPROBE;
call->class->reg = trace_uprobe_register;
call->data = tu;
- ret = trace_add_event_call(call);
+ ret = trace_add_event_call(call, tr);

if (ret) {
pr_info("Failed to register uprobe event: %s\n",
@@ -1338,20 +1410,20 @@ static int unregister_uprobe_event(struct trace_uprobe *tu)
}

/* Make a trace interface for controling probe points */
-static __init int init_uprobe_trace(void)
+void uprobe_create_trace_files(struct trace_array *tr,
+ struct dentry *parent)
{
- struct dentry *d_tracer;
+ if (!tr) {
+ WARN(1, "Need a trace array for uprobe events");
+ return;
+ }

- d_tracer = tracing_init_dentry();
- if (IS_ERR(d_tracer))
- return 0;
+ mutex_init(&tr->uprobe_lock);
+ INIT_LIST_HEAD(&tr->uprobe_list);

- trace_create_file("uprobe_events", 0644, d_tracer,
- NULL, &uprobe_events_ops);
+ trace_create_file("uprobe_events", 0644, parent,
+ tr, &uprobe_events_ops);
/* Profile interface */
- trace_create_file("uprobe_profile", 0444, d_tracer,
- NULL, &uprobe_profile_ops);
- return 0;
+ trace_create_file("uprobe_profile", 0444, parent,
+ tr, &uprobe_profile_ops);
}
-
-fs_initcall(init_uprobe_trace);