[RFC][PATCH 3/4] tracing: Implement event pid filtering

From: Steven Rostedt
Date: Thu Oct 22 2015 - 12:10:59 EST


From: "Steven Rostedt (Red Hat)" <rostedt@xxxxxxxxxxx>

Add the necessary hooks to use the pids loaded in set_event_pid to filter
all the events enabled in the tracing instance that match the pids listed.

Two probes are added to both sched_switch and sched_wakeup tracepoints to be
called before other probes are called and after the other probes are called.
The first is used to set the necessary flags to let the probes know to test
if they should be traced or not.

The sched_switch pre probe will set the "ignore_pid" flag if neither the
previous or next task has a matching pid.

The sched_switch probe will set the "ignore_pid" flag if the next task
does not match the matching pid.

The pre probe allows for probes tracing sched_switch to be traced if
necessary.

The sched_wakeup pre probe will set the "ignore_pid" flag if neither the
current task nor the wakee task has a matching pid.

The sched_wakeup post probe will set the "ignore_pid" flag if the current
task does not have a matching pid.

Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Steven Rostedt <rostedt@xxxxxxxxxxx>
---
include/linux/trace_events.h | 7 ++
kernel/trace/trace.h | 2 +
kernel/trace/trace_events.c | 148 ++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 154 insertions(+), 3 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index f85693bbcdc3..429fdfc3baf5 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -328,6 +328,7 @@ enum {
EVENT_FILE_FL_SOFT_DISABLED_BIT,
EVENT_FILE_FL_TRIGGER_MODE_BIT,
EVENT_FILE_FL_TRIGGER_COND_BIT,
+ EVENT_FILE_FL_PID_FILTER_BIT,
};

/*
@@ -341,6 +342,7 @@ enum {
* tracepoint may be enabled)
* TRIGGER_MODE - When set, invoke the triggers associated with the event
* TRIGGER_COND - When set, one or more triggers has an associated filter
+ * PID_FILTER - When set, the event is filtered based on pid
*/
enum {
EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT),
@@ -351,6 +353,7 @@ enum {
EVENT_FILE_FL_SOFT_DISABLED = (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT),
EVENT_FILE_FL_TRIGGER_MODE = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT),
EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
+ EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT),
};

struct trace_event_file {
@@ -429,6 +432,8 @@ extern enum event_trigger_type event_triggers_call(struct trace_event_file *file
extern void event_triggers_post_call(struct trace_event_file *file,
enum event_trigger_type tt);

+bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);
+
/**
* trace_trigger_soft_disabled - do triggers and test if soft disabled
* @file: The file pointer of the event to test
@@ -448,6 +453,8 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
event_triggers_call(file, NULL);
if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
return true;
+ if (eflags & EVENT_FILE_FL_PID_FILTER)
+ return trace_event_ignore_this_pid(file);
}
return false;
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 250481043bb5..89ffdaf3e371 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -156,6 +156,8 @@ struct trace_array_cpu {
pid_t pid;
kuid_t uid;
char comm[TASK_COMM_LEN];
+
+ bool ignore_pid;
};

struct tracer;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 2ad7014707ee..ab07058e27c1 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -22,6 +22,8 @@
#include <linux/slab.h>
#include <linux/delay.h>

+#include <trace/events/sched.h>
+
#include <asm/setup.h>

#include "trace_output.h"
@@ -212,12 +214,32 @@ int trace_event_raw_init(struct trace_event_call *call)
}
EXPORT_SYMBOL_GPL(trace_event_raw_init);

+bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
+{
+ struct trace_array *tr = trace_file->tr;
+ struct trace_array_cpu *data;
+ struct trace_pid_list *pid_list;
+
+ pid_list = rcu_dereference_sched(tr->filtered_pids);
+ if (!pid_list)
+ return false;
+
+ data = this_cpu_ptr(tr->trace_buffer.data);
+
+ return data->ignore_pid;
+}
+EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
+
void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
struct trace_event_file *trace_file,
unsigned long len)
{
struct trace_event_call *event_call = trace_file->event_call;

+ if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
+ trace_event_ignore_this_pid(trace_file))
+ return NULL;
+
local_save_flags(fbuffer->flags);
fbuffer->pc = preempt_count();
fbuffer->trace_file = trace_file;
@@ -459,15 +481,114 @@ static int cmp_pid(const void *key, const void *elt)
return 1;
}

+static bool
+check_ignore_pid(struct trace_pid_list *filtered_pids, struct task_struct *task)
+{
+ pid_t search_pid;
+ pid_t *pid;
+
+ /*
+ * Return false, because if filtered_pids does not exist,
+ * all pids are good to trace.
+ */
+ if (!filtered_pids)
+ return false;
+
+ search_pid = task->pid;
+
+ pid = bsearch(&search_pid, filtered_pids->pids,
+ filtered_pids->nr_pids, sizeof(pid_t),
+ cmp_pid);
+ if (!pid)
+ return true;
+
+ return false;
+}
+
+static void
+event_filter_pid_sched_switch_probe_pre(void *data,
+ struct task_struct *prev, struct task_struct *next)
+{
+ struct trace_array *tr = data;
+ struct trace_pid_list *pid_list;
+
+ pid_list = rcu_dereference_sched(tr->filtered_pids);
+
+ this_cpu_write(tr->trace_buffer.data->ignore_pid,
+ check_ignore_pid(pid_list, prev) &&
+ check_ignore_pid(pid_list, next));
+}
+
+static void
+event_filter_pid_sched_switch_probe_post(void *data,
+ struct task_struct *prev, struct task_struct *next)
+{
+ struct trace_array *tr = data;
+ struct trace_pid_list *pid_list;
+
+ pid_list = rcu_dereference_sched(tr->filtered_pids);
+
+ this_cpu_write(tr->trace_buffer.data->ignore_pid,
+ check_ignore_pid(pid_list, next));
+}
+
+static void
+event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
+{
+ struct trace_array *tr = data;
+ struct trace_pid_list *pid_list;
+
+ /* Nothing to do if we are already tracing */
+ if (!this_cpu_read(tr->trace_buffer.data->ignore_pid))
+ return;
+
+ pid_list = rcu_dereference_sched(tr->filtered_pids);
+
+ this_cpu_write(tr->trace_buffer.data->ignore_pid,
+ check_ignore_pid(pid_list, task));
+}
+
+static void
+event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
+{
+ struct trace_array *tr = data;
+ struct trace_pid_list *pid_list;
+
+ /* Nothing to do if we are not tracing */
+ if (this_cpu_read(tr->trace_buffer.data->ignore_pid))
+ return;
+
+ pid_list = rcu_dereference_sched(tr->filtered_pids);
+
+ /* Set tracing if current is enabled */
+ this_cpu_write(tr->trace_buffer.data->ignore_pid,
+ check_ignore_pid(pid_list, current));
+}
+
static void __ftrace_clear_event_pids(struct trace_array *tr)
{
struct trace_pid_list *pid_list;
+ struct trace_event_file *file;
+ int cpu;

pid_list = rcu_dereference_protected(tr->filtered_pids,
lockdep_is_held(&event_mutex));
if (!pid_list)
return;

+ unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
+ unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
+
+ unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
+ unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
+
+ list_for_each_entry(file, &tr->events, list) {
+ clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
+ }
+
+ for_each_possible_cpu(cpu)
+ per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false;
+
rcu_assign_pointer(tr->filtered_pids, NULL);

/* Wait till all users are no longer using pid filtering */
@@ -1429,13 +1550,14 @@ static int max_pids(struct trace_pid_list *pid_list)
}

static ssize_t
-ftrace_event_pid_write(struct file *file, const char __user *ubuf,
+ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- struct seq_file *m = file->private_data;
+ struct seq_file *m = filp->private_data;
struct trace_array *tr = m->private;
struct trace_pid_list *filtered_pids = NULL;
struct trace_pid_list *pid_list = NULL;
+ struct trace_event_file *file;
struct trace_parser parser;
unsigned long val;
loff_t this_pos;
@@ -1564,15 +1686,35 @@ ftrace_event_pid_write(struct file *file, const char __user *ubuf,

rcu_assign_pointer(tr->filtered_pids, pid_list);

- mutex_unlock(&event_mutex);
+ list_for_each_entry(file, &tr->events, list) {
+ set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
+ }

if (filtered_pids) {
synchronize_sched();

free_pages((unsigned long)filtered_pids->pids, filtered_pids->order);
kfree(filtered_pids);
+ } else {
+ /*
+ * Register a probe that is called before all other probes
+ * to set ignore_pid if next or prev do not match.
+ * Register a probe this is called after all other probes
+ * to only keep ignore_pid set if next pid matches.
+ */
+ register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
+ tr, INT_MAX);
+ register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
+ tr, 0);
+
+ register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
+ tr, INT_MAX);
+ register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
+ tr, 0);
}

+ mutex_unlock(&event_mutex);
+
ret = read;
*ppos += read;

--
2.6.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/