[PATCH v5 1/3] tracing: Add support for recording tgid of tasks

From: Joel Fernandes
Date: Mon Jun 26 2017 - 22:03:53 EST


Inorder to support recording of tgid, the following changes are made:

* Introduce a new API (tracing_record_taskinfo) to additionally record the tgid
along with the task's comm at the same time. This has has the benefit of not
setting trace_cmdline_save before all the information for a task is saved.
* Add a new API tracing_record_taskinfo_sched_switch to record task information
for 2 tasks at a time (previous and next) and use it from sched_switch probe.
* Preserve the old API (tracing_record_cmdline) and create it as a wrapper
around the new one so that existing callers aren't affected.
* Reuse the existing sched_switch and sched_wakeup probes to record tgid
information and add a new option 'record-tgid' to enable recording of tgid

When record-tgid option isn't enabled to being with, we take care to make sure
that there's isn't memory or runtime overhead.

Cc: kernel-team@xxxxxxxxxxx
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Tested-by: Michael Sartain <mikesart@xxxxxxxxx>
Signed-off-by: Joel Fernandes <joelaf@xxxxxxxxxx>
---
Hi Steven, Here's the reworked patch addressing the allocator concerns. I'm
just sending you this single patch since all our discussion in the morning was
on this patch. Let me know if you still want me to resend 2/3 and 3/3 which
are the same as v4 sent last evening.

include/linux/trace_events.h | 13 ++++-
kernel/trace/trace.c | 105 ++++++++++++++++++++++++++++++++++----
kernel/trace/trace.h | 7 +++
kernel/trace/trace_events.c | 42 ++++++++++++++-
kernel/trace/trace_sched_switch.c | 72 +++++++++++++++++++++-----
5 files changed, 213 insertions(+), 26 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index a556805eff8a..f73cedfa2e0b 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -151,7 +151,15 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
int type, unsigned long len,
unsigned long flags, int pc);

-void tracing_record_cmdline(struct task_struct *tsk);
+#define TRACE_RECORD_CMDLINE BIT(0)
+#define TRACE_RECORD_TGID BIT(1)
+
+void tracing_record_taskinfo(struct task_struct *task, int flags);
+void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
+ struct task_struct *next, int flags);
+
+void tracing_record_cmdline(struct task_struct *task);
+void tracing_record_tgid(struct task_struct *task);

int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);

@@ -290,6 +298,7 @@ struct trace_subsystem_dir;
enum {
EVENT_FILE_FL_ENABLED_BIT,
EVENT_FILE_FL_RECORDED_CMD_BIT,
+ EVENT_FILE_FL_RECORDED_TGID_BIT,
EVENT_FILE_FL_FILTERED_BIT,
EVENT_FILE_FL_NO_SET_FILTER_BIT,
EVENT_FILE_FL_SOFT_MODE_BIT,
@@ -303,6 +312,7 @@ enum {
* Event file flags:
* ENABLED - The event is enabled
* RECORDED_CMD - The comms should be recorded at sched_switch
+ * RECORDED_TGID - The tgids should be recorded at sched_switch
* FILTERED - The event has a filter attached
* NO_SET_FILTER - Set when filter has error and is to be ignored
* SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED
@@ -315,6 +325,7 @@ enum {
enum {
EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT),
EVENT_FILE_FL_RECORDED_CMD = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
+ EVENT_FILE_FL_RECORDED_TGID = (1 << EVENT_FILE_FL_RECORDED_TGID_BIT),
EVENT_FILE_FL_FILTERED = (1 << EVENT_FILE_FL_FILTERED_BIT),
EVENT_FILE_FL_NO_SET_FILTER = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
EVENT_FILE_FL_SOFT_MODE = (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1122f151466f..45a289171def 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -87,7 +87,7 @@ dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
* tracing is active, only save the comm when a trace event
* occurred.
*/
-static DEFINE_PER_CPU(bool, trace_cmdline_save);
+static DEFINE_PER_CPU(bool, trace_taskinfo_save);

/*
* Kill all tracing for good (never come back).
@@ -790,7 +790,7 @@ EXPORT_SYMBOL_GPL(tracing_on);
static __always_inline void
__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
{
- __this_cpu_write(trace_cmdline_save, true);
+ __this_cpu_write(trace_taskinfo_save, true);

/* If this is the temp buffer, we need to commit fully */
if (this_cpu_read(trace_buffered_event) == event) {
@@ -1709,6 +1709,8 @@ void tracing_reset_all_online_cpus(void)
}
}

+static int *tgid_map;
+
#define SAVED_CMDLINES_DEFAULT 128
#define NO_CMDLINE_MAP UINT_MAX
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -1722,7 +1724,7 @@ struct saved_cmdlines_buffer {
static struct saved_cmdlines_buffer *savedcmd;

/* temporary disable recording */
-static atomic_t trace_record_cmdline_disabled __read_mostly;
+static atomic_t trace_record_taskinfo_disabled __read_mostly;

static inline char *get_saved_cmdlines(int idx)
{
@@ -1992,16 +1994,87 @@ void trace_find_cmdline(int pid, char comm[])
preempt_enable();
}

-void tracing_record_cmdline(struct task_struct *tsk)
+int trace_find_tgid(int pid)
+{
+ if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
+ return 0;
+
+ return tgid_map[pid];
+}
+
+static int trace_save_tgid(struct task_struct *tsk)
{
- if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
+ if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
+ return 0;
+
+ tgid_map[tsk->pid] = tsk->tgid;
+ return 1;
+}
+
+static bool tracing_record_taskinfo_skip(int flags)
+{
+ if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
+ return true;
+ if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
+ return true;
+ if (!__this_cpu_read(trace_taskinfo_save))
+ return true;
+ return false;
+}
+
+/**
+ * tracing_record_taskinfo - record the task info of a task
+ *
+ * @task - task to record
+ * @flags - TRACE_RECORD_CMDLINE for recording comm
+ * - TRACE_RECORD_TGID for recording tgid
+ */
+void tracing_record_taskinfo(struct task_struct *task, int flags)
+{
+ if (tracing_record_taskinfo_skip(flags))
+ return;
+ if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
+ return;
+ if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
return;

- if (!__this_cpu_read(trace_cmdline_save))
+ __this_cpu_write(trace_taskinfo_save, false);
+}
+
+/**
+ * tracing_record_taskinfo_sched_switch - record task info for sched_switch
+ *
+ * @prev - previous task during sched_switch
+ * @next - next task during sched_switch
+ * @flags - TRACE_RECORD_CMDLINE for recording comm
+ * TRACE_RECORD_TGID for recording tgid
+ */
+void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
+ struct task_struct *next, int flags)
+{
+ if (tracing_record_taskinfo_skip(flags))
return;

- if (trace_save_cmdline(tsk))
- __this_cpu_write(trace_cmdline_save, false);
+ if ((flags & TRACE_RECORD_CMDLINE) &&
+ (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
+ return;
+
+ if ((flags & TRACE_RECORD_TGID) &&
+ (!trace_save_tgid(prev) || !trace_save_tgid(next)))
+ return;
+
+ __this_cpu_write(trace_taskinfo_save, false);
+}
+
+/* Helpers to record a specific task information */
+void tracing_record_cmdline(struct task_struct *task)
+{
+ tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
+}
+
+void tracing_record_tgid(struct task_struct *task)
+{
+ tracing_record_taskinfo(task, TRACE_RECORD_TGID);
}

/*
@@ -3146,7 +3219,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
#endif

if (!iter->snapshot)
- atomic_inc(&trace_record_cmdline_disabled);
+ atomic_inc(&trace_record_taskinfo_disabled);

if (*pos != iter->pos) {
iter->ent = NULL;
@@ -3191,7 +3264,7 @@ static void s_stop(struct seq_file *m, void *p)
#endif

if (!iter->snapshot)
- atomic_dec(&trace_record_cmdline_disabled);
+ atomic_dec(&trace_record_taskinfo_disabled);

trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
@@ -4238,6 +4311,18 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
if (mask == TRACE_ITER_RECORD_CMD)
trace_event_enable_cmd_record(enabled);

+ if (mask == TRACE_ITER_RECORD_TGID) {
+ if (!tgid_map)
+ tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
+ GFP_KERNEL);
+ if (!tgid_map) {
+ tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
+ return -ENOMEM;
+ }
+
+ trace_event_enable_tgid_record(enabled);
+ }
+
if (mask == TRACE_ITER_EVENT_FORK)
trace_event_follow_fork(tr, enabled);

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 39fd77330aab..dd12f8d83631 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -637,6 +637,9 @@ void set_graph_array(struct trace_array *tr);

void tracing_start_cmdline_record(void);
void tracing_stop_cmdline_record(void);
+void tracing_start_tgid_record(void);
+void tracing_stop_tgid_record(void);
+
int register_tracer(struct tracer *type);
int is_tracing_stopped(void);

@@ -697,6 +700,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
extern u64 ftrace_now(int cpu);

extern void trace_find_cmdline(int pid, char comm[]);
+extern int trace_find_tgid(int pid);
extern void trace_event_follow_fork(struct trace_array *tr, bool enable);

#ifdef CONFIG_DYNAMIC_FTRACE
@@ -1107,6 +1111,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \
C(LATENCY_FMT, "latency-format"), \
C(RECORD_CMD, "record-cmd"), \
+ C(RECORD_TGID, "record-tgid"), \
C(OVERWRITE, "overwrite"), \
C(STOP_ON_FREE, "disable_on_free"), \
C(IRQ_INFO, "irq-info"), \
@@ -1423,6 +1428,8 @@ struct ftrace_event_field *
trace_find_event_field(struct trace_event_call *call, char *name);

extern void trace_event_enable_cmd_record(bool enable);
+extern void trace_event_enable_tgid_record(bool enable);
+
extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
extern int event_trace_del_tracer(struct trace_array *tr);

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index e7973e10398c..240c6df95ea6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable)
mutex_unlock(&event_mutex);
}

+void trace_event_enable_tgid_record(bool enable)
+{
+ struct trace_event_file *file;
+ struct trace_array *tr;
+
+ mutex_lock(&event_mutex);
+ do_for_each_event_file(tr, file) {
+ if (!(file->flags & EVENT_FILE_FL_ENABLED))
+ continue;
+
+ if (enable) {
+ tracing_start_tgid_record();
+ set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
+ } else {
+ tracing_stop_tgid_record();
+ clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
+ &file->flags);
+ }
+ } while_for_each_event_file();
+ mutex_unlock(&event_mutex);
+}
+
static int __ftrace_event_enable_disable(struct trace_event_file *file,
int enable, int soft_disable)
{
@@ -381,6 +403,12 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
tracing_stop_cmdline_record();
clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
}
+
+ if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
+ tracing_stop_tgid_record();
+ clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
+ }
+
call->class->reg(call, TRACE_REG_UNREGISTER, file);
}
/* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
@@ -407,18 +435,30 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
}

if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
+ bool cmd = false, tgid = false;

/* Keep the event disabled, when going to SOFT_MODE. */
if (soft_disable)
set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);

if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
+ cmd = true;
tracing_start_cmdline_record();
set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
}
+
+ if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
+ tgid = true;
+ tracing_start_tgid_record();
+ set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
+ }
+
ret = call->class->reg(call, TRACE_REG_REGISTER, file);
if (ret) {
- tracing_stop_cmdline_record();
+ if (cmd)
+ tracing_stop_cmdline_record();
+ if (tgid)
+ tracing_stop_tgid_record();
pr_info("event trace: Could not enable event "
"%s\n", trace_event_name(call));
break;
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 4c896a0101bd..b341c02730be 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -12,27 +12,38 @@

#include "trace.h"

-static int sched_ref;
+#define RECORD_CMDLINE 1
+#define RECORD_TGID 2
+
+static int sched_cmdline_ref;
+static int sched_tgid_ref;
static DEFINE_MUTEX(sched_register_mutex);

static void
probe_sched_switch(void *ignore, bool preempt,
struct task_struct *prev, struct task_struct *next)
{
- if (unlikely(!sched_ref))
- return;
+ int flags;
+
+ flags = (RECORD_TGID * !!sched_tgid_ref) +
+ (RECORD_CMDLINE * !!sched_cmdline_ref);

- tracing_record_cmdline(prev);
- tracing_record_cmdline(next);
+ if (!flags)
+ return;
+ tracing_record_taskinfo_sched_switch(prev, next, flags);
}

static void
probe_sched_wakeup(void *ignore, struct task_struct *wakee)
{
- if (unlikely(!sched_ref))
- return;
+ int flags;
+
+ flags = (RECORD_TGID * !!sched_tgid_ref) +
+ (RECORD_CMDLINE * !!sched_cmdline_ref);

- tracing_record_cmdline(current);
+ if (!flags)
+ return;
+ tracing_record_taskinfo(current, flags);
}

static int tracing_sched_register(void)
@@ -75,28 +86,61 @@ static void tracing_sched_unregister(void)
unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
}

-static void tracing_start_sched_switch(void)
+static void tracing_start_sched_switch(int ops)
{
+ bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
mutex_lock(&sched_register_mutex);
- if (!(sched_ref++))
+
+ switch (ops) {
+ case RECORD_CMDLINE:
+ sched_cmdline_ref++;
+ break;
+
+ case RECORD_TGID:
+ sched_tgid_ref++;
+ break;
+ }
+
+ if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
tracing_sched_register();
mutex_unlock(&sched_register_mutex);
}

-static void tracing_stop_sched_switch(void)
+static void tracing_stop_sched_switch(int ops)
{
mutex_lock(&sched_register_mutex);
- if (!(--sched_ref))
+
+ switch (ops) {
+ case RECORD_CMDLINE:
+ sched_cmdline_ref--;
+ break;
+
+ case RECORD_TGID:
+ sched_tgid_ref--;
+ break;
+ }
+
+ if (!sched_cmdline_ref && !sched_tgid_ref)
tracing_sched_unregister();
mutex_unlock(&sched_register_mutex);
}

void tracing_start_cmdline_record(void)
{
- tracing_start_sched_switch();
+ tracing_start_sched_switch(RECORD_CMDLINE);
}

void tracing_stop_cmdline_record(void)
{
- tracing_stop_sched_switch();
+ tracing_stop_sched_switch(RECORD_CMDLINE);
+}
+
+void tracing_start_tgid_record(void)
+{
+ tracing_start_sched_switch(RECORD_TGID);
+}
+
+void tracing_stop_tgid_record(void)
+{
+ tracing_stop_sched_switch(RECORD_TGID);
}
--
2.13.1.611.g7e3b11ae1-goog