Re: [PATCH v4 1/3] tracing: Add support for recording tgid of tasks

From: Steven Rostedt
Date: Mon Jun 26 2017 - 12:52:45 EST


On Sun, 25 Jun 2017 22:38:42 -0700
Joel Fernandes <joelaf@xxxxxxxxxx> wrote:

> +int *tgid_map;
> +
> +void tracing_alloc_tgid_map(void)
> +{
> + if (tgid_map)
> + return;
> +
> + tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
> + GFP_KERNEL);
> + WARN_ONCE(!tgid_map, "Allocation of tgid_map failed\n");

This needs to return a value. If it fails to allocate, then we must not
set the bit to do the recording. More below about why.

> +}
> +
> #define SAVED_CMDLINES_DEFAULT 128
> #define NO_CMDLINE_MAP UINT_MAX
> static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
> @@ -1722,7 +1734,7 @@ struct saved_cmdlines_buffer {
> static struct saved_cmdlines_buffer *savedcmd;
>
> /* temporary disable recording */
> -static atomic_t trace_record_cmdline_disabled __read_mostly;
> +static atomic_t trace_record_taskinfo_disabled __read_mostly;
>
> static inline char *get_saved_cmdlines(int idx)
> {
> @@ -1992,16 +2004,87 @@ void trace_find_cmdline(int pid, char comm[])
> preempt_enable();
> }
>
> -void tracing_record_cmdline(struct task_struct *tsk)
> +int trace_find_tgid(int pid)
> +{
> + if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
> + return 0;
> +
> + return tgid_map[pid];
> +}
> +
> +static int trace_save_tgid(struct task_struct *tsk)
> +{
> + if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
> + return 0;

If we failed to allocate, this will always return 0.

> +
> + tgid_map[tsk->pid] = tsk->tgid;
> + return 1;
> +}
> +
> +static bool tracing_record_taskinfo_skip(int flags)
> +{
> + if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
> + return true;
> + if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
> + return true;
> + if (!__this_cpu_read(trace_taskinfo_save))
> + return true;
> + return false;
> +}
> +
> +/**
> + * tracing_record_taskinfo - record the task info of a task
> + *
> + * @task - task to record
> + * @flags - TRACE_RECORD_CMDLINE for recording comm
> + * - TRACE_RECORD_TGID for recording tgid
> + */
> +void tracing_record_taskinfo(struct task_struct *task, int flags)
> +{
> + if (tracing_record_taskinfo_skip(flags))
> + return;
> + if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
> + return;
> + if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))

With a failed allocation, this will return here.

> + return;
> +
> + __this_cpu_write(trace_taskinfo_save, false);

The above will never be cleared, and this callback will constantly be
called, slowing down the tracer.


> +}
> +
> +/**
> + * tracing_record_taskinfo_sched_switch - record task info for sched_switch
> + *
> + * @prev - previous task during sched_switch
> + * @next - next task during sched_switch
> + * @flags - TRACE_RECORD_CMDLINE for recording comm
> + * TRACE_RECORD_TGID for recording tgid
> + */
> +void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
> + struct task_struct *next, int flags)
> {
> - if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
> + if (tracing_record_taskinfo_skip(flags))
> + return;
> +
> + if ((flags & TRACE_RECORD_CMDLINE) &&
> + (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
> return;
>
> - if (!__this_cpu_read(trace_cmdline_save))
> + if ((flags & TRACE_RECORD_TGID) &&
> + (!trace_save_tgid(prev) || !trace_save_tgid(next)))

On failed allocation, this will return here.

> return;
>
> - if (trace_save_cmdline(tsk))
> - __this_cpu_write(trace_cmdline_save, false);
> + __this_cpu_write(trace_taskinfo_save, false);

This will not be cleared.

> +}
> +
> +/* Helpers to record a specific task information */
> +void tracing_record_cmdline(struct task_struct *task)
> +{
> + tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
> +}
> +
> +void tracing_record_tgid(struct task_struct *task)
> +{
> + tracing_record_taskinfo(task, TRACE_RECORD_TGID);
> }
>
> /*
> @@ -3146,7 +3229,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
> #endif
>
> if (!iter->snapshot)
> - atomic_inc(&trace_record_cmdline_disabled);
> + atomic_inc(&trace_record_taskinfo_disabled);
>
> if (*pos != iter->pos) {
> iter->ent = NULL;
> @@ -3191,7 +3274,7 @@ static void s_stop(struct seq_file *m, void *p)
> #endif
>
> if (!iter->snapshot)
> - atomic_dec(&trace_record_cmdline_disabled);
> + atomic_dec(&trace_record_taskinfo_disabled);
>
> trace_access_unlock(iter->cpu_file);
> trace_event_read_unlock();
> @@ -4238,6 +4321,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
> if (mask == TRACE_ITER_RECORD_CMD)
> trace_event_enable_cmd_record(enabled);
>
> + if (mask == TRACE_ITER_RECORD_TGID)
> + trace_event_enable_tgid_record(enabled);
> +
> if (mask == TRACE_ITER_EVENT_FORK)
> trace_event_follow_fork(tr, enabled);
>
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 39fd77330aab..90318b016fd9 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -635,8 +635,13 @@ void trace_graph_return(struct ftrace_graph_ret *trace);
> int trace_graph_entry(struct ftrace_graph_ent *trace);
> void set_graph_array(struct trace_array *tr);
>
> +extern int *tgid_map;
> +void tracing_alloc_tgid_map(void);
> void tracing_start_cmdline_record(void);
> void tracing_stop_cmdline_record(void);
> +void tracing_start_tgid_record(void);
> +void tracing_stop_tgid_record(void);
> +
> int register_tracer(struct tracer *type);
> int is_tracing_stopped(void);
>
> @@ -697,6 +702,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
> extern u64 ftrace_now(int cpu);
>
> extern void trace_find_cmdline(int pid, char comm[]);
> +extern int trace_find_tgid(int pid);
> extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
>
> #ifdef CONFIG_DYNAMIC_FTRACE
> @@ -1107,6 +1113,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
> C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \
> C(LATENCY_FMT, "latency-format"), \
> C(RECORD_CMD, "record-cmd"), \
> + C(RECORD_TGID, "record-tgid"), \
> C(OVERWRITE, "overwrite"), \
> C(STOP_ON_FREE, "disable_on_free"), \
> C(IRQ_INFO, "irq-info"), \
> @@ -1423,6 +1430,8 @@ struct ftrace_event_field *
> trace_find_event_field(struct trace_event_call *call, char *name);
>
> extern void trace_event_enable_cmd_record(bool enable);
> +extern void trace_event_enable_tgid_record(bool enable);
> +
> extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
> extern int event_trace_del_tracer(struct trace_array *tr);
>
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index e7973e10398c..240c6df95ea6 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable)
> mutex_unlock(&event_mutex);
> }
>
> +void trace_event_enable_tgid_record(bool enable)

This should return a value.

> +{
> + struct trace_event_file *file;
> + struct trace_array *tr;
> +
> + mutex_lock(&event_mutex);
> + do_for_each_event_file(tr, file) {
> + if (!(file->flags & EVENT_FILE_FL_ENABLED))
> + continue;
> +
> + if (enable) {
> + tracing_start_tgid_record();

If we fail to start, the bit should not be set, and we should return
failure. Note, it can only fail on the first try, as once it is
allocated, you don't need to worry about it failing. Thus, if it fails,
break out of the loop now and return failure.

> + set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
> + } else {
> + tracing_stop_tgid_record();
> + clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
> + &file->flags);
> + }
> + } while_for_each_event_file();
> + mutex_unlock(&event_mutex);
> +}
> +
> static int __ftrace_event_enable_disable(struct trace_event_file *file,
> int enable, int soft_disable)
> {
> @@ -381,6 +403,12 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
> tracing_stop_cmdline_record();
> clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
> }
> +
> + if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
> + tracing_stop_tgid_record();
> + clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
> + }
> +
> call->class->reg(call, TRACE_REG_UNREGISTER, file);
> }
> /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
> @@ -407,18 +435,30 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
> }
>
> if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
> + bool cmd = false, tgid = false;
>
> /* Keep the event disabled, when going to SOFT_MODE. */
> if (soft_disable)
> set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
>
> if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
> + cmd = true;
> tracing_start_cmdline_record();
> set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
> }
> +
> + if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
> + tgid = true;
> + tracing_start_tgid_record();
> + set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
> + }
> +
> ret = call->class->reg(call, TRACE_REG_REGISTER, file);
> if (ret) {
> - tracing_stop_cmdline_record();
> + if (cmd)
> + tracing_stop_cmdline_record();
> + if (tgid)
> + tracing_stop_tgid_record();
> pr_info("event trace: Could not enable event "
> "%s\n", trace_event_name(call));
> break;
> diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
> index 4c896a0101bd..75a0c3accf2a 100644
> --- a/kernel/trace/trace_sched_switch.c
> +++ b/kernel/trace/trace_sched_switch.c
> @@ -12,27 +12,38 @@
>
> #include "trace.h"
>
> -static int sched_ref;
> +#define RECORD_CMDLINE 1
> +#define RECORD_TGID 2
> +
> +static int sched_cmdline_ref;
> +static int sched_tgid_ref;
> static DEFINE_MUTEX(sched_register_mutex);
>
> static void
> probe_sched_switch(void *ignore, bool preempt,
> struct task_struct *prev, struct task_struct *next)
> {
> - if (unlikely(!sched_ref))
> - return;
> + int flags;
> +
> + flags = (RECORD_TGID * !!sched_tgid_ref) +
> + (RECORD_CMDLINE * !!sched_cmdline_ref);
>
> - tracing_record_cmdline(prev);
> - tracing_record_cmdline(next);
> + if (!flags)
> + return;
> + tracing_record_taskinfo_sched_switch(prev, next, flags);
> }
>
> static void
> probe_sched_wakeup(void *ignore, struct task_struct *wakee)
> {
> - if (unlikely(!sched_ref))
> - return;
> + int flags;
> +
> + flags = (RECORD_TGID * !!sched_tgid_ref) +
> + (RECORD_CMDLINE * !!sched_cmdline_ref);
>
> - tracing_record_cmdline(current);
> + if (!flags)
> + return;
> + tracing_record_taskinfo(current, flags);
> }
>
> static int tracing_sched_register(void)
> @@ -75,28 +86,64 @@ static void tracing_sched_unregister(void)
> unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
> }
>
> -static void tracing_start_sched_switch(void)
> +static void tracing_start_sched_switch(int ops)
> {
> + bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
> mutex_lock(&sched_register_mutex);
> - if (!(sched_ref++))
> +
> + switch (ops) {
> + case RECORD_CMDLINE:
> + sched_cmdline_ref++;
> + break;
> +
> + case RECORD_TGID:
> + sched_tgid_ref++;
> + break;
> + }
> +
> + if (sched_tgid_ref > 0 && !tgid_map)
> + tracing_alloc_tgid_map();

Should have this check for failure as well, and clean up if there is.

-- Steve

> +
> + if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
> tracing_sched_register();
> mutex_unlock(&sched_register_mutex);
> }
>
> -static void tracing_stop_sched_switch(void)
> +static void tracing_stop_sched_switch(int ops)
> {
> mutex_lock(&sched_register_mutex);
> - if (!(--sched_ref))
> +
> + switch (ops) {
> + case RECORD_CMDLINE:
> + sched_cmdline_ref--;
> + break;
> +
> + case RECORD_TGID:
> + sched_tgid_ref--;
> + break;
> + }
> +
> + if (!sched_cmdline_ref && !sched_tgid_ref)
> tracing_sched_unregister();
> mutex_unlock(&sched_register_mutex);
> }
>
> void tracing_start_cmdline_record(void)
> {
> - tracing_start_sched_switch();
> + tracing_start_sched_switch(RECORD_CMDLINE);
> }
>
> void tracing_stop_cmdline_record(void)
> {
> - tracing_stop_sched_switch();
> + tracing_stop_sched_switch(RECORD_CMDLINE);
> +}
> +
> +void tracing_start_tgid_record(void)
> +{
> + tracing_start_sched_switch(RECORD_TGID);
> +}
> +
> +void tracing_stop_tgid_record(void)
> +{
> + tracing_stop_sched_switch(RECORD_TGID);
> }