Re: [RFC PATCH] tracing: add sched_prio_update

From: Steven Rostedt
Date: Tue Jul 05 2016 - 11:19:28 EST


On Mon, 4 Jul 2016 15:46:04 -0400
Julien Desfossez <jdesfossez@xxxxxxxxxxxx> wrote:


> diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
> index 9b90c57..fcb0f29 100644
> --- a/include/trace/events/sched.h
> +++ b/include/trace/events/sched.h
> @@ -8,6 +8,34 @@
> #include <linux/tracepoint.h>
> #include <linux/binfmts.h>
>
> +#define SCHEDULING_POLICY \
> + EM( SCHED_NORMAL, "SCHED_NORMAL") \
> + EM( SCHED_FIFO, "SCHED_FIFO") \
> + EM( SCHED_RR, "SCHED_RR") \
> + EM( SCHED_BATCH, "SCHED_BATCH") \
> + EM( SCHED_IDLE, "SCHED_IDLE") \
> + EMe(SCHED_DEADLINE, "SCHED_DEADLINE")
> +
> +/*
> + * First define the enums in the above macros to be exported to userspace
> + * via TRACE_DEFINE_ENUM().
> + */
> +#undef EM
> +#undef EMe
> +#define EM(a, b) TRACE_DEFINE_ENUM(a);
> +#define EMe(a, b) TRACE_DEFINE_ENUM(a);
> +
> +SCHEDULING_POLICY
> +
> +/*
> + * Now redefine the EM() and EMe() macros to map the enums to the strings
> + * that will be printed in the output.
> + */
> +#undef EM
> +#undef EMe
> +#define EM(a, b) {a, b},
> +#define EMe(a, b) {a, b}
> +
> /*
> * Tracepoint for calling kthread_stop, performed to end a kthread:
> */
> @@ -562,6 +590,46 @@ TRACE_EVENT(sched_wake_idle_without_ipi,
>
> TP_printk("cpu=%d", __entry->cpu)
> );
> +
> +/*
> + * Tracepoint for showing scheduling priority changes.
> + */
> +TRACE_EVENT(sched_prio_update,

I'm fine with the addition of this tracepoint, but you'll have to get
it past Peter Zijlstra.

> +
> + TP_PROTO(struct task_struct *tsk),
> +
> + TP_ARGS(tsk),
> +
> + TP_STRUCT__entry(
> + __array( char, comm, TASK_COMM_LEN )

I could imagine this being a high-frequency tracepoint, especially with
a lot of boosting going on. Can we nuke the comm recording and let the
userspace tools just hook into the sched_switch tracepoint for that?

-- Steve


> + __field( pid_t, pid )
> + __field( unsigned int, policy )
> + __field( int, nice )
> + __field( unsigned int, rt_priority )
> + __field( u64, dl_runtime )
> + __field( u64, dl_deadline )
> + __field( u64, dl_period )
> + ),
> +
> + TP_fast_assign(
> + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
> + __entry->pid = tsk->pid;
> + __entry->policy = tsk->policy;
> + __entry->nice = task_nice(tsk);
> + __entry->rt_priority = tsk->rt_priority;
> + __entry->dl_runtime = tsk->dl.dl_runtime;
> + __entry->dl_deadline = tsk->dl.dl_deadline;
> + __entry->dl_period = tsk->dl.dl_period;
> + ),
> +
> + TP_printk("comm=%s pid=%d, policy=%s, nice=%d, rt_priority=%u, "
> + "dl_runtime=%Lu, dl_deadline=%Lu, dl_period=%Lu",
> + __entry->comm, __entry->pid,
> + __print_symbolic(__entry->policy, SCHEDULING_POLICY),
> + __entry->nice, __entry->rt_priority,
> + __entry->dl_runtime, __entry->dl_deadline,
> + __entry->dl_period)
> +);
> #endif /* _TRACE_SCHED_H */
>
> /* This part must be outside protection */
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 7926993..ac4294a 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1773,6 +1773,7 @@ long _do_fork(unsigned long clone_flags,
> struct pid *pid;
>
> trace_sched_process_fork(current, p);
> + trace_sched_prio_update(p);
>
> pid = get_task_pid(p, PIDTYPE_PID);
> nr = pid_vnr(pid);
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index ce83e39..c729425 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3708,6 +3708,7 @@ void set_user_nice(struct task_struct *p, long nice)
> resched_curr(rq);
> }
> out_unlock:
> + trace_sched_prio_update(p);
> task_rq_unlock(rq, p, &rf);
> }
> EXPORT_SYMBOL(set_user_nice);
> @@ -3912,6 +3913,8 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
> p->sched_class = &rt_sched_class;
> else
> p->sched_class = &fair_sched_class;
> +
> + trace_sched_prio_update(p);
> }
>
> static void