[PATCH v2 05/12] sched: Fix trace_sched_switch()

From: Peter Zijlstra
Date: Wed Sep 30 2015 - 03:33:04 EST


__trace_sched_switch_state() is the last remaining PREEMPT_ACTIVE
user. Move trace_sched_switch() from prepare_task_switch() to
__schedule() and propagate the @preempt argument, so that the
tracepoint no longer has to test preempt_count() & PREEMPT_ACTIVE
itself.

Reviewed-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Reviewed-by: Steven Rostedt <rostedt@xxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
include/trace/events/sched.h | 22 +++++++++-------------
kernel/sched/core.c | 2 +-
kernel/trace/ftrace.c | 2 +-
kernel/trace/trace_sched_switch.c | 3 ++-
kernel/trace/trace_sched_wakeup.c | 2 +-
5 files changed, 14 insertions(+), 17 deletions(-)

--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -104,22 +104,17 @@ DEFINE_EVENT(sched_wakeup_template, sche
TP_ARGS(p));

#ifdef CREATE_TRACE_POINTS
-static inline long __trace_sched_switch_state(struct task_struct *p)
+static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
- long state = p->state;
-
-#ifdef CONFIG_PREEMPT
#ifdef CONFIG_SCHED_DEBUG
BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
+
/*
- * For all intents and purposes a preempted task is a running task.
+ * Preemption ignores task state, therefore preempted tasks are always
+ * RUNNING (we will not have dequeued if state != RUNNING).
*/
- if (preempt_count() & PREEMPT_ACTIVE)
- state = TASK_RUNNING | TASK_STATE_MAX;
-#endif /* CONFIG_PREEMPT */
-
- return state;
+ return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
}
#endif /* CREATE_TRACE_POINTS */

@@ -128,10 +123,11 @@ static inline long __trace_sched_switch_
*/
TRACE_EVENT(sched_switch,

- TP_PROTO(struct task_struct *prev,
+ TP_PROTO(bool preempt,
+ struct task_struct *prev,
struct task_struct *next),

- TP_ARGS(prev, next),
+ TP_ARGS(preempt, prev, next),

TP_STRUCT__entry(
__array( char, prev_comm, TASK_COMM_LEN )
@@ -147,7 +143,7 @@ TRACE_EVENT(sched_switch,
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
__entry->prev_pid = prev->pid;
__entry->prev_prio = prev->prio;
- __entry->prev_state = __trace_sched_switch_state(prev);
+ __entry->prev_state = __trace_sched_switch_state(preempt, prev);
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
__entry->next_pid = next->pid;
__entry->next_prio = next->prio;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2470,7 +2470,6 @@ static inline void
prepare_task_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
- trace_sched_switch(prev, next);
sched_info_switch(rq, prev, next);
perf_event_task_sched_out(prev, next);
fire_sched_out_preempt_notifiers(prev, next);
@@ -3121,6 +3120,7 @@ static void __sched __schedule(bool pree
rq->curr = next;
++*switch_count;

+ trace_sched_switch(preempt, prev, next);
rq = context_switch(rq, prev, next); /* unlocks the rq */
cpu = cpu_of(rq);
} else {
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5697,7 +5697,7 @@ static int alloc_retstack_tasklist(struc
}

static void
-ftrace_graph_probe_sched_switch(void *ignore,
+ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
struct task_struct *prev, struct task_struct *next)
{
unsigned long long timestamp;
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -16,7 +16,8 @@ static int sched_ref;
static DEFINE_MUTEX(sched_register_mutex);

static void
-probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next)
+probe_sched_switch(void *ignore, bool preempt,
+ struct task_struct *prev, struct task_struct *next)
{
if (unlikely(!sched_ref))
return;
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -420,7 +420,7 @@ tracing_sched_wakeup_trace(struct trace_
}

static void notrace
-probe_wakeup_sched_switch(void *ignore,
+probe_wakeup_sched_switch(void *ignore, bool preempt,
struct task_struct *prev, struct task_struct *next)
{
struct trace_array_cpu *data;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/