Re: perf,tools: Remove usage of trace_sched_wakeup(.success)

From: Peter Zijlstra
Date: Tue May 13 2014 - 14:16:10 EST




Anyway, ideally we'd do something like the below, except of course that
it'll break userspace :-(

And that is why I loathe tracepoints.

Also, I've not looked at the code difference and or performance
implications.

---
Subject: sched: Add enqueue information to trace_sched_wakeup()

trace_sched_switch() shows task dequeues, trace_sched_wakeup() shows all
wakeups, including those that do not enqueue (because the task is
still enqueued).

In order to easy matching dequeue to enqueue, add enqueue information to
trace_sched_wakeup().

Replace the trace_sched_wakeup() success argument which is the
try_to_wake_up() return value and always true (since we removed
tracing the false path).

Not-signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
---
include/trace/events/sched.h | 16 ++++++++--------
kernel/sched/core.c | 42 ++++++++++++++++++++++--------------------
kernel/sched/sched.h | 3 ++-
3 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 67e1bbf83695..5e5037f794f2 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -55,15 +55,15 @@ TRACE_EVENT(sched_kthread_stop_ret,
*/
DECLARE_EVENT_CLASS(sched_wakeup_template,

- TP_PROTO(struct task_struct *p, int success),
+ TP_PROTO(struct task_struct *p, int enqueue),

- TP_ARGS(__perf_task(p), success),
+ TP_ARGS(__perf_task(p), enqueue),

TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, prio )
- __field( int, success )
+ __field( int, enqueue )
__field( int, target_cpu )
),

@@ -71,24 +71,24 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->prio = p->prio;
- __entry->success = success;
+ __entry->enqueue = enqueue;
__entry->target_cpu = task_cpu(p);
),

- TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
+ TP_printk("comm=%s pid=%d prio=%d enqueue=%d target_cpu=%03d",
__entry->comm, __entry->pid, __entry->prio,
- __entry->success, __entry->target_cpu)
+ __entry->enqueue, __entry->target_cpu)
);

DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
- TP_PROTO(struct task_struct *p, int success),
+ TP_PROTO(struct task_struct *p, int enqueue),
TP_ARGS(p, success));

/*
* Tracepoint for waking up a new task:
*/
DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
- TP_PROTO(struct task_struct *p, int success),
+ TP_PROTO(struct task_struct *p, int enqueue),
TP_ARGS(p, success));

#ifdef CREATE_TRACE_POINTS
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4ea7b3f1a087..e29b87748680 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1461,10 +1461,22 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
* Mark the task runnable and perform wakeup-preemption.
*/
static void
-ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
+ttwu_do_wakeup(struct rq *rq, struct task_struct *p,
+ int wake_flags, en_flags)
{
+ bool enqueue = wake_flags & WF_ENQUEUE;
+
+ if (enqueue) {
+#ifdef CONFIG_SMP
+ if (p->sched_contributes_to_load)
+ rq->nr_uninterruptible--;
+#endif
+
+ ttwu_activate(rq, p, en_flags);
+ }
+
check_preempt_curr(rq, p, wake_flags);
- trace_sched_wakeup(p, true);
+ trace_sched_wakeup(p, enqueue);

p->state = TASK_RUNNING;
#ifdef CONFIG_SMP
@@ -1485,18 +1497,6 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
#endif
}

-static void
-ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
-{
-#ifdef CONFIG_SMP
- if (p->sched_contributes_to_load)
- rq->nr_uninterruptible--;
-#endif
-
- ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
- ttwu_do_wakeup(rq, p, wake_flags);
-}
-
/*
* Called in case the task @p isn't fully descheduled from its runqueue,
* in this case we must do a remote wakeup. Its a 'light' wakeup though,
@@ -1512,7 +1512,7 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
if (p->on_rq) {
/* check_preempt_curr() may use rq clock */
update_rq_clock(rq);
- ttwu_do_wakeup(rq, p, wake_flags);
+ ttwu_do_wakeup(rq, p, wake_flags, 0);
ret = 1;
}
__task_rq_unlock(rq);
@@ -1532,7 +1532,8 @@ static void sched_ttwu_pending(void)
while (llist) {
p = llist_entry(llist, struct task_struct, wake_entry);
llist = llist_next(llist);
- ttwu_do_activate(rq, p, 0);
+ ttwu_do_wakeup(rq, p, WF_ENQUEUE,
+ ENQUEUE_WAKEUP | ENQUEUE_WAKING);
}

raw_spin_unlock(&rq->lock);
@@ -1604,7 +1605,7 @@ static void ttwu_queue(struct task_struct *p, int cpu)
#endif

raw_spin_lock(&rq->lock);
- ttwu_do_activate(rq, p, 0);
+ ttwu_do_wakeup(rq, p, WF_ENQUEUE, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
raw_spin_unlock(&rq->lock);
}

@@ -1707,10 +1708,11 @@ static void try_to_wake_up_local(struct task_struct *p)
if (!(p->state & TASK_NORMAL))
goto out;

- if (!p->on_rq)
- ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+ if (p->on_rq)
+ ttwu_do_wakeup(rq, p, 0, 0);
+ else
+ ttwu_do_wakeup(rq, p, WF_ENQUEUE, ENQUEUE_WAKEUP);

- ttwu_do_wakeup(rq, p, 0);
ttwu_stat(p, smp_processor_id(), 0);
out:
raw_spin_unlock(&p->pi_lock);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b2cbe81308af..d865415ea1d2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1026,7 +1026,8 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
*/
#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
#define WF_FORK 0x02 /* child wakeup after fork */
-#define WF_MIGRATED 0x4 /* internal use, task got migrated */
+#define WF_MIGRATED 0x04 /* internal use, task got migrated */
+#define WF_ENQUEUE 0x08

/*
* To aid in avoiding the subversion of "niceness" due to uneven distribution
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/