[tip:sched/hrtimers] sched: Replace post_schedule with a balance callback list

From: tip-bot for Peter Zijlstra
Date: Thu Jun 18 2015 - 19:01:16 EST


Commit-ID: e3fca9e7cbfb72694a21c886fcdf9f059cfded9c
Gitweb: http://git.kernel.org/tip/e3fca9e7cbfb72694a21c886fcdf9f059cfded9c
Author: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Thu, 11 Jun 2015 14:46:37 +0200
Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitDate: Fri, 19 Jun 2015 00:25:26 +0200

sched: Replace post_schedule with a balance callback list

Generalize the post_schedule() mechanism into a balance callback list.
This makes it easier to use outside of schedule() and across
sched_classes.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Cc: ktkhai@xxxxxxxxxxxxx
Cc: rostedt@xxxxxxxxxxx
Cc: juri.lelli@xxxxxxxxx
Cc: pang.xunlei@xxxxxxxxxx
Cc: oleg@xxxxxxxxxx
Cc: wanpeng.li@xxxxxxxxxxxxxxx
Cc: umgwanakikbuti@xxxxxxxxx
Link: http://lkml.kernel.org/r/20150611124742.424032725@xxxxxxxxxxxxx
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
kernel/sched/core.c | 36 ++++++++++++++++++++++++------------
kernel/sched/deadline.c | 21 +++++++++++----------
kernel/sched/rt.c | 25 +++++++++++--------------
kernel/sched/sched.h | 19 +++++++++++++++++--
4 files changed, 63 insertions(+), 38 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 41942a5..fa32bc0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2277,23 +2277,35 @@ static struct rq *finish_task_switch(struct task_struct *prev)
#ifdef CONFIG_SMP

/* rq->lock is NOT held, but preemption is disabled */
-static inline void post_schedule(struct rq *rq)
+static void __balance_callback(struct rq *rq)
{
- if (rq->post_schedule) {
- unsigned long flags;
+ struct callback_head *head, *next;
+ void (*func)(struct rq *rq);
+ unsigned long flags;

- raw_spin_lock_irqsave(&rq->lock, flags);
- if (rq->curr->sched_class->post_schedule)
- rq->curr->sched_class->post_schedule(rq);
- raw_spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ head = rq->balance_callback;
+ rq->balance_callback = NULL;
+ while (head) {
+ func = (void (*)(struct rq *))head->func;
+ next = head->next;
+ head->next = NULL;
+ head = next;

- rq->post_schedule = 0;
+ func(rq);
}
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static inline void balance_callback(struct rq *rq)
+{
+ if (unlikely(rq->balance_callback))
+ __balance_callback(rq);
}

#else

-static inline void post_schedule(struct rq *rq)
+static inline void balance_callback(struct rq *rq)
{
}

@@ -2311,7 +2323,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
/* finish_task_switch() drops rq->lock and enables preemtion */
preempt_disable();
rq = finish_task_switch(prev);
- post_schedule(rq);
+ balance_callback(rq);
preempt_enable();

if (current->set_child_tid)
@@ -2823,7 +2835,7 @@ static void __sched __schedule(void)
} else
raw_spin_unlock_irq(&rq->lock);

- post_schedule(rq);
+ balance_callback(rq);
}

static inline void sched_submit_work(struct task_struct *tsk)
@@ -7219,7 +7231,7 @@ void __init sched_init(void)
rq->sd = NULL;
rq->rd = NULL;
rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
- rq->post_schedule = 0;
+ rq->balance_callback = NULL;
rq->active_balance = 0;
rq->next_balance = jiffies;
rq->push_cpu = 0;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 7a08d59..d80523f 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -213,9 +213,16 @@ static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
return dl_task(prev);
}

-static inline void set_post_schedule(struct rq *rq)
+static DEFINE_PER_CPU(struct callback_head, dl_balance_head);
+
+static void push_dl_tasks(struct rq *);
+
+static inline void queue_push_tasks(struct rq *rq)
{
- rq->post_schedule = has_pushable_dl_tasks(rq);
+ if (!has_pushable_dl_tasks(rq))
+ return;
+
+ queue_balance_callback(rq, &per_cpu(dl_balance_head, rq->cpu), push_dl_tasks);
}

static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
@@ -296,7 +303,7 @@ static inline int pull_dl_task(struct rq *rq)
return 0;
}

-static inline void set_post_schedule(struct rq *rq)
+static inline void queue_push_tasks(struct rq *rq)
{
}
#endif /* CONFIG_SMP */
@@ -1126,7 +1133,7 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
if (hrtick_enabled(rq))
start_hrtick_dl(rq, p);

- set_post_schedule(rq);
+ queue_push_tasks(rq);

return p;
}
@@ -1544,11 +1551,6 @@ skip:
return ret;
}

-static void post_schedule_dl(struct rq *rq)
-{
- push_dl_tasks(rq);
-}
-
/*
* Since the task is not running and a reschedule is not going to happen
* anytime soon on its runqueue, we try pushing it away now.
@@ -1784,7 +1786,6 @@ const struct sched_class dl_sched_class = {
.set_cpus_allowed = set_cpus_allowed_dl,
.rq_online = rq_online_dl,
.rq_offline = rq_offline_dl,
- .post_schedule = post_schedule_dl,
.task_woken = task_woken_dl,
#endif

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 7d7093c5..4f3726f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -354,13 +354,16 @@ static inline int has_pushable_tasks(struct rq *rq)
return !plist_head_empty(&rq->rt.pushable_tasks);
}

-static inline void set_post_schedule(struct rq *rq)
+static DEFINE_PER_CPU(struct callback_head, rt_balance_head);
+
+static void push_rt_tasks(struct rq *);
+
+static inline void queue_push_tasks(struct rq *rq)
{
- /*
- * We detect this state here so that we can avoid taking the RQ
- * lock again later if there is no need to push
- */
- rq->post_schedule = has_pushable_tasks(rq);
+ if (!has_pushable_tasks(rq))
+ return;
+
+ queue_balance_callback(rq, &per_cpu(rt_balance_head, rq->cpu), push_rt_tasks);
}

static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
@@ -417,7 +420,7 @@ static inline int pull_rt_task(struct rq *this_rq)
return 0;
}

-static inline void set_post_schedule(struct rq *rq)
+static inline void queue_push_tasks(struct rq *rq)
{
}
#endif /* CONFIG_SMP */
@@ -1497,7 +1500,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
/* The running task is never eligible for pushing */
dequeue_pushable_task(rq, p);

- set_post_schedule(rq);
+ queue_push_tasks(rq);

return p;
}
@@ -2042,11 +2045,6 @@ skip:
return ret;
}

-static void post_schedule_rt(struct rq *rq)
-{
- push_rt_tasks(rq);
-}
-
/*
* If we are not running and we are not going to reschedule soon, we should
* try to push tasks away now
@@ -2318,7 +2316,6 @@ const struct sched_class rt_sched_class = {
.set_cpus_allowed = set_cpus_allowed_rt,
.rq_online = rq_online_rt,
.rq_offline = rq_offline_rt,
- .post_schedule = post_schedule_rt,
.task_woken = task_woken_rt,
.switched_from = switched_from_rt,
#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f10a445..62949ab 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -624,9 +624,10 @@ struct rq {
unsigned long cpu_capacity;
unsigned long cpu_capacity_orig;

+ struct callback_head *balance_callback;
+
unsigned char idle_balance;
/* For active balancing */
- int post_schedule;
int active_balance;
int push_cpu;
struct cpu_stop_work active_balance_work;
@@ -767,6 +768,21 @@ extern int migrate_swap(struct task_struct *, struct task_struct *);

#ifdef CONFIG_SMP

+static inline void
+queue_balance_callback(struct rq *rq,
+ struct callback_head *head,
+ void (*func)(struct rq *rq))
+{
+ lockdep_assert_held(&rq->lock);
+
+ if (unlikely(head->next))
+ return;
+
+ head->func = (void (*)(struct callback_head *))func;
+ head->next = rq->balance_callback;
+ rq->balance_callback = head;
+}
+
extern void sched_ttwu_pending(void);

#define rcu_dereference_check_sched_domain(p) \
@@ -1192,7 +1208,6 @@ struct sched_class {
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
void (*migrate_task_rq)(struct task_struct *p, int next_cpu);

- void (*post_schedule) (struct rq *this_rq);
void (*task_waking) (struct task_struct *task);
void (*task_woken) (struct rq *this_rq, struct task_struct *task);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/