[PATCH 7/9] sched: Migrate sched to use new tick dependency mask model

From: Frederic Weisbecker
Date: Mon Dec 14 2015 - 13:39:04 EST


Instead of providing asynchronous checks for the nohz subsystem to verify
sched tick dependency, migrate sched to the new mask.

Everytime a task is enqueued or dequeued, we evaluate the state of the
tick dependency on top of the policy of the tasks in the runqueue, by
order of priority:

SCHED_DEADLINE: Need the tick in order to periodically check for runtime
SCHED_FIFO : Don't need the tick (no round-robin)
SCHED_RR : Need the tick if more than 1 task of the same priority
for round robin (simplified with checking if more than
one SCHED_RR task no matter what priority).
SCHED_NORMAL : Need the tick if more than 1 task for round-robin.

We could optimize that further with one flag per sched policy on the tick
dependency mask and perform only the checks relevant to the policy
concerned by an enqueue/dequeue operation.

Since the checks aren't based on the current task anymore, we could get
rid of the task switch hook but it's still needed for posix cpu
timers.

Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: Chris Metcalf <cmetcalf@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Luiz Capitulino <lcapitulino@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Viresh Kumar <viresh.kumar@xxxxxxxxxx>
Signed-off-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
---
include/linux/sched.h | 3 ---
kernel/sched/core.c | 36 +++++++++++++++++++++---------------
kernel/sched/sched.h | 47 +++++++++++++++++++++++++++++++++--------------
kernel/time/tick-sched.c | 5 -----
4 files changed, 54 insertions(+), 37 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d1de0db..cfefb26 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2345,10 +2345,7 @@ static inline void wake_up_nohz_cpu(int cpu) { }
#endif

#ifdef CONFIG_NO_HZ_FULL
-extern bool sched_can_stop_tick(void);
extern u64 scheduler_tick_max_deferment(void);
-#else
-static inline bool sched_can_stop_tick(void) { return false; }
#endif

#ifdef CONFIG_SCHED_AUTOGROUP
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 05b84c6..2a69a32 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -701,31 +701,37 @@ static inline bool got_nohz_idle_kick(void)
#endif /* CONFIG_NO_HZ_COMMON */

#ifdef CONFIG_NO_HZ_FULL
-bool sched_can_stop_tick(void)
+bool sched_can_stop_tick(struct rq *rq)
{
+ int fifo_nr_running;
+
+ /* Deadline tasks, even if single, need the tick */
+ if (rq->dl.dl_nr_running)
+ return false;
+
/*
- * FIFO realtime policy runs the highest priority task. Other runnable
- * tasks are of a lower priority. The scheduler tick does nothing.
+ * FIFO realtime policy runs the highest priority task (after DEADLINE).
+ * Other runnable tasks are of a lower priority. The scheduler tick
+ * isn't needed.
*/
- if (current->policy == SCHED_FIFO)
+ fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
+ if (fifo_nr_running)
return true;

/*
* Round-robin realtime tasks time slice with other tasks at the same
- * realtime priority. Is this task the only one at this priority?
+ * realtime priority.
*/
- if (current->policy == SCHED_RR) {
- struct sched_rt_entity *rt_se = &current->rt;
-
- return rt_se->run_list.prev == rt_se->run_list.next;
+ if (rq->rt.rr_nr_running) {
+ if (rq->rt.rr_nr_running == 1)
+ return true;
+ else
+ return false;
}

- /*
- * More than one running task need preemption.
- * nr_running update is assumed to be visible
- * after IPI is sent from wakers.
- */
- if (this_rq()->nr_running > 1)
+
+ /* Normal multitasking need periodic preemption checks */
+ if (rq->cfs.nr_running > 1)
return false;

return true;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6413238..2620697 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1308,6 +1308,35 @@ unsigned long to_ratio(u64 period, u64 runtime);

extern void init_entity_runnable_average(struct sched_entity *se);

+#ifdef CONFIG_NO_HZ_FULL
+extern bool sched_can_stop_tick(struct rq *rq);
+
+/*
+ * Tick may be needed by tasks in the runqueue depending on their policy and
+ * requirements. If tick is needed, lets send the target an IPI to kick it out of
+ * nohz mode if necessary.
+ */
+static inline void sched_update_tick_dependency(struct rq *rq)
+{
+ int cpu;
+
+ if (!tick_nohz_full_enabled())
+ return;
+
+ cpu = cpu_of(rq);
+
+ if (!tick_nohz_full_cpu(rq->cpu))
+ return;
+
+ if (sched_can_stop_tick(rq))
+ tick_nohz_clear_dep_cpu(TICK_SCHED_BIT, cpu);
+ else
+ tick_nohz_set_dep_cpu(TICK_SCHED_BIT, cpu);
+}
+#else
+static inline void sched_update_tick_dependency(struct rq *rq) { }
+#endif
+
static inline void add_nr_running(struct rq *rq, unsigned count)
{
unsigned prev_nr = rq->nr_running;
@@ -1319,26 +1348,16 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
if (!rq->rd->overload)
rq->rd->overload = true;
#endif
-
-#ifdef CONFIG_NO_HZ_FULL
- if (tick_nohz_full_cpu(rq->cpu)) {
- /*
- * Tick is needed if more than one task runs on a CPU.
- * Send the target an IPI to kick it out of nohz mode.
- *
- * We assume that IPI implies full memory barrier and the
- * new value of rq->nr_running is visible on reception
- * from the target.
- */
- tick_nohz_full_kick_cpu(rq->cpu);
- }
-#endif
}
+
+ sched_update_tick_dependency(rq);
}

static inline void sub_nr_running(struct rq *rq, unsigned count)
{
rq->nr_running -= count;
+ /* Check if we still need preemption */
+ sched_update_tick_dependency(rq);
}

static inline void rq_last_tick_reset(struct rq *rq)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 00c20db..07bebdd 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -202,11 +202,6 @@ static bool can_stop_full_tick(struct tick_sched *ts)
return false;
}

- if (!sched_can_stop_tick()) {
- trace_tick_stop(0, TICK_SCHED_MASK);
- return false;
- }
-
if (!posix_cpu_timers_can_stop_tick(current)) {
trace_tick_stop(0, TICK_POSIX_TIMER_MASK);
return false;
--
2.6.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/