[PATCH] sched/deadline: Use IPI to trigger DL task push migration instead of pulling

From: Wanpeng Li
Date: Sun Mar 29 2015 - 19:25:45 EST


Use the same design as the RT class: send out only one IPI, to the
first overloaded CPU. That CPU tries to push any tasks that it can,
and then looks for the next overloaded CPU that can push to the
source CPU. The IPIs stop once all overloaded CPUs that have pushable
tasks with deadlines earlier than the source CPU's have been covered.
If the source CPU ends up with a later deadline again, a flag is set
to tell the IPI traversal to restart with the first DL overloaded CPU
after the source CPU.
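
For illustration, here is a minimal userspace sketch (not kernel code,
and not part of this patch) of the wrap-around traversal that
dlo_next_cpu() and find_next_push_cpu() perform over the root domain's
dlo_mask. The cpumask is modeled as a plain bool array, the
earliest-deadline check and the per-rq state are elided, and the CPU
numbers are made up; it only demonstrates the visiting order and the
stop condition:

#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS 8

/* CPUs 1, 3 and 6 are DL-overloaded in this toy scenario */
static bool dlo_mask[NR_CPUS] = { [1] = true, [3] = true, [6] = true };

/* Next set CPU in the mask after 'prev', or NR_CPUS if there is none */
static int mask_next(int prev)
{
	int cpu;

	for (cpu = prev + 1; cpu < NR_CPUS; cpu++)
		if (dlo_mask[cpu])
			return cpu;
	return NR_CPUS;
}

/*
 * Mirrors the wrap-around rule of dlo_next_cpu(): start after prev_cpu,
 * wrap once past the end of the mask, never return src_cpu, and return
 * NR_CPUS when the loop is complete.
 */
static int next_push_cpu(int src_cpu, int prev_cpu)
{
	int cpu = mask_next(prev_cpu);

	if (prev_cpu < src_cpu) {
		/* Already wrapped: stop once we reach src_cpu again */
		if (cpu >= src_cpu)
			return NR_CPUS;
	} else if (cpu >= NR_CPUS) {
		/* Hit the end of the mask: wrap to the beginning */
		cpu = mask_next(-1);
		if (cpu >= src_cpu)
			return NR_CPUS;
	}
	return cpu;
}

int main(void)
{
	int src_cpu = 4;	/* the CPU that would otherwise have pulled */
	int cpu = src_cpu;	/* push_cpu starts at the source CPU */

	printf("IPI chain from CPU %d:", src_cpu);
	while ((cpu = next_push_cpu(src_cpu, cpu)) < NR_CPUS)
		printf(" -> %d", cpu);
	printf("\n");		/* prints: IPI chain from CPU 4: -> 6 -> 1 -> 3 */
	return 0;
}

In the kernel each hop in that chain is an irq_work IPI rather than a
function call, and the path is only taken when the RT_PUSH_IPI
scheduling feature is enabled (see the pull_dl_task() hunk below).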

Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxxxxxx>
---
 kernel/sched/deadline.c | 177 ++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h    |   6 ++
 2 files changed, 183 insertions(+)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 24c18dc..4d203e0 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -17,6 +17,7 @@
#include "sched.h"

#include <linux/slab.h>
+#include <linux/irq_work.h>

struct dl_bandwidth def_dl_bandwidth;

@@ -69,6 +70,10 @@ void init_dl_bw(struct dl_bw *dl_b)
 	dl_b->total_bw = 0;
 }
 
+#ifdef CONFIG_SMP
+static void push_irq_work_func(struct irq_work *work);
+#endif
+
 void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq)
 {
 	dl_rq->rb_root = RB_ROOT;
@@ -80,6 +85,12 @@ void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq)
 	dl_rq->dl_nr_migratory = 0;
 	dl_rq->overloaded = 0;
 	dl_rq->pushable_dl_tasks_root = RB_ROOT;
+#ifdef HAVE_RT_PUSH_IPI
+	dl_rq->push_flags = 0;
+	dl_rq->push_cpu = nr_cpu_ids;
+	raw_spin_lock_init(&dl_rq->push_lock);
+	init_irq_work(&dl_rq->push_work, push_irq_work_func);
+#endif
 #else
 	init_dl_bw(&dl_rq->dl_bw);
 #endif
@@ -1416,6 +1427,165 @@ static void push_dl_tasks(struct rq *rq)
 		;
 }
 
+#ifdef HAVE_RT_PUSH_IPI
+/*
+ * The search for the next cpu always starts at rq->cpu and ends
+ * when we reach rq->cpu again. It will never return rq->cpu.
+ * This returns the next cpu to check, or nr_cpu_ids if the loop
+ * is complete.
+ *
+ * rq->dl.push_cpu holds the last cpu returned by this function,
+ * or if this is the first instance, it must hold rq->cpu.
+ */
+static int dlo_next_cpu(struct rq *rq)
+{
+	int prev_cpu = rq->dl.push_cpu;
+	int cpu;
+
+	cpu = cpumask_next(prev_cpu, rq->rd->dlo_mask);
+
+	/*
+	 * If the previous cpu is less than the rq's CPU, then it already
+	 * passed the end of the mask, and has started from the beginning.
+	 * We end if the next CPU is greater or equal to rq's CPU.
+	 */
+	if (prev_cpu < rq->cpu) {
+		if (cpu >= rq->cpu)
+			return nr_cpu_ids;
+
+	} else if (cpu >= nr_cpu_ids) {
+		/*
+		 * We passed the end of the mask, start at the beginning.
+		 * If the result is greater or equal to the rq's CPU, then
+		 * the loop is finished.
+		 */
+		cpu = cpumask_first(rq->rd->dlo_mask);
+		if (cpu >= rq->cpu)
+			return nr_cpu_ids;
+	}
+	rq->dl.push_cpu = cpu;
+
+	/* Return cpu to let the caller know if the loop is finished or not */
+	return cpu;
+}
+
+static int find_next_push_cpu(struct rq *rq)
+{
+	struct rq *next_rq;
+	int cpu;
+
+	while (1) {
+		cpu = dlo_next_cpu(rq);
+		if (cpu >= nr_cpu_ids)
+			break;
+		next_rq = cpu_rq(cpu);
+
+		/* Make sure the next rq can push to this rq */
+		if (dl_time_before(next_rq->dl.earliest_dl.next,
+				   rq->dl.earliest_dl.curr))
+			break;
+	}
+
+	return cpu;
+}
+
+#define RT_PUSH_IPI_EXECUTING 1
+#define RT_PUSH_IPI_RESTART 2
+
+static void tell_cpu_to_push(struct rq *rq)
+{
+	int cpu;
+
+	if (rq->dl.push_flags & RT_PUSH_IPI_EXECUTING) {
+		raw_spin_lock(&rq->dl.push_lock);
+		/* Make sure it's still executing */
+		if (rq->dl.push_flags & RT_PUSH_IPI_EXECUTING) {
+			/*
+			 * Tell the IPI to restart the loop as things have
+			 * changed since it started.
+			 */
+			rq->dl.push_flags |= RT_PUSH_IPI_RESTART;
+			raw_spin_unlock(&rq->dl.push_lock);
+			return;
+		}
+		raw_spin_unlock(&rq->dl.push_lock);
+	}
+
+	/* When here, there's no IPI going around */
+
+	rq->dl.push_cpu = rq->cpu;
+	cpu = find_next_push_cpu(rq);
+	if (cpu >= nr_cpu_ids)
+		return;
+
+	rq->dl.push_flags = RT_PUSH_IPI_EXECUTING;
+
+	irq_work_queue_on(&rq->dl.push_work, cpu);
+}
+
+/* Called from hardirq context */
+static void try_to_push_tasks(void *arg)
+{
+	struct dl_rq *dl_rq = arg;
+	struct rq *rq, *src_rq;
+	int this_cpu;
+	int cpu;
+
+	this_cpu = dl_rq->push_cpu;
+
+	/* Paranoid check */
+	BUG_ON(this_cpu != smp_processor_id());
+
+	rq = cpu_rq(this_cpu);
+	src_rq = rq_of_dl_rq(dl_rq);
+
+again:
+	if (has_pushable_dl_tasks(rq)) {
+		raw_spin_lock(&rq->lock);
+		push_dl_task(rq);
+		raw_spin_unlock(&rq->lock);
+	}
+
+	/* Pass the IPI to the next dl overloaded queue */
+	raw_spin_lock(&dl_rq->push_lock);
+	/*
+	 * If the source queue changed since the IPI went out,
+	 * we need to restart the search from that CPU again.
+	 */
+	if (dl_rq->push_flags & RT_PUSH_IPI_RESTART) {
+		dl_rq->push_flags &= ~RT_PUSH_IPI_RESTART;
+		dl_rq->push_cpu = src_rq->cpu;
+	}
+
+	cpu = find_next_push_cpu(src_rq);
+
+	if (cpu >= nr_cpu_ids)
+		dl_rq->push_flags &= ~RT_PUSH_IPI_EXECUTING;
+	raw_spin_unlock(&dl_rq->push_lock);
+
+	if (cpu >= nr_cpu_ids)
+		return;
+
+	/*
+	 * It is possible that a restart caused this CPU to be
+	 * chosen again. Don't bother with an IPI, just see if we
+	 * have more to push.
+	 */
+	if (unlikely(cpu == rq->cpu))
+		goto again;
+
+	/* Try the next DL overloaded CPU */
+	irq_work_queue_on(&dl_rq->push_work, cpu);
+}
+
+static void push_irq_work_func(struct irq_work *work)
+{
+	struct dl_rq *dl_rq = container_of(work, struct dl_rq, push_work);
+
+	try_to_push_tasks(dl_rq);
+}
+#endif /* HAVE_RT_PUSH_IPI */
+
 static int pull_dl_task(struct rq *this_rq)
 {
 	int this_cpu = this_rq->cpu, ret = 0, cpu;
@@ -1432,6 +1602,13 @@ static int pull_dl_task(struct rq *this_rq)
 	 */
 	smp_rmb();
 
+#ifdef HAVE_RT_PUSH_IPI
+	if (sched_feat(RT_PUSH_IPI)) {
+		tell_cpu_to_push(this_rq);
+		return 0;
+	}
+#endif
+
 	for_each_cpu(cpu, this_rq->rd->dlo_mask) {
 		if (this_cpu == cpu)
 			continue;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index dd532c5..87a937c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -500,6 +500,12 @@ struct dl_rq {
 	 */
 	struct rb_root pushable_dl_tasks_root;
 	struct rb_node *pushable_dl_tasks_leftmost;
+#ifdef HAVE_RT_PUSH_IPI
+	int push_flags;
+	int push_cpu;
+	struct irq_work push_work;
+	raw_spinlock_t push_lock;
+#endif
 #else
 	struct dl_bw dl_bw;
 #endif
--
2.1.0
