Re: [RFC 0/12][PATCH] SCHED_DEADLINE: core of the scheduling class

From: Raistlin
Date: Fri Oct 16 2009 - 11:41:27 EST


This commit introduces a new scheduling policy (SCHED_DEADLINE), implemented
in a new scheduling class (sched_deadline.c).

As of now, it implements the popular Earliest Deadline First (EDF) real-time
scheduling algorithm: each instance of each task has a deadline, i.e., the time
instant by which its computation has to be completed, and the scheduler always
picks the runnable task with the earliest deadline as the next one to execute.
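
As a usage illustration only (not part of this patch): a periodic task needing
10ms of CPU time every 100ms would ask for SCHED_DEADLINE roughly as sketched
below. The setscheduler extension and struct sched_param_ex come from other
patches of this series, so the exact prototype and field names used here are
assumptions:

    struct sched_param_ex param = {
            /* hypothetical field names/types, see the sched_param_ex patch */
            .sched_runtime  = { .tv_sec = 0, .tv_nsec =  10 * 1000 * 1000 },
            .sched_deadline = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 },
    };

    if (sched_setscheduler_ex(0 /* self */, SCHED_DEADLINE, &param) < 0)
            perror("sched_setscheduler_ex");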

Some more logic is added in order to prevent tasks from interfering with each
other, i.e., a deadline miss of task A should not affect the ability of task B
to meet its own deadline.
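
In CBS terms (this just restates, as formulas, the check implemented by
update_deadline_entity() below), each task reserves the fraction of CPU time

    bw = sched_runtime / sched_deadline

and, at each wakeup, its current (runtime, deadline) pair is kept only if the
deadline is still in the future and

    runtime / (deadline - now) <= sched_runtime / sched_deadline

otherwise the task gets a fresh deadline (now + sched_deadline) and a full
runtime. For example, sched_runtime = 30ms with sched_deadline = 100ms
reserves 30% of a CPU: if the task overruns, it is throttled and its deadline
postponed, rather than the overrun eating into other tasks' reservations.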

Open issues:
- this implementation is ``fully partitioned'', which means each task has to
be bound to one processor at any given time. Turning it into ``global
scheduling'' (i.e., allowing migrations) is work in progress;
- proper handling of critical sections/rt-mutexes is still missing, and is
work in progress as well.

Signed-off-by: Raistlin <raistlin@xxxxxxxx>
---
include/linux/sched.h | 36 ++++
kernel/hrtimer.c | 2 +-
kernel/sched.c | 44 ++++-
kernel/sched_deadline.c | 513 +++++++++++++++++++++++++++++++++++++++++++++++
kernel/sched_fair.c | 2 +-
kernel/sched_rt.c | 2 +-
6 files changed, 587 insertions(+), 12 deletions(-)
create mode 100644 kernel/sched_deadline.c

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ac9837c..20e1a6a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -38,6 +38,7 @@
#define SCHED_BATCH 3
/* SCHED_ISO: reserved but not implemented yet */
#define SCHED_IDLE 5
+#define SCHED_DEADLINE 6
/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
#define SCHED_RESET_ON_FORK 0x40000000

@@ -159,6 +160,7 @@ extern unsigned long get_parent_ip(unsigned long addr);

struct seq_file;
struct cfs_rq;
+struct dl_rq;
struct task_group;
#ifdef CONFIG_SCHED_DEBUG
extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
@@ -1218,6 +1220,27 @@ struct sched_rt_entity {
#endif
};

+#define DL_NEW 0x00000001
+#define DL_THROTTLED 0x00000002
+#define DL_BOOSTED 0x00000004
+
+struct sched_dl_entity {
+ struct rb_node rb_node;
+ /* actual scheduling parameters */
+ s64 runtime;
+ u64 deadline;
+ unsigned int flags;
+
+ /* original parameters taken from sched_param_ex */
+ u64 sched_runtime;
+ u64 sched_deadline;
+ u64 sched_period;
+ u64 bw;
+
+ int nr_cpus_allowed;
+ struct hrtimer dl_timer;
+};
+
struct rcu_node;

struct task_struct {
@@ -1240,6 +1263,7 @@ struct task_struct {
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
+ struct sched_dl_entity dl;

#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
@@ -1583,6 +1607,18 @@ static inline int rt_task(struct task_struct *p)
return rt_prio(p->prio);
}

+static inline int deadline_policy(int policy)
+{
+ if (unlikely(policy == SCHED_DEADLINE))
+ return 1;
+ return 0;
+}
+
+static inline int deadline_task(struct task_struct *p)
+{
+ return deadline_policy(p->policy);
+}
+
static inline struct pid *task_pid(struct task_struct *task)
{
return task->pids[PIDTYPE_PID].pid;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 3e1c36e..bf6a3b1 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1537,7 +1537,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
unsigned long slack;

slack = current->timer_slack_ns;
- if (rt_task(current))
+ if (deadline_task(current) || rt_task(current))
slack = 0;

hrtimer_init_on_stack(&t.timer, clockid, mode);
diff --git a/kernel/sched.c b/kernel/sched.c
index e886895..adf1414 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -131,6 +131,11 @@ static inline int task_has_rt_policy(struct task_struct *p)
return rt_policy(p->policy);
}

+static inline int task_has_deadline_policy(struct task_struct *p)
+{
+ return deadline_policy(p->policy);
+}
+
/*
* This is the priority-queue data structure of the RT scheduling class:
*/
@@ -481,6 +486,14 @@ struct rt_rq {
#endif
};

+struct dl_rq {
+ unsigned long dl_nr_running;
+
+ /* runqueue is an rbtree, ordered by deadline */
+ struct rb_root rb_root;
+ struct rb_node *rb_leftmost;
+};
+
#ifdef CONFIG_SMP

/*
@@ -545,6 +558,7 @@ struct rq {

struct cfs_rq cfs;
struct rt_rq rt;
+ struct dl_rq dl;

#ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this cpu: */
@@ -1818,11 +1832,12 @@ static void calc_load_account_active(struct rq *this_rq);
#include "sched_idletask.c"
#include "sched_fair.c"
#include "sched_rt.c"
+#include "sched_deadline.c"
#ifdef CONFIG_SCHED_DEBUG
# include "sched_debug.c"
#endif

-#define sched_class_highest (&rt_sched_class)
+#define sched_class_highest (&deadline_sched_class)
#define for_each_class(class) \
for (class = sched_class_highest; class; class = class->next)

@@ -1838,7 +1853,7 @@ static void dec_nr_running(struct rq *rq)

static void set_load_weight(struct task_struct *p)
{
- if (task_has_rt_policy(p)) {
+ if (task_has_deadline_policy(p) || task_has_rt_policy(p)) {
p->se.load.weight = prio_to_weight[0] * 2;
p->se.load.inv_weight = prio_to_wmult[0] >> 1;
return;
@@ -2523,7 +2538,8 @@ void sched_fork(struct task_struct *p, int clone_flags)
* Revert to default priority/policy on fork if requested.
*/
if (unlikely(p->sched_reset_on_fork)) {
- if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
+ if (deadline_policy(p->policy) ||
+ p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
p->policy = SCHED_NORMAL;
p->normal_prio = p->static_prio;
}
@@ -5966,10 +5982,14 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
if (running)
p->sched_class->put_prev_task(rq, p);

- if (rt_prio(prio))
- p->sched_class = &rt_sched_class;
- else
- p->sched_class = &fair_sched_class;
+ if (deadline_task(p))
+ p->sched_class = &deadline_sched_class;
+ else {
+ if (rt_prio(prio))
+ p->sched_class = &rt_sched_class;
+ else
+ p->sched_class = &fair_sched_class;
+ }

p->prio = prio;

@@ -6003,9 +6023,9 @@ void set_user_nice(struct task_struct *p, long nice)
* The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
* it wont have any effect on scheduling until the task is
- * SCHED_FIFO/SCHED_RR:
+ * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR:
*/
- if (task_has_rt_policy(p)) {
+ if (unlikely(task_has_deadline_policy(p) || task_has_rt_policy(p))) {
p->static_prio = NICE_TO_PRIO(nice);
goto out_unlock;
}
@@ -9259,6 +9279,11 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
#endif
}

+static void init_deadline_rq(struct dl_rq *dl_rq, struct rq *rq)
+{
+ dl_rq->rb_root = RB_ROOT;
+}
+
#ifdef CONFIG_FAIR_GROUP_SCHED
static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
struct sched_entity *se, int cpu, int add,
@@ -9417,6 +9442,7 @@ void __init sched_init(void)
rq->calc_load_update = jiffies + LOAD_FREQ;
init_cfs_rq(&rq->cfs, rq);
init_rt_rq(&rq->rt, rq);
+ init_deadline_rq(&rq->dl, rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
init_task_group.shares = init_task_group_load;
INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
diff --git a/kernel/sched_deadline.c b/kernel/sched_deadline.c
new file mode 100644
index 0000000..5430c48
--- /dev/null
+++ b/kernel/sched_deadline.c
@@ -0,0 +1,513 @@
+/*
+ * Deadline Scheduling Class (SCHED_DEADLINE policy)
+ *
+ * This scheduling class implements the Earliest Deadline First (EDF)
+ * scheduling algorithm, suited for hard and soft real-time tasks.
+ *
+ * The strategy used to confine each task inside its bandwidth reservation
+ * is the Constant Bandwidth Server (CBS) scheduling, a slight variation on
+ * EDF that makes this possible.
+ *
+ * Correct behavior, i.e., no task missing any deadline, is only guaranteed
+ * if the tasks' parameters are:
+ * - correctly assigned, so that the system is not overloaded,
+ * - respected during actual execution.
+ * However, thanks to bandwidth isolation, overruns and deadline misses
+ * remain local and do not affect any other task in the system.
+ *
+ * Copyright (C) 2009 Dario Faggioli, Michael Trimarchi
+ */
+
+static const struct sched_class deadline_sched_class;
+
+static inline struct task_struct *deadline_task_of(struct sched_dl_entity *dl_se)
+{
+ return container_of(dl_se, struct task_struct, dl);
+}
+
+static inline struct rq *rq_of_deadline_rq(struct dl_rq *dl_rq)
+{
+ return container_of(dl_rq, struct rq, dl);
+}
+
+static inline struct dl_rq *deadline_rq_of_se(struct sched_dl_entity *dl_se)
+{
+ struct task_struct *p = deadline_task_of(dl_se);
+ struct rq *rq = task_rq(p);
+
+ return &rq->dl;
+}
+
+/*
+ * FIXME:
+ * This is broken for now, correct implementation of a BWI/PEP
+ * solution is needed here!
+ */
+static inline int deadline_se_boosted(struct sched_dl_entity *dl_se)
+{
+ struct task_struct *p = deadline_task_of(dl_se);
+
+ return p->prio != p->normal_prio;
+}
+
+static inline int on_deadline_rq(struct sched_dl_entity *dl_se)
+{
+ return !RB_EMPTY_NODE(&dl_se->rb_node);
+}
+
+#define for_each_leaf_deadline_rq(dl_rq, rq) \
+ for (dl_rq = &rq->dl; dl_rq; dl_rq = NULL)
+
+static inline int deadline_time_before(u64 a, u64 b)
+{
+ return (s64)(a - b) < 0;
+}
+
+static inline u64 deadline_max_deadline(u64 a, u64 b)
+{
+ s64 delta = (s64)(b - a);
+ if (delta > 0)
+ a = b;
+
+ return a;
+}
+
+static void enqueue_deadline_entity(struct sched_dl_entity *dl_se);
+static void dequeue_deadline_entity(struct sched_dl_entity *dl_se);
+static void check_deadline_preempt_curr(struct task_struct *p, struct rq *rq);
+
+/*
+ * set up a new SCHED_DEADLINE task instance.
+ */
+static inline void setup_new_deadline_entity(struct sched_dl_entity *dl_se)
+{
+ struct dl_rq *dl_rq = deadline_rq_of_se(dl_se);
+ struct rq *rq = rq_of_deadline_rq(dl_rq);
+
+ dl_se->flags &= ~DL_NEW;
+ dl_se->deadline = max(dl_se->deadline, rq->clock) +
+ dl_se->sched_deadline;
+ dl_se->runtime = dl_se->sched_runtime;
+}
+
+/*
+ * gives a SCHED_DEADLINE task that ran out of runtime the possibility
+ * of resuming execution, with a refilled runtime and a new
+ * (postponed) deadline.
+ */
+static void replenish_deadline_entity(struct sched_dl_entity *dl_se)
+{
+ struct dl_rq *dl_rq = deadline_rq_of_se(dl_se);
+ struct rq *rq = rq_of_deadline_rq(dl_rq);
+
+ /*
+ * Keep moving the deadline and replenishing runtime by the
+ * proper amount until the runtime becomes positive.
+ */
+ while (dl_se->runtime < 0) {
+ dl_se->deadline += dl_se->sched_deadline;
+ dl_se->runtime += dl_se->sched_runtime;
+ }
+
+ WARN_ON(dl_se->runtime > dl_se->sched_runtime);
+ WARN_ON(deadline_time_before(dl_se->deadline, rq->clock));
+}
+
+static void update_deadline_entity(struct sched_dl_entity *dl_se)
+{
+ struct dl_rq *dl_rq = deadline_rq_of_se(dl_se);
+ struct rq *rq = rq_of_deadline_rq(dl_rq);
+ u64 left, right;
+
+ if (dl_se->flags & DL_NEW) {
+ setup_new_deadline_entity(dl_se);
+ return;
+ }
+
+ /*
+ * Update the deadline of the task only if:
+ * - the budget has been completely exhausted;
+ * - using the remaining budget, with the current deadline, would
+ * make the task exceed its bandwidth;
+ * - the deadline itself is in the past.
+ *
+ * For the second condition to hold, we check if:
+ * runtime / (deadline - rq->clock) >= sched_runtime / sched_deadline
+ *
+ * Which basically says if, in the time left before the current
+ * deadline, the task would overcome its expected runtime by using the
+ * residual budget (left and right are the two sides of the equation,
+ * after a bit of shuffling to use multiplications instead of
+ * divisions).
+ */
+ if (deadline_time_before(dl_se->deadline, rq->clock))
+ goto update;
+
+ left = dl_se->sched_deadline * dl_se->runtime;
+ right = (dl_se->deadline - rq->clock) * dl_se->sched_runtime;
+
+ if (deadline_time_before(right, left)) {
+update:
+ dl_se->deadline = rq->clock + dl_se->sched_deadline;
+ dl_se->runtime = dl_se->sched_runtime;
+ }
+}
+
+/*
+ * the task just depleted its runtime, so we try to post the
+ * replenishment timer to fire at the next absolute deadline.
+ *
+ * This is because the task is only allowed to execute for at most
+ * sched_runtime over each period of sched_deadline length.
+ */
+static int start_deadline_timer(struct sched_dl_entity *dl_se, u64 wakeup)
+{
+ struct dl_rq *dl_rq = deadline_rq_of_se(dl_se);
+ struct rq *rq = rq_of_deadline_rq(dl_rq);
+ ktime_t now, act;
+ s64 delta;
+
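+ /*
+ * wakeup is expressed in the rq->clock time domain, while the hrtimer
+ * runs on its own clock base: translate the expiry by adding the
+ * current offset between the two (delta = hrtimer now - rq->clock).
+ */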
+ act = ns_to_ktime(wakeup);
+ now = hrtimer_cb_get_time(&dl_se->dl_timer);
+ delta = ktime_to_ns(now) - rq->clock;
+ act = ktime_add_ns(act, delta);
+
+ hrtimer_set_expires(&dl_se->dl_timer, act);
+ hrtimer_start_expires(&dl_se->dl_timer, HRTIMER_MODE_ABS);
+
+ return hrtimer_active(&dl_se->dl_timer);
+}
+
+static enum hrtimer_restart deadline_timer(struct hrtimer *timer)
+{
+ struct sched_dl_entity *dl_se = container_of(timer,
+ struct sched_dl_entity,
+ dl_timer);
+ struct task_struct *p = deadline_task_of(dl_se);
+ struct dl_rq *dl_rq = deadline_rq_of_se(dl_se);
+ struct rq *rq = rq_of_deadline_rq(dl_rq);
+
+ spin_lock(&rq->lock);
+
+ /*
+ * the task might have changed scheduling policy
+ * through setscheduler_ex, in which case we just do nothing.
+ */
+ if (!deadline_task(p))
+ goto unlock;
+
+ /*
+ * the task can't be on the SCHED_DEADLINE runqueue at this point,
+ * and needs to be enqueued back there --with its new deadline--
+ * only if it is still active.
+ */
+ dl_se->flags &= ~DL_THROTTLED;
+ if (p->se.on_rq) {
+ replenish_deadline_entity(dl_se);
+ enqueue_deadline_entity(dl_se);
+ check_deadline_preempt_curr(p, rq);
+ }
+unlock:
+ spin_unlock(&rq->lock);
+
+ return HRTIMER_NORESTART;
+}
+
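+/*
+ * if the entity has depleted its runtime (and is not boosted), throttle it:
+ * take it off the runqueue and arm the replenishment timer at its current
+ * absolute deadline; should the timer fail to activate, replenish and
+ * enqueue it right away.
+ */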
+static
+int deadline_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se)
+{
+ if (dl_se->runtime >= 0 || deadline_se_boosted(dl_se))
+ return 0;
+
+ dequeue_deadline_entity(dl_se);
+ if (!start_deadline_timer(dl_se, dl_se->deadline)) {
+ replenish_deadline_entity(dl_se);
+ enqueue_deadline_entity(dl_se);
+ } else
+ dl_se->flags |= DL_THROTTLED;
+
+ return 1;
+}
+
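+/*
+ * update the current task's runtime statistics and charge the time it just
+ * spent executing to its sched_dl_entity budget, throttling and rescheduling
+ * it if the budget is now depleted.
+ */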
+static void update_curr_deadline(struct rq *rq)
+{
+ struct task_struct *curr = rq->curr;
+ struct sched_dl_entity *dl_se = &curr->dl;
+ u64 delta_exec;
+
+ if (!deadline_task(curr) || !on_deadline_rq(dl_se))
+ return;
+
+ delta_exec = rq->clock - curr->se.exec_start;
+ if (unlikely((s64)delta_exec < 0))
+ delta_exec = 0;
+
+ schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
+
+ curr->se.sum_exec_runtime += delta_exec;
+ account_group_exec_runtime(curr, delta_exec);
+
+ curr->se.exec_start = rq->clock;
+ cpuacct_charge(curr, delta_exec);
+
+ dl_se->runtime -= delta_exec;
+ if (deadline_runtime_exceeded(rq, dl_se))
+ resched_task(curr);
+}
+
+static void enqueue_deadline_entity(struct sched_dl_entity *dl_se)
+{
+ struct dl_rq *dl_rq = deadline_rq_of_se(dl_se);
+ struct rb_node **link = &dl_rq->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct sched_dl_entity *entry;
+ int leftmost = 1;
+
+ BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
+
+ while (*link) {
+ parent = *link;
+ entry = rb_entry(parent, struct sched_dl_entity, rb_node);
+ if (!deadline_time_before(entry->deadline, dl_se->deadline))
+ link = &parent->rb_left;
+ else {
+ link = &parent->rb_right;
+ leftmost = 0;
+ }
+ }
+
+ if (leftmost)
+ dl_rq->rb_leftmost = &dl_se->rb_node;
+
+ rb_link_node(&dl_se->rb_node, parent, link);
+ rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root);
+
+ dl_rq->dl_nr_running++;
+}
+
+static void dequeue_deadline_entity(struct sched_dl_entity *dl_se)
+{
+ struct dl_rq *dl_rq = deadline_rq_of_se(dl_se);
+
+ if (RB_EMPTY_NODE(&dl_se->rb_node))
+ return;
+
+ if (dl_rq->rb_leftmost == &dl_se->rb_node) {
+ struct rb_node *next_node;
+
+ next_node = rb_next(&dl_se->rb_node);
+ dl_rq->rb_leftmost = next_node;
+ }
+
+ rb_erase(&dl_se->rb_node, &dl_rq->rb_root);
+ RB_CLEAR_NODE(&dl_se->rb_node);
+
+ dl_rq->dl_nr_running--;
+}
+
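+/*
+ * only another SCHED_DEADLINE task with an earlier absolute deadline can
+ * preempt a running SCHED_DEADLINE task.
+ */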
+static void check_preempt_curr_deadline(struct rq *rq, struct task_struct *p,
+ int sync)
+{
+ if (deadline_task(p) &&
+ deadline_time_before(p->dl.deadline, rq->curr->dl.deadline))
+ resched_task(rq->curr);
+}
+
+/*
+ * there are a few cases where it is important to check if a SCHED_DEADLINE
+ * task p should preempt the current task of a runqueue (e.g., inside the
+ * replenishment timer code).
+ */
+static void check_deadline_preempt_curr(struct task_struct *p, struct rq *rq)
+{
+ if (!deadline_task(rq->curr) ||
+ deadline_time_before(p->dl.deadline, rq->curr->dl.deadline))
+ resched_task(rq->curr);
+}
+
+static void
+enqueue_task_deadline(struct rq *rq, struct task_struct *p, int wakeup)
+{
+ struct sched_dl_entity *dl_se = &p->dl;
+
+ BUG_ON(on_deadline_rq(dl_se));
+
+ /*
+ * Only enqueue entities with some remaining runtime.
+ */
+ if (dl_se->flags & DL_THROTTLED)
+ return;
+
+ update_deadline_entity(dl_se);
+ enqueue_deadline_entity(dl_se);
+}
+
+static void
+dequeue_task_deadline(struct rq *rq, struct task_struct *p, int sleep)
+{
+ struct sched_dl_entity *dl_se = &p->dl;
+
+ if (!on_deadline_rq(dl_se))
+ return;
+
+ update_curr_deadline(rq);
+ dequeue_deadline_entity(dl_se);
+}
+
+static void yield_task_deadline(struct rq *rq)
+{
+}
+
+#ifdef CONFIG_SCHED_HRTICK
+static void start_hrtick_deadline(struct rq *rq, struct task_struct *p)
+{
+ struct sched_dl_entity *dl_se = &p->dl;
+ s64 delta;
+
+ delta = dl_se->runtime;
+
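+ /*
+ * don't bother programming the hrtick for less than 10us (delta is
+ * in nanoseconds); the regular tick handles such short intervals.
+ */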
+ if (delta > 10000)
+ hrtick_start(rq, delta);
+}
+#else
+static void start_hrtick_deadline(struct rq *rq, struct task_struct *p)
+{
+}
+#endif
+
+static struct sched_dl_entity *pick_next_deadline_entity(struct rq *rq,
+ struct dl_rq *dl_rq)
+{
+ struct rb_node *left = dl_rq->rb_leftmost;
+
+ if (!left)
+ return NULL;
+
+ return rb_entry(left, struct sched_dl_entity, rb_node);
+}
+
+static struct task_struct *pick_next_task_deadline(struct rq *rq)
+{
+ struct sched_dl_entity *dl_se;
+ struct task_struct *p;
+ struct dl_rq *dl_rq;
+
+ dl_rq = &rq->dl;
+
+ if (likely(!dl_rq->dl_nr_running))
+ return NULL;
+
+ dl_se = pick_next_deadline_entity(rq, dl_rq);
+ BUG_ON(!dl_se);
+
+ p = deadline_task_of(dl_se);
+ p->se.exec_start = rq->clock;
+#ifdef CONFIG_SCHED_HRTICK
+ if (hrtick_enabled(rq))
+ start_hrtick_deadline(rq, p);
+#endif
+ return p;
+}
+
+static void put_prev_task_deadline(struct rq *rq, struct task_struct *p)
+{
+ update_curr_deadline(rq);
+ p->se.exec_start = 0;
+}
+
+static void task_tick_deadline(struct rq *rq, struct task_struct *p, int queued)
+{
+ update_curr_deadline(rq);
+
+#ifdef CONFIG_SCHED_HRTICK
+ if (hrtick_enabled(rq) && queued && p->dl.runtime > 0)
+ start_hrtick_deadline(rq, p);
+#endif
+}
+
+static void set_curr_task_deadline(struct rq *rq)
+{
+ struct task_struct *p = rq->curr;
+
+ p->se.exec_start = rq->clock;
+}
+
+static void prio_changed_deadline(struct rq *rq, struct task_struct *p,
+ int oldprio, int running)
+{
+ check_deadline_preempt_curr(p, rq);
+}
+
+static void switched_to_deadline(struct rq *rq, struct task_struct *p,
+ int running)
+{
+ check_deadline_preempt_curr(p, rq);
+}
+
+#ifdef CONFIG_SMP
+static int select_task_rq_deadline(struct task_struct *p,
+ int sd_flag, int flags)
+{
+ return task_cpu(p);
+}
+
+static unsigned long
+load_balance_deadline(struct rq *this_rq, int this_cpu, struct rq *busiest,
+ unsigned long max_load_move,
+ struct sched_domain *sd, enum cpu_idle_type idle,
+ int *all_pinned, int *this_best_prio)
+{
+ /* for now, don't touch SCHED_DEADLINE tasks */
+ return 0;
+}
+
+static int
+move_one_task_deadline(struct rq *this_rq, int this_cpu, struct rq *busiest,
+ struct sched_domain *sd, enum cpu_idle_type idle)
+{
+ return 0;
+}
+
+static void set_cpus_allowed_deadline(struct task_struct *p,
+ const struct cpumask *new_mask)
+{
+ int weight = cpumask_weight(new_mask);
+
+ BUG_ON(!deadline_task(p));
+
+ cpumask_copy(&p->cpus_allowed, new_mask);
+ p->dl.nr_cpus_allowed = weight;
+}
+#endif
+
+static const struct sched_class deadline_sched_class = {
+ .next = &rt_sched_class,
+ .enqueue_task = enqueue_task_deadline,
+ .dequeue_task = dequeue_task_deadline,
+ .yield_task = yield_task_deadline,
+
+ .check_preempt_curr = check_preempt_curr_deadline,
+
+ .pick_next_task = pick_next_task_deadline,
+ .put_prev_task = put_prev_task_deadline,
+
+#ifdef CONFIG_SMP
+ .select_task_rq = select_task_rq_deadline,
+
+ .load_balance = load_balance_deadline,
+ .move_one_task = move_one_task_deadline,
+ .set_cpus_allowed = set_cpus_allowed_deadline,
+#endif
+
+ .set_curr_task = set_curr_task_deadline,
+ .task_tick = task_tick_deadline,
+
+ .prio_changed = prio_changed_deadline,
+ .switched_to = switched_to_deadline,
+};
+
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 4e777b4..8144cb4 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1571,7 +1571,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_

update_curr(cfs_rq);

- if (unlikely(rt_prio(p->prio))) {
+ if (unlikely(deadline_task(p) || rt_prio(p->prio))) {
resched_task(curr);
return;
}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index a4d790c..65cef57 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1004,7 +1004,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
*/
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
{
- if (p->prio < rq->curr->prio) {
+ if (deadline_task(p) || p->prio < rq->curr->prio) {
resched_task(rq->curr);
return;
}
--
1.6.0.4

--
<<This happens because I choose it to happen!>> (Raistlin Majere)
----------------------------------------------------------------------
Dario Faggioli, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa (Italy)

http://blog.linux.it/raistlin / raistlin@xxxxxxxxx /
dario.faggioli@xxxxxxxxxx
