[ RFC, PATCH 3/5 ] CPU controller - deal with movement of tasks

From: Srivatsa Vaddagiri
Date: Fri Aug 04 2006 - 01:04:42 EST



When a task is moved between groups (as initiated by an administrator), it has to
be removed from the runqueue of its old group and added to the runqueue of the
new group. This patch defines that move operation.

During the move, the group identity of a task can be ambiguous, since identity
is normally derived from a pointer in the task_struct; that pointer can refer to
either the old group or the new group, but not both. Hence the task-group
runqueue from which the task is removed, or to which it is added, is now passed
explicitly to dequeue_task() and enqueue_task() respectively.
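
To see, outside the context of the diff, why the caller has to name the group
runqueue explicitly, here is a tiny stand-alone C model of the move.
Illustrative only: grp_rq, move_task and the two-argument queue helpers are
simplified stand-ins for the structures and functions touched by this patch
(the prio_array argument is dropped for brevity):

/*
 * Simplified user-space model of the move operation. Names are made up
 * for the example and are not the kernel interfaces in this patch.
 */
#include <stdio.h>

struct grp_rq {			/* stands in for struct task_grp_rq */
	int nr_active;
	const char *name;
};

struct task {			/* stands in for struct task_struct */
	const char *comm;
	struct grp_rq *queued_on;	/* NULL if not on any runqueue */
};

/*
 * The group runqueue is passed explicitly: it cannot be derived from the
 * task, whose group pointer is ambiguous while the move is in flight.
 */
static void dequeue_task(struct task *p, struct grp_rq *tgrq)
{
	tgrq->nr_active--;
	p->queued_on = NULL;
}

static void enqueue_task(struct task *p, struct grp_rq *tgrq)
{
	tgrq->nr_active++;
	p->queued_on = tgrq;
}

/*
 * Mirrors the shape of sched_move_task(): dequeue from the old group's
 * runqueue, enqueue on the new group's runqueue.
 */
static void move_task(struct task *p, struct grp_rq *old, struct grp_rq *new)
{
	if (old == new)
		return;
	if (p->queued_on) {
		dequeue_task(p, old);
		enqueue_task(p, new);
	}
}

int main(void)
{
	struct grp_rq a = { .nr_active = 0, .name = "groupA" };
	struct grp_rq b = { .nr_active = 0, .name = "groupB" };
	struct task t = { .comm = "worker", .queued_on = NULL };

	enqueue_task(&t, &a);
	move_task(&t, &a, &b);
	printf("%s now on %s (A=%d, B=%d)\n",
	       t.comm, t.queued_on->name, a.nr_active, b.nr_active);
	return 0;
}

The kernel-side version below, sched_move_task(), additionally takes the
runqueue lock, falls back to init_task_grp when either group pointer is NULL,
and only requeues the task if it is currently on a runqueue (tsk->array != NULL).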


Signed-off-by: Srivatsa Vaddagiri <vatsa@xxxxxxxxxx>



 include/linux/sched.h |    1
 kernel/sched.c        |   90 ++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 66 insertions(+), 25 deletions(-)

diff -puN kernel/sched.c~cpu_ctlr_move_task kernel/sched.c
--- linux-2.6.18-rc3/kernel/sched.c~cpu_ctlr_move_task 2006-08-04 07:59:08.000000000 +0530
+++ linux-2.6.18-rc3-root/kernel/sched.c 2006-08-04 08:09:15.000000000 +0530
@@ -755,15 +755,14 @@ static inline void update_task_grp_prio(
/*
* Adding/removing a task to/from a priority array:
*/
-static void dequeue_task(struct task_struct *p, struct prio_array *array)
+static void dequeue_task(struct task_struct *p, struct prio_array *array,
+ struct task_grp_rq *tgrq)
{
array->nr_active--;
list_del(&p->run_list);
if (list_empty(array->queue + p->prio)) {
__clear_bit(p->prio, array->bitmap);
if (p->prio == array->best_dyn_prio) {
- struct task_grp_rq *tgrq = task_grp(p)->rq[task_cpu(p)];
-
array->best_dyn_prio =
sched_find_first_bit(array->bitmap);
if (array == tgrq->active || !tgrq->active->nr_active)
@@ -772,7 +771,8 @@ static void dequeue_task(struct task_str
}
}

-static void enqueue_task(struct task_struct *p, struct prio_array *array)
+static void enqueue_task(struct task_struct *p, struct prio_array *array,
+ struct task_grp_rq *tgrq)
{
sched_info_queued(p);
list_add_tail(&p->run_list, array->queue + p->prio);
@@ -781,8 +781,6 @@ static void enqueue_task(struct task_str
p->array = array;

if (p->prio < array->best_dyn_prio) {
- struct task_grp_rq *tgrq = task_grp(p)->rq[task_cpu(p)];
-
array->best_dyn_prio = p->prio;
if (array == tgrq->active || !tgrq->active->nr_active)
update_task_grp_prio(tgrq, task_rq(p), 0);
@@ -799,7 +797,8 @@ static void requeue_task(struct task_str
}

static inline void
-enqueue_task_head(struct task_struct *p, struct prio_array *array)
+enqueue_task_head(struct task_struct *p, struct prio_array *array,
+ struct task_grp_rq *tgrq)
{
list_add(&p->run_list, array->queue + p->prio);
__set_bit(p->prio, array->bitmap);
@@ -807,8 +806,6 @@ enqueue_task_head(struct task_struct *p,
p->array = array;

if (p->prio < array->best_dyn_prio) {
- struct task_grp_rq *tgrq = task_grp(p)->rq[task_cpu(p)];
-
array->best_dyn_prio = p->prio;
if (array == tgrq->active || !tgrq->active->nr_active)
update_task_grp_prio(tgrq, task_rq(p), 1);
@@ -956,7 +953,7 @@ static void __activate_task(struct task_

if (batch_task(p))
target = tgrq->expired;
- enqueue_task(p, target);
+ enqueue_task(p, target, tgrq);
inc_nr_running(p, rq);
}

@@ -968,7 +965,7 @@ static inline void __activate_idle_task(
struct task_grp_rq *tgrq = task_grp(p)->rq[task_cpu(p)];
struct prio_array *target = tgrq->active;

- enqueue_task_head(p, target);
+ enqueue_task_head(p, target, tgrq);
inc_nr_running(p, rq);
}

@@ -1097,8 +1094,10 @@ static void activate_task(struct task_st
*/
static void deactivate_task(struct task_struct *p, struct rq *rq)
{
+ struct task_grp_rq *tgrq = task_grp(p)->rq[task_cpu(p)];
+
dec_nr_running(p, rq);
- dequeue_task(p, p->array);
+ dequeue_task(p, p->array, tgrq);
p->array = NULL;
}

@@ -2151,11 +2150,15 @@ static void pull_task(struct rq *src_rq,
struct task_struct *p, struct rq *this_rq,
struct prio_array *this_array, int this_cpu)
{
- dequeue_task(p, src_array);
+ struct task_grp_rq *tgrq;
+
+ tgrq = task_grp(p)->rq[task_cpu(p)];
+ dequeue_task(p, src_array, tgrq);
dec_nr_running(p, src_rq);
set_task_cpu(p, this_cpu);
inc_nr_running(p, this_rq);
- enqueue_task(p, this_array);
+ tgrq = task_grp(p)->rq[task_cpu(p)];
+ enqueue_task(p, this_array, tgrq);
p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
+ this_rq->timestamp_last_tick;
/*
@@ -3159,7 +3162,7 @@ void scheduler_tick(void)
goto out_unlock;
}
if (!--p->time_slice) {
- dequeue_task(p, tgrq->active);
+ dequeue_task(p, tgrq->active, tgrq);
set_tsk_need_resched(p);
p->prio = effective_prio(p);
p->time_slice = task_timeslice(p);
@@ -3168,12 +3171,12 @@ void scheduler_tick(void)
if (!tgrq->expired_timestamp)
tgrq->expired_timestamp = jiffies;
if (!TASK_INTERACTIVE(p) || expired_starving(tgrq)) {
- enqueue_task(p, tgrq->expired);
+ enqueue_task(p, tgrq->expired, tgrq);
if (p->static_prio < tgrq->expired->best_static_prio)
tgrq->expired->best_static_prio =
p->static_prio;
} else
- enqueue_task(p, tgrq->active);
+ enqueue_task(p, tgrq->active, tgrq);
} else {
/*
* Prevent a too long timeslice allowing a task to monopolize
@@ -3523,9 +3526,11 @@ need_resched_nonpreemptible:
new_prio = recalc_task_prio(next, next->timestamp + delta);

if (unlikely(next->prio != new_prio)) {
- dequeue_task(next, array);
+ struct task_grp_rq *tgrq =
+ task_grp(next)->rq[task_cpu(next)];
+ dequeue_task(next, array, tgrq);
next->prio = new_prio;
- enqueue_task(next, array);
+ enqueue_task(next, array, tgrq);
}
}
next->sleep_type = SLEEP_NORMAL;
@@ -3982,16 +3987,18 @@ void rt_mutex_setprio(struct task_struct
struct prio_array *array;
unsigned long flags;
struct rq *rq;
+ struct task_grp_rq *tgrq;
int oldprio;

BUG_ON(prio < 0 || prio > MAX_PRIO);

rq = task_rq_lock(p, &flags);
+ tgrq = task_grp(p)->rq[task_cpu(p)];

oldprio = p->prio;
array = p->array;
if (array)
- dequeue_task(p, array);
+ dequeue_task(p, array, tgrq);
p->prio = prio;

if (array) {
@@ -4001,7 +4008,7 @@ void rt_mutex_setprio(struct task_struct
*/
if (rt_task(p))
array = rq->active;
- enqueue_task(p, array);
+ enqueue_task(p, array, tgrq);
/*
* Reschedule if we are currently running on this runqueue and
* our priority decreased, or if we are not currently running on
@@ -4024,6 +4031,7 @@ void set_user_nice(struct task_struct *p
int old_prio, delta;
unsigned long flags;
struct rq *rq;
+ struct task_grp_rq *tgrq;

if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
return;
@@ -4032,6 +4040,8 @@ void set_user_nice(struct task_struct *p
* the task might be in the middle of scheduling on another CPU.
*/
rq = task_rq_lock(p, &flags);
+ tgrq = task_grp(p)->rq[task_cpu(p)];
+
/*
* The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
@@ -4044,7 +4054,7 @@ void set_user_nice(struct task_struct *p
}
array = p->array;
if (array) {
- dequeue_task(p, array);
+ dequeue_task(p, array, tgrq);
dec_raw_weighted_load(rq, p);
}

@@ -4055,7 +4065,7 @@ void set_user_nice(struct task_struct *p
delta = p->prio - old_prio;

if (array) {
- enqueue_task(p, array);
+ enqueue_task(p, array, tgrq);
inc_raw_weighted_load(rq, p);
/*
* If the task increased its priority or is running and
@@ -4591,8 +4601,8 @@ asmlinkage long sys_sched_yield(void)
schedstat_inc(rq, yld_exp_empty);

if (array != target) {
- dequeue_task(current, array);
- enqueue_task(current, target);
+ dequeue_task(current, array, tgrq);
+ enqueue_task(current, target, tgrq);
} else
/*
* requeue_task is cheaper so perform that if possible.
@@ -7132,10 +7142,40 @@ int sched_get_quota(void *grp)
return quota;
}

+/* Move a task from one task_grp_rq to another */
+void sched_move_task(struct task_struct *tsk, void *grp_old, void *grp_new)
+{
+ struct rq *rq;
+ unsigned long flags;
+ struct task_grp *tg_old = (struct task_grp *)grp_old,
+ *tg_new = (struct task_grp *)grp_new;
+ struct task_grp_rq *tgrq;
+
+ if (tg_new == tg_old)
+ return;
+
+ if (!tg_new)
+ tg_new = &init_task_grp;
+ if (!tg_old)
+ tg_old = &init_task_grp;
+
+ rq = task_rq_lock(tsk, &flags);
+
+ if (tsk->array) {
+ tgrq = tg_old->rq[task_cpu(tsk)];
+ dequeue_task(tsk, tsk->array, tgrq);
+ tgrq = tg_new->rq[task_cpu(tsk)];
+ enqueue_task(tsk, tg_new->rq[task_cpu(tsk)]->active, tgrq);
+ }
+
+ task_rq_unlock(rq, &flags);
+}
+
static struct task_grp_ops sched_grp_ops = {
.alloc_group = sched_alloc_group,
.dealloc_group = sched_dealloc_group,
.assign_quota = sched_assign_quota,
+ .move_task = sched_move_task,
.get_quota = sched_get_quota,
};

diff -puN include/linux/sched.h~cpu_ctlr_move_task include/linux/sched.h
--- linux-2.6.18-rc3/include/linux/sched.h~cpu_ctlr_move_task 2006-08-04 08:09:27.000000000 +0530
+++ linux-2.6.18-rc3-root/include/linux/sched.h 2006-08-04 08:09:41.000000000 +0530
@@ -1610,6 +1610,7 @@ struct task_grp_ops {
void *(*alloc_group)(void);
void (*dealloc_group)(void *grp);
void (*assign_quota)(void *grp, int quota);
+ void (*move_task)(struct task_struct *tsk, void *old, void *new);
int (*get_quota)(void *grp);
};


_
--
Regards,
vatsa