[RFC tg_shares_up improvements - v1 04/12] sched: fix load corruption from update_cfs_shares

From: pjt
Date: Sat Oct 16 2010 - 00:55:06 EST


as part of enqueue_entity both a new entity weight and its contribution to the
queuing cfs_rq / rq are updated. Since update_cfs_shares will only update the
queueing weights when the entity is on_rq (which in this case it is not yet),
there's a dependency loop here:

update_cfs_shares needs account_entity_enqueue to update cfs_rq->load.weight
account_entity_enqueue needs the updated weight for the queuing cfs_rq load[*]

Fix this and avoid spurious dequeue/enqueues by issuing update_cfs_shares as
if we had accounted the enqueue already.

This was also resulting in rq->load corruption previously.

[*]: this dependency also exists when using the group cfs_rq w/
update_cfs_shares as the weight of the enqueued entity changes without the
load being updated.

Signed-off-by: Paul Turner <pjt@xxxxxxxxxx>
---
kernel/sched_fair.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

Index: kernel/sched_fair.c
===================================================================
--- kernel/sched_fair.c.orig
+++ kernel/sched_fair.c
@@ -718,7 +718,7 @@ static void reweight_entity(struct cfs_r
account_entity_enqueue(cfs_rq, se);
}

-static void update_cfs_shares(struct cfs_rq *cfs_rq)
+static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
{
struct task_group *tg;
struct sched_entity *se;
@@ -732,7 +732,7 @@ static void update_cfs_shares(struct cfs
if (!se)
return;

- load = cfs_rq->load.weight;
+ load = cfs_rq->load.weight + weight_delta;

load_weight = atomic_read(&tg->load_weight);
load_weight -= cfs_rq->load_contribution;
@@ -754,7 +754,7 @@ static inline void update_cfs_load(struc
{
}

-static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
+static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
{
}
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -881,8 +881,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
*/
update_curr(cfs_rq);
update_cfs_load(cfs_rq, 0);
+ update_cfs_shares(cfs_rq_of(se), se->load.weight);
account_entity_enqueue(cfs_rq, se);
- update_cfs_shares(cfs_rq_of(se));

if (flags & ENQUEUE_WAKEUP) {
place_entity(cfs_rq, se, 0);
@@ -944,7 +944,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
update_cfs_load(cfs_rq, 0);
account_entity_dequeue(cfs_rq, se);
update_min_vruntime(cfs_rq);
- update_cfs_shares(cfs_rq_of(se));
+ update_cfs_shares(cfs_rq_of(se), 0);

/*
* Normalize the entity after updating the min_vruntime because the
@@ -1177,7 +1177,7 @@ enqueue_task_fair(struct rq *rq, struct
struct cfs_rq *cfs_rq = cfs_rq_of(se);

update_cfs_load(cfs_rq, 0);
- update_cfs_shares(cfs_rq);
+ update_cfs_shares(cfs_rq, 0);
}

hrtick_update(rq);
@@ -1207,7 +1207,7 @@ static void dequeue_task_fair(struct rq
struct cfs_rq *cfs_rq = cfs_rq_of(se);

update_cfs_load(cfs_rq, 0);
- update_cfs_shares(cfs_rq);
+ update_cfs_shares(cfs_rq, 0);
}

hrtick_update(rq);
@@ -2030,7 +2030,7 @@ static int tg_shares_up(struct task_grou
* We need to update shares after updating tg->load_weight in
* order to adjust the weight of groups with long running tasks.
*/
- update_cfs_shares(cfs_rq);
+ update_cfs_shares(cfs_rq, 0);

raw_spin_unlock_irqrestore(&rq->lock, flags);


--

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/