[PATCH v3] sched/fair: fix unthrottle_cfs_rq for leaf_cfs_rq list

From: Vincent Guittot
Date: Wed May 13 2020 - 09:55:37 EST


Although not exactly identical, unthrottle_cfs_rq() and enqueue_task_fair()
are quite close and follow the same sequence for enqueuing an entity in the
cfs hierarchy. Modify unthrottle_cfs_rq() to use the same pattern as
enqueue_task_fair(). This fixes a problem already faced with the latter and
add an optimization in the last for_each_sched_entity loop.

Reported-by Tao Zhou <zohooouoto@xxxxxxxxxxx>
Reviewed-by: Phil Auld <pauld@xxxxxxxxxx>
Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
---

v3 changes:
- remove the unused enqueue variable

kernel/sched/fair.c | 42 ++++++++++++++++++++++++++++++------------
1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4e12ba882663..9a58874ef104 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4792,7 +4792,6 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
struct rq *rq = rq_of(cfs_rq);
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
struct sched_entity *se;
- int enqueue = 1;
long task_delta, idle_task_delta;

se = cfs_rq->tg->se[cpu_of(rq)];
@@ -4816,26 +4815,44 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
idle_task_delta = cfs_rq->idle_h_nr_running;
for_each_sched_entity(se) {
if (se->on_rq)
- enqueue = 0;
+ break;
+ cfs_rq = cfs_rq_of(se);
+ enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);

+ cfs_rq->h_nr_running += task_delta;
+ cfs_rq->idle_h_nr_running += idle_task_delta;
+
+ /* end evaluation on encountering a throttled cfs_rq */
+ if (cfs_rq_throttled(cfs_rq))
+ goto unthrottle_throttle;
+ }
+
+ for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
- if (enqueue) {
- enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
- } else {
- update_load_avg(cfs_rq, se, 0);
- se_update_runnable(se);
- }
+
+ update_load_avg(cfs_rq, se, UPDATE_TG);
+ se_update_runnable(se);

cfs_rq->h_nr_running += task_delta;
cfs_rq->idle_h_nr_running += idle_task_delta;

+
+ /* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
- break;
+ goto unthrottle_throttle;
+
+ /*
+ * One parent has been throttled and cfs_rq removed from the
+ * list. Add it back to not break the leaf list.
+ */
+ if (throttled_hierarchy(cfs_rq))
+ list_add_leaf_cfs_rq(cfs_rq);
}

- if (!se)
- add_nr_running(rq, task_delta);
+ /* At this point se is NULL and we are at root level*/
+ add_nr_running(rq, task_delta);

+unthrottle_throttle:
/*
* The cfs_rq_throttled() breaks in the above iteration can result in
* incomplete leaf list maintenance, resulting in triggering the
@@ -4844,7 +4861,8 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);

- list_add_leaf_cfs_rq(cfs_rq);
+ if (list_add_leaf_cfs_rq(cfs_rq))
+ break;
}

assert_list_leaf_cfs_rq(rq);
--
2.17.1