[PATCH v4 6/9] sched/fair: fix another detach on unattached task corner case

From: Chengming Zhou
Date: Mon Aug 08 2022 - 08:58:56 EST


commit 7dc603c9028e ("sched/fair: Fix PELT integrity for new tasks")
fixed two load tracking problems for new tasks, including the detach on
unattached new task problem.

There is still another detach on unattached task problem left, for a task
which has been woken up by try_to_wake_up() but is waiting to actually
be woken up by sched_ttwu_pending():

try_to_wake_up(p)
  cpu = select_task_rq(p)
  if (task_cpu(p) != cpu)
    set_task_cpu(p, cpu)
      migrate_task_rq_fair()
        remove_entity_load_avg()       --> unattached
        se->avg.last_update_time = 0;
      __set_task_cpu()
  ttwu_queue(p, cpu)
    ttwu_queue_wakelist()
      __ttwu_queue_wakelist()

task_change_group_fair()
  detach_task_cfs_rq()
    detach_entity_cfs_rq()
      detach_entity_load_avg()       --> detach on unattached task
  set_task_rq()
  attach_task_cfs_rq()
    attach_entity_cfs_rq()
      attach_entity_load_avg()

The cause of this problem is similar, and so is the fix: detach_entity_cfs_rq()
should check that se->avg.last_update_time != 0 before doing detach_entity_load_avg().
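
To illustrate why the check matters, here is a tiny standalone model (plain
userspace C, not kernel code; the toy_* names are made up and the whole PELT
arithmetic is reduced to a single counter):

#include <stdio.h>

struct toy_se { unsigned long long last_update_time; long load; };
struct toy_cfs_rq { long load_sum; };

static void toy_attach(struct toy_cfs_rq *cfs_rq, struct toy_se *se)
{
	cfs_rq->load_sum += se->load;
	se->last_update_time = 1;	/* now "attached" */
}

static void toy_detach(struct toy_cfs_rq *cfs_rq, struct toy_se *se)
{
	if (!se->last_update_time)	/* the check this patch adds */
		return;
	cfs_rq->load_sum -= se->load;
	se->last_update_time = 0;	/* "unattached" again */
}

int main(void)
{
	/* cgroup change: detach from the old group, attach to the new one */
	struct toy_cfs_rq old_grp = { .load_sum = 0 };
	struct toy_cfs_rq new_grp = { .load_sum = 0 };
	struct toy_se se = { .last_update_time = 0, .load = 100 };	/* unattached */

	toy_detach(&old_grp, &se);	/* detach on an unattached entity:
					 * without the check, old_grp.load_sum
					 * would go to -100 and stay wrong */
	toy_attach(&new_grp, &se);

	printf("old %ld, new %ld\n", old_grp.load_sum, new_grp.load_sum);	/* 0, 100 */
	return 0;
}

The real detach_entity_load_avg() updates the full PELT sums rather than one
counter, but the failure mode is the same: load that was never added to the
old cfs_rq gets subtracted from it.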

This patch also moves the detach/attach_entity_cfs_rq() functions up, next to
the other load tracking functions, so that another #ifdef CONFIG_SMP is not needed.
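
For reference, the resulting layout of kernel/sched/fair.c is roughly as
sketched below (abridged outline, not an exact excerpt; the diff below is
authoritative):

#ifdef CONFIG_SMP
...
static void remove_entity_load_avg(struct sched_entity *se) { ... }

static void detach_entity_cfs_rq(struct sched_entity *se) { ... }
static void attach_entity_cfs_rq(struct sched_entity *se) { ... }
...
#else /* !CONFIG_SMP */
...
static inline void remove_entity_load_avg(struct sched_entity *se) {}
static inline void detach_entity_cfs_rq(struct sched_entity *se) {}
static inline void attach_entity_cfs_rq(struct sched_entity *se) {}
...
#endif /* CONFIG_SMP */

so callers such as detach_task_cfs_rq()/attach_task_cfs_rq() need no
CONFIG_SMP guard of their own.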

Signed-off-by: Chengming Zhou <zhouchengming@xxxxxxxxxxxxx>
---
kernel/sched/fair.c | 132 +++++++++++++++++++++++---------------------
1 file changed, 68 insertions(+), 64 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f52e7dc7f22d..4bc76d95a99d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -874,9 +874,6 @@ void init_entity_runnable_average(struct sched_entity *se)
void post_init_entity_util_avg(struct task_struct *p)
{
}
-static void update_tg_load_avg(struct cfs_rq *cfs_rq)
-{
-}
#endif /* CONFIG_SMP */

/*
@@ -3176,6 +3173,7 @@ void reweight_task(struct task_struct *p, int prio)
load->inv_weight = sched_prio_to_wmult[prio];
}

+static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq);
static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);

#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -4086,6 +4084,71 @@ static void remove_entity_load_avg(struct sched_entity *se)
raw_spin_unlock_irqrestore(&cfs_rq->removed.lock, flags);
}

+#ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * Propagate the changes of the sched_entity across the tg tree to make it
+ * visible to the root
+ */
+static void propagate_entity_cfs_rq(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	if (cfs_rq_throttled(cfs_rq))
+		return;
+
+	if (!throttled_hierarchy(cfs_rq))
+		list_add_leaf_cfs_rq(cfs_rq);
+
+	/* Start to propagate at parent */
+	se = se->parent;
+
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+
+		update_load_avg(cfs_rq, se, UPDATE_TG);
+
+		if (cfs_rq_throttled(cfs_rq))
+			break;
+
+		if (!throttled_hierarchy(cfs_rq))
+			list_add_leaf_cfs_rq(cfs_rq);
+	}
+}
+#else
+static void propagate_entity_cfs_rq(struct sched_entity *se) { }
+#endif
+
+static void detach_entity_cfs_rq(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	/*
+	 * In case the task sched_avg hasn't been attached:
+	 * - A forked task which hasn't been woken up by wake_up_new_task().
+	 * - A task which has been woken up by try_to_wake_up() but is
+	 *   waiting for actually being woken up by sched_ttwu_pending().
+	 */
+	if (!se->avg.last_update_time)
+		return;
+
+	/* Catch up with the cfs_rq and remove our load when we leave */
+	update_load_avg(cfs_rq, se, 0);
+	detach_entity_load_avg(cfs_rq, se);
+	update_tg_load_avg(cfs_rq);
+	propagate_entity_cfs_rq(se);
+}
+
+static void attach_entity_cfs_rq(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	/* Synchronize entity with its cfs_rq */
+	update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
+	attach_entity_load_avg(cfs_rq, se);
+	update_tg_load_avg(cfs_rq);
+	propagate_entity_cfs_rq(se);
+}
+
static inline unsigned long cfs_rq_runnable_avg(struct cfs_rq *cfs_rq)
{
return cfs_rq->avg.runnable_avg;
@@ -4308,11 +4371,8 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
}

static inline void remove_entity_load_avg(struct sched_entity *se) {}
-
-static inline void
-attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
-static inline void
-detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
+static inline void detach_entity_cfs_rq(struct sched_entity *se) {}
+static inline void attach_entity_cfs_rq(struct sched_entity *se) {}

static inline int newidle_balance(struct rq *rq, struct rq_flags *rf)
{
@@ -11519,62 +11579,6 @@ static inline bool vruntime_normalized(struct task_struct *p)
return false;
}

-#ifdef CONFIG_FAIR_GROUP_SCHED
-/*
- * Propagate the changes of the sched_entity across the tg tree to make it
- * visible to the root
- */
-static void propagate_entity_cfs_rq(struct sched_entity *se)
-{
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-
-	if (cfs_rq_throttled(cfs_rq))
-		return;
-
-	if (!throttled_hierarchy(cfs_rq))
-		list_add_leaf_cfs_rq(cfs_rq);
-
-	/* Start to propagate at parent */
-	se = se->parent;
-
-	for_each_sched_entity(se) {
-		cfs_rq = cfs_rq_of(se);
-
-		update_load_avg(cfs_rq, se, UPDATE_TG);
-
-		if (cfs_rq_throttled(cfs_rq))
-			break;
-
-		if (!throttled_hierarchy(cfs_rq))
-			list_add_leaf_cfs_rq(cfs_rq);
-	}
-}
-#else
-static void propagate_entity_cfs_rq(struct sched_entity *se) { }
-#endif
-
-static void detach_entity_cfs_rq(struct sched_entity *se)
-{
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-
-	/* Catch up with the cfs_rq and remove our load when we leave */
-	update_load_avg(cfs_rq, se, 0);
-	detach_entity_load_avg(cfs_rq, se);
-	update_tg_load_avg(cfs_rq);
-	propagate_entity_cfs_rq(se);
-}
-
-static void attach_entity_cfs_rq(struct sched_entity *se)
-{
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-
-	/* Synchronize entity with its cfs_rq */
-	update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
-	attach_entity_load_avg(cfs_rq, se);
-	update_tg_load_avg(cfs_rq);
-	propagate_entity_cfs_rq(se);
-}
-
static void detach_task_cfs_rq(struct task_struct *p)
{
struct sched_entity *se = &p->se;
--
2.36.1