Re: [PATCH v2] sched/fair: Fix insertion in rq->leaf_cfs_rq_list

From: Peter Zijlstra
Date: Wed Jan 30 2019 - 08:40:29 EST


On Wed, Jan 30, 2019 at 02:29:42PM +0100, Vincent Guittot wrote:
> On Wed, 30 Jan 2019 at 14:27, Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
> >
> > On Wed, Jan 30, 2019 at 02:06:20PM +0100, Peter Zijlstra wrote:
> > > On Wed, Jan 30, 2019 at 02:04:10PM +0100, Peter Zijlstra wrote:
> >
> > > > So I don't much like this; at all. But maybe I misunderstand, this is
> > > > somewhat tricky stuff and I've not looked at it in a while.
> > > >
> > > > So per normal we do:
> > > >
> > > > enqueue_task_fair()
> > > > for_each_sched_entity() {
> > > > if (se->on_rq)
> > > > break;
> > > > enqueue_entity()
> > > > list_add_leaf_cfs_rq();
> > > > }
> > > >
> > > > This ensures that all parents are already enqueued, right? because this
> > > > is what enqueues those parents.
> > > >
> > > > And in this case you add an unconditional second
> > > > for_each_sched_entity(); even though it is completely redundant, afaict.
> > >
> > > Ah, it doesn't do a second iteration; it continues where the previous
> > > two left off.
> > >
> > > Still, why isn't this in unthrottle?
> >
> > Aah, I see, because we need:
> >
> > rq->tmp_alone_branch == &rq->lead_cfs_rq_list
> >
> > at the end of enqueue_task_fair(); having had that assertion would've
>
> Yes exactly.

How's this ?

---
kernel/sched/fair.c | 125 +++++++++++++++++++++++++++++-----------------------
1 file changed, 69 insertions(+), 56 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e2ff4b69dcf6..747976ca84ea 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -284,64 +284,66 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)

static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
- if (!cfs_rq->on_list) {
- struct rq *rq = rq_of(cfs_rq);
- int cpu = cpu_of(rq);
+ struct rq *rq = rq_of(cfs_rq);
+ int cpu = cpu_of(rq);
+
+ if (cfs_rq->on_list)
+ return;
+
+ /*
+ * Ensure we either appear before our parent (if already
+ * enqueued) or force our parent to appear after us when it is
+ * enqueued. The fact that we always enqueue bottom-up
+ * reduces this to two cases and a special case for the root
+ * cfs_rq. Furthermore, it also means that we will always reset
+ * tmp_alone_branch either when the branch is connected
+ * to a tree or when we reach the top of the tree
+ */
+ if (cfs_rq->tg->parent &&
+ cfs_rq->tg->parent->cfs_rq[cpu]->on_list) {
/*
- * Ensure we either appear before our parent (if already
- * enqueued) or force our parent to appear after us when it is
- * enqueued. The fact that we always enqueue bottom-up
- * reduces this to two cases and a special case for the root
- * cfs_rq. Furthermore, it also means that we will always reset
- * tmp_alone_branch either when the branch is connected
- * to a tree or when we reach the beg of the tree
+ * If parent is already on the list, we add the child
+ * just before. Thanks to circular linked property of
+ * the list, this means to put the child at the tail
+ * of the list that starts by parent.
*/
- if (cfs_rq->tg->parent &&
- cfs_rq->tg->parent->cfs_rq[cpu]->on_list) {
- /*
- * If parent is already on the list, we add the child
- * just before. Thanks to circular linked property of
- * the list, this means to put the child at the tail
- * of the list that starts by parent.
- */
- list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
- &(cfs_rq->tg->parent->cfs_rq[cpu]->leaf_cfs_rq_list));
- /*
- * The branch is now connected to its tree so we can
- * reset tmp_alone_branch to the beginning of the
- * list.
- */
- rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
- } else if (!cfs_rq->tg->parent) {
- /*
- * cfs rq without parent should be put
- * at the tail of the list.
- */
- list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
- &rq->leaf_cfs_rq_list);
- /*
- * We have reach the beg of a tree so we can reset
- * tmp_alone_branch to the beginning of the list.
- */
- rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
- } else {
- /*
- * The parent has not already been added so we want to
- * make sure that it will be put after us.
- * tmp_alone_branch points to the beg of the branch
- * where we will add parent.
- */
- list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
- rq->tmp_alone_branch);
- /*
- * update tmp_alone_branch to points to the new beg
- * of the branch
- */
- rq->tmp_alone_branch = &cfs_rq->leaf_cfs_rq_list;
- }
-
- cfs_rq->on_list = 1;
+ list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
+ &(cfs_rq->tg->parent->cfs_rq[cpu]->leaf_cfs_rq_list));
+ /*
+ * The branch is now connected to its tree so we can
+ * reset tmp_alone_branch to the beginning of the
+ * list.
+ */
+ rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
+ } else if (!cfs_rq->tg->parent) {
+ /*
+ * cfs rq without parent should be put
+ * at the tail of the list.
+ */
+ list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
+ &rq->leaf_cfs_rq_list);
+ /*
+ * We have reach the top of a tree so we can reset
+ * tmp_alone_branch to the beginning of the list.
+ */
+ rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
+ } else {
+ /*
+ * The parent has not already been added so we want to
+ * make sure that it will be put after us.
+ * tmp_alone_branch points to the begin of the branch
+ * where we will add parent.
+ */
+ list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
+ rq->tmp_alone_branch);
+ /*
+ * update tmp_alone_branch to points to the new begin
+ * of the branch
+ */
+ rq->tmp_alone_branch = &cfs_rq->leaf_cfs_rq_list;
}
+
+ cfs_rq->on_list = 1;
}

static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
@@ -352,7 +354,12 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
}
}

-/* Iterate through all leaf cfs_rq's on a runqueue: */
+static inline void assert_list_leaf_cfs_rq(struct rq *rq)
+{
+ SCHED_WARN_ON(rq->tmp_alone_branch != &rq->lead_cfs_rq_list);
+}
+
+/* Iterate through all cfs_rq's on a runqueue in bottom-up order */
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)

@@ -446,6 +453,10 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
}

+static inline void assert_list_leaf_cfs_rq(struct rq *rq)
+{
+}
+
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)

@@ -5179,6 +5190,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)

}

+ assert_list_leaf_cfs_rq(rq);
+
hrtick_update(rq);
}