Re: [PATCH] sched/fair: fix pick_eevdf to always find the correct se

From: Abel Wu
Date: Wed Oct 11 2023 - 08:15:32 EST


On 9/30/23 8:09 AM, Benjamin Segall Wrote:
The old pick_eevdf could fail to find the actual earliest eligible
deadline when it descended to the right looking for min_deadline, but it
turned out that that min_deadline wasn't actually eligible. In that case
we need to go back and search through any left branches we skipped
looking for the actual best _eligible_ min_deadline.

This is more expensive, but still O(log n), and at worst should only
involve descending two branches of the rbtree.

I've run this through a userspace stress test (thank you
tools/lib/rbtree.c), so hopefully this implementation doesn't miss any
corner cases.

Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
Signed-off-by: Ben Segall <bsegall@xxxxxxxxxx>
---
kernel/sched/fair.c | 72 ++++++++++++++++++++++++++++++++++++---------
1 file changed, 58 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0c31cda0712f..77e9440b8ab3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -864,18 +864,20 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
*
* se->min_deadline = min(se->deadline, se->{left,right}->min_deadline)
*
* Which allows an EDF like search on (sub)trees.
*/
-static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq)
{
struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;
struct sched_entity *curr = cfs_rq->curr;
struct sched_entity *best = NULL;
+ struct sched_entity *best_left = NULL;
if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
curr = NULL;
+ best = curr;
/*
* Once selected, run a task until it either becomes non-eligible or
* until it gets a new slice. See the HACK in set_next_entity().
*/
@@ -892,45 +894,87 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
node = node->rb_left;
continue;
}
/*
- * If this entity has an earlier deadline than the previous
- * best, take this one. If it also has the earliest deadline
- * of its subtree, we're done.
+ * Now we heap search eligible trees for the best (min_)deadline
*/
- if (!best || deadline_gt(deadline, best, se)) {
+ if (!best || deadline_gt(deadline, best, se))
best = se;
- if (best->deadline == best->min_deadline)
- break;
- }
/*
- * If the earlest deadline in this subtree is in the fully
- * eligible left half of our space, go there.
+ * Every se in a left branch is eligible, keep track of the
+ * branch with the best min_deadline
*/
+ if (node->rb_left) {
+ struct sched_entity *left = __node_2_se(node->rb_left);
+
+ if (!best_left || deadline_gt(min_deadline, best_left, left))
+ best_left = left;
+
+ /*
+ * min_deadline is in the left branch. rb_left and all
+ * descendants are eligible, so immediately switch to the second
+ * loop.
+ */
+ if (left->min_deadline == se->min_deadline)
+ break;
+ }
+
+ /* min_deadline is at this node, no need to look right */
+ if (se->deadline == se->min_deadline)
+ break;
+
+ /* else min_deadline is in the right branch. */
+ node = node->rb_right;
+ }
+
+ /*
+ * We ran into an eligible node which is itself the best.
+ * (Or nr_running == 0 and both are NULL)
+ */
+ if (!best_left || (s64)(best_left->min_deadline - best->deadline) > 0)
+ return best;
+
+ /*
+ * Now best_left and all of its children are eligible, and we are just
+ * looking for deadline == min_deadline
+ */
+ node = &best_left->run_node;
+ while (node) {
+ struct sched_entity *se = __node_2_se(node);
+
+ /* min_deadline is the current node */
+ if (se->deadline == se->min_deadline)
+ return se;

IMHO it would be better tiebreak on vruntime by moving this hunk to ..

+
+ /* min_deadline is in the left branch */
if (node->rb_left &&
__node_2_se(node->rb_left)->min_deadline == se->min_deadline) {
node = node->rb_left;
continue;
}

.. here, thoughts?

+ /* else min_deadline is in the right branch */
node = node->rb_right;
}
+ return NULL;

Why not 'best'? Since ..

+}
- if (!best || (curr && deadline_gt(deadline, best, curr)))
- best = curr;
+static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
+{
+ struct sched_entity *se = __pick_eevdf(cfs_rq);
- if (unlikely(!best)) {
+ if (!se) {
struct sched_entity *left = __pick_first_entity(cfs_rq);

.. cfs_rq->curr isn't considered here.

if (left) {
pr_err("EEVDF scheduling fail, picking leftmost\n");
return left;
}
}
- return best;
+ return se;
}
#ifdef CONFIG_SCHED_DEBUG
struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{

The implementation of __pick_eevdf() now is quite complicated which
makes it really hard to maintain. I'm trying my best to refactor this
part, hopefully can do some help. Below is only for illustration, I
need to test more.

static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq)
{
struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;
struct sched_entity *curr = cfs_rq->curr;
struct sched_entity *best = NULL;
struct sched_entity *cand = NULL;
bool all_eligible = false;

if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
curr = NULL;

/*
* Once selected, run a task until it either becomes non-eligible or
* until it gets a new slice. See the HACK in set_next_entity().
*/
if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline)
return curr;

while (node) {
struct sched_entity *se = __node_2_se(node);

/*
* If this entity is not eligible, try the left subtree.
*/
if (!all_eligible && !entity_eligible(cfs_rq, se)) {
node = node->rb_left;
continue;
}

if (node->rb_left) {
struct sched_entity *left = __node_2_se(node->rb_left);

BUG_ON(left->min_deadline < se->min_deadline);

/* Tiebreak on vruntime */
if (left->min_deadline == se->min_deadline) {
node = node->rb_left;
all_eligible = true;
continue;
}

if (!all_eligible) {
/*
* We're going to search right subtree and the one
* with min_deadline can be non-eligible, so record
* the left subtree as a candidate.
*/
if (!cand || deadline_gt(min_deadline, cand, left))
cand = left;
}
}

/* min_deadline is at this node, no need to look right */
if (se->deadline == se->min_deadline) {
best = se;
break;
}

node = node->rb_right;

if (!node && cand) {
node = cand;
all_eligible = true;
cand = NULL;
}
}

if (!best || (curr && deadline_gt(deadline, best, curr)))
best = curr;

return best;
}