[tip: sched/core] sched/fair: Implement ENQUEUE_DELAYED

From: tip-bot2 for Peter Zijlstra
Date: Sun Aug 18 2024 - 02:24:49 EST


The following commit has been merged into the sched/core branch of tip:

Commit-ID: 781773e3b68031bd001c0c18aa72e8470c225ebd
Gitweb: https://git.kernel.org/tip/781773e3b68031bd001c0c18aa72e8470c225ebd
Author: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Thu, 23 May 2024 11:57:43 +02:00
Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Sat, 17 Aug 2024 11:06:43 +02:00

sched/fair: Implement ENQUEUE_DELAYED

Doing a wakeup on a delayed dequeue task is about as simple as it
sounds -- remove the delayed mark and enjoy the fact it was actually
still on the runqueue.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Reviewed-by: Valentin Schneider <vschneid@xxxxxxxxxx>
Tested-by: Valentin Schneider <vschneid@xxxxxxxxxx>
Link: https://lkml.kernel.org/r/20240727105029.888107381@xxxxxxxxxxxxx
---
kernel/sched/fair.c | 33 +++++++++++++++++++++++++++++++--
1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a4f1f79..25b14df 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5290,6 +5290,9 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq);
static inline bool cfs_bandwidth_used(void);

static void
+requeue_delayed_entity(struct sched_entity *se);
+
+static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
bool curr = cfs_rq->curr == se;
@@ -5922,8 +5925,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
for_each_sched_entity(se) {
struct cfs_rq *qcfs_rq = cfs_rq_of(se);

- if (se->on_rq)
+ if (se->on_rq) {
+ SCHED_WARN_ON(se->sched_delayed);
break;
+ }
enqueue_entity(qcfs_rq, se, ENQUEUE_WAKEUP);

if (cfs_rq_is_idle(group_cfs_rq(se)))
@@ -6773,6 +6778,22 @@ static int sched_idle_cpu(int cpu)
}
#endif

+static void
+requeue_delayed_entity(struct sched_entity *se)
+{
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+ /*
+ * se->sched_delayed should imply: se->on_rq == 1.
+ * Because a delayed entity is one that is still on
+ * the runqueue competing until elegibility.
+ */
+ SCHED_WARN_ON(!se->sched_delayed);
+ SCHED_WARN_ON(!se->on_rq);
+
+ se->sched_delayed = 0;
+}
+
/*
* The enqueue_task method is called before nr_running is
* increased. Here we update the fair scheduling stats and
@@ -6787,6 +6808,11 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
int task_new = !(flags & ENQUEUE_WAKEUP);
int rq_h_nr_running = rq->cfs.h_nr_running;

+ if (flags & ENQUEUE_DELAYED) {
+ requeue_delayed_entity(se);
+ return;
+ }
+
/*
* The code below (indirectly) updates schedutil which looks at
* the cfs_rq utilization to select a frequency.
@@ -6804,8 +6830,11 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);

for_each_sched_entity(se) {
- if (se->on_rq)
+ if (se->on_rq) {
+ if (se->sched_delayed)
+ requeue_delayed_entity(se);
break;
+ }
cfs_rq = cfs_rq_of(se);
enqueue_entity(cfs_rq, se, flags);