Re: [PATCH v3 2/4] sched: account number of SCHED_IDLE entities on each cfs_rq

From: Vincent Guittot
Date: Tue Aug 24 2021 - 03:58:03 EST


On Fri, 20 Aug 2021 at 03:04, Josh Don <joshdon@xxxxxxxxxx> wrote:
>
> Adds cfs_rq->idle_nr_running, which tracks the number of idle entities
> directly enqueued on the cfs_rq.
>
> Signed-off-by: Josh Don <joshdon@xxxxxxxxxx>

Reviewed-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>

> ---
> kernel/sched/debug.c | 2 ++
> kernel/sched/fair.c | 25 ++++++++++++++++++++++++-
> kernel/sched/sched.h | 1 +
> 3 files changed, 27 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
> index 49716228efb4..33538579db9a 100644
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -608,6 +608,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
> cfs_rq->nr_spread_over);
> SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
> SEQ_printf(m, " .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running);
> + SEQ_printf(m, " .%-30s: %d\n", "idle_nr_running",
> + cfs_rq->idle_nr_running);
> SEQ_printf(m, " .%-30s: %d\n", "idle_h_nr_running",
> cfs_rq->idle_h_nr_running);
> SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 5aa3cfd15a2e..19a9244c140f 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -2995,6 +2995,8 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
> }
> #endif
> cfs_rq->nr_running++;
> + if (se_is_idle(se))
> + cfs_rq->idle_nr_running++;
> }
>
> static void
> @@ -3008,6 +3010,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
> }
> #endif
> cfs_rq->nr_running--;
> + if (se_is_idle(se))
> + cfs_rq->idle_nr_running--;
> }
>
> /*
> @@ -5573,6 +5577,17 @@ static int sched_idle_rq(struct rq *rq)
> rq->nr_running);
> }
>
> +/*
> + * Returns true if cfs_rq only has SCHED_IDLE entities enqueued. Note the use
> + * of idle_nr_running, which does not consider idle descendants of normal
> + * entities.
> + */
> +static bool sched_idle_cfs_rq(struct cfs_rq *cfs_rq)
> +{
> + return cfs_rq->nr_running &&
> + cfs_rq->nr_running == cfs_rq->idle_nr_running;
> +}
> +
> #ifdef CONFIG_SMP
> static int sched_idle_cpu(int cpu)
> {
> @@ -11556,7 +11571,7 @@ int sched_group_set_idle(struct task_group *tg, long idle)
> for_each_possible_cpu(i) {
> struct rq *rq = cpu_rq(i);
> struct sched_entity *se = tg->se[i];
> - struct cfs_rq *grp_cfs_rq = tg->cfs_rq[i];
> + struct cfs_rq *parent_cfs_rq, *grp_cfs_rq = tg->cfs_rq[i];
> bool was_idle = cfs_rq_is_idle(grp_cfs_rq);
> long idle_task_delta;
> struct rq_flags rf;
> @@ -11567,6 +11582,14 @@ int sched_group_set_idle(struct task_group *tg, long idle)
> if (WARN_ON_ONCE(was_idle == cfs_rq_is_idle(grp_cfs_rq)))
> goto next_cpu;
>
> + if (se->on_rq) {
> + parent_cfs_rq = cfs_rq_of(se);
> + if (cfs_rq_is_idle(grp_cfs_rq))
> + parent_cfs_rq->idle_nr_running++;
> + else
> + parent_cfs_rq->idle_nr_running--;
> + }
> +
> idle_task_delta = grp_cfs_rq->h_nr_running -
> grp_cfs_rq->idle_h_nr_running;
> if (!cfs_rq_is_idle(grp_cfs_rq))
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 8dfad8fb756c..6af039e433fb 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -530,6 +530,7 @@ struct cfs_rq {
> struct load_weight load;
> unsigned int nr_running;
> unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */
> + unsigned int idle_nr_running; /* SCHED_IDLE */
> unsigned int idle_h_nr_running; /* SCHED_IDLE */
>
> u64 exec_clock;
> --
> 2.33.0.rc2.250.ged5fa647cd-goog
>