Re: [PATCH] sched/fair: Fix ftq noise bench regression

From: Vincent Guittot
Date: Wed Mar 22 2017 - 05:32:56 EST


On 21 March 2017 at 18:46, Dietmar Eggemann <dietmar.eggemann@xxxxxxx> wrote:
> Hi Vincent,
>
> On 17/03/17 13:47, Vincent Guittot wrote:
>
> [...]
>
>> Reported-by: ying.huang@xxxxxxxxxxxxxxx
>> Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
>> Fixes: 4e5160766fcc ("sched/fair: Propagate asynchrous detach")
>
> I thought I can see a difference by running:
>
> perf stat --null --repeat 10 -- perf bench sched messaging -g 50 -l
> 5000
>
> on an Ubuntu 16.10 server system.
>
> Number of entries in the rq->leaf_cfs_rq_list per cpu: ~40
>
> Target: Intel i5-3320M (4 logical cpus)
>
> tip/sched/core: 42.119140365 seconds time elapsed ( +- 0.33% )
>
> + patch : 42.089557108 seconds time elapsed ( +- 0.37% )
>
> Maybe I need a CPU with more 'logical cpus' or a different test case.
> Anyway, couldn't spot any regression.

ok. I haven't been able to reproduce the regression on my platforms too

>
>> ---
>> kernel/sched/fair.c | 39 ++++++++++++++++++++++++++++++++++++---
>> 1 file changed, 36 insertions(+), 3 deletions(-)
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 2805bd7..007df59 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -3173,6 +3173,36 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
>> return 1;
>> }
>>
>> +/*
>> + * Check if we need to update the load and the utilization of a blocked
>> + * group_entity
>> + */
>> +static inline bool skip_blocked_update(struct sched_entity *se)
>> +{
>> + struct cfs_rq *gcfs_rq = group_cfs_rq(se);
>> +
>> + /*
>> + * If sched_entity still have not null load or utilization, we have to
>> + * decay it.
>> + */
>> + if (se->avg.load_avg || se->avg.util_avg)
>> + return false;
>> +
>> + /*
>> + * If there is a pending propagation, we have to update the load and
>> + * the utilizaion of the sched_entity
>
> nit pick: s/utilizaion/utilization

yes


>
>> + */
>> + if (gcfs_rq->propagate_avg)
>> + return false;
>> +
>> + /*
>> + * Other wise, the load and the utilization of the sched_entity is
>> + * already null and there is no pending propagation so it will be a
>> + * waste of time to try to decay it.
>> + */
>> + return true;
>> +}
>> +
>> #else /* CONFIG_FAIR_GROUP_SCHED */
>>
>> static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
>> @@ -6961,6 +6991,8 @@ static void update_blocked_averages(int cpu)
>> * list_add_leaf_cfs_rq() for details.
>> */
>> for_each_leaf_cfs_rq(rq, cfs_rq) {
>> + struct sched_entity *se;
>> +
>> /* throttled entities do not contribute to load */
>> if (throttled_hierarchy(cfs_rq))
>> continue;
>> @@ -6968,9 +7000,10 @@ static void update_blocked_averages(int cpu)
>> if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
>> update_tg_load_avg(cfs_rq, 0);
>>
>> - /* Propagate pending load changes to the parent */
>> - if (cfs_rq->tg->se[cpu])
>> - update_load_avg(cfs_rq->tg->se[cpu], 0);
>> + /* Propagate pending load changes to the parent if any */
>> + se = cfs_rq->tg->se[cpu];
>> + if (se && !skip_blocked_update(se))
>> + update_load_avg(se, 0);
>> }
>> rq_unlock_irqrestore(rq, &rf);
>> }
>>
>
> Why not turn skip_blocked_update(se) into blocked_update_needed(cpu, cfs_rq)?
> Saves a couple of patch lines:

Are you sure that we are saving some patch lines ?

I tend to agree on the name and but not on parameters.
IMO, it's easier to understand the purpose of
blocked_update_needed(se) compared to blocked_update_needed(cpu,
cfs_rq)


>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 007df5953d1a..8001eeb4ec18 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3177,30 +3177,34 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
> * Check if we need to update the load and the utilization of a blocked
> * group_entity
> */
> -static inline bool skip_blocked_update(struct sched_entity *se)
> +static inline bool blocked_update_needed(int cpu, struct cfs_rq *cfs_rq)
> {
> - struct cfs_rq *gcfs_rq = group_cfs_rq(se);
> + struct sched_entity *se = cfs_rq->tg->se[cpu];
> +
> + /* cfs_rq of a root task_group has no sched_entity counterpart */
> + if (!se)
> + return false;
>
> /*
> * If sched_entity still have not null load or utilization, we have to
> * decay it.
> */
> if (se->avg.load_avg || se->avg.util_avg)
> - return false;
> + return true;
>
> /*
> * If there is a pending propagation, we have to update the load and
> * the utilizaion of the sched_entity
> */
> - if (gcfs_rq->propagate_avg)
> - return false;
> + if (cfs_rq->propagate_avg)
> + return true;
>
> /*
> * Other wise, the load and the utilization of the sched_entity is
> * already null and there is no pending propagation so it will be a
> * waste of time to try to decay it.
> */
> - return true;
> + return false;
> }
>
> #else /* CONFIG_FAIR_GROUP_SCHED */
> @@ -6991,8 +6995,6 @@ static void update_blocked_averages(int cpu)
> * list_add_leaf_cfs_rq() for details.
> */
> for_each_leaf_cfs_rq(rq, cfs_rq) {
> - struct sched_entity *se;
> -
> /* throttled entities do not contribute to load */
> if (throttled_hierarchy(cfs_rq))
> continue;
> @@ -7001,9 +7003,8 @@ static void update_blocked_averages(int cpu)
> update_tg_load_avg(cfs_rq, 0);
>
> /* Propagate pending load changes to the parent if any */
> - se = cfs_rq->tg->se[cpu];
> - if (se && !skip_blocked_update(se))
> - update_load_avg(se, 0);
> + if (blocked_update_needed(cpu, cfs_rq))
> + update_load_avg(cfs_rq->tg->se[cpu], 0);
> }
> rq_unlock_irqrestore(rq, &rf);
> }
>
>
>
>