On 01/02/2024 14:11, Hongyan Xia wrote:
[...]
	/*
	 * The code below (indirectly) updates schedutil which looks at

@@ -6769,6 +6770,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 #ifdef CONFIG_UCLAMP_TASK
 	util_uclamp_enqueue(&rq->cfs.avg, p);
 	update_util_uclamp(0, 0, 0, &rq->cfs.avg, p);
+	if (migrated)
+		rq->root_cfs_util_uclamp += p->se.avg.util_avg_uclamp;
+	rq->root_cfs_util_uclamp = max(rq->root_cfs_util_uclamp,
+				       rq->cfs.avg.util_avg_uclamp);
 	/* TODO: Better skip the frequency update in the for loop above. */
 	cpufreq_update_util(rq, 0);
 #endif

IMHO, you don't need 'bool __maybe_unused migrated'. You can use:

	if (flags & ENQUEUE_MIGRATED)
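
In context, the hunk would then read something like this (untested
sketch; 'flags' is the existing enqueue_task_fair() parameter and I'm
assuming ENQUEUE_MIGRATED covers all the migration cases you care
about):

#ifdef CONFIG_UCLAMP_TASK
	util_uclamp_enqueue(&rq->cfs.avg, p);
	update_util_uclamp(0, 0, 0, &rq->cfs.avg, p);
	/* ENQUEUE_MIGRATED is set when p is enqueued after a migration */
	if (flags & ENQUEUE_MIGRATED)
		rq->root_cfs_util_uclamp += p->se.avg.util_avg_uclamp;
	rq->root_cfs_util_uclamp = max(rq->root_cfs_util_uclamp,
				       rq->cfs.avg.util_avg_uclamp);
	cpufreq_update_util(rq, 0);
#endif

That way the extra local variable and its __maybe_unused annotation can
go away entirely.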
@@ -8252,6 +8257,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 		migrate_se_pelt_lag(se);
 	}

+	remove_root_cfs_util_uclamp(p);
You can't always do this here. In the '!task_on_rq_migrating()' case we
don't hold the 'old' rq->lock.
Have a look at remove_entity_load_avg() to see what we do for PELT in
this case.
And:
144d8487bc6e ("sched/fair: Implement synchonous PELT detach on load-balance migrate")
e1f078f50478 ("sched/fair: Combine detach into dequeue when migrating task")
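
For reference, PELT defers the removal exactly because of this: the
blocked contribution is parked under 'cfs_rq->removed.lock' and only
folded in later under the rq lock. A minimal sketch of the same pattern
for util_avg_uclamp (the 'removed.util_avg_uclamp' field is
hypothetical, untested):

static void remove_entity_load_avg(struct sched_entity *se)
{
	struct cfs_rq *cfs_rq = cfs_rq_of(se);
	unsigned long flags;

	sync_entity_load_avg(se);

	raw_spin_lock_irqsave(&cfs_rq->removed.lock, flags);
	++cfs_rq->removed.nr;
	cfs_rq->removed.util_avg += se->avg.util_avg;
	cfs_rq->removed.load_avg += se->avg.load_avg;
	/* hypothetical: park the uclamp contribution for later removal */
	cfs_rq->removed.util_avg_uclamp += se->avg.util_avg_uclamp;
	raw_spin_unlock_irqrestore(&cfs_rq->removed.lock, flags);
}

The parked contribution could then be subtracted from
rq->root_cfs_util_uclamp in update_cfs_rq_load_avg(), which runs with
the rq lock held.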
@@ -3081,6 +3081,8 @@ static inline void remove_root_cfs_util_uclamp(struct task_struct *p)
 	unsigned int root_util = READ_ONCE(rq->root_cfs_util_uclamp);
 	unsigned int p_util = READ_ONCE(p->se.avg.util_avg_uclamp), new_util;

+	lockdep_assert_rq_held(task_rq(p));
+
 	new_util = (root_util > p_util) ? root_util - p_util : 0;
 	new_util = max(new_util, READ_ONCE(rq->cfs.avg.util_avg_uclamp));
 	WRITE_ONCE(rq->root_cfs_util_uclamp, new_util);
[...]
 /* avg must belong to the queue this se is on. */
-void update_util_uclamp(struct sched_avg *avg, struct task_struct *p)
+void update_util_uclamp(u64 now,
+			u64 last_update_time,
+			u32 period_contrib,
+			struct sched_avg *avg,
+			struct task_struct *p)
 {
I was wondering why you use such a long parameter list for this
function.
IMHO,

	update_util_uclamp(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se)

would work as well. You could check whether se represents a task inside
update_util_uclamp() and also fetch last_update_time and period_contrib
there.
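
I.e. something like this (untested sketch; assumes the function is
called before ___update_load_sum() advances se->avg.last_update_time
and se->avg.period_contrib):

void update_util_uclamp(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	u64 last_update_time = se->avg.last_update_time;
	u32 period_contrib = se->avg.period_contrib;
	struct task_struct *p;

	if (!entity_is_task(se))
		return;

	p = task_of(se);

	/* ... existing body, operating on &cfs_rq->avg and p ... */
}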
The only reason I see for the long parameter list is that you want to
use this function for the RT class later as well, where you have to
deal with 'struct rt_rq' and 'struct sched_rt_entity'.
IMHO, it's always better to keep the implementation minimal and only
introduce changes related to the functionality you present. This would
make reviewing so much easier.
 	unsigned int util, uclamp_min, uclamp_max;
 	int delta;

-	if (!p->se.on_rq)
+	if (!p->se.on_rq) {
+		___update_util_uclamp_towards(now,
+					      last_update_time,
+					      period_contrib,
+					      &p->se.avg.util_avg_uclamp,
+					      0);
 		return;
+	}
You decay 'p->se.avg.util_avg_uclamp' here, which is not really related
to root_cfs_util_uclamp (the topic of the patch header). IMHO, this
would belong in 2/7.
This is the util_avg_uclamp handling for a se (task):

enqueue_task_fair()
  util_uclamp_enqueue()
  update_util_uclamp()                  (1)
    if (!p->se.on_rq)                   (x)
      ___update_util_uclamp_towards()   (2)

dequeue_task_fair()
  util_uclamp_dequeue()

__update_load_avg_blocked_se()
  update_util_uclamp()
    (x)

__update_load_avg_se()
  update_util_uclamp()
    (x)
Why is it so unbalanced? Why do you need (1) and (2)? Isn't this just
an indication that the se util_avg_uclamp handling is done in the wrong
places?

Is there another way to provide a task/rq signal as the base for uclamp
sum aggregation?
[...]