[PATCH RFC 2/5] sched/fair: Skip frequency update if CPU about to idle

From: Joel Fernandes
Date: Sat Oct 28 2017 - 06:00:23 EST


Updating CPU frequency on last dequeue of a CPU is useless. Because the
utilization since CPU came out of idle can increase till the last dequeue, this
means we are requesting for a higher frequency before entering idle which is
not very meaningful or useful. It causes unwanted wakeups of the schedutil
governor kthread in slow-switch systems resulting in large number of wake ups
that could have been avoided. In an Android application playing music where the
music app's thread wakes up and sleeps periodically on an Android device, its
seen that the frequency increases slightly on the dequeue and is reduced when
the task wakes up again. This oscillation continues between 300Mhz and 350Mhz,
and while the task is running, its at 300MHz the whole time. This is pointless.
Adding to that, these are unnecessary wake ups. Infact most of the time when
the sugov thread wakes up, all the CPUs are idle - so it can hurt power by
disturbing the cluster when it is idling.

This patch prevents a frequency update on the last dequeue. With this the
number of schedutil governor thread wake ups are reduces more than 2 times
(1389 -> 527).

Cc: Rafael J. Wysocki <rjw@xxxxxxxxxxxxx>
Cc: Viresh Kumar <viresh.kumar@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Joel Fernandes <joelaf@xxxxxxxxxx>
---
kernel/sched/fair.c | 25 ++++++++++++++++++++++---
kernel/sched/sched.h | 1 +
2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f97693fe8b6e..4c06e52935d3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3725,6 +3725,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
#define UPDATE_TG 0x1
#define SKIP_AGE_LOAD 0x2
#define DO_ATTACH 0x4
+#define SKIP_CPUFREQ 0x8

/* Update task and its cfs_rq load average */
static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -3741,7 +3742,7 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD))
__update_load_avg_se(now, cpu, cfs_rq, se);

- decayed = update_cfs_rq_load_avg(now, cfs_rq, true);
+ decayed = update_cfs_rq_load_avg(now, cfs_rq, !(flags & SKIP_CPUFREQ));
decayed |= propagate_entity_load_avg(se);

if (!se->avg.last_update_time && (flags & DO_ATTACH)) {
@@ -3839,6 +3840,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
#define UPDATE_TG 0x0
#define SKIP_AGE_LOAD 0x0
#define DO_ATTACH 0x0
+#define SKIP_CPUFREQ 0x0

static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int not_used1)
{
@@ -4060,6 +4062,8 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
+ int update_flags;
+
/*
* Update run-time statistics of the 'current'.
*/
@@ -4073,7 +4077,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* - For group entity, update its weight to reflect the new share
* of its group cfs_rq.
*/
- update_load_avg(cfs_rq, se, UPDATE_TG);
+ update_flags = UPDATE_TG;
+
+ if (flags & DEQUEUE_IDLE)
+ update_flags |= SKIP_CPUFREQ;
+
+ update_load_avg(cfs_rq, se, update_flags);
dequeue_runnable_load_avg(cfs_rq, se);

update_stats_dequeue(cfs_rq, se, flags);
@@ -5220,6 +5229,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct sched_entity *se = &p->se;
int task_sleep = flags & DEQUEUE_SLEEP;

+ if (task_sleep && rq->nr_running == 1)
+ flags |= DEQUEUE_IDLE;
+
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
dequeue_entity(cfs_rq, se, flags);
@@ -5250,13 +5262,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
}

for_each_sched_entity(se) {
+ int update_flags;
+
cfs_rq = cfs_rq_of(se);
cfs_rq->h_nr_running--;

if (cfs_rq_throttled(cfs_rq))
break;

- update_load_avg(cfs_rq, se, UPDATE_TG);
+ update_flags = UPDATE_TG;
+
+ if (flags & DEQUEUE_IDLE)
+ update_flags |= SKIP_CPUFREQ;
+
+ update_load_avg(cfs_rq, se, update_flags);
update_cfs_group(se);
}

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8aa24b41f652..68f5cd102744 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1394,6 +1394,7 @@ extern const u32 sched_prio_to_wmult[40];
#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */
#define DEQUEUE_MOVE 0x04 /* matches ENQUEUE_MOVE */
#define DEQUEUE_NOCLOCK 0x08 /* matches ENQUEUE_NOCLOCK */
+#define DEQUEUE_IDLE 0x10

#define ENQUEUE_WAKEUP 0x01
#define ENQUEUE_RESTORE 0x02
--
2.15.0.rc2.357.g7e34df9404-goog