[PATCH RFC v4] cpufreq: schedutil: Make iowait boost more energy efficient
From: Joel Fernandes
Date: Sun Jul 09 2017 - 13:08:59 EST
Currently the iowait_boost feature in schedutil makes the frequency go to max.
This feature was added to handle a case that Peter described where the
throughput of operations involving continuous I/O requests [1] is reduced due
to running at a lower frequency; however, the lower throughput itself causes
utilization to be low, which in turn keeps the frequency low, so it gets "stuck".
Instead of going to max, it's also possible to achieve the same effect by
ramping up to max if there are repeated in_iowait wakeups happening. This patch
is an attempt to do that. We start from a lower frequency (iowait_boost_min)
and double the boost for every consecutive iowait update until we reach the
maximum iowait boost frequency (iowait_boost_max).
I ran a synthetic test on an x86 machine with intel_pstate in passive mode
using schedutil. The patch achieves the same desired effect as the existing
behavior. I also tested on an ARM64 platform and see that, there, transient
iowait requests aren't causing frequency spikes.
[1] https://patchwork.kernel.org/patch/9735885/
Cc: Srinivas Pandruvada <srinivas.pandruvada@xxxxxxxxxxxxxxx>
Cc: Len Brown <lenb@xxxxxxxxxx>
Cc: Rafael J. Wysocki <rjw@xxxxxxxxxxxxx>
Cc: Viresh Kumar <viresh.kumar@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Suggested-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Joel Fernandes <joelaf@xxxxxxxxxx>
---
Changes since v1:
- not using tunables to plainly turn off iowait boost anymore
kernel/sched/cpufreq_schedutil.c | 52 ++++++++++++++++++++++++++++++++++------
1 file changed, 45 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 622eed1b7658..4d9e8b96bed1 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -53,7 +53,9 @@ struct sugov_cpu {
struct update_util_data update_util;
struct sugov_policy *sg_policy;
+ bool prev_iowait_boost;
unsigned long iowait_boost;
+ unsigned long iowait_boost_min;
unsigned long iowait_boost_max;
u64 last_update;
@@ -168,22 +170,47 @@ static void sugov_get_util(unsigned long *util, unsigned long *max)
*max = cfs_max;
}
+static void sugov_decay_iowait_boost(struct sugov_cpu *sg_cpu)
+{
+ sg_cpu->iowait_boost >>= 1;
+
+ if (sg_cpu->iowait_boost < sg_cpu->iowait_boost_min)
+ sg_cpu->iowait_boost = 0;
+}
+
static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
unsigned int flags)
{
if (flags & SCHED_CPUFREQ_IOWAIT) {
- sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+ /* Remember for next time that we did an iowait boost */
+ sg_cpu->prev_iowait_boost = true;
+ if (sg_cpu->iowait_boost) {
+ sg_cpu->iowait_boost <<= 1;
+ sg_cpu->iowait_boost = min(sg_cpu->iowait_boost,
+ sg_cpu->iowait_boost_max);
+ } else {
+ sg_cpu->iowait_boost = sg_cpu->iowait_boost_min;
+ }
} else if (sg_cpu->iowait_boost) {
s64 delta_ns = time - sg_cpu->last_update;
/* Clear iowait_boost if the CPU apprears to have been idle. */
if (delta_ns > TICK_NSEC)
sg_cpu->iowait_boost = 0;
+
+ /*
+ * Since we don't decay iowait_boost when it's consumed during
+ * the previous SCHED_CPUFREQ_IOWAIT update, decay it now.
+ */
+ if (sg_cpu->prev_iowait_boost) {
+ sugov_decay_iowait_boost(sg_cpu);
+ sg_cpu->prev_iowait_boost = false;
+ }
}
}
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
- unsigned long *max)
+ unsigned long *max, unsigned int flags)
{
unsigned long boost_util = sg_cpu->iowait_boost;
unsigned long boost_max = sg_cpu->iowait_boost_max;
@@ -195,7 +222,16 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
*util = boost_util;
*max = boost_max;
}
- sg_cpu->iowait_boost >>= 1;
+
+ /*
+ * In case iowait boost just happened on this CPU, don't reduce it right
+ * away since then the iowait boost will never increase on subsequent
+ * in_iowait wakeups.
+ */
+ if (flags & SCHED_CPUFREQ_IOWAIT && this_cpu_ptr(&sugov_cpu) == sg_cpu)
+ return;
+
+ sugov_decay_iowait_boost(sg_cpu);
}
#ifdef CONFIG_NO_HZ_COMMON
@@ -233,7 +269,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
next_f = policy->cpuinfo.max_freq;
} else {
sugov_get_util(&util, &max);
- sugov_iowait_boost(sg_cpu, &util, &max);
+ sugov_iowait_boost(sg_cpu, &util, &max, flags);
next_f = get_next_freq(sg_policy, util, max);
/*
* Do not reduce the frequency if the CPU has not been idle
@@ -245,7 +281,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
sugov_update_commit(sg_policy, time, next_f);
}
-static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
+static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time,
+ unsigned int flags)
{
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
struct cpufreq_policy *policy = sg_policy->policy;
@@ -279,7 +316,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
max = j_max;
}
- sugov_iowait_boost(j_sg_cpu, &util, &max);
+ sugov_iowait_boost(j_sg_cpu, &util, &max, flags);
}
return get_next_freq(sg_policy, util, max);
@@ -308,7 +345,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
if (flags & SCHED_CPUFREQ_RT_DL)
next_f = sg_policy->policy->cpuinfo.max_freq;
else
- next_f = sugov_next_freq_shared(sg_cpu, time);
+ next_f = sugov_next_freq_shared(sg_cpu, time, flags);
sugov_update_commit(sg_policy, time, next_f);
}
@@ -612,6 +649,7 @@ static int sugov_start(struct cpufreq_policy *policy)
memset(sg_cpu, 0, sizeof(*sg_cpu));
sg_cpu->sg_policy = sg_policy;
sg_cpu->flags = SCHED_CPUFREQ_RT;
+ sg_cpu->iowait_boost_min = policy->cpuinfo.min_freq;
sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
policy_is_shared(policy) ?
--
2.13.2.725.g09c95d1e9-goog