[RFC v2 6/8] sched/tune: compute and keep track of per CPU boost value

From: Patrick Bellasi
Date: Thu Oct 27 2016 - 13:41:42 EST


When per-task boosting is enabled, we can have multiple RUNNABLE tasks
concurrently scheduled on the same CPU, each with a different boost
value.
For example, we could have a scenario like this:

  Task    SchedTune CGroup    Boost Value
  T1      root                          0
  T2      low-priority                 10
  T3      interactive                  90

Under these conditions we expect a CPU to be configured according to a
proper "aggregation" of the boost values required by all the tasks
currently RUNNABLE on that CPU.

A simple aggregation function is one which tracks the MAX boost value
across all the tasks RUNNABLE on a CPU (sketched below). With the
example above, a CPU running T1, T2 and T3 would thus be boosted by
max(0, 10, 90) = 90. This approach always satisfies the most
boost-demanding task while at the same time:
a) boosting all of its co-scheduled tasks, thus reducing potential
   side-effects on the most boost-demanding tasks;
b) reducing the number of frequency switches requested by schedutil,
   thus being more friendly to architectures with slow frequency
   switching times.
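
A minimal, illustrative sketch of this MAX aggregation; the type and
function names here are made up for the example, while the actual
implementation is schedtune_cpu_update() in the diff below:

	struct boost_group_entry {		/* illustrative only */
		int boost;			/* boost value of this group */
		int tasks;			/* RUNNABLE tasks of this group on the CPU */
	};

	/* Return the highest boost among groups with RUNNABLE tasks */
	static int cpu_max_boost(const struct boost_group_entry *grp, int nr_groups)
	{
		int max_boost = grp[0].boost;	/* root group is always active */
		int idx;

		for (idx = 1; idx < nr_groups; ++idx) {
			if (grp[idx].tasks == 0)
				continue;	/* group does not affect this CPU */
			if (grp[idx].boost > max_boost)
				max_boost = grp[idx].boost;
		}
		return max_boost;
	}

Tracking the MAX (rather than e.g. a sum or an average) guarantees the
most boost-demanding task is never penalized by less demanding
co-runners.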

Every time a task enters/exits the RQ of a CPU, the max boost value
should be updated considering all the boost groups currently "affecting"
that CPU, i.e. those which have at least one RUNNABLE task currently
enqueued on that CPU.
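
How the per-group RUNNABLE task counts could drive this update from the
enqueue/dequeue path is sketched below; the hook itself is not part of
this patch, and schedtune_tasks_update() is a hypothetical helper name
used only for illustration:

	/*
	 * Hypothetical helper, called with delta = +1 when a task of boost
	 * group idx is enqueued on cpu, and delta = -1 when it is dequeued.
	 */
	static void schedtune_tasks_update(int cpu, int idx, int delta)
	{
		struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);

		bg->group[idx].tasks += delta;
		/* Recompute bg->boost_max over the groups still active here */
		schedtune_cpu_update(cpu);
	}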

This patch introduces the required support to keep track of the boost
groups currently affecting each CPU. Thanks to the limited number of
boost groups, a small and memory-efficient per-CPU array of boost group
values (cpu_boost_groups) can be used; it is updated by
schedtune_boostgroup_update(), but only when a schedtune cgroup's boost
value is changed. This is expected to be an infrequent operation,
perhaps done just once at system boot time, or whenever user-space needs
to tune the boost value for a specific group of tasks (e.g. touch-boost
behavior on Android systems).
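
For reference, this is the shape of the per-CPU bookkeeping implied by
the accessors used in the diff below (bg->boost_max, bg->group[idx].boost,
bg->group[idx].tasks); struct boost_groups itself is defined by an
earlier patch in this series, so the exact layout and the array bound
name are assumptions:

	/* Inferred shape of the per-CPU bookkeeping */
	struct boost_groups {
		int boost_max;			/* max boost among active groups */
		struct {
			int boost;		/* boost value of this group */
			int tasks;		/* RUNNABLE tasks of this group on this CPU */
		} group[BOOSTGROUPS_MAX];	/* bound name assumed */
	};
	DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);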

Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Patrick Bellasi <patrick.bellasi@xxxxxxx>
---
kernel/sched/fair.c | 2 +-
kernel/sched/tune.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++
kernel/sched/tune.h | 14 ++++++++++
3 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 26c3911..313a815 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5581,7 +5581,7 @@ schedtune_margin(unsigned long signal, unsigned int boost)
 static inline unsigned long
 schedtune_cpu_margin(unsigned long util, int cpu)
 {
-	unsigned int boost = get_sysctl_sched_cfs_boost();
+	unsigned int boost = schedtune_cpu_boost(cpu);
 
 	if (boost == 0)
 		return 0UL;
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index 4eaea1d..6a51a4d 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -104,6 +104,73 @@ struct boost_groups {
 /* Boost groups affecting each CPU in the system */
 DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);
 
+static void
+schedtune_cpu_update(int cpu)
+{
+	struct boost_groups *bg;
+	unsigned int boost_max;
+	int idx;
+
+	bg = &per_cpu(cpu_boost_groups, cpu);
+
+	/* The root boost group is always active */
+	boost_max = bg->group[0].boost;
+	for (idx = 1; idx < boostgroups_max; ++idx) {
+		/*
+		 * A boost group affects a CPU only if it has
+		 * RUNNABLE tasks on that CPU
+		 */
+		if (bg->group[idx].tasks == 0)
+			continue;
+		boost_max = max(boost_max, bg->group[idx].boost);
+	}
+
+	bg->boost_max = boost_max;
+}
+
+static void
+schedtune_boostgroup_update(int idx, int boost)
+{
+	struct boost_groups *bg;
+	int cur_boost_max;
+	int old_boost;
+	int cpu;
+
+	/* Update per CPU boost groups */
+	for_each_possible_cpu(cpu) {
+		bg = &per_cpu(cpu_boost_groups, cpu);
+
+		/*
+		 * Keep track of current boost values to compute the per CPU
+		 * maximum only when it has been affected by the new value of
+		 * the updated boost group
+		 */
+		cur_boost_max = bg->boost_max;
+		old_boost = bg->group[idx].boost;
+
+		/* Update the boost value of this boost group */
+		bg->group[idx].boost = boost;
+
+		/* Check if this update increases the current max */
+		if (boost > cur_boost_max && bg->group[idx].tasks) {
+			bg->boost_max = boost;
+			continue;
+		}
+
+		/* Check if this update has decreased the current max */
+		if (cur_boost_max == old_boost && old_boost > boost)
+			schedtune_cpu_update(cpu);
+	}
+}
+
+int schedtune_cpu_boost(int cpu)
+{
+	struct boost_groups *bg;
+
+	bg = &per_cpu(cpu_boost_groups, cpu);
+	return bg->boost_max;
+}
+
 static u64
 boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
@@ -123,6 +190,9 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
 	st->boost = boost;
 	if (css == &root_schedtune.css)
 		sysctl_sched_cfs_boost = boost;
+	/* Update CPU boost */
+	schedtune_boostgroup_update(st->idx, st->boost);
+
 	return 0;
 }
 
@@ -199,6 +269,9 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
 static void
 schedtune_boostgroup_release(struct schedtune *st)
 {
+	/* Reset this boost group */
+	schedtune_boostgroup_update(st->idx, 0);
+
 	/* Keep track of allocated boost groups */
 	allocated_group[st->idx] = NULL;
 }
diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h
index 515d02a..e936b91 100644
--- a/kernel/sched/tune.h
+++ b/kernel/sched/tune.h
@@ -10,4 +10,18 @@

extern struct reciprocal_value schedtune_spc_rdiv;

+#ifdef CONFIG_CGROUP_SCHED_TUNE
+
+int schedtune_cpu_boost(int cpu);
+
+#else /* CONFIG_CGROUP_SCHED_TUNE */
+
+#define schedtune_cpu_boost(cpu) get_sysctl_sched_cfs_boost()
+
+#endif /* CONFIG_CGROUP_SCHED_TUNE */
+
+#else /* CONFIG_SCHED_TUNE */
+
+#define schedtune_cpu_boost(cpu) 0
+
#endif /* CONFIG_SCHED_TUNE */
--
2.10.1