Re: [PATCH v2 3/6] cpufreq: schedutil: ensure max frequency while running RT/DL tasks

From: Viresh Kumar
Date: Wed Jul 05 2017 - 02:01:59 EST


On 04-07-17, 18:34, Patrick Bellasi wrote:
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index 004ae18..98704d8 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -216,6 +216,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
> struct cpufreq_policy *policy = sg_policy->policy;
> unsigned long util, max;
> unsigned int next_f;
> + bool rt_mode;
> bool busy;
>
> /* Skip updates generated by sugov kthreads */
> @@ -230,7 +231,15 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
>
> busy = sugov_cpu_is_busy(sg_cpu);
>
> - if (flags & SCHED_CPUFREQ_RT_DL) {
> + /*
> + * While RT/DL tasks are running we do not want FAIR tasks to
> + * overwrite this CPU's flags, still we can update utilization and
> + * frequency (if required/possible) to be fair with these tasks.
> + */
> + rt_mode = task_has_dl_policy(current) ||
> + task_has_rt_policy(current) ||
> + (flags & SCHED_CPUFREQ_RT_DL);

We may want to create a separate inline function for the above check, as it is
already used twice in this patch.

But I was wondering if we can get some help from the scheduler to avoid such
code here. I understand that we don't want to do the aggregation in the
scheduler to keep it clean and keep such governor-specific things here.

But what about clearing the sched-class's flag from the .pick_next_task()
callbacks when they return NULL?

What about something like this instead (completely untested), with which we
don't need the 2/3 patch either:

diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
index d2be2ccbb372..e81a6b5591f5 100644
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -11,6 +11,10 @@
#define SCHED_CPUFREQ_DL (1U << 1)
#define SCHED_CPUFREQ_IOWAIT (1U << 2)

+#define SCHED_CPUFREQ_CLEAR (1U << 31)
+#define SCHED_CPUFREQ_CLEAR_RT (SCHED_CPUFREQ_CLEAR | SCHED_CPUFREQ_RT)
+#define SCHED_CPUFREQ_CLEAR_DL (SCHED_CPUFREQ_CLEAR | SCHED_CPUFREQ_DL)
+
#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)

#ifdef CONFIG_CPU_FREQ
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 076a2e31951c..f32e15d59d62 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -218,6 +218,9 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
unsigned int next_f;
bool busy;

+ if (flags & SCHED_CPUFREQ_CLEAR)
+ return;
+
sugov_set_iowait_boost(sg_cpu, time, flags);
sg_cpu->last_update = time;

@@ -296,7 +299,13 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,

sg_cpu->util = util;
sg_cpu->max = max;
- sg_cpu->flags = flags;
+
+ if (unlikely(flags & SCHED_CPUFREQ_CLEAR)) {
+ sg_cpu->flags &= ~(flags & ~SCHED_CPUFREQ_CLEAR);
+ return;
+ }
+
+ sg_cpu->flags |= flags;

sugov_set_iowait_boost(sg_cpu, time, flags);
sg_cpu->last_update = time;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a2ce59015642..441d6153d654 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1203,8 +1203,10 @@ pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
if (prev->sched_class == &dl_sched_class)
update_curr_dl(rq);

- if (unlikely(!dl_rq->dl_nr_running))
+ if (unlikely(!dl_rq->dl_nr_running)) {
+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_CLEAR_DL);
return NULL;
+ }

put_prev_task(rq, prev);

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 979b7341008a..bca9e4bb7ec4 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1556,8 +1556,10 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
if (prev->sched_class == &rt_sched_class)
update_curr_rt(rq);

- if (!rt_rq->rt_queued)
+ if (!rt_rq->rt_queued) {
+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_CLEAR_RT);
return NULL;
+ }

put_prev_task(rq, prev);

--
viresh