[PATCH v3 2/3] sched/rt: add rt_rq utilization tracking

From: Vincent Guittot
Date: Wed Nov 22 2017 - 09:36:35 EST


The schedutil governor relies on the cfs_rq's util_avg to choose the OPP when
cfs tasks are running. When the CPU is overloaded by cfs and rt tasks, cfs
tasks are preempted by rt tasks, and in this case util_avg reflects the
remaining capacity that is used by cfs tasks, not what cfs tasks want to use.
In such a case, schedutil can select a lower OPP even though the CPU is
overloaded. In order to have a more accurate view of the utilization of the
CPU, we track the utilization that is "stolen" by RT tasks.

Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
---
kernel/sched/fair.c | 2 ++
kernel/sched/pelt.c | 23 +++++++++++++++++++++++
kernel/sched/pelt.h | 7 +++++++
kernel/sched/rt.c | 9 +++++++++
kernel/sched/sched.h | 1 +
5 files changed, 42 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b88550e..57d486a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7029,6 +7029,7 @@ static void update_blocked_averages(int cpu)
if (cfs_rq_is_decayed(cfs_rq))
list_del_leaf_cfs_rq(cfs_rq);
}
+ update_rt_rq_load_avg(rq_clock_task(rq), cpu, &rq->rt, 0);
rq_unlock_irqrestore(rq, &rf);
}

@@ -7088,6 +7089,7 @@ static inline void update_blocked_averages(int cpu)
rq_lock_irqsave(rq, &rf);
update_rq_clock(rq);
update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
+ update_rt_rq_load_avg(rq_clock_task(rq), cpu, &rq->rt, 0);
rq_unlock_irqrestore(rq, &rf);
}

diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index da6d84f..c8b5d23 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -306,3 +306,26 @@ int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)

return 0;
}
+
+/*
+ * rt_rq: update the utilization tracking of @rt_rq at time @now.
+ *
+ * se->avg.util_sum is not tracked for rt entities, so util_sum cannot be
+ * their \Sum; @running feeds all three ___update_load_sum() contributions,
+ * giving util_sum = cpu_scale * load_sum and runnable_load_sum = load_sum.
+ * Returns 1 if ___update_load_avg() refreshed the averages, 0 otherwise.
+ */
+
+int update_rt_rq_load_avg(u64 now, int cpu, struct rt_rq *rt_rq, int running)
+{
+ if (___update_load_sum(now, cpu, &rt_rq->avg,
+ running,
+ running,
+ running)) {
+
+ ___update_load_avg(&rt_rq->avg, 1, 1);
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index c312d8c..78a2107 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -3,6 +3,7 @@
int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se);
int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se);
int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq);
+int update_rt_rq_load_avg(u64 now, int cpu, struct rt_rq *rt_rq, int running);

#else

@@ -12,6 +13,12 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
return 0;
}

+static inline int
+update_rt_rq_load_avg(u64 now, int cpu, struct rt_rq *rt_rq, int running)
+{
+ return 0; /* stub for the #else configuration: never reports an update */
+}
+
#endif


diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d8c43d7..2ddcc27 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -9,6 +9,8 @@
#include <linux/slab.h>
#include <linux/irq_work.h>

+#include "pelt.h"
+
int sched_rr_timeslice = RR_TIMESLICE;
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;

@@ -1569,6 +1571,10 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)

queue_push_tasks(rq);

+ if (p)
+ update_rt_rq_load_avg(rq_clock_task(rq), cpu_of(rq), rt_rq,
+ rq->curr->sched_class == &rt_sched_class);
+
return p;
}

@@ -1576,6 +1582,8 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
update_curr_rt(rq);

+ update_rt_rq_load_avg(rq_clock_task(rq), cpu_of(rq), &rq->rt, 1);
+
/*
* The previous task needs to be made eligible for pushing
* if it is still active
@@ -2283,6 +2291,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
struct sched_rt_entity *rt_se = &p->rt;

update_curr_rt(rq);
+ update_rt_rq_load_avg(rq_clock_task(rq), cpu_of(rq), &rq->rt, 1);

watchdog(rq, p);

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6fefef6..c7bd5dd 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -529,6 +529,7 @@ struct rt_rq {
int overloaded;
struct plist_head pushable_tasks;

+ struct sched_avg avg;
#endif /* CONFIG_SMP */
int rt_queued;

--
2.7.4