[PATCH 5/7] sched/fair: global idleness counter for cpu.headroom

From: Song Liu
Date: Mon Apr 08 2019 - 17:46:26 EST


This patch introduces a global idleness counter in fair.c for the
cpu.headroom knob. The counter is based on the per-CPU get_idle_time().

The counter is accessed via the function:

unsigned long cfs_global_idleness_update(u64 now, u64 period);

The function returns the global idleness, as a fixed-point percentage,
accumulated since the previous call of the function. If the time between
the previous call and @now is shorter than @period, the function returns
the idleness calculated by the previous call.

Since cfs_global_idleness_update() will be called from a non-preemptible
context, struct cfs_global_idleness uses a raw_spin_lock instead of a
spin_lock.
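
As a simplified sketch of the calling context (not the actual kernel code,
and assuming cfs_b->lock is the raw spinlock declared in
kernel/sched/sched.h), the update nests inside an already non-preemptible
section, which is why a raw lock is needed underneath:

/*
 * Simplified sketch: the bandwidth refill runs under cfs_b->lock, a raw
 * spinlock, so the section is non-preemptible and nothing called from it
 * may sleep.  On PREEMPT_RT a plain spin_lock becomes a sleeping lock,
 * hence the idleness counter uses raw_spin_lock as well.
 */
static void refill_under_raw_lock(struct cfs_bandwidth *cfs_b)
{
	raw_spin_lock(&cfs_b->lock);
	/* calls cfs_global_idleness_update() when target_idle != 0 */
	__refill_cfs_bandwidth_runtime(cfs_b);
	raw_spin_unlock(&cfs_b->lock);
}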

Signed-off-by: Song Liu <songliubraving@xxxxxx>
---
fs/proc/stat.c | 4 +--
include/linux/kernel_stat.h | 2 ++
kernel/sched/fair.c | 64 +++++++++++++++++++++++++++++++++++++
3 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 80c305f206bb..b327ffdb169f 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -23,7 +23,7 @@

#ifdef arch_idle_time

-static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
+u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
{
u64 idle;

@@ -45,7 +45,7 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)

#else

-static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
+u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
{
u64 idle, idle_usecs = -1ULL;

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 7ee2bb43b251..337135272391 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -97,4 +97,6 @@ extern void account_process_tick(struct task_struct *, int user);

extern void account_idle_ticks(unsigned long ticks);

+u64 get_idle_time(struct kernel_cpustat *kcs, int cpu);
+
#endif /* _LINUX_KERNEL_STAT_H */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 65aa9d3b665f..49c68daffe7e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -116,6 +116,62 @@ static unsigned int capacity_margin = 1280;
* (default: 5 msec, units: microseconds)
*/
unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
+
+/* tracking global idleness for cpu.headroom */
+struct cfs_global_idleness {
+ u64 prev_total_idle_time;
+ u64 prev_timestamp;
+ unsigned long idle_percent; /* fixed-point */
+ raw_spinlock_t lock;
+};
+
+static struct cfs_global_idleness global_idleness;
+
+/*
+ * Calculate the global idleness, as a fixed-point percentage, accumulated
+ * since the previous call of this function. If the time between the
+ * previous call and @now is shorter than @period, return the idleness
+ * calculated by the previous call.
+ */
+static unsigned long cfs_global_idleness_update(u64 now, u64 period)
+{
+ u64 prev_timestamp, total_idle_time, delta_idle_time;
+ unsigned long idle_percent;
+ int cpu;
+
+ /*
+ * Fastpath: if idleness has been updated within the last period
+ * of time, just return previous idleness.
+ */
+ prev_timestamp = READ_ONCE(global_idleness.prev_timestamp);
+ if (prev_timestamp + period >= now)
+ return READ_ONCE(global_idleness.idle_percent);
+
+ raw_spin_lock_irq(&global_idleness.lock);
+ if (global_idleness.prev_timestamp + period >= now) {
+ idle_percent = global_idleness.idle_percent;
+ goto out;
+ }
+
+ /* Slowpath: calculate the average idleness since prev_timestamp */
+ total_idle_time = 0;
+ for_each_online_cpu(cpu)
+ total_idle_time += get_idle_time(&kcpustat_cpu(cpu), cpu);
+
+ delta_idle_time = total_idle_time -
+ global_idleness.prev_total_idle_time;
+
+ idle_percent = div64_u64((delta_idle_time << FSHIFT) * 100,
+ num_online_cpus() *
+ (now - global_idleness.prev_timestamp));
+
+ WRITE_ONCE(global_idleness.prev_total_idle_time, total_idle_time);
+ WRITE_ONCE(global_idleness.prev_timestamp, now);
+ WRITE_ONCE(global_idleness.idle_percent, idle_percent);
+out:
+ raw_spin_unlock_irq(&global_idleness.lock);
+ return idle_percent;
+}
#endif

static inline void update_load_add(struct load_weight *lw, unsigned long inc)
@@ -4293,6 +4349,11 @@ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
cfs_b->runtime = cfs_b->quota;
cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period);
cfs_b->expires_seq++;
+
+ if (cfs_b->target_idle == 0)
+ return;
+
+ cfs_global_idleness_update(now, cfs_b->period);
}

static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -10676,4 +10737,7 @@ __init void init_sched_fair_class(void)
#endif
#endif /* SMP */

+#ifdef CONFIG_CFS_BANDWIDTH
+ raw_spin_lock_init(&global_idleness.lock);
+#endif
}
--
2.17.1