[RFC][PATCH 7/7] sched: debug muck -- not for merging
From: Peter Zijlstra
Date: Mon May 09 2016 - 06:58:47 EST
Add a few knobs to poke while playing with the new code.
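
The knobs: OLD_IDLE switches back to the old whole-group idle scan,
ORDER_IDLE starts the scan at the first cpu of the LLC domain instead of
at the wakeup target, IDLE_CORE/IDLE_CPU/IDLE_SMT gate the three stages
of the new scan, AVG_CPU gates the avg_idle vs avg_scan_cost cut-off and
PRINT_AVG traces the numbers feeding it. The sched_shift sysctl replaces
the hard-coded 512 divisor in that cut-off.

With CONFIG_SCHED_DEBUG the features should be flippable through the
usual debugfs file (paths assume debugfs on /sys/kernel/debug), e.g.:

  echo OLD_IDLE    > /sys/kernel/debug/sched_features
  echo NO_IDLE_SMT > /sys/kernel/debug/sched_features
  echo 7           > /proc/sys/kernel/sched_shift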
Not-Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
 include/linux/sched/sysctl.h |  1 +
 kernel/sched/fair.c          | 86 ++++++++++++++++++++++++++++++++++---------
 kernel/sched/features.h      | 10 +++++
 kernel/sysctl.c              |  7 +++
 4 files changed, 86 insertions(+), 18 deletions(-)
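
A note on the cut-off arithmetic (numbers made up for illustration):
the default sched_shift of 9 keeps the old behaviour, since
avg_idle >> 9 == avg_idle / 512. With avg_idle = 500000ns and
avg_scan_cost = 2000ns, 500000 >> 9 = 976 < 2000 and select_idle_cpu()
bails without scanning; at sched_shift = 7, 500000 >> 7 = 3906 >= 2000
and the scan runs. A bigger shift thus means bailing out more eagerly;
0 scans whenever avg_idle >= avg_scan_cost.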
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -37,6 +37,7 @@ extern unsigned int sysctl_sched_migrati
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_time_avg;
 extern unsigned int sysctl_sched_shares_window;
+extern unsigned int sysctl_sched_shift;
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
                 void __user *buffer, size_t *length,
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -114,6 +114,8 @@ unsigned int __read_mostly sysctl_sched_
 unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
 #endif
 
+const_debug unsigned int sysctl_sched_shift = 9;
+
 static inline void update_load_add(struct load_weight *lw, unsigned long inc)
 {
         lw->weight += inc;
@@ -5354,18 +5356,24 @@ static inline int select_idle_smt(struct
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
         struct sched_domain *this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
-        u64 avg_idle = this_rq()->avg_idle;
-        u64 avg_cost = this_sd->avg_scan_cost;
         u64 time, cost;
         s64 delta;
         int cpu, wrap;
 
-        /*
-         * Due to large variance we need a large fuzz factor; hackbench in
-         * particularly is sensitive here.
-         */
-        if ((avg_idle / 512) < avg_cost)
-                return -1;
+        if (sched_feat(AVG_CPU)) {
+                u64 avg_idle = this_rq()->avg_idle;
+                u64 avg_cost = this_sd->avg_scan_cost;
+
+                if (sched_feat(PRINT_AVG))
+                        trace_printk("idle: %Ld cost: %Ld\n", avg_idle, avg_cost);
+
+                /*
+                 * Due to large variance we need a large fuzz factor;
+                 * hackbench in particular is sensitive here.
+                 */
+                if ((avg_idle >> sysctl_sched_shift) < avg_cost)
+                        return -1;
+        }
 
         time = local_clock();
 
@@ -5379,6 +5387,7 @@ static int select_idle_cpu(struct task_s
         time = local_clock() - time;
         cost = this_sd->avg_scan_cost;
         delta = (s64)(time - cost) / 8;
+        /* trace_printk("time: %Ld cost: %Ld delta: %Ld\n", time, cost, delta); */
         this_sd->avg_scan_cost += delta;
 
         return cpu;
@@ -5390,7 +5399,7 @@ static int select_idle_cpu(struct task_s
 static int select_idle_sibling(struct task_struct *p, int target)
 {
         struct sched_domain *sd;
-        int i = task_cpu(p);
+        int start, i = task_cpu(p);
 
         if (idle_cpu(target))
                 return target;
@@ -5401,21 +5410,62 @@ static int select_idle_sibling(struct ta
         if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
                 return i;
 
+        start = target;
+        if (sched_feat(ORDER_IDLE))
+                start = per_cpu(sd_llc_id, target); /* first cpu in llc domain */
+
         sd = rcu_dereference(per_cpu(sd_llc, target));
         if (!sd)
                 return target;
 
-        i = select_idle_core(p, sd, target);
-        if ((unsigned)i < nr_cpumask_bits)
-                return i;
+        if (sched_feat(OLD_IDLE)) {
+                struct sched_group *sg;
 
-        i = select_idle_cpu(p, sd, target);
-        if ((unsigned)i < nr_cpumask_bits)
-                return i;
+                for_each_lower_domain(sd) {
+                        sg = sd->groups;
+                        do {
+                                if (!cpumask_intersects(sched_group_cpus(sg),
+                                                        tsk_cpus_allowed(p)))
+                                        goto next;
+
+                                /* Ensure the entire group is idle */
+                                for_each_cpu(i, sched_group_cpus(sg)) {
+                                        if (i == target || !idle_cpu(i))
+                                                goto next;
+                                }
 
-        i = select_idle_smt(p, sd, target);
-        if ((unsigned)i < nr_cpumask_bits)
-                return i;
+                                /*
+                                 * It doesn't matter which cpu we pick, the
+                                 * whole group is idle.
+                                 */
+                                target = cpumask_first_and(sched_group_cpus(sg),
+                                                tsk_cpus_allowed(p));
+                                goto done;
+next:
+                                sg = sg->next;
+                        } while (sg != sd->groups);
+                }
+done:
+                return target;
+        }
+
+        if (sched_feat(IDLE_CORE)) {
+                i = select_idle_core(p, sd, start);
+                if ((unsigned)i < nr_cpumask_bits)
+                        return i;
+        }
+
+        if (sched_feat(IDLE_CPU)) {
+                i = select_idle_cpu(p, sd, start);
+                if ((unsigned)i < nr_cpumask_bits)
+                        return i;
+        }
+
+        if (sched_feat(IDLE_SMT)) {
+                i = select_idle_smt(p, sd, start);
+                if ((unsigned)i < nr_cpumask_bits)
+                        return i;
+        }
 
         return target;
 }
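
For reference, with the feature defaults (OLD_IDLE and ORDER_IDLE off,
the other gates on) the tail of select_idle_sibling() reduces to the
plain three-stage scan below; a condensed sketch, not the literal
post-patch source:

        i = select_idle_core(p, sd, target);    /* fully idle core first */
        if ((unsigned)i < nr_cpumask_bits)
                return i;

        i = select_idle_cpu(p, sd, target);     /* then any idle cpu in the LLC */
        if ((unsigned)i < nr_cpumask_bits)
                return i;

        i = select_idle_smt(p, sd, target);     /* finally an idle SMT sibling */
        if ((unsigned)i < nr_cpumask_bits)
                return i;

        return target;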
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -69,3 +69,13 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
 SCHED_FEAT(ATTACH_AGE_LOAD, true)
 
+SCHED_FEAT(OLD_IDLE, false)
+SCHED_FEAT(ORDER_IDLE, false)
+
+SCHED_FEAT(IDLE_CORE, true)
+SCHED_FEAT(IDLE_CPU, true)
+SCHED_FEAT(AVG_CPU, true)
+SCHED_FEAT(PRINT_AVG, false)
+
+SCHED_FEAT(IDLE_SMT, true)
+
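
(For anyone new to the sched_feat() machinery: features.h is included
several times with different SCHED_FEAT() definitions; simplified from
kernel/sched/sched.h, the non-jump-label flavour:)

#define SCHED_FEAT(name, enabled)       \
        __SCHED_FEAT_##name ,

enum {
#include "features.h"
        __SCHED_FEAT_NR,
};

#undef SCHED_FEAT

/* the sched_feat() gates above test the corresponding bit */
#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))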
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -334,6 +334,13 @@ static struct ctl_table kern_table[] = {
                 .proc_handler   = proc_dointvec,
         },
         {
+                .procname       = "sched_shift",
+                .data           = &sysctl_sched_shift,
+                .maxlen         = sizeof(unsigned int),
+                .mode           = 0644,
+                .proc_handler   = proc_dointvec,
+        },
+        {
                 .procname       = "sched_nr_migrate",
                 .data           = &sysctl_sched_nr_migrate,
                 .maxlen         = sizeof(unsigned int),
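
One caveat while poking: proc_dointvec does no range checking (that
would be proc_dointvec_minmax with extra1/extra2 set), so writing a
negative value or anything >= 64 makes the avg_idle >> sysctl_sched_shift
shift undefined. Presumably fine for a throwaway debug knob.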