[PATCH 1/3] sched,fair: Move record_wakee()

From: Peter Zijlstra
Date: Tue May 10 2016 - 13:49:40 EST


Since I want to make ->task_woken() conditional on the task getting
migrated, we cannot use it to call record_wakee().

Move it to select_task_rq_fair(), which gets called in almost all the
same conditions. The only exception is if the woken task (@p) is
cpu-bound (as per the nr_cpus_allowed test in select_task_rq()).

Cc: Pavan Kondeti <pkondeti@xxxxxxxxxxxxxx>
Cc: Ben Segall <bsegall@xxxxxxxxxx>
Cc: Matt Fleming <matt@xxxxxxxxxxxxxxxxxxx>
Cc: Mike Galbraith <umgwanakikbuti@xxxxxxxxx>
Cc: Morten Rasmussen <morten.rasmussen@xxxxxxx>
Cc: Paul Turner <pjt@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: byungchul.park@xxxxxxx
Cc: Andrew Hunter <ahh@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/sched/fair.c | 61 ++++++++++++++++++++++++++++------------------------
1 file changed, 33 insertions(+), 28 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4811,24 +4811,6 @@ static unsigned long cpu_avg_load_per_ta
return 0;
}

-static void record_wakee(struct task_struct *p)
-{
- /*
- * Rough decay (wiping) for cost saving, don't worry
- * about the boundary, really active task won't care
- * about the loss.
- */
- if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) {
- current->wakee_flips >>= 1;
- current->wakee_flip_decay_ts = jiffies;
- }
-
- if (current->last_wakee != p) {
- current->last_wakee = p;
- current->wakee_flips++;
- }
-}
-
static void task_waking_fair(struct task_struct *p)
{
struct sched_entity *se = &p->se;
@@ -4848,7 +4830,6 @@ static void task_waking_fair(struct task
#endif

se->vruntime -= min_vruntime;
- record_wakee(p);
}

#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -4966,17 +4947,39 @@ static long effective_load(struct task_g

#endif

+static void record_wakee(struct task_struct *p)
+{
+ /*
+ * Only decay a single time; tasks that have less then 1 wakeup per
+ * jiffy will not have built up many flips.
+ */
+ if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) {
+ current->wakee_flips >>= 1;
+ current->wakee_flip_decay_ts = jiffies;
+ }
+
+ if (current->last_wakee != p) {
+ current->last_wakee = p;
+ current->wakee_flips++;
+ }
+}
+
/*
* Detect M:N waker/wakee relationships via a switching-frequency heuristic.
+ *
* A waker of many should wake a different task than the one last awakened
- * at a frequency roughly N times higher than one of its wakees. In order
- * to determine whether we should let the load spread vs consolodating to
- * shared cache, we look for a minimum 'flip' frequency of llc_size in one
- * partner, and a factor of lls_size higher frequency in the other. With
- * both conditions met, we can be relatively sure that the relationship is
- * non-monogamous, with partner count exceeding socket size. Waker/wakee
- * being client/server, worker/dispatcher, interrupt source or whatever is
- * irrelevant, spread criteria is apparent partner count exceeds socket size.
+ * at a frequency roughly N times higher than one of its wakees.
+ *
+ * In order to determine whether we should let the load spread vs consolidating
+ * to shared cache, we look for a minimum 'flip' frequency of llc_size in one
+ * partner, and a factor of lls_size higher frequency in the other.
+ *
+ * With both conditions met, we can be relatively sure that the relationship is
+ * non-monogamous, with partner count exceeding socket size.
+ *
+ * Waker/wakee being client/server, worker/dispatcher, interrupt source or
+ * whatever is irrelevant, spread criteria is apparent partner count exceeds
+ * socket size.
*/
static int wake_wide(struct task_struct *p)
{
@@ -5281,8 +5284,10 @@ select_task_rq_fair(struct task_struct *
int want_affine = 0;
int sync = wake_flags & WF_SYNC;

- if (sd_flag & SD_BALANCE_WAKE)
+ if (sd_flag & SD_BALANCE_WAKE) {
+ record_wakee(p);
want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
+ }

rcu_read_lock();
for_each_domain(cpu, tmp) {