On Thu, May 08, 2014 at 01:23:29PM -0400, riel@xxxxxxxxxx wrote:
> @@ -930,7 +987,7 @@ static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
>   */
>  static inline unsigned long task_weight(struct task_struct *p, int nid)
>  {
> -	unsigned long total_faults;
> +	unsigned long total_faults, score;
>
>  	if (!p->numa_faults_memory)
>  		return 0;
> @@ -940,15 +997,32 @@ static inline unsigned long task_weight(struct task_struct *p, int nid)
>  	if (!total_faults)
>  		return 0;
>
> -	return 1000 * task_faults(p, nid) / total_faults;
> +	score = 1000 * task_faults(p, nid);
> +	score += nearby_nodes_score(p, nid, true);
> +
> +	score /= total_faults;
> +
> +	return score;
>  }
>
>  static inline unsigned long group_weight(struct task_struct *p, int nid)
>  {
> -	if (!p->numa_group || !p->numa_group->total_faults)
> +	unsigned long total_faults, score;
> +
> +	if (!p->numa_group)
> +		return 0;
> +
> +	total_faults = p->numa_group->total_faults;
> +
> +	if (!total_faults)
>  		return 0;
>
> -	return 1000 * group_faults(p, nid) / p->numa_group->total_faults;
> +	score = 1000 * group_faults(p, nid);
> +	score += nearby_nodes_score(p, nid, false);
> +
> +	score /= total_faults;
> +
> +	return score;
>  }
OK, and that's just sad..
See task_numa_placement(), which does:
	for_each_online_node(nid) {
		weight = task_weight(p, nid) + group_weight(p, nid);
		if (weight > max_weight) {
			max_weight = weight;
			max_nid = nid;
		}
	}
So not only is that loop now O(nr_nodes^2); each of its iterations also
walks all nodes twice, once in task_weight() and once in group_weight().
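
The culprit is nearby_nodes_score(); it isn't quoted above, but given the
call sites it has to walk the node mask itself. Something of this shape
(my sketch; the body, the distance weighting, all of it is assumed rather
than quoted from the patch):

static unsigned long nearby_nodes_score(struct task_struct *p, int nid,
					bool task)
{
	unsigned long score = 0;
	int node;

	/* the inner O(nr_nodes) walk that blows up the callers */
	for_each_online_node(node) {
		if (node == nid)
			continue;

		/* weigh faults on other nodes down by NUMA distance */
		score += 1000 * (task ? task_faults(p, node) :
				       group_faults(p, node)) /
			 node_distance(nid, node);
	}

	return score;
}

Whatever the exact body, each task_weight()/group_weight() call went from
O(1) to O(nr_nodes).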
Also, the {task,group}_weight() functions were cheap-ish (/me mumbles
something about people using !2^n scaling factors for no sane reason),
and they're used all over with that in mind.
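
To spell the mumble out: with a 2^n scale, scaling and unscaling are
single shifts, while with 1000 the compiler has to emit multiply-high
tricks or a real division. A standalone toy (plain userspace C, not
kernel code):

#include <stdio.h>

#define SCALE_DEC	1000UL	/* what {task,group}_weight() use */
#define SCALE_SHIFT	10	/* 2^10 = 1024 would be the sane pick */

int main(void)
{
	unsigned long faults = 300, total = 800;
	unsigned long w_dec = SCALE_DEC * faults / total;	/* 375 */
	unsigned long w_bin = (faults << SCALE_SHIFT) / total;	/* 384 */

	/* unscaling: constant division vs. a single shift */
	printf("%lu %lu\n", w_dec * total / SCALE_DEC,
			    (w_bin * total) >> SCALE_SHIFT);
	return 0;
}

Both print 300; the 2^n version gets there with a shift where the decimal
one needs the division.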
But look what you did to migrate_improves_locality(): that will now
iterate all nodes _4_ times, and it's called for every single task we try
to migrate during load balance, while holding rq->lock.
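
(Those 4: from memory the deciding test in migrate_improves_locality() is
roughly this, paraphrased rather than quoted:

	if (task_weight(p, dst_nid) > task_weight(p, src_nid) &&
	    group_weight(p, dst_nid) > group_weight(p, src_nid))
		return true;

so after this patch every one of those four calls walks all nodes, per
candidate task, with rq->lock held.)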