[GIT pull] scheduler updates for 4.5

From: Thomas Gleixner
Date: Sun Jan 31 2016 - 07:42:30 EST


Linus,

please pull the latest sched-urgent-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched-urgent-for-linus

Three small fixes in the scheduler/core:
- A use-after-free in the NUMA code
- A crash in the NUMA init code
- A simple spelling fix

Thanks,

tglx

------------------>
Gavin Guo (1):
sched/numa: Fix use-after-free bug in the task_numa_compare

Raghavendra K T (1):
sched: Fix crash in sched_init_numa()

Zhen Lei (1):
pid: Fix spelling in comments


kernel/pid.c | 2 +-
kernel/sched/core.c | 2 +-
kernel/sched/fair.c | 30 +++++++++++++++++++++++-------
3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/kernel/pid.c b/kernel/pid.c
index 78b3d9f80d44..e793d091f680 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -588,7 +588,7 @@ void __init pidhash_init(void)

void __init pidmap_init(void)
{
- /* Veryify no one has done anything silly */
+ /* Verify no one has done anything silly: */
BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);

/* bump default and minimum pid_max based on number of cpus */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 44253adb3c36..474658b51fe6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6840,7 +6840,7 @@ static void sched_init_numa(void)

sched_domains_numa_masks[i][j] = mask;

- for (k = 0; k < nr_node_ids; k++) {
+ for_each_node(k) {
if (node_distance(j, k) > sched_domains_numa_distance[i])
continue;
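
For context on the sched_init_numa() change above: nr_node_ids is the
highest possible node ID plus one, so on a machine with a sparse node ID
space a plain 0..nr_node_ids loop also visits IDs that name no existing
node, and calling node_distance() on such an ID is what crashed.
for_each_node() walks only the IDs actually present in the node mask.
A minimal userspace sketch of the difference follows; the bitmap,
node_exists() and the for_each_node_sketch() macro are illustrative
stand-ins, not the kernel's implementations:

#include <stdio.h>

/* Illustrative stand-in for the kernel's possible-node bitmap:
 * nodes 0 and 2 exist, node 1 is a hole in the ID space. */
static const unsigned long node_mask = (1UL << 0) | (1UL << 2);
static const int nr_node_ids = 3;		/* highest node ID + 1 */

static int node_exists(int n)
{
	return (node_mask >> n) & 1;
}

/* Userspace analogue of for_each_node(): visit set bits only. */
#define for_each_node_sketch(n) \
	for ((n) = 0; (n) < nr_node_ids; (n)++) \
		if (!node_exists(n)) continue; else

int main(void)
{
	int k;

	/* The old loop also visits the hole at k == 1 ... */
	for (k = 0; k < nr_node_ids; k++)
		printf("linear loop visits node %d%s\n", k,
		       node_exists(k) ? "" : "  <-- no such node");

	/* ... while the mask-driven walk skips it. */
	for_each_node_sketch(k)
		printf("for_each_node visits node %d\n", k);

	return 0;
}

The one-line core.c change has the same shape: the iteration domain
moves from a dense integer range to the set of nodes that exist.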

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1926606ece80..56b7d4b83947 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1220,8 +1220,6 @@ static void task_numa_assign(struct task_numa_env *env,
{
if (env->best_task)
put_task_struct(env->best_task);
- if (p)
- get_task_struct(p);

env->best_task = p;
env->best_imp = imp;
@@ -1289,20 +1287,30 @@ static void task_numa_compare(struct task_numa_env *env,
long imp = env->p->numa_group ? groupimp : taskimp;
long moveimp = imp;
int dist = env->dist;
+ bool assigned = false;

rcu_read_lock();

raw_spin_lock_irq(&dst_rq->lock);
cur = dst_rq->curr;
/*
- * No need to move the exiting task, and this ensures that ->curr
- * wasn't reaped and thus get_task_struct() in task_numa_assign()
- * is safe under RCU read lock.
- * Note that rcu_read_lock() itself can't protect from the final
- * put_task_struct() after the last schedule().
+ * No need to move the exiting task or idle task.
*/
if ((cur->flags & PF_EXITING) || is_idle_task(cur))
cur = NULL;
+ else {
+ /*
+ * The task_struct must be protected here to protect the
+ * p->numa_faults access in the task_weight since the
+ * numa_faults could already be freed in the following path:
+ * finish_task_switch()
+ * --> put_task_struct()
+ * --> __put_task_struct()
+ * --> task_numa_free()
+ */
+ get_task_struct(cur);
+ }
+
raw_spin_unlock_irq(&dst_rq->lock);

/*
@@ -1386,6 +1394,7 @@ balance:
*/
if (!load_too_imbalanced(src_load, dst_load, env)) {
imp = moveimp - 1;
+ put_task_struct(cur);
cur = NULL;
goto assign;
}
@@ -1411,9 +1420,16 @@ balance:
env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);

assign:
+ assigned = true;
task_numa_assign(env, cur, imp);
unlock:
rcu_read_unlock();
+ /*
+ * The dst_rq->curr isn't assigned. The protection for task_struct is
+ * finished.
+ */
+ if (cur && !assigned)
+ put_task_struct(cur);
}

static void task_numa_find_cpu(struct task_numa_env *env,
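
The shape of the fair.c fix is the usual reference-count ownership
discipline: pin cur with get_task_struct() while dst_rq->lock still
guarantees it is alive, then make sure exactly one put_task_struct()
runs on every path, either because task_numa_assign() takes over the
reference or because the tail of task_numa_compare() drops it when
nothing was assigned. A minimal userspace sketch of that ownership
flow; task_t, task_get()/task_put(), assign_best() and compare() are
illustrative stand-ins, not the kernel helpers:

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

/* Illustrative stand-in for a refcounted task_struct. */
typedef struct task {
	int refcount;
	const char *name;
} task_t;

static void task_get(task_t *t)
{
	t->refcount++;
}

static void task_put(task_t *t)
{
	if (--t->refcount == 0) {
		printf("freeing %s\n", t->name);
		free(t);
	}
}

/* Stand-in for task_numa_assign(): consumes the caller's reference
 * and releases the reference held on the previous best task. */
static task_t *best;

static void assign_best(task_t *t)
{
	if (best)
		task_put(best);
	best = t;
}

/* Mirrors the shape of the fixed task_numa_compare(). */
static void compare(task_t *cur, bool want_assign)
{
	bool assigned = false;

	if (cur)
		task_get(cur);	/* pin cur before dereferencing it */

	/* ... cur->... may be inspected safely here ... */

	if (want_assign) {
		assigned = true;
		assign_best(cur);	/* ownership of our reference moves */
	}

	/* No one took ownership: drop the reference we took above. */
	if (cur && !assigned)
		task_put(cur);
}

int main(void)
{
	task_t *t = malloc(sizeof(*t));

	t->refcount = 1;	/* the owner's initial reference */
	t->name = "cur";

	compare(t, false);	/* get/put balanced: refcount back to 1 */
	compare(t, true);	/* reference handed to 'best': refcount 2 */
	assign_best(NULL);	/* 'best' drops its reference: back to 1 */
	task_put(t);		/* drop the last reference: frees t */
	return 0;
}

As the new comment in the patch notes, RCU alone is not enough here:
cur->numa_faults can already have been freed via finish_task_switch()
--> put_task_struct() --> __put_task_struct() --> task_numa_free(), so
the reference must be taken under dst_rq->lock before any dereference.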