[PATCH 3/10] explict migrate tasks for cpu removal

From: Shaohua Li
Date: Mon May 08 2006 - 01:47:18 EST



Make cpu_down explictly migrate tasks off dead cpus, this is to fix a dead
lock in bulk cpu hotremoval (An example of the deadlock is one dead cpu is
notifing udev the cpu is dead and khelper thread is on another dead cpu).
Detail info is in the code.

Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx>
Signed-off-by: Shaohua Li <shaohua.li@xxxxxxxxx>
---

linux-2.6.17-rc3-root/include/linux/sched.h | 1
linux-2.6.17-rc3-root/kernel/cpu.c | 7 ++
linux-2.6.17-rc3-root/kernel/sched.c | 67 +++++++++++++++++-----------
3 files changed, 48 insertions(+), 27 deletions(-)

diff -puN include/linux/sched.h~explict-migrate-tasks include/linux/sched.h
--- linux-2.6.17-rc3/include/linux/sched.h~explict-migrate-tasks 2006-05-07 07:45:14.000000000 +0800
+++ linux-2.6.17-rc3-root/include/linux/sched.h 2006-05-07 07:45:14.000000000 +0800
@@ -997,6 +997,7 @@ extern void sched_exec(void);

#ifdef CONFIG_HOTPLUG_CPU
extern void idle_task_exit(void);
+extern void migrate_tasks_off_dead_cpu(int cpu);
#else
static inline void idle_task_exit(void) {}
#endif
diff -puN kernel/cpu.c~explict-migrate-tasks kernel/cpu.c
--- linux-2.6.17-rc3/kernel/cpu.c~explict-migrate-tasks 2006-05-07 07:45:14.000000000 +0800
+++ linux-2.6.17-rc3-root/kernel/cpu.c 2006-05-07 07:45:14.000000000 +0800
@@ -173,7 +173,12 @@ int cpu_down(unsigned int cpu)
kthread_bind(p, get_cpu());
put_cpu();

- /* CPU is completely dead: tell everyone. Too late to complain. */
+ /*
+ * CPU is completely dead: tell everyone. Too late to complain.
+ * we explictly call migrate_tasks here, otherwise there is a deadlock,
+ * one cpu's notifier handler is waiting for tasks in other dead CPUs
+ */
+ migrate_tasks_off_dead_cpu(cpu);
if (blocking_notifier_call_chain(&cpu_chain, CPU_DEAD,
(void *)(long)cpu) == NOTIFY_BAD)
BUG();
diff -puN kernel/sched.c~explict-migrate-tasks kernel/sched.c
--- linux-2.6.17-rc3/kernel/sched.c~explict-migrate-tasks 2006-05-07 07:45:14.000000000 +0800
+++ linux-2.6.17-rc3-root/kernel/sched.c 2006-05-07 07:45:14.000000000 +0800
@@ -4739,6 +4739,39 @@ static void migrate_dead_tasks(unsigned
}
}
}
+
+void migrate_tasks_off_dead_cpu(int cpu)
+{
+ struct runqueue *rq;
+ unsigned long flags;
+
+ migrate_live_tasks(cpu);
+ rq = cpu_rq(cpu);
+ kthread_stop(rq->migration_thread);
+ rq->migration_thread = NULL;
+ /* Idle task back to normal (off runqueue, low prio) */
+ rq = task_rq_lock(rq->idle, &flags);
+ deactivate_task(rq->idle, rq);
+ rq->idle->static_prio = MAX_PRIO;
+ __setscheduler(rq->idle, SCHED_NORMAL, 0);
+ migrate_dead_tasks(cpu);
+ task_rq_unlock(rq, &flags);
+ migrate_nr_uninterruptible(rq);
+ BUG_ON(rq->nr_running != 0);
+
+ /* No need to migrate the tasks: it was best-effort if
+ * they didn't do lock_cpu_hotplug(). Just wake up
+ * the requestors. */
+ spin_lock_irq(&rq->lock);
+ while (!list_empty(&rq->migration_queue)) {
+ migration_req_t *req;
+ req = list_entry(rq->migration_queue.next,
+ migration_req_t, list);
+ list_del_init(&req->list);
+ complete(&req->done);
+ }
+ spin_unlock_irq(&rq->lock);
+}
#endif /* CONFIG_HOTPLUG_CPU */

/*
@@ -4779,32 +4812,14 @@ static int migration_call(struct notifie
cpu_rq(cpu)->migration_thread = NULL;
break;
case CPU_DEAD:
- migrate_live_tasks(cpu);
- rq = cpu_rq(cpu);
- kthread_stop(rq->migration_thread);
- rq->migration_thread = NULL;
- /* Idle task back to normal (off runqueue, low prio) */
- rq = task_rq_lock(rq->idle, &flags);
- deactivate_task(rq->idle, rq);
- rq->idle->static_prio = MAX_PRIO;
- __setscheduler(rq->idle, SCHED_NORMAL, 0);
- migrate_dead_tasks(cpu);
- task_rq_unlock(rq, &flags);
- migrate_nr_uninterruptible(rq);
- BUG_ON(rq->nr_running != 0);
-
- /* No need to migrate the tasks: it was best-effort if
- * they didn't do lock_cpu_hotplug(). Just wake up
- * the requestors. */
- spin_lock_irq(&rq->lock);
- while (!list_empty(&rq->migration_queue)) {
- migration_req_t *req;
- req = list_entry(rq->migration_queue.next,
- migration_req_t, list);
- list_del_init(&req->list);
- complete(&req->done);
- }
- spin_unlock_irq(&rq->lock);
+ /*
+ * Shouldn't call migrate_tasks_off_dead_cpu here. We explictly
+ * migrate tasks off dead cpu in cpu_down, otherwise there is
+ * a deadlock in bulk cpu removal. When one dead CPU's notifier
+ * handler is waitting for tasks in other dead CPUs' runqueue
+ * and in the meantime other dead CPUs' tasks aren't migrated
+ * to online cpus, the deadlock occurs.
+ */
break;
#endif
}
_

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/